diff --git a/.github/actions/install-rust/main.js b/.github/actions/install-rust/main.js index 9f16b81834..2de34dbf16 100644 --- a/.github/actions/install-rust/main.js +++ b/.github/actions/install-rust/main.js @@ -30,3 +30,8 @@ set_env("CARGO_INCREMENTAL", "0"); // Turn down debuginfo from 2 to 1 to help save disk space set_env("CARGO_PROFILE_DEV_DEBUG", "1"); set_env("CARGO_PROFILE_TEST_DEBUG", "1"); + +if (process.platform === 'darwin') { + set_env("CARGO_PROFILE_DEV_SPLIT_DEBUGINFO", "unpacked"); + set_env("CARGO_PROFILE_TEST_SPLIT_DEBUGINFO", "unpacked"); +} diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 520014269c..71dc0a3b3c 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -48,6 +48,7 @@ jobs: - uses: actions/checkout@v2 with: submodules: true + - run: rustup update stable && rustup default stable - run: | set -e curl -L https://github.com/rust-lang-nursery/mdBook/releases/download/v0.4.4/mdbook-v0.4.4-x86_64-unknown-linux-gnu.tar.gz | tar xzf - @@ -73,12 +74,15 @@ jobs: - uses: actions/checkout@v2 with: submodules: true - # Note that we use nightly Rust for the doc_cfg feature (enabled via `nightlydoc` above) - # This version is an older nightly for the new x64 backend (see below) - uses: ./.github/actions/install-rust with: - toolchain: nightly-2020-12-26 - - run: cargo doc --no-deps --all --exclude wasmtime-cli --exclude test-programs --exclude cranelift-codegen-meta + toolchain: nightly-2021-04-11 + - run: | + cargo doc --no-deps --workspace \ + --exclude wasmtime-cli \ + --exclude test-programs \ + --exclude cranelift-codegen-meta \ + --exclude 'peepmatic*' - run: cargo doc --package cranelift-codegen-meta --document-private-items - uses: actions/upload-artifact@v1 with: @@ -165,7 +169,7 @@ jobs: # flags to rustc. 
- uses: ./.github/actions/install-rust with: - toolchain: nightly + toolchain: nightly-2021-04-11 - run: cargo install cargo-fuzz --vers "^0.8" - run: cargo fetch working-directory: ./fuzz @@ -178,16 +182,9 @@ jobs: - uses: actions/checkout@v2 with: submodules: true + - run: rustup update stable && rustup default stable - name: Test `peepmatic` - run: | - cargo test \ - --package peepmatic \ - --package peepmatic-automata \ - --package peepmatic-fuzzing \ - --package peepmatic-macro \ - --package peepmatic-runtime \ - --package peepmatic-test \ - --package peepmatic-souper + run: cargo test --package 'peepmatic*' - name: Rebuild Peepmatic-based peephole optimizers run: | cargo test \ @@ -211,6 +208,7 @@ jobs: name: Test runs-on: ${{ matrix.os }} strategy: + fail-fast: false matrix: build: [stable, beta, nightly, windows, macos] include: @@ -222,7 +220,7 @@ jobs: rust: beta - build: nightly os: ubuntu-latest - rust: nightly + rust: nightly-2021-04-11 - build: macos os: macos-latest rust: stable @@ -270,18 +268,10 @@ jobs: - run: | cargo test \ --features test-programs/test_programs \ - --all \ - --exclude lightbeam \ - --exclude wasmtime-lightbeam \ - --exclude wasmtime-wasi-nn \ - --exclude wasmtime-wasi-crypto \ - --exclude peepmatic \ - --exclude peepmatic-automata \ - --exclude peepmatic-fuzzing \ - --exclude peepmatic-macro \ - --exclude peepmatic-runtime \ - --exclude peepmatic-test \ - --exclude peepmatic-souper + --workspace \ + --exclude '*lightbeam*' \ + --exclude 'wasmtime-wasi-*' \ + --exclude 'peepmatic*' env: RUST_BACKTRACE: 1 @@ -297,7 +287,7 @@ jobs: # Test debug (DWARF) related functionality on new backend. 
- run: | sudo apt-get update && sudo apt-get install -y gdb lldb - cargo test --features experimental_x64 test_debug_dwarf -- --ignored --test-threads 1 --test debug:: + cargo test test_debug_dwarf -- --ignored --test-threads 1 --test debug:: if: matrix.os == 'ubuntu-latest' env: RUST_BACKTRACE: 1 @@ -320,13 +310,9 @@ jobs: env: RUST_BACKTRACE: 1 - # Perform all tests (debug mode) for `wasmtime` with the experimental x64 - # backend. This runs on an older nightly of Rust (because of issues with - # unifying Cargo features on stable) on Ubuntu such that it's new enough - # to build Wasmtime, but old enough where the -Z options being used - # haven't been stabilized yet. + # Perform all tests (debug mode) for `wasmtime` with the old x86 backend. test_x64: - name: Test x64 new backend + name: Test old x86 backend runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 @@ -334,7 +320,7 @@ jobs: submodules: true - uses: ./.github/actions/install-rust with: - toolchain: nightly-2020-12-26 + toolchain: stable - uses: ./.github/actions/define-llvm-env # Install wasm32 targets in order to build various tests throughout the @@ -342,43 +328,9 @@ jobs: - run: rustup target add wasm32-wasi - run: rustup target add wasm32-unknown-unknown - # Run the x64 CI script. - - run: ./ci/run-experimental-x64-ci.sh + # Run the old x86 backend CI (we will eventually remove this). + - run: ./ci/run-old-x86-ci.sh env: - CARGO_VERSION: "+nightly-2020-12-26" - RUST_BACKTRACE: 1 - - # Perform tests on the new x64 backend on Windows as well. - test_x64_win: - name: Test x64 new backend on Windows - runs-on: windows-latest - steps: - - uses: actions/checkout@v2 - with: - submodules: true - - uses: ./.github/actions/install-rust - with: - toolchain: nightly-2020-11-29 - - uses: ./.github/actions/define-llvm-env - - - name: Install libclang - # Note: libclang is pre-installed on the macOS and linux images. 
- if: matrix.os == 'windows-latest' - run: | - curl https://releases.llvm.org/9.0.0/LLVM-9.0.0-win64.exe -o llvm-installer.exe - 7z x llvm-installer.exe -oC:/llvm-binary - echo LIBCLANG_PATH=C:/llvm-binary/bin/libclang.dll >> $GITHUB_ENV - echo C:/llvm-binary/bin >> $GITHUB_PATH - - # Install wasm32 targets in order to build various tests throughout the - # repo. - - run: rustup target add wasm32-wasi - - run: rustup target add wasm32-unknown-unknown - - # Run the x64 CI script. - - run: ./ci/run-experimental-x64-ci.sh - env: - CARGO_VERSION: "+nightly-2020-11-29" RUST_BACKTRACE: 1 # Build and test the wasi-nn module. @@ -390,8 +342,6 @@ jobs: with: submodules: true - uses: ./.github/actions/install-rust - with: - toolchain: nightly - run: rustup target add wasm32-wasi - uses: ./.github/actions/install-openvino - run: ./ci/run-wasi-nn-example.sh @@ -433,6 +383,7 @@ jobs: name: Build wasmtime runs-on: ${{ matrix.os }} strategy: + fail-fast: false matrix: include: - build: x86_64-linux @@ -517,18 +468,10 @@ jobs: $CENTOS cargo test \ --features test-programs/test_programs \ --release \ - --all \ - --exclude lightbeam \ - --exclude wasmtime-lightbeam \ - --exclude wasmtime-wasi-nn \ - --exclude wasmtime-wasi-crypto \ - --exclude peepmatic \ - --exclude peepmatic-automata \ - --exclude peepmatic-fuzzing \ - --exclude peepmatic-macro \ - --exclude peepmatic-runtime \ - --exclude peepmatic-test \ - --exclude peepmatic-souper \ + --workspace \ + --exclude '*lightbeam*' \ + --exclude 'wasmtime-wasi-*' \ + --exclude 'peepmatic*' \ --exclude wasmtime-fuzz env: RUST_BACKTRACE: 1 diff --git a/Cargo.lock b/Cargo.lock index 574c97a2f2..aad4a2bc11 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,9 +4,9 @@ version = 3 [[package]] name = "addr2line" -version = "0.14.1" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a55f82cfe485775d02112886f4169bde0c5894d75e79ead7eafe7e40a25e45f7" +checksum = 
"03345e98af8f3d786b6d9f656ccfa6ac316d954e92bc4841f0bba20789d5fb5a" dependencies = [ "gimli", ] @@ -161,11 +161,12 @@ checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" [[package]] name = "backtrace" -version = "0.3.56" +version = "0.3.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d117600f438b1707d4e4ae15d3595657288f8235a0eb593e80ecc98ab34e1bc" +checksum = "4717cfcbfaa661a0fd48f8453951837ae7e8f81e481fbb136e3202d72805a744" dependencies = [ "addr2line", + "cc", "cfg-if 1.0.0", "libc", "miniz_oxide", @@ -189,30 +190,6 @@ dependencies = [ "serde", ] -[[package]] -name = "bindgen" -version = "0.55.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75b13ce559e6433d360c26305643803cb52cfbabbc2b9c47ce04a58493dfb443" -dependencies = [ - "bitflags", - "cexpr", - "cfg-if 0.1.10", - "clang-sys", - "clap", - "env_logger 0.7.1", - "lazy_static", - "lazycell", - "log", - "peeking_take_while", - "proc-macro2", - "quote", - "regex", - "rustc-hash", - "shlex", - "which", -] - [[package]] name = "bindgen" version = "0.57.0" @@ -273,6 +250,18 @@ dependencies = [ "generic-array", ] +[[package]] +name = "bstr" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a40b47ad93e1a5404e6c18dec46b628214fee441c70f4ab5d6942142cc268a3d" +dependencies = [ + "lazy_static", + "memchr", + "regex-automata", + "serde", +] + [[package]] name = "bumpalo" version = "3.6.1" @@ -286,23 +275,29 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae44d1a3d5a19df61dd0c8beb138458ac2a53a7ac09eba97d55592540004306b" [[package]] -name = "cap-fs-ext" -version = "0.13.7" +name = "bytes" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dee87a3a916d6f051fc6809c39c4627f0c3a73b2a803bcfbb5fdf2bdfa1da0cb" +checksum = "b700ce4376041dcd0a327fd0097c41095743c4c8af8887265942faf1100bd040" + +[[package]] +name = 
"cap-fs-ext" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff3a1e32332db9ad29d6da34693ce9a7ac26a9edf96abb5c1788d193410031ab" dependencies = [ "cap-primitives", "cap-std", - "rustc_version", + "rustc_version 0.3.3", "unsafe-io", "winapi", ] [[package]] name = "cap-primitives" -version = "0.13.7" +version = "0.13.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c3e3ea29994a34f3bc67b5396a43c87597d302d9e2e5e3b3d5ba952d86c7b41" +checksum = "2d253b74de50b097594462618e7dd17b93b3e3bef19f32d2e512996f9095661f" dependencies = [ "errno", "fs-set-times", @@ -311,7 +306,7 @@ dependencies = [ "maybe-owned", "once_cell", "posish", - "rustc_version", + "rustc_version 0.3.3", "unsafe-io", "winapi", "winapi-util", @@ -320,30 +315,41 @@ dependencies = [ [[package]] name = "cap-rand" -version = "0.13.7" +version = "0.13.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0418058b38db7efc6021c5ce012e3a39c57e1a4d7bf2ddcd3789771de505d2f" +checksum = "458e98ed00e4276d0ac60da888d80957a177dfa7efa8dbb3be59f1e2b0e02ae5" dependencies = [ "rand 0.8.3", ] [[package]] name = "cap-std" -version = "0.13.7" +version = "0.13.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5f20cbb3055e9c72b16ba45913fe9f92836d2aa7a880e1ffacb8d244f454319" +checksum = "7019d48ea53c5f378e0fdab0fe5f627fc00e76d65e75dffd6fb1cbc0c9b382ee" dependencies = [ "cap-primitives", "posish", - "rustc_version", + "rustc_version 0.3.3", "unsafe-io", ] [[package]] -name = "cap-time-ext" +name = "cap-tempfile" version = "0.13.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b684f9db089b0558520076b4eeda2b719a5c4c06f329be96c9497f2b48c3944" +checksum = "8d2f6f45ddb06ff26f4cf2ba9838d5826d52e1a5f6b321d71f114bb38cf34a57" +dependencies = [ + "cap-std", + "rand 0.8.3", + "uuid", +] + +[[package]] +name = "cap-time-ext" +version = "0.13.10" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "90585adeada7f804e6dcf71b8ff74217ad8742188fc870b9da5deab4722baa04" dependencies = [ "cap-primitives", "once_cell", @@ -371,6 +377,15 @@ dependencies = [ "libc", ] +[[package]] +name = "cast" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc38c385bfd7e444464011bb24820f40dd1c76bcdfa1b78611cb7c2e5cafab75" +dependencies = [ + "rustc_version 0.2.3", +] + [[package]] name = "cc" version = "1.0.67" @@ -526,7 +541,7 @@ checksum = "dcb25d077389e53838a8158c8e99174c5a9d902dee4904320db714f3c653ffba" [[package]] name = "cranelift" -version = "0.72.0" +version = "0.73.0" dependencies = [ "cranelift-codegen", "cranelift-frontend", @@ -534,21 +549,21 @@ dependencies = [ [[package]] name = "cranelift-bforest" -version = "0.72.0" +version = "0.73.0" dependencies = [ "cranelift-entity", ] [[package]] name = "cranelift-codegen" -version = "0.72.0" +version = "0.73.0" dependencies = [ "bincode", - "byteorder", "cranelift-bforest", "cranelift-codegen-meta", "cranelift-codegen-shared", "cranelift-entity", + "criterion", "gimli", "hashbrown", "log", @@ -560,13 +575,12 @@ dependencies = [ "smallvec", "souper-ir", "target-lexicon", - "thiserror", - "wast 35.0.0", + "wast 35.0.2", ] [[package]] name = "cranelift-codegen-meta" -version = "0.72.0" +version = "0.73.0" dependencies = [ "cranelift-codegen-shared", "cranelift-entity", @@ -574,24 +588,23 @@ dependencies = [ [[package]] name = "cranelift-codegen-shared" -version = "0.72.0" +version = "0.73.0" dependencies = [ "serde", ] [[package]] name = "cranelift-entity" -version = "0.72.0" +version = "0.73.0" dependencies = [ "serde", ] [[package]] name = "cranelift-filetests" -version = "0.66.0" +version = "0.73.0" dependencies = [ "anyhow", - "byteorder", "cranelift-codegen", "cranelift-frontend", "cranelift-interpreter", @@ -610,7 +623,7 @@ dependencies = [ [[package]] name = "cranelift-frontend" -version = "0.72.0" +version 
= "0.73.0" dependencies = [ "cranelift-codegen", "hashbrown", @@ -621,7 +634,7 @@ dependencies = [ [[package]] name = "cranelift-interpreter" -version = "0.72.0" +version = "0.73.0" dependencies = [ "cranelift-codegen", "cranelift-entity", @@ -634,7 +647,7 @@ dependencies = [ [[package]] name = "cranelift-jit" -version = "0.72.0" +version = "0.73.0" dependencies = [ "anyhow", "cranelift", @@ -643,7 +656,6 @@ dependencies = [ "cranelift-frontend", "cranelift-module", "cranelift-native", - "errno", "libc", "log", "memmap2", @@ -654,19 +666,18 @@ dependencies = [ [[package]] name = "cranelift-module" -version = "0.72.0" +version = "0.73.0" dependencies = [ "anyhow", "cranelift-codegen", "cranelift-entity", "hashbrown", "log", - "thiserror", ] [[package]] name = "cranelift-native" -version = "0.72.0" +version = "0.73.0" dependencies = [ "cranelift-codegen", "target-lexicon", @@ -674,7 +685,7 @@ dependencies = [ [[package]] name = "cranelift-object" -version = "0.72.0" +version = "0.73.0" dependencies = [ "anyhow", "cranelift-codegen", @@ -688,7 +699,7 @@ dependencies = [ [[package]] name = "cranelift-preopt" -version = "0.72.0" +version = "0.73.0" dependencies = [ "cranelift-codegen", "cranelift-entity", @@ -696,17 +707,16 @@ dependencies = [ [[package]] name = "cranelift-reader" -version = "0.72.0" +version = "0.73.0" dependencies = [ "cranelift-codegen", "smallvec", "target-lexicon", - "thiserror", ] [[package]] name = "cranelift-serde" -version = "0.72.0" +version = "0.73.0" dependencies = [ "clap", "cranelift-codegen", @@ -718,7 +728,7 @@ dependencies = [ [[package]] name = "cranelift-tools" -version = "0.66.0" +version = "0.73.0" dependencies = [ "anyhow", "capstone", @@ -754,13 +764,13 @@ dependencies = [ [[package]] name = "cranelift-wasm" -version = "0.72.0" +version = "0.73.0" dependencies = [ "cranelift-codegen", "cranelift-entity", "cranelift-frontend", "hashbrown", - "itertools", + "itertools 0.10.0", "log", "serde", "smallvec", @@ -779,6 +789,42 @@ 
dependencies = [ "cfg-if 1.0.0", ] +[[package]] +name = "criterion" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab327ed7354547cc2ef43cbe20ef68b988e70b4b593cbd66a2a61733123a3d23" +dependencies = [ + "atty", + "cast", + "clap", + "criterion-plot", + "csv", + "itertools 0.10.0", + "lazy_static", + "num-traits", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_cbor", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e022feadec601fba1649cfa83586381a4ad31c6bf3a9ab7d408118b05dd9889d" +dependencies = [ + "cast", + "itertools 0.9.0", +] + [[package]] name = "crossbeam-channel" version = "0.5.0" @@ -834,6 +880,28 @@ dependencies = [ "subtle", ] +[[package]] +name = "csv" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" +dependencies = [ + "bstr", + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + [[package]] name = "ctr" version = "0.6.0" @@ -1102,6 +1170,10 @@ dependencies = [ name = "example-fib-debug-wasm" version = "0.0.0" +[[package]] +name = "example-tokio-wasm" +version = "0.0.0" + [[package]] name = "example-wasi-wasm" version = "0.0.0" @@ -1234,9 +1306,9 @@ dependencies = [ [[package]] name = "gimli" -version = "0.23.0" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6503fe142514ca4799d4c26297c4248239fe8838d827db6bd6065c6ed29a6ce" +checksum = "0e4075386626662786ddb0ec9081e7c7eeb1ba31951f447ca780ef9f5d568189" dependencies = [ "fallible-iterator", 
"indexmap", @@ -1260,6 +1332,12 @@ dependencies = [ "subtle", ] +[[package]] +name = "half" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62aca2aba2d62b4a7f5b33f3712cb1b0692779a56fb510499d5c0aa594daeaf3" + [[package]] name = "hashbrown" version = "0.9.1" @@ -1368,15 +1446,24 @@ checksum = "47be2f14c678be2fdcab04ab1171db51b2762ce6f0a8ee87c8dd4a04ed216135" [[package]] name = "iter-enum" -version = "0.2.7" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cad34f24d3b48ceffdff38af2df5ce1b7d1d9cc113e503d8e86fe8cdb889c871" +checksum = "4f947f0d9df7e69c4df60a950c0a83741455bb9ebd8fd9b5a87994dda4dbb005" dependencies = [ "derive_utils", "quote", "syn", ] +[[package]] +name = "itertools" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.10.0" @@ -1410,6 +1497,15 @@ dependencies = [ "libc", ] +[[package]] +name = "js-sys" +version = "0.3.51" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83bdfbace3a0e81a4253f73b49e960b053e396a11012cbd49b9b74d6a2b67062" +dependencies = [ + "wasm-bindgen", +] + [[package]] name = "k256" version = "0.7.2" @@ -1444,9 +1540,9 @@ checksum = "3576a87f2ba00f6f106fdfcd16db1d698d648a26ad8e0573cad8537c3c362d2a" [[package]] name = "libc" -version = "0.2.87" +version = "0.2.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "265d751d31d6780a3f956bb5b8022feba2d94eeee5a84ba64f4212eedca42213" +checksum = "18794a8ad5b29321f790b55d93dfba91e125cb1a9edbd4f8e3150acc771c1a5e" [[package]] name = "libfuzzer-sys" @@ -1476,7 +1572,7 @@ checksum = "c7d73b3f436185384286bd8098d17ec07c9a7d2388a6599f824d8502b529702a" [[package]] name = "lightbeam" -version = "0.25.0" +version = "0.26.0" dependencies = [ "anyhow", "arrayvec", 
@@ -1486,7 +1582,7 @@ dependencies = [ "dynasm", "dynasmrt", "iter-enum", - "itertools", + "itertools 0.10.0", "lazy_static", "memoffset", "more-asserts", @@ -1580,6 +1676,28 @@ dependencies = [ "autocfg 1.0.1", ] +[[package]] +name = "mio" +version = "0.7.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf80d3e903b34e0bd7282b218398aec54e082c840d9baf8339e0080a0c542956" +dependencies = [ + "libc", + "log", + "miow", + "ntapi", + "winapi", +] + +[[package]] +name = "miow" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9f1c5b025cda876f66ef43a113f91ebc9f4ccef34843000e0adf6ebbab84e21" +dependencies = [ + "winapi", +] + [[package]] name = "more-asserts" version = "0.2.1" @@ -1609,6 +1727,15 @@ dependencies = [ "version_check", ] +[[package]] +name = "ntapi" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f6bb902e437b6d86e03cce10a7e2af662292c5dfef23b65899ea3ac9354ad44" +dependencies = [ + "winapi", +] + [[package]] name = "num-bigint" version = "0.2.6" @@ -1710,9 +1837,9 @@ checksum = "17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a" [[package]] name = "object" -version = "0.23.0" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9a7ab5d64814df0fe4a4b5ead45ed6c5f181ee3ff04ba344313a6c80446c5d4" +checksum = "1a5b3dd1c072ee7963717671d1ca129f1048fda25edea6b752bfc71ac8854170" dependencies = [ "crc32fast", "indexmap", @@ -1724,6 +1851,12 @@ version = "1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af8b08b04175473088b46763e51ee54da5f9a164bc162f615b91bc179dbf15a3" +[[package]] +name = "oorandom" +version = "11.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" + [[package]] name = "opaque-debug" version = "0.3.0" @@ -1732,22 +1865,30 @@ checksum = 
"624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" [[package]] name = "openvino" -version = "0.1.8" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43eeb44285b7ce8e2012b92bec32968622e1dad452e812e6edea9e001e5e9410" +checksum = "0cb74b3d8c653f7a9928bda494d329e6363ea0b428d3a3e5805b45ebb74ace76" dependencies = [ "openvino-sys", "thiserror", ] [[package]] -name = "openvino-sys" -version = "0.1.8" +name = "openvino-finder" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fb64bef270a1ff665b0b2e28ebfa213e6205a007ce88223d020730225d6008f" +checksum = "426587a131841eb1e1111b0fea96cbd4fd0fd5d7b6526fb9c41400587d1c525c" + +[[package]] +name = "openvino-sys" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83d5e5d5e913f4e9aa42b2a7ae9c8719aedb4bc0eb443bf92f07d9ee9a05e7b1" dependencies = [ - "bindgen 0.55.1", "cmake", + "lazy_static", + "libloading", + "openvino-finder", ] [[package]] @@ -1816,7 +1957,7 @@ checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" [[package]] name = "peepmatic" -version = "0.72.0" +version = "0.73.0" dependencies = [ "anyhow", "peepmatic-automata", @@ -1825,13 +1966,13 @@ dependencies = [ "peepmatic-test-operator", "peepmatic-traits", "serde", - "wast 35.0.0", + "wast 35.0.2", "z3", ] [[package]] name = "peepmatic-automata" -version = "0.72.0" +version = "0.73.0" dependencies = [ "serde", ] @@ -1853,12 +1994,12 @@ dependencies = [ "peepmatic-traits", "rand 0.8.3", "serde", - "wast 35.0.0", + "wast 35.0.2", ] [[package]] name = "peepmatic-macro" -version = "0.72.0" +version = "0.73.0" dependencies = [ "proc-macro2", "quote", @@ -1867,7 +2008,7 @@ dependencies = [ [[package]] name = "peepmatic-runtime" -version = "0.72.0" +version = "0.73.0" dependencies = [ "bincode", "bumpalo", @@ -1878,19 +2019,19 @@ dependencies = [ "serde", "serde_test", "thiserror", - "wast 35.0.0", 
+ "wast 35.0.2", ] [[package]] name = "peepmatic-souper" -version = "0.72.0" +version = "0.73.0" dependencies = [ "anyhow", "log", "peepmatic", "peepmatic-test-operator", "souper-ir", - "wast 35.0.0", + "wast 35.0.2", ] [[package]] @@ -1907,16 +2048,16 @@ dependencies = [ [[package]] name = "peepmatic-test-operator" -version = "0.72.0" +version = "0.73.0" dependencies = [ "peepmatic-traits", "serde", - "wast 35.0.0", + "wast 35.0.2", ] [[package]] name = "peepmatic-traits" -version = "0.72.0" +version = "0.73.0" [[package]] name = "pem" @@ -1955,6 +2096,34 @@ dependencies = [ "zeroize", ] +[[package]] +name = "plotters" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45ca0ae5f169d0917a7c7f5a9c1a3d3d9598f18f529dd2b8373ed988efea307a" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b07fffcddc1cb3a1de753caa4e4df03b79922ba43cf882acc1bdd7e8df9f4590" + +[[package]] +name = "plotters-svg" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b38a02e23bd9604b842a812063aec4ef702b57989c37b655254bb61c471ad211" +dependencies = [ + "plotters-backend", +] + [[package]] name = "poly1305" version = "0.6.2" @@ -2389,13 +2558,22 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name = "rustc_version" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" +dependencies = [ + "semver 0.9.0", +] + [[package]] name = "rustc_version" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"f0dfe2087c51c460008730de8b57e6a320782fbfb312e1f4d520e6c6fae155ee" dependencies = [ - "semver", + "semver 0.11.0", ] [[package]] @@ -2451,15 +2629,30 @@ dependencies = [ "syn", ] +[[package]] +name = "semver" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" +dependencies = [ + "semver-parser 0.7.0", +] + [[package]] name = "semver" version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f301af10236f6df4160f7c3f04eec6dbc70ace82d23326abad5edee88801c6b6" dependencies = [ - "semver-parser", + "semver-parser 0.10.2", ] +[[package]] +name = "semver-parser" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" + [[package]] name = "semver-parser" version = "0.10.2" @@ -2478,6 +2671,16 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde_cbor" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e18acfa2f90e8b735b2836ab8d538de304cbb6729a7360729ea5a895d15a622" +dependencies = [ + "half", + "serde", +] + [[package]] name = "serde_derive" version = "1.0.123" @@ -2665,9 +2868,9 @@ dependencies = [ [[package]] name = "syn" -version = "1.0.60" +version = "1.0.67" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c700597eca8a5a762beb35753ef6b94df201c81cca676604f547495a0d7f0081" +checksum = "6498a9efc342871f91cc2d0d694c674368b4ceb40f62b65a7a08c3792935e702" dependencies = [ "proc-macro2", "quote", @@ -2688,16 +2891,16 @@ dependencies = [ [[package]] name = "system-interface" -version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd411f50bd848d1efefd5957d494eddc80979380e3c4f80b4ba2ebd26d1b673" +checksum = "ff09d1260270c02199b44e68140aab5225c27b365a38684e0d7b6155f0c37ffb" 
dependencies = [ "atty", "bitflags", "cap-fs-ext", "cap-std", "posish", - "rustc_version", + "rustc_version 0.3.3", "unsafe-io", "winapi", "winx", @@ -2705,9 +2908,9 @@ dependencies = [ [[package]] name = "target-lexicon" -version = "0.11.2" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "422045212ea98508ae3d28025bc5aaa2bd4a9cdaecd442a08da2ee620ee9ea95" +checksum = "64ae3b39281e4b14b8123bdbaddd472b7dfe215e444181f2f9d2443c2444f834" [[package]] name = "tempfile" @@ -2753,6 +2956,7 @@ dependencies = [ "pretty_env_logger", "target-lexicon", "tempfile", + "tokio", "wasi-cap-std-sync", "wasi-common", "wasmtime", @@ -2808,6 +3012,43 @@ dependencies = [ "winapi", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "tokio" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83f0c8e7c0addab50b663055baf787d0af7f413a46e6e7fb9559a4e4db7137a5" +dependencies = [ + "autocfg 1.0.1", + "bytes", + "libc", + "memchr", + "mio", + "num_cpus", + "pin-project-lite", + "tokio-macros", +] + +[[package]] +name = "tokio-macros" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "caf7b11a536f46a809a8a9f0bb4237020f70ecbf115b842360afb127ea2fda57" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "toml" version = "0.5.8" @@ -2959,11 +3200,11 @@ dependencies = [ [[package]] name = "unsafe-io" -version = "0.6.2" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0301dd0f2c21baed606faa2717fbfbb1a68b7e289ea29b40bc21a16f5ae9f5aa" +checksum = "fe39acfe60d3754452ea6881613c3240100b23ffd94a627c138863f8cd314b1b" dependencies = [ - "rustc_version", + "rustc_version 0.3.3", 
"winapi", ] @@ -2986,11 +3227,20 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ada4f4ae167325015f52cc65f9fb6c251b868d8fb3b6dd0ce2d60e497c4870a" dependencies = [ - "bindgen 0.57.0", + "bindgen", "cc", "cfg-if 0.1.10", ] +[[package]] +name = "uuid" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" +dependencies = [ + "getrandom 0.2.2", +] + [[package]] name = "vec_map" version = "0.8.2" @@ -3043,9 +3293,10 @@ checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" [[package]] name = "wasi-cap-std-sync" -version = "0.25.0" +version = "0.26.0" dependencies = [ "anyhow", + "async-trait", "bitflags", "cap-fs-ext", "cap-rand", @@ -3064,7 +3315,7 @@ dependencies = [ [[package]] name = "wasi-common" -version = "0.25.0" +version = "0.26.0" dependencies = [ "anyhow", "bitflags", @@ -3107,19 +3358,98 @@ dependencies = [ ] [[package]] -name = "wasm-encoder" -version = "0.4.0" +name = "wasi-tokio" +version = "0.26.0" +dependencies = [ + "anyhow", + "bitflags", + "cap-fs-ext", + "cap-std", + "cap-tempfile", + "cap-time-ext", + "fs-set-times", + "lazy_static", + "libc", + "posish", + "system-interface", + "tempfile", + "tokio", + "tracing", + "unsafe-io", + "wasi-cap-std-sync", + "wasi-common", + "wiggle", + "winapi", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c75fa62cf1464aa6655479ae454202a159cc82b7b4d66e8f174409669c0654c5" +checksum = "d54ee1d4ed486f78874278e63e4069fc1ab9f6a18ca492076ffb90c5eb2997fd" +dependencies = [ + "cfg-if 1.0.0", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b33f6a0694ccfea53d94db8b2ed1c3a8a4c86dd936b13b9f0a15ec4a451b900" +dependencies = [ + "bumpalo", 
+ "lazy_static", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "088169ca61430fe1e58b8096c24975251700e7b1f6fd91cc9d59b04fb9b18bd4" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be2241542ff3d9f241f5e2cb6dd09b37efe786df8851c54957683a49f0987a97" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7cff876b8f18eed75a66cf49b65e7f967cb354a7aa16003fb55dbfd25b44b4f" + +[[package]] +name = "wasm-encoder" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b4949d4f2b25a4b208317dcf86aacef9e7a5884e48dfc45d4aeb91808d6f86" dependencies = [ "leb128", ] [[package]] name = "wasm-smith" -version = "0.4.0" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9a5c40555e0fd974d984d9a1186cf2aa7092450912bef9943a581941dd61bfa" +checksum = "a982408719f704307ac7f45247350f06ce739d759362ef8293ed7b4d922adee8" dependencies = [ "arbitrary", "indexmap", @@ -3153,15 +3483,15 @@ dependencies = [ [[package]] name = "wasmparser" -version = "0.76.0" +version = "0.77.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "755a9a4afe3f6cccbbe6d7e965eef44cf260b001f93e547eba84255c1d0187d8" +checksum = "b35c86d22e720a07d954ebbed772d01180501afe7d03d464f413bb5f8914a8d6" [[package]] name = "wasmprinter" -version = "0.2.24" +version = "0.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"fc04e945b98318820984d1bdc9132987cb628fc43ee2b95e21fdd751341e5686" +checksum = "d0a64a1ecb60865b59789953ee6f62e49c3d47dde66b5b3f521c99b49d921836" dependencies = [ "anyhow", "wasmparser", @@ -3169,7 +3499,7 @@ dependencies = [ [[package]] name = "wasmtime" -version = "0.25.0" +version = "0.26.0" dependencies = [ "anyhow", "backtrace", @@ -3177,9 +3507,11 @@ dependencies = [ "cfg-if 1.0.0", "cpp_demangle", "indexmap", + "lazy_static", "libc", "log", "paste", + "psm", "region", "rustc-demangle", "serde", @@ -3240,7 +3572,7 @@ dependencies = [ [[package]] name = "wasmtime-cache" -version = "0.25.0" +version = "0.26.0" dependencies = [ "anyhow", "base64", @@ -3264,14 +3596,14 @@ dependencies = [ [[package]] name = "wasmtime-cli" -version = "0.25.0" +version = "0.26.0" dependencies = [ "anyhow", - "cap-std", "env_logger 0.8.3", "file-per-thread-logger", "filecheck", "humantime 2.1.0", + "lazy_static", "libc", "log", "more-asserts", @@ -3282,9 +3614,8 @@ dependencies = [ "target-lexicon", "tempfile", "test-programs", + "tokio", "tracing-subscriber", - "wasi-cap-std-sync", - "wasi-common", "wasmparser", "wasmtime", "wasmtime-cache", @@ -3298,25 +3629,26 @@ dependencies = [ "wasmtime-wasi-crypto", "wasmtime-wasi-nn", "wasmtime-wast", - "wast 35.0.0", + "wast 35.0.2", "wat", ] [[package]] name = "wasmtime-cranelift" -version = "0.25.0" +version = "0.26.0" dependencies = [ "cranelift-codegen", "cranelift-entity", "cranelift-frontend", "cranelift-wasm", + "target-lexicon", "wasmparser", "wasmtime-environ", ] [[package]] name = "wasmtime-debug" -version = "0.25.0" +version = "0.26.0" dependencies = [ "anyhow", "gimli", @@ -3330,9 +3662,8 @@ dependencies = [ [[package]] name = "wasmtime-environ" -version = "0.25.0" +version = "0.26.0" dependencies = [ - "anyhow", "cfg-if 1.0.0", "cranelift-codegen", "cranelift-entity", @@ -3341,7 +3672,6 @@ dependencies = [ "indexmap", "log", "more-asserts", - "region", "serde", "thiserror", "wasmparser", @@ -3349,7 +3679,7 @@ dependencies = [ 
[[package]] name = "wasmtime-fiber" -version = "0.25.0" +version = "0.26.0" dependencies = [ "backtrace", "cc", @@ -3393,7 +3723,7 @@ dependencies = [ [[package]] name = "wasmtime-jit" -version = "0.25.0" +version = "0.26.0" dependencies = [ "addr2line", "anyhow", @@ -3425,7 +3755,7 @@ dependencies = [ [[package]] name = "wasmtime-lightbeam" -version = "0.25.0" +version = "0.26.0" dependencies = [ "cranelift-codegen", "lightbeam", @@ -3435,7 +3765,7 @@ dependencies = [ [[package]] name = "wasmtime-obj" -version = "0.25.0" +version = "0.26.0" dependencies = [ "anyhow", "more-asserts", @@ -3447,7 +3777,7 @@ dependencies = [ [[package]] name = "wasmtime-profiling" -version = "0.25.0" +version = "0.26.0" dependencies = [ "anyhow", "cfg-if 1.0.0", @@ -3465,7 +3795,7 @@ dependencies = [ [[package]] name = "wasmtime-runtime" -version = "0.25.0" +version = "0.26.0" dependencies = [ "anyhow", "backtrace", @@ -3478,18 +3808,18 @@ dependencies = [ "mach", "memoffset", "more-asserts", - "psm", "rand 0.8.3", "region", "thiserror", "userfaultfd", "wasmtime-environ", + "wasmtime-fiber", "winapi", ] [[package]] name = "wasmtime-rust" -version = "0.25.0" +version = "0.26.0" dependencies = [ "anyhow", "wasmtime", @@ -3499,7 +3829,7 @@ dependencies = [ [[package]] name = "wasmtime-rust-macro" -version = "0.25.0" +version = "0.26.0" dependencies = [ "proc-macro2", "quote", @@ -3508,10 +3838,12 @@ dependencies = [ [[package]] name = "wasmtime-wasi" -version = "0.25.0" +version = "0.26.0" dependencies = [ "anyhow", + "wasi-cap-std-sync", "wasi-common", + "wasi-tokio", "wasmtime", "wasmtime-wiggle", "wiggle", @@ -3519,7 +3851,7 @@ dependencies = [ [[package]] name = "wasmtime-wasi-crypto" -version = "0.25.0" +version = "0.26.0" dependencies = [ "anyhow", "wasi-crypto", @@ -3530,7 +3862,7 @@ dependencies = [ [[package]] name = "wasmtime-wasi-nn" -version = "0.25.0" +version = "0.26.0" dependencies = [ "anyhow", "log", @@ -3546,16 +3878,16 @@ dependencies = [ [[package]] name = 
"wasmtime-wast" -version = "0.25.0" +version = "0.26.0" dependencies = [ "anyhow", "wasmtime", - "wast 35.0.0", + "wast 35.0.2", ] [[package]] name = "wasmtime-wiggle" -version = "0.25.0" +version = "0.26.0" dependencies = [ "anyhow", "proptest", @@ -3568,7 +3900,7 @@ dependencies = [ [[package]] name = "wasmtime-wiggle-macro" -version = "0.25.0" +version = "0.26.0" dependencies = [ "proc-macro2", "quote", @@ -3588,34 +3920,35 @@ dependencies = [ [[package]] name = "wast" -version = "35.0.0" +version = "35.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db5ae96da18bb5926341516fd409b5a8ce4e4714da7f0a1063d3b20ac9f9a1e1" +checksum = "2ef140f1b49946586078353a453a1d28ba90adfc54dde75710bc1931de204d68" dependencies = [ "leb128", ] [[package]] name = "wat" -version = "1.0.36" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b0fa059022c5dabe129f02b429d67086400deb8277f89c975555dacc1dadbcc" +checksum = "8ec280a739b69173e0ffd12c1658507996836ba4e992ed9bc1e5385a0bd72a02" dependencies = [ - "wast 35.0.0", + "wast 35.0.2", ] [[package]] -name = "which" -version = "3.1.1" +name = "web-sys" +version = "0.3.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d011071ae14a2f6671d0b74080ae0cd8ebf3a6f8c9589a2cd45f23126fe29724" +checksum = "e828417b379f3df7111d3a2a9e5753706cae29c41f7c4029ee9fd77f3e09e582" dependencies = [ - "libc", + "js-sys", + "wasm-bindgen", ] [[package]] name = "wiggle" -version = "0.25.0" +version = "0.26.0" dependencies = [ "async-trait", "bitflags", @@ -3629,14 +3962,14 @@ dependencies = [ [[package]] name = "wiggle-borrow" -version = "0.25.0" +version = "0.26.0" dependencies = [ "wiggle", ] [[package]] name = "wiggle-generate" -version = "0.25.0" +version = "0.26.0" dependencies = [ "anyhow", "heck", @@ -3649,8 +3982,9 @@ dependencies = [ [[package]] name = "wiggle-macro" -version = "0.25.0" +version = "0.26.0" dependencies = [ + "proc-macro2", 
"quote", "syn", "wiggle", @@ -3704,9 +4038,9 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "winx" -version = "0.23.0" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a316462681accd062e32c37f9d78128691a4690764917d13bd8ea041baf2913e" +checksum = "2bdb79e12a5ac98f09e863b99c38c72f942a41f643ae0bb05d4d6d2633481341" dependencies = [ "bitflags", "winapi", diff --git a/Cargo.toml b/Cargo.toml index ab98691b70..bc121c2082 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wasmtime-cli" -version = "0.25.0" +version = "0.26.0" authors = ["The Wasmtime Project Developers"] description = "Command-line interface for Wasmtime" license = "Apache-2.0 WITH LLVM-exception" @@ -22,31 +22,29 @@ doc = false [dependencies] # Enable all supported architectures by default. -wasmtime = { path = "crates/wasmtime", version = "0.25.0", default-features = false, features = ['cache'] } -wasmtime-cache = { path = "crates/cache", version = "0.25.0" } -wasmtime-debug = { path = "crates/debug", version = "0.25.0" } -wasmtime-environ = { path = "crates/environ", version = "0.25.0" } -wasmtime-jit = { path = "crates/jit", version = "0.25.0" } -wasmtime-obj = { path = "crates/obj", version = "0.25.0" } -wasmtime-wast = { path = "crates/wast", version = "0.25.0" } -wasmtime-wasi = { path = "crates/wasi", version = "0.25.0" } -wasmtime-wasi-crypto = { path = "crates/wasi-crypto", version = "0.25.0", optional = true } -wasmtime-wasi-nn = { path = "crates/wasi-nn", version = "0.25.0", optional = true } -wasi-common = { path = "crates/wasi-common", version = "0.25.0" } -wasi-cap-std-sync = { path = "crates/wasi-common/cap-std-sync", version = "0.25.0" } +wasmtime = { path = "crates/wasmtime", version = "0.26.0", default-features = false, features = ['cache'] } +wasmtime-cache = { path = "crates/cache", version = "0.26.0" } +wasmtime-debug = { path = "crates/debug", version = 
"0.26.0" } +wasmtime-environ = { path = "crates/environ", version = "0.26.0" } +wasmtime-jit = { path = "crates/jit", version = "0.26.0" } +wasmtime-obj = { path = "crates/obj", version = "0.26.0" } +wasmtime-wast = { path = "crates/wast", version = "0.26.0" } +wasmtime-wasi = { path = "crates/wasi", version = "0.26.0" } +wasmtime-wasi-crypto = { path = "crates/wasi-crypto", version = "0.26.0", optional = true } +wasmtime-wasi-nn = { path = "crates/wasi-nn", version = "0.26.0", optional = true } structopt = { version = "0.3.5", features = ["color", "suggestions"] } -object = { version = "0.23.0", default-features = false, features = ["write"] } +object = { version = "0.24.0", default-features = false, features = ["write"] } anyhow = "1.0.19" -target-lexicon = { version = "0.11.0", default-features = false } +target-lexicon = { version = "0.12.0", default-features = false } pretty_env_logger = "0.4.0" file-per-thread-logger = "0.1.1" -wat = "1.0.36" +wat = "1.0.37" libc = "0.2.60" log = "0.4.8" rayon = "1.2.1" humantime = "2.0.0" -wasmparser = "0.76.0" -cap-std = "0.13" +wasmparser = "0.77.0" +lazy_static = "1.4.0" [dev-dependencies] env_logger = "0.8.1" @@ -56,6 +54,7 @@ tempfile = "3.1.0" test-programs = { path = "crates/test-programs" } wasmtime-fuzzing = { path = "crates/fuzzing" } wasmtime-runtime = { path = "crates/runtime" } +tokio = { version = "1.5.0", features = ["rt", "time", "macros", "rt-multi-thread"] } tracing-subscriber = "0.2.16" wast = "35.0.0" @@ -66,6 +65,7 @@ anyhow = "1.0.19" opt-level = 0 [workspace] +resolver = '2' members = [ "cranelift", "crates/bench-api", @@ -79,23 +79,29 @@ members = [ "crates/wiggle/wasmtime", "crates/wasi-common", "crates/wasi-common/cap-std-sync", + "crates/wasi-common/tokio", "examples/fib-debug/wasm", "examples/wasi/wasm", + "examples/tokio/wasm", "fuzz", ] [features] -default = ["jitdump", "wasmtime/wat", "wasmtime/parallel-compilation"] +default = ["jitdump", "wasmtime/wat", "wasmtime/parallel-compilation", 
"wasi-nn"] lightbeam = ["wasmtime/lightbeam"] jitdump = ["wasmtime/jitdump"] vtune = ["wasmtime/vtune"] wasi-crypto = ["wasmtime-wasi-crypto"] wasi-nn = ["wasmtime-wasi-nn"] uffd = ["wasmtime/uffd"] +all-arch = ["wasmtime/all-arch"] -# Try the experimental, work-in-progress new x86_64 backend. This is not stable -# as of June 2020. -experimental_x64 = ["wasmtime-jit/experimental_x64"] +# Stub feature that does nothing, for Cargo-features compatibility: the new +# backend is the default now. +experimental_x64 = [] + +# Use the old x86 backend. +old-x86-backend = ["wasmtime/old-x86-backend"] [badges] maintenance = { status = "actively-developed" } @@ -104,5 +110,9 @@ maintenance = { status = "actively-developed" } name = "host_segfault" harness = false +[[example]] +name = "tokio" +required-features = ["wasmtime-wasi/tokio"] + [profile.dev.package.backtrace] debug = false # FIXME(#1813) diff --git a/RELEASES.md b/RELEASES.md index dd8ca5ddd2..acf115f1c5 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -2,6 +2,137 @@ -------------------------------------------------------------------------------- +## Unreleased + +### Added + +* Added `Store::with_limits`, `StoreLimits`, and `ResourceLimiter` to the + Wasmtime API to help with enforcing resource limits at runtime. The + `ResourceLimiter` trait can be implemented by custom resource limiters to + decide if linear memories or tables can be grown. + +### Changed + +* Breaking: `Memory::new` has been changed to return `Result` as creating a + host memory object is now a fallible operation when the initial size of + the memory exceeds the store limits. + +## 0.26.0 + +Released 2021-04-05. + +### Added + +* Added the `wasmtime compile` command to support AOT compilation of Wasm + modules. This adds the `Engine::precompile_module` method. Also added the + `Config::target` method to change the compilation target of the + configuration. 
This can be used in conjunction with + `Engine::precompile_module` to target a different host triple than the + current one. + [#2791](https://github.com/bytecodealliance/wasmtime/pull/2791) + +* Support for macOS on aarch64 (Apple M1 Silicon), including Apple-specific + calling convention details and unwinding/exception handling using Mach ports. + [#2742](https://github.com/bytecodealliance/wasmtime/pull/2742), + [#2723](https://github.com/bytecodealliance/wasmtime/pull/2723) + +* A number of SIMD instruction implementations in the new x86-64 backend. + [#2771](https://github.com/bytecodealliance/wasmtime/pull/2771) + +* Added the `Config::cranelift_flag_enable` method to enable setting Cranelift + boolean flags or presets in a config. + +* Added CLI option `--cranelift-enable` to enable boolean settings and ISA presets. + +* Deduplicate function signatures in Wasm modules. + [#2772](https://github.com/bytecodealliance/wasmtime/pull/2772) + +* Optimize overheads of calling into Wasm functions. + [#2757](https://github.com/bytecodealliance/wasmtime/pull/2757), + [#2759](https://github.com/bytecodealliance/wasmtime/pull/2759) + +* Improvements related to Module Linking: compile fewer trampolines; + + [#2774](https://github.com/bytecodealliance/wasmtime/pull/2774) + +* Re-export sibling crates from `wasmtime-wasi` to make embedding easier + without needing to match crate versions. + [#2776](https://github.com/bytecodealliance/wasmtime/pull/2776) + +### Changed + +* Switched the default compiler backend on x86-64 to Cranelift's new backend. + This should not have any user-visible effects other than possibly runtime + performance improvements. The old backend is still available with the + `old-x86-backend` feature flag to the `cranelift-codegen` or `wasmtime` + crates, or programmatically with `BackendVariant::Legacy`. We plan to + maintain the old backend for at least one more release and ensure it works on + CI. 
+ [#2718](https://github.com/bytecodealliance/wasmtime/pull/2718) + +* Breaking: `Module::deserialize` has been removed in favor of `Module::new`. + +* Breaking: `Config::cranelift_clear_cpu_flags` was removed. Use `Config::target` + to clear the CPU flags for the host's target. + +* Breaking: `Config::cranelift_other_flag` was renamed to `Config::cranelift_flag_set`. + +* CLI changes: + * Wasmtime CLI options to enable WebAssembly features have been replaced with + a singular `--wasm-features` option. The previous options are still + supported, but are not displayed in help text. + * Breaking: the CLI option `--cranelift-flags` was changed to + `--cranelift-set`. + * Breaking: the CLI option `--enable-reference-types=false` has been changed + to `--wasm-features=-reference-types`. + * Breaking: the CLI option `--enable-multi-value=false` has been changed to + `--wasm-features=-multi-value`. + * Breaking: the CLI option `--enable-bulk-memory=false` has been changed to + `--wasm-features=-bulk-memory`. + +* Improved error-reporting in wiggle. + [#2760](https://github.com/bytecodealliance/wasmtime/pull/2760) + +* Make WASI sleeping fallible (some systems do not support sleep). + [#2756](https://github.com/bytecodealliance/wasmtime/pull/2756) + +* WASI: Support `poll_oneoff` with a sleep. + [#2753](https://github.com/bytecodealliance/wasmtime/pull/2753) + +* Allow a `StackMapSink` to be passed when defining functions with + `cranelift-module`. + [#2739](https://github.com/bytecodealliance/wasmtime/pull/2739) + +* Some refactoring in new x86-64 backend to prepare for VEX/EVEX (e.g., + AVX-512) instruction encodings to be supported. + [#2799](https://github.com/bytecodealliance/wasmtime/pull/2799) + +### Fixed + +* Fixed a corner case in `srem` (signed remainder) in the new x86-64 backend: + `INT_MIN % -1` should return `0`, rather than trapping. This only occurred + when `avoid_div_traps == false` was set by the embedding. 
+ [#2763](https://github.com/bytecodealliance/wasmtime/pull/2763) + +* Fixed a memory leak of the `Store` when an instance traps. + [#2803](https://github.com/bytecodealliance/wasmtime/pull/2803) + +* Some fuzzing-related fixes. + [#2788](https://github.com/bytecodealliance/wasmtime/pull/2788), + [#2770](https://github.com/bytecodealliance/wasmtime/pull/2770) + +* Fixed memory-initialization bug in uffd allocator that could copy into the + wrong destination under certain conditions. Does not affect the default + wasmtime instance allocator. + [#2801](https://github.com/bytecodealliance/wasmtime/pull/2801) + +* Fix printing of float values from the Wasmtime CLI. + [#2797](https://github.com/bytecodealliance/wasmtime/pull/2797) + +* Remove the ability for the `Linker` to instantiate modules with duplicate + import strings of different types. + [#2789](https://github.com/bytecodealliance/wasmtime/pull/2789) + ## 0.25.0 Released 2021-03-16. @@ -39,7 +170,7 @@ Released 2021-03-16. ### Fixed -* Interepretation of timestamps in `poll_oneoff` for WASI have been fixed to +* Interpretation of timestamps in `poll_oneoff` for WASI have been fixed to correctly use nanoseconds instead of microseconds. 
[#2717](https://github.com/bytecodealliance/wasmtime/pull/2717) diff --git a/build.rs b/build.rs index 95f1c24be7..9ee3b893c7 100644 --- a/build.rs +++ b/build.rs @@ -155,11 +155,8 @@ fn write_testsuite_tests( let testname = extract_name(path); writeln!(out, "#[test]")?; - if experimental_x64_should_panic(testsuite, &testname, strategy) { - writeln!( - out, - r#"#[cfg_attr(feature = "experimental_x64", should_panic)]"# - )?; + if x64_should_panic(testsuite, &testname, strategy) { + writeln!(out, r#"#[should_panic]"#)?; } else if ignore(testsuite, &testname, strategy) { writeln!(out, "#[ignore]")?; } else if pooling { @@ -186,10 +183,10 @@ fn write_testsuite_tests( Ok(()) } -/// For experimental_x64 backend features that are not supported yet, mark tests as panicking, so +/// For x64 backend features that are not supported yet, mark tests as panicking, so /// they stop "passing" once the features are properly implemented. -fn experimental_x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool { - if !cfg!(feature = "experimental_x64") || strategy != "Cranelift" { +fn x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool { + if !platform_is_x64() || strategy != "Cranelift" { return false; } @@ -222,12 +219,10 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { _ => (), }, "Cranelift" => match (testsuite, testname) { - // TODO(#1886): Ignore reference types tests if this isn't x64, - // because Cranelift only supports reference types on x64. - ("reference_types", _) => { - return env::var("CARGO_CFG_TARGET_ARCH").unwrap() != "x86_64"; - } + // No simd support yet for s390x. + ("simd", _) if platform_is_s390x() => return true, + ("simd", _) if cfg!(feature = "old-x86-backend") => return true, // skip all SIMD tests on old backend. // These are new instructions that are not really implemented in any backend. 
("simd", "simd_i8x16_arith2") | ("simd", "simd_conversions") @@ -240,22 +235,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { | ("simd", "simd_i64x2_extmul_i32x4") | ("simd", "simd_int_to_int_extend") => return true, - // These are only implemented on x64. - ("simd", "simd_i64x2_arith2") | ("simd", "simd_boolean") => { - return !cfg!(feature = "experimental_x64") - } - - // These are only implemented on aarch64 and x64. - ("simd", "simd_i64x2_cmp") - | ("simd", "simd_f32x4_pmin_pmax") - | ("simd", "simd_f64x2_pmin_pmax") - | ("simd", "simd_f32x4_rounding") - | ("simd", "simd_f64x2_rounding") - | ("simd", "simd_i32x4_dot_i16x8") => { - return !(cfg!(feature = "experimental_x64") - || env::var("CARGO_CFG_TARGET_ARCH").unwrap() == "aarch64") - } - _ => {} }, _ => panic!("unrecognized strategy"), @@ -263,3 +242,11 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { false } + +fn platform_is_x64() -> bool { + env::var("CARGO_CFG_TARGET_ARCH").unwrap() == "x86_64" +} + +fn platform_is_s390x() -> bool { + env::var("CARGO_CFG_TARGET_ARCH").unwrap() == "s390x" +} diff --git a/ci/run-experimental-x64-ci.sh b/ci/run-experimental-x64-ci.sh deleted file mode 100755 index e978bbd934..0000000000 --- a/ci/run-experimental-x64-ci.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/bash - -# Use the Nightly variant of the compiler to properly unify the -# experimental_x64 feature across all crates. Once the feature has stabilized -# and become the default, we can remove this. -CARGO_VERSION=${CARGO_VERSION:-"+nightly"} - -# Some WASI tests seem to have an issue on Windows with symlinks if we run them -# with this particular invocation. It's unclear why (nightly toolchain?) but -# we're moving to the new backend by default soon enough, and all tests seem to -# work with the main test setup, so let's just work around this by skipping -# the tests for now. 
-MINGW_EXTRA="" -if [ `uname -o` == "Msys" ]; then - MINGW_EXTRA="-- --skip wasi_cap_std_sync" -fi - -cargo $CARGO_VERSION \ - --locked \ - -Zfeatures=all -Zpackage-features \ - test \ - --features test-programs/test_programs \ - --features experimental_x64 \ - --all \ - --exclude wasmtime-lightbeam \ - --exclude wasmtime-wasi-nn \ - --exclude wasmtime-wasi-crypto \ - --exclude peepmatic \ - --exclude peepmatic-automata \ - --exclude peepmatic-fuzzing \ - --exclude peepmatic-macro \ - --exclude peepmatic-runtime \ - --exclude peepmatic-test \ - --exclude peepmatic-souper \ - --exclude lightbeam \ - $MINGW_EXTRA diff --git a/ci/run-old-x86-ci.sh b/ci/run-old-x86-ci.sh new file mode 100755 index 0000000000..2f0983030a --- /dev/null +++ b/ci/run-old-x86-ci.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +cargo test \ + --locked \ + --features test-programs/test_programs \ + --features old-x86-backend \ + --all \ + --exclude wasmtime-lightbeam \ + --exclude wasmtime-wasi-nn \ + --exclude wasmtime-wasi-crypto \ + --exclude peepmatic \ + --exclude peepmatic-automata \ + --exclude peepmatic-fuzzing \ + --exclude peepmatic-macro \ + --exclude peepmatic-runtime \ + --exclude peepmatic-test \ + --exclude peepmatic-souper \ + --exclude lightbeam diff --git a/ci/run-wasi-crypto-example.sh b/ci/run-wasi-crypto-example.sh index d2582c71b1..b027e48340 100755 --- a/ci/run-wasi-crypto-example.sh +++ b/ci/run-wasi-crypto-example.sh @@ -7,4 +7,4 @@ pushd "$RUST_BINDINGS" cargo build --release --target=wasm32-wasi popd -cargo run --features wasi-crypto -- run "$RUST_BINDINGS/target/wasm32-wasi/release/wasi-crypto-guest.wasm" +cargo run --features wasi-crypto -- run "$RUST_BINDINGS/target/wasm32-wasi/release/wasi-crypto-guest.wasm" --wasi-modules=experimental-wasi-crypto diff --git a/ci/run-wasi-nn-example.sh b/ci/run-wasi-nn-example.sh index e24ffa75ac..97b55362c6 100755 --- a/ci/run-wasi-nn-example.sh +++ b/ci/run-wasi-nn-example.sh @@ -7,7 +7,7 @@ # executed with the Wasmtime CLI. 
set -e WASMTIME_DIR=$(dirname "$0" | xargs dirname) -FIXTURE=https://github.com/intel/openvino-rs/raw/main/crates/openvino/tests/fixtures/alexnet +FIXTURE=https://github.com/intel/openvino-rs/raw/main/crates/openvino/tests/fixtures/mobilenet if [ -z "${1+x}" ]; then # If no temporary directory is specified, create one. TMP_DIR=$(mktemp -d -t ci-XXXXXXXXXX) @@ -26,9 +26,9 @@ source /opt/intel/openvino/bin/setupvars.sh OPENVINO_INSTALL_DIR=/opt/intel/openvino cargo build -p wasmtime-cli --features wasi-nn # Download all necessary test fixtures to the temporary directory. -wget --no-clobber --directory-prefix=$TMP_DIR $FIXTURE/alexnet.bin -wget --no-clobber --directory-prefix=$TMP_DIR $FIXTURE/alexnet.xml -wget --no-clobber --directory-prefix=$TMP_DIR $FIXTURE/tensor-1x3x227x227-f32.bgr +wget --no-clobber $FIXTURE/mobilenet.bin --output-document=$TMP_DIR/model.bin +wget --no-clobber $FIXTURE/mobilenet.xml --output-document=$TMP_DIR/model.xml +wget --no-clobber $FIXTURE/tensor-1x224x224x3-f32.bgr --output-document=$TMP_DIR/tensor.bgr # Now build an example that uses the wasi-nn API. pushd $WASMTIME_DIR/crates/wasi-nn/examples/classification-example @@ -37,9 +37,9 @@ cp target/wasm32-wasi/release/wasi-nn-example.wasm $TMP_DIR popd # Run the example in Wasmtime (note that the example uses `fixture` as the expected location of the model/tensor files). -OPENVINO_INSTALL_DIR=/opt/intel/openvino cargo run --features wasi-nn -- run --mapdir fixture::$TMP_DIR $TMP_DIR/wasi-nn-example.wasm +cargo run -- run --mapdir fixture::$TMP_DIR $TMP_DIR/wasi-nn-example.wasm --wasi-modules=experimental-wasi-nn # Clean up the temporary directory only if it was not specified (users may want to keep the directory around). 
if [[ $REMOVE_TMP_DIR -eq 1 ]]; then rm -rf $TMP_DIR -fi \ No newline at end of file +fi diff --git a/cranelift/Cargo.toml b/cranelift/Cargo.toml index a5fe3acd3a..d2166c0c25 100644 --- a/cranelift/Cargo.toml +++ b/cranelift/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "cranelift-tools" authors = ["The Cranelift Project Developers"] -version = "0.66.0" +version = "0.73.0" description = "Binaries for testing the Cranelift libraries" license = "Apache-2.0 WITH LLVM-exception" documentation = "https://github.com/bytecodealliance/wasmtime/blob/main/cranelift/docs/index.md" @@ -15,27 +15,27 @@ path = "src/clif-util.rs" [dependencies] cfg-if = "1.0" -cranelift-codegen = { path = "codegen", version = "0.72.0" } -cranelift-entity = { path = "entity", version = "0.72.0" } -cranelift-interpreter = { path = "interpreter", version = "0.72.0" } -cranelift-reader = { path = "reader", version = "0.72.0" } -cranelift-frontend = { path = "frontend", version = "0.72.0" } -cranelift-serde = { path = "serde", version = "0.72.0", optional = true } -cranelift-wasm = { path = "wasm", version = "0.72.0", optional = true } -cranelift-native = { path = "native", version = "0.72.0" } -cranelift-filetests = { path = "filetests", version = "0.66.0" } -cranelift-module = { path = "module", version = "0.72.0" } -cranelift-object = { path = "object", version = "0.72.0" } -cranelift-jit = { path = "jit", version = "0.72.0" } -cranelift-preopt = { path = "preopt", version = "0.72.0" } -cranelift = { path = "umbrella", version = "0.72.0" } +cranelift-codegen = { path = "codegen", version = "0.73.0" } +cranelift-entity = { path = "entity", version = "0.73.0" } +cranelift-interpreter = { path = "interpreter", version = "0.73.0" } +cranelift-reader = { path = "reader", version = "0.73.0" } +cranelift-frontend = { path = "frontend", version = "0.73.0" } +cranelift-serde = { path = "serde", version = "0.73.0", optional = true } +cranelift-wasm = { path = "wasm", version = "0.73.0", optional = true } 
+cranelift-native = { path = "native", version = "0.73.0" } +cranelift-filetests = { path = "filetests", version = "0.73.0" } +cranelift-module = { path = "module", version = "0.73.0" } +cranelift-object = { path = "object", version = "0.73.0" } +cranelift-jit = { path = "jit", version = "0.73.0" } +cranelift-preopt = { path = "preopt", version = "0.73.0" } +cranelift = { path = "umbrella", version = "0.73.0" } filecheck = "0.5.0" log = "0.4.8" termcolor = "1.1.2" capstone = { version = "0.7.0", optional = true } wat = { version = "1.0.36", optional = true } -target-lexicon = { version = "0.11", features = ["std"] } -peepmatic-souper = { path = "./peepmatic/crates/souper", version = "0.72.0", optional = true } +target-lexicon = { version = "0.12", features = ["std"] } +peepmatic-souper = { path = "./peepmatic/crates/souper", version = "0.73.0", optional = true } pretty_env_logger = "0.4.0" rayon = { version = "1", optional = true } file-per-thread-logger = "0.1.2" @@ -50,6 +50,6 @@ default = ["disas", "wasm", "cranelift-codegen/all-arch", "peepmatic-souper", "s disas = ["capstone"] enable-peepmatic = ["cranelift-codegen/enable-peepmatic", "cranelift-filetests/enable-peepmatic"] wasm = ["wat", "cranelift-wasm"] -experimental_x64 = ["cranelift-codegen/x64", "cranelift-filetests/experimental_x64", "cranelift-reader/experimental_x64"] experimental_arm32 = ["cranelift-codegen/arm32", "cranelift-filetests/experimental_arm32"] souper-harvest = ["cranelift-codegen/souper-harvest", "rayon"] +all-arch = ["cranelift-codegen/all-arch"] diff --git a/cranelift/bforest/Cargo.toml b/cranelift/bforest/Cargo.toml index e88658c4dd..a4da439e36 100644 --- a/cranelift/bforest/Cargo.toml +++ b/cranelift/bforest/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["The Cranelift Project Developers"] name = "cranelift-bforest" -version = "0.72.0" +version = "0.73.0" description = "A forest of B+-trees" license = "Apache-2.0 WITH LLVM-exception" documentation = 
"https://docs.rs/cranelift-bforest" @@ -12,7 +12,7 @@ keywords = ["btree", "forest", "set", "map"] edition = "2018" [dependencies] -cranelift-entity = { path = "../entity", version = "0.72.0", default-features = false } +cranelift-entity = { path = "../entity", version = "0.73.0", default-features = false } [badges] maintenance = { status = "experimental" } diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 068c7847a4..2f35b5fe4b 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["The Cranelift Project Developers"] name = "cranelift-codegen" -version = "0.72.0" +version = "0.73.0" description = "Low-level code generator library" license = "Apache-2.0 WITH LLVM-exception" documentation = "https://docs.rs/cranelift-codegen" @@ -13,21 +13,19 @@ build = "build.rs" edition = "2018" [dependencies] -cranelift-codegen-shared = { path = "./shared", version = "0.72.0" } -cranelift-entity = { path = "../entity", version = "0.72.0" } -cranelift-bforest = { path = "../bforest", version = "0.72.0" } +cranelift-codegen-shared = { path = "./shared", version = "0.73.0" } +cranelift-entity = { path = "../entity", version = "0.73.0" } +cranelift-bforest = { path = "../bforest", version = "0.73.0" } hashbrown = { version = "0.9.1", optional = true } -target-lexicon = "0.11" +target-lexicon = "0.12" log = { version = "0.4.6", default-features = false } serde = { version = "1.0.94", features = ["derive"], optional = true } bincode = { version = "1.2.1", optional = true } -gimli = { version = "0.23.0", default-features = false, features = ["write"], optional = true } +gimli = { version = "0.24.0", default-features = false, features = ["write"], optional = true } smallvec = { version = "1.6.1" } -thiserror = "1.0.4" -byteorder = { version = "1.3.2", default-features = false } -peepmatic = { path = "../peepmatic", optional = true, version = "0.72.0" } -peepmatic-traits = { path = 
"../peepmatic/crates/traits", optional = true, version = "0.72.0" } -peepmatic-runtime = { path = "../peepmatic/crates/runtime", optional = true, version = "0.72.0" } +peepmatic = { path = "../peepmatic", optional = true, version = "0.73.0" } +peepmatic-traits = { path = "../peepmatic/crates/traits", optional = true, version = "0.73.0" } +peepmatic-runtime = { path = "../peepmatic/crates/runtime", optional = true, version = "0.73.0" } regalloc = { version = "0.0.31" } souper-ir = { version = "2.1.0", optional = true } wast = { version = "35.0.0", optional = true } @@ -36,8 +34,11 @@ wast = { version = "35.0.0", optional = true } # machine code. Integration tests that need external dependencies can be # accomodated in `tests`. +[dev-dependencies] +criterion = "0.3" + [build-dependencies] -cranelift-codegen-meta = { path = "meta", version = "0.72.0" } +cranelift-codegen-meta = { path = "meta", version = "0.73.0" } [features] default = ["std", "unwind"] @@ -63,14 +64,22 @@ unwind = ["gimli"] x86 = [] arm64 = [] riscv = [] -x64 = [] # New work-in-progress codegen backend for x86_64 based on the new isel. +s390x = [] arm32 = [] # Work-in-progress codegen backend for ARM. +# Stub feature that does nothing, for Cargo-features compatibility: the new +# backend is the default now. +experimental_x64 = [] + +# Make the old x86 backend the default. +old-x86-backend = [] + # Option to enable all architectures. all-arch = [ "x86", "arm64", - "riscv" + "riscv", + "s390x" ] # For dependent crates that want to serialize some parts of cranelift @@ -97,3 +106,7 @@ souper-harvest = ["souper-ir", "souper-ir/stringify"] [badges] maintenance = { status = "experimental" } + +[[bench]] +name = "x64-evex-encoding" +harness = false diff --git a/cranelift/codegen/benches/x64-evex-encoding.rs b/cranelift/codegen/benches/x64-evex-encoding.rs new file mode 100644 index 0000000000..e5c1434181 --- /dev/null +++ b/cranelift/codegen/benches/x64-evex-encoding.rs @@ -0,0 +1,138 @@ +//! 
Measure instruction encoding latency using various approaches; the +//! benchmarking is feature-gated on `x86` since it only measures the encoding +//! mechanism of that backend. + +#[cfg(feature = "x86")] +mod x86 { + use cranelift_codegen::isa::x64::encoding::{ + evex::{EvexContext, EvexInstruction, EvexMasking, EvexVectorLength, Register}, + rex::OpcodeMap, + rex::{encode_modrm, LegacyPrefixes}, + ByteSink, + }; + use cranelift_codegen_shared::isa::x86::EncodingBits; + use criterion::{criterion_group, Criterion}; + + // Define the benchmarks. + fn x64_evex_encoding_benchmarks(c: &mut Criterion) { + let mut group = c.benchmark_group("x64 EVEX encoding"); + let rax = Register::from(0); + let rdx = Register::from(2); + + group.bench_function("EvexInstruction (builder pattern)", |b| { + let mut sink = vec![]; + b.iter(|| { + sink.clear(); + EvexInstruction::new() + .prefix(LegacyPrefixes::_66) + .map(OpcodeMap::_0F38) + .w(true) + .opcode(0x1F) + .reg(rax) + .rm(rdx) + .length(EvexVectorLength::V128) + .encode(&mut sink); + }); + }); + + group.bench_function("encode_evex (function pattern)", |b| { + let mut sink = vec![]; + let bits = EncodingBits::new(&[0x66, 0x0f, 0x38, 0x1f], 0, 1); + let vvvvv = Register::from(0); + b.iter(|| { + sink.clear(); + encode_evex( + bits, + rax, + vvvvv, + rdx, + EvexContext::Other { + length: EvexVectorLength::V128, + }, + EvexMasking::default(), + &mut sink, + ); + }) + }); + } + criterion_group!(benches, x64_evex_encoding_benchmarks); + + /// Using an inner module to feature-gate the benchmarks means that we must + /// manually specify how to run the benchmarks (see `criterion_main!`). 
+ pub fn run_benchmarks() { + criterion::__warn_about_html_reports_feature(); + criterion::__warn_about_cargo_bench_support_feature(); + benches(); + Criterion::default().configure_from_args().final_summary(); + } + + /// From the legacy x86 backend: a mechanism for encoding an EVEX + /// instruction, including the prefixes, the instruction opcode, and the + /// ModRM byte. This EVEX encoding function only encodes the `reg` (operand + /// 1), `vvvv` (operand 2), `rm` (operand 3) form; other forms are possible + /// (see section 2.6.2, Intel Software Development Manual, volume 2A), + /// requiring refactoring of this function or separate functions for each + /// form (e.g. as for the REX prefix). + #[inline(always)] + pub fn encode_evex( + enc: EncodingBits, + reg: Register, + vvvvv: Register, + rm: Register, + context: EvexContext, + masking: EvexMasking, + sink: &mut CS, + ) { + let reg: u8 = reg.into(); + let rm: u8 = rm.into(); + let vvvvv: u8 = vvvvv.into(); + + // EVEX prefix. + sink.put1(0x62); + + debug_assert!(enc.mm() < 0b100); + let mut p0 = enc.mm() & 0b11; + p0 |= evex2(rm, reg) << 4; // bits 3:2 are always unset + sink.put1(p0); + + let mut p1 = enc.pp() | 0b100; // bit 2 is always set + p1 |= (!(vvvvv) & 0b1111) << 3; + p1 |= (enc.rex_w() & 0b1) << 7; + sink.put1(p1); + + let mut p2 = masking.aaa_bits(); + p2 |= (!(vvvvv >> 4) & 0b1) << 3; + p2 |= context.bits() << 4; + p2 |= masking.z_bit() << 7; + sink.put1(p2); + + // Opcode. + sink.put1(enc.opcode_byte()); + + // ModR/M byte. + sink.put1(encode_modrm(3, reg & 7, rm & 7)) + } + + /// From the legacy x86 backend: encode the RXBR' bits of the EVEX P0 byte. + /// For an explanation of these bits, see section 2.6.1 in the Intel + /// Software Development Manual, volume 2A. These bits can be used by + /// different addressing modes (see section 2.6.2), requiring different + /// `vex*` functions than this one. 
+ fn evex2(rm: u8, reg: u8) -> u8 { + let b = !(rm >> 3) & 1; + let x = !(rm >> 4) & 1; + let r = !(reg >> 3) & 1; + let r_ = !(reg >> 4) & 1; + 0x00 | r_ | (b << 1) | (x << 2) | (r << 3) + } +} + +fn main() { + #[cfg(feature = "x86")] + x86::run_benchmarks(); + + #[cfg(not(feature = "x86"))] + println!( + "Unable to run the x64-evex-encoding benchmark; the `x86` feature must be enabled in Cargo.", + ); +} diff --git a/cranelift/codegen/meta/Cargo.toml b/cranelift/codegen/meta/Cargo.toml index c2ad1a8d4b..d5eaaa55bf 100644 --- a/cranelift/codegen/meta/Cargo.toml +++ b/cranelift/codegen/meta/Cargo.toml @@ -1,19 +1,20 @@ [package] name = "cranelift-codegen-meta" authors = ["The Cranelift Project Developers"] -version = "0.72.0" +version = "0.73.0" description = "Metaprogram for cranelift-codegen code generator library" license = "Apache-2.0 WITH LLVM-exception" repository = "https://github.com/bytecodealliance/wasmtime" readme = "README.md" edition = "2018" -[package.metadata.docs.rs] -rustdoc-args = [ "--document-private-items" ] +# FIXME(rust-lang/cargo#9300): uncomment once that lands +# [package.metadata.docs.rs] +# rustdoc-args = [ "--document-private-items" ] [dependencies] -cranelift-codegen-shared = { path = "../shared", version = "0.72.0" } -cranelift-entity = { path = "../../entity", version = "0.72.0" } +cranelift-codegen-shared = { path = "../shared", version = "0.73.0" } +cranelift-entity = { path = "../../entity", version = "0.73.0" } [badges] maintenance = { status = "experimental" } diff --git a/cranelift/codegen/meta/src/cdsl/settings.rs b/cranelift/codegen/meta/src/cdsl/settings.rs index 217bad9955..52c51d54a8 100644 --- a/cranelift/codegen/meta/src/cdsl/settings.rs +++ b/cranelift/codegen/meta/src/cdsl/settings.rs @@ -20,6 +20,7 @@ pub(crate) enum SpecificSetting { #[derive(Hash, PartialEq, Eq)] pub(crate) struct Setting { pub name: &'static str, + pub description: &'static str, pub comment: &'static str, pub specific: SpecificSetting, pub 
byte_offset: u8, @@ -88,6 +89,7 @@ impl Into for PresetIndex { #[derive(Hash, PartialEq, Eq)] pub(crate) struct Preset { pub name: &'static str, + pub description: &'static str, values: Vec, } @@ -169,6 +171,7 @@ pub(crate) enum ProtoSpecificSetting { /// This is the information provided during building for a setting. struct ProtoSetting { name: &'static str, + description: &'static str, comment: &'static str, specific: ProtoSpecificSetting, } @@ -251,11 +254,13 @@ impl SettingGroupBuilder { fn add_setting( &mut self, name: &'static str, + description: &'static str, comment: &'static str, specific: ProtoSpecificSetting, ) { self.settings.push(ProtoSetting { name, + description, comment, specific, }) @@ -264,6 +269,7 @@ impl SettingGroupBuilder { pub fn add_bool( &mut self, name: &'static str, + description: &'static str, comment: &'static str, default: bool, ) -> BoolSettingIndex { @@ -271,28 +277,55 @@ impl SettingGroupBuilder { self.predicates.is_empty(), "predicates must be added after the boolean settings" ); - self.add_setting(name, comment, ProtoSpecificSetting::Bool(default)); + self.add_setting( + name, + description, + comment, + ProtoSpecificSetting::Bool(default), + ); BoolSettingIndex(self.settings.len() - 1) } pub fn add_enum( &mut self, name: &'static str, + description: &'static str, comment: &'static str, values: Vec<&'static str>, ) { - self.add_setting(name, comment, ProtoSpecificSetting::Enum(values)); + self.add_setting( + name, + description, + comment, + ProtoSpecificSetting::Enum(values), + ); } - pub fn add_num(&mut self, name: &'static str, comment: &'static str, default: u8) { - self.add_setting(name, comment, ProtoSpecificSetting::Num(default)); + pub fn add_num( + &mut self, + name: &'static str, + description: &'static str, + comment: &'static str, + default: u8, + ) { + self.add_setting( + name, + description, + comment, + ProtoSpecificSetting::Num(default), + ); } pub fn add_predicate(&mut self, name: &'static str, node: 
PredicateNode) { self.predicates.push(ProtoPredicate { name, node }); } - pub fn add_preset(&mut self, name: &'static str, args: Vec) -> PresetIndex { + pub fn add_preset( + &mut self, + name: &'static str, + description: &'static str, + args: Vec, + ) -> PresetIndex { let mut values = Vec::new(); for arg in args { match arg { @@ -302,7 +335,11 @@ impl SettingGroupBuilder { PresetType::BoolSetting(index) => values.push(index), } } - self.presets.push(Preset { name, values }); + self.presets.push(Preset { + name, + description, + values, + }); PresetIndex(self.presets.len() - 1) } @@ -347,6 +384,7 @@ impl SettingGroupBuilder { group.settings.push(Setting { name: s.name, + description: s.description, comment: s.comment, byte_offset, specific, @@ -367,6 +405,7 @@ impl SettingGroupBuilder { }; group.settings.push(Setting { name: s.name, + description: s.description, comment: s.comment, byte_offset: byte_offset + predicate_number / 8, specific: SpecificSetting::Bool(BoolSetting { diff --git a/cranelift/codegen/meta/src/gen_settings.rs b/cranelift/codegen/meta/src/gen_settings.rs index a70ddccfe1..d7116cac9c 100644 --- a/cranelift/codegen/meta/src/gen_settings.rs +++ b/cranelift/codegen/meta/src/gen_settings.rs @@ -70,6 +70,33 @@ fn gen_constructor(group: &SettingGroup, parent: ParentGroup, fmt: &mut Formatte fmtln!(fmt, "}"); } +/// Generates the `iter` function. 
+fn gen_iterator(group: &SettingGroup, fmt: &mut Formatter) { + fmtln!(fmt, "impl Flags {"); + fmt.indent(|fmt| { + fmt.doc_comment("Iterates the setting values."); + fmtln!(fmt, "pub fn iter(&self) -> impl Iterator {"); + fmt.indent(|fmt| { + fmtln!(fmt, "let mut bytes = [0; {}];", group.settings_size); + fmtln!(fmt, "bytes.copy_from_slice(&self.bytes[0..{}]);", group.settings_size); + fmtln!(fmt, "DESCRIPTORS.iter().filter_map(move |d| {"); + fmt.indent(|fmt| { + fmtln!(fmt, "let values = match &d.detail {"); + fmt.indent(|fmt| { + fmtln!(fmt, "detail::Detail::Preset => return None,"); + fmtln!(fmt, "detail::Detail::Enum { last, enumerators } => Some(TEMPLATE.enums(*last, *enumerators)),"); + fmtln!(fmt, "_ => None"); + }); + fmtln!(fmt, "};"); + fmtln!(fmt, "Some(Value{ name: d.name, detail: d.detail, values, value: bytes[d.offset as usize] })"); + }); + fmtln!(fmt, "})"); + }); + fmtln!(fmt, "}"); + }); + fmtln!(fmt, "}"); +} + /// Emit Display and FromStr implementations for enum settings. fn gen_to_and_from_str(name: &str, values: &[&'static str], fmt: &mut Formatter) { fmtln!(fmt, "impl fmt::Display for {} {{", name); @@ -136,7 +163,7 @@ fn gen_enum_types(group: &SettingGroup, fmt: &mut Formatter) { /// Emit a getter function for `setting`. fn gen_getter(setting: &Setting, fmt: &mut Formatter) { - fmt.doc_comment(setting.comment); + fmt.doc_comment(format!("{}\n{}", setting.description, setting.comment)); match setting.specific { SpecificSetting::Bool(BoolSetting { predicate_number, .. @@ -254,6 +281,7 @@ fn gen_descriptors(group: &SettingGroup, fmt: &mut Formatter) { fmtln!(fmt, "detail::Descriptor {"); fmt.indent(|fmt| { fmtln!(fmt, "name: \"{}\",", setting.name); + fmtln!(fmt, "description: \"{}\",", setting.description); fmtln!(fmt, "offset: {},", setting.byte_offset); match setting.specific { SpecificSetting::Bool(BoolSetting { bit_offset, .. 
}) => { @@ -286,6 +314,7 @@ fn gen_descriptors(group: &SettingGroup, fmt: &mut Formatter) { fmtln!(fmt, "detail::Descriptor {"); fmt.indent(|fmt| { fmtln!(fmt, "name: \"{}\",", preset.name); + fmtln!(fmt, "description: \"{}\",", preset.description); fmtln!(fmt, "offset: {},", (idx as u8) * group.settings_size); fmtln!(fmt, "detail: detail::Detail::Preset,"); }); @@ -427,6 +456,7 @@ fn gen_group(group: &SettingGroup, parent: ParentGroup, fmt: &mut Formatter) { fmtln!(fmt, "}"); gen_constructor(group, parent, fmt); + gen_iterator(group, fmt); gen_enum_types(group, fmt); gen_getters(group, fmt); gen_descriptors(group, fmt); diff --git a/cranelift/codegen/meta/src/isa/arm64/mod.rs b/cranelift/codegen/meta/src/isa/arm64/mod.rs index cbc21347e9..4277e147a7 100644 --- a/cranelift/codegen/meta/src/isa/arm64/mod.rs +++ b/cranelift/codegen/meta/src/isa/arm64/mod.rs @@ -9,7 +9,7 @@ use crate::shared::Definitions as SharedDefinitions; fn define_settings(_shared: &SettingGroup) -> SettingGroup { let mut setting = SettingGroupBuilder::new("arm64"); - let has_lse = setting.add_bool("has_lse", "Large System Extensions", false); + let has_lse = setting.add_bool("has_lse", "Has Large System Extensions support.", "", false); setting.add_predicate("use_lse", predicate!(has_lse)); setting.build() diff --git a/cranelift/codegen/meta/src/isa/mod.rs b/cranelift/codegen/meta/src/isa/mod.rs index ed8db85f0d..34032842c2 100644 --- a/cranelift/codegen/meta/src/isa/mod.rs +++ b/cranelift/codegen/meta/src/isa/mod.rs @@ -6,6 +6,7 @@ use std::fmt; mod arm32; mod arm64; mod riscv; +mod s390x; pub(crate) mod x86; /// Represents known ISA target. 
@@ -15,6 +16,7 @@ pub enum Isa { X86, Arm32, Arm64, + S390x, } impl Isa { @@ -31,6 +33,7 @@ impl Isa { match arch { "riscv" => Some(Isa::Riscv), "aarch64" => Some(Isa::Arm64), + "s390x" => Some(Isa::S390x), x if ["x86_64", "i386", "i586", "i686"].contains(&x) => Some(Isa::X86), x if x.starts_with("arm") || arch.starts_with("thumb") => Some(Isa::Arm32), _ => None, @@ -39,7 +42,7 @@ impl Isa { /// Returns all supported isa targets. pub fn all() -> &'static [Isa] { - &[Isa::Riscv, Isa::X86, Isa::Arm32, Isa::Arm64] + &[Isa::Riscv, Isa::X86, Isa::Arm32, Isa::Arm64, Isa::S390x] } } @@ -51,6 +54,7 @@ impl fmt::Display for Isa { Isa::X86 => write!(f, "x86"), Isa::Arm32 => write!(f, "arm32"), Isa::Arm64 => write!(f, "arm64"), + Isa::S390x => write!(f, "s390x"), } } } @@ -62,6 +66,7 @@ pub(crate) fn define(isas: &[Isa], shared_defs: &mut SharedDefinitions) -> Vec x86::define(shared_defs), Isa::Arm32 => arm32::define(shared_defs), Isa::Arm64 => arm64::define(shared_defs), + Isa::S390x => s390x::define(shared_defs), }) .collect() } diff --git a/cranelift/codegen/meta/src/isa/riscv/mod.rs b/cranelift/codegen/meta/src/isa/riscv/mod.rs index 801e61a3d2..49f26391ce 100644 --- a/cranelift/codegen/meta/src/isa/riscv/mod.rs +++ b/cranelift/codegen/meta/src/isa/riscv/mod.rs @@ -17,33 +17,39 @@ fn define_settings(shared: &SettingGroup) -> SettingGroup { let supports_m = setting.add_bool( "supports_m", "CPU supports the 'M' extension (mul/div)", + "", false, ); let supports_a = setting.add_bool( "supports_a", "CPU supports the 'A' extension (atomics)", + "", false, ); let supports_f = setting.add_bool( "supports_f", "CPU supports the 'F' extension (float)", + "", false, ); let supports_d = setting.add_bool( "supports_d", "CPU supports the 'D' extension (double)", + "", false, ); let enable_m = setting.add_bool( "enable_m", "Enable the use of 'M' instructions if available", + "", true, ); setting.add_bool( "enable_e", "Enable the 'RV32E' instruction set with only 16 registers", + "", 
false, ); diff --git a/cranelift/codegen/meta/src/isa/s390x/mod.rs b/cranelift/codegen/meta/src/isa/s390x/mod.rs new file mode 100644 index 0000000000..2ec1040553 --- /dev/null +++ b/cranelift/codegen/meta/src/isa/s390x/mod.rs @@ -0,0 +1,31 @@ +use crate::cdsl::cpu_modes::CpuMode; +use crate::cdsl::instructions::{InstructionGroupBuilder, InstructionPredicateMap}; +use crate::cdsl::isa::TargetIsa; +use crate::cdsl::recipes::Recipes; +use crate::cdsl::regs::IsaRegsBuilder; +use crate::cdsl::settings::SettingGroupBuilder; + +use crate::shared::Definitions as SharedDefinitions; + +pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { + let inst_group = InstructionGroupBuilder::new(&mut shared_defs.all_instructions).build(); + let settings = SettingGroupBuilder::new("s390x").build(); + let regs = IsaRegsBuilder::new().build(); + let recipes = Recipes::new(); + let encodings_predicates = InstructionPredicateMap::new(); + + let mut mode = CpuMode::new("s390x"); + let expand = shared_defs.transform_groups.by_name("expand"); + mode.legalize_default(expand); + let cpu_modes = vec![mode]; + + TargetIsa::new( + "s390x", + inst_group, + settings, + regs, + recipes, + cpu_modes, + encodings_predicates, + ) +} diff --git a/cranelift/codegen/meta/src/isa/x86/settings.rs b/cranelift/codegen/meta/src/isa/x86/settings.rs index dddd69abb3..70b829787d 100644 --- a/cranelift/codegen/meta/src/isa/x86/settings.rs +++ b/cranelift/codegen/meta/src/isa/x86/settings.rs @@ -4,37 +4,77 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup { let mut settings = SettingGroupBuilder::new("x86"); // CPUID.01H:ECX - let has_sse3 = settings.add_bool("has_sse3", "SSE3: CPUID.01H:ECX.SSE3[bit 0]", false); - let has_ssse3 = settings.add_bool("has_ssse3", "SSSE3: CPUID.01H:ECX.SSSE3[bit 9]", false); - let has_sse41 = settings.add_bool("has_sse41", "SSE4.1: CPUID.01H:ECX.SSE4_1[bit 19]", false); - let has_sse42 = settings.add_bool("has_sse42", "SSE4.2: CPUID.01H:ECX.SSE4_2[bit 
20]", false); - let has_avx = settings.add_bool("has_avx", "AVX: CPUID.01H:ECX.AVX[bit 28]", false); - let has_avx2 = settings.add_bool("has_avx2", "AVX2: CPUID.07H:EBX.AVX2[bit 5]", false); + let has_sse3 = settings.add_bool( + "has_sse3", + "Has support for SSE3.", + "SSE3: CPUID.01H:ECX.SSE3[bit 0]", + false, + ); + let has_ssse3 = settings.add_bool( + "has_ssse3", + "Has support for SSSE3.", + "SSSE3: CPUID.01H:ECX.SSSE3[bit 9]", + false, + ); + let has_sse41 = settings.add_bool( + "has_sse41", + "Has support for SSE4.1.", + "SSE4.1: CPUID.01H:ECX.SSE4_1[bit 19]", + false, + ); + let has_sse42 = settings.add_bool( + "has_sse42", + "Has support for SSE4.2.", + "SSE4.2: CPUID.01H:ECX.SSE4_2[bit 20]", + false, + ); + let has_avx = settings.add_bool( + "has_avx", + "Has support for AVX.", + "AVX: CPUID.01H:ECX.AVX[bit 28]", + false, + ); + let has_avx2 = settings.add_bool( + "has_avx2", + "Has support for AVX2.", + "AVX2: CPUID.07H:EBX.AVX2[bit 5]", + false, + ); let has_avx512dq = settings.add_bool( "has_avx512dq", + "Has support for AVX512DQ.", "AVX512DQ: CPUID.07H:EBX.AVX512DQ[bit 17]", false, ); let has_avx512vl = settings.add_bool( "has_avx512vl", + "Has support for AVX512VL.", "AVX512VL: CPUID.07H:EBX.AVX512VL[bit 31]", false, ); let has_avx512f = settings.add_bool( "has_avx512f", + "Has support for AVX512F.", "AVX512F: CPUID.07H:EBX.AVX512F[bit 16]", false, ); - let has_popcnt = settings.add_bool("has_popcnt", "POPCNT: CPUID.01H:ECX.POPCNT[bit 23]", false); + let has_popcnt = settings.add_bool( + "has_popcnt", + "Has support for POPCNT.", + "POPCNT: CPUID.01H:ECX.POPCNT[bit 23]", + false, + ); // CPUID.(EAX=07H, ECX=0H):EBX let has_bmi1 = settings.add_bool( "has_bmi1", + "Has support for BMI1.", "BMI1: CPUID.(EAX=07H, ECX=0H):EBX.BMI1[bit 3]", false, ); let has_bmi2 = settings.add_bool( "has_bmi2", + "Has support for BMI2.", "BMI2: CPUID.(EAX=07H, ECX=0H):EBX.BMI2[bit 8]", false, ); @@ -42,6 +82,7 @@ pub(crate) fn define(shared: &SettingGroup) -> 
SettingGroup { // CPUID.EAX=80000001H:ECX let has_lzcnt = settings.add_bool( "has_lzcnt", + "Has support for LZCNT.", "LZCNT: CPUID.EAX=80000001H:ECX.LZCNT[bit 5]", false, ); @@ -85,7 +126,7 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup { settings.add_predicate("use_lzcnt", predicate!(has_lzcnt)); // Some shared boolean values are used in x86 instruction predicates, so we need to group them - // in the same TargetIsa, for compabitibity with code generated by meta-python. + // in the same TargetIsa, for compatibility with code generated by meta-python. // TODO Once all the meta generation code has been migrated from Python to Rust, we can put it // back in the shared SettingGroup, and use it in x86 instruction predicates. @@ -104,21 +145,40 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup { // Presets corresponding to x86 CPUs. - settings.add_preset("baseline", preset!()); + settings.add_preset( + "baseline", + "A baseline preset with no extensions enabled.", + preset!(), + ); let nehalem = settings.add_preset( + "nehalem", + "Nehalem microarchitecture.", preset!(has_sse3 && has_ssse3 && has_sse41 && has_sse42 && has_popcnt), ); let haswell = settings.add_preset( "haswell", + "Haswell microarchitecture.", preset!(nehalem && has_bmi1 && has_bmi2 && has_lzcnt), ); - let broadwell = settings.add_preset("broadwell", preset!(haswell)); - let skylake = settings.add_preset("skylake", preset!(broadwell)); - let cannonlake = settings.add_preset("cannonlake", preset!(skylake)); - settings.add_preset("icelake", preset!(cannonlake)); + let broadwell = settings.add_preset( + "broadwell", + "Broadwell microarchitecture.", + preset!(haswell), + ); + let skylake = settings.add_preset("skylake", "Skylake microarchitecture.", preset!(broadwell)); + let cannonlake = settings.add_preset( + "cannonlake", + "Cannon Lake microarchitecture.", + preset!(skylake), + ); + settings.add_preset( + "icelake", + "Ice Lake microarchitecture.", + preset!(cannonlake), + );
settings.add_preset( "znver1", + "Zen (first generation) microarchitecture.", preset!( has_sse3 && has_ssse3 diff --git a/cranelift/codegen/meta/src/lib.rs b/cranelift/codegen/meta/src/lib.rs index ead2c4442f..29a545aad6 100644 --- a/cranelift/codegen/meta/src/lib.rs +++ b/cranelift/codegen/meta/src/lib.rs @@ -116,6 +116,9 @@ pub fn generate( isa::Isa::Arm64 => { // aarch64 doesn't have platform-specific settings. } + isa::Isa::S390x => { + // s390x doesn't have platform-specific settings. + } isa::Isa::Arm32 | isa::Isa::Riscv => todo!(), } } diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index e3e9b4fefc..ef141c8a92 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -3582,7 +3582,7 @@ pub(crate) fn define( "fmin_pseudo", r#" Floating point pseudo-minimum, propagating NaNs. This behaves differently from ``fmin``. - See https://github.com/WebAssembly/simd/pull/122 for background. + See for background. The behaviour is defined as ``fmin_pseudo(a, b) = (b < a) ? b : a``, and the behaviour for zero or NaN inputs follows from the behaviour of ``<`` with such inputs. @@ -3614,7 +3614,7 @@ pub(crate) fn define( "fmax_pseudo", r#" Floating point pseudo-maximum, propagating NaNs. This behaves differently from ``fmax``. - See https://github.com/WebAssembly/simd/pull/122 for background. + See for background. The behaviour is defined as ``fmax_pseudo(a, b) = (a < b) ? b : a``, and the behaviour for zero or NaN inputs follows from the behaviour of ``<`` with such inputs. @@ -4102,7 +4102,7 @@ pub(crate) fn define( This will double the lane width and halve the number of lanes. So the resulting vector has the same number of bits as `x` and `y` do (individually). - See https://github.com/WebAssembly/simd/pull/127 for background info. + See for background info. 
"#, &formats.binary, ) @@ -4325,6 +4325,26 @@ pub(crate) fn define( .operands_out(vec![a]), ); + ig.push( + Inst::new( + "fcvt_low_from_sint", + r#" + Converts packed signed doubleword integers to packed double precision floating point. + + Considering only the low half of the register, each lane in `x` is interpreted as a + signed doubleword integer that is then converted to a double precision float. This + instruction differs from fcvt_from_sint in that it converts half the number of lanes + which are converted to occupy twice the number of bits. No rounding should be needed + for the resulting float. + + The result type will have half the number of vector lanes as the input. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + let WideInt = &TypeVar::new( "WideInt", "An integer type with lanes from `i16` upwards", diff --git a/cranelift/codegen/meta/src/shared/settings.rs b/cranelift/codegen/meta/src/shared/settings.rs index 2d3cfc6f38..2233e85dbc 100644 --- a/cranelift/codegen/meta/src/shared/settings.rs +++ b/cranelift/codegen/meta/src/shared/settings.rs @@ -5,29 +5,29 @@ pub(crate) fn define() -> SettingGroup { settings.add_enum( "regalloc", - r#"Register allocator to use with the MachInst backend. + "Register allocator to use with the MachInst backend.", + r#" + This selects the register allocator as an option among those offered by the `regalloc.rs` + crate. Please report register allocation bugs to the maintainers of this crate whenever + possible. - This selects the register allocator as an option among those offered by the `regalloc.rs` - crate. Please report register allocation bugs to the maintainers of this crate whenever - possible. + Note: this only applies to target that use the MachInst backend. As of 2020-04-17, this + means the x86_64 backend doesn't use this yet. - Note: this only applies to target that use the MachInst backend. As of 2020-04-17, this - means the x86_64 backend doesn't use this yet. 
+ Possible values: - Possible values: - - - `backtracking` is a greedy, backtracking register allocator as implemented in - Spidermonkey's optimizing tier IonMonkey. It may take more time to allocate registers, but - it should generate better code in general, resulting in better throughput of generated - code. - - `backtracking_checked` is the backtracking allocator with additional self checks that may - take some time to run, and thus these checks are disabled by default. - - `experimental_linear_scan` is an experimental linear scan allocator. It may take less - time to allocate registers, but generated code's quality may be inferior. As of - 2020-04-17, it is still experimental and it should not be used in production settings. - - `experimental_linear_scan_checked` is the linear scan allocator with additional self - checks that may take some time to run, and thus these checks are disabled by default. - "#, + - `backtracking` is a greedy, backtracking register allocator as implemented in + Spidermonkey's optimizing tier IonMonkey. It may take more time to allocate registers, but + it should generate better code in general, resulting in better throughput of generated + code. + - `backtracking_checked` is the backtracking allocator with additional self checks that may + take some time to run, and thus these checks are disabled by default. + - `experimental_linear_scan` is an experimental linear scan allocator. It may take less + time to allocate registers, but generated code's quality may be inferior. As of + 2020-04-17, it is still experimental and it should not be used in production settings. + - `experimental_linear_scan_checked` is the linear scan allocator with additional self + checks that may take some time to run, and thus these checks are disabled by default. 
+ "#, vec![ "backtracking", "backtracking_checked", @@ -38,24 +38,23 @@ pub(crate) fn define() -> SettingGroup { settings.add_enum( "opt_level", + "Optimization level for generated code.", r#" - Optimization level: + Supported levels: - - none: Minimise compile time by disabling most optimizations. - - speed: Generate the fastest possible code - - speed_and_size: like "speed", but also perform transformations - aimed at reducing code size. + - `none`: Minimise compile time by disabling most optimizations. + - `speed`: Generate the fastest possible code + - `speed_and_size`: like "speed", but also perform transformations aimed at reducing code size. "#, vec!["none", "speed", "speed_and_size"], ); settings.add_bool( "enable_verifier", + "Run the Cranelift IR verifier at strategic times during compilation.", r#" - Run the Cranelift IR verifier at strategic times during compilation. - - This makes compilation slower but catches many bugs. The verifier is always enabled by - default, which is useful during development. + This makes compilation slower but catches many bugs. The verifier is always enabled by + default, which is useful during development. "#, true, ); @@ -65,110 +64,110 @@ pub(crate) fn define() -> SettingGroup { // `colocated` flag on external functions and global values. settings.add_bool( "is_pic", - "Enable Position-Independent Code generation", + "Enable Position-Independent Code generation.", + "", false, ); settings.add_bool( "use_colocated_libcalls", + "Use colocated libcalls.", r#" - Use colocated libcalls. - Generate code that assumes that libcalls can be declared "colocated", meaning they will be defined along with the current function, such that they can use more efficient addressing. - "#, + "#, false, ); settings.add_bool( "avoid_div_traps", + "Generate explicit checks around native division instructions to avoid their trapping.", r#" - Generate explicit checks around native division instructions to avoid - their trapping. 
- This is primarily used by SpiderMonkey which doesn't install a signal handler for SIGFPE, but expects a SIGILL trap for division by zero. On ISAs like ARM where the native division instructions don't trap, this setting has no effect - explicit checks are always inserted. - "#, + "#, false, ); settings.add_bool( "enable_float", + "Enable the use of floating-point instructions.", r#" - Enable the use of floating-point instructions - Disabling use of floating-point instructions is not yet implemented. - "#, + "#, true, ); settings.add_bool( "enable_nan_canonicalization", + "Enable NaN canonicalization.", r#" - Enable NaN canonicalization - This replaces NaNs with a single canonical value, for users requiring entirely deterministic WebAssembly computation. This is not required by the WebAssembly spec, so it is not enabled by default. - "#, + "#, false, ); settings.add_bool( "enable_pinned_reg", - r#"Enable the use of the pinned register. - - This register is excluded from register allocation, and is completely under the control of - the end-user. It is possible to read it via the get_pinned_reg instruction, and to set it - with the set_pinned_reg instruction. + "Enable the use of the pinned register.", + r#" + This register is excluded from register allocation, and is completely under the control of + the end-user. It is possible to read it via the get_pinned_reg instruction, and to set it + with the set_pinned_reg instruction. "#, false, ); settings.add_bool( "use_pinned_reg_as_heap_base", - r#"Use the pinned register as the heap base. + "Use the pinned register as the heap base.", + r#" + Enabling this requires the enable_pinned_reg setting to be set to true. It enables a custom + legalization of the `heap_addr` instruction so it will use the pinned register as the heap + base, instead of fetching it from a global value. - Enabling this requires the enable_pinned_reg setting to be set to true. 
It enables a custom - legalization of the `heap_addr` instruction so it will use the pinned register as the heap - base, instead of fetching it from a global value. - - Warning! Enabling this means that the pinned register *must* be maintained to contain the - heap base address at all times, during the lifetime of a function. Using the pinned - register for other purposes when this is set is very likely to cause crashes. + Warning! Enabling this means that the pinned register *must* be maintained to contain the + heap base address at all times, during the lifetime of a function. Using the pinned + register for other purposes when this is set is very likely to cause crashes. "#, false, ); - settings.add_bool("enable_simd", "Enable the use of SIMD instructions.", false); + settings.add_bool( + "enable_simd", + "Enable the use of SIMD instructions.", + "", + false, + ); settings.add_bool( "enable_atomics", "Enable the use of atomic instructions", + "", true, ); settings.add_bool( "enable_safepoints", + "Enable safepoint instruction insertions.", r#" - Enable safepoint instruction insertions. - This will allow the emit_stack_maps() function to insert the safepoint instruction on top of calls and interrupt traps in order to display the live reference values at that point in the program. - "#, + "#, false, ); settings.add_enum( "tls_model", - r#" - Defines the model used to perform TLS accesses. - "#, + "Defines the model used to perform TLS accesses.", + "", vec!["none", "elf_gd", "macho", "coff"], ); @@ -176,9 +175,9 @@ pub(crate) fn define() -> SettingGroup { settings.add_enum( "libcall_call_conv", + "Defines the calling convention to use for LibCalls call expansion.", r#" - Defines the calling convention to use for LibCalls call expansion, - since it may be different from the ISA default calling convention. + This may be different from the ISA default calling convention. The default value is to use the same calling convention as the ISA default calling convention. 
@@ -192,6 +191,7 @@ pub(crate) fn define() -> SettingGroup { "cold", "system_v", "windows_fastcall", + "apple_aarch64", "baldrdash_system_v", "baldrdash_windows", "baldrdash_2020", @@ -201,9 +201,8 @@ pub(crate) fn define() -> SettingGroup { settings.add_num( "baldrdash_prologue_words", + "Number of pointer-sized words pushed by the baldrdash prologue.", r#" - Number of pointer-sized words pushed by the baldrdash prologue. - Functions with the `baldrdash` calling convention don't generate their own prologue and epilogue. They depend on externally generated code that pushes a fixed number of words in the prologue and restores them @@ -212,15 +211,14 @@ pub(crate) fn define() -> SettingGroup { This setting configures the number of pointer-sized words pushed on the stack when the Cranelift-generated code is entered. This includes the pushed return address on x86. - "#, + "#, 0, ); settings.add_bool( "enable_llvm_abi_extensions", + "Enable various ABI extensions defined by LLVM's behavior.", r#" - Enable various ABI extensions defined by LLVM's behavior. - In some cases, LLVM's implementation of an ABI (calling convention) goes beyond a standard and supports additional argument types or behavior. This option instructs Cranelift codegen to follow LLVM's @@ -231,18 +229,18 @@ pub(crate) fn define() -> SettingGroup { registers. The Fastcall implementation otherwise does not support `i128` arguments, and will panic if they are present and this option is not set. - "#, + "#, false, ); settings.add_bool( "unwind_info", + "Generate unwind information.", r#" - Generate unwind info. This increases metadata size and compile time, - but allows for the debugger to trace frames, is needed for GC tracing - that relies on libunwind (such as in Wasmtime), and is - unconditionally needed on certain platforms (such as Windows) that - must always be able to unwind. 
+ This increases metadata size and compile time, but allows for the + debugger to trace frames, is needed for GC tracing that relies on + libunwind (such as in Wasmtime), and is unconditionally needed on + certain platforms (such as Windows) that must always be able to unwind. "#, true, ); @@ -252,6 +250,7 @@ pub(crate) fn define() -> SettingGroup { settings.add_bool( "emit_all_ones_funcaddrs", "Emit not-yet-relocated function addresses as all-ones bit patterns.", + "", false, ); @@ -259,32 +258,27 @@ pub(crate) fn define() -> SettingGroup { settings.add_bool( "enable_probestack", - r#" - Enable the use of stack probes, for calling conventions which support this - functionality. - "#, + "Enable the use of stack probes for supported calling conventions.", + "", true, ); settings.add_bool( "probestack_func_adjusts_sp", - r#" - Set this to true of the stack probe function modifies the stack pointer - itself. - "#, + "Enable if the stack probe adjusts the stack pointer.", + "", false, ); settings.add_num( "probestack_size_log2", + "The log2 of the size of the stack guard region.", r#" - The log2 of the size of the stack guard region. - Stack frames larger than this size will have stack overflow checked by calling the probestack function. The default is 12, which translates to a size of 4096. - "#, + "#, 12, ); @@ -293,6 +287,7 @@ pub(crate) fn define() -> SettingGroup { settings.add_bool( "enable_jump_tables", "Enable the use of jump tables in generated machine code.", + "", true, ); @@ -300,16 +295,15 @@ pub(crate) fn define() -> SettingGroup { settings.add_bool( "enable_heap_access_spectre_mitigation", + "Enable Spectre mitigation on heap bounds checks.", r#" - Enable Spectre mitigation on heap bounds checks. + This is a no-op for any heap that needs no bounds checks; e.g., + if the limit is static and the guard region is large enough that + the index cannot reach past it. 
- This is a no-op for any heap that needs no bounds checks; e.g., - if the limit is static and the guard region is large enough that - the index cannot reach past it. - - This option is enabled by default because it is highly - recommended for secure sandboxing. The embedder should consider - the security implications carefully before disabling this option. + This option is enabled by default because it is highly + recommended for secure sandboxing. The embedder should consider + the security implications carefully before disabling this option. "#, true, ); diff --git a/cranelift/codegen/shared/Cargo.toml b/cranelift/codegen/shared/Cargo.toml index c99bed20a4..2d15aa4367 100644 --- a/cranelift/codegen/shared/Cargo.toml +++ b/cranelift/codegen/shared/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["The Cranelift Project Developers"] name = "cranelift-codegen-shared" -version = "0.72.0" +version = "0.73.0" description = "For code shared between cranelift-codegen-meta and cranelift-codegen" license = "Apache-2.0 WITH LLVM-exception" repository = "https://github.com/bytecodealliance/wasmtime" diff --git a/cranelift/codegen/src/binemit/mod.rs b/cranelift/codegen/src/binemit/mod.rs index b534ec9765..aa3102797e 100644 --- a/cranelift/codegen/src/binemit/mod.rs +++ b/cranelift/codegen/src/binemit/mod.rs @@ -60,6 +60,8 @@ pub enum Reloc { Arm64Call, /// RISC-V call target RiscvCall, + /// s390x PC-relative 4-byte offset + S390xPCRel32Dbl, /// Elf x86_64 32 bit signed PC relative offset to two GOT entries for GD symbol. 
ElfX86_64TlsGd, @@ -75,6 +77,7 @@ impl fmt::Display for Reloc { match *self { Self::Abs4 => write!(f, "Abs4"), Self::Abs8 => write!(f, "Abs8"), + Self::S390xPCRel32Dbl => write!(f, "PCRel32Dbl"), Self::X86PCRel4 => write!(f, "PCRel4"), Self::X86PCRelRodata4 => write!(f, "PCRelRodata4"), Self::X86CallPCRel4 => write!(f, "CallPCRel4"), diff --git a/cranelift/codegen/src/context.rs b/cranelift/codegen/src/context.rs index b831f9966a..33c205d226 100644 --- a/cranelift/codegen/src/context.rs +++ b/cranelift/codegen/src/context.rs @@ -267,13 +267,7 @@ impl Context { isa: &dyn TargetIsa, ) -> CodegenResult> { if let Some(backend) = isa.get_mach_backend() { - use crate::isa::CallConv; - use crate::machinst::UnwindInfoKind; - let unwind_info_kind = match self.func.signature.call_conv { - CallConv::Fast | CallConv::Cold | CallConv::SystemV => UnwindInfoKind::SystemV, - CallConv::WindowsFastcall => UnwindInfoKind::Windows, - _ => UnwindInfoKind::None, - }; + let unwind_info_kind = isa.unwind_info_kind(); let result = self.mach_compile_result.as_ref().unwrap(); return backend.emit_unwind_info(result, unwind_info_kind); } diff --git a/cranelift/codegen/src/data_value.rs b/cranelift/codegen/src/data_value.rs index 193607f392..a317c7f394 100644 --- a/cranelift/codegen/src/data_value.rs +++ b/cranelift/codegen/src/data_value.rs @@ -5,7 +5,6 @@ use crate::ir::{types, ConstantData, Type}; use core::convert::TryInto; use core::fmt::{self, Display, Formatter}; use core::ptr; -use thiserror::Error; /// Represent a data value. Where [Value] is an SSA reference, [DataValue] is the type + value /// that would be referred to by a [Value]. @@ -97,15 +96,38 @@ impl DataValue { } /// Record failures to cast [DataValue]. 
-#[derive(Error, Debug, PartialEq)] +#[derive(Debug, PartialEq)] #[allow(missing_docs)] pub enum DataValueCastFailure { - #[error("unable to cast data value of type {0} to type {1}")] TryInto(Type, Type), - #[error("unable to cast i64({0}) to a data value of type {1}")] FromInteger(i64, Type), } +// This is manually implementing Error and Display instead of using thiserror to reduce the amount +// of dependencies used by Cranelift. +impl std::error::Error for DataValueCastFailure {} + +impl Display for DataValueCastFailure { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match self { + DataValueCastFailure::TryInto(from, to) => { + write!( + f, + "unable to cast data value of type {} to type {}", + from, to + ) + } + DataValueCastFailure::FromInteger(val, to) => { + write!( + f, + "unable to cast i64({}) to a data value of type {}", + val, to + ) + } + } + } +} + /// Helper for creating conversion implementations for [DataValue]. macro_rules! build_conversion_impl { ( $rust_ty:ty, $data_value_ty:ident, $cranelift_ty:ident ) => { diff --git a/cranelift/codegen/src/ir/entities.rs b/cranelift/codegen/src/ir/entities.rs index 09eaed3bec..d8ca7cef36 100644 --- a/cranelift/codegen/src/ir/entities.rs +++ b/cranelift/codegen/src/ir/entities.rs @@ -146,7 +146,7 @@ impl StackSlot { /// [`VmContext`](super::GlobalValueData::VMContext) using /// [`FuncEnvironment::make_global`](https://docs.rs/cranelift-wasm/*/cranelift_wasm/trait.FuncEnvironment.html#tymethod.make_global). /// - When compiling to native code, you can use it for objects in static memory with -/// [`Module::declare_data_in_func`](https://docs.rs/cranelift-module/*/cranelift_module/struct.Module.html#method.declare_data_in_func). +/// [`Module::declare_data_in_func`](https://docs.rs/cranelift-module/*/cranelift_module/trait.Module.html#method.declare_data_in_func). 
/// - For any compilation target, it can be registered with /// [`FunctionBuilder::create_global_value`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.create_global_value). /// @@ -264,9 +264,9 @@ impl JumpTable { /// /// - [`FunctionBuilder::import_function`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.import_function) /// for external functions -/// - [`Module::declare_func_in_func`](https://docs.rs/cranelift-module/*/cranelift_module/struct.Module.html#method.declare_func_in_func) +/// - [`Module::declare_func_in_func`](https://docs.rs/cranelift-module/*/cranelift_module/trait.Module.html#method.declare_func_in_func) /// for functions declared elsewhere in the same native -/// [`Module`](https://docs.rs/cranelift-module/*/cranelift_module/struct.Module.html) +/// [`Module`](https://docs.rs/cranelift-module/*/cranelift_module/trait.Module.html) /// - [`FuncEnvironment::make_direct_func`](https://docs.rs/cranelift-wasm/*/cranelift_wasm/trait.FuncEnvironment.html#tymethod.make_direct_func) /// for functions declared in the same WebAssembly /// [`FuncEnvironment`](https://docs.rs/cranelift-wasm/*/cranelift_wasm/trait.FuncEnvironment.html#tymethod.make_direct_func) diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 2c3776fbf7..6a264f1604 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -135,12 +135,28 @@ impl Into for StackAMode { // Returns the size of stack space needed to store the // `int_reg` and `vec_reg`. fn saved_reg_stack_size( + call_conv: isa::CallConv, int_reg: &[Writable], vec_reg: &[Writable], ) -> (usize, usize) { // Round up to multiple of 2, to keep 16-byte stack alignment. 
let int_save_bytes = (int_reg.len() + (int_reg.len() & 1)) * 8; - let vec_save_bytes = vec_reg.len() * 16; + // The Baldrdash ABIs require saving and restoring the whole 16-byte + // SIMD & FP registers, so the necessary stack space is always a + // multiple of the mandatory 16-byte stack alignment. However, the + // Procedure Call Standard for the Arm 64-bit Architecture (AAPCS64, + // including several related ABIs such as the one used by Windows) + // mandates saving only the bottom 8 bytes of the vector registers, + // so in that case we round up the number of registers to ensure proper + // stack alignment (similarly to the situation with `int_reg`). + let vec_reg_size = if call_conv.extends_baldrdash() { 16 } else { 8 }; + let vec_save_padding = if call_conv.extends_baldrdash() { + 0 + } else { + vec_reg.len() & 1 + }; + let vec_save_bytes = (vec_reg.len() + vec_save_padding) * vec_reg_size; + (int_save_bytes, vec_save_bytes) } @@ -171,6 +187,21 @@ impl ABIMachineSpec for AArch64MachineDeps { let has_baldrdash_tls = call_conv == isa::CallConv::Baldrdash2020; // See AArch64 ABI (https://c9x.me/compile/bib/abi-arm64.pdf), sections 5.4. + // + // MacOS aarch64 is slightly different, see also + // https://developer.apple.com/documentation/xcode/writing_arm64_code_for_apple_platforms. + // We are diverging from the MacOS aarch64 implementation in the + // following ways: + // - sign- and zero- extensions of data types less than 32 bits are not + // implemented yet. + // - i128 arguments passing isn't implemented yet in the standard (non + // MacOS) aarch64 ABI. + // - we align the arguments stack space to a 16-bytes boundary, while + // the MacOS allows aligning only on 8 bytes. In practice it means we're + // slightly overallocating when calling, which is fine, and doesn't + // break our other invariants that the stack is always allocated in + // 16-bytes chunks. 
+ let mut next_xreg = 0; let mut next_vreg = 0; let mut next_stack: u64 = 0; @@ -182,18 +213,26 @@ impl ABIMachineSpec for AArch64MachineDeps { next_stack = 16; } - // Note on return values: on the regular non-baldrdash ABI, we may return values in 8 - // registers for V128 and I64 registers independently of the number of register values - // returned in the other class. That is, we can return values in up to 8 integer and 8 - // vector registers at once. - // In Baldrdash, we can only use one register for return value for all the register - // classes. That is, we can't return values in both one integer and one vector register; - // only one return value may be in a register. + let (max_per_class_reg_vals, mut remaining_reg_vals) = match args_or_rets { + ArgsOrRets::Args => (8, 16), // x0-x7 and v0-v7 - let (max_per_class_reg_vals, mut remaining_reg_vals) = match (args_or_rets, is_baldrdash) { - (ArgsOrRets::Args, _) => (8, 16), // x0-x7 and v0-v7 - (ArgsOrRets::Rets, false) => (8, 16), // x0-x7 and v0-v7 - (ArgsOrRets::Rets, true) => (1, 1), // x0 or v0, but not both + // Note on return values: on the regular ABI, we may return values + // in 8 registers for V128 and I64 registers independently of the + // number of register values returned in the other class. That is, + // we can return values in up to 8 integer and + // 8 vector registers at once. + // + // In Baldrdash and Wasmtime, we can only use one register for + // return value for all the register classes. That is, we can't + // return values in both one integer and one vector register; only + // one return value may be in a register. + ArgsOrRets::Rets => { + if is_baldrdash || call_conv.extends_wasmtime() { + (1, 1) // x0 or v0, but not both + } else { + (8, 16) // x0-x7 and v0-v7 + } + } }; for i in 0..params.len() { @@ -264,13 +303,27 @@ impl ABIMachineSpec for AArch64MachineDeps { *next_reg += 1; remaining_reg_vals -= 1; } else { - // Compute size. Every arg takes a minimum slot of 8 bytes. 
(16-byte - // stack alignment happens separately after all args.) + // Compute the stack slot's size. let size = (ty_bits(param.value_type) / 8) as u64; - let size = std::cmp::max(size, 8); - // Align. + + let size = if call_conv == isa::CallConv::AppleAarch64 + || (call_conv.extends_wasmtime() && args_or_rets == ArgsOrRets::Rets) + { + // MacOS aarch64 and Wasmtime allow stack slots with + // sizes less than 8 bytes. They still need to be + // properly aligned on their natural data alignment, + // though. + size + } else { + // Every arg takes a minimum slot of 8 bytes. (16-byte stack + // alignment happens separately after all args.) + std::cmp::max(size, 8) + }; + + // Align the stack slot. debug_assert!(size.is_power_of_two()); next_stack = align_to(next_stack, size); + ret.push(ABIArg::stack( next_stack as i64, param.value_type, @@ -550,11 +603,13 @@ impl ABIMachineSpec for AArch64MachineDeps { flags: &settings::Flags, clobbers: &Set>, fixed_frame_storage_size: u32, + _outgoing_args_size: u32, ) -> (u64, SmallVec<[Inst; 16]>) { let mut insts = SmallVec::new(); let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers); - let (int_save_bytes, vec_save_bytes) = saved_reg_stack_size(&clobbered_int, &clobbered_vec); + let (int_save_bytes, vec_save_bytes) = + saved_reg_stack_size(call_conv, &clobbered_int, &clobbered_vec); let total_save_bytes = int_save_bytes + vec_save_bytes; let clobber_size = total_save_bytes as i32; @@ -583,59 +638,170 @@ impl ABIMachineSpec for AArch64MachineDeps { // `frame_offset` tracks offset above start-of-clobbers for unwind-info // purposes. 
let mut clobber_offset = clobber_size as u32; - for reg_pair in clobbered_int.chunks(2) { - let (r1, r2) = if reg_pair.len() == 2 { - // .to_reg().to_reg(): Writable --> RealReg --> Reg - (reg_pair[0].to_reg().to_reg(), reg_pair[1].to_reg().to_reg()) - } else { - (reg_pair[0].to_reg().to_reg(), zero_reg()) - }; + let clobber_offset_change = 16; + let iter = clobbered_int.chunks_exact(2); - debug_assert!(r1.get_class() == RegClass::I64); - debug_assert!(r2.get_class() == RegClass::I64); + if let [rd] = iter.remainder() { + let rd = rd.to_reg().to_reg(); - // stp r1, r2, [sp, #-16]! - insts.push(Inst::StoreP64 { - rt: r1, - rt2: r2, - mem: PairAMode::PreIndexed( + debug_assert_eq!(rd.get_class(), RegClass::I64); + // str rd, [sp, #-16]! + insts.push(Inst::Store64 { + rd, + mem: AMode::PreIndexed( writable_stack_reg(), - SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(), + SImm9::maybe_from_i64(-clobber_offset_change).unwrap(), ), flags: MemFlags::trusted(), }); + if flags.unwind_info() { - clobber_offset -= 8; - if r2 != zero_reg() { - insts.push(Inst::Unwind { - inst: UnwindInst::SaveReg { - clobber_offset, - reg: r2.to_real_reg(), - }, - }); - } - clobber_offset -= 8; + clobber_offset -= clobber_offset_change as u32; insts.push(Inst::Unwind { inst: UnwindInst::SaveReg { clobber_offset, - reg: r1.to_real_reg(), + reg: rd.to_real_reg(), }, }); } } - for reg in clobbered_vec.iter() { - insts.push(Inst::FpuStore128 { - rd: reg.to_reg().to_reg(), - mem: AMode::PreIndexed(writable_stack_reg(), SImm9::maybe_from_i64(-16).unwrap()), + let mut iter = iter.rev(); + + while let Some([rt, rt2]) = iter.next() { + // .to_reg().to_reg(): Writable --> RealReg --> Reg + let rt = rt.to_reg().to_reg(); + let rt2 = rt2.to_reg().to_reg(); + + debug_assert!(rt.get_class() == RegClass::I64); + debug_assert!(rt2.get_class() == RegClass::I64); + + // stp rt, rt2, [sp, #-16]! 
+ insts.push(Inst::StoreP64 { + rt, + rt2, + mem: PairAMode::PreIndexed( + writable_stack_reg(), + SImm7Scaled::maybe_from_i64(-clobber_offset_change, types::I64).unwrap(), + ), flags: MemFlags::trusted(), }); + if flags.unwind_info() { - clobber_offset -= 16; + clobber_offset -= clobber_offset_change as u32; insts.push(Inst::Unwind { inst: UnwindInst::SaveReg { clobber_offset, - reg: reg.to_reg(), + reg: rt.to_real_reg(), + }, + }); + insts.push(Inst::Unwind { + inst: UnwindInst::SaveReg { + clobber_offset: clobber_offset + (clobber_offset_change / 2) as u32, + reg: rt2.to_real_reg(), + }, + }); + } + } + + let store_vec_reg = |rd| { + if call_conv.extends_baldrdash() { + Inst::FpuStore128 { + rd, + mem: AMode::PreIndexed( + writable_stack_reg(), + SImm9::maybe_from_i64(-clobber_offset_change).unwrap(), + ), + flags: MemFlags::trusted(), + } + } else { + Inst::FpuStore64 { + rd, + mem: AMode::PreIndexed( + writable_stack_reg(), + SImm9::maybe_from_i64(-clobber_offset_change).unwrap(), + ), + flags: MemFlags::trusted(), + } + } + }; + let iter = clobbered_vec.chunks_exact(2); + + if let [rd] = iter.remainder() { + let rd = rd.to_reg().to_reg(); + + debug_assert_eq!(rd.get_class(), RegClass::V128); + insts.push(store_vec_reg(rd)); + + if flags.unwind_info() { + clobber_offset -= clobber_offset_change as u32; + insts.push(Inst::Unwind { + inst: UnwindInst::SaveReg { + clobber_offset, + reg: rd.to_real_reg(), + }, + }); + } + } + + let store_vec_reg_pair = |rt, rt2| { + if call_conv.extends_baldrdash() { + let clobber_offset_change = 32; + + ( + Inst::FpuStoreP128 { + rt, + rt2, + mem: PairAMode::PreIndexed( + writable_stack_reg(), + SImm7Scaled::maybe_from_i64(-clobber_offset_change, I8X16).unwrap(), + ), + flags: MemFlags::trusted(), + }, + clobber_offset_change as u32, + ) + } else { + let clobber_offset_change = 16; + + ( + Inst::FpuStoreP64 { + rt, + rt2, + mem: PairAMode::PreIndexed( + writable_stack_reg(), + SImm7Scaled::maybe_from_i64(-clobber_offset_change, 
F64).unwrap(), + ), + flags: MemFlags::trusted(), + }, + clobber_offset_change as u32, + ) + } + }; + let mut iter = iter.rev(); + + while let Some([rt, rt2]) = iter.next() { + let rt = rt.to_reg().to_reg(); + let rt2 = rt2.to_reg().to_reg(); + + debug_assert_eq!(rt.get_class(), RegClass::V128); + debug_assert_eq!(rt2.get_class(), RegClass::V128); + + let (inst, clobber_offset_change) = store_vec_reg_pair(rt, rt2); + + insts.push(inst); + + if flags.unwind_info() { + clobber_offset -= clobber_offset_change; + insts.push(Inst::Unwind { + inst: UnwindInst::SaveReg { + clobber_offset, + reg: rt.to_real_reg(), + }, + }); + insts.push(Inst::Unwind { + inst: UnwindInst::SaveReg { + clobber_offset: clobber_offset + clobber_offset_change / 2, + reg: rt2.to_real_reg(), }, }); } @@ -654,6 +820,7 @@ impl ABIMachineSpec for AArch64MachineDeps { flags: &settings::Flags, clobbers: &Set>, fixed_frame_storage_size: u32, + _outgoing_args_size: u32, ) -> SmallVec<[Inst; 16]> { let mut insts = SmallVec::new(); let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers); @@ -663,31 +830,83 @@ impl ABIMachineSpec for AArch64MachineDeps { insts.extend(Self::gen_sp_reg_adjust(fixed_frame_storage_size as i32)); } - for reg in clobbered_vec.iter().rev() { - insts.push(Inst::FpuLoad128 { - rd: Writable::from_reg(reg.to_reg().to_reg()), - mem: AMode::PostIndexed(writable_stack_reg(), SImm9::maybe_from_i64(16).unwrap()), - flags: MemFlags::trusted(), - }); + let load_vec_reg = |rd| { + if call_conv.extends_baldrdash() { + Inst::FpuLoad128 { + rd, + mem: AMode::PostIndexed( + writable_stack_reg(), + SImm9::maybe_from_i64(16).unwrap(), + ), + flags: MemFlags::trusted(), + } + } else { + Inst::FpuLoad64 { + rd, + mem: AMode::PostIndexed( + writable_stack_reg(), + SImm9::maybe_from_i64(16).unwrap(), + ), + flags: MemFlags::trusted(), + } + } + }; + let load_vec_reg_pair = |rt, rt2| { + if call_conv.extends_baldrdash() { + Inst::FpuLoadP128 { + rt, + rt2, + mem: 
PairAMode::PostIndexed( + writable_stack_reg(), + SImm7Scaled::maybe_from_i64(32, I8X16).unwrap(), + ), + flags: MemFlags::trusted(), + } + } else { + Inst::FpuLoadP64 { + rt, + rt2, + mem: PairAMode::PostIndexed( + writable_stack_reg(), + SImm7Scaled::maybe_from_i64(16, F64).unwrap(), + ), + flags: MemFlags::trusted(), + } + } + }; + + let mut iter = clobbered_vec.chunks_exact(2); + + while let Some([rt, rt2]) = iter.next() { + let rt = rt.map(|r| r.to_reg()); + let rt2 = rt2.map(|r| r.to_reg()); + + debug_assert_eq!(rt.to_reg().get_class(), RegClass::V128); + debug_assert_eq!(rt2.to_reg().get_class(), RegClass::V128); + insts.push(load_vec_reg_pair(rt, rt2)); } - for reg_pair in clobbered_int.chunks(2).rev() { - let (r1, r2) = if reg_pair.len() == 2 { - ( - reg_pair[0].map(|r| r.to_reg()), - reg_pair[1].map(|r| r.to_reg()), - ) - } else { - (reg_pair[0].map(|r| r.to_reg()), writable_zero_reg()) - }; + debug_assert!(iter.remainder().len() <= 1); - debug_assert!(r1.to_reg().get_class() == RegClass::I64); - debug_assert!(r2.to_reg().get_class() == RegClass::I64); + if let [rd] = iter.remainder() { + let rd = rd.map(|r| r.to_reg()); - // ldp r1, r2, [sp], #16 + debug_assert_eq!(rd.to_reg().get_class(), RegClass::V128); + insts.push(load_vec_reg(rd)); + } + + let mut iter = clobbered_int.chunks_exact(2); + + while let Some([rt, rt2]) = iter.next() { + let rt = rt.map(|r| r.to_reg()); + let rt2 = rt2.map(|r| r.to_reg()); + + debug_assert_eq!(rt.to_reg().get_class(), RegClass::I64); + debug_assert_eq!(rt2.to_reg().get_class(), RegClass::I64); + // ldp rt, rt2, [sp], #16 insts.push(Inst::LoadP64 { - rt: r1, - rt2: r2, + rt, + rt2, mem: PairAMode::PostIndexed( writable_stack_reg(), SImm7Scaled::maybe_from_i64(16, I64).unwrap(), @@ -696,6 +915,20 @@ impl ABIMachineSpec for AArch64MachineDeps { }); } + debug_assert!(iter.remainder().len() <= 1); + + if let [rd] = iter.remainder() { + let rd = rd.map(|r| r.to_reg()); + + debug_assert_eq!(rd.to_reg().get_class(), 
RegClass::I64); + // ldr rd, [sp], #16 + insts.push(Inst::ULoad64 { + rd, + mem: AMode::PostIndexed(writable_stack_reg(), SImm9::maybe_from_i64(16).unwrap()), + flags: MemFlags::trusted(), + }); + } + // If this is Baldrdash-2020, restore the callee (i.e., our) TLS // register. We may have allocated it for something else and clobbered // it, but the ABI expects us to leave the TLS register unchanged. diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index aa708a8524..6621e3f409 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -258,6 +258,28 @@ fn enc_ldst_vec(q: u32, size: u32, rn: Reg, rt: Writable) -> u32 { | machreg_to_vec(rt.to_reg()) } +fn enc_ldst_vec_pair( + opc: u32, + amode: u32, + is_load: bool, + simm7: SImm7Scaled, + rn: Reg, + rt: Reg, + rt2: Reg, +) -> u32 { + debug_assert_eq!(opc & 0b11, opc); + debug_assert_eq!(amode & 0b11, amode); + + 0b00_10110_00_0_0000000_00000_00000_00000 + | opc << 30 + | amode << 23 + | (is_load as u32) << 22 + | simm7.bits() << 15 + | machreg_to_vec(rt2) << 10 + | machreg_to_gpr(rn) << 5 + | machreg_to_vec(rt) +} + fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable) -> u32 { (top11 << 21) | (machreg_to_vec(rm) << 16) @@ -405,6 +427,15 @@ fn enc_vec_rr_misc(qu: u32, size: u32, bits_12_16: u32, rd: Writable, rn: R | machreg_to_vec(rd.to_reg()) } +fn enc_vec_rr_pair(bits_12_16: u32, rd: Writable, rn: Reg) -> u32 { + debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16); + + 0b010_11110_11_11000_11011_10_00000_00000 + | bits_12_16 << 12 + | machreg_to_vec(rn) << 5 + | machreg_to_vec(rd.to_reg()) +} + fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable, rn: Reg) -> u32 { debug_assert_eq!(q & 0b1, q); debug_assert_eq!(u & 0b1, u); @@ -923,7 +954,7 @@ impl MachInstEmit for Inst { let srcloc = state.cur_srcloc(); if srcloc != SourceLoc::default() && !flags.notrap() { - // 
Register the offset at which the actual load instruction starts. + // Register the offset at which the actual store instruction starts. sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); } @@ -987,7 +1018,7 @@ impl MachInstEmit for Inst { } => { let srcloc = state.cur_srcloc(); if srcloc != SourceLoc::default() && !flags.notrap() { - // Register the offset at which the actual load instruction starts. + // Register the offset at which the actual store instruction starts. sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); } match mem { @@ -1034,6 +1065,120 @@ impl MachInstEmit for Inst { } } } + &Inst::FpuLoadP64 { + rt, + rt2, + ref mem, + flags, + } + | &Inst::FpuLoadP128 { + rt, + rt2, + ref mem, + flags, + } => { + let srcloc = state.cur_srcloc(); + + if srcloc != SourceLoc::default() && !flags.notrap() { + // Register the offset at which the actual load instruction starts. + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + + let opc = match self { + &Inst::FpuLoadP64 { .. } => 0b01, + &Inst::FpuLoadP128 { .. 
} => 0b10, + _ => unreachable!(), + }; + let rt = rt.to_reg(); + let rt2 = rt2.to_reg(); + + match mem { + &PairAMode::SignedOffset(reg, simm7) => { + assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); + sink.put4(enc_ldst_vec_pair(opc, 0b10, true, simm7, reg, rt, rt2)); + } + &PairAMode::PreIndexed(reg, simm7) => { + assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); + sink.put4(enc_ldst_vec_pair( + opc, + 0b11, + true, + simm7, + reg.to_reg(), + rt, + rt2, + )); + } + &PairAMode::PostIndexed(reg, simm7) => { + assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); + sink.put4(enc_ldst_vec_pair( + opc, + 0b01, + true, + simm7, + reg.to_reg(), + rt, + rt2, + )); + } + } + } + &Inst::FpuStoreP64 { + rt, + rt2, + ref mem, + flags, + } + | &Inst::FpuStoreP128 { + rt, + rt2, + ref mem, + flags, + } => { + let srcloc = state.cur_srcloc(); + + if srcloc != SourceLoc::default() && !flags.notrap() { + // Register the offset at which the actual store instruction starts. + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + + let opc = match self { + &Inst::FpuStoreP64 { .. } => 0b01, + &Inst::FpuStoreP128 { .. 
} => 0b10, + _ => unreachable!(), + }; + + match mem { + &PairAMode::SignedOffset(reg, simm7) => { + assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); + sink.put4(enc_ldst_vec_pair(opc, 0b10, false, simm7, reg, rt, rt2)); + } + &PairAMode::PreIndexed(reg, simm7) => { + assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); + sink.put4(enc_ldst_vec_pair( + opc, + 0b11, + false, + simm7, + reg.to_reg(), + rt, + rt2, + )); + } + &PairAMode::PostIndexed(reg, simm7) => { + assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); + sink.put4(enc_ldst_vec_pair( + opc, + 0b01, + false, + simm7, + reg.to_reg(), + rt, + rt2, + )); + } + } + } &Inst::Mov64 { rd, rm } => { assert!(rd.to_reg().get_class() == rm.get_class()); assert!(rm.get_class() == RegClass::I64); @@ -1492,6 +1637,7 @@ impl MachInstEmit for Inst { debug_assert!(size == VectorSize::Size8x8 || size == VectorSize::Size8x16); (0b0, 0b00101, enc_size) } + VecMisc2::Cmeq0 => (0b0, 0b01001, enc_size), }; sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn)); } @@ -1918,6 +2064,13 @@ impl MachInstEmit for Inst { | machreg_to_vec(rd.to_reg()), ); } + &Inst::VecRRPair { op, rd, rn } => { + let bits_12_16 = match op { + VecPairOp::Addp => 0b11011, + }; + + sink.put4(enc_vec_rr_pair(bits_12_16, rd, rn)); + } &Inst::VecRRR { rd, rn, diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index 55e25de5d8..9f628fced6 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -2311,6 +2311,16 @@ fn test_aarch64_binemit() { "sqxtun v16.8b, v23.8h", )); + insns.push(( + Inst::VecRRPair { + op: VecPairOp::Addp, + rd: writable_vreg(0), + rn: vreg(30), + }, + "C0BBF15E", + "addp d0, v30.2d", + )); + insns.push(( Inst::VecRRR { alu_op: VecALUOp::Sqadd, @@ -3803,6 +3813,17 @@ fn test_aarch64_binemit() { "cnt v23.8b, v5.8b", )); + insns.push(( + Inst::VecMisc { + op: 
VecMisc2::Cmeq0, + rd: writable_vreg(12), + rn: vreg(27), + size: VectorSize::Size16x8, + }, + "6C9B604E", + "cmeq v12.8h, v27.8h, #0", + )); + insns.push(( Inst::VecLanes { op: VecLanesOp::Uminv, @@ -5105,6 +5126,168 @@ fn test_aarch64_binemit() { "str q16, [x8, x9, LSL #4]", )); + insns.push(( + Inst::FpuLoadP64 { + rt: writable_vreg(0), + rt2: writable_vreg(31), + mem: PairAMode::SignedOffset(xreg(0), SImm7Scaled::zero(F64)), + flags: MemFlags::trusted(), + }, + "007C406D", + "ldp d0, d31, [x0]", + )); + + insns.push(( + Inst::FpuLoadP64 { + rt: writable_vreg(19), + rt2: writable_vreg(11), + mem: PairAMode::PreIndexed( + writable_xreg(25), + SImm7Scaled::maybe_from_i64(-512, F64).unwrap(), + ), + flags: MemFlags::trusted(), + }, + "332FE06D", + "ldp d19, d11, [x25, #-512]!", + )); + + insns.push(( + Inst::FpuLoadP64 { + rt: writable_vreg(7), + rt2: writable_vreg(20), + mem: PairAMode::PostIndexed( + writable_stack_reg(), + SImm7Scaled::maybe_from_i64(64, F64).unwrap(), + ), + flags: MemFlags::trusted(), + }, + "E753C46C", + "ldp d7, d20, [sp], #64", + )); + + insns.push(( + Inst::FpuStoreP64 { + rt: vreg(4), + rt2: vreg(26), + mem: PairAMode::SignedOffset( + stack_reg(), + SImm7Scaled::maybe_from_i64(504, F64).unwrap(), + ), + flags: MemFlags::trusted(), + }, + "E4EB1F6D", + "stp d4, d26, [sp, #504]", + )); + + insns.push(( + Inst::FpuStoreP64 { + rt: vreg(16), + rt2: vreg(8), + mem: PairAMode::PreIndexed( + writable_xreg(15), + SImm7Scaled::maybe_from_i64(48, F64).unwrap(), + ), + flags: MemFlags::trusted(), + }, + "F021836D", + "stp d16, d8, [x15, #48]!", + )); + + insns.push(( + Inst::FpuStoreP64 { + rt: vreg(5), + rt2: vreg(6), + mem: PairAMode::PostIndexed( + writable_xreg(28), + SImm7Scaled::maybe_from_i64(-32, F64).unwrap(), + ), + flags: MemFlags::trusted(), + }, + "851BBE6C", + "stp d5, d6, [x28], #-32", + )); + + insns.push(( + Inst::FpuLoadP128 { + rt: writable_vreg(0), + rt2: writable_vreg(17), + mem: PairAMode::SignedOffset(xreg(3), 
SImm7Scaled::zero(I8X16)), + flags: MemFlags::trusted(), + }, + "604440AD", + "ldp q0, q17, [x3]", + )); + + insns.push(( + Inst::FpuLoadP128 { + rt: writable_vreg(29), + rt2: writable_vreg(9), + mem: PairAMode::PreIndexed( + writable_xreg(16), + SImm7Scaled::maybe_from_i64(-1024, I8X16).unwrap(), + ), + flags: MemFlags::trusted(), + }, + "1D26E0AD", + "ldp q29, q9, [x16, #-1024]!", + )); + + insns.push(( + Inst::FpuLoadP128 { + rt: writable_vreg(10), + rt2: writable_vreg(20), + mem: PairAMode::PostIndexed( + writable_xreg(26), + SImm7Scaled::maybe_from_i64(256, I8X16).unwrap(), + ), + flags: MemFlags::trusted(), + }, + "4A53C8AC", + "ldp q10, q20, [x26], #256", + )); + + insns.push(( + Inst::FpuStoreP128 { + rt: vreg(9), + rt2: vreg(31), + mem: PairAMode::SignedOffset( + stack_reg(), + SImm7Scaled::maybe_from_i64(1008, I8X16).unwrap(), + ), + flags: MemFlags::trusted(), + }, + "E9FF1FAD", + "stp q9, q31, [sp, #1008]", + )); + + insns.push(( + Inst::FpuStoreP128 { + rt: vreg(27), + rt2: vreg(13), + mem: PairAMode::PreIndexed( + writable_stack_reg(), + SImm7Scaled::maybe_from_i64(-192, I8X16).unwrap(), + ), + flags: MemFlags::trusted(), + }, + "FB37BAAD", + "stp q27, q13, [sp, #-192]!", + )); + + insns.push(( + Inst::FpuStoreP128 { + rt: vreg(18), + rt2: vreg(22), + mem: PairAMode::PostIndexed( + writable_xreg(13), + SImm7Scaled::maybe_from_i64(304, I8X16).unwrap(), + ), + flags: MemFlags::trusted(), + }, + "B2D989AC", + "stp q18, q22, [x13], #304", + )); + insns.push(( Inst::LoadFpuConst64 { rd: writable_vreg(16), diff --git a/cranelift/codegen/src/isa/aarch64/inst/imms.rs b/cranelift/codegen/src/isa/aarch64/inst/imms.rs index 34c2946db0..08fc75431d 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/imms.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/imms.rs @@ -73,7 +73,7 @@ impl SImm7Scaled { /// Create a SImm7Scaled from a raw offset and the known scale type, if /// possible. 
pub fn maybe_from_i64(value: i64, scale_ty: Type) -> Option { - assert!(scale_ty == I64 || scale_ty == I32); + assert!(scale_ty == I64 || scale_ty == I32 || scale_ty == F64 || scale_ty == I8X16); let scale = scale_ty.bytes(); assert!(scale.is_power_of_two()); let scale = i64::from(scale); diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 03e5c6f47b..35903c18d0 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -334,6 +334,8 @@ pub enum VecMisc2 { Frintp, /// Population count per byte Cnt, + /// Compare bitwise equal to 0 + Cmeq0, } /// A Vector narrowing operation with two registers. @@ -347,6 +349,13 @@ pub enum VecMiscNarrowOp { Sqxtun, } +/// A vector operation on a pair of elements with one register. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum VecPairOp { + /// Add pair of elements + Addp, +} + /// An operation across the lanes of vectors. #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] pub enum VecLanesOp { @@ -848,7 +857,34 @@ pub enum Inst { mem: AMode, flags: MemFlags, }, - + /// A load of a pair of floating-point registers, double precision (64-bit). + FpuLoadP64 { + rt: Writable, + rt2: Writable, + mem: PairAMode, + flags: MemFlags, + }, + /// A store of a pair of floating-point registers, double precision (64-bit). + FpuStoreP64 { + rt: Reg, + rt2: Reg, + mem: PairAMode, + flags: MemFlags, + }, + /// A load of a pair of floating-point registers, 128-bit. + FpuLoadP128 { + rt: Writable, + rt2: Writable, + mem: PairAMode, + flags: MemFlags, + }, + /// A store of a pair of floating-point registers, 128-bit. + FpuStoreP128 { + rt: Reg, + rt2: Reg, + mem: PairAMode, + flags: MemFlags, + }, LoadFpuConst64 { rd: Writable, const_data: u64, @@ -984,6 +1020,13 @@ pub enum Inst { high_half: bool, }, + /// 1-operand vector instruction that operates on a pair of elements. 
+ VecRRPair { + op: VecPairOp, + rd: Writable, + rn: Reg, + }, + /// A vector ALU op. VecRRR { alu_op: VecALUOp, @@ -1908,6 +1951,34 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { collector.add_use(rd); memarg_regs(mem, collector); } + &Inst::FpuLoadP64 { + rt, rt2, ref mem, .. + } => { + collector.add_def(rt); + collector.add_def(rt2); + pairmemarg_regs(mem, collector); + } + &Inst::FpuStoreP64 { + rt, rt2, ref mem, .. + } => { + collector.add_use(rt); + collector.add_use(rt2); + pairmemarg_regs(mem, collector); + } + &Inst::FpuLoadP128 { + rt, rt2, ref mem, .. + } => { + collector.add_def(rt); + collector.add_def(rt2); + pairmemarg_regs(mem, collector); + } + &Inst::FpuStoreP128 { + rt, rt2, ref mem, .. + } => { + collector.add_use(rt); + collector.add_use(rt2); + pairmemarg_regs(mem, collector); + } &Inst::LoadFpuConst64 { rd, .. } | &Inst::LoadFpuConst128 { rd, .. } => { collector.add_def(rd); } @@ -1973,6 +2044,10 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { collector.add_def(rd); } } + &Inst::VecRRPair { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } &Inst::VecRRR { alu_op, rd, rn, rm, .. } => { @@ -2590,6 +2665,46 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RUM) { map_use(mapper, rd); map_mem(mapper, mem); } + &mut Inst::FpuLoadP64 { + ref mut rt, + ref mut rt2, + ref mut mem, + .. + } => { + map_def(mapper, rt); + map_def(mapper, rt2); + map_pairmem(mapper, mem); + } + &mut Inst::FpuStoreP64 { + ref mut rt, + ref mut rt2, + ref mut mem, + .. + } => { + map_use(mapper, rt); + map_use(mapper, rt2); + map_pairmem(mapper, mem); + } + &mut Inst::FpuLoadP128 { + ref mut rt, + ref mut rt2, + ref mut mem, + .. + } => { + map_def(mapper, rt); + map_def(mapper, rt2); + map_pairmem(mapper, mem); + } + &mut Inst::FpuStoreP128 { + ref mut rt, + ref mut rt2, + ref mut mem, + .. 
+ } => { + map_use(mapper, rt); + map_use(mapper, rt2); + map_pairmem(mapper, mem); + } &mut Inst::LoadFpuConst64 { ref mut rd, .. } => { map_def(mapper, rd); } @@ -2721,6 +2836,14 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RUM) { map_def(mapper, rd); } } + &mut Inst::VecRRPair { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } &mut Inst::VecRRR { alu_op, ref mut rd, @@ -3508,6 +3631,42 @@ impl Inst { let mem = mem.show_rru(mb_rru); format!("{}str {}, {}", mem_str, rd, mem) } + &Inst::FpuLoadP64 { + rt, rt2, ref mem, .. + } => { + let rt = show_vreg_scalar(rt.to_reg(), mb_rru, ScalarSize::Size64); + let rt2 = show_vreg_scalar(rt2.to_reg(), mb_rru, ScalarSize::Size64); + let mem = mem.show_rru(mb_rru); + + format!("ldp {}, {}, {}", rt, rt2, mem) + } + &Inst::FpuStoreP64 { + rt, rt2, ref mem, .. + } => { + let rt = show_vreg_scalar(rt, mb_rru, ScalarSize::Size64); + let rt2 = show_vreg_scalar(rt2, mb_rru, ScalarSize::Size64); + let mem = mem.show_rru(mb_rru); + + format!("stp {}, {}, {}", rt, rt2, mem) + } + &Inst::FpuLoadP128 { + rt, rt2, ref mem, .. + } => { + let rt = show_vreg_scalar(rt.to_reg(), mb_rru, ScalarSize::Size128); + let rt2 = show_vreg_scalar(rt2.to_reg(), mb_rru, ScalarSize::Size128); + let mem = mem.show_rru(mb_rru); + + format!("ldp {}, {}, {}", rt, rt2, mem) + } + &Inst::FpuStoreP128 { + rt, rt2, ref mem, .. 
+ } => { + let rt = show_vreg_scalar(rt, mb_rru, ScalarSize::Size128); + let rt2 = show_vreg_scalar(rt2, mb_rru, ScalarSize::Size128); + let mem = mem.show_rru(mb_rru); + + format!("stp {}, {}, {}", rt, rt2, mem) + } &Inst::LoadFpuConst64 { rd, const_data } => { let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64); format!( @@ -3725,6 +3884,15 @@ impl Inst { }; format!("{} {}, {}", op, rd, rn) } + &Inst::VecRRPair { op, rd, rn } => { + let op = match op { + VecPairOp::Addp => "addp", + }; + let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64); + let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size64x2); + + format!("{} {}, {}", op, rd, rn) + } &Inst::VecRRR { rd, rn, @@ -3788,43 +3956,44 @@ impl Inst { format!("{} {}, {}, {}", op, rd, rn, rm) } &Inst::VecMisc { op, rd, rn, size } => { - let is_shll = op == VecMisc2::Shll; - let suffix = match (is_shll, size) { - (true, VectorSize::Size8x8) => ", #8", - (true, VectorSize::Size16x4) => ", #16", - (true, VectorSize::Size32x2) => ", #32", - _ => "", - }; - - let (op, size) = match op { - VecMisc2::Not => ( - "mvn", - if size.is_128bits() { + let (op, rd_size, size, suffix) = match op { + VecMisc2::Not => { + let size = if size.is_128bits() { VectorSize::Size8x16 } else { VectorSize::Size8x8 + }; + + ("mvn", size, size, "") + } + VecMisc2::Neg => ("neg", size, size, ""), + VecMisc2::Abs => ("abs", size, size, ""), + VecMisc2::Fabs => ("fabs", size, size, ""), + VecMisc2::Fneg => ("fneg", size, size, ""), + VecMisc2::Fsqrt => ("fsqrt", size, size, ""), + VecMisc2::Rev64 => ("rev64", size, size, ""), + VecMisc2::Shll => ( + "shll", + size.widen(), + size, + match size { + VectorSize::Size8x8 => ", #8", + VectorSize::Size16x4 => ", #16", + VectorSize::Size32x2 => ", #32", + _ => panic!("Unexpected vector size: {:?}", size), }, ), - VecMisc2::Neg => ("neg", size), - VecMisc2::Abs => ("abs", size), - VecMisc2::Fabs => ("fabs", size), - VecMisc2::Fneg => ("fneg", size), - VecMisc2::Fsqrt => 
("fsqrt", size), - VecMisc2::Rev64 => ("rev64", size), - VecMisc2::Shll => ("shll", size), - VecMisc2::Fcvtzs => ("fcvtzs", size), - VecMisc2::Fcvtzu => ("fcvtzu", size), - VecMisc2::Scvtf => ("scvtf", size), - VecMisc2::Ucvtf => ("ucvtf", size), - VecMisc2::Frintn => ("frintn", size), - VecMisc2::Frintz => ("frintz", size), - VecMisc2::Frintm => ("frintm", size), - VecMisc2::Frintp => ("frintp", size), - VecMisc2::Cnt => ("cnt", size), + VecMisc2::Fcvtzs => ("fcvtzs", size, size, ""), + VecMisc2::Fcvtzu => ("fcvtzu", size, size, ""), + VecMisc2::Scvtf => ("scvtf", size, size, ""), + VecMisc2::Ucvtf => ("ucvtf", size, size, ""), + VecMisc2::Frintn => ("frintn", size, size, ""), + VecMisc2::Frintz => ("frintz", size, size, ""), + VecMisc2::Frintm => ("frintm", size, size, ""), + VecMisc2::Frintp => ("frintp", size, size, ""), + VecMisc2::Cnt => ("cnt", size, size, ""), + VecMisc2::Cmeq0 => ("cmeq", size, size, ", #0"), }; - - let rd_size = if is_shll { size.widen() } else { size }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size); let rn = show_vreg_vector(rn, mb_rru, size); format!("{} {}, {}{}", op, rd, rn, suffix) diff --git a/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs index 9f2eb741a0..b514dc20b8 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs @@ -56,8 +56,8 @@ impl crate::isa::unwind::systemv::RegisterMapper for RegisterMapper { fn sp(&self) -> u16 { regs::stack_reg().get_hw_encoding().into() } - fn fp(&self) -> u16 { - regs::fp_reg().get_hw_encoding().into() + fn fp(&self) -> Option { + Some(regs::fp_reg().get_hw_encoding().into()) } fn lr(&self) -> Option { Some(regs::link_reg().get_hw_encoding().into()) diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 9a6b711cb2..ede66295e9 100644 --- 
a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1950,6 +1950,40 @@ pub(crate) fn lower_insn_to_regs>( } } + Opcode::VallTrue if ctx.input_ty(insn, 0) == I64X2 => { + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let tmp = ctx.alloc_tmp(I64X2).only_reg().unwrap(); + + // cmeq vtmp.2d, vm.2d, #0 + // addp dtmp, vtmp.2d + // fcmp dtmp, dtmp + // cset xd, eq + // + // Note that after the ADDP the value of the temporary register will + // be either 0 when all input elements are true, i.e. non-zero, or a + // NaN otherwise (either -1 or -2 when represented as an integer); + // NaNs are the only floating-point numbers that compare unequal to + // themselves. + + ctx.emit(Inst::VecMisc { + op: VecMisc2::Cmeq0, + rd: tmp, + rn: rm, + size: VectorSize::Size64x2, + }); + ctx.emit(Inst::VecRRPair { + op: VecPairOp::Addp, + rd: tmp, + rn: tmp.to_reg(), + }); + ctx.emit(Inst::FpuCmp64 { + rn: tmp.to_reg(), + rm: tmp.to_reg(), + }); + materialize_bool_result(ctx, insn, rd, Cond::Eq); + } + Opcode::VanyTrue | Opcode::VallTrue => { let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); @@ -2180,6 +2214,47 @@ pub(crate) fn lower_insn_to_regs>( size: VectorSize::Size32x4, }); } + I64X2 => { + // mov dst_r, src_v.d[0] + // mov tmp_r0, src_v.d[1] + // lsr dst_r, dst_r, #63 + // lsr tmp_r0, tmp_r0, #63 + // add dst_r, dst_r, tmp_r0, lsl #1 + ctx.emit(Inst::MovFromVec { + rd: dst_r, + rn: src_v, + idx: 0, + size: VectorSize::Size64x2, + }); + ctx.emit(Inst::MovFromVec { + rd: tmp_r0, + rn: src_v, + idx: 1, + size: VectorSize::Size64x2, + }); + ctx.emit(Inst::AluRRImmShift { + alu_op: ALUOp::Lsr64, + rd: dst_r, + rn: dst_r.to_reg(), + immshift: ImmShift::maybe_from_u64(63).unwrap(), + }); + ctx.emit(Inst::AluRRImmShift { + alu_op: ALUOp::Lsr64, + rd: tmp_r0, + rn: 
tmp_r0.to_reg(), + immshift: ImmShift::maybe_from_u64(63).unwrap(), + }); + ctx.emit(Inst::AluRRRShift { + alu_op: ALUOp::Add32, + rd: dst_r, + rn: dst_r.to_reg(), + rm: tmp_r0.to_reg(), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(1).unwrap(), + ), + }); + } _ => panic!("arm64 isel: VhighBits unhandled, ty = {:?}", ty), } } @@ -3013,6 +3088,7 @@ pub(crate) fn lower_insn_to_regs>( } Opcode::TlsValue => unimplemented!("tls_value"), + Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"), } Ok(()) diff --git a/cranelift/codegen/src/isa/aarch64/mod.rs b/cranelift/codegen/src/isa/aarch64/mod.rs index 42b47b645e..a6892b301d 100644 --- a/cranelift/codegen/src/isa/aarch64/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/mod.rs @@ -7,10 +7,8 @@ use crate::isa::Builder as IsaBuilder; use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode}; use crate::result::CodegenResult; use crate::settings as shared_settings; - -use alloc::boxed::Box; +use alloc::{boxed::Box, vec::Vec}; use core::hash::{Hash, Hasher}; - use regalloc::{PrettyPrint, RealRegUniverse}; use target_lexicon::{Aarch64Architecture, Architecture, Triple}; @@ -104,6 +102,10 @@ impl MachBackend for AArch64Backend { &self.flags } + fn isa_flags(&self) -> Vec { + self.isa_flags.iter().collect() + } + fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) { self.flags.hash(&mut hasher); self.isa_flags.hash(&mut hasher); diff --git a/cranelift/codegen/src/isa/aarch64/settings.rs b/cranelift/codegen/src/isa/aarch64/settings.rs index a9849c121b..9d3898e7b5 100644 --- a/cranelift/codegen/src/isa/aarch64/settings.rs +++ b/cranelift/codegen/src/isa/aarch64/settings.rs @@ -1,6 +1,6 @@ //! AArch64 Settings. -use crate::settings::{self, detail, Builder}; +use crate::settings::{self, detail, Builder, Value}; use core::fmt; // Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. 
This file contains a diff --git a/cranelift/codegen/src/isa/arm32/abi.rs b/cranelift/codegen/src/isa/arm32/abi.rs index 94627a227c..5a4145d8b7 100644 --- a/cranelift/codegen/src/isa/arm32/abi.rs +++ b/cranelift/codegen/src/isa/arm32/abi.rs @@ -319,6 +319,7 @@ impl ABIMachineSpec for Arm32MachineDeps { _flags: &settings::Flags, clobbers: &Set>, fixed_frame_storage_size: u32, + _outgoing_args_size: u32, ) -> (u64, SmallVec<[Inst; 16]>) { let mut insts = SmallVec::new(); if fixed_frame_storage_size > 0 { @@ -348,6 +349,7 @@ impl ABIMachineSpec for Arm32MachineDeps { _flags: &settings::Flags, clobbers: &Set>, _fixed_frame_storage_size: u32, + _outgoing_args_size: u32, ) -> SmallVec<[Inst; 16]> { let mut insts = SmallVec::new(); let clobbered_vec = get_callee_saves(clobbers); diff --git a/cranelift/codegen/src/isa/arm32/mod.rs b/cranelift/codegen/src/isa/arm32/mod.rs index 5757b844d2..832fc46f47 100644 --- a/cranelift/codegen/src/isa/arm32/mod.rs +++ b/cranelift/codegen/src/isa/arm32/mod.rs @@ -7,7 +7,7 @@ use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, use crate::result::CodegenResult; use crate::settings; -use alloc::boxed::Box; +use alloc::{boxed::Box, vec::Vec}; use core::hash::{Hash, Hasher}; use regalloc::{PrettyPrint, RealRegUniverse}; use target_lexicon::{Architecture, ArmArchitecture, Triple}; @@ -92,6 +92,10 @@ impl MachBackend for Arm32Backend { &self.flags } + fn isa_flags(&self) -> Vec { + Vec::new() + } + fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) { self.flags.hash(&mut hasher); } diff --git a/cranelift/codegen/src/isa/call_conv.rs b/cranelift/codegen/src/isa/call_conv.rs index 61a94e5a43..fc5856e6e5 100644 --- a/cranelift/codegen/src/isa/call_conv.rs +++ b/cranelift/codegen/src/isa/call_conv.rs @@ -10,23 +10,35 @@ use serde::{Deserialize, Serialize}; #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub enum CallConv { - /// Best performance, 
not ABI-stable + /// Best performance, not ABI-stable. Fast, - /// Smallest caller code size, not ABI-stable + /// Smallest caller code size, not ABI-stable. Cold, - /// System V-style convention used on many platforms + /// System V-style convention used on many platforms. SystemV, - /// Windows "fastcall" convention, also used for x64 and ARM + /// Windows "fastcall" convention, also used for x64 and ARM. WindowsFastcall, - /// SpiderMonkey WebAssembly convention on systems using natively SystemV + /// Mac aarch64 calling convention, which is a tweak aarch64 ABI. + AppleAarch64, + /// SpiderMonkey WebAssembly convention on systems using natively SystemV. BaldrdashSystemV, - /// SpiderMonkey WebAssembly convention on Windows + /// SpiderMonkey WebAssembly convention on Windows. BaldrdashWindows, /// SpiderMonkey WebAssembly convention for "ABI-2020", with extra TLS /// register slots in the frame. Baldrdash2020, - /// Specialized convention for the probestack function + /// Specialized convention for the probestack function. Probestack, + /// Wasmtime equivalent of SystemV, not ABI-stable. + /// + /// Currently only differs in how multiple return values are handled, + /// returning the first return value in a register and everything else + /// through a return-pointer. + WasmtimeSystemV, + /// Wasmtime equivalent of WindowsFastcall, not ABI-stable. + /// + /// Differs from fastcall in the same way as `WasmtimeSystemV`. + WasmtimeFastcall, } impl CallConv { @@ -36,6 +48,7 @@ impl CallConv { // Default to System V for unknown targets because most everything // uses System V. 
Ok(CallingConvention::SystemV) | Err(()) => Self::SystemV, + Ok(CallingConvention::AppleAarch64) => Self::AppleAarch64, Ok(CallingConvention::WindowsFastcall) => Self::WindowsFastcall, Ok(unimp) => unimplemented!("calling convention: {:?}", unimp), } @@ -49,6 +62,7 @@ impl CallConv { LibcallCallConv::Cold => Self::Cold, LibcallCallConv::SystemV => Self::SystemV, LibcallCallConv::WindowsFastcall => Self::WindowsFastcall, + LibcallCallConv::AppleAarch64 => Self::AppleAarch64, LibcallCallConv::BaldrdashSystemV => Self::BaldrdashSystemV, LibcallCallConv::BaldrdashWindows => Self::BaldrdashWindows, LibcallCallConv::Baldrdash2020 => Self::Baldrdash2020, @@ -59,7 +73,7 @@ impl CallConv { /// Is the calling convention extending the Windows Fastcall ABI? pub fn extends_windows_fastcall(self) -> bool { match self { - Self::WindowsFastcall | Self::BaldrdashWindows => true, + Self::WindowsFastcall | Self::BaldrdashWindows | Self::WasmtimeFastcall => true, _ => false, } } @@ -71,6 +85,14 @@ impl CallConv { _ => false, } } + + /// Is the calling convention extending the Wasmtime ABI? 
+ pub fn extends_wasmtime(self) -> bool { + match self { + Self::WasmtimeSystemV | Self::WasmtimeFastcall => true, + _ => false, + } + } } impl fmt::Display for CallConv { @@ -80,10 +102,13 @@ impl fmt::Display for CallConv { Self::Cold => "cold", Self::SystemV => "system_v", Self::WindowsFastcall => "windows_fastcall", + Self::AppleAarch64 => "apple_aarch64", Self::BaldrdashSystemV => "baldrdash_system_v", Self::BaldrdashWindows => "baldrdash_windows", Self::Baldrdash2020 => "baldrdash_2020", Self::Probestack => "probestack", + Self::WasmtimeSystemV => "wasmtime_system_v", + Self::WasmtimeFastcall => "wasmtime_fastcall", }) } } @@ -96,10 +121,13 @@ impl str::FromStr for CallConv { "cold" => Ok(Self::Cold), "system_v" => Ok(Self::SystemV), "windows_fastcall" => Ok(Self::WindowsFastcall), + "apple_aarch64" => Ok(Self::AppleAarch64), "baldrdash_system_v" => Ok(Self::BaldrdashSystemV), "baldrdash_windows" => Ok(Self::BaldrdashWindows), "baldrdash_2020" => Ok(Self::Baldrdash2020), "probestack" => Ok(Self::Probestack), + "wasmtime_system_v" => Ok(Self::WasmtimeSystemV), + "wasmtime_fastcall" => Ok(Self::WasmtimeFastcall), _ => Err(()), } } diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs index 94895b0b6e..481c0c8de3 100644 --- a/cranelift/codegen/src/isa/mod.rs +++ b/cranelift/codegen/src/isa/mod.rs @@ -57,35 +57,34 @@ use crate::flowgraph; use crate::ir; #[cfg(feature = "unwind")] use crate::isa::unwind::systemv::RegisterMappingError; -use crate::machinst::MachBackend; +use crate::machinst::{MachBackend, UnwindInfoKind}; use crate::regalloc; use crate::result::CodegenResult; use crate::settings; use crate::settings::SetResult; use crate::timing; -use alloc::borrow::Cow; -use alloc::boxed::Box; +use alloc::{borrow::Cow, boxed::Box, vec::Vec}; use core::any::Any; use core::fmt; use core::fmt::{Debug, Formatter}; use core::hash::Hasher; -use target_lexicon::{triple, Architecture, PointerWidth, Triple}; -use thiserror::Error; +use 
target_lexicon::{triple, Architecture, OperatingSystem, PointerWidth, Triple}; #[cfg(feature = "riscv")] mod riscv; -// N.B.: the old x86-64 backend (`x86`) and the new one (`x64`) can both be -// included; if the new backend is included, then it is the default backend -// returned for an x86-64 triple, but a specific option can request the old -// backend. It is important to have the ability to instantiate *both* backends -// in the same build so that we can do things like differential fuzzing between -// backends, or perhaps offer a runtime configuration flag in the future. +// N.B.: the old x86-64 backend (`x86`) and the new one (`x64`) are both +// included whenever building with x86 support. The new backend is the default, +// but the old can be requested with `BackendVariant::Legacy`. However, if this +// crate is built with the `old-x86-backend` feature, then the old backend is +// default instead. #[cfg(feature = "x86")] mod x86; -#[cfg(feature = "x64")] -mod x64; +// This module is made public here for benchmarking purposes. No guarantees are +// made regarding API stability. +#[cfg(feature = "x86")] +pub mod x64; #[cfg(feature = "arm32")] mod arm32; @@ -93,6 +92,9 @@ mod arm32; #[cfg(feature = "arm64")] pub(crate) mod aarch64; +#[cfg(feature = "s390x")] +mod s390x; + pub mod unwind; mod call_conv; @@ -123,7 +125,7 @@ macro_rules! isa_builder { /// The "variant" for a given target. On one platform (x86-64), we have two /// backends, the "old" and "new" one; the new one is the default if included /// in the build configuration and not otherwise specified. -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] pub enum BackendVariant { /// Any backend available. 
Any, @@ -150,18 +152,19 @@ pub fn lookup_variant(triple: Triple, variant: BackendVariant) -> Result { - isa_builder!(x64, (feature = "x64"), triple) + isa_builder!(x64, (feature = "x86"), triple) } - #[cfg(feature = "x64")] + #[cfg(not(feature = "old-x86-backend"))] (Architecture::X86_64, BackendVariant::Any) => { - isa_builder!(x64, (feature = "x64"), triple) + isa_builder!(x64, (feature = "x86"), triple) } - #[cfg(not(feature = "x64"))] + #[cfg(feature = "old-x86-backend")] (Architecture::X86_64, BackendVariant::Any) => { isa_builder!(x86, (feature = "x86"), triple) } (Architecture::Arm { .. }, _) => isa_builder!(arm32, (feature = "arm32"), triple), (Architecture::Aarch64 { .. }, _) => isa_builder!(aarch64, (feature = "arm64"), triple), + (Architecture::S390x { .. }, _) => isa_builder!(s390x, (feature = "s390x"), triple), _ => Err(LookupError::Unsupported), } } @@ -180,17 +183,30 @@ pub fn lookup_by_name(name: &str) -> Result { } /// Describes reason for target lookup failure -#[derive(Error, PartialEq, Eq, Copy, Clone, Debug)] +#[derive(PartialEq, Eq, Copy, Clone, Debug)] pub enum LookupError { /// Support for this target was disabled in the current build. - #[error("Support for this target is disabled")] SupportDisabled, /// Support for this target has not yet been implemented. - #[error("Support for this target has not been implemented yet")] Unsupported, } +// This is manually implementing Error and Display instead of using thiserror to reduce the amount +// of dependencies used by Cranelift. +impl std::error::Error for LookupError {} + +impl fmt::Display for LookupError { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match self { + LookupError::SupportDisabled => write!(f, "Support for this target is disabled"), + LookupError::Unsupported => { + write!(f, "Support for this target has not been implemented yet") + } + } + } +} + /// Builder for a `TargetIsa`. 
/// Modify the ISA-specific settings before creating the `TargetIsa` trait object with `finish`. #[derive(Clone)] @@ -201,6 +217,16 @@ pub struct Builder { } impl Builder { + /// Gets the triple for the builder. + pub fn triple(&self) -> &Triple { + &self.triple + } + + /// Iterates the available settings in the builder. + pub fn iter(&self) -> impl Iterator { + self.setup.iter() + } + /// Combine the ISA-specific settings with the provided ISA-independent settings and allocate a /// fully configured `TargetIsa` trait object. pub fn finish(self, shared_flags: settings::Flags) -> Box { @@ -265,6 +291,14 @@ pub trait TargetIsa: fmt::Display + Send + Sync { /// Get the ISA-independent flags that were used to make this trait object. fn flags(&self) -> &settings::Flags; + /// Get the ISA-dependent flag values that were used to make this trait object. + fn isa_flags(&self) -> Vec; + + /// Get the variant of this ISA (Legacy or MachInst). + fn variant(&self) -> BackendVariant { + BackendVariant::Legacy + } + /// Hashes all flags, both ISA-independent and ISA-specific, into the /// specified hasher. fn hash_all_flags(&self, hasher: &mut dyn Hasher); @@ -460,6 +494,18 @@ pub trait TargetIsa: fmt::Display + Send + Sync { /// IntCC condition for Unsigned Subtraction Overflow (Borrow/Carry). fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC; + /// Returns the flavor of unwind information emitted for this target. + fn unwind_info_kind(&self) -> UnwindInfoKind { + match self.triple().operating_system { + #[cfg(feature = "unwind")] + OperatingSystem::Windows => UnwindInfoKind::Windows, + #[cfg(feature = "unwind")] + _ => UnwindInfoKind::SystemV, + #[cfg(not(feature = "unwind"))] + _ => UnwindInfoKind::None, + } + } + /// Creates unwind information for the function. /// /// Returns `None` if there is no unwind information for the function. 
diff --git a/cranelift/codegen/src/isa/riscv/mod.rs b/cranelift/codegen/src/isa/riscv/mod.rs index 500451c72e..2c1ebf1c85 100644 --- a/cranelift/codegen/src/isa/riscv/mod.rs +++ b/cranelift/codegen/src/isa/riscv/mod.rs @@ -15,8 +15,7 @@ use crate::isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encoding use crate::isa::Builder as IsaBuilder; use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa}; use crate::regalloc; -use alloc::borrow::Cow; -use alloc::boxed::Box; +use alloc::{borrow::Cow, boxed::Box, vec::Vec}; use core::any::Any; use core::fmt; use core::hash::{Hash, Hasher}; @@ -70,6 +69,10 @@ impl TargetIsa for Isa { &self.shared_flags } + fn isa_flags(&self) -> Vec { + self.isa_flags.iter().collect() + } + fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) { self.shared_flags.hash(&mut hasher); self.isa_flags.hash(&mut hasher); diff --git a/cranelift/codegen/src/isa/riscv/settings.rs b/cranelift/codegen/src/isa/riscv/settings.rs index 40aa3bed2b..3da9f491fd 100644 --- a/cranelift/codegen/src/isa/riscv/settings.rs +++ b/cranelift/codegen/src/isa/riscv/settings.rs @@ -1,6 +1,6 @@ //! RISC-V Settings. -use crate::settings::{self, detail, Builder}; +use crate::settings::{self, detail, Builder, Value}; use core::fmt; // Include code generated by `cranelift-codegen/meta/src/gen_settings.rs`. This file contains a diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs new file mode 100644 index 0000000000..14344e5866 --- /dev/null +++ b/cranelift/codegen/src/isa/s390x/abi.rs @@ -0,0 +1,770 @@ +//! Implementation of a standard S390x ABI. +//! +//! This machine uses the "vanilla" ABI implementation from abi_impl.rs, +//! however a few details are different from the description there: +//! +//! - On s390x, the caller must provide a "register save area" of 160 +//! bytes to any function it calls. The called function is free to use +//! this space for any purpose; usually to save callee-saved GPRs. +//! 
(Note that while this area is allocated by the caller, it is counted +//! as part of the callee's stack frame; in particular, the callee's CFA +//! is the top of the register save area, not the incoming SP value.) +//! +//! - Overflow arguments are passed on the stack starting immediately +//! above the register save area. On s390x, this space is allocated +//! only once directly in the prologue, using a size large enough to +//! hold overflow arguments for every call in the function. +//! +//! - On s390x we do not use a frame pointer register; instead, every +//! element of the stack frame is addressed via (constant) offsets +//! from the stack pointer. Note that due to the above (and because +//! there are no variable-sized stack allocations in cranelift), the +//! value of the stack pointer register never changes after the +//! initial allocation in the function prologue. +//! +//! Overall, the stack frame layout on s390x is as follows: +//! +//! ```plain +//! (high address) +//! +//! +---------------------------+ +//! | ... | +//! CFA -----> | stack args | +//! +---------------------------+ +//! | ... | +//! | 160 bytes reg save area | +//! SP at function entry -----> | (used to save GPRs) | +//! +---------------------------+ +//! | ... | +//! | clobbered callee-saves | +//! | (used to save FPRs) | +//! unwind-frame base ----> | (alloc'd by prologue) | +//! +---------------------------+ +//! | ... | +//! | spill slots | +//! | (accessed via nominal SP) | +//! | ... | +//! | stack slots | +//! | (accessed via nominal SP) | +//! nominal SP ---------------> | (alloc'd by prologue) | +//! +---------------------------+ +//! | ... | +//! | args for call | +//! | outgoing reg save area | +//! SP during function ------> | (alloc'd by prologue) | +//! +---------------------------+ +//! +//! (low address) +//! 
``` + +use crate::ir; +use crate::ir::condcodes::IntCC; +use crate::ir::types; +use crate::ir::MemFlags; +use crate::ir::Type; +use crate::isa; +use crate::isa::s390x::inst::*; +use crate::isa::unwind::UnwindInst; +use crate::machinst::*; +use crate::settings; +use crate::{CodegenError, CodegenResult}; +use alloc::boxed::Box; +use alloc::vec::Vec; +use regalloc::{RealReg, Reg, RegClass, Set, Writable}; +use smallvec::{smallvec, SmallVec}; +use std::convert::TryFrom; + +// We use a generic implementation that factors out ABI commonalities. + +/// Support for the S390x ABI from the callee side (within a function body). +pub type S390xABICallee = ABICalleeImpl; + +/// Support for the S390x ABI from the caller side (at a callsite). +pub type S390xABICaller = ABICallerImpl; + +/// ABI Register usage + +fn in_int_reg(ty: Type) -> bool { + match ty { + types::I8 | types::I16 | types::I32 | types::I64 | types::R64 => true, + types::B1 | types::B8 | types::B16 | types::B32 | types::B64 => true, + _ => false, + } +} + +fn in_flt_reg(ty: Type) -> bool { + match ty { + types::F32 | types::F64 => true, + _ => false, + } +} + +fn get_intreg_for_arg(idx: usize) -> Option { + match idx { + 0 => Some(regs::gpr(2)), + 1 => Some(regs::gpr(3)), + 2 => Some(regs::gpr(4)), + 3 => Some(regs::gpr(5)), + 4 => Some(regs::gpr(6)), + _ => None, + } +} + +fn get_fltreg_for_arg(idx: usize) -> Option { + match idx { + 0 => Some(regs::fpr(0)), + 1 => Some(regs::fpr(2)), + 2 => Some(regs::fpr(4)), + 3 => Some(regs::fpr(6)), + _ => None, + } +} + +fn get_intreg_for_ret(idx: usize) -> Option { + match idx { + 0 => Some(regs::gpr(2)), + // ABI extension to support multi-value returns: + 1 => Some(regs::gpr(3)), + 2 => Some(regs::gpr(4)), + 3 => Some(regs::gpr(5)), + _ => None, + } +} + +fn get_fltreg_for_ret(idx: usize) -> Option { + match idx { + 0 => Some(regs::fpr(0)), + // ABI extension to support multi-value returns: + 1 => Some(regs::fpr(2)), + 2 => Some(regs::fpr(4)), + 3 => 
Some(regs::fpr(6)), + _ => None, + } +} + +/// This is the limit for the size of argument and return-value areas on the +/// stack. We place a reasonable limit here to avoid integer overflow issues +/// with 32-bit arithmetic: for now, 128 MB. +static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024; + +impl Into for StackAMode { + fn into(self) -> MemArg { + match self { + StackAMode::FPOffset(off, _ty) => MemArg::InitialSPOffset { off }, + StackAMode::NominalSPOffset(off, _ty) => MemArg::NominalSPOffset { off }, + StackAMode::SPOffset(off, _ty) => { + MemArg::reg_plus_off(stack_reg(), off, MemFlags::trusted()) + } + } + } +} + +/// S390x-specific ABI behavior. This struct just serves as an implementation +/// point for the trait; it is never actually instantiated. +pub struct S390xMachineDeps; + +impl ABIMachineSpec for S390xMachineDeps { + type I = Inst; + + fn word_bits() -> u32 { + 64 + } + + /// Return required stack alignment in bytes. + fn stack_align(_call_conv: isa::CallConv) -> u32 { + 8 + } + + fn compute_arg_locs( + call_conv: isa::CallConv, + _flags: &settings::Flags, + params: &[ir::AbiParam], + args_or_rets: ArgsOrRets, + add_ret_area_ptr: bool, + ) -> CodegenResult<(Vec, i64, Option)> { + let mut next_gpr = 0; + let mut next_fpr = 0; + let mut next_stack: u64 = 0; + let mut ret = vec![]; + + if args_or_rets == ArgsOrRets::Args { + next_stack = 160; + } + + for i in 0..params.len() { + let param = ¶ms[i]; + + // Validate "purpose". 
+ match ¶m.purpose { + &ir::ArgumentPurpose::VMContext + | &ir::ArgumentPurpose::Normal + | &ir::ArgumentPurpose::StackLimit + | &ir::ArgumentPurpose::SignatureId => {} + _ => panic!( + "Unsupported argument purpose {:?} in signature: {:?}", + param.purpose, params + ), + } + + let intreg = in_int_reg(param.value_type); + let fltreg = in_flt_reg(param.value_type); + debug_assert!(intreg || fltreg); + debug_assert!(!(intreg && fltreg)); + + let (next_reg, candidate) = if intreg { + let candidate = match args_or_rets { + ArgsOrRets::Args => get_intreg_for_arg(next_gpr), + ArgsOrRets::Rets => get_intreg_for_ret(next_gpr), + }; + (&mut next_gpr, candidate) + } else { + let candidate = match args_or_rets { + ArgsOrRets::Args => get_fltreg_for_arg(next_fpr), + ArgsOrRets::Rets => get_fltreg_for_ret(next_fpr), + }; + (&mut next_fpr, candidate) + }; + + // In the Wasmtime ABI only the first return value can be in a register. + let candidate = + if call_conv.extends_wasmtime() && args_or_rets == ArgsOrRets::Rets && i > 0 { + None + } else { + candidate + }; + + if let Some(reg) = candidate { + ret.push(ABIArg::reg( + reg.to_real_reg(), + param.value_type, + param.extension, + param.purpose, + )); + *next_reg += 1; + } else { + // Compute size. Every argument or return value takes a slot of + // at least 8 bytes, except for return values in the Wasmtime ABI. + let size = (ty_bits(param.value_type) / 8) as u64; + let slot_size = if call_conv.extends_wasmtime() && args_or_rets == ArgsOrRets::Rets + { + size + } else { + std::cmp::max(size, 8) + }; + + // Align the stack slot. + debug_assert!(slot_size.is_power_of_two()); + next_stack = align_to(next_stack, slot_size); + + // If the type is actually of smaller size (and the argument + // was not extended), it is passed right-aligned. 
+ let offset = if size < slot_size && param.extension == ir::ArgumentExtension::None { + slot_size - size + } else { + 0 + }; + ret.push(ABIArg::stack( + (next_stack + offset) as i64, + param.value_type, + param.extension, + param.purpose, + )); + next_stack += slot_size; + } + } + + next_stack = align_to(next_stack, 8); + + let extra_arg = if add_ret_area_ptr { + debug_assert!(args_or_rets == ArgsOrRets::Args); + if let Some(reg) = get_intreg_for_arg(next_gpr) { + ret.push(ABIArg::reg( + reg.to_real_reg(), + types::I64, + ir::ArgumentExtension::None, + ir::ArgumentPurpose::Normal, + )); + } else { + ret.push(ABIArg::stack( + next_stack as i64, + types::I64, + ir::ArgumentExtension::None, + ir::ArgumentPurpose::Normal, + )); + next_stack += 8; + } + Some(ret.len() - 1) + } else { + None + }; + + // To avoid overflow issues, limit the arg/return size to something + // reasonable -- here, 128 MB. + if next_stack > STACK_ARG_RET_SIZE_LIMIT { + return Err(CodegenError::ImplLimitExceeded); + } + + Ok((ret, next_stack as i64, extra_arg)) + } + + fn fp_to_arg_offset(_call_conv: isa::CallConv, _flags: &settings::Flags) -> i64 { + 0 + } + + fn gen_load_stack(mem: StackAMode, into_reg: Writable, ty: Type) -> Inst { + Inst::gen_load(into_reg, mem.into(), ty) + } + + fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst { + Inst::gen_store(mem.into(), from_reg, ty) + } + + fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Inst { + Inst::gen_move(to_reg, from_reg, ty) + } + + fn gen_extend( + to_reg: Writable, + from_reg: Reg, + signed: bool, + from_bits: u8, + to_bits: u8, + ) -> Inst { + assert!(from_bits < to_bits); + Inst::Extend { + rd: to_reg, + rn: from_reg, + signed, + from_bits, + to_bits, + } + } + + fn gen_ret() -> Inst { + Inst::Ret { link: gpr(14) } + } + + fn gen_add_imm(into_reg: Writable, from_reg: Reg, imm: u32) -> SmallInstVec { + let mut insts = SmallVec::new(); + if let Some(imm) = UImm12::maybe_from_u64(imm as u64) { + 
insts.push(Inst::LoadAddr { + rd: into_reg, + mem: MemArg::BXD12 { + base: from_reg, + index: zero_reg(), + disp: imm, + flags: MemFlags::trusted(), + }, + }); + } else if let Some(imm) = SImm20::maybe_from_i64(imm as i64) { + insts.push(Inst::LoadAddr { + rd: into_reg, + mem: MemArg::BXD20 { + base: from_reg, + index: zero_reg(), + disp: imm, + flags: MemFlags::trusted(), + }, + }); + } else { + if from_reg != into_reg.to_reg() { + insts.push(Inst::mov64(into_reg, from_reg)); + } + insts.push(Inst::AluRUImm32 { + alu_op: ALUOp::Add64, + rd: into_reg, + imm, + }); + } + insts + } + + fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec { + let mut insts = SmallVec::new(); + insts.push(Inst::CmpTrapRR { + op: CmpOp::CmpL64, + rn: stack_reg(), + rm: limit_reg, + cond: Cond::from_intcc(IntCC::UnsignedLessThanOrEqual), + trap_code: ir::TrapCode::StackOverflow, + }); + insts + } + + fn gen_epilogue_placeholder() -> Inst { + Inst::EpiloguePlaceholder + } + + fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable, _ty: Type) -> Inst { + let mem = mem.into(); + Inst::LoadAddr { rd: into_reg, mem } + } + + fn get_stacklimit_reg() -> Reg { + spilltmp_reg() + } + + fn gen_load_base_offset(into_reg: Writable, base: Reg, offset: i32, ty: Type) -> Inst { + let mem = MemArg::reg_plus_off(base, offset.into(), MemFlags::trusted()); + Inst::gen_load(into_reg, mem, ty) + } + + fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst { + let mem = MemArg::reg_plus_off(base, offset.into(), MemFlags::trusted()); + Inst::gen_store(mem, from_reg, ty) + } + + fn gen_sp_reg_adjust(imm: i32) -> SmallInstVec { + if imm == 0 { + return SmallVec::new(); + } + + let mut insts = SmallVec::new(); + if let Ok(imm) = i16::try_from(imm) { + insts.push(Inst::AluRSImm16 { + alu_op: ALUOp::Add64, + rd: writable_stack_reg(), + imm, + }); + } else { + insts.push(Inst::AluRSImm32 { + alu_op: ALUOp::Add64, + rd: writable_stack_reg(), + imm, + }); + } + insts + } + + fn 
gen_nominal_sp_adj(offset: i32) -> Inst { + Inst::VirtualSPOffsetAdj { + offset: offset.into(), + } + } + + fn gen_prologue_frame_setup(_flags: &settings::Flags) -> SmallInstVec { + SmallVec::new() + } + + fn gen_epilogue_frame_restore(_flags: &settings::Flags) -> SmallInstVec { + SmallVec::new() + } + + fn gen_probestack(_: u32) -> SmallInstVec { + // TODO: implement if we ever require stack probes on an s390x host + // (unlikely unless Lucet is ported) + smallvec![] + } + + // Returns stack bytes used as well as instructions. Does not adjust + // nominal SP offset; abi_impl generic code will do that. + fn gen_clobber_save( + call_conv: isa::CallConv, + flags: &settings::Flags, + clobbers: &Set>, + fixed_frame_storage_size: u32, + outgoing_args_size: u32, + ) -> (u64, SmallVec<[Inst; 16]>) { + let mut insts = SmallVec::new(); + + // Collect clobbered registers. + let (clobbered_gpr, clobbered_fpr) = get_regs_saved_in_prologue(call_conv, clobbers); + let mut first_clobbered_gpr = 16; + for reg in clobbered_gpr { + let enc = reg.to_reg().get_hw_encoding(); + if enc < first_clobbered_gpr { + first_clobbered_gpr = enc; + } + } + let clobber_size = clobbered_fpr.len() * 8; + if flags.unwind_info() { + insts.push(Inst::Unwind { + inst: UnwindInst::DefineNewFrame { + offset_upward_to_caller_sp: 160, + offset_downward_to_clobbers: clobber_size as u32, + }, + }); + } + + // Use STMG to save clobbered GPRs into save area. + if first_clobbered_gpr < 16 { + let offset = 8 * first_clobbered_gpr as i64; + insts.push(Inst::StoreMultiple64 { + rt: gpr(first_clobbered_gpr as u8), + rt2: gpr(15), + addr_reg: stack_reg(), + addr_off: SImm20::maybe_from_i64(offset).unwrap(), + }); + } + if flags.unwind_info() { + for i in first_clobbered_gpr..16 { + insts.push(Inst::Unwind { + inst: UnwindInst::SaveReg { + clobber_offset: clobber_size as u32 + (i * 8) as u32, + reg: gpr(i as u8).to_real_reg(), + }, + }); + } + } + + // Decrement stack pointer. 
+ let stack_size = + outgoing_args_size as i32 + clobber_size as i32 + fixed_frame_storage_size as i32; + insts.extend(Self::gen_sp_reg_adjust(-stack_size)); + if flags.unwind_info() { + insts.push(Inst::Unwind { + inst: UnwindInst::StackAlloc { + size: stack_size as u32, + }, + }); + } + + let sp_adj = outgoing_args_size as i32; + if sp_adj > 0 { + insts.push(Self::gen_nominal_sp_adj(sp_adj)); + } + + // Save FPRs. + for (i, reg) in clobbered_fpr.iter().enumerate() { + insts.push(Inst::FpuStore64 { + rd: reg.to_reg().to_reg(), + mem: MemArg::reg_plus_off( + stack_reg(), + (i * 8) as i64 + outgoing_args_size as i64 + fixed_frame_storage_size as i64, + MemFlags::trusted(), + ), + }); + if flags.unwind_info() { + insts.push(Inst::Unwind { + inst: UnwindInst::SaveReg { + clobber_offset: (i * 8) as u32, + reg: reg.to_reg(), + }, + }); + } + } + + (clobber_size as u64, insts) + } + + fn gen_clobber_restore( + call_conv: isa::CallConv, + _: &settings::Flags, + clobbers: &Set>, + fixed_frame_storage_size: u32, + outgoing_args_size: u32, + ) -> SmallVec<[Inst; 16]> { + let mut insts = SmallVec::new(); + + // Collect clobbered registers. + let (clobbered_gpr, clobbered_fpr) = get_regs_saved_in_prologue(call_conv, clobbers); + let mut first_clobbered_gpr = 16; + for reg in clobbered_gpr { + let enc = reg.to_reg().get_hw_encoding(); + if enc < first_clobbered_gpr { + first_clobbered_gpr = enc; + } + } + let clobber_size = clobbered_fpr.len() * 8; + + // Restore FPRs. + for (i, reg) in clobbered_fpr.iter().enumerate() { + insts.push(Inst::FpuLoad64 { + rd: Writable::from_reg(reg.to_reg().to_reg()), + mem: MemArg::reg_plus_off( + stack_reg(), + (i * 8) as i64 + outgoing_args_size as i64 + fixed_frame_storage_size as i64, + MemFlags::trusted(), + ), + }); + } + + // Increment stack pointer unless it will be restored implicitly. 
+ let stack_size = + outgoing_args_size as i32 + clobber_size as i32 + fixed_frame_storage_size as i32; + let implicit_sp_restore = first_clobbered_gpr < 16 + && SImm20::maybe_from_i64(8 * first_clobbered_gpr as i64 + stack_size as i64).is_some(); + if !implicit_sp_restore { + insts.extend(Self::gen_sp_reg_adjust(stack_size)); + } + + // Use LMG to restore clobbered GPRs from save area. + if first_clobbered_gpr < 16 { + let mut offset = 8 * first_clobbered_gpr as i64; + if implicit_sp_restore { + offset += stack_size as i64; + } + insts.push(Inst::LoadMultiple64 { + rt: writable_gpr(first_clobbered_gpr as u8), + rt2: writable_gpr(15), + addr_reg: stack_reg(), + addr_off: SImm20::maybe_from_i64(offset).unwrap(), + }); + } + + insts + } + + fn gen_call( + dest: &CallDest, + uses: Vec, + defs: Vec>, + opcode: ir::Opcode, + tmp: Writable, + _callee_conv: isa::CallConv, + _caller_conv: isa::CallConv, + ) -> SmallVec<[(InstIsSafepoint, Inst); 2]> { + let mut insts = SmallVec::new(); + match &dest { + &CallDest::ExtName(ref name, RelocDistance::Near) => insts.push(( + InstIsSafepoint::Yes, + Inst::Call { + link: writable_gpr(14), + info: Box::new(CallInfo { + dest: name.clone(), + uses, + defs, + opcode, + }), + }, + )), + &CallDest::ExtName(ref name, RelocDistance::Far) => { + insts.push(( + InstIsSafepoint::No, + Inst::LoadExtNameFar { + rd: tmp, + name: Box::new(name.clone()), + offset: 0, + }, + )); + insts.push(( + InstIsSafepoint::Yes, + Inst::CallInd { + link: writable_gpr(14), + info: Box::new(CallIndInfo { + rn: tmp.to_reg(), + uses, + defs, + opcode, + }), + }, + )); + } + &CallDest::Reg(reg) => insts.push(( + InstIsSafepoint::Yes, + Inst::CallInd { + link: writable_gpr(14), + info: Box::new(CallIndInfo { + rn: *reg, + uses, + defs, + opcode, + }), + }, + )), + } + + insts + } + + fn gen_memcpy( + _call_conv: isa::CallConv, + _dst: Reg, + _src: Reg, + _size: usize, + ) -> SmallVec<[Self::I; 8]> { + unimplemented!("StructArgs not implemented for S390X yet"); + } 
+ + fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 { + // We allocate in terms of 8-byte slots. + match (rc, ty) { + (RegClass::I64, _) => 1, + (RegClass::F64, _) => 1, + _ => panic!("Unexpected register class!"), + } + } + + /// Get the current virtual-SP offset from an instruction-emission state. + fn get_virtual_sp_offset_from_state(s: &EmitState) -> i64 { + s.virtual_sp_offset + } + + /// Get the nominal-SP-to-FP offset from an instruction-emission state. + fn get_nominal_sp_to_fp(s: &EmitState) -> i64 { + s.initial_sp_offset + } + + fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec> { + let mut caller_saved = Vec::new(); + for i in 0..15 { + let x = writable_gpr(i); + if is_reg_clobbered_by_call(call_conv_of_callee, x.to_reg().to_real_reg()) { + caller_saved.push(x); + } + } + for i in 0..15 { + let v = writable_fpr(i); + if is_reg_clobbered_by_call(call_conv_of_callee, v.to_reg().to_real_reg()) { + caller_saved.push(v); + } + } + caller_saved + } + + fn get_ext_mode( + _call_conv: isa::CallConv, + specified: ir::ArgumentExtension, + ) -> ir::ArgumentExtension { + specified + } +} + +fn is_reg_saved_in_prologue(_call_conv: isa::CallConv, r: RealReg) -> bool { + match r.get_class() { + RegClass::I64 => { + // r6 - r15 inclusive are callee-saves. + r.get_hw_encoding() >= 6 && r.get_hw_encoding() <= 15 + } + RegClass::F64 => { + // f8 - f15 inclusive are callee-saves. + r.get_hw_encoding() >= 8 && r.get_hw_encoding() <= 15 + } + _ => panic!("Unexpected RegClass"), + } +} + +fn get_regs_saved_in_prologue( + call_conv: isa::CallConv, + regs: &Set>, +) -> (Vec>, Vec>) { + let mut int_saves = vec![]; + let mut fpr_saves = vec![]; + for ® in regs.iter() { + if is_reg_saved_in_prologue(call_conv, reg.to_reg()) { + match reg.to_reg().get_class() { + RegClass::I64 => int_saves.push(reg), + RegClass::F64 => fpr_saves.push(reg), + _ => panic!("Unexpected RegClass"), + } + } + } + // Sort registers for deterministic code output. 
+ int_saves.sort_by_key(|r| r.to_reg().get_index()); + fpr_saves.sort_by_key(|r| r.to_reg().get_index()); + (int_saves, fpr_saves) +} + +fn is_reg_clobbered_by_call(_call_conv: isa::CallConv, r: RealReg) -> bool { + match r.get_class() { + RegClass::I64 => { + // r0 - r5 inclusive are caller-saves. + r.get_hw_encoding() <= 5 + } + RegClass::F64 => { + // f0 - f7 inclusive are caller-saves. + r.get_hw_encoding() <= 7 + } + _ => panic!("Unexpected RegClass"), + } +} diff --git a/cranelift/codegen/src/isa/s390x/inst/args.rs b/cranelift/codegen/src/isa/s390x/inst/args.rs new file mode 100644 index 0000000000..75ee5cbcfe --- /dev/null +++ b/cranelift/codegen/src/isa/s390x/inst/args.rs @@ -0,0 +1,317 @@ +//! S390x ISA definitions: instruction arguments. + +// Some variants are never constructed, but we still want them as options in the future. +#![allow(dead_code)] + +use crate::ir::condcodes::{FloatCC, IntCC}; +use crate::ir::MemFlags; +use crate::isa::s390x::inst::*; +use crate::machinst::MachLabel; + +use regalloc::{PrettyPrint, RealRegUniverse, Reg}; + +use std::string::String; + +//============================================================================= +// Instruction sub-components (memory addresses): definitions + +/// A memory argument to load/store, encapsulating the possible addressing modes. +#[derive(Clone, Debug)] +pub enum MemArg { + // + // Real IBM Z addressing modes: + // + /// Base register, index register, and 12-bit unsigned displacement. + BXD12 { + base: Reg, + index: Reg, + disp: UImm12, + flags: MemFlags, + }, + + /// Base register, index register, and 20-bit signed displacement. + BXD20 { + base: Reg, + index: Reg, + disp: SImm20, + flags: MemFlags, + }, + + /// PC-relative Reference to a label. + Label { target: BranchTarget }, + + /// PC-relative Reference to a near symbol. 
+ Symbol { + name: Box, + offset: i32, + flags: MemFlags, + }, + + // + // Virtual addressing modes that are lowered at emission time: + // + /// Arbitrary offset from a register. Converted to generation of large + /// offsets with multiple instructions as necessary during code emission. + RegOffset { reg: Reg, off: i64, flags: MemFlags }, + + /// Offset from the stack pointer at function entry. + InitialSPOffset { off: i64 }, + + /// Offset from the "nominal stack pointer", which is where the real SP is + /// just after stack and spill slots are allocated in the function prologue. + /// At emission time, this is converted to `SPOffset` with a fixup added to + /// the offset constant. The fixup is a running value that is tracked as + /// emission iterates through instructions in linear order, and can be + /// adjusted up and down with [Inst::VirtualSPOffsetAdj]. + /// + /// The standard ABI is in charge of handling this (by emitting the + /// adjustment meta-instructions). It maintains the invariant that "nominal + /// SP" is where the actual SP is after the function prologue and before + /// clobber pushes. See the diagram in the documentation for + /// [crate::isa::s390x::abi](the ABI module) for more details. + NominalSPOffset { off: i64 }, +} + +impl MemArg { + /// Memory reference using an address in a register. + pub fn reg(reg: Reg, flags: MemFlags) -> MemArg { + MemArg::BXD12 { + base: reg, + index: zero_reg(), + disp: UImm12::zero(), + flags, + } + } + + /// Memory reference using the sum of two registers as an address. + pub fn reg_plus_reg(reg1: Reg, reg2: Reg, flags: MemFlags) -> MemArg { + MemArg::BXD12 { + base: reg1, + index: reg2, + disp: UImm12::zero(), + flags, + } + } + + /// Memory reference using the sum of a register an an offset as address. 
+ pub fn reg_plus_off(reg: Reg, off: i64, flags: MemFlags) -> MemArg { + MemArg::RegOffset { reg, off, flags } + } + + pub(crate) fn get_flags(&self) -> MemFlags { + match self { + MemArg::BXD12 { flags, .. } => *flags, + MemArg::BXD20 { flags, .. } => *flags, + MemArg::RegOffset { flags, .. } => *flags, + MemArg::Label { .. } => MemFlags::trusted(), + MemArg::Symbol { flags, .. } => *flags, + MemArg::InitialSPOffset { .. } => MemFlags::trusted(), + MemArg::NominalSPOffset { .. } => MemFlags::trusted(), + } + } + + pub(crate) fn can_trap(&self) -> bool { + !self.get_flags().notrap() + } +} + +//============================================================================= +// Instruction sub-components (conditions, branches and branch targets): +// definitions + +/// Condition for conditional branches. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct Cond { + mask: u8, +} + +impl Cond { + pub fn from_mask(mask: u8) -> Cond { + assert!(mask >= 1 && mask <= 14); + Cond { mask } + } + + pub fn from_intcc(cc: IntCC) -> Cond { + let mask = match cc { + IntCC::Equal => 8, + IntCC::NotEqual => 4 | 2, + IntCC::SignedGreaterThanOrEqual => 8 | 2, + IntCC::SignedGreaterThan => 2, + IntCC::SignedLessThanOrEqual => 8 | 4, + IntCC::SignedLessThan => 4, + IntCC::UnsignedGreaterThanOrEqual => 8 | 2, + IntCC::UnsignedGreaterThan => 2, + IntCC::UnsignedLessThanOrEqual => 8 | 4, + IntCC::UnsignedLessThan => 4, + IntCC::Overflow => 1, + IntCC::NotOverflow => 8 | 4 | 2, + }; + Cond { mask } + } + + pub fn from_floatcc(cc: FloatCC) -> Cond { + let mask = match cc { + FloatCC::Ordered => 8 | 4 | 2, + FloatCC::Unordered => 1, + FloatCC::Equal => 8, + FloatCC::NotEqual => 4 | 2 | 1, + FloatCC::OrderedNotEqual => 4 | 2, + FloatCC::UnorderedOrEqual => 8 | 1, + FloatCC::LessThan => 4, + FloatCC::LessThanOrEqual => 8 | 4, + FloatCC::GreaterThan => 2, + FloatCC::GreaterThanOrEqual => 8 | 2, + FloatCC::UnorderedOrLessThan => 4 | 1, + FloatCC::UnorderedOrLessThanOrEqual => 8 | 4 | 1, + 
FloatCC::UnorderedOrGreaterThan => 2 | 1, + FloatCC::UnorderedOrGreaterThanOrEqual => 8 | 2 | 1, + }; + Cond { mask } + } + + /// Return the inverted condition. + pub fn invert(self) -> Cond { + Cond { + mask: !self.mask & 15, + } + } + + /// Return the machine encoding of this condition. + pub fn bits(self) -> u8 { + self.mask + } +} + +/// A branch target. Either unresolved (basic-block index) or resolved (offset +/// from end of current instruction). +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum BranchTarget { + /// An unresolved reference to a Label, as passed into + /// `lower_branch_group()`. + Label(MachLabel), + /// A fixed PC offset. + ResolvedOffset(i32), +} + +impl BranchTarget { + /// Return the target's label, if it is a label-based target. + pub fn as_label(self) -> Option { + match self { + BranchTarget::Label(l) => Some(l), + _ => None, + } + } + + /// Return the target's offset, if specified, or zero if label-based. + pub fn as_ri_offset_or_zero(self) -> u16 { + let off = match self { + BranchTarget::ResolvedOffset(off) => off >> 1, + _ => 0, + }; + assert!(off <= 0x7fff); + assert!(off >= -0x8000); + off as u16 + } + + /// Return the target's offset, if specified, or zero if label-based. + pub fn as_ril_offset_or_zero(self) -> u32 { + let off = match self { + BranchTarget::ResolvedOffset(off) => off >> 1, + _ => 0, + }; + off as u32 + } +} + +impl PrettyPrint for MemArg { + fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + match self { + &MemArg::BXD12 { + base, index, disp, .. 
+ } => { + if base != zero_reg() { + if index != zero_reg() { + format!( + "{}({},{})", + disp.show_rru(mb_rru), + index.show_rru(mb_rru), + base.show_rru(mb_rru) + ) + } else { + format!("{}({})", disp.show_rru(mb_rru), base.show_rru(mb_rru)) + } + } else { + if index != zero_reg() { + format!("{}({},)", disp.show_rru(mb_rru), index.show_rru(mb_rru)) + } else { + format!("{}", disp.show_rru(mb_rru)) + } + } + } + &MemArg::BXD20 { + base, index, disp, .. + } => { + if base != zero_reg() { + if index != zero_reg() { + format!( + "{}({},{})", + disp.show_rru(mb_rru), + index.show_rru(mb_rru), + base.show_rru(mb_rru) + ) + } else { + format!("{}({})", disp.show_rru(mb_rru), base.show_rru(mb_rru)) + } + } else { + if index != zero_reg() { + format!("{}({},)", disp.show_rru(mb_rru), index.show_rru(mb_rru)) + } else { + format!("{}", disp.show_rru(mb_rru)) + } + } + } + &MemArg::Label { ref target } => target.show_rru(mb_rru), + &MemArg::Symbol { + ref name, offset, .. + } => format!("{} + {}", name, offset), + // Eliminated by `mem_finalize()`. + &MemArg::InitialSPOffset { .. } + | &MemArg::NominalSPOffset { .. } + | &MemArg::RegOffset { .. 
} => { + panic!("Unexpected pseudo mem-arg mode (stack-offset or generic reg-offset)!") + } + } + } +} + +impl PrettyPrint for Cond { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + let s = match self.mask { + 1 => "o", + 2 => "h", + 3 => "nle", + 4 => "l", + 5 => "nhe", + 6 => "lh", + 7 => "ne", + 8 => "e", + 9 => "nlh", + 10 => "he", + 11 => "nl", + 12 => "le", + 13 => "nh", + 14 => "no", + _ => unreachable!(), + }; + s.to_string() + } +} + +impl PrettyPrint for BranchTarget { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + match self { + &BranchTarget::Label(label) => format!("label{:?}", label.get()), + &BranchTarget::ResolvedOffset(off) => format!("{}", off), + } + } +} diff --git a/cranelift/codegen/src/isa/s390x/inst/emit.rs b/cranelift/codegen/src/isa/s390x/inst/emit.rs new file mode 100644 index 0000000000..da1574fdf3 --- /dev/null +++ b/cranelift/codegen/src/isa/s390x/inst/emit.rs @@ -0,0 +1,1965 @@ +//! S390x ISA: binary code emission. + +use crate::binemit::{Reloc, StackMap}; +use crate::ir::condcodes::IntCC; +use crate::ir::MemFlags; +use crate::ir::{SourceLoc, TrapCode}; +use crate::isa::s390x::inst::*; +use core::convert::TryFrom; +use log::debug; +use regalloc::{Reg, RegClass}; + +/// Memory addressing mode finalization: convert "special" modes (e.g., +/// generic arbitrary stack offset) into real addressing modes, possibly by +/// emitting some helper instructions that come immediately before the use +/// of this amode. +pub fn mem_finalize( + mem: &MemArg, + state: &EmitState, + have_d12: bool, + have_d20: bool, + have_pcrel: bool, + have_index: bool, +) -> (SmallVec<[Inst; 4]>, MemArg) { + let mut insts = SmallVec::new(); + + // Resolve virtual addressing modes. + let mem = match mem { + &MemArg::RegOffset { off, .. } + | &MemArg::InitialSPOffset { off } + | &MemArg::NominalSPOffset { off } => { + let base = match mem { + &MemArg::RegOffset { reg, .. } => reg, + &MemArg::InitialSPOffset { .. 
} | &MemArg::NominalSPOffset { .. } => stack_reg(), + _ => unreachable!(), + }; + let adj = match mem { + &MemArg::InitialSPOffset { .. } => { + state.initial_sp_offset + state.virtual_sp_offset + } + &MemArg::NominalSPOffset { .. } => state.virtual_sp_offset, + _ => 0, + }; + let off = off + adj; + + if let Some(disp) = UImm12::maybe_from_u64(off as u64) { + MemArg::BXD12 { + base, + index: zero_reg(), + disp, + flags: mem.get_flags(), + } + } else if let Some(disp) = SImm20::maybe_from_i64(off) { + MemArg::BXD20 { + base, + index: zero_reg(), + disp, + flags: mem.get_flags(), + } + } else { + let tmp = writable_spilltmp_reg(); + assert!(base != tmp.to_reg()); + insts.extend(Inst::load_constant64(tmp, off as u64)); + MemArg::reg_plus_reg(base, tmp.to_reg(), mem.get_flags()) + } + } + _ => mem.clone(), + }; + + // If this addressing mode cannot be handled by the instruction, use load-address. + let need_load_address = match &mem { + &MemArg::Label { .. } | &MemArg::Symbol { .. } if !have_pcrel => true, + &MemArg::BXD20 { .. } if !have_d20 => true, + &MemArg::BXD12 { index, .. } | &MemArg::BXD20 { index, .. } if !have_index => { + index != zero_reg() + } + _ => false, + }; + let mem = if need_load_address { + let flags = mem.get_flags(); + let tmp = writable_spilltmp_reg(); + insts.push(Inst::LoadAddr { rd: tmp, mem }); + MemArg::reg(tmp.to_reg(), flags) + } else { + mem + }; + + // Convert 12-bit displacement to 20-bit if required. 
+ let mem = match &mem { + &MemArg::BXD12 { + base, + index, + disp, + flags, + } if !have_d12 => { + assert!(have_d20); + MemArg::BXD20 { + base, + index, + disp: SImm20::from_uimm12(disp), + flags, + } + } + _ => mem, + }; + + (insts, mem) +} + +pub fn mem_emit( + rd: Reg, + mem: &MemArg, + opcode_rx: Option, + opcode_rxy: Option, + opcode_ril: Option, + add_trap: bool, + sink: &mut MachBuffer, + emit_info: &EmitInfo, + state: &mut EmitState, +) { + let (mem_insts, mem) = mem_finalize( + mem, + state, + opcode_rx.is_some(), + opcode_rxy.is_some(), + opcode_ril.is_some(), + true, + ); + for inst in mem_insts.into_iter() { + inst.emit(sink, emit_info, state); + } + + if add_trap && mem.can_trap() { + let srcloc = state.cur_srcloc(); + if srcloc != SourceLoc::default() { + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + } + + match &mem { + &MemArg::BXD12 { + base, index, disp, .. + } => { + put( + sink, + &enc_rx(opcode_rx.unwrap(), rd, base, index, disp.bits()), + ); + } + &MemArg::BXD20 { + base, index, disp, .. + } => { + put( + sink, + &enc_rxy(opcode_rxy.unwrap(), rd, base, index, disp.bits()), + ); + } + &MemArg::Label { ref target } => { + if let Some(l) = target.as_label() { + sink.use_label_at_offset(sink.cur_offset(), l, LabelUse::BranchRIL); + } + put( + sink, + &enc_ril_b(opcode_ril.unwrap(), rd, target.as_ril_offset_or_zero()), + ); + } + &MemArg::Symbol { + ref name, offset, .. 
+ } => { + let reloc = Reloc::S390xPCRel32Dbl; + let srcloc = state.cur_srcloc(); + put_with_reloc( + sink, + &enc_ril_b(opcode_ril.unwrap(), rd, 0), + 2, + srcloc, + reloc, + name, + offset.into(), + ); + } + _ => unreachable!(), + } +} + +pub fn mem_imm8_emit( + imm: u8, + mem: &MemArg, + opcode_si: u16, + opcode_siy: u16, + add_trap: bool, + sink: &mut MachBuffer, + emit_info: &EmitInfo, + state: &mut EmitState, +) { + let (mem_insts, mem) = mem_finalize(mem, state, true, true, false, false); + for inst in mem_insts.into_iter() { + inst.emit(sink, emit_info, state); + } + + if add_trap && mem.can_trap() { + let srcloc = state.cur_srcloc(); + if srcloc != SourceLoc::default() { + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + } + + match &mem { + &MemArg::BXD12 { + base, index, disp, .. + } => { + assert!(index == zero_reg()); + put(sink, &enc_si(opcode_si, base, disp.bits(), imm)); + } + &MemArg::BXD20 { + base, index, disp, .. + } => { + assert!(index == zero_reg()); + put(sink, &enc_siy(opcode_siy, base, disp.bits(), imm)); + } + _ => unreachable!(), + } +} + +pub fn mem_imm16_emit( + imm: i16, + mem: &MemArg, + opcode_sil: u16, + add_trap: bool, + sink: &mut MachBuffer, + emit_info: &EmitInfo, + state: &mut EmitState, +) { + let (mem_insts, mem) = mem_finalize(mem, state, true, false, false, false); + for inst in mem_insts.into_iter() { + inst.emit(sink, emit_info, state); + } + + if add_trap && mem.can_trap() { + let srcloc = state.cur_srcloc(); + if srcloc != SourceLoc::default() { + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + } + + match &mem { + &MemArg::BXD12 { + base, index, disp, .. 
+ } => { + assert!(index == zero_reg()); + put(sink, &enc_sil(opcode_sil, base, disp.bits(), imm)); + } + _ => unreachable!(), + } +} + +//============================================================================= +// Instructions and subcomponents: emission + +fn machreg_to_gpr(m: Reg) -> u8 { + assert_eq!(m.get_class(), RegClass::I64); + u8::try_from(m.to_real_reg().get_hw_encoding()).unwrap() +} + +fn machreg_to_fpr(m: Reg) -> u8 { + assert_eq!(m.get_class(), RegClass::F64); + u8::try_from(m.to_real_reg().get_hw_encoding()).unwrap() +} + +fn machreg_to_gpr_or_fpr(m: Reg) -> u8 { + u8::try_from(m.to_real_reg().get_hw_encoding()).unwrap() +} + +/// E-type instructions. +/// +/// 15 +/// opcode +/// 0 +/// +fn enc_e(opcode: u16) -> [u8; 2] { + let mut enc: [u8; 2] = [0; 2]; + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + + enc[0] = opcode1; + enc[1] = opcode2; + enc +} + +/// RIa-type instructions. +/// +/// 31 23 19 15 +/// opcode1 r1 opcode2 i2 +/// 24 20 16 0 +/// +fn enc_ri_a(opcode: u16, r1: Reg, i2: u16) -> [u8; 4] { + let mut enc: [u8; 4] = [0; 4]; + let opcode1 = ((opcode >> 4) & 0xff) as u8; + let opcode2 = (opcode & 0xf) as u8; + let r1 = machreg_to_gpr(r1) & 0x0f; + + enc[0] = opcode1; + enc[1] = r1 << 4 | opcode2; + enc[2..].copy_from_slice(&i2.to_be_bytes()); + enc +} + +/// RIb-type instructions. +/// +/// 31 23 19 15 +/// opcode1 r1 opcode2 ri2 +/// 24 20 16 0 +/// +fn enc_ri_b(opcode: u16, r1: Reg, ri2: i32) -> [u8; 4] { + let mut enc: [u8; 4] = [0; 4]; + let opcode1 = ((opcode >> 4) & 0xff) as u8; + let opcode2 = (opcode & 0xf) as u8; + let r1 = machreg_to_gpr(r1) & 0x0f; + let ri2 = ((ri2 >> 1) & 0xffff) as u16; + + enc[0] = opcode1; + enc[1] = r1 << 4 | opcode2; + enc[2..].copy_from_slice(&ri2.to_be_bytes()); + enc +} + +/// RIc-type instructions. 
+/// +/// 31 23 19 15 +/// opcode1 m1 opcode2 ri2 +/// 24 20 16 0 +/// +fn enc_ri_c(opcode: u16, m1: u8, ri2: i32) -> [u8; 4] { + let mut enc: [u8; 4] = [0; 4]; + let opcode1 = ((opcode >> 4) & 0xff) as u8; + let opcode2 = (opcode & 0xf) as u8; + let m1 = m1 & 0x0f; + let ri2 = ((ri2 >> 1) & 0xffff) as u16; + + enc[0] = opcode1; + enc[1] = m1 << 4 | opcode2; + enc[2..].copy_from_slice(&ri2.to_be_bytes()); + enc +} + +/// RIEa-type instructions. +/// +/// 47 39 35 31 15 11 7 +/// opcode1 r1 -- i2 m3 -- opcode2 +/// 40 36 32 16 12 8 0 +/// +fn enc_rie_a(opcode: u16, r1: Reg, i2: u16, m3: u8) -> [u8; 6] { + let mut enc: [u8; 6] = [0; 6]; + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let r1 = machreg_to_gpr(r1) & 0x0f; + let m3 = m3 & 0x0f; + + enc[0] = opcode1; + enc[1] = r1 << 4; + enc[2..4].copy_from_slice(&i2.to_be_bytes()); + enc[4] = m3 << 4; + enc[5] = opcode2; + enc +} + +/// RIEd-type instructions. +/// +/// 47 39 35 31 15 7 +/// opcode1 r1 r3 i2 -- opcode2 +/// 40 36 32 16 8 0 +/// +fn enc_rie_d(opcode: u16, r1: Reg, r3: Reg, i2: u16) -> [u8; 6] { + let mut enc: [u8; 6] = [0; 6]; + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let r1 = machreg_to_gpr(r1) & 0x0f; + let r3 = machreg_to_gpr(r3) & 0x0f; + + enc[0] = opcode1; + enc[1] = r1 << 4 | r3; + enc[2..4].copy_from_slice(&i2.to_be_bytes()); + enc[5] = opcode2; + enc +} + +/// RIEg-type instructions. +/// +/// 47 39 35 31 15 7 +/// opcode1 r1 m3 i2 -- opcode2 +/// 40 36 32 16 8 0 +/// +fn enc_rie_g(opcode: u16, r1: Reg, i2: u16, m3: u8) -> [u8; 6] { + let mut enc: [u8; 6] = [0; 6]; + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let r1 = machreg_to_gpr(r1) & 0x0f; + let m3 = m3 & 0x0f; + + enc[0] = opcode1; + enc[1] = r1 << 4 | m3; + enc[2..4].copy_from_slice(&i2.to_be_bytes()); + enc[5] = opcode2; + enc +} + +/// RILa-type instructions. 
+/// +/// 47 39 35 31 +/// opcode1 r1 opcode2 i2 +/// 40 36 32 0 +/// +fn enc_ril_a(opcode: u16, r1: Reg, i2: u32) -> [u8; 6] { + let mut enc: [u8; 6] = [0; 6]; + let opcode1 = ((opcode >> 4) & 0xff) as u8; + let opcode2 = (opcode & 0xf) as u8; + let r1 = machreg_to_gpr(r1) & 0x0f; + + enc[0] = opcode1; + enc[1] = r1 << 4 | opcode2; + enc[2..].copy_from_slice(&i2.to_be_bytes()); + enc +} + +/// RILb-type instructions. +/// +/// 47 39 35 31 +/// opcode1 r1 opcode2 ri2 +/// 40 36 32 0 +/// +fn enc_ril_b(opcode: u16, r1: Reg, ri2: u32) -> [u8; 6] { + let mut enc: [u8; 6] = [0; 6]; + let opcode1 = ((opcode >> 4) & 0xff) as u8; + let opcode2 = (opcode & 0xf) as u8; + let r1 = machreg_to_gpr(r1) & 0x0f; + + enc[0] = opcode1; + enc[1] = r1 << 4 | opcode2; + enc[2..].copy_from_slice(&ri2.to_be_bytes()); + enc +} + +/// RILc-type instructions. +/// +/// 47 39 35 31 +/// opcode1 m1 opcode2 i2 +/// 40 36 32 0 +/// +fn enc_ril_c(opcode: u16, m1: u8, ri2: u32) -> [u8; 6] { + let mut enc: [u8; 6] = [0; 6]; + let opcode1 = ((opcode >> 4) & 0xff) as u8; + let opcode2 = (opcode & 0xf) as u8; + let m1 = m1 & 0x0f; + + enc[0] = opcode1; + enc[1] = m1 << 4 | opcode2; + enc[2..].copy_from_slice(&ri2.to_be_bytes()); + enc +} + +/// RR-type instructions. +/// +/// 15 7 3 +/// opcode r1 r2 +/// 8 4 0 +/// +fn enc_rr(opcode: u16, r1: Reg, r2: Reg) -> [u8; 2] { + let mut enc: [u8; 2] = [0; 2]; + let opcode = (opcode & 0xff) as u8; + let r1 = machreg_to_gpr_or_fpr(r1) & 0x0f; + let r2 = machreg_to_gpr_or_fpr(r2) & 0x0f; + + enc[0] = opcode; + enc[1] = r1 << 4 | r2; + enc +} + +/// RRD-type instructions. 
+/// +/// 31 15 11 7 3 +/// opcode r1 -- r3 r2 +/// 16 12 8 4 0 +/// +fn enc_rrd(opcode: u16, r1: Reg, r2: Reg, r3: Reg) -> [u8; 4] { + let mut enc: [u8; 4] = [0; 4]; + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let r1 = machreg_to_fpr(r1) & 0x0f; + let r2 = machreg_to_fpr(r2) & 0x0f; + let r3 = machreg_to_fpr(r3) & 0x0f; + + enc[0] = opcode1; + enc[1] = opcode2; + enc[2] = r1 << 4; + enc[3] = r3 << 4 | r2; + enc +} + +/// RRE-type instructions. +/// +/// 31 15 7 3 +/// opcode -- r1 r2 +/// 16 8 4 0 +/// +fn enc_rre(opcode: u16, r1: Reg, r2: Reg) -> [u8; 4] { + let mut enc: [u8; 4] = [0; 4]; + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let r1 = machreg_to_gpr_or_fpr(r1) & 0x0f; + let r2 = machreg_to_gpr_or_fpr(r2) & 0x0f; + + enc[0] = opcode1; + enc[1] = opcode2; + enc[3] = r1 << 4 | r2; + enc +} + +/// RRFa/b-type instructions. +/// +/// 31 15 11 7 3 +/// opcode r3 m4 r1 r2 +/// 16 12 8 4 0 +/// +fn enc_rrf_ab(opcode: u16, r1: Reg, r2: Reg, r3: Reg, m4: u8) -> [u8; 4] { + let mut enc: [u8; 4] = [0; 4]; + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let r1 = machreg_to_gpr_or_fpr(r1) & 0x0f; + let r2 = machreg_to_gpr_or_fpr(r2) & 0x0f; + let r3 = machreg_to_gpr_or_fpr(r3) & 0x0f; + let m4 = m4 & 0x0f; + + enc[0] = opcode1; + enc[1] = opcode2; + enc[2] = r3 << 4 | m4; + enc[3] = r1 << 4 | r2; + enc +} + +/// RRFc/d/e-type instructions. 
+/// +/// 31 15 11 7 3 +/// opcode m3 m4 r1 r2 +/// 16 12 8 4 0 +/// +fn enc_rrf_cde(opcode: u16, r1: Reg, r2: Reg, m3: u8, m4: u8) -> [u8; 4] { + let mut enc: [u8; 4] = [0; 4]; + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let r1 = machreg_to_gpr_or_fpr(r1) & 0x0f; + let r2 = machreg_to_gpr_or_fpr(r2) & 0x0f; + let m3 = m3 & 0x0f; + let m4 = m4 & 0x0f; + + enc[0] = opcode1; + enc[1] = opcode2; + enc[2] = m3 << 4 | m4; + enc[3] = r1 << 4 | r2; + enc +} + +/// RS-type instructions. +/// +/// 31 23 19 15 11 +/// opcode r1 r3 b2 d2 +/// 24 20 16 12 0 +/// +fn enc_rs(opcode: u16, r1: Reg, r3: Reg, b2: Reg, d2: u32) -> [u8; 4] { + let opcode = (opcode & 0xff) as u8; + let r1 = machreg_to_gpr_or_fpr(r1) & 0x0f; + let r3 = machreg_to_gpr_or_fpr(r3) & 0x0f; + let b2 = machreg_to_gpr(b2) & 0x0f; + let d2_lo = (d2 & 0xff) as u8; + let d2_hi = ((d2 >> 8) & 0x0f) as u8; + + let mut enc: [u8; 4] = [0; 4]; + enc[0] = opcode; + enc[1] = r1 << 4 | r3; + enc[2] = b2 << 4 | d2_hi; + enc[3] = d2_lo; + enc +} + +/// RSY-type instructions. +/// +/// 47 39 35 31 27 15 7 +/// opcode1 r1 r3 b2 dl2 dh2 opcode2 +/// 40 36 32 28 16 8 0 +/// +fn enc_rsy(opcode: u16, r1: Reg, r3: Reg, b2: Reg, d2: u32) -> [u8; 6] { + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let r1 = machreg_to_gpr_or_fpr(r1) & 0x0f; + let r3 = machreg_to_gpr_or_fpr(r3) & 0x0f; + let b2 = machreg_to_gpr(b2) & 0x0f; + let dl2_lo = (d2 & 0xff) as u8; + let dl2_hi = ((d2 >> 8) & 0x0f) as u8; + let dh2 = ((d2 >> 12) & 0xff) as u8; + + let mut enc: [u8; 6] = [0; 6]; + enc[0] = opcode1; + enc[1] = r1 << 4 | r3; + enc[2] = b2 << 4 | dl2_hi; + enc[3] = dl2_lo; + enc[4] = dh2; + enc[5] = opcode2; + enc +} + +/// RX-type instructions. 
+/// +/// 31 23 19 15 11 +/// opcode r1 x2 b2 d2 +/// 24 20 16 12 0 +/// +fn enc_rx(opcode: u16, r1: Reg, b2: Reg, x2: Reg, d2: u32) -> [u8; 4] { + let opcode = (opcode & 0xff) as u8; + let r1 = machreg_to_gpr_or_fpr(r1) & 0x0f; + let b2 = machreg_to_gpr(b2) & 0x0f; + let x2 = machreg_to_gpr(x2) & 0x0f; + let d2_lo = (d2 & 0xff) as u8; + let d2_hi = ((d2 >> 8) & 0x0f) as u8; + + let mut enc: [u8; 4] = [0; 4]; + enc[0] = opcode; + enc[1] = r1 << 4 | x2; + enc[2] = b2 << 4 | d2_hi; + enc[3] = d2_lo; + enc +} + +/// RXY-type instructions. +/// +/// 47 39 35 31 27 15 7 +/// opcode1 r1 x2 b2 dl2 dh2 opcode2 +/// 40 36 32 28 16 8 0 +/// +fn enc_rxy(opcode: u16, r1: Reg, b2: Reg, x2: Reg, d2: u32) -> [u8; 6] { + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let r1 = machreg_to_gpr_or_fpr(r1) & 0x0f; + let b2 = machreg_to_gpr(b2) & 0x0f; + let x2 = machreg_to_gpr(x2) & 0x0f; + let dl2_lo = (d2 & 0xff) as u8; + let dl2_hi = ((d2 >> 8) & 0x0f) as u8; + let dh2 = ((d2 >> 12) & 0xff) as u8; + + let mut enc: [u8; 6] = [0; 6]; + enc[0] = opcode1; + enc[1] = r1 << 4 | x2; + enc[2] = b2 << 4 | dl2_hi; + enc[3] = dl2_lo; + enc[4] = dh2; + enc[5] = opcode2; + enc +} + +/// SI-type instructions. +/// +/// 31 23 15 11 +/// opcode i2 b1 d1 +/// 24 16 12 0 +/// +fn enc_si(opcode: u16, b1: Reg, d1: u32, i2: u8) -> [u8; 4] { + let opcode = (opcode & 0xff) as u8; + let b1 = machreg_to_gpr(b1) & 0x0f; + let d1_lo = (d1 & 0xff) as u8; + let d1_hi = ((d1 >> 8) & 0x0f) as u8; + + let mut enc: [u8; 4] = [0; 4]; + enc[0] = opcode; + enc[1] = i2; + enc[2] = b1 << 4 | d1_hi; + enc[3] = d1_lo; + enc +} + +/// SIL-type instructions. 
+/// +/// 47 31 27 15 +/// opcode b1 d1 i2 +/// 32 28 16 0 +/// +fn enc_sil(opcode: u16, b1: Reg, d1: u32, i2: i16) -> [u8; 6] { + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let b1 = machreg_to_gpr(b1) & 0x0f; + let d1_lo = (d1 & 0xff) as u8; + let d1_hi = ((d1 >> 8) & 0x0f) as u8; + + let mut enc: [u8; 6] = [0; 6]; + enc[0] = opcode1; + enc[1] = opcode2; + enc[2] = b1 << 4 | d1_hi; + enc[3] = d1_lo; + enc[4..].copy_from_slice(&i2.to_be_bytes()); + enc +} + +/// SIY-type instructions. +/// +/// 47 39 31 27 15 7 +/// opcode1 i2 b1 dl1 dh1 opcode2 +/// 40 32 28 16 8 0 +/// +fn enc_siy(opcode: u16, b1: Reg, d1: u32, i2: u8) -> [u8; 6] { + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let b1 = machreg_to_gpr(b1) & 0x0f; + let dl1_lo = (d1 & 0xff) as u8; + let dl1_hi = ((d1 >> 8) & 0x0f) as u8; + let dh1 = ((d1 >> 12) & 0xff) as u8; + + let mut enc: [u8; 6] = [0; 6]; + enc[0] = opcode1; + enc[1] = i2; + enc[2] = b1 << 4 | dl1_hi; + enc[3] = dl1_lo; + enc[4] = dh1; + enc[5] = opcode2; + enc +} + +/// VRR-type instructions. +/// +/// 47 39 35 31 27 23 19 15 11 7 +/// opcode1 v1 v2 v3 - m6 m5 m4 rxb opcode2 +/// 40 36 32 28 24 20 16 12 8 0 +/// +fn enc_vrr(opcode: u16, v1: Reg, v2: Reg, v3: Reg, m4: u8, m5: u8, m6: u8) -> [u8; 6] { + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let rxb = 0; // FIXME + let v1 = machreg_to_fpr(v1) & 0x0f; // FIXME + let v2 = machreg_to_fpr(v2) & 0x0f; // FIXME + let v3 = machreg_to_fpr(v3) & 0x0f; // FIXME + let m4 = m4 & 0x0f; + let m5 = m5 & 0x0f; + let m6 = m6 & 0x0f; + + let mut enc: [u8; 6] = [0; 6]; + enc[0] = opcode1; + enc[1] = v1 << 4 | v2; + enc[2] = v3 << 4; + enc[3] = m6 << 4 | m5; + enc[4] = m4 << 4 | rxb; + enc[5] = opcode2; + enc +} + +/// VRX-type instructions. 
+/// +/// 47 39 35 31 27 15 11 7 +/// opcode1 v1 x2 b2 d2 m3 rxb opcode2 +/// 40 36 32 28 16 12 8 0 +/// +fn enc_vrx(opcode: u16, v1: Reg, b2: Reg, x2: Reg, d2: u32, m3: u8) -> [u8; 6] { + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let rxb = 0; // FIXME + let v1 = machreg_to_fpr(v1) & 0x0f; // FIXME + let b2 = machreg_to_gpr(b2) & 0x0f; + let x2 = machreg_to_gpr(x2) & 0x0f; + let d2_lo = (d2 & 0xff) as u8; + let d2_hi = ((d2 >> 8) & 0x0f) as u8; + let m3 = m3 & 0x0f; + + let mut enc: [u8; 6] = [0; 6]; + enc[0] = opcode1; + enc[1] = v1 << 4 | x2; + enc[2] = b2 << 4 | d2_hi; + enc[3] = d2_lo; + enc[4] = m3 << 4 | rxb; + enc[5] = opcode2; + enc +} + +/// Emit encoding to sink. +fn put(sink: &mut MachBuffer, enc: &[u8]) { + for byte in enc { + sink.put1(*byte); + } +} + +/// Emit encoding to sink, adding a trap on the last byte. +fn put_with_trap(sink: &mut MachBuffer, enc: &[u8], srcloc: SourceLoc, trap_code: TrapCode) { + let len = enc.len(); + for i in 0..len - 1 { + sink.put1(enc[i]); + } + sink.add_trap(srcloc, trap_code); + sink.put1(enc[len - 1]); +} + +/// Emit encoding to sink, adding a relocation at byte offset. +fn put_with_reloc( + sink: &mut MachBuffer, + enc: &[u8], + offset: usize, + ri2_srcloc: SourceLoc, + ri2_reloc: Reloc, + ri2_name: &ExternalName, + ri2_offset: i64, +) { + let len = enc.len(); + for i in 0..offset { + sink.put1(enc[i]); + } + sink.add_reloc(ri2_srcloc, ri2_reloc, ri2_name, ri2_offset + offset as i64); + for i in offset..len { + sink.put1(enc[i]); + } +} + +/// State carried between emissions of a sequence of instructions. +#[derive(Default, Clone, Debug)] +pub struct EmitState { + pub(crate) initial_sp_offset: i64, + pub(crate) virtual_sp_offset: i64, + /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`. + stack_map: Option, + /// Current source-code location corresponding to instruction to be emitted. 
+ cur_srcloc: SourceLoc, +} + +impl MachInstEmitState for EmitState { + fn new(abi: &dyn ABICallee) -> Self { + EmitState { + virtual_sp_offset: 0, + initial_sp_offset: abi.frame_size() as i64, + stack_map: None, + cur_srcloc: SourceLoc::default(), + } + } + + fn pre_safepoint(&mut self, stack_map: StackMap) { + self.stack_map = Some(stack_map); + } + + fn pre_sourceloc(&mut self, srcloc: SourceLoc) { + self.cur_srcloc = srcloc; + } +} + +impl EmitState { + fn take_stack_map(&mut self) -> Option { + self.stack_map.take() + } + + fn clear_post_insn(&mut self) { + self.stack_map = None; + } + + fn cur_srcloc(&self) -> SourceLoc { + self.cur_srcloc + } +} + +/// Constant state used during function compilation. +pub struct EmitInfo(settings::Flags); + +impl EmitInfo { + pub(crate) fn new(flags: settings::Flags) -> Self { + Self(flags) + } +} + +impl MachInstEmitInfo for EmitInfo { + fn flags(&self) -> &settings::Flags { + &self.0 + } +} + +impl MachInstEmit for Inst { + type State = EmitState; + type Info = EmitInfo; + + fn emit(&self, sink: &mut MachBuffer, emit_info: &Self::Info, state: &mut EmitState) { + // N.B.: we *must* not exceed the "worst-case size" used to compute + // where to insert islands, except when islands are explicitly triggered + // (with an `EmitIsland`). We check this in debug builds. This is `mut` + // to allow disabling the check for `JTSequence`, which is always + // emitted following an `EmitIsland`. 
+ let mut start_off = sink.cur_offset(); + + match self { + &Inst::AluRRR { alu_op, rd, rn, rm } => { + let (opcode, have_rr) = match alu_op { + ALUOp::Add32 => (0xb9f8, true), // ARK + ALUOp::Add64 => (0xb9e8, true), // AGRK + ALUOp::Sub32 => (0xb9f9, true), // SRK + ALUOp::Sub64 => (0xb9e9, true), // SGRK + ALUOp::Mul32 => (0xb9fd, true), // MSRKC + ALUOp::Mul64 => (0xb9ed, true), // MSGRKC + ALUOp::And32 => (0xb9f4, true), // NRK + ALUOp::And64 => (0xb9e4, true), // NGRK + ALUOp::Orr32 => (0xb9f6, true), // ORK + ALUOp::Orr64 => (0xb9e6, true), // OGRK + ALUOp::Xor32 => (0xb9f7, true), // XRK + ALUOp::Xor64 => (0xb9e7, true), // XGRK + ALUOp::AndNot32 => (0xb974, false), // NNRK + ALUOp::AndNot64 => (0xb964, false), // NNGRK + ALUOp::OrrNot32 => (0xb976, false), // NORK + ALUOp::OrrNot64 => (0xb966, false), // NOGRK + ALUOp::XorNot32 => (0xb977, false), // NXRK + ALUOp::XorNot64 => (0xb967, false), // NXGRK + _ => unreachable!(), + }; + if have_rr && rd.to_reg() == rn { + let inst = Inst::AluRR { alu_op, rd, rm }; + inst.emit(sink, emit_info, state); + } else { + put(sink, &enc_rrf_ab(opcode, rd.to_reg(), rn, rm, 0)); + } + } + &Inst::AluRRSImm16 { + alu_op, + rd, + rn, + imm, + } => { + if rd.to_reg() == rn { + let inst = Inst::AluRSImm16 { alu_op, rd, imm }; + inst.emit(sink, emit_info, state); + } else { + let opcode = match alu_op { + ALUOp::Add32 => 0xecd8, // AHIK + ALUOp::Add64 => 0xecd9, // AGHIK + _ => unreachable!(), + }; + put(sink, &enc_rie_d(opcode, rd.to_reg(), rn, imm as u16)); + } + } + &Inst::AluRR { alu_op, rd, rm } => { + let (opcode, is_rre) = match alu_op { + ALUOp::Add32 => (0x1a, false), // AR + ALUOp::Add64 => (0xb908, true), // AGR + ALUOp::Add64Ext32 => (0xb918, true), // AGFR + ALUOp::Sub32 => (0x1b, false), // SR + ALUOp::Sub64 => (0xb909, true), // SGR + ALUOp::Sub64Ext32 => (0xb919, true), // SGFR + ALUOp::Mul32 => (0xb252, true), // MSR + ALUOp::Mul64 => (0xb90c, true), // MSGR + ALUOp::Mul64Ext32 => (0xb91c, true), // MSGFR + 
ALUOp::And32 => (0x14, false), // NR + ALUOp::And64 => (0xb980, true), // NGR + ALUOp::Orr32 => (0x16, false), // OR + ALUOp::Orr64 => (0xb981, true), // OGR + ALUOp::Xor32 => (0x17, false), // XR + ALUOp::Xor64 => (0xb982, true), // XGR + _ => unreachable!(), + }; + if is_rre { + put(sink, &enc_rre(opcode, rd.to_reg(), rm)); + } else { + put(sink, &enc_rr(opcode, rd.to_reg(), rm)); + } + } + &Inst::AluRX { + alu_op, + rd, + ref mem, + } => { + let (opcode_rx, opcode_rxy) = match alu_op { + ALUOp::Add32 => (Some(0x5a), Some(0xe35a)), // A(Y) + ALUOp::Add32Ext16 => (Some(0x4a), Some(0xe34a)), // AH(Y) + ALUOp::Add64 => (None, Some(0xe308)), // AG + ALUOp::Add64Ext16 => (None, Some(0xe338)), // AGH + ALUOp::Add64Ext32 => (None, Some(0xe318)), // AGF + ALUOp::Sub32 => (Some(0x5b), Some(0xe35b)), // S(Y) + ALUOp::Sub32Ext16 => (Some(0x4b), Some(0xe37b)), // SH(Y) + ALUOp::Sub64 => (None, Some(0xe309)), // SG + ALUOp::Sub64Ext16 => (None, Some(0xe339)), // SGH + ALUOp::Sub64Ext32 => (None, Some(0xe319)), // SGF + ALUOp::Mul32 => (Some(0x71), Some(0xe351)), // MS(Y) + ALUOp::Mul32Ext16 => (Some(0x4c), Some(0xe37c)), // MH(Y) + ALUOp::Mul64 => (None, Some(0xe30c)), // MSG + ALUOp::Mul64Ext16 => (None, Some(0xe33c)), // MSH + ALUOp::Mul64Ext32 => (None, Some(0xe31c)), // MSGF + ALUOp::And32 => (Some(0x54), Some(0xe354)), // N(Y) + ALUOp::And64 => (None, Some(0xe380)), // NG + ALUOp::Orr32 => (Some(0x56), Some(0xe356)), // O(Y) + ALUOp::Orr64 => (None, Some(0xe381)), // OG + ALUOp::Xor32 => (Some(0x57), Some(0xe357)), // X(Y) + ALUOp::Xor64 => (None, Some(0xe382)), // XG + _ => unreachable!(), + }; + let rd = rd.to_reg(); + mem_emit( + rd, mem, opcode_rx, opcode_rxy, None, true, sink, emit_info, state, + ); + } + &Inst::AluRSImm16 { alu_op, rd, imm } => { + let opcode = match alu_op { + ALUOp::Add32 => 0xa7a, // AHI + ALUOp::Add64 => 0xa7b, // AGHI + ALUOp::Mul32 => 0xa7c, // MHI + ALUOp::Mul64 => 0xa7d, // MGHI + _ => unreachable!(), + }; + put(sink, &enc_ri_a(opcode, 
rd.to_reg(), imm as u16)); + } + &Inst::AluRSImm32 { alu_op, rd, imm } => { + let opcode = match alu_op { + ALUOp::Add32 => 0xc29, // AFI + ALUOp::Add64 => 0xc28, // AGFI + ALUOp::Mul32 => 0xc21, // MSFI + ALUOp::Mul64 => 0xc20, // MSGFI + _ => unreachable!(), + }; + put(sink, &enc_ril_a(opcode, rd.to_reg(), imm as u32)); + } + &Inst::AluRUImm32 { alu_op, rd, imm } => { + let opcode = match alu_op { + ALUOp::Add32 => 0xc2b, // ALFI + ALUOp::Add64 => 0xc2a, // ALGFI + ALUOp::Sub32 => 0xc25, // SLFI + ALUOp::Sub64 => 0xc24, // SLGFI + _ => unreachable!(), + }; + put(sink, &enc_ril_a(opcode, rd.to_reg(), imm)); + } + &Inst::AluRUImm16Shifted { alu_op, rd, imm } => { + let opcode = match (alu_op, imm.shift) { + (ALUOp::And32, 0) => 0xa57, // NILL + (ALUOp::And32, 1) => 0xa56, // NILH + (ALUOp::And64, 0) => 0xa57, // NILL + (ALUOp::And64, 1) => 0xa56, // NILH + (ALUOp::And64, 2) => 0xa55, // NIHL + (ALUOp::And64, 3) => 0xa54, // NIHL + (ALUOp::Orr32, 0) => 0xa5b, // OILL + (ALUOp::Orr32, 1) => 0xa5a, // OILH + (ALUOp::Orr64, 0) => 0xa5b, // OILL + (ALUOp::Orr64, 1) => 0xa5a, // OILH + (ALUOp::Orr64, 2) => 0xa59, // OIHL + (ALUOp::Orr64, 3) => 0xa58, // OIHH + _ => unreachable!(), + }; + put(sink, &enc_ri_a(opcode, rd.to_reg(), imm.bits)); + } + &Inst::AluRUImm32Shifted { alu_op, rd, imm } => { + let opcode = match (alu_op, imm.shift) { + (ALUOp::And32, 0) => 0xc0b, // NILF + (ALUOp::And64, 0) => 0xc0b, // NILF + (ALUOp::And64, 1) => 0xc0a, // NIHF + (ALUOp::Orr32, 0) => 0xc0d, // OILF + (ALUOp::Orr64, 0) => 0xc0d, // OILF + (ALUOp::Orr64, 1) => 0xc0c, // OILF + (ALUOp::Xor32, 0) => 0xc07, // XILF + (ALUOp::Xor64, 0) => 0xc07, // XILF + (ALUOp::Xor64, 1) => 0xc06, // XILH + _ => unreachable!(), + }; + put(sink, &enc_ril_a(opcode, rd.to_reg(), imm.bits)); + } + + &Inst::SMulWide { rn, rm } => { + let opcode = 0xb9ec; // MGRK + put(sink, &enc_rrf_ab(opcode, gpr(0), rn, rm, 0)); + } + &Inst::UMulWide { rn } => { + let opcode = 0xb986; // MLGR + put(sink, &enc_rre(opcode, 
gpr(0), rn)); + } + &Inst::SDivMod32 { rn } => { + let opcode = 0xb91d; // DSGFR + let srcloc = state.cur_srcloc(); + let trap_code = TrapCode::IntegerDivisionByZero; + put_with_trap(sink, &enc_rre(opcode, gpr(0), rn), srcloc, trap_code); + } + &Inst::SDivMod64 { rn } => { + let opcode = 0xb90d; // DSGR + let srcloc = state.cur_srcloc(); + let trap_code = TrapCode::IntegerDivisionByZero; + put_with_trap(sink, &enc_rre(opcode, gpr(0), rn), srcloc, trap_code); + } + &Inst::UDivMod32 { rn } => { + let opcode = 0xb997; // DLR + let srcloc = state.cur_srcloc(); + let trap_code = TrapCode::IntegerDivisionByZero; + put_with_trap(sink, &enc_rre(opcode, gpr(0), rn), srcloc, trap_code); + } + &Inst::UDivMod64 { rn } => { + let opcode = 0xb987; // DLGR + let srcloc = state.cur_srcloc(); + let trap_code = TrapCode::IntegerDivisionByZero; + put_with_trap(sink, &enc_rre(opcode, gpr(0), rn), srcloc, trap_code); + } + &Inst::Flogr { rn } => { + let opcode = 0xb983; // FLOGR + put(sink, &enc_rre(opcode, gpr(0), rn)); + } + + &Inst::ShiftRR { + shift_op, + rd, + rn, + shift_imm, + shift_reg, + } => { + let opcode = match shift_op { + ShiftOp::RotL32 => 0xeb1d, // RLL + ShiftOp::RotL64 => 0xeb1c, // RLLG + ShiftOp::LShL32 => 0xebdf, // SLLK (SLL ?) + ShiftOp::LShL64 => 0xeb0d, // SLLG + ShiftOp::LShR32 => 0xebde, // SRLK (SRL ?) + ShiftOp::LShR64 => 0xeb0c, // SRLG + ShiftOp::AShR32 => 0xebdc, // SRAK (SRA ?) 
+ ShiftOp::AShR64 => 0xeb0a, // SRAG + }; + let shift_reg = match shift_reg { + Some(reg) => reg, + None => zero_reg(), + }; + put( + sink, + &enc_rsy(opcode, rd.to_reg(), rn, shift_reg, shift_imm.bits()), + ); + } + + &Inst::UnaryRR { op, rd, rn } => { + match op { + UnaryOp::Abs32 => { + let opcode = 0x10; // LPR + put(sink, &enc_rr(opcode, rd.to_reg(), rn)); + } + UnaryOp::Abs64 => { + let opcode = 0xb900; // LPGR + put(sink, &enc_rre(opcode, rd.to_reg(), rn)); + } + UnaryOp::Abs64Ext32 => { + let opcode = 0xb910; // LPGFR + put(sink, &enc_rre(opcode, rd.to_reg(), rn)); + } + UnaryOp::Neg32 => { + let opcode = 0x13; // LCR + put(sink, &enc_rr(opcode, rd.to_reg(), rn)); + } + UnaryOp::Neg64 => { + let opcode = 0xb903; // LCGR + put(sink, &enc_rre(opcode, rd.to_reg(), rn)); + } + UnaryOp::Neg64Ext32 => { + let opcode = 0xb913; // LCGFR + put(sink, &enc_rre(opcode, rd.to_reg(), rn)); + } + UnaryOp::PopcntByte => { + let opcode = 0xb9e1; // POPCNT + put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rn, 0, 0)); + } + UnaryOp::PopcntReg => { + let opcode = 0xb9e1; // POPCNT + put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rn, 8, 0)); + } + } + } + + &Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits, + } => { + let opcode = match (signed, from_bits, to_bits) { + (_, 1, 32) => 0xb926, // LBR + (_, 1, 64) => 0xb906, // LGBR + (false, 8, 32) => 0xb994, // LLCR + (false, 8, 64) => 0xb984, // LLGCR + (true, 8, 32) => 0xb926, // LBR + (true, 8, 64) => 0xb906, // LGBR + (false, 16, 32) => 0xb995, // LLHR + (false, 16, 64) => 0xb985, // LLGHR + (true, 16, 32) => 0xb927, // LHR + (true, 16, 64) => 0xb907, // LGHR + (false, 32, 64) => 0xb916, // LLGFR + (true, 32, 64) => 0xb914, // LGFR + _ => panic!( + "Unsupported extend combination: signed = {}, from_bits = {}, to_bits = {}", + signed, from_bits, to_bits + ), + }; + put(sink, &enc_rre(opcode, rd.to_reg(), rn)); + } + + &Inst::CmpRR { op, rn, rm } => { + let (opcode, is_rre) = match op { + CmpOp::CmpS32 => (0x19, false), // 
CR + CmpOp::CmpS64 => (0xb920, true), // CGR + CmpOp::CmpS64Ext32 => (0xb930, true), // CGFR + CmpOp::CmpL32 => (0x15, false), // CLR + CmpOp::CmpL64 => (0xb921, true), // CLGR + CmpOp::CmpL64Ext32 => (0xb931, true), // CLGFR + _ => unreachable!(), + }; + if is_rre { + put(sink, &enc_rre(opcode, rn, rm)); + } else { + put(sink, &enc_rr(opcode, rn, rm)); + } + } + &Inst::CmpRX { op, rn, ref mem } => { + let (opcode_rx, opcode_rxy, opcode_ril) = match op { + CmpOp::CmpS32 => (Some(0x59), Some(0xe359), Some(0xc6d)), // C(Y), CRL + CmpOp::CmpS32Ext16 => (Some(0x49), Some(0xe379), Some(0xc65)), // CH(Y), CHRL + CmpOp::CmpS64 => (None, Some(0xe320), Some(0xc68)), // CG, CGRL + CmpOp::CmpS64Ext16 => (None, Some(0xe334), Some(0xc64)), // CGH, CGHRL + CmpOp::CmpS64Ext32 => (None, Some(0xe330), Some(0xc6c)), // CGF, CGFRL + CmpOp::CmpL32 => (Some(0x55), Some(0xe355), Some(0xc6f)), // CL(Y), CLRL + CmpOp::CmpL32Ext16 => (None, None, Some(0xc67)), // CLHRL + CmpOp::CmpL64 => (None, Some(0xe321), Some(0xc6a)), // CLG, CLGRL + CmpOp::CmpL64Ext16 => (None, None, Some(0xc66)), // CLGHRL + CmpOp::CmpL64Ext32 => (None, Some(0xe331), Some(0xc6e)), // CLGF, CLGFRL + }; + mem_emit( + rn, mem, opcode_rx, opcode_rxy, opcode_ril, true, sink, emit_info, state, + ); + } + &Inst::CmpRSImm16 { op, rn, imm } => { + let opcode = match op { + CmpOp::CmpS32 => 0xa7e, // CHI + CmpOp::CmpS64 => 0xa7f, // CGHI + _ => unreachable!(), + }; + put(sink, &enc_ri_a(opcode, rn, imm as u16)); + } + &Inst::CmpRSImm32 { op, rn, imm } => { + let opcode = match op { + CmpOp::CmpS32 => 0xc2d, // CFI + CmpOp::CmpS64 => 0xc2c, // CGFI + _ => unreachable!(), + }; + put(sink, &enc_ril_a(opcode, rn, imm as u32)); + } + &Inst::CmpRUImm32 { op, rn, imm } => { + let opcode = match op { + CmpOp::CmpL32 => 0xc2f, // CLFI + CmpOp::CmpL64 => 0xc2e, // CLGFI + _ => unreachable!(), + }; + put(sink, &enc_ril_a(opcode, rn, imm)); + } + &Inst::CmpTrapRR { + op, + rn, + rm, + cond, + trap_code, + } => { + let opcode = match op { 
+ CmpOp::CmpS32 => 0xb972, // CRT + CmpOp::CmpS64 => 0xb960, // CGRT + CmpOp::CmpL32 => 0xb973, // CLRT + CmpOp::CmpL64 => 0xb961, // CLGRT + _ => unreachable!(), + }; + let srcloc = state.cur_srcloc(); + put_with_trap( + sink, + &enc_rrf_cde(opcode, rn, rm, cond.bits(), 0), + srcloc, + trap_code, + ); + } + &Inst::CmpTrapRSImm16 { + op, + rn, + imm, + cond, + trap_code, + } => { + let opcode = match op { + CmpOp::CmpS32 => 0xec72, // CIT + CmpOp::CmpS64 => 0xec70, // CGIT + _ => unreachable!(), + }; + let srcloc = state.cur_srcloc(); + put_with_trap( + sink, + &enc_rie_a(opcode, rn, imm as u16, cond.bits()), + srcloc, + trap_code, + ); + } + &Inst::CmpTrapRUImm16 { + op, + rn, + imm, + cond, + trap_code, + } => { + let opcode = match op { + CmpOp::CmpL32 => 0xec73, // CLFIT + CmpOp::CmpL64 => 0xec71, // CLGIT + _ => unreachable!(), + }; + let srcloc = state.cur_srcloc(); + put_with_trap( + sink, + &enc_rie_a(opcode, rn, imm, cond.bits()), + srcloc, + trap_code, + ); + } + + &Inst::Load32 { rd, ref mem } + | &Inst::Load32ZExt8 { rd, ref mem } + | &Inst::Load32SExt8 { rd, ref mem } + | &Inst::Load32ZExt16 { rd, ref mem } + | &Inst::Load32SExt16 { rd, ref mem } + | &Inst::Load64 { rd, ref mem } + | &Inst::Load64ZExt8 { rd, ref mem } + | &Inst::Load64SExt8 { rd, ref mem } + | &Inst::Load64ZExt16 { rd, ref mem } + | &Inst::Load64SExt16 { rd, ref mem } + | &Inst::Load64ZExt32 { rd, ref mem } + | &Inst::Load64SExt32 { rd, ref mem } + | &Inst::LoadRev16 { rd, ref mem } + | &Inst::LoadRev32 { rd, ref mem } + | &Inst::LoadRev64 { rd, ref mem } + | &Inst::FpuLoad32 { rd, ref mem } + | &Inst::FpuLoad64 { rd, ref mem } => { + let (opcode_rx, opcode_rxy, opcode_ril) = match self { + &Inst::Load32 { .. } => (Some(0x58), Some(0xe358), Some(0xc4d)), // L(Y), LRL + &Inst::Load32ZExt8 { .. } => (None, Some(0xe394), None), // LLC + &Inst::Load32SExt8 { .. } => (None, Some(0xe376), None), // LB + &Inst::Load32ZExt16 { .. 
} => (None, Some(0xe395), Some(0xc42)), // LLH, LLHRL + &Inst::Load32SExt16 { .. } => (Some(0x48), Some(0xe378), Some(0xc45)), // LH(Y), LHRL + &Inst::Load64 { .. } => (None, Some(0xe304), Some(0xc48)), // LG, LGRL + &Inst::Load64ZExt8 { .. } => (None, Some(0xe390), None), // LLGC + &Inst::Load64SExt8 { .. } => (None, Some(0xe377), None), // LGB + &Inst::Load64ZExt16 { .. } => (None, Some(0xe391), Some(0xc46)), // LLGH, LLGHRL + &Inst::Load64SExt16 { .. } => (None, Some(0xe315), Some(0xc44)), // LGH, LGHRL + &Inst::Load64ZExt32 { .. } => (None, Some(0xe316), Some(0xc4e)), // LLGF, LLGFRL + &Inst::Load64SExt32 { .. } => (None, Some(0xe314), Some(0xc4c)), // LGF, LGFRL + &Inst::LoadRev16 { .. } => (None, Some(0xe31f), None), // LRVH + &Inst::LoadRev32 { .. } => (None, Some(0xe31e), None), // LRV + &Inst::LoadRev64 { .. } => (None, Some(0xe30f), None), // LRVG + &Inst::FpuLoad32 { .. } => (Some(0x78), Some(0xed64), None), // LE(Y) + &Inst::FpuLoad64 { .. } => (Some(0x68), Some(0xed65), None), // LD(Y) + _ => unreachable!(), + }; + let rd = rd.to_reg(); + mem_emit( + rd, mem, opcode_rx, opcode_rxy, opcode_ril, true, sink, emit_info, state, + ); + } + &Inst::FpuLoadRev32 { rd, ref mem } | &Inst::FpuLoadRev64 { rd, ref mem } => { + let opcode = match self { + &Inst::FpuLoadRev32 { .. } => 0xe603, // VLEBRF + &Inst::FpuLoadRev64 { .. } => 0xe602, // VLEBRG + _ => unreachable!(), + }; + + let (mem_insts, mem) = mem_finalize(mem, state, true, false, false, true); + for inst in mem_insts.into_iter() { + inst.emit(sink, emit_info, state); + } + + let srcloc = state.cur_srcloc(); + if srcloc != SourceLoc::default() && mem.can_trap() { + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + + match &mem { + &MemArg::BXD12 { + base, index, disp, .. 
+ } => { + put( + sink, + &enc_vrx(opcode, rd.to_reg(), base, index, disp.bits(), 0), + ); + } + _ => unreachable!(), + } + } + + &Inst::Store8 { rd, ref mem } + | &Inst::Store16 { rd, ref mem } + | &Inst::Store32 { rd, ref mem } + | &Inst::Store64 { rd, ref mem } + | &Inst::StoreRev16 { rd, ref mem } + | &Inst::StoreRev32 { rd, ref mem } + | &Inst::StoreRev64 { rd, ref mem } + | &Inst::FpuStore32 { rd, ref mem } + | &Inst::FpuStore64 { rd, ref mem } => { + let (opcode_rx, opcode_rxy, opcode_ril) = match self { + &Inst::Store8 { .. } => (Some(0x42), Some(0xe372), None), // STC(Y) + &Inst::Store16 { .. } => (Some(0x40), Some(0xe370), Some(0xc47)), // STH(Y), STHRL + &Inst::Store32 { .. } => (Some(0x50), Some(0xe350), Some(0xc4f)), // ST(Y), STRL + &Inst::Store64 { .. } => (None, Some(0xe324), Some(0xc4b)), // STG, STGRL + &Inst::StoreRev16 { .. } => (None, Some(0xe33f), None), // STRVH + &Inst::StoreRev32 { .. } => (None, Some(0xe33e), None), // STRV + &Inst::StoreRev64 { .. } => (None, Some(0xe32f), None), // STRVG + &Inst::FpuStore32 { .. } => (Some(0x70), Some(0xed66), None), // STE(Y) + &Inst::FpuStore64 { .. } => (Some(0x60), Some(0xed67), None), // STD(Y) + _ => unreachable!(), + }; + mem_emit( + rd, mem, opcode_rx, opcode_rxy, opcode_ril, true, sink, emit_info, state, + ); + } + &Inst::StoreImm8 { imm, ref mem } => { + let opcode_si = 0x92; // MVI + let opcode_siy = 0xeb52; // MVIY + mem_imm8_emit( + imm, mem, opcode_si, opcode_siy, true, sink, emit_info, state, + ); + } + &Inst::StoreImm16 { imm, ref mem } + | &Inst::StoreImm32SExt16 { imm, ref mem } + | &Inst::StoreImm64SExt16 { imm, ref mem } => { + let opcode = match self { + &Inst::StoreImm16 { .. } => 0xe544, // MVHHI + &Inst::StoreImm32SExt16 { .. } => 0xe54c, // MVHI + &Inst::StoreImm64SExt16 { .. 
} => 0xe548, // MVGHI + _ => unreachable!(), + }; + mem_imm16_emit(imm, mem, opcode, true, sink, emit_info, state); + } + &Inst::FpuStoreRev32 { rd, ref mem } | &Inst::FpuStoreRev64 { rd, ref mem } => { + let opcode = match self { + &Inst::FpuStoreRev32 { .. } => 0xe60b, // VSTEBRF + &Inst::FpuStoreRev64 { .. } => 0xe60a, // VSTEBRG + _ => unreachable!(), + }; + + let (mem_insts, mem) = mem_finalize(mem, state, true, false, false, true); + for inst in mem_insts.into_iter() { + inst.emit(sink, emit_info, state); + } + + let srcloc = state.cur_srcloc(); + if srcloc != SourceLoc::default() && mem.can_trap() { + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + + match &mem { + &MemArg::BXD12 { + base, index, disp, .. + } => { + put(sink, &enc_vrx(opcode, rd, base, index, disp.bits(), 0)); + } + _ => unreachable!(), + } + } + + &Inst::LoadMultiple64 { + rt, + rt2, + addr_reg, + addr_off, + } => { + let opcode = 0xeb04; // LMG + let rt = rt.to_reg(); + let rt2 = rt2.to_reg(); + put(sink, &enc_rsy(opcode, rt, rt2, addr_reg, addr_off.bits())); + } + &Inst::StoreMultiple64 { + rt, + rt2, + addr_reg, + addr_off, + } => { + let opcode = 0xeb24; // STMG + put(sink, &enc_rsy(opcode, rt, rt2, addr_reg, addr_off.bits())); + } + + &Inst::LoadAddr { rd, ref mem } => { + let opcode_rx = Some(0x41); // LA + let opcode_rxy = Some(0xe371); // LAY + let opcode_ril = Some(0xc00); // LARL + let rd = rd.to_reg(); + mem_emit( + rd, mem, opcode_rx, opcode_rxy, opcode_ril, false, sink, emit_info, state, + ); + } + + &Inst::Mov64 { rd, rm } => { + let opcode = 0xb904; // LGR + put(sink, &enc_rre(opcode, rd.to_reg(), rm)); + } + &Inst::Mov32 { rd, rm } => { + let opcode = 0x18; // LR + put(sink, &enc_rr(opcode, rd.to_reg(), rm)); + } + &Inst::Mov32Imm { rd, imm } => { + let opcode = 0xc09; // IILF + put(sink, &enc_ril_a(opcode, rd.to_reg(), imm)); + } + &Inst::Mov32SImm16 { rd, imm } => { + let opcode = 0xa78; // LHI + put(sink, &enc_ri_a(opcode, rd.to_reg(), imm as u16)); + } + 
&Inst::Mov64SImm16 { rd, imm } => { + let opcode = 0xa79; // LGHI + put(sink, &enc_ri_a(opcode, rd.to_reg(), imm as u16)); + } + &Inst::Mov64SImm32 { rd, imm } => { + let opcode = 0xc01; // LGFI + put(sink, &enc_ril_a(opcode, rd.to_reg(), imm as u32)); + } + &Inst::CMov32 { rd, cond, rm } => { + let opcode = 0xb9f2; // LOCR + put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rm, cond.bits(), 0)); + } + &Inst::CMov64 { rd, cond, rm } => { + let opcode = 0xb9e2; // LOCGR + put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rm, cond.bits(), 0)); + } + &Inst::CMov32SImm16 { rd, cond, imm } => { + let opcode = 0xec42; // LOCHI + put( + sink, + &enc_rie_g(opcode, rd.to_reg(), imm as u16, cond.bits()), + ); + } + &Inst::CMov64SImm16 { rd, cond, imm } => { + let opcode = 0xec46; // LOCGHI + put( + sink, + &enc_rie_g(opcode, rd.to_reg(), imm as u16, cond.bits()), + ); + } + &Inst::Mov64UImm16Shifted { rd, imm } => { + let opcode = match imm.shift { + 0 => 0xa5f, // LLILL + 1 => 0xa5e, // LLILH + 2 => 0xa5d, // LLIHL + 3 => 0xa5c, // LLIHH + _ => unreachable!(), + }; + put(sink, &enc_ri_a(opcode, rd.to_reg(), imm.bits)); + } + &Inst::Mov64UImm32Shifted { rd, imm } => { + let opcode = match imm.shift { + 0 => 0xc0f, // LLILF + 1 => 0xc0e, // LLIHF + _ => unreachable!(), + }; + put(sink, &enc_ril_a(opcode, rd.to_reg(), imm.bits)); + } + &Inst::Insert64UImm16Shifted { rd, imm } => { + let opcode = match imm.shift { + 0 => 0xa53, // IILL + 1 => 0xa52, // IILH + 2 => 0xa51, // IIHL + 3 => 0xa50, // IIHH + _ => unreachable!(), + }; + put(sink, &enc_ri_a(opcode, rd.to_reg(), imm.bits)); + } + &Inst::Insert64UImm32Shifted { rd, imm } => { + let opcode = match imm.shift { + 0 => 0xc09, // IILF + 1 => 0xc08, // IIHF + _ => unreachable!(), + }; + put(sink, &enc_ril_a(opcode, rd.to_reg(), imm.bits)); + } + &Inst::LoadExtNameFar { + rd, + ref name, + offset, + } => { + let opcode = 0xa75; // BRAS + let srcloc = state.cur_srcloc(); + let reg = writable_spilltmp_reg().to_reg(); + put(sink, 
&enc_ri_b(opcode, reg, 12)); + sink.add_reloc(srcloc, Reloc::Abs8, name, offset); + if emit_info.flags().emit_all_ones_funcaddrs() { + sink.put8(u64::max_value()); + } else { + sink.put8(0); + } + let inst = Inst::Load64 { + rd, + mem: MemArg::reg(reg, MemFlags::trusted()), + }; + inst.emit(sink, emit_info, state); + } + + &Inst::FpuMove32 { rd, rn } => { + let opcode = 0x38; // LER + put(sink, &enc_rr(opcode, rd.to_reg(), rn)); + } + &Inst::FpuMove64 { rd, rn } => { + let opcode = 0x28; // LDR + put(sink, &enc_rr(opcode, rd.to_reg(), rn)); + } + &Inst::FpuCMov32 { rd, cond, rm } => { + let opcode = 0xa74; // BCR + put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 2)); + let opcode = 0x38; // LER + put(sink, &enc_rr(opcode, rd.to_reg(), rm)); + } + &Inst::FpuCMov64 { rd, cond, rm } => { + let opcode = 0xa74; // BCR + put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 2)); + let opcode = 0x28; // LDR + put(sink, &enc_rr(opcode, rd.to_reg(), rm)); + } + &Inst::MovToFpr { rd, rn } => { + let opcode = 0xb3c1; // LDGR + put(sink, &enc_rre(opcode, rd.to_reg(), rn)); + } + &Inst::MovFromFpr { rd, rn } => { + let opcode = 0xb3cd; // LGDR + put(sink, &enc_rre(opcode, rd.to_reg(), rn)); + } + &Inst::LoadFpuConst32 { rd, const_data } => { + let opcode = 0xa75; // BRAS + let reg = writable_spilltmp_reg().to_reg(); + put(sink, &enc_ri_b(opcode, reg, 8)); + sink.put4(const_data.to_bits().swap_bytes()); + let inst = Inst::FpuLoad32 { + rd, + mem: MemArg::reg(reg, MemFlags::trusted()), + }; + inst.emit(sink, emit_info, state); + } + &Inst::LoadFpuConst64 { rd, const_data } => { + let opcode = 0xa75; // BRAS + let reg = writable_spilltmp_reg().to_reg(); + put(sink, &enc_ri_b(opcode, reg, 12)); + sink.put8(const_data.to_bits().swap_bytes()); + let inst = Inst::FpuLoad64 { + rd, + mem: MemArg::reg(reg, MemFlags::trusted()), + }; + inst.emit(sink, emit_info, state); + } + + &Inst::FpuCopysign { rd, rn, rm } => { + let opcode = 0xb372; // CPSDR + put(sink, &enc_rrf_ab(opcode, 
rd.to_reg(), rn, rm, 0)); + } + &Inst::FpuRR { fpu_op, rd, rn } => { + let opcode = match fpu_op { + FPUOp1::Abs32 => 0xb300, // LPEBR + FPUOp1::Abs64 => 0xb310, // LPDBR + FPUOp1::Neg32 => 0xb303, // LCEBR + FPUOp1::Neg64 => 0xb313, // LCDBR + FPUOp1::NegAbs32 => 0xb301, // LNEBR + FPUOp1::NegAbs64 => 0xb311, // LNDBR + FPUOp1::Sqrt32 => 0xb314, // SQEBR + FPUOp1::Sqrt64 => 0xb315, // SQDBR + FPUOp1::Cvt32To64 => 0xb304, // LDEBR + FPUOp1::Cvt64To32 => 0xb344, // LEDBR + }; + put(sink, &enc_rre(opcode, rd.to_reg(), rn)); + } + &Inst::FpuRRR { fpu_op, rd, rm } => { + let opcode = match fpu_op { + FPUOp2::Add32 => 0xb30a, // AEBR + FPUOp2::Add64 => 0xb31a, // ADBR + FPUOp2::Sub32 => 0xb30b, // SEBR + FPUOp2::Sub64 => 0xb31b, // SDBR + FPUOp2::Mul32 => 0xb317, // MEEBR + FPUOp2::Mul64 => 0xb31c, // MDBR + FPUOp2::Div32 => 0xb30d, // DEBR + FPUOp2::Div64 => 0xb31d, // DDBR + _ => unimplemented!(), + }; + put(sink, &enc_rre(opcode, rd.to_reg(), rm)); + } + &Inst::FpuRRRR { fpu_op, rd, rn, rm } => { + let opcode = match fpu_op { + FPUOp3::MAdd32 => 0xb30e, // MAEBR + FPUOp3::MAdd64 => 0xb31e, // MADBR + FPUOp3::MSub32 => 0xb30f, // MSEBR + FPUOp3::MSub64 => 0xb31f, // MSDBR + }; + put(sink, &enc_rrd(opcode, rd.to_reg(), rm, rn)); + } + &Inst::FpuToInt { op, rd, rn } => { + let opcode = match op { + FpuToIntOp::F32ToI32 => 0xb398, // CFEBRA + FpuToIntOp::F32ToU32 => 0xb39c, // CLFEBR + FpuToIntOp::F32ToI64 => 0xb3a8, // CGEBRA + FpuToIntOp::F32ToU64 => 0xb3ac, // CLGEBR + FpuToIntOp::F64ToI32 => 0xb399, // CFDBRA + FpuToIntOp::F64ToU32 => 0xb39d, // CLFDBR + FpuToIntOp::F64ToI64 => 0xb3a9, // CGDBRA + FpuToIntOp::F64ToU64 => 0xb3ad, // CLGDBR + }; + put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rn, 5, 0)); + } + &Inst::IntToFpu { op, rd, rn } => { + let opcode = match op { + IntToFpuOp::I32ToF32 => 0xb394, // CEFBRA + IntToFpuOp::U32ToF32 => 0xb390, // CELFBR + IntToFpuOp::I64ToF32 => 0xb3a4, // CEGBRA + IntToFpuOp::U64ToF32 => 0xb3a0, // CELGBR + IntToFpuOp::I32ToF64 => 
0xb395, // CDFBRA + IntToFpuOp::U32ToF64 => 0xb391, // CDLFBR + IntToFpuOp::I64ToF64 => 0xb3a5, // CDGBRA + IntToFpuOp::U64ToF64 => 0xb3a1, // CDLGBR + }; + put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rn, 0, 0)); + } + &Inst::FpuRound { op, rd, rn } => { + let (opcode, m3) = match op { + FpuRoundMode::Minus32 => (0xb357, 7), // FIEBR + FpuRoundMode::Minus64 => (0xb35f, 7), // FIDBR + FpuRoundMode::Plus32 => (0xb357, 6), // FIEBR + FpuRoundMode::Plus64 => (0xb35f, 6), // FIDBR + FpuRoundMode::Zero32 => (0xb357, 5), // FIEBR + FpuRoundMode::Zero64 => (0xb35f, 5), // FIDBR + FpuRoundMode::Nearest32 => (0xb357, 4), // FIEBR + FpuRoundMode::Nearest64 => (0xb35f, 4), // FIDBR + }; + put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rn, m3, 0)); + } + &Inst::FpuVecRRR { fpu_op, rd, rn, rm } => { + let (opcode, m4) = match fpu_op { + FPUOp2::Max32 => (0xe7ef, 2), // VFMAX + FPUOp2::Max64 => (0xe7ef, 3), // VFMAX + FPUOp2::Min32 => (0xe7ee, 2), // VFMIN + FPUOp2::Min64 => (0xe7ee, 3), // VFMIN + _ => unimplemented!(), + }; + put(sink, &enc_vrr(opcode, rd.to_reg(), rn, rm, m4, 8, 1)); + } + &Inst::FpuCmp32 { rn, rm } => { + let opcode = 0xb309; // CEBR + put(sink, &enc_rre(opcode, rn, rm)); + } + &Inst::FpuCmp64 { rn, rm } => { + let opcode = 0xb319; // CDBR + put(sink, &enc_rre(opcode, rn, rm)); + } + + &Inst::Call { link, ref info } => { + let opcode = 0xc05; // BRASL + let reloc = Reloc::S390xPCRel32Dbl; + let srcloc = state.cur_srcloc(); + if let Some(s) = state.take_stack_map() { + sink.add_stack_map(StackMapExtent::UpcomingBytes(6), s); + } + put_with_reloc( + sink, + &enc_ril_b(opcode, link.to_reg(), 0), + 2, + srcloc, + reloc, + &info.dest, + 0, + ); + if info.opcode.is_call() { + sink.add_call_site(srcloc, info.opcode); + } + } + &Inst::CallInd { link, ref info } => { + let opcode = 0x0d; // BASR + let srcloc = state.cur_srcloc(); + if let Some(s) = state.take_stack_map() { + sink.add_stack_map(StackMapExtent::UpcomingBytes(2), s); + } + put(sink, &enc_rr(opcode, 
link.to_reg(), info.rn)); + if info.opcode.is_call() { + sink.add_call_site(srcloc, info.opcode); + } + } + &Inst::Ret { link } => { + let opcode = 0x07; // BCR + put(sink, &enc_rr(opcode, gpr(15), link)); + } + &Inst::EpiloguePlaceholder => { + // Noop; this is just a placeholder for epilogues. + } + &Inst::Jump { ref dest } => { + let off = sink.cur_offset(); + // Indicate that the jump uses a label, if so, so that a fixup can occur later. + if let Some(l) = dest.as_label() { + sink.use_label_at_offset(off, l, LabelUse::BranchRIL); + sink.add_uncond_branch(off, off + 6, l); + } + // Emit the jump itself. + let opcode = 0xc04; // BCRL + put(sink, &enc_ril_c(opcode, 15, dest.as_ril_offset_or_zero())); + } + &Inst::IndirectBr { rn, .. } => { + let opcode = 0x07; // BCR + put(sink, &enc_rr(opcode, gpr(15), rn)); + } + &Inst::CondBr { + ref taken, + ref not_taken, + cond, + } => { + let opcode = 0xc04; // BCRL + + // Conditional part first. + let cond_off = sink.cur_offset(); + if let Some(l) = taken.as_label() { + sink.use_label_at_offset(cond_off, l, LabelUse::BranchRIL); + let inverted = &enc_ril_c(opcode, cond.invert().bits(), 0); + sink.add_cond_branch(cond_off, cond_off + 6, l, inverted); + } + put( + sink, + &enc_ril_c(opcode, cond.bits(), taken.as_ril_offset_or_zero()), + ); + + // Unconditional part next. 
+ let uncond_off = sink.cur_offset(); + if let Some(l) = not_taken.as_label() { + sink.use_label_at_offset(uncond_off, l, LabelUse::BranchRIL); + sink.add_uncond_branch(uncond_off, uncond_off + 6, l); + } + put( + sink, + &enc_ril_c(opcode, 15, not_taken.as_ril_offset_or_zero()), + ); + } + &Inst::OneWayCondBr { ref target, cond } => { + let opcode = 0xc04; // BCRL + if let Some(l) = target.as_label() { + sink.use_label_at_offset(sink.cur_offset(), l, LabelUse::BranchRIL); + } + put( + sink, + &enc_ril_c(opcode, cond.bits(), target.as_ril_offset_or_zero()), + ); + } + &Inst::Nop0 => {} + &Inst::Nop2 => { + put(sink, &enc_e(0x0707)); + } + &Inst::Debugtrap => { + put(sink, &enc_e(0x0001)); + } + &Inst::Trap { trap_code } => { + if let Some(s) = state.take_stack_map() { + sink.add_stack_map(StackMapExtent::UpcomingBytes(2), s); + } + let srcloc = state.cur_srcloc(); + put_with_trap(sink, &enc_e(0x0000), srcloc, trap_code); + } + &Inst::TrapIf { cond, trap_code } => { + // Branch over trap if condition is false. + let opcode = 0xa74; // BCR + put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 2)); + // Now emit the actual trap. + if let Some(s) = state.take_stack_map() { + sink.add_stack_map(StackMapExtent::UpcomingBytes(2), s); + } + let srcloc = state.cur_srcloc(); + put_with_trap(sink, &enc_e(0x0000), srcloc, trap_code); + } + &Inst::JTSequence { + ridx, + rtmp1, + rtmp2, + ref info, + .. + } => { + let table_label = sink.get_label(); + + // This sequence is *one* instruction in the vcode, and is expanded only here at + // emission time, because we cannot allow the regalloc to insert spills/reloads in + // the middle; we depend on hardcoded PC-rel addressing below. + + // Bounds-check index and branch to default. 
+ let inst = Inst::CmpRUImm32 { + op: CmpOp::CmpL64, + rn: ridx, + imm: info.targets.len() as u32, + }; + inst.emit(sink, emit_info, state); + let inst = Inst::OneWayCondBr { + target: info.default_target, + cond: Cond::from_intcc(IntCC::UnsignedGreaterThanOrEqual), + }; + inst.emit(sink, emit_info, state); + + // Set rtmp2 to index scaled by entry size. + let inst = Inst::ShiftRR { + shift_op: ShiftOp::LShL64, + rd: rtmp2, + rn: ridx, + shift_imm: SImm20::maybe_from_i64(2).unwrap(), + shift_reg: None, + }; + inst.emit(sink, emit_info, state); + + // Set rtmp1 to address of jump table. + let inst = Inst::LoadAddr { + rd: rtmp1, + mem: MemArg::Label { + target: BranchTarget::Label(table_label), + }, + }; + inst.emit(sink, emit_info, state); + + // Set rtmp2 to value loaded out of jump table. + let inst = Inst::Load64SExt32 { + rd: rtmp2, + mem: MemArg::reg_plus_reg(rtmp1.to_reg(), rtmp2.to_reg(), MemFlags::trusted()), + }; + inst.emit(sink, emit_info, state); + + // Set rtmp1 to target address (rtmp1 + rtmp2). + let inst = Inst::AluRRR { + alu_op: ALUOp::Add64, + rd: rtmp1, + rn: rtmp1.to_reg(), + rm: rtmp2.to_reg(), + }; + inst.emit(sink, emit_info, state); + + // Branch to computed address. (`targets` here is only used for successor queries + // and is not needed for emission.) + let inst = Inst::IndirectBr { + rn: rtmp1.to_reg(), + targets: vec![], + }; + inst.emit(sink, emit_info, state); + + // Emit jump table (table of 32-bit offsets). + sink.bind_label(table_label); + let jt_off = sink.cur_offset(); + for &target in info.targets.iter() { + let word_off = sink.cur_offset(); + let off_into_table = word_off - jt_off; + sink.use_label_at_offset( + word_off, + target.as_label().unwrap(), + LabelUse::PCRel32, + ); + sink.put4(off_into_table.swap_bytes()); + } + + // Lowering produces an EmitIsland before using a JTSequence, so we can safely + // disable the worst-case-size check in this case. 
+ start_off = sink.cur_offset(); + } + + &Inst::VirtualSPOffsetAdj { offset } => { + debug!( + "virtual sp offset adjusted by {} -> {}", + offset, + state.virtual_sp_offset + offset + ); + state.virtual_sp_offset += offset; + } + + &Inst::ValueLabelMarker { .. } => { + // Nothing; this is only used to compute debug info. + } + + &Inst::Unwind { ref inst } => { + sink.add_unwind(inst.clone()); + } + } + + let end_off = sink.cur_offset(); + debug_assert!((end_off - start_off) <= Inst::worst_case_size()); + + state.clear_post_insn(); + } + + fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String { + self.print_with_state(mb_rru, state) + } +} diff --git a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs new file mode 100644 index 0000000000..746daf97f1 --- /dev/null +++ b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs @@ -0,0 +1,7140 @@ +use crate::ir::MemFlags; +use crate::isa::s390x::inst::*; +use crate::isa::test_utils; +use crate::settings; +use alloc::vec::Vec; + +#[test] +fn test_s390x_binemit() { + let mut insns = Vec::<(Inst, &str, &str)>::new(); + + insns.push((Inst::Nop0, "", "nop-zero-len")); + insns.push((Inst::Nop2, "0707", "nop")); + + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Add32, + rd: writable_gpr(1), + rn: gpr(2), + rm: gpr(3), + }, + "B9F83012", + "ark %r1, %r2, %r3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Add64, + rd: writable_gpr(4), + rn: gpr(5), + rm: gpr(6), + }, + "B9E86045", + "agrk %r4, %r5, %r6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Sub32, + rd: writable_gpr(1), + rn: gpr(2), + rm: gpr(3), + }, + "B9F93012", + "srk %r1, %r2, %r3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Sub64, + rd: writable_gpr(4), + rn: gpr(5), + rm: gpr(6), + }, + "B9E96045", + "sgrk %r4, %r5, %r6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Mul32, + rd: writable_gpr(1), + rn: gpr(2), + rm: gpr(3), + }, + 
"B9FD3012", + "msrkc %r1, %r2, %r3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Mul64, + rd: writable_gpr(4), + rn: gpr(5), + rm: gpr(6), + }, + "B9ED6045", + "msgrkc %r4, %r5, %r6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::And32, + rd: writable_gpr(1), + rn: gpr(2), + rm: gpr(3), + }, + "B9F43012", + "nrk %r1, %r2, %r3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::And64, + rd: writable_gpr(4), + rn: gpr(5), + rm: gpr(6), + }, + "B9E46045", + "ngrk %r4, %r5, %r6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Orr32, + rd: writable_gpr(1), + rn: gpr(2), + rm: gpr(3), + }, + "B9F63012", + "ork %r1, %r2, %r3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Orr64, + rd: writable_gpr(4), + rn: gpr(5), + rm: gpr(6), + }, + "B9E66045", + "ogrk %r4, %r5, %r6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Xor32, + rd: writable_gpr(1), + rn: gpr(2), + rm: gpr(3), + }, + "B9F73012", + "xrk %r1, %r2, %r3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Xor64, + rd: writable_gpr(4), + rn: gpr(5), + rm: gpr(6), + }, + "B9E76045", + "xgrk %r4, %r5, %r6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::AndNot32, + rd: writable_gpr(1), + rn: gpr(2), + rm: gpr(3), + }, + "B9743012", + "nnrk %r1, %r2, %r3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::AndNot64, + rd: writable_gpr(4), + rn: gpr(5), + rm: gpr(6), + }, + "B9646045", + "nngrk %r4, %r5, %r6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::OrrNot32, + rd: writable_gpr(1), + rn: gpr(2), + rm: gpr(3), + }, + "B9763012", + "nork %r1, %r2, %r3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::OrrNot64, + rd: writable_gpr(4), + rn: gpr(5), + rm: gpr(6), + }, + "B9666045", + "nogrk %r4, %r5, %r6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::XorNot32, + rd: writable_gpr(1), + rn: gpr(2), + rm: gpr(3), + }, + "B9773012", + "nxrk %r1, %r2, %r3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::XorNot64, + rd: 
writable_gpr(4), + rn: gpr(5), + rm: gpr(6), + }, + "B9676045", + "nxgrk %r4, %r5, %r6", + )); + + insns.push(( + Inst::AluRRSImm16 { + alu_op: ALUOp::Add32, + rd: writable_gpr(4), + rn: gpr(5), + imm: -32768, + }, + "EC45800000D8", + "ahik %r4, %r5, -32768", + )); + insns.push(( + Inst::AluRRSImm16 { + alu_op: ALUOp::Add32, + rd: writable_gpr(4), + rn: gpr(5), + imm: 32767, + }, + "EC457FFF00D8", + "ahik %r4, %r5, 32767", + )); + insns.push(( + Inst::AluRRSImm16 { + alu_op: ALUOp::Add64, + rd: writable_gpr(4), + rn: gpr(5), + imm: -32768, + }, + "EC45800000D9", + "aghik %r4, %r5, -32768", + )); + insns.push(( + Inst::AluRRSImm16 { + alu_op: ALUOp::Add64, + rd: writable_gpr(4), + rn: gpr(5), + imm: 32767, + }, + "EC457FFF00D9", + "aghik %r4, %r5, 32767", + )); + + insns.push(( + Inst::AluRR { + alu_op: ALUOp::Add32, + rd: writable_gpr(1), + rm: gpr(2), + }, + "1A12", + "ar %r1, %r2", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::Add64, + rd: writable_gpr(4), + rm: gpr(5), + }, + "B9080045", + "agr %r4, %r5", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::Add64Ext32, + rd: writable_gpr(4), + rm: gpr(5), + }, + "B9180045", + "agfr %r4, %r5", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::Sub32, + rd: writable_gpr(1), + rm: gpr(2), + }, + "1B12", + "sr %r1, %r2", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::Sub64, + rd: writable_gpr(4), + rm: gpr(5), + }, + "B9090045", + "sgr %r4, %r5", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::Sub64Ext32, + rd: writable_gpr(4), + rm: gpr(5), + }, + "B9190045", + "sgfr %r4, %r5", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::Mul32, + rd: writable_gpr(1), + rm: gpr(2), + }, + "B2520012", + "msr %r1, %r2", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::Mul64, + rd: writable_gpr(4), + rm: gpr(5), + }, + "B90C0045", + "msgr %r4, %r5", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::Mul64Ext32, + rd: writable_gpr(4), + rm: gpr(5), + }, + "B91C0045", + "msgfr %r4, %r5", + )); + 
insns.push(( + Inst::AluRR { + alu_op: ALUOp::And32, + rd: writable_gpr(1), + rm: gpr(2), + }, + "1412", + "nr %r1, %r2", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::And64, + rd: writable_gpr(4), + rm: gpr(5), + }, + "B9800045", + "ngr %r4, %r5", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::Orr32, + rd: writable_gpr(1), + rm: gpr(2), + }, + "1612", + "or %r1, %r2", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::Orr64, + rd: writable_gpr(4), + rm: gpr(5), + }, + "B9810045", + "ogr %r4, %r5", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::Xor32, + rd: writable_gpr(1), + rm: gpr(2), + }, + "1712", + "xr %r1, %r2", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::Xor64, + rd: writable_gpr(4), + rm: gpr(5), + }, + "B9820045", + "xgr %r4, %r5", + )); + + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Add32, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "5A102000", + "a %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Add32Ext16, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "4A102000", + "ah %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Add32, + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000005A", + "ay %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Add32Ext16, + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000004A", + "ahy %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Add64, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + 
"E31020000008", + "ag %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Add64Ext16, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000038", + "agh %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Add64Ext32, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000018", + "agf %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Sub32, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "5B102000", + "s %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Sub32Ext16, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "4B102000", + "sh %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Sub32, + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000005B", + "sy %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Sub32Ext16, + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000007B", + "shy %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Sub64, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000009", + "sg %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Sub64Ext16, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + 
"E31020000039", + "sgh %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Sub64Ext32, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000019", + "sgf %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Mul32, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "71102000", + "ms %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Mul32Ext16, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "4C102000", + "mh %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Mul32, + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000051", + "msy %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Mul32Ext16, + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000007C", + "mhy %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Mul64, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000000C", + "msg %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Mul64Ext16, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000003C", + "mgh %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Mul64Ext32, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + 
"E3102000001C", + "msgf %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::And32, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "54102000", + "n %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::And32, + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000054", + "ny %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::And64, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000080", + "ng %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Orr32, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "56102000", + "o %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Orr32, + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000056", + "oy %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Orr64, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000081", + "og %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Xor32, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "57102000", + "x %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Xor32, + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000057", + "xy %r1, 0(%r2)", + )); + 
insns.push(( + Inst::AluRX { + alu_op: ALUOp::Xor64, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000082", + "xg %r1, 0(%r2)", + )); + + insns.push(( + Inst::AluRSImm16 { + alu_op: ALUOp::Add32, + rd: writable_gpr(7), + imm: -32768, + }, + "A77A8000", + "ahi %r7, -32768", + )); + insns.push(( + Inst::AluRSImm16 { + alu_op: ALUOp::Add32, + rd: writable_gpr(7), + imm: 32767, + }, + "A77A7FFF", + "ahi %r7, 32767", + )); + insns.push(( + Inst::AluRSImm16 { + alu_op: ALUOp::Add64, + rd: writable_gpr(7), + imm: -32768, + }, + "A77B8000", + "aghi %r7, -32768", + )); + insns.push(( + Inst::AluRSImm16 { + alu_op: ALUOp::Add64, + rd: writable_gpr(7), + imm: 32767, + }, + "A77B7FFF", + "aghi %r7, 32767", + )); + insns.push(( + Inst::AluRSImm16 { + alu_op: ALUOp::Mul32, + rd: writable_gpr(7), + imm: -32768, + }, + "A77C8000", + "mhi %r7, -32768", + )); + insns.push(( + Inst::AluRSImm16 { + alu_op: ALUOp::Mul32, + rd: writable_gpr(7), + imm: 32767, + }, + "A77C7FFF", + "mhi %r7, 32767", + )); + insns.push(( + Inst::AluRSImm16 { + alu_op: ALUOp::Mul64, + rd: writable_gpr(7), + imm: -32768, + }, + "A77D8000", + "mghi %r7, -32768", + )); + insns.push(( + Inst::AluRSImm16 { + alu_op: ALUOp::Mul64, + rd: writable_gpr(7), + imm: 32767, + }, + "A77D7FFF", + "mghi %r7, 32767", + )); + + insns.push(( + Inst::AluRSImm32 { + alu_op: ALUOp::Add32, + rd: writable_gpr(7), + imm: -2147483648, + }, + "C27980000000", + "afi %r7, -2147483648", + )); + insns.push(( + Inst::AluRSImm32 { + alu_op: ALUOp::Add32, + rd: writable_gpr(7), + imm: 2147483647, + }, + "C2797FFFFFFF", + "afi %r7, 2147483647", + )); + insns.push(( + Inst::AluRSImm32 { + alu_op: ALUOp::Mul32, + rd: writable_gpr(7), + imm: -2147483648, + }, + "C27180000000", + "msfi %r7, -2147483648", + )); + insns.push(( + Inst::AluRSImm32 { + alu_op: ALUOp::Mul32, + rd: writable_gpr(7), + imm: 2147483647, + }, + "C2717FFFFFFF", + 
"msfi %r7, 2147483647", + )); + insns.push(( + Inst::AluRSImm32 { + alu_op: ALUOp::Add64, + rd: writable_gpr(7), + imm: -2147483648, + }, + "C27880000000", + "agfi %r7, -2147483648", + )); + insns.push(( + Inst::AluRSImm32 { + alu_op: ALUOp::Add64, + rd: writable_gpr(7), + imm: 2147483647, + }, + "C2787FFFFFFF", + "agfi %r7, 2147483647", + )); + insns.push(( + Inst::AluRSImm32 { + alu_op: ALUOp::Mul64, + rd: writable_gpr(7), + imm: -2147483648, + }, + "C27080000000", + "msgfi %r7, -2147483648", + )); + insns.push(( + Inst::AluRSImm32 { + alu_op: ALUOp::Mul64, + rd: writable_gpr(7), + imm: 2147483647, + }, + "C2707FFFFFFF", + "msgfi %r7, 2147483647", + )); + + insns.push(( + Inst::AluRUImm32 { + alu_op: ALUOp::Add32, + rd: writable_gpr(7), + imm: 0, + }, + "C27B00000000", + "alfi %r7, 0", + )); + insns.push(( + Inst::AluRUImm32 { + alu_op: ALUOp::Add32, + rd: writable_gpr(7), + imm: 4294967295, + }, + "C27BFFFFFFFF", + "alfi %r7, 4294967295", + )); + insns.push(( + Inst::AluRUImm32 { + alu_op: ALUOp::Sub32, + rd: writable_gpr(7), + imm: 0, + }, + "C27500000000", + "slfi %r7, 0", + )); + insns.push(( + Inst::AluRUImm32 { + alu_op: ALUOp::Sub32, + rd: writable_gpr(7), + imm: 4294967295, + }, + "C275FFFFFFFF", + "slfi %r7, 4294967295", + )); + insns.push(( + Inst::AluRUImm32 { + alu_op: ALUOp::Add64, + rd: writable_gpr(7), + imm: 0, + }, + "C27A00000000", + "algfi %r7, 0", + )); + insns.push(( + Inst::AluRUImm32 { + alu_op: ALUOp::Add64, + rd: writable_gpr(7), + imm: 4294967295, + }, + "C27AFFFFFFFF", + "algfi %r7, 4294967295", + )); + insns.push(( + Inst::AluRUImm32 { + alu_op: ALUOp::Sub64, + rd: writable_gpr(7), + imm: 0, + }, + "C27400000000", + "slgfi %r7, 0", + )); + insns.push(( + Inst::AluRUImm32 { + alu_op: ALUOp::Sub64, + rd: writable_gpr(7), + imm: 4294967295, + }, + "C274FFFFFFFF", + "slgfi %r7, 4294967295", + )); + + insns.push(( + Inst::AluRUImm16Shifted { + alu_op: ALUOp::And32, + rd: writable_gpr(8), + imm: 
UImm16Shifted::maybe_from_u64(0x0000_ffff).unwrap(), + }, + "A587FFFF", + "nill %r8, 65535", + )); + insns.push(( + Inst::AluRUImm16Shifted { + alu_op: ALUOp::And32, + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0xffff_0000).unwrap(), + }, + "A586FFFF", + "nilh %r8, 65535", + )); + insns.push(( + Inst::AluRUImm16Shifted { + alu_op: ALUOp::And64, + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(), + }, + "A587FFFF", + "nill %r8, 65535", + )); + insns.push(( + Inst::AluRUImm16Shifted { + alu_op: ALUOp::And64, + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(), + }, + "A586FFFF", + "nilh %r8, 65535", + )); + insns.push(( + Inst::AluRUImm16Shifted { + alu_op: ALUOp::And64, + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(), + }, + "A585FFFF", + "nihl %r8, 65535", + )); + insns.push(( + Inst::AluRUImm16Shifted { + alu_op: ALUOp::And64, + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0xffff_0000_0000_0000).unwrap(), + }, + "A584FFFF", + "nihh %r8, 65535", + )); + insns.push(( + Inst::AluRUImm16Shifted { + alu_op: ALUOp::Orr32, + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0x0000_ffff).unwrap(), + }, + "A58BFFFF", + "oill %r8, 65535", + )); + insns.push(( + Inst::AluRUImm16Shifted { + alu_op: ALUOp::Orr32, + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0xffff_0000).unwrap(), + }, + "A58AFFFF", + "oilh %r8, 65535", + )); + insns.push(( + Inst::AluRUImm16Shifted { + alu_op: ALUOp::Orr64, + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(), + }, + "A58BFFFF", + "oill %r8, 65535", + )); + insns.push(( + Inst::AluRUImm16Shifted { + alu_op: ALUOp::Orr64, + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(), + }, + "A58AFFFF", + "oilh %r8, 65535", + )); + insns.push(( + Inst::AluRUImm16Shifted { + alu_op: ALUOp::Orr64, + rd: 
writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(), + }, + "A589FFFF", + "oihl %r8, 65535", + )); + insns.push(( + Inst::AluRUImm16Shifted { + alu_op: ALUOp::Orr64, + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0xffff_0000_0000_0000).unwrap(), + }, + "A588FFFF", + "oihh %r8, 65535", + )); + + insns.push(( + Inst::AluRUImm32Shifted { + alu_op: ALUOp::And32, + rd: writable_gpr(8), + imm: UImm32Shifted::maybe_from_u64(0xffff_ffff).unwrap(), + }, + "C08BFFFFFFFF", + "nilf %r8, 4294967295", + )); + insns.push(( + Inst::AluRUImm32Shifted { + alu_op: ALUOp::And64, + rd: writable_gpr(8), + imm: UImm32Shifted::maybe_from_u64(0x0000_0000_ffff_ffff).unwrap(), + }, + "C08BFFFFFFFF", + "nilf %r8, 4294967295", + )); + insns.push(( + Inst::AluRUImm32Shifted { + alu_op: ALUOp::And64, + rd: writable_gpr(8), + imm: UImm32Shifted::maybe_from_u64(0xffff_ffff_0000_0000).unwrap(), + }, + "C08AFFFFFFFF", + "nihf %r8, 4294967295", + )); + insns.push(( + Inst::AluRUImm32Shifted { + alu_op: ALUOp::Orr32, + rd: writable_gpr(8), + imm: UImm32Shifted::maybe_from_u64(0xffff_ffff).unwrap(), + }, + "C08DFFFFFFFF", + "oilf %r8, 4294967295", + )); + insns.push(( + Inst::AluRUImm32Shifted { + alu_op: ALUOp::Orr64, + rd: writable_gpr(8), + imm: UImm32Shifted::maybe_from_u64(0x0000_0000_ffff_ffff).unwrap(), + }, + "C08DFFFFFFFF", + "oilf %r8, 4294967295", + )); + insns.push(( + Inst::AluRUImm32Shifted { + alu_op: ALUOp::Orr64, + rd: writable_gpr(8), + imm: UImm32Shifted::maybe_from_u64(0xffff_ffff_0000_0000).unwrap(), + }, + "C08CFFFFFFFF", + "oihf %r8, 4294967295", + )); + insns.push(( + Inst::AluRUImm32Shifted { + alu_op: ALUOp::Xor32, + rd: writable_gpr(8), + imm: UImm32Shifted::maybe_from_u64(0xffff_ffff).unwrap(), + }, + "C087FFFFFFFF", + "xilf %r8, 4294967295", + )); + insns.push(( + Inst::AluRUImm32Shifted { + alu_op: ALUOp::Xor64, + rd: writable_gpr(8), + imm: UImm32Shifted::maybe_from_u64(0x0000_0000_ffff_ffff).unwrap(), + }, + "C087FFFFFFFF", 
+ "xilf %r8, 4294967295", + )); + insns.push(( + Inst::AluRUImm32Shifted { + alu_op: ALUOp::Xor64, + rd: writable_gpr(8), + imm: UImm32Shifted::maybe_from_u64(0xffff_ffff_0000_0000).unwrap(), + }, + "C086FFFFFFFF", + "xihf %r8, 4294967295", + )); + + insns.push(( + Inst::UnaryRR { + op: UnaryOp::Abs32, + rd: writable_gpr(1), + rn: gpr(10), + }, + "101A", + "lpr %r1, %r10", + )); + insns.push(( + Inst::UnaryRR { + op: UnaryOp::Abs64, + rd: writable_gpr(1), + rn: gpr(10), + }, + "B900001A", + "lpgr %r1, %r10", + )); + insns.push(( + Inst::UnaryRR { + op: UnaryOp::Abs64Ext32, + rd: writable_gpr(1), + rn: gpr(10), + }, + "B910001A", + "lpgfr %r1, %r10", + )); + insns.push(( + Inst::UnaryRR { + op: UnaryOp::Neg32, + rd: writable_gpr(1), + rn: gpr(10), + }, + "131A", + "lcr %r1, %r10", + )); + insns.push(( + Inst::UnaryRR { + op: UnaryOp::Neg64, + rd: writable_gpr(1), + rn: gpr(10), + }, + "B903001A", + "lcgr %r1, %r10", + )); + insns.push(( + Inst::UnaryRR { + op: UnaryOp::Neg64Ext32, + rd: writable_gpr(1), + rn: gpr(10), + }, + "B913001A", + "lcgfr %r1, %r10", + )); + insns.push(( + Inst::UnaryRR { + op: UnaryOp::PopcntByte, + rd: writable_gpr(1), + rn: gpr(10), + }, + "B9E1001A", + "popcnt %r1, %r10", + )); + insns.push(( + Inst::UnaryRR { + op: UnaryOp::PopcntReg, + rd: writable_gpr(1), + rn: gpr(10), + }, + "B9E1801A", + "popcnt %r1, %r10, 8", + )); + + insns.push(( + Inst::CmpRR { + op: CmpOp::CmpS32, + rn: gpr(5), + rm: gpr(6), + }, + "1956", + "cr %r5, %r6", + )); + insns.push(( + Inst::CmpRR { + op: CmpOp::CmpS64, + rn: gpr(5), + rm: gpr(6), + }, + "B9200056", + "cgr %r5, %r6", + )); + insns.push(( + Inst::CmpRR { + op: CmpOp::CmpS64Ext32, + rn: gpr(5), + rm: gpr(6), + }, + "B9300056", + "cgfr %r5, %r6", + )); + insns.push(( + Inst::CmpRR { + op: CmpOp::CmpL32, + rn: gpr(5), + rm: gpr(6), + }, + "1556", + "clr %r5, %r6", + )); + insns.push(( + Inst::CmpRR { + op: CmpOp::CmpL64, + rn: gpr(5), + rm: gpr(6), + }, + "B9210056", + "clgr %r5, %r6", + )); + 
insns.push(( + Inst::CmpRR { + op: CmpOp::CmpL64Ext32, + rn: gpr(5), + rm: gpr(6), + }, + "B9310056", + "clgfr %r5, %r6", + )); + + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpS32, + rn: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "59102000", + "c %r1, 0(%r2)", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpS32, + rn: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000059", + "cy %r1, 0(%r2)", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpS32, + rn: gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C61D00000020", + "crl %r1, 64", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpS32Ext16, + rn: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "49102000", + "ch %r1, 0(%r2)", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpS32Ext16, + rn: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000079", + "chy %r1, 0(%r2)", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpS32Ext16, + rn: gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C61500000020", + "chrl %r1, 64", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpS64, + rn: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000020", + "cg %r1, 0(%r2)", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpS64, + rn: gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C61800000020", + "cgrl %r1, 64", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpS64Ext16, + rn: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: 
UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000034", + "cgh %r1, 0(%r2)", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpS64Ext16, + rn: gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C61400000020", + "cghrl %r1, 64", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpS64Ext32, + rn: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000030", + "cgf %r1, 0(%r2)", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpS64Ext32, + rn: gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C61C00000020", + "cgfrl %r1, 64", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpL32, + rn: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "55102000", + "cl %r1, 0(%r2)", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpL32, + rn: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000055", + "cly %r1, 0(%r2)", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpL32, + rn: gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C61F00000020", + "clrl %r1, 64", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpL32Ext16, + rn: gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C61700000020", + "clhrl %r1, 64", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpL64, + rn: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000021", + "clg %r1, 0(%r2)", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpL64, + rn: gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C61A00000020", + "clgrl %r1, 64", + )); + insns.push(( 
+ Inst::CmpRX { + op: CmpOp::CmpL64Ext16, + rn: gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C61600000020", + "clghrl %r1, 64", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpL64Ext32, + rn: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000031", + "clgf %r1, 0(%r2)", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpL64Ext32, + rn: gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C61E00000020", + "clgfrl %r1, 64", + )); + + insns.push(( + Inst::CmpRSImm16 { + op: CmpOp::CmpS32, + rn: gpr(7), + imm: -32768, + }, + "A77E8000", + "chi %r7, -32768", + )); + insns.push(( + Inst::CmpRSImm16 { + op: CmpOp::CmpS32, + rn: gpr(7), + imm: 32767, + }, + "A77E7FFF", + "chi %r7, 32767", + )); + insns.push(( + Inst::CmpRSImm16 { + op: CmpOp::CmpS64, + rn: gpr(7), + imm: -32768, + }, + "A77F8000", + "cghi %r7, -32768", + )); + insns.push(( + Inst::CmpRSImm16 { + op: CmpOp::CmpS64, + rn: gpr(7), + imm: 32767, + }, + "A77F7FFF", + "cghi %r7, 32767", + )); + insns.push(( + Inst::CmpRSImm32 { + op: CmpOp::CmpS32, + rn: gpr(7), + imm: -2147483648, + }, + "C27D80000000", + "cfi %r7, -2147483648", + )); + insns.push(( + Inst::CmpRSImm32 { + op: CmpOp::CmpS32, + rn: gpr(7), + imm: 2147483647, + }, + "C27D7FFFFFFF", + "cfi %r7, 2147483647", + )); + insns.push(( + Inst::CmpRSImm32 { + op: CmpOp::CmpS64, + rn: gpr(7), + imm: -2147483648, + }, + "C27C80000000", + "cgfi %r7, -2147483648", + )); + insns.push(( + Inst::CmpRSImm32 { + op: CmpOp::CmpS64, + rn: gpr(7), + imm: 2147483647, + }, + "C27C7FFFFFFF", + "cgfi %r7, 2147483647", + )); + insns.push(( + Inst::CmpRUImm32 { + op: CmpOp::CmpL32, + rn: gpr(7), + imm: 0, + }, + "C27F00000000", + "clfi %r7, 0", + )); + insns.push(( + Inst::CmpRUImm32 { + op: CmpOp::CmpL32, + rn: gpr(7), + imm: 4294967295, + }, + "C27FFFFFFFFF", + "clfi %r7, 4294967295", + )); + 
insns.push(( + Inst::CmpRUImm32 { + op: CmpOp::CmpL64, + rn: gpr(7), + imm: 0, + }, + "C27E00000000", + "clgfi %r7, 0", + )); + insns.push(( + Inst::CmpRUImm32 { + op: CmpOp::CmpL64, + rn: gpr(7), + imm: 4294967295, + }, + "C27EFFFFFFFF", + "clgfi %r7, 4294967295", + )); + + insns.push(( + Inst::CmpTrapRR { + op: CmpOp::CmpS32, + rn: gpr(5), + rm: gpr(6), + cond: Cond::from_mask(8), + trap_code: TrapCode::StackOverflow, + }, + "B9728056", + "crte %r5, %r6", + )); + insns.push(( + Inst::CmpTrapRR { + op: CmpOp::CmpS64, + rn: gpr(5), + rm: gpr(6), + cond: Cond::from_mask(8), + trap_code: TrapCode::StackOverflow, + }, + "B9608056", + "cgrte %r5, %r6", + )); + insns.push(( + Inst::CmpTrapRR { + op: CmpOp::CmpL32, + rn: gpr(5), + rm: gpr(6), + cond: Cond::from_mask(8), + trap_code: TrapCode::StackOverflow, + }, + "B9738056", + "clrte %r5, %r6", + )); + insns.push(( + Inst::CmpTrapRR { + op: CmpOp::CmpL64, + rn: gpr(5), + rm: gpr(6), + cond: Cond::from_mask(8), + trap_code: TrapCode::StackOverflow, + }, + "B9618056", + "clgrte %r5, %r6", + )); + insns.push(( + Inst::CmpTrapRSImm16 { + op: CmpOp::CmpS32, + rn: gpr(7), + imm: -32768, + cond: Cond::from_mask(8), + trap_code: TrapCode::StackOverflow, + }, + "EC7080008072", + "cite %r7, -32768", + )); + insns.push(( + Inst::CmpTrapRSImm16 { + op: CmpOp::CmpS32, + rn: gpr(7), + imm: 32767, + cond: Cond::from_mask(8), + trap_code: TrapCode::StackOverflow, + }, + "EC707FFF8072", + "cite %r7, 32767", + )); + insns.push(( + Inst::CmpTrapRSImm16 { + op: CmpOp::CmpS64, + rn: gpr(7), + imm: -32768, + cond: Cond::from_mask(8), + trap_code: TrapCode::StackOverflow, + }, + "EC7080008070", + "cgite %r7, -32768", + )); + insns.push(( + Inst::CmpTrapRSImm16 { + op: CmpOp::CmpS64, + rn: gpr(7), + imm: 32767, + cond: Cond::from_mask(8), + trap_code: TrapCode::StackOverflow, + }, + "EC707FFF8070", + "cgite %r7, 32767", + )); + insns.push(( + Inst::CmpTrapRUImm16 { + op: CmpOp::CmpL32, + rn: gpr(7), + imm: 0, + cond: Cond::from_mask(8), + 
trap_code: TrapCode::StackOverflow, + }, + "EC7000008073", + "clfite %r7, 0", + )); + insns.push(( + Inst::CmpTrapRUImm16 { + op: CmpOp::CmpL32, + rn: gpr(7), + imm: 65535, + cond: Cond::from_mask(8), + trap_code: TrapCode::StackOverflow, + }, + "EC70FFFF8073", + "clfite %r7, 65535", + )); + insns.push(( + Inst::CmpTrapRUImm16 { + op: CmpOp::CmpL64, + rn: gpr(7), + imm: 0, + cond: Cond::from_mask(8), + trap_code: TrapCode::StackOverflow, + }, + "EC7000008071", + "clgite %r7, 0", + )); + insns.push(( + Inst::CmpTrapRUImm16 { + op: CmpOp::CmpL64, + rn: gpr(7), + imm: 65535, + cond: Cond::from_mask(8), + trap_code: TrapCode::StackOverflow, + }, + "EC70FFFF8071", + "clgite %r7, 65535", + )); + + insns.push(( + Inst::SMulWide { + rn: gpr(5), + rm: gpr(6), + }, + "B9EC6005", + "mgrk %r0, %r5, %r6", + )); + insns.push((Inst::UMulWide { rn: gpr(5) }, "B9860005", "mlgr %r0, %r5")); + insns.push((Inst::SDivMod32 { rn: gpr(5) }, "B91D0005", "dsgfr %r0, %r5")); + insns.push((Inst::SDivMod64 { rn: gpr(5) }, "B90D0005", "dsgr %r0, %r5")); + insns.push((Inst::UDivMod32 { rn: gpr(5) }, "B9970005", "dlr %r0, %r5")); + insns.push((Inst::UDivMod64 { rn: gpr(5) }, "B9870005", "dlgr %r0, %r5")); + + insns.push((Inst::Flogr { rn: gpr(5) }, "B9830005", "flogr %r0, %r5")); + + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::RotL32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: None, + }, + "EB450000801D", + "rll %r4, %r5, -524288", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::RotL32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: None, + }, + "EB450FFF7F1D", + "rll %r4, %r5, 524287", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::RotL32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB456000801D", + "rll %r4, %r5, -524288(%r6)", + )); + insns.push(( + 
Inst::ShiftRR { + shift_op: ShiftOp::RotL32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB456FFF7F1D", + "rll %r4, %r5, 524287(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::RotL64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: None, + }, + "EB450000801C", + "rllg %r4, %r5, -524288", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::RotL64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: None, + }, + "EB450FFF7F1C", + "rllg %r4, %r5, 524287", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::RotL64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB456000801C", + "rllg %r4, %r5, -524288(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::RotL64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB456FFF7F1C", + "rllg %r4, %r5, 524287(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShL32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: None, + }, + "EB45000080DF", + "sllk %r4, %r5, -524288", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShL32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: None, + }, + "EB450FFF7FDF", + "sllk %r4, %r5, 524287", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShL32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB45600080DF", + "sllk %r4, %r5, -524288(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShL32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: 
SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB456FFF7FDF", + "sllk %r4, %r5, 524287(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShL64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: None, + }, + "EB450000800D", + "sllg %r4, %r5, -524288", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShL64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: None, + }, + "EB450FFF7F0D", + "sllg %r4, %r5, 524287", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShL64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB456000800D", + "sllg %r4, %r5, -524288(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShL64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB456FFF7F0D", + "sllg %r4, %r5, 524287(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShR32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: None, + }, + "EB45000080DE", + "srlk %r4, %r5, -524288", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShR32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: None, + }, + "EB450FFF7FDE", + "srlk %r4, %r5, 524287", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShR32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB45600080DE", + "srlk %r4, %r5, -524288(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShR32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB456FFF7FDE", + "srlk %r4, %r5, 
524287(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShR64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: None, + }, + "EB450000800C", + "srlg %r4, %r5, -524288", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShR64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: None, + }, + "EB450FFF7F0C", + "srlg %r4, %r5, 524287", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShR64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB456000800C", + "srlg %r4, %r5, -524288(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShR64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB456FFF7F0C", + "srlg %r4, %r5, 524287(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::AShR32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: None, + }, + "EB45000080DC", + "srak %r4, %r5, -524288", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::AShR32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: None, + }, + "EB450FFF7FDC", + "srak %r4, %r5, 524287", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::AShR32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB45600080DC", + "srak %r4, %r5, -524288(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::AShR32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB456FFF7FDC", + "srak %r4, %r5, 524287(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::AShR64, + rd: writable_gpr(4), + rn: 
gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: None, + }, + "EB450000800A", + "srag %r4, %r5, -524288", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::AShR64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: None, + }, + "EB450FFF7F0A", + "srag %r4, %r5, 524287", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::AShR64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB456000800A", + "srag %r4, %r5, -524288(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::AShR64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB456FFF7F0A", + "srag %r4, %r5, 524287(%r6)", + )); + + insns.push(( + Inst::Load32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "58102000", + "l %r1, 0(%r2)", + )); + insns.push(( + Inst::Load32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "58102FFF", + "l %r1, 4095(%r2)", + )); + insns.push(( + Inst::Load32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008058", + "ly %r1, -524288(%r2)", + )); + insns.push(( + Inst::Load32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F58", + "ly %r1, 524287(%r2)", + )); + insns.push(( + Inst::Load32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + 
"58123000", + "l %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Load32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "58123FFF", + "l %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Load32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008058", + "ly %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Load32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F58", + "ly %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Load32ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000094", + "llc %r1, 0(%r2)", + )); + insns.push(( + Inst::Load32ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF0094", + "llc %r1, 4095(%r2)", + )); + insns.push(( + Inst::Load32ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008094", + "llc %r1, -524288(%r2)", + )); + insns.push(( + Inst::Load32ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F94", + "llc %r1, 524287(%r2)", + )); + insns.push(( + Inst::Load32ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31230000094", + 
"llc %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Load32ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF0094", + "llc %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Load32ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008094", + "llc %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Load32ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F94", + "llc %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Load32SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000076", + "lb %r1, 0(%r2)", + )); + insns.push(( + Inst::Load32SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF0076", + "lb %r1, 4095(%r2)", + )); + insns.push(( + Inst::Load32SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008076", + "lb %r1, -524288(%r2)", + )); + insns.push(( + Inst::Load32SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F76", + "lb %r1, 524287(%r2)", + )); + insns.push(( + Inst::Load32SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + 
"E31230000076", + "lb %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Load32SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF0076", + "lb %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Load32SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008076", + "lb %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Load32SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F76", + "lb %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Load32ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000095", + "llh %r1, 0(%r2)", + )); + insns.push(( + Inst::Load32ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF0095", + "llh %r1, 4095(%r2)", + )); + insns.push(( + Inst::Load32ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008095", + "llh %r1, -524288(%r2)", + )); + insns.push(( + Inst::Load32ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F95", + "llh %r1, 524287(%r2)", + )); + insns.push(( + Inst::Load32ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + 
}, + }, + "E31230000095", + "llh %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Load32ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF0095", + "llh %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Load32ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008095", + "llh %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Load32ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F95", + "llh %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Load32SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "48102000", + "lh %r1, 0(%r2)", + )); + insns.push(( + Inst::Load32SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "48102FFF", + "lh %r1, 4095(%r2)", + )); + insns.push(( + Inst::Load32SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008078", + "lhy %r1, -524288(%r2)", + )); + insns.push(( + Inst::Load32SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F78", + "lhy %r1, 524287(%r2)", + )); + insns.push(( + Inst::Load32SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: 
MemFlags::trusted(), + }, + }, + "48123000", + "lh %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Load32SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "48123FFF", + "lh %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Load32SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008078", + "lhy %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Load32SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F78", + "lhy %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Load64 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000004", + "lg %r1, 0(%r2)", + )); + insns.push(( + Inst::Load64 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF0004", + "lg %r1, 4095(%r2)", + )); + insns.push(( + Inst::Load64 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008004", + "lg %r1, -524288(%r2)", + )); + insns.push(( + Inst::Load64 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F04", + "lg %r1, 524287(%r2)", + )); + insns.push(( + Inst::Load64 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + 
}, + "E31230000004", + "lg %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Load64 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF0004", + "lg %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Load64 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008004", + "lg %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Load64 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F04", + "lg %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Load64ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000090", + "llgc %r1, 0(%r2)", + )); + insns.push(( + Inst::Load64ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF0090", + "llgc %r1, 4095(%r2)", + )); + insns.push(( + Inst::Load64ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008090", + "llgc %r1, -524288(%r2)", + )); + insns.push(( + Inst::Load64ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F90", + "llgc %r1, 524287(%r2)", + )); + insns.push(( + Inst::Load64ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + 
"E31230000090", + "llgc %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Load64ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF0090", + "llgc %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Load64ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008090", + "llgc %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Load64ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F90", + "llgc %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Load64SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000077", + "lgb %r1, 0(%r2)", + )); + insns.push(( + Inst::Load64SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF0077", + "lgb %r1, 4095(%r2)", + )); + insns.push(( + Inst::Load64SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008077", + "lgb %r1, -524288(%r2)", + )); + insns.push(( + Inst::Load64SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F77", + "lgb %r1, 524287(%r2)", + )); + insns.push(( + Inst::Load64SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), 
+ }, + }, + "E31230000077", + "lgb %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Load64SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF0077", + "lgb %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Load64SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008077", + "lgb %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Load64SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F77", + "lgb %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Load64ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000091", + "llgh %r1, 0(%r2)", + )); + insns.push(( + Inst::Load64ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF0091", + "llgh %r1, 4095(%r2)", + )); + insns.push(( + Inst::Load64ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008091", + "llgh %r1, -524288(%r2)", + )); + insns.push(( + Inst::Load64ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F91", + "llgh %r1, 524287(%r2)", + )); + insns.push(( + Inst::Load64ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: 
MemFlags::trusted(), + }, + }, + "E31230000091", + "llgh %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Load64ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF0091", + "llgh %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Load64ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008091", + "llgh %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Load64ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F91", + "llgh %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Load64SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000015", + "lgh %r1, 0(%r2)", + )); + insns.push(( + Inst::Load64SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF0015", + "lgh %r1, 4095(%r2)", + )); + insns.push(( + Inst::Load64SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008015", + "lgh %r1, -524288(%r2)", + )); + insns.push(( + Inst::Load64SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F15", + "lgh %r1, 524287(%r2)", + )); + insns.push(( + Inst::Load64SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: 
UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31230000015", + "lgh %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Load64SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF0015", + "lgh %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Load64SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008015", + "lgh %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Load64SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F15", + "lgh %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Load64ZExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000016", + "llgf %r1, 0(%r2)", + )); + insns.push(( + Inst::Load64ZExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF0016", + "llgf %r1, 4095(%r2)", + )); + insns.push(( + Inst::Load64ZExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008016", + "llgf %r1, -524288(%r2)", + )); + insns.push(( + Inst::Load64ZExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F16", + "llgf %r1, 524287(%r2)", + )); + insns.push(( + Inst::Load64ZExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + 
index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31230000016", + "llgf %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Load64ZExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF0016", + "llgf %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Load64ZExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008016", + "llgf %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Load64ZExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F16", + "llgf %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Load64SExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000014", + "lgf %r1, 0(%r2)", + )); + insns.push(( + Inst::Load64SExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF0014", + "lgf %r1, 4095(%r2)", + )); + insns.push(( + Inst::Load64SExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008014", + "lgf %r1, -524288(%r2)", + )); + insns.push(( + Inst::Load64SExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F14", + "lgf %r1, 524287(%r2)", + )); + insns.push(( + Inst::Load64SExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 
{ + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31230000014", + "lgf %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Load64SExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF0014", + "lgf %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Load64SExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008014", + "lgf %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Load64SExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F14", + "lgf %r1, 524287(%r2,%r3)", + )); + + insns.push(( + Inst::Load32 { + rd: writable_gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C41D00000020", + "lrl %r1, 64", + )); + insns.push(( + Inst::Load32SExt16 { + rd: writable_gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C41500000020", + "lhrl %r1, 64", + )); + insns.push(( + Inst::Load32ZExt16 { + rd: writable_gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C41200000020", + "llhrl %r1, 64", + )); + insns.push(( + Inst::Load64 { + rd: writable_gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C41800000020", + "lgrl %r1, 64", + )); + insns.push(( + Inst::Load64SExt16 { + rd: writable_gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C41400000020", + "lghrl %r1, 64", + )); + insns.push(( + Inst::Load64ZExt16 { + rd: writable_gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C41600000020", + "llghrl %r1, 64", + )); 
+ insns.push(( + Inst::Load64SExt32 { + rd: writable_gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C41C00000020", + "lgfrl %r1, 64", + )); + insns.push(( + Inst::Load64ZExt32 { + rd: writable_gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C41E00000020", + "llgfrl %r1, 64", + )); + insns.push(( + Inst::LoadRev16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000001F", + "lrvh %r1, 0(%r2)", + )); + insns.push(( + Inst::LoadRev16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF001F", + "lrvh %r1, 4095(%r2)", + )); + insns.push(( + Inst::LoadRev16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102000801F", + "lrvh %r1, -524288(%r2)", + )); + insns.push(( + Inst::LoadRev16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F1F", + "lrvh %r1, 524287(%r2)", + )); + insns.push(( + Inst::LoadRev16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3123000001F", + "lrvh %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::LoadRev16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF001F", + "lrvh %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::LoadRev16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: 
SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123000801F", + "lrvh %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::LoadRev16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F1F", + "lrvh %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::LoadRev32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000001E", + "lrv %r1, 0(%r2)", + )); + insns.push(( + Inst::LoadRev32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF001E", + "lrv %r1, 4095(%r2)", + )); + insns.push(( + Inst::LoadRev32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102000801E", + "lrv %r1, -524288(%r2)", + )); + insns.push(( + Inst::LoadRev32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F1E", + "lrv %r1, 524287(%r2)", + )); + insns.push(( + Inst::LoadRev32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3123000001E", + "lrv %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::LoadRev32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF001E", + "lrv %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::LoadRev32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: 
SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123000801E", + "lrv %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::LoadRev32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F1E", + "lrv %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::LoadRev64 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000000F", + "lrvg %r1, 0(%r2)", + )); + insns.push(( + Inst::LoadRev64 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF000F", + "lrvg %r1, 4095(%r2)", + )); + insns.push(( + Inst::LoadRev64 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102000800F", + "lrvg %r1, -524288(%r2)", + )); + insns.push(( + Inst::LoadRev64 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F0F", + "lrvg %r1, 524287(%r2)", + )); + insns.push(( + Inst::LoadRev64 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3123000000F", + "lrvg %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::LoadRev64 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF000F", + "lrvg %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::LoadRev64 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: 
SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123000800F", + "lrvg %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::LoadRev64 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F0F", + "lrvg %r1, 524287(%r2,%r3)", + )); + + insns.push(( + Inst::Store8 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "42102000", + "stc %r1, 0(%r2)", + )); + insns.push(( + Inst::Store8 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "42102FFF", + "stc %r1, 4095(%r2)", + )); + insns.push(( + Inst::Store8 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008072", + "stcy %r1, -524288(%r2)", + )); + insns.push(( + Inst::Store8 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F72", + "stcy %r1, 524287(%r2)", + )); + insns.push(( + Inst::Store8 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "42123000", + "stc %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Store8 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "42123FFF", + "stc %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Store8 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008072", + 
"stcy %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Store8 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F72", + "stcy %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Store16 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "40102000", + "sth %r1, 0(%r2)", + )); + insns.push(( + Inst::Store16 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "40102FFF", + "sth %r1, 4095(%r2)", + )); + insns.push(( + Inst::Store16 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008070", + "sthy %r1, -524288(%r2)", + )); + insns.push(( + Inst::Store16 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F70", + "sthy %r1, 524287(%r2)", + )); + insns.push(( + Inst::Store16 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "40123000", + "sth %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Store16 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "40123FFF", + "sth %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Store16 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008070", + "sthy %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Store16 { + rd: gpr(1), + mem: MemArg::BXD20 { + 
base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F70", + "sthy %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Store32 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "50102000", + "st %r1, 0(%r2)", + )); + insns.push(( + Inst::Store32 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "50102FFF", + "st %r1, 4095(%r2)", + )); + insns.push(( + Inst::Store32 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008050", + "sty %r1, -524288(%r2)", + )); + insns.push(( + Inst::Store32 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F50", + "sty %r1, 524287(%r2)", + )); + insns.push(( + Inst::Store32 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "50123000", + "st %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Store32 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "50123FFF", + "st %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Store32 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008050", + "sty %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Store32 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, 
+ }, + "E3123FFF7F50", + "sty %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Store64 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000024", + "stg %r1, 0(%r2)", + )); + insns.push(( + Inst::Store64 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF0024", + "stg %r1, 4095(%r2)", + )); + insns.push(( + Inst::Store64 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008024", + "stg %r1, -524288(%r2)", + )); + insns.push(( + Inst::Store64 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F24", + "stg %r1, 524287(%r2)", + )); + insns.push(( + Inst::Store64 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31230000024", + "stg %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Store64 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF0024", + "stg %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Store64 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008024", + "stg %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Store64 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F24", + "stg %r1, 524287(%r2,%r3)", + )); + + insns.push(( + Inst::StoreImm8 { 
+ imm: 255, + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "92FF2000", + "mvi 0(%r2), 255", + )); + insns.push(( + Inst::StoreImm8 { + imm: 0, + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "92002FFF", + "mvi 4095(%r2), 0", + )); + insns.push(( + Inst::StoreImm8 { + imm: 255, + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "EBFF20008052", + "mviy -524288(%r2), 255", + )); + insns.push(( + Inst::StoreImm8 { + imm: 0, + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "EB002FFF7F52", + "mviy 524287(%r2), 0", + )); + insns.push(( + Inst::StoreImm16 { + imm: -32768, + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E54420008000", + "mvhhi 0(%r2), -32768", + )); + insns.push(( + Inst::StoreImm16 { + imm: 32767, + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E5442FFF7FFF", + "mvhhi 4095(%r2), 32767", + )); + insns.push(( + Inst::StoreImm32SExt16 { + imm: -32768, + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E54C20008000", + "mvhi 0(%r2), -32768", + )); + insns.push(( + Inst::StoreImm32SExt16 { + imm: 32767, + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E54C2FFF7FFF", + "mvhi 4095(%r2), 32767", + )); + insns.push(( + Inst::StoreImm64SExt16 { + imm: -32768, + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: 
UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E54820008000", + "mvghi 0(%r2), -32768", + )); + insns.push(( + Inst::StoreImm64SExt16 { + imm: 32767, + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E5482FFF7FFF", + "mvghi 4095(%r2), 32767", + )); + + insns.push(( + Inst::StoreRev16 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000003F", + "strvh %r1, 0(%r2)", + )); + insns.push(( + Inst::StoreRev16 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF003F", + "strvh %r1, 4095(%r2)", + )); + insns.push(( + Inst::StoreRev16 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102000803F", + "strvh %r1, -524288(%r2)", + )); + insns.push(( + Inst::StoreRev16 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F3F", + "strvh %r1, 524287(%r2)", + )); + insns.push(( + Inst::StoreRev16 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3123000003F", + "strvh %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::StoreRev16 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF003F", + "strvh %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::StoreRev16 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, 
+ "E3123000803F", + "strvh %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::StoreRev16 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F3F", + "strvh %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::StoreRev32 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000003E", + "strv %r1, 0(%r2)", + )); + insns.push(( + Inst::StoreRev32 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF003E", + "strv %r1, 4095(%r2)", + )); + insns.push(( + Inst::StoreRev32 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102000803E", + "strv %r1, -524288(%r2)", + )); + insns.push(( + Inst::StoreRev32 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F3E", + "strv %r1, 524287(%r2)", + )); + insns.push(( + Inst::StoreRev32 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3123000003E", + "strv %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::StoreRev32 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF003E", + "strv %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::StoreRev32 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123000803E", + "strv %r1, -524288(%r2,%r3)", + )); + 
insns.push(( + Inst::StoreRev32 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F3E", + "strv %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::StoreRev64 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000002F", + "strvg %r1, 0(%r2)", + )); + insns.push(( + Inst::StoreRev64 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF002F", + "strvg %r1, 4095(%r2)", + )); + insns.push(( + Inst::StoreRev64 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102000802F", + "strvg %r1, -524288(%r2)", + )); + insns.push(( + Inst::StoreRev64 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F2F", + "strvg %r1, 524287(%r2)", + )); + insns.push(( + Inst::StoreRev64 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3123000002F", + "strvg %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::StoreRev64 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF002F", + "strvg %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::StoreRev64 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123000802F", + "strvg %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::StoreRev64 { + rd: gpr(1), + mem: 
MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F2F", + "strvg %r1, 524287(%r2,%r3)", + )); + + insns.push(( + Inst::Store16 { + rd: gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C41700000020", + "sthrl %r1, 64", + )); + insns.push(( + Inst::Store32 { + rd: gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C41F00000020", + "strl %r1, 64", + )); + insns.push(( + Inst::Store64 { + rd: gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C41B00000020", + "stgrl %r1, 64", + )); + + insns.push(( + Inst::LoadMultiple64 { + rt: writable_gpr(8), + rt2: writable_gpr(12), + addr_reg: gpr(15), + addr_off: SImm20::maybe_from_i64(-524288).unwrap(), + }, + "EB8CF0008004", + "lmg %r8, %r12, -524288(%r15)", + )); + insns.push(( + Inst::LoadMultiple64 { + rt: writable_gpr(8), + rt2: writable_gpr(12), + addr_reg: gpr(15), + addr_off: SImm20::maybe_from_i64(524287).unwrap(), + }, + "EB8CFFFF7F04", + "lmg %r8, %r12, 524287(%r15)", + )); + + insns.push(( + Inst::StoreMultiple64 { + rt: gpr(8), + rt2: gpr(12), + addr_reg: gpr(15), + addr_off: SImm20::maybe_from_i64(-524288).unwrap(), + }, + "EB8CF0008024", + "stmg %r8, %r12, -524288(%r15)", + )); + insns.push(( + Inst::StoreMultiple64 { + rt: gpr(8), + rt2: gpr(12), + addr_reg: gpr(15), + addr_off: SImm20::maybe_from_i64(524287).unwrap(), + }, + "EB8CFFFF7F24", + "stmg %r8, %r12, 524287(%r15)", + )); + + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: zero_reg(), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "41100000", + "la %r1, 0", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: zero_reg(), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + 
"41100FFF", + "la %r1, 4095", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: zero_reg(), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31000008071", + "lay %r1, -524288", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: zero_reg(), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3100FFF7F71", + "lay %r1, 524287", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "41102000", + "la %r1, 0(%r2)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "41102FFF", + "la %r1, 4095(%r2)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008071", + "lay %r1, -524288(%r2)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F71", + "lay %r1, 524287(%r2)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "41123000", + "la %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "41123FFF", + "la %r1, 4095(%r2,%r3)", + )); + 
insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008071", + "lay %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F71", + "lay %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C01000000020", + "larl %r1, 64", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::Symbol { + name: Box::new(ExternalName::testcase("test0")), + offset: 64, + flags: MemFlags::trusted(), + }, + }, + "C01000000000", + "larl %r1, %test0 + 64", + )); + + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::RegOffset { + reg: gpr(2), + off: 0, + flags: MemFlags::trusted(), + }, + }, + "41102000", + "la %r1, 0(%r2)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::RegOffset { + reg: gpr(2), + off: 4095, + flags: MemFlags::trusted(), + }, + }, + "41102FFF", + "la %r1, 4095(%r2)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::RegOffset { + reg: gpr(2), + off: -524288, + flags: MemFlags::trusted(), + }, + }, + "E31020008071", + "lay %r1, -524288(%r2)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::RegOffset { + reg: gpr(2), + off: 524287, + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F71", + "lay %r1, 524287(%r2)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::RegOffset { + reg: gpr(2), + off: -2147483648, + flags: MemFlags::trusted(), + }, + }, + "C0118000000041112000", + "lgfi %r1, -2147483648 ; la %r1, 0(%r1,%r2)", + )); + insns.push(( + Inst::LoadAddr { + rd: 
writable_gpr(1), + mem: MemArg::RegOffset { + reg: gpr(2), + off: 2147483647, + flags: MemFlags::trusted(), + }, + }, + "C0117FFFFFFF41112000", + "lgfi %r1, 2147483647 ; la %r1, 0(%r1,%r2)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::RegOffset { + reg: gpr(2), + off: -9223372036854775808, + flags: MemFlags::trusted(), + }, + }, + "A51C800041112000", + "llihh %r1, 32768 ; la %r1, 0(%r1,%r2)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::RegOffset { + reg: gpr(2), + off: 9223372036854775807, + flags: MemFlags::trusted(), + }, + }, + "C01E7FFFFFFFC019FFFFFFFF41112000", + "llihf %r1, 2147483647 ; iilf %r1, 4294967295 ; la %r1, 0(%r1,%r2)", + )); + + insns.push(( + Inst::Mov64 { + rd: writable_gpr(8), + rm: gpr(9), + }, + "B9040089", + "lgr %r8, %r9", + )); + insns.push(( + Inst::Mov32 { + rd: writable_gpr(8), + rm: gpr(9), + }, + "1889", + "lr %r8, %r9", + )); + + insns.push(( + Inst::Mov32SImm16 { + rd: writable_gpr(8), + imm: -32768, + }, + "A7888000", + "lhi %r8, -32768", + )); + insns.push(( + Inst::Mov32SImm16 { + rd: writable_gpr(8), + imm: 32767, + }, + "A7887FFF", + "lhi %r8, 32767", + )); + insns.push(( + Inst::Mov32Imm { + rd: writable_gpr(8), + imm: 2147483648, + }, + "C08980000000", + "iilf %r8, 2147483648", + )); + insns.push(( + Inst::Mov32Imm { + rd: writable_gpr(8), + imm: 2147483647, + }, + "C0897FFFFFFF", + "iilf %r8, 2147483647", + )); + insns.push(( + Inst::Mov64SImm16 { + rd: writable_gpr(8), + imm: -32768, + }, + "A7898000", + "lghi %r8, -32768", + )); + insns.push(( + Inst::Mov64SImm16 { + rd: writable_gpr(8), + imm: 32767, + }, + "A7897FFF", + "lghi %r8, 32767", + )); + insns.push(( + Inst::Mov64SImm32 { + rd: writable_gpr(8), + imm: -2147483648, + }, + "C08180000000", + "lgfi %r8, -2147483648", + )); + insns.push(( + Inst::Mov64SImm32 { + rd: writable_gpr(8), + imm: 2147483647, + }, + "C0817FFFFFFF", + "lgfi %r8, 2147483647", + )); + insns.push(( + Inst::Mov64UImm16Shifted { + 
rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(), + }, + "A58FFFFF", + "llill %r8, 65535", + )); + insns.push(( + Inst::Mov64UImm16Shifted { + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(), + }, + "A58EFFFF", + "llilh %r8, 65535", + )); + insns.push(( + Inst::Mov64UImm16Shifted { + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(), + }, + "A58DFFFF", + "llihl %r8, 65535", + )); + insns.push(( + Inst::Mov64UImm16Shifted { + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0xffff_0000_0000_0000).unwrap(), + }, + "A58CFFFF", + "llihh %r8, 65535", + )); + insns.push(( + Inst::Mov64UImm32Shifted { + rd: writable_gpr(8), + imm: UImm32Shifted::maybe_from_u64(0x0000_0000_ffff_ffff).unwrap(), + }, + "C08FFFFFFFFF", + "llilf %r8, 4294967295", + )); + insns.push(( + Inst::Mov64UImm32Shifted { + rd: writable_gpr(8), + imm: UImm32Shifted::maybe_from_u64(0xffff_ffff_0000_0000).unwrap(), + }, + "C08EFFFFFFFF", + "llihf %r8, 4294967295", + )); + + insns.push(( + Inst::Insert64UImm16Shifted { + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(), + }, + "A583FFFF", + "iill %r8, 65535", + )); + insns.push(( + Inst::Insert64UImm16Shifted { + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(), + }, + "A582FFFF", + "iilh %r8, 65535", + )); + insns.push(( + Inst::Insert64UImm16Shifted { + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(), + }, + "A581FFFF", + "iihl %r8, 65535", + )); + insns.push(( + Inst::Insert64UImm16Shifted { + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0xffff_0000_0000_0000).unwrap(), + }, + "A580FFFF", + "iihh %r8, 65535", + )); + insns.push(( + Inst::Insert64UImm32Shifted { + rd: writable_gpr(8), + imm: UImm32Shifted::maybe_from_u64(0x0000_0000_ffff_ffff).unwrap(), + }, + "C089FFFFFFFF", + "iilf %r8, 
4294967295", + )); + insns.push(( + Inst::Insert64UImm32Shifted { + rd: writable_gpr(8), + imm: UImm32Shifted::maybe_from_u64(0xffff_ffff_0000_0000).unwrap(), + }, + "C088FFFFFFFF", + "iihf %r8, 4294967295", + )); + + insns.push(( + Inst::CMov32 { + rd: writable_gpr(8), + cond: Cond::from_mask(1), + rm: gpr(9), + }, + "B9F21089", + "locro %r8, %r9", + )); + insns.push(( + Inst::CMov64 { + rd: writable_gpr(8), + cond: Cond::from_mask(1), + rm: gpr(9), + }, + "B9E21089", + "locgro %r8, %r9", + )); + + insns.push(( + Inst::CMov32SImm16 { + rd: writable_gpr(8), + cond: Cond::from_mask(1), + imm: -32768, + }, + "EC8180000042", + "lochio %r8, -32768", + )); + insns.push(( + Inst::CMov32SImm16 { + rd: writable_gpr(8), + cond: Cond::from_mask(1), + imm: 32767, + }, + "EC817FFF0042", + "lochio %r8, 32767", + )); + insns.push(( + Inst::CMov64SImm16 { + rd: writable_gpr(8), + cond: Cond::from_mask(1), + imm: -32768, + }, + "EC8180000046", + "locghio %r8, -32768", + )); + insns.push(( + Inst::CMov64SImm16 { + rd: writable_gpr(8), + cond: Cond::from_mask(1), + imm: 32767, + }, + "EC817FFF0046", + "locghio %r8, 32767", + )); + + insns.push(( + Inst::Extend { + rd: writable_gpr(1), + rn: gpr(2), + signed: false, + from_bits: 8, + to_bits: 32, + }, + "B9940012", + "llcr %r1, %r2", + )); + insns.push(( + Inst::Extend { + rd: writable_gpr(1), + rn: gpr(2), + signed: true, + from_bits: 8, + to_bits: 32, + }, + "B9260012", + "lbr %r1, %r2", + )); + insns.push(( + Inst::Extend { + rd: writable_gpr(1), + rn: gpr(2), + signed: false, + from_bits: 16, + to_bits: 32, + }, + "B9950012", + "llhr %r1, %r2", + )); + insns.push(( + Inst::Extend { + rd: writable_gpr(1), + rn: gpr(2), + signed: true, + from_bits: 16, + to_bits: 32, + }, + "B9270012", + "lhr %r1, %r2", + )); + insns.push(( + Inst::Extend { + rd: writable_gpr(1), + rn: gpr(2), + signed: false, + from_bits: 8, + to_bits: 64, + }, + "B9840012", + "llgcr %r1, %r2", + )); + insns.push(( + Inst::Extend { + rd: writable_gpr(1), + rn: 
gpr(2), + signed: true, + from_bits: 8, + to_bits: 64, + }, + "B9060012", + "lgbr %r1, %r2", + )); + insns.push(( + Inst::Extend { + rd: writable_gpr(1), + rn: gpr(2), + signed: false, + from_bits: 16, + to_bits: 64, + }, + "B9850012", + "llghr %r1, %r2", + )); + insns.push(( + Inst::Extend { + rd: writable_gpr(1), + rn: gpr(2), + signed: true, + from_bits: 16, + to_bits: 64, + }, + "B9070012", + "lghr %r1, %r2", + )); + insns.push(( + Inst::Extend { + rd: writable_gpr(1), + rn: gpr(2), + signed: false, + from_bits: 32, + to_bits: 64, + }, + "B9160012", + "llgfr %r1, %r2", + )); + insns.push(( + Inst::Extend { + rd: writable_gpr(1), + rn: gpr(2), + signed: true, + from_bits: 32, + to_bits: 64, + }, + "B9140012", + "lgfr %r1, %r2", + )); + + insns.push(( + Inst::Jump { + dest: BranchTarget::ResolvedOffset(64), + }, + "C0F400000020", + "jg 64", + )); + + insns.push(( + Inst::OneWayCondBr { + target: BranchTarget::ResolvedOffset(64), + cond: Cond::from_mask(1), + }, + "C01400000020", + "jgo 64", + )); + insns.push(( + Inst::OneWayCondBr { + target: BranchTarget::ResolvedOffset(64), + cond: Cond::from_mask(2), + }, + "C02400000020", + "jgh 64", + )); + insns.push(( + Inst::OneWayCondBr { + target: BranchTarget::ResolvedOffset(64), + cond: Cond::from_mask(3), + }, + "C03400000020", + "jgnle 64", + )); + insns.push(( + Inst::OneWayCondBr { + target: BranchTarget::ResolvedOffset(64), + cond: Cond::from_mask(4), + }, + "C04400000020", + "jgl 64", + )); + insns.push(( + Inst::OneWayCondBr { + target: BranchTarget::ResolvedOffset(64), + cond: Cond::from_mask(5), + }, + "C05400000020", + "jgnhe 64", + )); + insns.push(( + Inst::OneWayCondBr { + target: BranchTarget::ResolvedOffset(64), + cond: Cond::from_mask(6), + }, + "C06400000020", + "jglh 64", + )); + insns.push(( + Inst::OneWayCondBr { + target: BranchTarget::ResolvedOffset(64), + cond: Cond::from_mask(7), + }, + "C07400000020", + "jgne 64", + )); + insns.push(( + Inst::OneWayCondBr { + target: 
BranchTarget::ResolvedOffset(64), + cond: Cond::from_mask(8), + }, + "C08400000020", + "jge 64", + )); + insns.push(( + Inst::OneWayCondBr { + target: BranchTarget::ResolvedOffset(64), + cond: Cond::from_mask(9), + }, + "C09400000020", + "jgnlh 64", + )); + insns.push(( + Inst::OneWayCondBr { + target: BranchTarget::ResolvedOffset(64), + cond: Cond::from_mask(10), + }, + "C0A400000020", + "jghe 64", + )); + insns.push(( + Inst::OneWayCondBr { + target: BranchTarget::ResolvedOffset(64), + cond: Cond::from_mask(11), + }, + "C0B400000020", + "jgnl 64", + )); + insns.push(( + Inst::OneWayCondBr { + target: BranchTarget::ResolvedOffset(64), + cond: Cond::from_mask(12), + }, + "C0C400000020", + "jgle 64", + )); + insns.push(( + Inst::OneWayCondBr { + target: BranchTarget::ResolvedOffset(64), + cond: Cond::from_mask(13), + }, + "C0D400000020", + "jgnh 64", + )); + insns.push(( + Inst::OneWayCondBr { + target: BranchTarget::ResolvedOffset(64), + cond: Cond::from_mask(14), + }, + "C0E400000020", + "jgno 64", + )); + + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + cond: Cond::from_mask(1), + }, + "C01400000020C0F400000040", + "jgo 64 ; jg 128", + )); + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + cond: Cond::from_mask(2), + }, + "C02400000020C0F400000040", + "jgh 64 ; jg 128", + )); + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + cond: Cond::from_mask(3), + }, + "C03400000020C0F400000040", + "jgnle 64 ; jg 128", + )); + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + cond: Cond::from_mask(4), + }, + "C04400000020C0F400000040", + "jgl 64 ; jg 128", + )); + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), 
+ cond: Cond::from_mask(5), + }, + "C05400000020C0F400000040", + "jgnhe 64 ; jg 128", + )); + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + cond: Cond::from_mask(6), + }, + "C06400000020C0F400000040", + "jglh 64 ; jg 128", + )); + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + cond: Cond::from_mask(7), + }, + "C07400000020C0F400000040", + "jgne 64 ; jg 128", + )); + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + cond: Cond::from_mask(8), + }, + "C08400000020C0F400000040", + "jge 64 ; jg 128", + )); + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + cond: Cond::from_mask(9), + }, + "C09400000020C0F400000040", + "jgnlh 64 ; jg 128", + )); + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + cond: Cond::from_mask(10), + }, + "C0A400000020C0F400000040", + "jghe 64 ; jg 128", + )); + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + cond: Cond::from_mask(11), + }, + "C0B400000020C0F400000040", + "jgnl 64 ; jg 128", + )); + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + cond: Cond::from_mask(12), + }, + "C0C400000020C0F400000040", + "jgle 64 ; jg 128", + )); + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + cond: Cond::from_mask(13), + }, + "C0D400000020C0F400000040", + "jgnh 64 ; jg 128", + )); + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + cond: Cond::from_mask(14), + }, + "C0E400000020C0F400000040", + 
"jgno 64 ; jg 128", + )); + + insns.push(( + Inst::IndirectBr { + rn: gpr(3), + targets: vec![], + }, + "07F3", + "br %r3", + )); + + insns.push(( + Inst::Call { + link: writable_gpr(14), + info: Box::new(CallInfo { + dest: ExternalName::testcase("test0"), + uses: Vec::new(), + defs: Vec::new(), + opcode: Opcode::Call, + }), + }, + "C0E500000000", + "brasl %r14, %test0", + )); + + insns.push(( + Inst::CallInd { + link: writable_gpr(14), + info: Box::new(CallIndInfo { + rn: gpr(1), + uses: Vec::new(), + defs: Vec::new(), + opcode: Opcode::CallIndirect, + }), + }, + "0DE1", + "basr %r14, %r1", + )); + + insns.push((Inst::Ret { link: gpr(14) }, "07FE", "br %r14")); + + insns.push((Inst::Debugtrap, "0001", "debugtrap")); + + insns.push(( + Inst::Trap { + trap_code: TrapCode::StackOverflow, + }, + "0000", + "trap", + )); + insns.push(( + Inst::TrapIf { + cond: Cond::from_mask(1), + trap_code: TrapCode::StackOverflow, + }, + "A7E400030000", + "jno 6 ; trap", + )); + + insns.push(( + Inst::FpuMove32 { + rd: writable_fpr(8), + rn: fpr(4), + }, + "3884", + "ler %f8, %f4", + )); + insns.push(( + Inst::FpuMove64 { + rd: writable_fpr(8), + rn: fpr(4), + }, + "2884", + "ldr %f8, %f4", + )); + insns.push(( + Inst::FpuCMov32 { + rd: writable_fpr(8), + rm: fpr(4), + cond: Cond::from_mask(1), + }, + "A7E400033884", + "jno 6 ; ler %f8, %f4", + )); + insns.push(( + Inst::FpuCMov64 { + rd: writable_fpr(8), + rm: fpr(4), + cond: Cond::from_mask(1), + }, + "A7E400032884", + "jno 6 ; ldr %f8, %f4", + )); + + insns.push(( + Inst::MovToFpr { + rd: writable_fpr(8), + rn: gpr(4), + }, + "B3C10084", + "ldgr %f8, %r4", + )); + insns.push(( + Inst::MovFromFpr { + rd: writable_gpr(8), + rn: fpr(4), + }, + "B3CD0084", + "lgdr %r8, %f4", + )); + + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Abs32, + rd: writable_fpr(8), + rn: fpr(12), + }, + "B300008C", + "lpebr %f8, %f12", + )); + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Abs64, + rd: writable_fpr(8), + rn: fpr(12), + }, + "B310008C", + 
"lpdbr %f8, %f12", + )); + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Neg32, + rd: writable_fpr(8), + rn: fpr(12), + }, + "B303008C", + "lcebr %f8, %f12", + )); + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Neg64, + rd: writable_fpr(8), + rn: fpr(12), + }, + "B313008C", + "lcdbr %f8, %f12", + )); + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::NegAbs32, + rd: writable_fpr(8), + rn: fpr(12), + }, + "B301008C", + "lnebr %f8, %f12", + )); + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::NegAbs64, + rd: writable_fpr(8), + rn: fpr(12), + }, + "B311008C", + "lndbr %f8, %f12", + )); + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Sqrt32, + rd: writable_fpr(8), + rn: fpr(12), + }, + "B314008C", + "sqebr %f8, %f12", + )); + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Sqrt64, + rd: writable_fpr(8), + rn: fpr(12), + }, + "B315008C", + "sqdbr %f8, %f12", + )); + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Cvt32To64, + rd: writable_fpr(8), + rn: fpr(12), + }, + "B304008C", + "ldebr %f8, %f12", + )); + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Cvt64To32, + rd: writable_fpr(8), + rn: fpr(12), + }, + "B344008C", + "ledbr %f8, %f12", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Add32, + rd: writable_fpr(8), + rm: fpr(12), + }, + "B30A008C", + "aebr %f8, %f12", + )); + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Add64, + rd: writable_fpr(8), + rm: fpr(12), + }, + "B31A008C", + "adbr %f8, %f12", + )); + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Sub32, + rd: writable_fpr(8), + rm: fpr(12), + }, + "B30B008C", + "sebr %f8, %f12", + )); + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Sub64, + rd: writable_fpr(8), + rm: fpr(12), + }, + "B31B008C", + "sdbr %f8, %f12", + )); + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Mul32, + rd: writable_fpr(8), + rm: fpr(12), + }, + "B317008C", + "meebr %f8, %f12", + )); + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Mul64, + rd: writable_fpr(8), + rm: fpr(12), + }, + "B31C008C", + "mdbr %f8, %f12", + 
)); + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Div32, + rd: writable_fpr(8), + rm: fpr(12), + }, + "B30D008C", + "debr %f8, %f12", + )); + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Div64, + rd: writable_fpr(8), + rm: fpr(12), + }, + "B31D008C", + "ddbr %f8, %f12", + )); + + insns.push(( + Inst::FpuRRRR { + fpu_op: FPUOp3::MAdd32, + rd: writable_fpr(8), + rn: fpr(12), + rm: fpr(13), + }, + "B30E80CD", + "maebr %f8, %f12, %f13", + )); + insns.push(( + Inst::FpuRRRR { + fpu_op: FPUOp3::MAdd64, + rd: writable_fpr(8), + rn: fpr(12), + rm: fpr(13), + }, + "B31E80CD", + "madbr %f8, %f12, %f13", + )); + insns.push(( + Inst::FpuRRRR { + fpu_op: FPUOp3::MSub32, + rd: writable_fpr(8), + rn: fpr(12), + rm: fpr(13), + }, + "B30F80CD", + "msebr %f8, %f12, %f13", + )); + insns.push(( + Inst::FpuRRRR { + fpu_op: FPUOp3::MSub64, + rd: writable_fpr(8), + rn: fpr(12), + rm: fpr(13), + }, + "B31F80CD", + "msdbr %f8, %f12, %f13", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F32ToU32, + rd: writable_gpr(1), + rn: fpr(4), + }, + "B39C5014", + "clfebr %r1, 5, %f4, 0", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F32ToU64, + rd: writable_gpr(1), + rn: fpr(4), + }, + "B3AC5014", + "clgebr %r1, 5, %f4, 0", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F32ToI32, + rd: writable_gpr(1), + rn: fpr(4), + }, + "B3985014", + "cfebra %r1, 5, %f4, 0", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F32ToI64, + rd: writable_gpr(1), + rn: fpr(4), + }, + "B3A85014", + "cgebra %r1, 5, %f4, 0", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F64ToU32, + rd: writable_gpr(1), + rn: fpr(4), + }, + "B39D5014", + "clfdbr %r1, 5, %f4, 0", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F64ToU64, + rd: writable_gpr(1), + rn: fpr(4), + }, + "B3AD5014", + "clgdbr %r1, 5, %f4, 0", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F64ToI32, + rd: writable_gpr(1), + rn: fpr(4), + }, + "B3995014", + "cfdbra %r1, 5, 
%f4, 0", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F64ToI64, + rd: writable_gpr(1), + rn: fpr(4), + }, + "B3A95014", + "cgdbra %r1, 5, %f4, 0", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::U32ToF32, + rd: writable_fpr(1), + rn: gpr(4), + }, + "B3900014", + "celfbr %f1, 0, %r4, 0", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::I32ToF32, + rd: writable_fpr(1), + rn: gpr(4), + }, + "B3940014", + "cefbra %f1, 0, %r4, 0", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::U32ToF64, + rd: writable_fpr(1), + rn: gpr(4), + }, + "B3910014", + "cdlfbr %f1, 0, %r4, 0", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::I32ToF64, + rd: writable_fpr(1), + rn: gpr(4), + }, + "B3950014", + "cdfbra %f1, 0, %r4, 0", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::U64ToF32, + rd: writable_fpr(1), + rn: gpr(4), + }, + "B3A00014", + "celgbr %f1, 0, %r4, 0", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::I64ToF32, + rd: writable_fpr(1), + rn: gpr(4), + }, + "B3A40014", + "cegbra %f1, 0, %r4, 0", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::U64ToF64, + rd: writable_fpr(1), + rn: gpr(4), + }, + "B3A10014", + "cdlgbr %f1, 0, %r4, 0", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::I64ToF64, + rd: writable_fpr(1), + rn: gpr(4), + }, + "B3A50014", + "cdgbra %f1, 0, %r4, 0", + )); + + insns.push(( + Inst::FpuCopysign { + rd: writable_fpr(4), + rn: fpr(8), + rm: fpr(12), + }, + "B372C048", + "cpsdr %f4, %f12, %f8", + )); + + insns.push(( + Inst::FpuCmp32 { + rn: fpr(8), + rm: fpr(12), + }, + "B309008C", + "cebr %f8, %f12", + )); + insns.push(( + Inst::FpuCmp64 { + rn: fpr(8), + rm: fpr(12), + }, + "B319008C", + "cdbr %f8, %f12", + )); + + insns.push(( + Inst::FpuLoad32 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "78102000", + "le %f1, 0(%r2)", + )); + insns.push(( + 
Inst::FpuLoad32 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "78102FFF", + "le %f1, 4095(%r2)", + )); + insns.push(( + Inst::FpuLoad32 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED1020008064", + "ley %f1, -524288(%r2)", + )); + insns.push(( + Inst::FpuLoad32 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED102FFF7F64", + "ley %f1, 524287(%r2)", + )); + insns.push(( + Inst::FpuLoad32 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "78123000", + "le %f1, 0(%r2,%r3)", + )); + insns.push(( + Inst::FpuLoad32 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "78123FFF", + "le %f1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::FpuLoad32 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED1230008064", + "ley %f1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::FpuLoad32 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED123FFF7F64", + "ley %f1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::FpuLoad64 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "68102000", + "ld %f1, 0(%r2)", + )); + insns.push(( + Inst::FpuLoad64 { + rd: 
writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "68102FFF", + "ld %f1, 4095(%r2)", + )); + insns.push(( + Inst::FpuLoad64 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED1020008065", + "ldy %f1, -524288(%r2)", + )); + insns.push(( + Inst::FpuLoad64 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED102FFF7F65", + "ldy %f1, 524287(%r2)", + )); + insns.push(( + Inst::FpuLoad64 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "68123000", + "ld %f1, 0(%r2,%r3)", + )); + insns.push(( + Inst::FpuLoad64 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "68123FFF", + "ld %f1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::FpuLoad64 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED1230008065", + "ldy %f1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::FpuLoad64 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED123FFF7F65", + "ldy %f1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::FpuStore32 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "70102000", + "ste %f1, 0(%r2)", + )); + insns.push(( + Inst::FpuStore32 { + rd: fpr(1), + mem: MemArg::BXD12 { + 
base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "70102FFF", + "ste %f1, 4095(%r2)", + )); + insns.push(( + Inst::FpuStore32 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED1020008066", + "stey %f1, -524288(%r2)", + )); + insns.push(( + Inst::FpuStore32 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED102FFF7F66", + "stey %f1, 524287(%r2)", + )); + insns.push(( + Inst::FpuStore32 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "70123000", + "ste %f1, 0(%r2,%r3)", + )); + insns.push(( + Inst::FpuStore32 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "70123FFF", + "ste %f1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::FpuStore32 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED1230008066", + "stey %f1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::FpuStore32 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED123FFF7F66", + "stey %f1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::FpuStore64 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "60102000", + "std %f1, 0(%r2)", + )); + insns.push(( + Inst::FpuStore64 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + 
flags: MemFlags::trusted(), + }, + }, + "60102FFF", + "std %f1, 4095(%r2)", + )); + insns.push(( + Inst::FpuStore64 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED1020008067", + "stdy %f1, -524288(%r2)", + )); + insns.push(( + Inst::FpuStore64 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED102FFF7F67", + "stdy %f1, 524287(%r2)", + )); + insns.push(( + Inst::FpuStore64 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "60123000", + "std %f1, 0(%r2,%r3)", + )); + insns.push(( + Inst::FpuStore64 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "60123FFF", + "std %f1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::FpuStore64 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED1230008067", + "stdy %f1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::FpuStore64 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED123FFF7F67", + "stdy %f1, 524287(%r2,%r3)", + )); + + insns.push(( + Inst::FpuLoadRev32 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E61020000003", + "vlebrf %f1, 0(%r2), 0", + )); + insns.push(( + Inst::FpuLoadRev32 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + 
"E6102FFF0003", + "vlebrf %f1, 4095(%r2), 0", + )); + insns.push(( + Inst::FpuLoadRev32 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008071E61010000003", + "lay %r1, -524288(%r2) ; vlebrf %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuLoadRev32 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F71E61010000003", + "lay %r1, 524287(%r2) ; vlebrf %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuLoadRev32 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E61230000003", + "vlebrf %f1, 0(%r2,%r3), 0", + )); + insns.push(( + Inst::FpuLoadRev32 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E6123FFF0003", + "vlebrf %f1, 4095(%r2,%r3), 0", + )); + insns.push(( + Inst::FpuLoadRev32 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008071E61010000003", + "lay %r1, -524288(%r2,%r3) ; vlebrf %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuLoadRev32 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F71E61010000003", + "lay %r1, 524287(%r2,%r3) ; vlebrf %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuLoadRev64 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E61020000002", + "vlebrg %f1, 0(%r2), 0", + )); + insns.push(( + 
Inst::FpuLoadRev64 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E6102FFF0002", + "vlebrg %f1, 4095(%r2), 0", + )); + insns.push(( + Inst::FpuLoadRev64 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008071E61010000002", + "lay %r1, -524288(%r2) ; vlebrg %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuLoadRev64 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F71E61010000002", + "lay %r1, 524287(%r2) ; vlebrg %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuLoadRev64 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E61230000002", + "vlebrg %f1, 0(%r2,%r3), 0", + )); + insns.push(( + Inst::FpuLoadRev64 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E6123FFF0002", + "vlebrg %f1, 4095(%r2,%r3), 0", + )); + insns.push(( + Inst::FpuLoadRev64 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008071E61010000002", + "lay %r1, -524288(%r2,%r3) ; vlebrg %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuLoadRev64 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F71E61010000002", + "lay %r1, 524287(%r2,%r3) ; vlebrg %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuStoreRev32 { + rd: fpr(1), + mem: 
MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E6102000000B", + "vstebrf %f1, 0(%r2), 0", + )); + insns.push(( + Inst::FpuStoreRev32 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E6102FFF000B", + "vstebrf %f1, 4095(%r2), 0", + )); + insns.push(( + Inst::FpuStoreRev32 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008071E6101000000B", + "lay %r1, -524288(%r2) ; vstebrf %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuStoreRev32 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F71E6101000000B", + "lay %r1, 524287(%r2) ; vstebrf %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuStoreRev32 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E6123000000B", + "vstebrf %f1, 0(%r2,%r3), 0", + )); + insns.push(( + Inst::FpuStoreRev32 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E6123FFF000B", + "vstebrf %f1, 4095(%r2,%r3), 0", + )); + insns.push(( + Inst::FpuStoreRev32 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008071E6101000000B", + "lay %r1, -524288(%r2,%r3) ; vstebrf %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuStoreRev32 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + 
"E3123FFF7F71E6101000000B", + "lay %r1, 524287(%r2,%r3) ; vstebrf %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuStoreRev64 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E6102000000A", + "vstebrg %f1, 0(%r2), 0", + )); + insns.push(( + Inst::FpuStoreRev64 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E6102FFF000A", + "vstebrg %f1, 4095(%r2), 0", + )); + insns.push(( + Inst::FpuStoreRev64 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008071E6101000000A", + "lay %r1, -524288(%r2) ; vstebrg %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuStoreRev64 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F71E6101000000A", + "lay %r1, 524287(%r2) ; vstebrg %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuStoreRev64 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E6123000000A", + "vstebrg %f1, 0(%r2,%r3), 0", + )); + insns.push(( + Inst::FpuStoreRev64 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E6123FFF000A", + "vstebrg %f1, 4095(%r2,%r3), 0", + )); + insns.push(( + Inst::FpuStoreRev64 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008071E6101000000A", + "lay %r1, -524288(%r2,%r3) ; vstebrg %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuStoreRev64 { + rd: fpr(1), + mem: 
MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F71E6101000000A", + "lay %r1, 524287(%r2,%r3) ; vstebrg %f1, 0(%r1), 0", + )); + + insns.push(( + Inst::LoadFpuConst32 { + rd: writable_fpr(8), + const_data: 1.0, + }, + "A71500043F80000078801000", + "bras %r1, 8 ; data.f32 1 ; le %f8, 0(%r1)", + )); + insns.push(( + Inst::LoadFpuConst64 { + rd: writable_fpr(8), + const_data: 1.0, + }, + "A71500063FF000000000000068801000", + "bras %r1, 12 ; data.f64 1 ; ld %f8, 0(%r1)", + )); + + insns.push(( + Inst::FpuRound { + rd: writable_fpr(8), + rn: fpr(12), + op: FpuRoundMode::Minus32, + }, + "B357708C", + "fiebr %f8, %f12, 7", + )); + insns.push(( + Inst::FpuRound { + rd: writable_fpr(8), + rn: fpr(12), + op: FpuRoundMode::Minus64, + }, + "B35F708C", + "fidbr %f8, %f12, 7", + )); + insns.push(( + Inst::FpuRound { + rd: writable_fpr(8), + rn: fpr(12), + op: FpuRoundMode::Plus32, + }, + "B357608C", + "fiebr %f8, %f12, 6", + )); + insns.push(( + Inst::FpuRound { + rd: writable_fpr(8), + rn: fpr(12), + op: FpuRoundMode::Plus64, + }, + "B35F608C", + "fidbr %f8, %f12, 6", + )); + insns.push(( + Inst::FpuRound { + rd: writable_fpr(8), + rn: fpr(12), + op: FpuRoundMode::Zero32, + }, + "B357508C", + "fiebr %f8, %f12, 5", + )); + insns.push(( + Inst::FpuRound { + rd: writable_fpr(8), + rn: fpr(12), + op: FpuRoundMode::Zero64, + }, + "B35F508C", + "fidbr %f8, %f12, 5", + )); + insns.push(( + Inst::FpuRound { + rd: writable_fpr(8), + rn: fpr(12), + op: FpuRoundMode::Nearest32, + }, + "B357408C", + "fiebr %f8, %f12, 4", + )); + insns.push(( + Inst::FpuRound { + rd: writable_fpr(8), + rn: fpr(12), + op: FpuRoundMode::Nearest64, + }, + "B35F408C", + "fidbr %f8, %f12, 4", + )); + + insns.push(( + Inst::FpuVecRRR { + fpu_op: FPUOp2::Max32, + rd: writable_fpr(4), + rn: fpr(6), + rm: fpr(8), + }, + "E746801820EF", + "wfmaxsb %f4, %f6, %f8, 1", + )); + insns.push(( + Inst::FpuVecRRR { + fpu_op: 
FPUOp2::Max64, + rd: writable_fpr(4), + rn: fpr(6), + rm: fpr(8), + }, + "E746801830EF", + "wfmaxdb %f4, %f6, %f8, 1", + )); + insns.push(( + Inst::FpuVecRRR { + fpu_op: FPUOp2::Min32, + rd: writable_fpr(4), + rn: fpr(6), + rm: fpr(8), + }, + "E746801820EE", + "wfminsb %f4, %f6, %f8, 1", + )); + insns.push(( + Inst::FpuVecRRR { + fpu_op: FPUOp2::Min64, + rd: writable_fpr(4), + rn: fpr(6), + rm: fpr(8), + }, + "E746801830EE", + "wfmindb %f4, %f6, %f8, 1", + )); + + let flags = settings::Flags::new(settings::builder()); + let rru = create_reg_universe(&flags); + let emit_info = EmitInfo::new(flags); + for (insn, expected_encoding, expected_printing) in insns { + println!( + "S390x: {:?}, {}, {}", + insn, expected_encoding, expected_printing + ); + + // Check the printed text is as expected. + let actual_printing = insn.show_rru(Some(&rru)); + assert_eq!(expected_printing, actual_printing); + + let mut sink = test_utils::TestCodeSink::new(); + let mut buffer = MachBuffer::new(); + insn.emit(&mut buffer, &emit_info, &mut Default::default()); + let buffer = buffer.finish(); + buffer.emit(&mut sink); + let actual_encoding = &sink.stringify(); + assert_eq!(expected_encoding, actual_encoding); + } +} diff --git a/cranelift/codegen/src/isa/s390x/inst/imms.rs b/cranelift/codegen/src/isa/s390x/inst/imms.rs new file mode 100644 index 0000000000..b1a459ea68 --- /dev/null +++ b/cranelift/codegen/src/isa/s390x/inst/imms.rs @@ -0,0 +1,231 @@ +//! S390x ISA definitions: immediate constants. + +use regalloc::{PrettyPrint, RealRegUniverse}; +use std::string::String; + +/// An unsigned 12-bit immediate. +#[derive(Clone, Copy, Debug)] +pub struct UImm12 { + /// The value. + value: u16, +} + +impl UImm12 { + pub fn maybe_from_u64(value: u64) -> Option { + if value < 4096 { + Some(UImm12 { + value: value as u16, + }) + } else { + None + } + } + + /// Create a zero immediate of this format. + pub fn zero() -> UImm12 { + UImm12 { value: 0 } + } + + /// Bits for encoding. 
+ pub fn bits(&self) -> u32 { + u32::from(self.value) + } +} + +/// A signed 20-bit immediate. +#[derive(Clone, Copy, Debug)] +pub struct SImm20 { + /// The value. + value: i32, +} + +impl SImm20 { + pub fn maybe_from_i64(value: i64) -> Option { + if value >= -524288 && value < 524288 { + Some(SImm20 { + value: value as i32, + }) + } else { + None + } + } + + pub fn from_uimm12(value: UImm12) -> SImm20 { + SImm20 { + value: value.bits() as i32, + } + } + + /// Create a zero immediate of this format. + pub fn zero() -> SImm20 { + SImm20 { value: 0 } + } + + /// Bits for encoding. + pub fn bits(&self) -> u32 { + let encoded: u32 = self.value as u32; + encoded & 0xfffff + } +} + +/// A 16-bit immediate with a {0,16,32,48}-bit shift. +#[derive(Clone, Copy, Debug)] +pub struct UImm16Shifted { + /// The value. + pub bits: u16, + /// Result is `bits` shifted 16*shift bits to the left. + pub shift: u8, +} + +impl UImm16Shifted { + /// Construct a UImm16Shifted from an arbitrary 64-bit constant if possible. 
+ pub fn maybe_from_u64(value: u64) -> Option { + let mask0 = 0x0000_0000_0000_ffffu64; + let mask1 = 0x0000_0000_ffff_0000u64; + let mask2 = 0x0000_ffff_0000_0000u64; + let mask3 = 0xffff_0000_0000_0000u64; + + if value == (value & mask0) { + return Some(UImm16Shifted { + bits: (value & mask0) as u16, + shift: 0, + }); + } + if value == (value & mask1) { + return Some(UImm16Shifted { + bits: ((value >> 16) & mask0) as u16, + shift: 1, + }); + } + if value == (value & mask2) { + return Some(UImm16Shifted { + bits: ((value >> 32) & mask0) as u16, + shift: 2, + }); + } + if value == (value & mask3) { + return Some(UImm16Shifted { + bits: ((value >> 48) & mask0) as u16, + shift: 3, + }); + } + None + } + + pub fn maybe_with_shift(imm: u16, shift: u8) -> Option { + let shift_enc = shift / 16; + if shift_enc > 3 { + None + } else { + Some(UImm16Shifted { + bits: imm, + shift: shift_enc, + }) + } + } + + pub fn negate_bits(&self) -> UImm16Shifted { + UImm16Shifted { + bits: !self.bits, + shift: self.shift, + } + } + + /// Returns the value that this constant represents. + pub fn value(&self) -> u64 { + (self.bits as u64) << (16 * self.shift) + } +} + +/// A 32-bit immediate with a {0,32}-bit shift. +#[derive(Clone, Copy, Debug)] +pub struct UImm32Shifted { + /// The value. + pub bits: u32, + /// Result is `bits` shifted 32*shift bits to the left. + pub shift: u8, +} + +impl UImm32Shifted { + /// Construct a UImm32Shifted from an arbitrary 64-bit constant if possible. 
+ pub fn maybe_from_u64(value: u64) -> Option { + let mask0 = 0x0000_0000_ffff_ffffu64; + let mask1 = 0xffff_ffff_0000_0000u64; + + if value == (value & mask0) { + return Some(UImm32Shifted { + bits: (value & mask0) as u32, + shift: 0, + }); + } + if value == (value & mask1) { + return Some(UImm32Shifted { + bits: ((value >> 32) & mask0) as u32, + shift: 1, + }); + } + None + } + + pub fn maybe_with_shift(imm: u32, shift: u8) -> Option { + let shift_enc = shift / 32; + if shift_enc > 3 { + None + } else { + Some(UImm32Shifted { + bits: imm, + shift: shift_enc, + }) + } + } + + pub fn from_uimm16shifted(value: UImm16Shifted) -> UImm32Shifted { + if value.shift % 2 == 0 { + UImm32Shifted { + bits: value.bits as u32, + shift: value.shift / 2, + } + } else { + UImm32Shifted { + bits: (value.bits as u32) << 16, + shift: value.shift / 2, + } + } + } + + pub fn negate_bits(&self) -> UImm32Shifted { + UImm32Shifted { + bits: !self.bits, + shift: self.shift, + } + } + + /// Returns the value that this constant represents. + pub fn value(&self) -> u64 { + (self.bits as u64) << (32 * self.shift) + } +} + +impl PrettyPrint for UImm12 { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("{}", self.value) + } +} + +impl PrettyPrint for SImm20 { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("{}", self.value) + } +} + +impl PrettyPrint for UImm16Shifted { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("{}", self.bits) + } +} + +impl PrettyPrint for UImm32Shifted { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("{}", self.bits) + } +} diff --git a/cranelift/codegen/src/isa/s390x/inst/mod.rs b/cranelift/codegen/src/isa/s390x/inst/mod.rs new file mode 100644 index 0000000000..ae4c36815b --- /dev/null +++ b/cranelift/codegen/src/isa/s390x/inst/mod.rs @@ -0,0 +1,3411 @@ +//! This module defines s390x-specific machine instruction types. 
+ +// Some variants are not constructed, but we still want them as options in the future. +#![allow(dead_code)] + +use crate::binemit::CodeOffset; +use crate::ir::{types, ExternalName, Opcode, TrapCode, Type, ValueLabel}; +use crate::isa::unwind::UnwindInst; +use crate::machinst::*; +use crate::{settings, CodegenError, CodegenResult}; + +use regalloc::{PrettyPrint, RegUsageCollector, RegUsageMapper}; +use regalloc::{RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable}; + +use alloc::boxed::Box; +use alloc::vec::Vec; +use core::convert::TryFrom; +use smallvec::{smallvec, SmallVec}; +use std::string::{String, ToString}; + +pub mod regs; +pub use self::regs::*; +pub mod imms; +pub use self::imms::*; +pub mod args; +pub use self::args::*; +pub mod emit; +pub use self::emit::*; +pub mod unwind; + +#[cfg(test)] +mod emit_tests; + +//============================================================================= +// Instructions (top level): definition + +/// An ALU operation. This can be paired with several instruction formats +/// below (see `Inst`) in any combination. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum ALUOp { + Add32, + Add32Ext16, + Add64, + Add64Ext16, + Add64Ext32, + Sub32, + Sub32Ext16, + Sub64, + Sub64Ext16, + Sub64Ext32, + Mul32, + Mul32Ext16, + Mul64, + Mul64Ext16, + Mul64Ext32, + And32, + And64, + Orr32, + Orr64, + Xor32, + Xor64, + /// NAND + AndNot32, + AndNot64, + /// NOR + OrrNot32, + OrrNot64, + /// XNOR + XorNot32, + XorNot64, +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum UnaryOp { + Abs32, + Abs64, + Abs64Ext32, + Neg32, + Neg64, + Neg64Ext32, + PopcntByte, + PopcntReg, +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum ShiftOp { + RotL32, + RotL64, + LShL32, + LShL64, + LShR32, + LShR64, + AShR32, + AShR64, +} + +/// An integer comparison operation. 
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum CmpOp { + CmpS32, + CmpS32Ext16, + CmpS64, + CmpS64Ext16, + CmpS64Ext32, + CmpL32, + CmpL32Ext16, + CmpL64, + CmpL64Ext16, + CmpL64Ext32, +} + +/// A floating-point unit (FPU) operation with one arg. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum FPUOp1 { + Abs32, + Abs64, + Neg32, + Neg64, + NegAbs32, + NegAbs64, + Sqrt32, + Sqrt64, + Cvt32To64, + Cvt64To32, +} + +/// A floating-point unit (FPU) operation with two args. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum FPUOp2 { + Add32, + Add64, + Sub32, + Sub64, + Mul32, + Mul64, + Div32, + Div64, + Max32, + Max64, + Min32, + Min64, +} + +/// A floating-point unit (FPU) operation with three args. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum FPUOp3 { + MAdd32, + MAdd64, + MSub32, + MSub64, +} + +/// A conversion from an FP to an integer value. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum FpuToIntOp { + F32ToU32, + F32ToI32, + F32ToU64, + F32ToI64, + F64ToU32, + F64ToI32, + F64ToU64, + F64ToI64, +} + +/// A conversion from an integer to an FP value. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum IntToFpuOp { + U32ToF32, + I32ToF32, + U32ToF64, + I32ToF64, + U64ToF32, + I64ToF32, + U64ToF64, + I64ToF64, +} + +/// Modes for FP rounding ops: round down (floor) or up (ceil), or toward zero (trunc), or to +/// nearest, and for 32- or 64-bit FP values. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum FpuRoundMode { + Minus32, + Minus64, + Plus32, + Plus64, + Zero32, + Zero64, + Nearest32, + Nearest64, +} + +/// Additional information for (direct) Call instructions, left out of line to lower the size of +/// the Inst enum. +#[derive(Clone, Debug)] +pub struct CallInfo { + pub dest: ExternalName, + pub uses: Vec, + pub defs: Vec>, + pub opcode: Opcode, +} + +/// Additional information for CallInd instructions, left out of line to lower the size of the Inst +/// enum. 
+#[derive(Clone, Debug)] +pub struct CallIndInfo { + pub rn: Reg, + pub uses: Vec, + pub defs: Vec>, + pub opcode: Opcode, +} + +/// Additional information for JTSequence instructions, left out of line to lower the size of the Inst +/// enum. +#[derive(Clone, Debug)] +pub struct JTSequenceInfo { + pub default_target: BranchTarget, + pub targets: Vec, + pub targets_for_term: Vec, // needed for MachTerminator. +} + +/// Instruction formats. +#[derive(Clone, Debug)] +pub enum Inst { + /// A no-op of zero size. + Nop0, + + /// A no-op of size two bytes. + Nop2, + + /// An ALU operation with two register sources and a register destination. + AluRRR { + alu_op: ALUOp, + rd: Writable, + rn: Reg, + rm: Reg, + }, + /// An ALU operation with a register source and a signed 16-bit + /// immediate source, and a separate register destination. + AluRRSImm16 { + alu_op: ALUOp, + rd: Writable, + rn: Reg, + imm: i16, + }, + /// An ALU operation with a register in-/out operand and + /// a second register source. + AluRR { + alu_op: ALUOp, + rd: Writable, + rm: Reg, + }, + /// An ALU operation with a register in-/out operand and + /// a memory source. + AluRX { + alu_op: ALUOp, + rd: Writable, + mem: MemArg, + }, + /// An ALU operation with a register in-/out operand and a signed 16-bit + /// immediate source. + AluRSImm16 { + alu_op: ALUOp, + rd: Writable, + imm: i16, + }, + /// An ALU operation with a register in-/out operand and a signed 32-bit + /// immediate source. + AluRSImm32 { + alu_op: ALUOp, + rd: Writable, + imm: i32, + }, + /// An ALU operation with a register in-/out operand and an unsigned 32-bit + /// immediate source. + AluRUImm32 { + alu_op: ALUOp, + rd: Writable, + imm: u32, + }, + /// An ALU operation with a register in-/out operand and a shifted 16-bit + /// immediate source. + AluRUImm16Shifted { + alu_op: ALUOp, + rd: Writable, + imm: UImm16Shifted, + }, + /// An ALU operation with a register in-/out operand and a shifted 32-bit + /// immediate source. 
+ AluRUImm32Shifted { + alu_op: ALUOp, + rd: Writable, + imm: UImm32Shifted, + }, + /// A multiply operation with two register sources and a register pair destination. + /// FIXME: The pair is hard-coded as %r0/%r1 because regalloc cannot handle pairs. + SMulWide { + rn: Reg, + rm: Reg, + }, + /// A multiply operation with an in/out register pair, and an extra register source. + /// Only the lower half of the register pair is used as input. + /// FIXME: The pair is hard-coded as %r0/%r1 because regalloc cannot handle pairs. + UMulWide { + rn: Reg, + }, + /// A divide operation with an in/out register pair, and an extra register source. + /// Only the lower half of the register pair is used as input. + /// FIXME: The pair is hard-coded as %r0/%r1 because regalloc cannot handle pairs. + SDivMod32 { + rn: Reg, + }, + SDivMod64 { + rn: Reg, + }, + /// A divide operation with an in/out register pair, and an extra register source. + /// FIXME: The pair is hard-coded as %r0/%r1 because regalloc cannot handle pairs. + UDivMod32 { + rn: Reg, + }, + UDivMod64 { + rn: Reg, + }, + /// A FLOGR operation with a register source and a register pair destination. + /// FIXME: The pair is hard-coded as %r0/%r1 because regalloc cannot handle pairs. + Flogr { + rn: Reg, + }, + + /// A shift instruction with a register source, a register destination, + /// and an immediate plus an optional register as shift count. + ShiftRR { + shift_op: ShiftOp, + rd: Writable, + rn: Reg, + shift_imm: SImm20, + shift_reg: Option, + }, + + /// An unary operation with a register source and a register destination. + UnaryRR { + op: UnaryOp, + rd: Writable, + rn: Reg, + }, + + /// A compare operation with two register sources. + CmpRR { + op: CmpOp, + rn: Reg, + rm: Reg, + }, + /// A compare operation with a register source and a memory source. + CmpRX { + op: CmpOp, + rn: Reg, + mem: MemArg, + }, + /// A compare operation with a register source and a signed 16-bit + /// immediate source. 
+ CmpRSImm16 { + op: CmpOp, + rn: Reg, + imm: i16, + }, + /// A compare operation with a register source and a signed 32-bit + /// immediate source. + CmpRSImm32 { + op: CmpOp, + rn: Reg, + imm: i32, + }, + /// A compare operation with a register source and a unsigned 32-bit + /// immediate source. + CmpRUImm32 { + op: CmpOp, + rn: Reg, + imm: u32, + }, + /// A compare-and-trap instruction with two register sources. + CmpTrapRR { + op: CmpOp, + rn: Reg, + rm: Reg, + cond: Cond, + trap_code: TrapCode, + }, + /// A compare-and-trap operation with a register source and a signed 16-bit + /// immediate source. + CmpTrapRSImm16 { + op: CmpOp, + rn: Reg, + imm: i16, + cond: Cond, + trap_code: TrapCode, + }, + /// A compare-and-trap operation with a register source and an unsigned 16-bit + /// immediate source. + CmpTrapRUImm16 { + op: CmpOp, + rn: Reg, + imm: u16, + cond: Cond, + trap_code: TrapCode, + }, + + /// A 32-bit load. + Load32 { + rd: Writable, + mem: MemArg, + }, + /// An unsigned (zero-extending) 8-bit to 32-bit load. + Load32ZExt8 { + rd: Writable, + mem: MemArg, + }, + /// A signed (sign-extending) 8-bit to 32-bit load. + Load32SExt8 { + rd: Writable, + mem: MemArg, + }, + /// An unsigned (zero-extending) 16-bit to 32-bit load. + Load32ZExt16 { + rd: Writable, + mem: MemArg, + }, + /// A signed (sign-extending) 16-bit to 32-bit load. + Load32SExt16 { + rd: Writable, + mem: MemArg, + }, + /// A 64-bit load. + Load64 { + rd: Writable, + mem: MemArg, + }, + /// An unsigned (zero-extending) 8-bit to 64-bit load. + Load64ZExt8 { + rd: Writable, + mem: MemArg, + }, + /// A signed (sign-extending) 8-bit to 64-bit load. + Load64SExt8 { + rd: Writable, + mem: MemArg, + }, + /// An unsigned (zero-extending) 16-bit to 64-bit load. + Load64ZExt16 { + rd: Writable, + mem: MemArg, + }, + /// A signed (sign-extending) 16-bit to 64-bit load. + Load64SExt16 { + rd: Writable, + mem: MemArg, + }, + /// An unsigned (zero-extending) 32-bit to 64-bit load. 
+ Load64ZExt32 { + rd: Writable, + mem: MemArg, + }, + /// A signed (sign-extending) 32-bit to 64-bit load. + Load64SExt32 { + rd: Writable, + mem: MemArg, + }, + + /// A 16-bit byte-reversed load. + LoadRev16 { + rd: Writable, + mem: MemArg, + }, + /// A 32-bit byte-reversed load. + LoadRev32 { + rd: Writable, + mem: MemArg, + }, + /// A 64-bit byte-reversed load. + LoadRev64 { + rd: Writable, + mem: MemArg, + }, + + /// An 8-bit store. + Store8 { + rd: Reg, + mem: MemArg, + }, + /// A 16-bit store. + Store16 { + rd: Reg, + mem: MemArg, + }, + /// A 32-bit store. + Store32 { + rd: Reg, + mem: MemArg, + }, + /// A 64-bit store. + Store64 { + rd: Reg, + mem: MemArg, + }, + /// An 8-bit store of an immediate. + StoreImm8 { + imm: u8, + mem: MemArg, + }, + /// A 16-bit store of an immediate. + StoreImm16 { + imm: i16, + mem: MemArg, + }, + /// A 32-bit store of a sign-extended 16-bit immediate. + StoreImm32SExt16 { + imm: i16, + mem: MemArg, + }, + /// A 64-bit store of a sign-extended 16-bit immediate. + StoreImm64SExt16 { + imm: i16, + mem: MemArg, + }, + + /// A 16-bit byte-reversed store. + StoreRev16 { + rd: Reg, + mem: MemArg, + }, + /// A 32-bit byte-reversed store. + StoreRev32 { + rd: Reg, + mem: MemArg, + }, + /// A 64-bit byte-reversed store. + StoreRev64 { + rd: Reg, + mem: MemArg, + }, + + /// A load-multiple instruction. + LoadMultiple64 { + rt: Writable, + rt2: Writable, + addr_reg: Reg, + addr_off: SImm20, + }, + /// A store-multiple instruction. + StoreMultiple64 { + rt: Reg, + rt2: Reg, + addr_reg: Reg, + addr_off: SImm20, + }, + + /// A 32-bit move instruction. + Mov32 { + rd: Writable, + rm: Reg, + }, + /// A 64-bit move instruction. + Mov64 { + rd: Writable, + rm: Reg, + }, + /// A 32-bit move instruction with a full 32-bit immediate. + Mov32Imm { + rd: Writable, + imm: u32, + }, + /// A 32-bit move instruction with a 16-bit signed immediate. 
+ Mov32SImm16 { + rd: Writable, + imm: i16, + }, + /// A 64-bit move instruction with a 16-bit signed immediate. + Mov64SImm16 { + rd: Writable, + imm: i16, + }, + /// A 64-bit move instruction with a 32-bit signed immediate. + Mov64SImm32 { + rd: Writable, + imm: i32, + }, + /// A 64-bit move instruction with a shifted 16-bit immediate. + Mov64UImm16Shifted { + rd: Writable, + imm: UImm16Shifted, + }, + /// A 64-bit move instruction with a shifted 32-bit immediate. + Mov64UImm32Shifted { + rd: Writable, + imm: UImm32Shifted, + }, + + /// A 64-bit insert instruction with a shifted 16-bit immediate. + Insert64UImm16Shifted { + rd: Writable, + imm: UImm16Shifted, + }, + /// A 64-bit insert instruction with a shifted 32-bit immediate. + Insert64UImm32Shifted { + rd: Writable, + imm: UImm32Shifted, + }, + + /// A sign- or zero-extend operation. + Extend { + rd: Writable, + rn: Reg, + signed: bool, + from_bits: u8, + to_bits: u8, + }, + + /// A 32-bit conditional move instruction. + CMov32 { + rd: Writable, + cond: Cond, + rm: Reg, + }, + /// A 64-bit conditional move instruction. + CMov64 { + rd: Writable, + cond: Cond, + rm: Reg, + }, + /// A 32-bit conditional move instruction with a 16-bit signed immediate. + CMov32SImm16 { + rd: Writable, + cond: Cond, + imm: i16, + }, + /// A 64-bit conditional move instruction with a 16-bit signed immediate. + CMov64SImm16 { + rd: Writable, + cond: Cond, + imm: i16, + }, + + /// 32-bit FPU move. + FpuMove32 { + rd: Writable, + rn: Reg, + }, + /// 64-bit FPU move. + FpuMove64 { + rd: Writable, + rn: Reg, + }, + + /// A 32-bit conditional move FPU instruction. + FpuCMov32 { + rd: Writable, + cond: Cond, + rm: Reg, + }, + /// A 64-bit conditional move FPU instruction. + FpuCMov64 { + rd: Writable, + cond: Cond, + rm: Reg, + }, + + /// A 64-bit move instruction from GPR to FPR. + MovToFpr { + rd: Writable, + rn: Reg, + }, + /// A 64-bit move instruction from FPR to GPR. 
+ MovFromFpr { + rd: Writable, + rn: Reg, + }, + + /// 1-op FPU instruction. + FpuRR { + fpu_op: FPUOp1, + rd: Writable, + rn: Reg, + }, + + /// 2-op FPU instruction. + FpuRRR { + fpu_op: FPUOp2, + rd: Writable, + rm: Reg, + }, + + /// 3-op FPU instruction. + FpuRRRR { + fpu_op: FPUOp3, + rd: Writable, + rn: Reg, + rm: Reg, + }, + + /// FPU copy sign instruction. + FpuCopysign { + rd: Writable, + rn: Reg, + rm: Reg, + }, + + /// FPU comparison, single-precision (32 bit). + FpuCmp32 { + rn: Reg, + rm: Reg, + }, + + /// FPU comparison, double-precision (64 bit). + FpuCmp64 { + rn: Reg, + rm: Reg, + }, + + /// Floating-point load, single-precision (32 bit). + FpuLoad32 { + rd: Writable, + mem: MemArg, + }, + /// Floating-point store, single-precision (32 bit). + FpuStore32 { + rd: Reg, + mem: MemArg, + }, + /// Floating-point load, double-precision (64 bit). + FpuLoad64 { + rd: Writable, + mem: MemArg, + }, + /// Floating-point store, double-precision (64 bit). + FpuStore64 { + rd: Reg, + mem: MemArg, + }, + /// Floating-point byte-reversed load, single-precision (32 bit). + FpuLoadRev32 { + rd: Writable, + mem: MemArg, + }, + /// Floating-point byte-reversed store, single-precision (32 bit). + FpuStoreRev32 { + rd: Reg, + mem: MemArg, + }, + /// Floating-point byte-reversed load, double-precision (64 bit). + FpuLoadRev64 { + rd: Writable, + mem: MemArg, + }, + /// Floating-point byte-reversed store, double-precision (64 bit). + FpuStoreRev64 { + rd: Reg, + mem: MemArg, + }, + + LoadFpuConst32 { + rd: Writable, + const_data: f32, + }, + + LoadFpuConst64 { + rd: Writable, + const_data: f64, + }, + + /// Conversion: FP -> integer. + FpuToInt { + op: FpuToIntOp, + rd: Writable, + rn: Reg, + }, + + /// Conversion: integer -> FP. + IntToFpu { + op: IntToFpuOp, + rd: Writable, + rn: Reg, + }, + + /// Round to integer. + FpuRound { + op: FpuRoundMode, + rd: Writable, + rn: Reg, + }, + + /// 2-op FPU instruction implemented as vector instruction with the W bit. 
+ FpuVecRRR { + fpu_op: FPUOp2, + rd: Writable, + rn: Reg, + rm: Reg, + }, + + /// A machine call instruction. + Call { + link: Writable, + info: Box, + }, + /// A machine indirect-call instruction. + CallInd { + link: Writable, + info: Box, + }, + + // ---- branches (exactly one must appear at end of BB) ---- + /// A machine return instruction. + Ret { + link: Reg, + }, + + /// A placeholder instruction, generating no code, meaning that a function epilogue must be + /// inserted there. + EpiloguePlaceholder, + + /// An unconditional branch. + Jump { + dest: BranchTarget, + }, + + /// A conditional branch. Contains two targets; at emission time, both are emitted, but + /// the MachBuffer knows to truncate the trailing branch if fallthrough. We optimize the + /// choice of taken/not_taken (inverting the branch polarity as needed) based on the + /// fallthrough at the time of lowering. + CondBr { + taken: BranchTarget, + not_taken: BranchTarget, + cond: Cond, + }, + + /// A conditional trap: execute a `Trap` if the condition is true. This is + /// one VCode instruction because it uses embedded control flow; it is + /// logically a single-in, single-out region, but needs to appear as one + /// unit to the register allocator. + /// + /// The `Cond` gives the conditional-branch condition that will + /// *execute* the embedded `Trap`. (In the emitted code, we use the inverse + /// of this condition in a branch that skips the trap instruction.) + TrapIf { + cond: Cond, + trap_code: TrapCode, + }, + + /// A one-way conditional branch, invisible to the CFG processing; used *only* as part of + /// straight-line sequences in code to be emitted. + /// + /// In more detail: + /// - This branch is lowered to a branch at the machine-code level, but does not end a basic + /// block, and does not create edges in the CFG seen by regalloc. + /// - Thus, it is *only* valid to use as part of a single-in, single-out sequence that is + /// lowered from a single CLIF instruction. 
For example, certain arithmetic operations may + /// use these branches to handle certain conditions, such as overflows, traps, etc. + /// + /// See, e.g., the lowering of `trapif` (conditional trap) for an example. + OneWayCondBr { + target: BranchTarget, + cond: Cond, + }, + + /// An indirect branch through a register, augmented with set of all + /// possible successors. + IndirectBr { + rn: Reg, + targets: Vec, + }, + + /// A "debugtrap" instruction, used for e.g. traps and debug breakpoints. + Debugtrap, + + /// An instruction guaranteed to always be undefined and to trigger an illegal instruction at + /// runtime. + Trap { + trap_code: TrapCode, + }, + + /// Jump-table sequence, as one compound instruction (see note in lower.rs + /// for rationale). + JTSequence { + info: Box, + ridx: Reg, + rtmp1: Writable, + rtmp2: Writable, + }, + + /// Load an inline symbol reference with RelocDistance::Far. + LoadExtNameFar { + rd: Writable, + name: Box, + offset: i64, + }, + + /// Load address referenced by `mem` into `rd`. + LoadAddr { + rd: Writable, + mem: MemArg, + }, + + /// Marker, no-op in generated code: SP "virtual offset" is adjusted. This + /// controls how MemArg::NominalSPOffset args are lowered. + VirtualSPOffsetAdj { + offset: i64, + }, + + /// A definition of a value label. + ValueLabelMarker { + reg: Reg, + label: ValueLabel, + }, + + /// An unwind pseudoinstruction describing the state of the + /// machine at this program point. + Unwind { + inst: UnwindInst, + }, +} + +fn count_zero_half_words(mut value: u64) -> usize { + let mut count = 0; + for _ in 0..4 { + if value & 0xffff == 0 { + count += 1; + } + value >>= 16; + } + + count +} + +#[test] +fn inst_size_test() { + // This test will help with unintentionally growing the size + // of the Inst enum. + assert_eq!(32, std::mem::size_of::()); +} + +impl Inst { + /// Create a 64-bit move instruction. 
+ pub fn mov64(to_reg: Writable, from_reg: Reg) -> Inst { + assert!(to_reg.to_reg().get_class() == from_reg.get_class()); + if from_reg.get_class() == RegClass::I64 { + Inst::Mov64 { + rd: to_reg, + rm: from_reg, + } + } else { + Inst::FpuMove64 { + rd: to_reg, + rn: from_reg, + } + } + } + + /// Create a 32-bit move instruction. + pub fn mov32(to_reg: Writable, from_reg: Reg) -> Inst { + if from_reg.get_class() == RegClass::I64 { + Inst::Mov32 { + rd: to_reg, + rm: from_reg, + } + } else { + Inst::FpuMove32 { + rd: to_reg, + rn: from_reg, + } + } + } + + /// Create an instruction that loads a 64-bit integer constant. + pub fn load_constant64(rd: Writable, value: u64) -> SmallVec<[Inst; 4]> { + if let Ok(imm) = i16::try_from(value as i64) { + // 16-bit signed immediate + smallvec![Inst::Mov64SImm16 { rd, imm }] + } else if let Ok(imm) = i32::try_from(value as i64) { + // 32-bit signed immediate + smallvec![Inst::Mov64SImm32 { rd, imm }] + } else if let Some(imm) = UImm16Shifted::maybe_from_u64(value) { + // 16-bit shifted immediate + smallvec![Inst::Mov64UImm16Shifted { rd, imm }] + } else if let Some(imm) = UImm32Shifted::maybe_from_u64(value) { + // 32-bit shifted immediate + smallvec![Inst::Mov64UImm32Shifted { rd, imm }] + } else { + let mut insts = smallvec![]; + let hi = value & 0xffff_ffff_0000_0000u64; + let lo = value & 0x0000_0000_ffff_ffffu64; + + if let Some(imm) = UImm16Shifted::maybe_from_u64(hi) { + // 16-bit shifted immediate + insts.push(Inst::Mov64UImm16Shifted { rd, imm }); + } else if let Some(imm) = UImm32Shifted::maybe_from_u64(hi) { + // 32-bit shifted immediate + insts.push(Inst::Mov64UImm32Shifted { rd, imm }); + } else { + unreachable!(); + } + + if let Some(imm) = UImm16Shifted::maybe_from_u64(lo) { + // 16-bit shifted immediate + insts.push(Inst::Insert64UImm16Shifted { rd, imm }); + } else if let Some(imm) = UImm32Shifted::maybe_from_u64(lo) { + // 32-bit shifted immediate + insts.push(Inst::Insert64UImm32Shifted { rd, imm }); + } else 
{ + unreachable!(); + } + + insts + } + } + + /// Create an instruction that loads a 32-bit integer constant. + pub fn load_constant32(rd: Writable, value: u32) -> SmallVec<[Inst; 4]> { + if let Ok(imm) = i16::try_from(value as i32) { + // 16-bit signed immediate + smallvec![Inst::Mov32SImm16 { rd, imm }] + } else { + // 32-bit full immediate + smallvec![Inst::Mov32Imm { rd, imm: value }] + } + } + + /// Create an instruction that loads a 32-bit floating-point constant. + pub fn load_fp_constant32(rd: Writable, value: f32) -> Inst { + // TODO: use LZER to load 0.0 + Inst::LoadFpuConst32 { + rd, + const_data: value, + } + } + + /// Create an instruction that loads a 64-bit floating-point constant. + pub fn load_fp_constant64(rd: Writable, value: f64) -> Inst { + // TODO: use LZDR to load 0.0 + Inst::LoadFpuConst64 { + rd, + const_data: value, + } + } + + /// Generic constructor for a load (zero-extending where appropriate). + pub fn gen_load(into_reg: Writable, mem: MemArg, ty: Type) -> Inst { + match ty { + types::B1 | types::B8 | types::I8 => Inst::Load64ZExt8 { rd: into_reg, mem }, + types::B16 | types::I16 => Inst::Load64ZExt16 { rd: into_reg, mem }, + types::B32 | types::I32 => Inst::Load64ZExt32 { rd: into_reg, mem }, + types::B64 | types::I64 | types::R64 => Inst::Load64 { rd: into_reg, mem }, + types::F32 => Inst::FpuLoad32 { rd: into_reg, mem }, + types::F64 => Inst::FpuLoad64 { rd: into_reg, mem }, + _ => unimplemented!("gen_load({})", ty), + } + } + + /// Generic constructor for a store. 
+ pub fn gen_store(mem: MemArg, from_reg: Reg, ty: Type) -> Inst { + match ty { + types::B1 | types::B8 | types::I8 => Inst::Store8 { rd: from_reg, mem }, + types::B16 | types::I16 => Inst::Store16 { rd: from_reg, mem }, + types::B32 | types::I32 => Inst::Store32 { rd: from_reg, mem }, + types::B64 | types::I64 | types::R64 => Inst::Store64 { rd: from_reg, mem }, + types::F32 => Inst::FpuStore32 { rd: from_reg, mem }, + types::F64 => Inst::FpuStore64 { rd: from_reg, mem }, + _ => unimplemented!("gen_store({})", ty), + } + } +} + +//============================================================================= +// Instructions: get_regs + +fn memarg_regs(memarg: &MemArg, collector: &mut RegUsageCollector) { + match memarg { + &MemArg::BXD12 { base, index, .. } | &MemArg::BXD20 { base, index, .. } => { + if base != zero_reg() { + collector.add_use(base); + } + if index != zero_reg() { + collector.add_use(index); + } + } + &MemArg::Label { .. } | &MemArg::Symbol { .. } => {} + &MemArg::RegOffset { reg, .. } => { + collector.add_use(reg); + } + &MemArg::InitialSPOffset { .. } | &MemArg::NominalSPOffset { .. } => { + collector.add_use(stack_reg()); + } + } +} + +fn s390x_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { + match inst { + &Inst::AluRRR { rd, rn, rm, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::AluRRSImm16 { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::AluRR { rd, rm, .. } => { + collector.add_mod(rd); + collector.add_use(rm); + } + &Inst::AluRX { rd, ref mem, .. } => { + collector.add_mod(rd); + memarg_regs(mem, collector); + } + &Inst::AluRSImm16 { rd, .. } => { + collector.add_mod(rd); + } + &Inst::AluRSImm32 { rd, .. } => { + collector.add_mod(rd); + } + &Inst::AluRUImm32 { rd, .. } => { + collector.add_mod(rd); + } + &Inst::AluRUImm16Shifted { rd, .. } => { + collector.add_mod(rd); + } + &Inst::AluRUImm32Shifted { rd, .. 
} => { + collector.add_mod(rd); + } + &Inst::SMulWide { rn, rm, .. } => { + collector.add_def(writable_gpr(0)); + collector.add_def(writable_gpr(1)); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::UMulWide { rn, .. } => { + collector.add_def(writable_gpr(0)); + collector.add_mod(writable_gpr(1)); + collector.add_use(rn); + } + &Inst::SDivMod32 { rn, .. } | &Inst::SDivMod64 { rn, .. } => { + collector.add_def(writable_gpr(0)); + collector.add_mod(writable_gpr(1)); + collector.add_use(rn); + } + &Inst::UDivMod32 { rn, .. } | &Inst::UDivMod64 { rn, .. } => { + collector.add_mod(writable_gpr(0)); + collector.add_mod(writable_gpr(1)); + collector.add_use(rn); + } + &Inst::Flogr { rn, .. } => { + collector.add_def(writable_gpr(0)); + collector.add_def(writable_gpr(1)); + collector.add_use(rn); + } + &Inst::ShiftRR { + rd, rn, shift_reg, .. + } => { + collector.add_def(rd); + collector.add_use(rn); + if let Some(reg) = shift_reg { + collector.add_use(reg); + } + } + &Inst::UnaryRR { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::CmpRR { rn, rm, .. } => { + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::CmpRX { rn, ref mem, .. } => { + collector.add_use(rn); + memarg_regs(mem, collector); + } + &Inst::CmpRSImm16 { rn, .. } => { + collector.add_use(rn); + } + &Inst::CmpRSImm32 { rn, .. } => { + collector.add_use(rn); + } + &Inst::CmpRUImm32 { rn, .. } => { + collector.add_use(rn); + } + &Inst::CmpTrapRR { rn, rm, .. } => { + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::CmpTrapRSImm16 { rn, .. } => { + collector.add_use(rn); + } + &Inst::CmpTrapRUImm16 { rn, .. } => { + collector.add_use(rn); + } + &Inst::Load32 { rd, ref mem, .. } + | &Inst::Load32ZExt8 { rd, ref mem, .. } + | &Inst::Load32SExt8 { rd, ref mem, .. } + | &Inst::Load32ZExt16 { rd, ref mem, .. } + | &Inst::Load32SExt16 { rd, ref mem, .. } + | &Inst::Load64 { rd, ref mem, .. } + | &Inst::Load64ZExt8 { rd, ref mem, .. 
} + | &Inst::Load64SExt8 { rd, ref mem, .. } + | &Inst::Load64ZExt16 { rd, ref mem, .. } + | &Inst::Load64SExt16 { rd, ref mem, .. } + | &Inst::Load64ZExt32 { rd, ref mem, .. } + | &Inst::Load64SExt32 { rd, ref mem, .. } + | &Inst::LoadRev16 { rd, ref mem, .. } + | &Inst::LoadRev32 { rd, ref mem, .. } + | &Inst::LoadRev64 { rd, ref mem, .. } => { + collector.add_def(rd); + memarg_regs(mem, collector); + } + &Inst::Store8 { rd, ref mem, .. } + | &Inst::Store16 { rd, ref mem, .. } + | &Inst::Store32 { rd, ref mem, .. } + | &Inst::Store64 { rd, ref mem, .. } + | &Inst::StoreRev16 { rd, ref mem, .. } + | &Inst::StoreRev32 { rd, ref mem, .. } + | &Inst::StoreRev64 { rd, ref mem, .. } => { + collector.add_use(rd); + memarg_regs(mem, collector); + } + &Inst::StoreImm8 { ref mem, .. } + | &Inst::StoreImm16 { ref mem, .. } + | &Inst::StoreImm32SExt16 { ref mem, .. } + | &Inst::StoreImm64SExt16 { ref mem, .. } => { + memarg_regs(mem, collector); + } + &Inst::LoadMultiple64 { + rt, rt2, addr_reg, .. + } => { + let first_regnum = rt.to_reg().get_hw_encoding(); + let last_regnum = rt2.to_reg().get_hw_encoding(); + for regnum in first_regnum..last_regnum + 1 { + collector.add_def(writable_gpr(regnum)); + } + collector.add_use(addr_reg); + } + &Inst::StoreMultiple64 { + rt, rt2, addr_reg, .. + } => { + let first_regnum = rt.get_hw_encoding(); + let last_regnum = rt2.get_hw_encoding(); + for regnum in first_regnum..last_regnum + 1 { + collector.add_use(gpr(regnum)); + } + collector.add_use(addr_reg); + } + &Inst::Mov64 { rd, rm } => { + collector.add_def(rd); + collector.add_use(rm); + } + &Inst::Mov32 { rd, rm } => { + collector.add_def(rd); + collector.add_use(rm); + } + &Inst::Mov32Imm { rd, .. } + | &Inst::Mov32SImm16 { rd, .. } + | &Inst::Mov64SImm16 { rd, .. } + | &Inst::Mov64SImm32 { rd, .. } + | &Inst::Mov64UImm16Shifted { rd, .. } + | &Inst::Mov64UImm32Shifted { rd, .. } => { + collector.add_def(rd); + } + &Inst::CMov32 { rd, rm, .. } | &Inst::CMov64 { rd, rm, .. 
} => { + collector.add_mod(rd); + collector.add_use(rm); + } + &Inst::CMov32SImm16 { rd, .. } | &Inst::CMov64SImm16 { rd, .. } => { + collector.add_mod(rd); + } + &Inst::Insert64UImm16Shifted { rd, .. } | &Inst::Insert64UImm32Shifted { rd, .. } => { + collector.add_mod(rd); + } + &Inst::FpuMove32 { rd, rn } | &Inst::FpuMove64 { rd, rn } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::FpuCMov32 { rd, rm, .. } | &Inst::FpuCMov64 { rd, rm, .. } => { + collector.add_mod(rd); + collector.add_use(rm); + } + &Inst::MovToFpr { rd, rn } | &Inst::MovFromFpr { rd, rn } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::FpuRR { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::FpuRRR { rd, rm, .. } => { + collector.add_mod(rd); + collector.add_use(rm); + } + &Inst::FpuRRRR { rd, rn, rm, .. } => { + collector.add_mod(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::FpuCopysign { rd, rn, rm, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => { + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::FpuLoad32 { rd, ref mem, .. } => { + collector.add_def(rd); + memarg_regs(mem, collector); + } + &Inst::FpuLoad64 { rd, ref mem, .. } => { + collector.add_def(rd); + memarg_regs(mem, collector); + } + &Inst::FpuStore32 { rd, ref mem, .. } => { + collector.add_use(rd); + memarg_regs(mem, collector); + } + &Inst::FpuStore64 { rd, ref mem, .. } => { + collector.add_use(rd); + memarg_regs(mem, collector); + } + &Inst::FpuLoadRev32 { rd, ref mem, .. } => { + collector.add_def(rd); + memarg_regs(mem, collector); + } + &Inst::FpuLoadRev64 { rd, ref mem, .. } => { + collector.add_def(rd); + memarg_regs(mem, collector); + } + &Inst::FpuStoreRev32 { rd, ref mem, .. } => { + collector.add_use(rd); + memarg_regs(mem, collector); + } + &Inst::FpuStoreRev64 { rd, ref mem, .. 
} => { + collector.add_use(rd); + memarg_regs(mem, collector); + } + &Inst::LoadFpuConst32 { rd, .. } | &Inst::LoadFpuConst64 { rd, .. } => { + collector.add_def(rd); + } + &Inst::FpuToInt { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::IntToFpu { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::FpuRound { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::FpuVecRRR { rd, rn, rm, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::Extend { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::Call { link, ref info } => { + collector.add_def(link); + collector.add_uses(&*info.uses); + collector.add_defs(&*info.defs); + } + &Inst::CallInd { link, ref info } => { + collector.add_def(link); + collector.add_uses(&*info.uses); + collector.add_defs(&*info.defs); + collector.add_use(info.rn); + } + &Inst::Ret { .. } => {} + &Inst::Jump { .. } | &Inst::EpiloguePlaceholder => {} + &Inst::IndirectBr { rn, .. } => { + collector.add_use(rn); + } + &Inst::CondBr { .. } | &Inst::OneWayCondBr { .. } => {} + &Inst::Nop0 | Inst::Nop2 => {} + &Inst::Debugtrap => {} + &Inst::Trap { .. } => {} + &Inst::TrapIf { .. } => {} + &Inst::JTSequence { + ridx, rtmp1, rtmp2, .. + } => { + collector.add_use(ridx); + collector.add_def(rtmp1); + collector.add_def(rtmp2); + } + &Inst::LoadExtNameFar { rd, .. } => { + collector.add_def(rd); + } + &Inst::LoadAddr { rd, ref mem } => { + collector.add_def(rd); + memarg_regs(mem, collector); + } + &Inst::VirtualSPOffsetAdj { .. } => {} + &Inst::ValueLabelMarker { reg, .. } => { + collector.add_use(reg); + } + &Inst::Unwind { .. 
} => {} + } +} + +//============================================================================= +// Instructions: map_regs + +fn s390x_map_regs(inst: &mut Inst, mapper: &RUM) { + fn map_use(m: &RUM, r: &mut Reg) { + if r.is_virtual() { + let new = m.get_use(r.to_virtual_reg()).unwrap().to_reg(); + *r = new; + } + } + + fn map_def(m: &RUM, r: &mut Writable) { + if r.to_reg().is_virtual() { + let new = m.get_def(r.to_reg().to_virtual_reg()).unwrap().to_reg(); + *r = Writable::from_reg(new); + } + } + + fn map_mod(m: &RUM, r: &mut Writable) { + if r.to_reg().is_virtual() { + let new = m.get_mod(r.to_reg().to_virtual_reg()).unwrap().to_reg(); + *r = Writable::from_reg(new); + } + } + + fn map_mem(m: &RUM, mem: &mut MemArg) { + match mem { + &mut MemArg::BXD12 { + ref mut base, + ref mut index, + .. + } + | &mut MemArg::BXD20 { + ref mut base, + ref mut index, + .. + } => { + if *base != zero_reg() { + map_use(m, base); + } + if *index != zero_reg() { + map_use(m, index); + } + } + &mut MemArg::Label { .. } | &mut MemArg::Symbol { .. } => {} + &mut MemArg::RegOffset { ref mut reg, .. } => map_use(m, reg), + &mut MemArg::InitialSPOffset { .. } | &mut MemArg::NominalSPOffset { .. } => {} + }; + } + + match inst { + &mut Inst::AluRRR { + ref mut rd, + ref mut rn, + ref mut rm, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::AluRRSImm16 { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::AluRX { + ref mut rd, + ref mut mem, + .. + } => { + map_mod(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::AluRR { + ref mut rd, + ref mut rm, + .. + } => { + map_mod(mapper, rd); + map_use(mapper, rm); + } + &mut Inst::AluRSImm16 { ref mut rd, .. } => { + map_mod(mapper, rd); + } + &mut Inst::AluRSImm32 { ref mut rd, .. } => { + map_mod(mapper, rd); + } + &mut Inst::AluRUImm32 { ref mut rd, .. } => { + map_mod(mapper, rd); + } + &mut Inst::AluRUImm16Shifted { ref mut rd, .. 
} => { + map_mod(mapper, rd); + } + &mut Inst::AluRUImm32Shifted { ref mut rd, .. } => { + map_mod(mapper, rd); + } + &mut Inst::SMulWide { + ref mut rn, + ref mut rm, + .. + } => { + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::UMulWide { ref mut rn, .. } => { + map_use(mapper, rn); + } + &mut Inst::SDivMod32 { ref mut rn, .. } => { + map_use(mapper, rn); + } + &mut Inst::SDivMod64 { ref mut rn, .. } => { + map_use(mapper, rn); + } + &mut Inst::UDivMod32 { ref mut rn, .. } => { + map_use(mapper, rn); + } + &mut Inst::UDivMod64 { ref mut rn, .. } => { + map_use(mapper, rn); + } + &mut Inst::Flogr { ref mut rn, .. } => { + map_use(mapper, rn); + } + &mut Inst::ShiftRR { + ref mut rd, + ref mut rn, + ref mut shift_reg, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + if let Some(reg) = shift_reg { + map_use(mapper, reg); + } + } + &mut Inst::UnaryRR { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::CmpRR { + ref mut rn, + ref mut rm, + .. + } => { + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::CmpRX { + ref mut rn, + ref mut mem, + .. + } => { + map_use(mapper, rn); + map_mem(mapper, mem); + } + &mut Inst::CmpRSImm16 { ref mut rn, .. } => { + map_use(mapper, rn); + } + &mut Inst::CmpRSImm32 { ref mut rn, .. } => { + map_use(mapper, rn); + } + &mut Inst::CmpRUImm32 { ref mut rn, .. } => { + map_use(mapper, rn); + } + &mut Inst::CmpTrapRR { + ref mut rn, + ref mut rm, + .. + } => { + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::CmpTrapRSImm16 { ref mut rn, .. } => { + map_use(mapper, rn); + } + &mut Inst::CmpTrapRUImm16 { ref mut rn, .. } => { + map_use(mapper, rn); + } + + &mut Inst::Load32 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Load32ZExt8 { + ref mut rd, + ref mut mem, + .. 
+ } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Load32SExt8 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Load32ZExt16 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Load32SExt16 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Load64 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Load64ZExt8 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Load64SExt8 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Load64ZExt16 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Load64SExt16 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Load64ZExt32 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Load64SExt32 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::LoadRev16 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::LoadRev32 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::LoadRev64 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + + &mut Inst::Store8 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Store16 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Store32 { + ref mut rd, + ref mut mem, + .. 
+ } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Store64 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::StoreImm8 { ref mut mem, .. } => { + map_mem(mapper, mem); + } + &mut Inst::StoreImm16 { ref mut mem, .. } => { + map_mem(mapper, mem); + } + &mut Inst::StoreImm32SExt16 { ref mut mem, .. } => { + map_mem(mapper, mem); + } + &mut Inst::StoreImm64SExt16 { ref mut mem, .. } => { + map_mem(mapper, mem); + } + &mut Inst::StoreRev16 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::StoreRev32 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::StoreRev64 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::LoadMultiple64 { .. } => { + // This instruction accesses all registers between rt and rt2, + // so it cannot be remapped. But this does not matter since + // the instruction is only ever used after register allocation. + unreachable!(); + } + &mut Inst::StoreMultiple64 { .. } => { + // This instruction accesses all registers between rt and rt2, + // so it cannot be remapped. But this does not matter since + // the instruction is only ever used after register allocation. + unreachable!(); + } + + &mut Inst::Mov64 { + ref mut rd, + ref mut rm, + } => { + map_def(mapper, rd); + map_use(mapper, rm); + } + &mut Inst::Mov32 { + ref mut rd, + ref mut rm, + } => { + map_def(mapper, rd); + map_use(mapper, rm); + } + &mut Inst::Mov32Imm { ref mut rd, .. } => { + map_def(mapper, rd); + } + &mut Inst::Mov32SImm16 { ref mut rd, .. } => { + map_def(mapper, rd); + } + &mut Inst::Mov64SImm16 { ref mut rd, .. } => { + map_def(mapper, rd); + } + &mut Inst::Mov64SImm32 { ref mut rd, .. } => { + map_def(mapper, rd); + } + &mut Inst::Mov64UImm16Shifted { ref mut rd, .. 
} => { + map_def(mapper, rd); + } + &mut Inst::Mov64UImm32Shifted { ref mut rd, .. } => { + map_def(mapper, rd); + } + &mut Inst::Insert64UImm16Shifted { ref mut rd, .. } => { + map_mod(mapper, rd); + } + &mut Inst::Insert64UImm32Shifted { ref mut rd, .. } => { + map_mod(mapper, rd); + } + &mut Inst::CMov64 { + ref mut rd, + ref mut rm, + .. + } => { + map_mod(mapper, rd); + map_use(mapper, rm); + } + &mut Inst::CMov32 { + ref mut rd, + ref mut rm, + .. + } => { + map_mod(mapper, rd); + map_use(mapper, rm); + } + &mut Inst::CMov32SImm16 { ref mut rd, .. } => { + map_mod(mapper, rd); + } + &mut Inst::CMov64SImm16 { ref mut rd, .. } => { + map_mod(mapper, rd); + } + &mut Inst::FpuMove32 { + ref mut rd, + ref mut rn, + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::FpuMove64 { + ref mut rd, + ref mut rn, + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::FpuCMov64 { + ref mut rd, + ref mut rm, + .. + } => { + map_mod(mapper, rd); + map_use(mapper, rm); + } + &mut Inst::FpuCMov32 { + ref mut rd, + ref mut rm, + .. + } => { + map_mod(mapper, rd); + map_use(mapper, rm); + } + &mut Inst::MovToFpr { + ref mut rd, + ref mut rn, + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::MovFromFpr { + ref mut rd, + ref mut rn, + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::FpuRR { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::FpuRRR { + ref mut rd, + ref mut rm, + .. + } => { + map_mod(mapper, rd); + map_use(mapper, rm); + } + &mut Inst::FpuRRRR { + ref mut rd, + ref mut rn, + ref mut rm, + .. + } => { + map_mod(mapper, rd); + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::FpuCopysign { + ref mut rd, + ref mut rn, + ref mut rm, + .. 
+ } => { + map_def(mapper, rd); + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::FpuCmp32 { + ref mut rn, + ref mut rm, + } => { + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::FpuCmp64 { + ref mut rn, + ref mut rm, + } => { + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::FpuLoad32 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::FpuLoad64 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::FpuStore32 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::FpuStore64 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::FpuLoadRev32 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::FpuLoadRev64 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::FpuStoreRev32 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::FpuStoreRev64 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::LoadFpuConst32 { ref mut rd, .. } => { + map_def(mapper, rd); + } + &mut Inst::LoadFpuConst64 { ref mut rd, .. } => { + map_def(mapper, rd); + } + &mut Inst::FpuToInt { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::IntToFpu { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::FpuRound { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::FpuVecRRR { + ref mut rd, + ref mut rn, + ref mut rm, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::Extend { + ref mut rd, + ref mut rn, + .. 
+ } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::Call { + ref mut link, + ref mut info, + } => { + map_def(mapper, link); + for r in info.uses.iter_mut() { + map_use(mapper, r); + } + for r in info.defs.iter_mut() { + map_def(mapper, r); + } + } + &mut Inst::CallInd { + ref mut link, + ref mut info, + .. + } => { + map_def(mapper, link); + for r in info.uses.iter_mut() { + map_use(mapper, r); + } + for r in info.defs.iter_mut() { + map_def(mapper, r); + } + map_use(mapper, &mut info.rn); + } + &mut Inst::Ret { .. } => {} + &mut Inst::EpiloguePlaceholder => {} + &mut Inst::Jump { .. } => {} + &mut Inst::IndirectBr { ref mut rn, .. } => { + map_use(mapper, rn); + } + &mut Inst::CondBr { .. } | &mut Inst::OneWayCondBr { .. } => {} + &mut Inst::Debugtrap | &mut Inst::Trap { .. } | &mut Inst::TrapIf { .. } => {} + &mut Inst::Nop0 | &mut Inst::Nop2 => {} + &mut Inst::JTSequence { + ref mut ridx, + ref mut rtmp1, + ref mut rtmp2, + .. + } => { + map_use(mapper, ridx); + map_def(mapper, rtmp1); + map_def(mapper, rtmp2); + } + &mut Inst::LoadExtNameFar { ref mut rd, .. } => { + map_def(mapper, rd); + } + &mut Inst::LoadAddr { + ref mut rd, + ref mut mem, + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::VirtualSPOffsetAdj { .. } => {} + &mut Inst::ValueLabelMarker { ref mut reg, .. } => { + map_use(mapper, reg); + } + &mut Inst::Unwind { .. 
} => {} + } +} + +//============================================================================= +// Instructions: misc functions and external interface + +impl MachInst for Inst { + type LabelUse = LabelUse; + + fn get_regs(&self, collector: &mut RegUsageCollector) { + s390x_get_regs(self, collector) + } + + fn map_regs(&mut self, mapper: &RUM) { + s390x_map_regs(self, mapper); + } + + fn is_move(&self) -> Option<(Writable, Reg)> { + match self { + &Inst::Mov32 { rd, rm } => Some((rd, rm)), + &Inst::Mov64 { rd, rm } => Some((rd, rm)), + &Inst::FpuMove32 { rd, rn } => Some((rd, rn)), + &Inst::FpuMove64 { rd, rn } => Some((rd, rn)), + _ => None, + } + } + + fn is_epilogue_placeholder(&self) -> bool { + if let Inst::EpiloguePlaceholder = self { + true + } else { + false + } + } + + fn is_term<'a>(&'a self) -> MachTerminator<'a> { + match self { + &Inst::Ret { .. } | &Inst::EpiloguePlaceholder => MachTerminator::Ret, + &Inst::Jump { dest } => MachTerminator::Uncond(dest.as_label().unwrap()), + &Inst::CondBr { + taken, not_taken, .. + } => MachTerminator::Cond(taken.as_label().unwrap(), not_taken.as_label().unwrap()), + &Inst::OneWayCondBr { .. } => { + // Explicitly invisible to CFG processing. + MachTerminator::None + } + &Inst::IndirectBr { ref targets, .. } => MachTerminator::Indirect(&targets[..]), + &Inst::JTSequence { ref info, .. 
} => { + MachTerminator::Indirect(&info.targets_for_term[..]) + } + _ => MachTerminator::None, + } + } + + fn stack_op_info(&self) -> Option { + match self { + &Inst::VirtualSPOffsetAdj { offset } => Some(MachInstStackOpInfo::NomSPAdj(offset)), + &Inst::Store64 { + rd, + mem: MemArg::NominalSPOffset { off }, + } => Some(MachInstStackOpInfo::StoreNomSPOff(rd, off)), + &Inst::Load64 { + rd, + mem: MemArg::NominalSPOffset { off }, + } => Some(MachInstStackOpInfo::LoadNomSPOff(rd.to_reg(), off)), + _ => None, + } + } + + fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Inst { + assert!(ty.bits() <= 64); + if ty.bits() <= 32 { + Inst::mov32(to_reg, from_reg) + } else { + Inst::mov64(to_reg, from_reg) + } + } + + fn gen_constant Writable>( + to_regs: ValueRegs>, + value: u128, + ty: Type, + _alloc_tmp: F, + ) -> SmallVec<[Inst; 4]> { + let to_reg = to_regs + .only_reg() + .expect("multi-reg values not supported yet"); + let value = value as u64; + match ty { + types::F64 => { + let mut ret = SmallVec::new(); + ret.push(Inst::load_fp_constant64(to_reg, f64::from_bits(value))); + ret + } + types::F32 => { + let mut ret = SmallVec::new(); + ret.push(Inst::load_fp_constant32( + to_reg, + f32::from_bits(value as u32), + )); + ret + } + types::I64 | types::B64 | types::R64 => Inst::load_constant64(to_reg, value), + types::B1 + | types::I8 + | types::B8 + | types::I16 + | types::B16 + | types::I32 + | types::B32 => Inst::load_constant32(to_reg, value as u32), + _ => unreachable!(), + } + } + + fn gen_nop(preferred_size: usize) -> Inst { + if preferred_size == 0 { + Inst::Nop0 + } else { + // We can't give a NOP (or any insn) < 2 bytes. 
+ assert!(preferred_size >= 2); + Inst::Nop2 + } + } + + fn maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option { + None + } + + fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> { + match ty { + types::I8 => Ok((&[RegClass::I64], &[types::I8])), + types::I16 => Ok((&[RegClass::I64], &[types::I16])), + types::I32 => Ok((&[RegClass::I64], &[types::I32])), + types::I64 => Ok((&[RegClass::I64], &[types::I64])), + types::B1 => Ok((&[RegClass::I64], &[types::B1])), + types::B8 => Ok((&[RegClass::I64], &[types::B8])), + types::B16 => Ok((&[RegClass::I64], &[types::B16])), + types::B32 => Ok((&[RegClass::I64], &[types::B32])), + types::B64 => Ok((&[RegClass::I64], &[types::B64])), + types::R32 => panic!("32-bit reftype pointer should never be seen on s390x"), + types::R64 => Ok((&[RegClass::I64], &[types::R64])), + types::F32 => Ok((&[RegClass::F64], &[types::F32])), + types::F64 => Ok((&[RegClass::F64], &[types::F64])), + types::I128 => Ok((&[RegClass::I64, RegClass::I64], &[types::I64, types::I64])), + types::B128 => Ok((&[RegClass::I64, RegClass::I64], &[types::B64, types::B64])), + // FIXME: We don't really have IFLAGS, but need to allow it here + // for now to support the SelectifSpectreGuard instruction. + types::IFLAGS => Ok((&[RegClass::I64], &[types::I64])), + _ => Err(CodegenError::Unsupported(format!( + "Unexpected SSA-value type: {}", + ty + ))), + } + } + + fn gen_jump(target: MachLabel) -> Inst { + Inst::Jump { + dest: BranchTarget::Label(target), + } + } + + fn reg_universe(flags: &settings::Flags) -> RealRegUniverse { + create_reg_universe(flags) + } + + fn worst_case_size() -> CodeOffset { + // The maximum size, in bytes, of any `Inst`'s emitted code. We have at least one case of + // an 8-instruction sequence (saturating int-to-float conversions) with three embedded + // 64-bit f64 constants. 
+ // + // Note that inline jump-tables handle island/pool insertion separately, so we do not need + // to account for them here (otherwise the worst case would be 2^31 * 4, clearly not + // feasible for other reasons). + 44 + } + + fn ref_type_regclass(_: &settings::Flags) -> RegClass { + RegClass::I64 + } + + fn gen_value_label_marker(label: ValueLabel, reg: Reg) -> Self { + Inst::ValueLabelMarker { label, reg } + } + + fn defines_value_label(&self) -> Option<(ValueLabel, Reg)> { + match self { + Inst::ValueLabelMarker { label, reg } => Some((*label, *reg)), + _ => None, + } + } +} + +//============================================================================= +// Pretty-printing of instructions. + +fn mem_finalize_for_show( + mem: &MemArg, + mb_rru: Option<&RealRegUniverse>, + state: &EmitState, + have_d12: bool, + have_d20: bool, + have_pcrel: bool, + have_index: bool, +) -> (String, MemArg) { + let (mem_insts, mem) = mem_finalize(mem, state, have_d12, have_d20, have_pcrel, have_index); + let mut mem_str = mem_insts + .into_iter() + .map(|inst| inst.show_rru(mb_rru)) + .collect::>() + .join(" ; "); + if !mem_str.is_empty() { + mem_str += " ; "; + } + + (mem_str, mem) +} + +impl PrettyPrint for Inst { + fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + self.pretty_print(mb_rru, &mut EmitState::default()) + } +} + +impl Inst { + fn print_with_state(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String { + match self { + &Inst::Nop0 => "nop-zero-len".to_string(), + &Inst::Nop2 => "nop".to_string(), + &Inst::AluRRR { alu_op, rd, rn, rm } => { + let (op, have_rr) = match alu_op { + ALUOp::Add32 => ("ark", true), + ALUOp::Add64 => ("agrk", true), + ALUOp::Sub32 => ("srk", true), + ALUOp::Sub64 => ("sgrk", true), + ALUOp::Mul32 => ("msrkc", true), + ALUOp::Mul64 => ("msgrkc", true), + ALUOp::And32 => ("nrk", true), + ALUOp::And64 => ("ngrk", true), + ALUOp::Orr32 => ("ork", true), + ALUOp::Orr64 => ("ogrk", true), + ALUOp::Xor32 
=> ("xrk", true), + ALUOp::Xor64 => ("xgrk", true), + ALUOp::AndNot32 => ("nnrk", false), + ALUOp::AndNot64 => ("nngrk", false), + ALUOp::OrrNot32 => ("nork", false), + ALUOp::OrrNot64 => ("nogrk", false), + ALUOp::XorNot32 => ("nxrk", false), + ALUOp::XorNot64 => ("nxgrk", false), + _ => unreachable!(), + }; + if have_rr && rd.to_reg() == rn { + let inst = Inst::AluRR { alu_op, rd, rm }; + return inst.print_with_state(mb_rru, state); + } + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + format!("{} {}, {}, {}", op, rd, rn, rm) + } + &Inst::AluRRSImm16 { + alu_op, + rd, + rn, + imm, + } => { + if rd.to_reg() == rn { + let inst = Inst::AluRSImm16 { alu_op, rd, imm }; + return inst.print_with_state(mb_rru, state); + } + let op = match alu_op { + ALUOp::Add32 => "ahik", + ALUOp::Add64 => "aghik", + _ => unreachable!(), + }; + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("{} {}, {}, {}", op, rd, rn, imm) + } + &Inst::AluRR { alu_op, rd, rm } => { + let op = match alu_op { + ALUOp::Add32 => "ar", + ALUOp::Add64 => "agr", + ALUOp::Add64Ext32 => "agfr", + ALUOp::Sub32 => "sr", + ALUOp::Sub64 => "sgr", + ALUOp::Sub64Ext32 => "sgfr", + ALUOp::Mul32 => "msr", + ALUOp::Mul64 => "msgr", + ALUOp::Mul64Ext32 => "msgfr", + ALUOp::And32 => "nr", + ALUOp::And64 => "ngr", + ALUOp::Orr32 => "or", + ALUOp::Orr64 => "ogr", + ALUOp::Xor32 => "xr", + ALUOp::Xor64 => "xgr", + _ => unreachable!(), + }; + let rd = rd.to_reg().show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + format!("{} {}, {}", op, rd, rm) + } + &Inst::AluRX { + alu_op, + rd, + ref mem, + } => { + let (opcode_rx, opcode_rxy) = match alu_op { + ALUOp::Add32 => (Some("a"), Some("ay")), + ALUOp::Add32Ext16 => (Some("ah"), Some("ahy")), + ALUOp::Add64 => (None, Some("ag")), + ALUOp::Add64Ext16 => (None, Some("agh")), + ALUOp::Add64Ext32 => (None, Some("agf")), + ALUOp::Sub32 => (Some("s"), Some("sy")), + ALUOp::Sub32Ext16 => 
(Some("sh"), Some("shy")), + ALUOp::Sub64 => (None, Some("sg")), + ALUOp::Sub64Ext16 => (None, Some("sgh")), + ALUOp::Sub64Ext32 => (None, Some("sgf")), + ALUOp::Mul32 => (Some("ms"), Some("msy")), + ALUOp::Mul32Ext16 => (Some("mh"), Some("mhy")), + ALUOp::Mul64 => (None, Some("msg")), + ALUOp::Mul64Ext16 => (None, Some("mgh")), + ALUOp::Mul64Ext32 => (None, Some("msgf")), + ALUOp::And32 => (Some("n"), Some("ny")), + ALUOp::And64 => (None, Some("ng")), + ALUOp::Orr32 => (Some("o"), Some("oy")), + ALUOp::Orr64 => (None, Some("og")), + ALUOp::Xor32 => (Some("x"), Some("xy")), + ALUOp::Xor64 => (None, Some("xg")), + _ => unreachable!(), + }; + + let (mem_str, mem) = mem_finalize_for_show( + mem, + mb_rru, + state, + opcode_rx.is_some(), + opcode_rxy.is_some(), + false, + true, + ); + + let op = match &mem { + &MemArg::BXD12 { .. } => opcode_rx, + &MemArg::BXD20 { .. } => opcode_rxy, + _ => unreachable!(), + }; + + let rd = rd.to_reg().show_rru(mb_rru); + let mem = mem.show_rru(mb_rru); + format!("{}{} {}, {}", mem_str, op.unwrap(), rd, mem) + } + &Inst::AluRSImm16 { alu_op, rd, imm } => { + let op = match alu_op { + ALUOp::Add32 => "ahi", + ALUOp::Add64 => "aghi", + ALUOp::Mul32 => "mhi", + ALUOp::Mul64 => "mghi", + _ => unreachable!(), + }; + let rd = rd.to_reg().show_rru(mb_rru); + format!("{} {}, {}", op, rd, imm) + } + &Inst::AluRSImm32 { alu_op, rd, imm } => { + let op = match alu_op { + ALUOp::Add32 => "afi", + ALUOp::Add64 => "agfi", + ALUOp::Mul32 => "msfi", + ALUOp::Mul64 => "msgfi", + _ => unreachable!(), + }; + let rd = rd.to_reg().show_rru(mb_rru); + format!("{} {}, {}", op, rd, imm) + } + &Inst::AluRUImm32 { alu_op, rd, imm } => { + let op = match alu_op { + ALUOp::Add32 => "alfi", + ALUOp::Add64 => "algfi", + ALUOp::Sub32 => "slfi", + ALUOp::Sub64 => "slgfi", + _ => unreachable!(), + }; + let rd = rd.to_reg().show_rru(mb_rru); + format!("{} {}, {}", op, rd, imm) + } + &Inst::AluRUImm16Shifted { alu_op, rd, imm } => { + let op = match (alu_op, imm.shift) 
{ + (ALUOp::And32, 0) => "nill", + (ALUOp::And32, 1) => "nilh", + (ALUOp::And64, 0) => "nill", + (ALUOp::And64, 1) => "nilh", + (ALUOp::And64, 2) => "nihl", + (ALUOp::And64, 3) => "nihh", + (ALUOp::Orr32, 0) => "oill", + (ALUOp::Orr32, 1) => "oilh", + (ALUOp::Orr64, 0) => "oill", + (ALUOp::Orr64, 1) => "oilh", + (ALUOp::Orr64, 2) => "oihl", + (ALUOp::Orr64, 3) => "oihh", + _ => unreachable!(), + }; + let rd = rd.to_reg().show_rru(mb_rru); + format!("{} {}, {}", op, rd, imm.bits) + } + &Inst::AluRUImm32Shifted { alu_op, rd, imm } => { + let op = match (alu_op, imm.shift) { + (ALUOp::And32, 0) => "nilf", + (ALUOp::And64, 0) => "nilf", + (ALUOp::And64, 1) => "nihf", + (ALUOp::Orr32, 0) => "oilf", + (ALUOp::Orr64, 0) => "oilf", + (ALUOp::Orr64, 1) => "oihf", + (ALUOp::Xor32, 0) => "xilf", + (ALUOp::Xor64, 0) => "xilf", + (ALUOp::Xor64, 1) => "xihf", + _ => unreachable!(), + }; + let rd = rd.to_reg().show_rru(mb_rru); + format!("{} {}, {}", op, rd, imm.bits) + } + &Inst::SMulWide { rn, rm } => { + let op = "mgrk"; + let rd = gpr(0).show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + format!("{} {}, {}, {}", op, rd, rn, rm) + } + &Inst::UMulWide { rn } => { + let op = "mlgr"; + let rd = gpr(0).show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("{} {}, {}", op, rd, rn) + } + &Inst::SDivMod32 { rn, .. } => { + let op = "dsgfr"; + let rd = gpr(0).show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("{} {}, {}", op, rd, rn) + } + &Inst::SDivMod64 { rn, .. } => { + let op = "dsgr"; + let rd = gpr(0).show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("{} {}, {}", op, rd, rn) + } + &Inst::UDivMod32 { rn, .. } => { + let op = "dlr"; + let rd = gpr(0).show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("{} {}, {}", op, rd, rn) + } + &Inst::UDivMod64 { rn, .. 
} => { + let op = "dlgr"; + let rd = gpr(0).show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("{} {}, {}", op, rd, rn) + } + &Inst::Flogr { rn } => { + let op = "flogr"; + let rd = gpr(0).show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("{} {}, {}", op, rd, rn) + } + &Inst::ShiftRR { + shift_op, + rd, + rn, + shift_imm, + ref shift_reg, + } => { + let op = match shift_op { + ShiftOp::RotL32 => "rll", + ShiftOp::RotL64 => "rllg", + ShiftOp::LShL32 => "sllk", + ShiftOp::LShL64 => "sllg", + ShiftOp::LShR32 => "srlk", + ShiftOp::LShR64 => "srlg", + ShiftOp::AShR32 => "srak", + ShiftOp::AShR64 => "srag", + }; + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + let shift_imm = shift_imm.show_rru(mb_rru); + let shift_reg = match shift_reg { + Some(reg) => format!("({})", reg.show_rru(mb_rru)), + None => "".to_string(), + }; + format!("{} {}, {}, {}{}", op, rd, rn, shift_imm, shift_reg) + } + &Inst::UnaryRR { op, rd, rn } => { + let (op, extra) = match op { + UnaryOp::Abs32 => ("lpr", ""), + UnaryOp::Abs64 => ("lpgr", ""), + UnaryOp::Abs64Ext32 => ("lpgfr", ""), + UnaryOp::Neg32 => ("lcr", ""), + UnaryOp::Neg64 => ("lcgr", ""), + UnaryOp::Neg64Ext32 => ("lcgfr", ""), + UnaryOp::PopcntByte => ("popcnt", ""), + UnaryOp::PopcntReg => ("popcnt", ", 8"), + }; + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("{} {}, {}{}", op, rd, rn, extra) + } + &Inst::CmpRR { op, rn, rm } => { + let op = match op { + CmpOp::CmpS32 => "cr", + CmpOp::CmpS64 => "cgr", + CmpOp::CmpS64Ext32 => "cgfr", + CmpOp::CmpL32 => "clr", + CmpOp::CmpL64 => "clgr", + CmpOp::CmpL64Ext32 => "clgfr", + _ => unreachable!(), + }; + let rn = rn.show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + format!("{} {}, {}", op, rn, rm) + } + &Inst::CmpRX { op, rn, ref mem } => { + let (opcode_rx, opcode_rxy, opcode_ril) = match op { + CmpOp::CmpS32 => (Some("c"), Some("cy"), Some("crl")), + CmpOp::CmpS32Ext16 => (Some("ch"), Some("chy"), 
Some("chrl")), + CmpOp::CmpS64 => (None, Some("cg"), Some("cgrl")), + CmpOp::CmpS64Ext16 => (None, Some("cgh"), Some("cghrl")), + CmpOp::CmpS64Ext32 => (None, Some("cgf"), Some("cgfrl")), + CmpOp::CmpL32 => (Some("cl"), Some("cly"), Some("clrl")), + CmpOp::CmpL32Ext16 => (None, None, Some("clhrl")), + CmpOp::CmpL64 => (None, Some("clg"), Some("clgrl")), + CmpOp::CmpL64Ext16 => (None, None, Some("clghrl")), + CmpOp::CmpL64Ext32 => (None, Some("clgf"), Some("clgfrl")), + }; + + let (mem_str, mem) = mem_finalize_for_show( + mem, + mb_rru, + state, + opcode_rx.is_some(), + opcode_rxy.is_some(), + opcode_ril.is_some(), + true, + ); + + let op = match &mem { + &MemArg::BXD12 { .. } => opcode_rx, + &MemArg::BXD20 { .. } => opcode_rxy, + &MemArg::Label { .. } | &MemArg::Symbol { .. } => opcode_ril, + _ => unreachable!(), + }; + + let rn = rn.show_rru(mb_rru); + let mem = mem.show_rru(mb_rru); + format!("{}{} {}, {}", mem_str, op.unwrap(), rn, mem) + } + &Inst::CmpRSImm16 { op, rn, imm } => { + let op = match op { + CmpOp::CmpS32 => "chi", + CmpOp::CmpS64 => "cghi", + _ => unreachable!(), + }; + let rn = rn.show_rru(mb_rru); + format!("{} {}, {}", op, rn, imm) + } + &Inst::CmpRSImm32 { op, rn, imm } => { + let op = match op { + CmpOp::CmpS32 => "cfi", + CmpOp::CmpS64 => "cgfi", + _ => unreachable!(), + }; + let rn = rn.show_rru(mb_rru); + format!("{} {}, {}", op, rn, imm) + } + &Inst::CmpRUImm32 { op, rn, imm } => { + let op = match op { + CmpOp::CmpL32 => "clfi", + CmpOp::CmpL64 => "clgfi", + _ => unreachable!(), + }; + let rn = rn.show_rru(mb_rru); + format!("{} {}, {}", op, rn, imm) + } + &Inst::CmpTrapRR { + op, rn, rm, cond, .. 
+ } => { + let op = match op { + CmpOp::CmpS32 => "crt", + CmpOp::CmpS64 => "cgrt", + CmpOp::CmpL32 => "clrt", + CmpOp::CmpL64 => "clgrt", + _ => unreachable!(), + }; + let rn = rn.show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + let cond = cond.show_rru(mb_rru); + format!("{}{} {}, {}", op, cond, rn, rm) + } + &Inst::CmpTrapRSImm16 { + op, rn, imm, cond, .. + } => { + let op = match op { + CmpOp::CmpS32 => "cit", + CmpOp::CmpS64 => "cgit", + _ => unreachable!(), + }; + let rn = rn.show_rru(mb_rru); + let cond = cond.show_rru(mb_rru); + format!("{}{} {}, {}", op, cond, rn, imm) + } + &Inst::CmpTrapRUImm16 { + op, rn, imm, cond, .. + } => { + let op = match op { + CmpOp::CmpL32 => "clfit", + CmpOp::CmpL64 => "clgit", + _ => unreachable!(), + }; + let rn = rn.show_rru(mb_rru); + let cond = cond.show_rru(mb_rru); + format!("{}{} {}, {}", op, cond, rn, imm) + } + &Inst::Load32 { rd, ref mem } + | &Inst::Load32ZExt8 { rd, ref mem } + | &Inst::Load32SExt8 { rd, ref mem } + | &Inst::Load32ZExt16 { rd, ref mem } + | &Inst::Load32SExt16 { rd, ref mem } + | &Inst::Load64 { rd, ref mem } + | &Inst::Load64ZExt8 { rd, ref mem } + | &Inst::Load64SExt8 { rd, ref mem } + | &Inst::Load64ZExt16 { rd, ref mem } + | &Inst::Load64SExt16 { rd, ref mem } + | &Inst::Load64ZExt32 { rd, ref mem } + | &Inst::Load64SExt32 { rd, ref mem } + | &Inst::LoadRev16 { rd, ref mem } + | &Inst::LoadRev32 { rd, ref mem } + | &Inst::LoadRev64 { rd, ref mem } + | &Inst::FpuLoad32 { rd, ref mem } + | &Inst::FpuLoad64 { rd, ref mem } => { + let (opcode_rx, opcode_rxy, opcode_ril) = match self { + &Inst::Load32 { .. } => (Some("l"), Some("ly"), Some("lrl")), + &Inst::Load32ZExt8 { .. } => (None, Some("llc"), None), + &Inst::Load32SExt8 { .. } => (None, Some("lb"), None), + &Inst::Load32ZExt16 { .. } => (None, Some("llh"), Some("llhrl")), + &Inst::Load32SExt16 { .. } => (Some("lh"), Some("lhy"), Some("lhrl")), + &Inst::Load64 { .. } => (None, Some("lg"), Some("lgrl")), + &Inst::Load64ZExt8 { .. 
} => (None, Some("llgc"), None), + &Inst::Load64SExt8 { .. } => (None, Some("lgb"), None), + &Inst::Load64ZExt16 { .. } => (None, Some("llgh"), Some("llghrl")), + &Inst::Load64SExt16 { .. } => (None, Some("lgh"), Some("lghrl")), + &Inst::Load64ZExt32 { .. } => (None, Some("llgf"), Some("llgfrl")), + &Inst::Load64SExt32 { .. } => (None, Some("lgf"), Some("lgfrl")), + &Inst::LoadRev16 { .. } => (None, Some("lrvh"), None), + &Inst::LoadRev32 { .. } => (None, Some("lrv"), None), + &Inst::LoadRev64 { .. } => (None, Some("lrvg"), None), + &Inst::FpuLoad32 { .. } => (Some("le"), Some("ley"), None), + &Inst::FpuLoad64 { .. } => (Some("ld"), Some("ldy"), None), + _ => unreachable!(), + }; + + let (mem_str, mem) = mem_finalize_for_show( + mem, + mb_rru, + state, + opcode_rx.is_some(), + opcode_rxy.is_some(), + opcode_ril.is_some(), + true, + ); + + let op = match &mem { + &MemArg::BXD12 { .. } => opcode_rx, + &MemArg::BXD20 { .. } => opcode_rxy, + &MemArg::Label { .. } | &MemArg::Symbol { .. } => opcode_ril, + _ => unreachable!(), + }; + + let rd = rd.to_reg().show_rru(mb_rru); + let mem = mem.show_rru(mb_rru); + format!("{}{} {}, {}", mem_str, op.unwrap(), rd, mem) + } + &Inst::FpuLoadRev32 { rd, ref mem } | &Inst::FpuLoadRev64 { rd, ref mem } => { + let (mem_str, mem) = + mem_finalize_for_show(mem, mb_rru, state, true, false, false, true); + + let op = match self { + &Inst::FpuLoadRev32 { .. } => "vlebrf", + &Inst::FpuLoadRev64 { .. 
} => "vlebrg", + _ => unreachable!(), + }; + let rd = rd.to_reg().show_rru(mb_rru); + let mem = mem.show_rru(mb_rru); + format!("{}{} {}, {}, 0", mem_str, op, rd, mem) + } + &Inst::Store8 { rd, ref mem } + | &Inst::Store16 { rd, ref mem } + | &Inst::Store32 { rd, ref mem } + | &Inst::Store64 { rd, ref mem } + | &Inst::StoreRev16 { rd, ref mem } + | &Inst::StoreRev32 { rd, ref mem } + | &Inst::StoreRev64 { rd, ref mem } + | &Inst::FpuStore32 { rd, ref mem } + | &Inst::FpuStore64 { rd, ref mem } => { + let (opcode_rx, opcode_rxy, opcode_ril) = match self { + &Inst::Store8 { .. } => (Some("stc"), Some("stcy"), None), + &Inst::Store16 { .. } => (Some("sth"), Some("sthy"), Some("sthrl")), + &Inst::Store32 { .. } => (Some("st"), Some("sty"), Some("strl")), + &Inst::Store64 { .. } => (None, Some("stg"), Some("stgrl")), + &Inst::StoreRev16 { .. } => (None, Some("strvh"), None), + &Inst::StoreRev32 { .. } => (None, Some("strv"), None), + &Inst::StoreRev64 { .. } => (None, Some("strvg"), None), + &Inst::FpuStore32 { .. } => (Some("ste"), Some("stey"), None), + &Inst::FpuStore64 { .. } => (Some("std"), Some("stdy"), None), + _ => unreachable!(), + }; + + let (mem_str, mem) = mem_finalize_for_show( + mem, + mb_rru, + state, + opcode_rx.is_some(), + opcode_rxy.is_some(), + opcode_ril.is_some(), + true, + ); + + let op = match &mem { + &MemArg::BXD12 { .. } => opcode_rx, + &MemArg::BXD20 { .. } => opcode_rxy, + &MemArg::Label { .. } | &MemArg::Symbol { .. } => opcode_ril, + _ => unreachable!(), + }; + + let rd = rd.show_rru(mb_rru); + let mem = mem.show_rru(mb_rru); + format!("{}{} {}, {}", mem_str, op.unwrap(), rd, mem) + } + &Inst::StoreImm8 { imm, ref mem } => { + let (mem_str, mem) = + mem_finalize_for_show(mem, mb_rru, state, true, true, false, false); + let op = match &mem { + &MemArg::BXD12 { .. } => "mvi", + &MemArg::BXD20 { .. 
} => "mviy", + _ => unreachable!(), + }; + + let mem = mem.show_rru(mb_rru); + format!("{}{} {}, {}", mem_str, op, mem, imm) + } + &Inst::StoreImm16 { imm, ref mem } + | &Inst::StoreImm32SExt16 { imm, ref mem } + | &Inst::StoreImm64SExt16 { imm, ref mem } => { + let (mem_str, mem) = + mem_finalize_for_show(mem, mb_rru, state, false, true, false, false); + let op = match self { + &Inst::StoreImm16 { .. } => "mvhhi", + &Inst::StoreImm32SExt16 { .. } => "mvhi", + &Inst::StoreImm64SExt16 { .. } => "mvghi", + _ => unreachable!(), + }; + + let mem = mem.show_rru(mb_rru); + format!("{}{} {}, {}", mem_str, op, mem, imm) + } + &Inst::FpuStoreRev32 { rd, ref mem } | &Inst::FpuStoreRev64 { rd, ref mem } => { + let (mem_str, mem) = + mem_finalize_for_show(mem, mb_rru, state, true, false, false, true); + + let op = match self { + &Inst::FpuStoreRev32 { .. } => "vstebrf", + &Inst::FpuStoreRev64 { .. } => "vstebrg", + _ => unreachable!(), + }; + let rd = rd.show_rru(mb_rru); + let mem = mem.show_rru(mb_rru); + format!("{}{} {}, {}, 0", mem_str, op, rd, mem) + } + &Inst::LoadMultiple64 { + rt, + rt2, + addr_reg, + addr_off, + } => { + let rt = rt.show_rru(mb_rru); + let rt2 = rt2.show_rru(mb_rru); + let addr_reg = addr_reg.show_rru(mb_rru); + let addr_off = addr_off.show_rru(mb_rru); + format!("lmg {}, {}, {}({})", rt, rt2, addr_off, addr_reg) + } + &Inst::StoreMultiple64 { + rt, + rt2, + addr_reg, + addr_off, + } => { + let rt = rt.show_rru(mb_rru); + let rt2 = rt2.show_rru(mb_rru); + let addr_reg = addr_reg.show_rru(mb_rru); + let addr_off = addr_off.show_rru(mb_rru); + format!("stmg {}, {}, {}({})", rt, rt2, addr_off, addr_reg) + } + &Inst::Mov64 { rd, rm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + format!("lgr {}, {}", rd, rm) + } + &Inst::Mov32 { rd, rm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + format!("lr {}, {}", rd, rm) + } + &Inst::Mov32Imm { rd, ref imm } => { + let rd = 
rd.to_reg().show_rru(mb_rru); + format!("iilf {}, {}", rd, imm) + } + &Inst::Mov32SImm16 { rd, ref imm } => { + let rd = rd.to_reg().show_rru(mb_rru); + format!("lhi {}, {}", rd, imm) + } + &Inst::Mov64SImm16 { rd, ref imm } => { + let rd = rd.to_reg().show_rru(mb_rru); + format!("lghi {}, {}", rd, imm) + } + &Inst::Mov64SImm32 { rd, ref imm } => { + let rd = rd.to_reg().show_rru(mb_rru); + format!("lgfi {}, {}", rd, imm) + } + &Inst::Mov64UImm16Shifted { rd, ref imm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let op = match imm.shift { + 0 => "llill", + 1 => "llilh", + 2 => "llihl", + 3 => "llihh", + _ => unreachable!(), + }; + format!("{} {}, {}", op, rd, imm.bits) + } + &Inst::Mov64UImm32Shifted { rd, ref imm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let op = match imm.shift { + 0 => "llilf", + 1 => "llihf", + _ => unreachable!(), + }; + format!("{} {}, {}", op, rd, imm.bits) + } + &Inst::Insert64UImm16Shifted { rd, ref imm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let op = match imm.shift { + 0 => "iill", + 1 => "iilh", + 2 => "iihl", + 3 => "iihh", + _ => unreachable!(), + }; + format!("{} {}, {}", op, rd, imm.bits) + } + &Inst::Insert64UImm32Shifted { rd, ref imm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let op = match imm.shift { + 0 => "iilf", + 1 => "iihf", + _ => unreachable!(), + }; + format!("{} {}, {}", op, rd, imm.bits) + } + &Inst::CMov32 { rd, cond, rm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + let cond = cond.show_rru(mb_rru); + format!("locr{} {}, {}", cond, rd, rm) + } + &Inst::CMov64 { rd, cond, rm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + let cond = cond.show_rru(mb_rru); + format!("locgr{} {}, {}", cond, rd, rm) + } + &Inst::CMov32SImm16 { rd, cond, ref imm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let cond = cond.show_rru(mb_rru); + format!("lochi{} {}, {}", cond, rd, imm) + } + &Inst::CMov64SImm16 { rd, cond, ref imm } => { + 
let rd = rd.to_reg().show_rru(mb_rru); + let cond = cond.show_rru(mb_rru); + format!("locghi{} {}, {}", cond, rd, imm) + } + &Inst::FpuMove32 { rd, rn } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("ler {}, {}", rd, rn) + } + &Inst::FpuMove64 { rd, rn } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("ldr {}, {}", rd, rn) + } + &Inst::FpuCMov32 { rd, cond, rm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + let cond = cond.invert().show_rru(mb_rru); + format!("j{} 6 ; ler {}, {}", cond, rd, rm) + } + &Inst::FpuCMov64 { rd, cond, rm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + let cond = cond.invert().show_rru(mb_rru); + format!("j{} 6 ; ldr {}, {}", cond, rd, rm) + } + &Inst::MovToFpr { rd, rn } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("ldgr {}, {}", rd, rn) + } + &Inst::MovFromFpr { rd, rn } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("lgdr {}, {}", rd, rn) + } + &Inst::FpuRR { fpu_op, rd, rn } => { + let op = match fpu_op { + FPUOp1::Abs32 => "lpebr", + FPUOp1::Abs64 => "lpdbr", + FPUOp1::Neg32 => "lcebr", + FPUOp1::Neg64 => "lcdbr", + FPUOp1::NegAbs32 => "lnebr", + FPUOp1::NegAbs64 => "lndbr", + FPUOp1::Sqrt32 => "sqebr", + FPUOp1::Sqrt64 => "sqdbr", + FPUOp1::Cvt32To64 => "ldebr", + FPUOp1::Cvt64To32 => "ledbr", + }; + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("{} {}, {}", op, rd, rn) + } + &Inst::FpuRRR { fpu_op, rd, rm } => { + let op = match fpu_op { + FPUOp2::Add32 => "aebr", + FPUOp2::Add64 => "adbr", + FPUOp2::Sub32 => "sebr", + FPUOp2::Sub64 => "sdbr", + FPUOp2::Mul32 => "meebr", + FPUOp2::Mul64 => "mdbr", + FPUOp2::Div32 => "debr", + FPUOp2::Div64 => "ddbr", + _ => unimplemented!(), + }; + let rd = rd.to_reg().show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + format!("{} 
{}, {}", op, rd, rm) + } + &Inst::FpuRRRR { fpu_op, rd, rn, rm } => { + let op = match fpu_op { + FPUOp3::MAdd32 => "maebr", + FPUOp3::MAdd64 => "madbr", + FPUOp3::MSub32 => "msebr", + FPUOp3::MSub64 => "msdbr", + }; + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + format!("{} {}, {}, {}", op, rd, rn, rm) + } + &Inst::FpuCopysign { rd, rn, rm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + format!("cpsdr {}, {}, {}", rd, rm, rn) + } + &Inst::FpuCmp32 { rn, rm } => { + let rn = rn.show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + format!("cebr {}, {}", rn, rm) + } + &Inst::FpuCmp64 { rn, rm } => { + let rn = rn.show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + format!("cdbr {}, {}", rn, rm) + } + &Inst::LoadFpuConst32 { rd, const_data } => { + let rd = rd.to_reg().show_rru(mb_rru); + let tmp = writable_spilltmp_reg().to_reg().show_rru(mb_rru); + format!( + "bras {}, 8 ; data.f32 {} ; le {}, 0({})", + tmp, const_data, rd, tmp + ) + } + &Inst::LoadFpuConst64 { rd, const_data } => { + let rd = rd.to_reg().show_rru(mb_rru); + let tmp = writable_spilltmp_reg().to_reg().show_rru(mb_rru); + format!( + "bras {}, 12 ; data.f64 {} ; ld {}, 0({})", + tmp, const_data, rd, tmp + ) + } + &Inst::FpuToInt { op, rd, rn } => { + let op = match op { + FpuToIntOp::F32ToI32 => "cfebra", + FpuToIntOp::F32ToU32 => "clfebr", + FpuToIntOp::F32ToI64 => "cgebra", + FpuToIntOp::F32ToU64 => "clgebr", + FpuToIntOp::F64ToI32 => "cfdbra", + FpuToIntOp::F64ToU32 => "clfdbr", + FpuToIntOp::F64ToI64 => "cgdbra", + FpuToIntOp::F64ToU64 => "clgdbr", + }; + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("{} {}, 5, {}, 0", op, rd, rn) + } + &Inst::IntToFpu { op, rd, rn } => { + let op = match op { + IntToFpuOp::I32ToF32 => "cefbra", + IntToFpuOp::U32ToF32 => "celfbr", + IntToFpuOp::I64ToF32 => "cegbra", + IntToFpuOp::U64ToF32 => "celgbr", + 
IntToFpuOp::I32ToF64 => "cdfbra", + IntToFpuOp::U32ToF64 => "cdlfbr", + IntToFpuOp::I64ToF64 => "cdgbra", + IntToFpuOp::U64ToF64 => "cdlgbr", + }; + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("{} {}, 0, {}, 0", op, rd, rn) + } + &Inst::FpuRound { op, rd, rn } => { + let (op, m3) = match op { + FpuRoundMode::Minus32 => ("fiebr", 7), + FpuRoundMode::Minus64 => ("fidbr", 7), + FpuRoundMode::Plus32 => ("fiebr", 6), + FpuRoundMode::Plus64 => ("fidbr", 6), + FpuRoundMode::Zero32 => ("fiebr", 5), + FpuRoundMode::Zero64 => ("fidbr", 5), + FpuRoundMode::Nearest32 => ("fiebr", 4), + FpuRoundMode::Nearest64 => ("fidbr", 4), + }; + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("{} {}, {}, {}", op, rd, rn, m3) + } + &Inst::FpuVecRRR { fpu_op, rd, rn, rm } => { + let op = match fpu_op { + FPUOp2::Max32 => "wfmaxsb", + FPUOp2::Max64 => "wfmaxdb", + FPUOp2::Min32 => "wfminsb", + FPUOp2::Min64 => "wfmindb", + _ => unimplemented!(), + }; + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + format!("{} {}, {}, {}, 1", op, rd, rn, rm) + } + &Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits, + } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + let op = match (signed, from_bits, to_bits) { + (_, 1, 32) => "llcr", + (_, 1, 64) => "llgcr", + (false, 8, 32) => "llcr", + (false, 8, 64) => "llgcr", + (true, 8, 32) => "lbr", + (true, 8, 64) => "lgbr", + (false, 16, 32) => "llhr", + (false, 16, 64) => "llghr", + (true, 16, 32) => "lhr", + (true, 16, 64) => "lghr", + (false, 32, 64) => "llgfr", + (true, 32, 64) => "lgfr", + _ => panic!("Unsupported Extend case: {:?}", self), + }; + format!("{} {}, {}", op, rd, rn) + } + &Inst::Call { link, ref info, .. } => { + let link = link.show_rru(mb_rru); + format!("brasl {}, {}", link, info.dest) + } + &Inst::CallInd { link, ref info, .. 
} => { + let link = link.show_rru(mb_rru); + let rn = info.rn.show_rru(mb_rru); + format!("basr {}, {}", link, rn) + } + &Inst::Ret { link } => { + let link = link.show_rru(mb_rru); + format!("br {}", link) + } + &Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(), + &Inst::Jump { ref dest } => { + let dest = dest.show_rru(mb_rru); + format!("jg {}", dest) + } + &Inst::IndirectBr { rn, .. } => { + let rn = rn.show_rru(mb_rru); + format!("br {}", rn) + } + &Inst::CondBr { + ref taken, + ref not_taken, + cond, + } => { + let taken = taken.show_rru(mb_rru); + let not_taken = not_taken.show_rru(mb_rru); + let cond = cond.show_rru(mb_rru); + format!("jg{} {} ; jg {}", cond, taken, not_taken) + } + &Inst::OneWayCondBr { ref target, cond } => { + let target = target.show_rru(mb_rru); + let cond = cond.show_rru(mb_rru); + format!("jg{} {}", cond, target) + } + &Inst::Debugtrap => "debugtrap".to_string(), + &Inst::Trap { .. } => "trap".to_string(), + &Inst::TrapIf { cond, .. } => { + let cond = cond.invert().show_rru(mb_rru); + format!("j{} 6 ; trap", cond) + } + &Inst::JTSequence { + ref info, + ridx, + rtmp1, + rtmp2, + .. 
+ } => { + let ridx = ridx.show_rru(mb_rru); + let rtmp1 = rtmp1.show_rru(mb_rru); + let rtmp2 = rtmp2.show_rru(mb_rru); + let default_target = info.default_target.show_rru(mb_rru); + format!( + concat!( + "clgfi {}, {} ; ", + "jghe {} ; ", + "sllg {}, {}, 2 ; ", + "larl {}, 18 ; ", + "lgf {}, 0({}, {}) ; ", + "agrk {}, {}, {} ; ", + "br {} ; ", + "jt_entries {:?}" + ), + ridx, + info.targets.len(), + default_target, + rtmp2, + ridx, + rtmp1, + rtmp2, + rtmp2, + rtmp1, + rtmp1, + rtmp1, + rtmp2, + rtmp1, + info.targets + ) + } + &Inst::LoadExtNameFar { + rd, + ref name, + offset, + } => { + let rd = rd.show_rru(mb_rru); + let tmp = writable_spilltmp_reg().to_reg().show_rru(mb_rru); + format!( + "bras {}, 12 ; data {} + {} ; lg {}, 0({})", + tmp, name, offset, rd, tmp + ) + } + &Inst::LoadAddr { rd, ref mem } => { + let (mem_str, mem) = + mem_finalize_for_show(mem, mb_rru, state, true, true, true, true); + + let op = match &mem { + &MemArg::BXD12 { .. } => "la", + &MemArg::BXD20 { .. } => "lay", + &MemArg::Label { .. } | &MemArg::Symbol { .. } => "larl", + _ => unreachable!(), + }; + let rd = rd.show_rru(mb_rru); + let mem = mem.show_rru(mb_rru); + format!("{}{} {}, {}", mem_str, op, rd, mem) + } + &Inst::VirtualSPOffsetAdj { offset } => { + state.virtual_sp_offset += offset; + format!("virtual_sp_offset_adjust {}", offset) + } + &Inst::ValueLabelMarker { label, reg } => { + format!("value_label {:?}, {}", label, reg.show_rru(mb_rru)) + } + &Inst::Unwind { ref inst } => { + format!("unwind {:?}", inst) + } + } + } +} + +//============================================================================= +// Label fixups and jump veneers. + +/// Different forms of label references for different instruction formats. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum LabelUse { + /// RI-format branch. 16-bit signed offset. PC-relative, offset is imm << 1. + BranchRI, + /// RIL-format branch. 32-bit signed offset. PC-relative, offset is imm << 1. 
+ BranchRIL, + /// 32-bit PC relative constant offset (from address of constant itself), + /// signed. Used in jump tables. + PCRel32, +} + +impl MachInstLabelUse for LabelUse { + /// Alignment for veneer code. + const ALIGN: CodeOffset = 2; + + /// Maximum PC-relative range (positive), inclusive. + fn max_pos_range(self) -> CodeOffset { + match self { + // 16-bit signed immediate, left-shifted by 1. + LabelUse::BranchRI => (1 << 20) - 1, + // This can address any valid CodeOffset. + LabelUse::BranchRIL => 0x7fff_ffff, + LabelUse::PCRel32 => 0x7fff_ffff, + } + } + + /// Maximum PC-relative range (negative). + fn max_neg_range(self) -> CodeOffset { + match self { + // 16-bit signed immediate, left-shifted by 1. + LabelUse::BranchRI => 1 << 20, + // This can address any valid CodeOffset. + LabelUse::BranchRIL => 0x8000_0000, + LabelUse::PCRel32 => 0x8000_0000, + } + } + + /// Size of window into code needed to do the patch. + fn patch_size(self) -> CodeOffset { + match self { + LabelUse::BranchRI => 4, + LabelUse::BranchRIL => 6, + LabelUse::PCRel32 => 4, + } + } + + /// Perform the patch. + fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) { + let pc_rel = (label_offset as i64) - (use_offset as i64); + debug_assert!(pc_rel <= self.max_pos_range() as i64); + debug_assert!(pc_rel >= -(self.max_neg_range() as i64)); + debug_assert!(pc_rel & 1 == 0); + let pc_rel_shifted = pc_rel >> 1; + + match self { + LabelUse::BranchRI => { + buffer[2..4].clone_from_slice(&u16::to_be_bytes(pc_rel_shifted as u16)); + } + LabelUse::BranchRIL => { + buffer[2..6].clone_from_slice(&u32::to_be_bytes(pc_rel_shifted as u32)); + } + LabelUse::PCRel32 => { + let insn_word = u32::from_be_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]); + let insn_word = insn_word.wrapping_add(pc_rel as u32); + buffer[0..4].clone_from_slice(&u32::to_be_bytes(insn_word)); + } + } + } + + /// Is a veneer supported for this label reference type? 
+ fn supports_veneer(self) -> bool { + false + } + + /// How large is the veneer, if supported? + fn veneer_size(self) -> CodeOffset { + 0 + } + + /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return + /// an offset and label-use for the veneer's use of the original label. + fn generate_veneer( + self, + _buffer: &mut [u8], + _veneer_offset: CodeOffset, + ) -> (CodeOffset, LabelUse) { + unreachable!(); + } +} diff --git a/cranelift/codegen/src/isa/s390x/inst/regs.rs b/cranelift/codegen/src/isa/s390x/inst/regs.rs new file mode 100644 index 0000000000..3ebba43bda --- /dev/null +++ b/cranelift/codegen/src/isa/s390x/inst/regs.rs @@ -0,0 +1,168 @@ +//! S390x ISA definitions: registers. + +use crate::settings; +use regalloc::{RealRegUniverse, Reg, RegClass, RegClassInfo, Writable, NUM_REG_CLASSES}; + +//============================================================================= +// Registers, the Universe thereof, and printing + +#[rustfmt::skip] +const GPR_INDICES: [u8; 16] = [ + // r0 and r1 reserved + 30, 31, + // r2 - r5 call-clobbered + 16, 17, 18, 19, + // r6 - r14 call-saved (order reversed) + 28, 27, 26, 25, 24, 23, 22, 21, 20, + // r15 (SP) + 29, +]; + +#[rustfmt::skip] +const FPR_INDICES: [u8; 16] = [ + // f0 - f7 as pairs + 0, 4, 1, 5, 2, 6, 3, 7, + // f8 - f15 as pairs + 8, 12, 9, 13, 10, 14, 11, 15, +]; + +/// Get a reference to a GPR (integer register). +pub fn gpr(num: u8) -> Reg { + assert!(num < 16); + Reg::new_real( + RegClass::I64, + /* enc = */ num, + /* index = */ GPR_INDICES[num as usize], + ) +} + +/// Get a writable reference to a GPR. +pub fn writable_gpr(num: u8) -> Writable { + Writable::from_reg(gpr(num)) +} + +/// Get a reference to a FPR (floating-point register). +pub fn fpr(num: u8) -> Reg { + assert!(num < 16); + Reg::new_real( + RegClass::F64, + /* enc = */ num, + /* index = */ FPR_INDICES[num as usize], + ) +} + +/// Get a writable reference to a V-register. 
+pub fn writable_fpr(num: u8) -> Writable { + Writable::from_reg(fpr(num)) +} + +/// Get a reference to the stack-pointer register. +pub fn stack_reg() -> Reg { + gpr(15) +} + +/// Get a writable reference to the stack-pointer register. +pub fn writable_stack_reg() -> Writable { + Writable::from_reg(stack_reg()) +} + +/// Get a reference to the first temporary, sometimes "spill temporary", register. This register is +/// used to compute the address of a spill slot when a direct offset addressing mode from FP is not +/// sufficient (+/- 2^11 words). We exclude this register from regalloc and reserve it for this +/// purpose for simplicity; otherwise we need a multi-stage analysis where we first determine how +/// many spill slots we have, then perhaps remove the reg from the pool and recompute regalloc. +/// +/// We use r1 for this because it's a scratch register but is slightly special (used for linker +/// veneers). We're free to use it as long as we don't expect it to live through call instructions. +pub fn spilltmp_reg() -> Reg { + gpr(1) +} + +/// Get a writable reference to the spilltmp reg. +pub fn writable_spilltmp_reg() -> Writable { + Writable::from_reg(spilltmp_reg()) +} + +pub fn zero_reg() -> Reg { + gpr(0) +} + +/// Create the register universe for AArch64. +pub fn create_reg_universe(_flags: &settings::Flags) -> RealRegUniverse { + let mut regs = vec![]; + let mut allocable_by_class = [None; NUM_REG_CLASSES]; + + // Numbering Scheme: we put FPRs first, then GPRs. The GPRs exclude several registers: + // r0 (we cannot use this for addressing // FIXME regalloc) + // r1 (spilltmp) + // r15 (stack pointer) + + // FPRs. 
+ let mut base = regs.len(); + regs.push((fpr(0).to_real_reg(), "%f0".into())); + regs.push((fpr(2).to_real_reg(), "%f2".into())); + regs.push((fpr(4).to_real_reg(), "%f4".into())); + regs.push((fpr(6).to_real_reg(), "%f6".into())); + regs.push((fpr(1).to_real_reg(), "%f1".into())); + regs.push((fpr(3).to_real_reg(), "%f3".into())); + regs.push((fpr(5).to_real_reg(), "%f5".into())); + regs.push((fpr(7).to_real_reg(), "%f7".into())); + regs.push((fpr(8).to_real_reg(), "%f8".into())); + regs.push((fpr(10).to_real_reg(), "%f10".into())); + regs.push((fpr(12).to_real_reg(), "%f12".into())); + regs.push((fpr(14).to_real_reg(), "%f14".into())); + regs.push((fpr(9).to_real_reg(), "%f9".into())); + regs.push((fpr(11).to_real_reg(), "%f11".into())); + regs.push((fpr(13).to_real_reg(), "%f13".into())); + regs.push((fpr(15).to_real_reg(), "%f15".into())); + + allocable_by_class[RegClass::F64.rc_to_usize()] = Some(RegClassInfo { + first: base, + last: regs.len() - 1, + suggested_scratch: Some(fpr(1).get_index()), + }); + + // Caller-saved GPRs in the SystemV s390x ABI. + base = regs.len(); + regs.push((gpr(2).to_real_reg(), "%r2".into())); + regs.push((gpr(3).to_real_reg(), "%r3".into())); + regs.push((gpr(4).to_real_reg(), "%r4".into())); + regs.push((gpr(5).to_real_reg(), "%r5".into())); + + // Callee-saved GPRs in the SystemV s390x ABI. + // We start from r14 downwards in an attempt to allow the + // prolog to use as short a STMG as possible. 
+ regs.push((gpr(14).to_real_reg(), "%r14".into())); + regs.push((gpr(13).to_real_reg(), "%r13".into())); + regs.push((gpr(12).to_real_reg(), "%r12".into())); + regs.push((gpr(11).to_real_reg(), "%r11".into())); + regs.push((gpr(10).to_real_reg(), "%r10".into())); + regs.push((gpr(9).to_real_reg(), "%r9".into())); + regs.push((gpr(8).to_real_reg(), "%r8".into())); + regs.push((gpr(7).to_real_reg(), "%r7".into())); + regs.push((gpr(6).to_real_reg(), "%r6".into())); + + allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo { + first: base, + last: regs.len() - 1, + suggested_scratch: Some(gpr(13).get_index()), + }); + + // Other regs, not available to the allocator. + let allocable = regs.len(); + regs.push((gpr(15).to_real_reg(), "%r15".into())); + regs.push((gpr(0).to_real_reg(), "%r0".into())); + regs.push((gpr(1).to_real_reg(), "%r1".into())); + + // Assert sanity: the indices in the register structs must match their + // actual indices in the array. + for (i, reg) in regs.iter().enumerate() { + assert_eq!(i, reg.0.get_index()); + } + + RealRegUniverse { + regs, + allocable, + allocable_by_class, + } +} diff --git a/cranelift/codegen/src/isa/s390x/inst/unwind.rs b/cranelift/codegen/src/isa/s390x/inst/unwind.rs new file mode 100644 index 0000000000..1e2bb904db --- /dev/null +++ b/cranelift/codegen/src/isa/s390x/inst/unwind.rs @@ -0,0 +1,2 @@ +#[cfg(feature = "unwind")] +pub(crate) mod systemv; diff --git a/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs new file mode 100644 index 0000000000..f5ff00cbd1 --- /dev/null +++ b/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs @@ -0,0 +1,197 @@ +//! Unwind information for System V ABI (s390x). + +use crate::isa::unwind::systemv::RegisterMappingError; +use gimli::{write::CommonInformationEntry, Encoding, Format, Register}; +use regalloc::{Reg, RegClass}; + +/// Creates a new s390x common information entry (CIE). 
+pub fn create_cie() -> CommonInformationEntry { + use gimli::write::CallFrameInstruction; + + let mut entry = CommonInformationEntry::new( + Encoding { + address_size: 8, + format: Format::Dwarf32, + version: 1, + }, + 1, // Code alignment factor + -8, // Data alignment factor + Register(14), // Return address column - register %r14 + ); + + // Every frame will start with the call frame address (CFA) at %r15 + 160. + entry.add_instruction(CallFrameInstruction::Cfa(Register(15), 160)); + + entry +} + +/// Map Cranelift registers to their corresponding Gimli registers. +pub fn map_reg(reg: Reg) -> Result { + const GPR_MAP: [gimli::Register; 16] = [ + Register(0), + Register(1), + Register(2), + Register(3), + Register(4), + Register(5), + Register(6), + Register(7), + Register(8), + Register(9), + Register(10), + Register(11), + Register(12), + Register(13), + Register(14), + Register(15), + ]; + const FPR_MAP: [gimli::Register; 16] = [ + Register(16), + Register(20), + Register(17), + Register(21), + Register(18), + Register(22), + Register(19), + Register(23), + Register(24), + Register(28), + Register(25), + Register(29), + Register(26), + Register(30), + Register(27), + Register(31), + ]; + + match reg.get_class() { + RegClass::I64 => Ok(GPR_MAP[reg.get_hw_encoding() as usize]), + RegClass::F64 => Ok(FPR_MAP[reg.get_hw_encoding() as usize]), + _ => Err(RegisterMappingError::UnsupportedRegisterBank("class?")), + } +} + +pub(crate) struct RegisterMapper; + +impl crate::isa::unwind::systemv::RegisterMapper for RegisterMapper { + fn map(&self, reg: Reg) -> Result { + Ok(map_reg(reg)?.0) + } + fn sp(&self) -> u16 { + Register(15).0 + } +} + +#[cfg(test)] +mod tests { + use crate::cursor::{Cursor, FuncCursor}; + use crate::ir::{ + types, AbiParam, ExternalName, Function, InstBuilder, Signature, StackSlotData, + StackSlotKind, + }; + use crate::isa::{lookup, CallConv}; + use crate::settings::{builder, Flags}; + use crate::Context; + use gimli::write::Address; + use 
std::str::FromStr; + use target_lexicon::triple; + + #[test] + fn test_simple_func() { + let isa = lookup(triple!("s390x")) + .expect("expect s390x ISA") + .finish(Flags::new(builder())); + + let mut context = Context::for_function(create_function( + CallConv::SystemV, + Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)), + )); + + context.compile(&*isa).expect("expected compilation"); + + let fde = match context + .create_unwind_info(isa.as_ref()) + .expect("can create unwind info") + { + Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => { + info.to_fde(Address::Constant(1234)) + } + _ => panic!("expected unwind information"), + }; + + assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 10, lsda: None, instructions: [(4, CfaOffset(224))] }"); + } + + fn create_function(call_conv: CallConv, stack_slot: Option) -> Function { + let mut func = + Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv)); + + let block0 = func.dfg.make_block(); + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(block0); + pos.ins().return_(&[]); + + if let Some(stack_slot) = stack_slot { + func.stack_slots.push(stack_slot); + } + + func + } + + #[test] + fn test_multi_return_func() { + let isa = lookup(triple!("s390x")) + .expect("expect s390x ISA") + .finish(Flags::new(builder())); + + let mut context = Context::for_function(create_multi_return_function( + CallConv::SystemV, + Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)), + )); + + context.compile(&*isa).expect("expected compilation"); + + let fde = match context + .create_unwind_info(isa.as_ref()) + .expect("can create unwind info") + { + Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => { + info.to_fde(Address::Constant(4321)) + } + _ => panic!("expected unwind information"), + }; + + assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 26, lsda: None, instructions: [(4, CfaOffset(224))] }"); 
+ } + + fn create_multi_return_function( + call_conv: CallConv, + stack_slot: Option, + ) -> Function { + let mut sig = Signature::new(call_conv); + sig.params.push(AbiParam::new(types::I32)); + let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig); + + let block0 = func.dfg.make_block(); + let v0 = func.dfg.append_block_param(block0, types::I32); + let block1 = func.dfg.make_block(); + let block2 = func.dfg.make_block(); + + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(block0); + pos.ins().brnz(v0, block2, &[]); + pos.ins().jump(block1, &[]); + + pos.insert_block(block1); + pos.ins().return_(&[]); + + pos.insert_block(block2); + pos.ins().return_(&[]); + + if let Some(stack_slot) = stack_slot { + func.stack_slots.push(stack_slot); + } + + func + } +} diff --git a/cranelift/codegen/src/isa/s390x/lower.rs b/cranelift/codegen/src/isa/s390x/lower.rs new file mode 100644 index 0000000000..26276f0434 --- /dev/null +++ b/cranelift/codegen/src/isa/s390x/lower.rs @@ -0,0 +1,2839 @@ +//! Lowering rules for S390x. + +use crate::ir::condcodes::{FloatCC, IntCC}; +use crate::ir::Inst as IRInst; +use crate::ir::{types, Endianness, InstructionData, MemFlags, Opcode, TrapCode, Type}; +use crate::isa::s390x::abi::*; +use crate::isa::s390x::inst::*; +use crate::isa::s390x::S390xBackend; +use crate::machinst::lower::*; +use crate::machinst::*; +use crate::settings::Flags; +use crate::CodegenResult; +use alloc::boxed::Box; +use alloc::vec::Vec; +use core::convert::TryFrom; +use regalloc::{Reg, Writable}; +use smallvec::SmallVec; + +//============================================================================= +// Helpers for instruction lowering. 
+ +fn ty_is_int(ty: Type) -> bool { + match ty { + types::B1 | types::B8 | types::B16 | types::B32 | types::B64 => true, + types::I8 | types::I16 | types::I32 | types::I64 | types::R64 => true, + types::F32 | types::F64 => false, + types::IFLAGS | types::FFLAGS => panic!("Unexpected flags type"), + _ => panic!("ty_is_int() on unknown type: {:?}", ty), + } +} + +fn ty_is_float(ty: Type) -> bool { + !ty_is_int(ty) +} + +fn choose_32_64(ty: Type, op32: T, op64: T) -> T { + let bits = ty_bits(ty); + if bits <= 32 { + op32 + } else if bits == 64 { + op64 + } else { + panic!("choose_32_64 on > 64 bits!") + } +} + +//============================================================================ +// Lowering: convert instruction inputs to forms that we can use. + +/// Lower an instruction input to a 64-bit constant, if possible. +fn input_matches_const>(ctx: &mut C, input: InsnInput) -> Option { + let input = ctx.get_input_as_source_or_const(input.insn, input.input); + input.constant +} + +/// Return false if instruction input cannot have the value Imm, true otherwise. +fn input_maybe_imm>(ctx: &mut C, input: InsnInput, imm: u64) -> bool { + if let Some(c) = input_matches_const(ctx, input) { + let ty = ctx.input_ty(input.insn, input.input); + let from_bits = ty_bits(ty) as u8; + let mask = if from_bits < 64 { + (1u64 << ty_bits(ty)) - 1 + } else { + 0xffff_ffff_ffff_ffff + }; + c & mask == imm & mask + } else { + true + } +} + +/// Lower an instruction input to a 16-bit signed constant, if possible. +fn input_matches_simm16>(ctx: &mut C, input: InsnInput) -> Option { + if let Some(imm_value) = input_matches_const(ctx, input) { + if let Ok(imm) = i16::try_from(imm_value as i64) { + return Some(imm); + } + } + None +} + +/// Lower an instruction input to a 32-bit signed constant, if possible. 
+fn input_matches_simm32>(ctx: &mut C, input: InsnInput) -> Option { + if let Some(imm_value) = input_matches_const(ctx, input) { + if let Ok(imm) = i32::try_from(imm_value as i64) { + return Some(imm); + } + } + None +} + +/// Lower an instruction input to a 32-bit unsigned constant, if possible. +fn input_matches_uimm32>(ctx: &mut C, input: InsnInput) -> Option { + if let Some(imm_value) = input_matches_const(ctx, input) { + if let Ok(imm) = u32::try_from(imm_value) { + return Some(imm); + } + } + None +} + +/// Lower a negated instruction input to a 16-bit signed constant, if possible. +fn negated_input_matches_simm16>( + ctx: &mut C, + input: InsnInput, +) -> Option { + if let Some(imm_value) = input_matches_const(ctx, input) { + if let Ok(imm) = i16::try_from(-(imm_value as i64)) { + return Some(imm); + } + } + None +} + +/// Lower a negated instruction input to a 32-bit signed constant, if possible. +fn negated_input_matches_simm32>( + ctx: &mut C, + input: InsnInput, +) -> Option { + if let Some(imm_value) = input_matches_const(ctx, input) { + if let Ok(imm) = i32::try_from(-(imm_value as i64)) { + return Some(imm); + } + } + None +} + +/// Lower an instruction input to a 16-bit shifted constant, if possible. +fn input_matches_uimm16shifted>( + ctx: &mut C, + input: InsnInput, +) -> Option { + if let Some(imm_value) = input_matches_const(ctx, input) { + return UImm16Shifted::maybe_from_u64(imm_value); + } + None +} + +/// Lower an instruction input to a 32-bit shifted constant, if possible. +fn input_matches_uimm32shifted>( + ctx: &mut C, + input: InsnInput, +) -> Option { + if let Some(imm_value) = input_matches_const(ctx, input) { + return UImm32Shifted::maybe_from_u64(imm_value); + } + None +} + +/// Lower an instruction input to a 16-bit inverted shifted constant, if possible. 
+fn input_matches_uimm16shifted_inv>( + ctx: &mut C, + input: InsnInput, +) -> Option { + if let Some(imm_value) = input_matches_const(ctx, input) { + if let Some(imm) = UImm16Shifted::maybe_from_u64(!imm_value) { + return Some(imm.negate_bits()); + } + } + None +} + +/// Lower an instruction input to a 32-bit inverted shifted constant, if possible. +fn input_matches_uimm32shifted_inv>( + ctx: &mut C, + input: InsnInput, +) -> Option { + if let Some(imm_value) = input_matches_const(ctx, input) { + if let Some(imm) = UImm32Shifted::maybe_from_u64(!imm_value) { + return Some(imm.negate_bits()); + } + } + None +} + +/// Checks for an instance of `op` feeding the given input. +fn input_matches_insn>( + c: &mut C, + input: InsnInput, + op: Opcode, +) -> Option { + let inputs = c.get_input_as_source_or_const(input.insn, input.input); + if let Some((src_inst, _)) = inputs.inst { + let data = c.data(src_inst); + if data.opcode() == op { + return Some(src_inst); + } + } + None +} + +/// Checks for an instance of `op` feeding the given input, possibly via a conversion `conv` (e.g., +/// Bint or a bitcast). 
+fn input_matches_insn_via_conv>( + c: &mut C, + input: InsnInput, + op: Opcode, + conv: Opcode, +) -> Option { + let inputs = c.get_input_as_source_or_const(input.insn, input.input); + if let Some((src_inst, _)) = inputs.inst { + let data = c.data(src_inst); + if data.opcode() == op { + return Some(src_inst); + } + if data.opcode() == conv { + let inputs = c.get_input_as_source_or_const(src_inst, 0); + if let Some((src_inst, _)) = inputs.inst { + let data = c.data(src_inst); + if data.opcode() == op { + return Some(src_inst); + } + } + } + } + None +} + +fn input_matches_load_insn>( + ctx: &mut C, + input: InsnInput, + op: Opcode, +) -> Option { + if let Some(insn) = input_matches_insn(ctx, input, op) { + let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn)) + .map(|i| InsnInput { insn, input: i }) + .collect(); + let off = ctx.data(insn).load_store_offset().unwrap(); + let flags = ctx.memflags(insn).unwrap(); + let endianness = flags.endianness(Endianness::Big); + if endianness == Endianness::Big { + let mem = lower_address(ctx, &inputs[..], off, flags); + ctx.sink_inst(insn); + return Some(mem); + } + } + None +} + +fn input_matches_mem>(ctx: &mut C, input: InsnInput) -> Option { + if ty_bits(ctx.input_ty(input.insn, input.input)) >= 32 { + return input_matches_load_insn(ctx, input, Opcode::Load); + } + None +} + +fn input_matches_sext16_mem>( + ctx: &mut C, + input: InsnInput, +) -> Option { + if ty_bits(ctx.input_ty(input.insn, input.input)) == 16 { + return input_matches_load_insn(ctx, input, Opcode::Load); + } + if ty_bits(ctx.input_ty(input.insn, input.input)) >= 32 { + return input_matches_load_insn(ctx, input, Opcode::Sload16); + } + None +} + +fn input_matches_sext32_mem>( + ctx: &mut C, + input: InsnInput, +) -> Option { + if ty_bits(ctx.input_ty(input.insn, input.input)) > 32 { + return input_matches_load_insn(ctx, input, Opcode::Sload32); + } + None +} + +fn input_matches_sext32_reg>(ctx: &mut C, input: InsnInput) -> Option { + if let 
Some(insn) = input_matches_insn(ctx, input, Opcode::Sextend) { + if ty_bits(ctx.input_ty(insn, 0)) == 32 { + let reg = put_input_in_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None); + return Some(reg); + } + } + None +} + +fn input_matches_uext32_reg>(ctx: &mut C, input: InsnInput) -> Option { + if let Some(insn) = input_matches_insn(ctx, input, Opcode::Uextend) { + if ty_bits(ctx.input_ty(insn, 0)) == 32 { + let reg = put_input_in_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None); + return Some(reg); + } + } + None +} + +fn input_matches_uext16_mem>( + ctx: &mut C, + input: InsnInput, +) -> Option { + if ty_bits(ctx.input_ty(input.insn, input.input)) == 16 { + return input_matches_load_insn(ctx, input, Opcode::Load); + } + if ty_bits(ctx.input_ty(input.insn, input.input)) >= 32 { + return input_matches_load_insn(ctx, input, Opcode::Uload16); + } + None +} + +fn input_matches_uext32_mem>( + ctx: &mut C, + input: InsnInput, +) -> Option { + if ty_bits(ctx.input_ty(input.insn, input.input)) > 32 { + return input_matches_load_insn(ctx, input, Opcode::Uload32); + } + None +} + +//============================================================================ +// Lowering: force instruction input into a register + +/// How to handle narrow values loaded into registers; see note on `narrow_mode` +/// parameter to `put_input_in_*` below. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum NarrowValueMode { + None, + /// Zero-extend to 32 bits if original is < 32 bits. + ZeroExtend32, + /// Sign-extend to 32 bits if original is < 32 bits. + SignExtend32, + /// Zero-extend to 64 bits if original is < 64 bits. + ZeroExtend64, + /// Sign-extend to 64 bits if original is < 64 bits. 
+ SignExtend64, +} + +fn extend_memory_to_reg>( + ctx: &mut C, + mem: MemArg, + from_ty: Type, + to_ty: Type, + signed: bool, +) -> Reg { + let rd = ctx.alloc_tmp(to_ty).only_reg().unwrap(); + ctx.emit(match (signed, ty_bits(to_ty), ty_bits(from_ty)) { + (false, 32, 8) => Inst::Load32ZExt8 { rd, mem }, + (false, 32, 16) => Inst::Load32ZExt16 { rd, mem }, + (true, 32, 8) => Inst::Load32SExt8 { rd, mem }, + (true, 32, 16) => Inst::Load32SExt16 { rd, mem }, + (false, 64, 8) => Inst::Load64ZExt8 { rd, mem }, + (false, 64, 16) => Inst::Load64ZExt16 { rd, mem }, + (false, 64, 32) => Inst::Load64ZExt32 { rd, mem }, + (true, 64, 8) => Inst::Load64SExt8 { rd, mem }, + (true, 64, 16) => Inst::Load64SExt16 { rd, mem }, + (true, 64, 32) => Inst::Load64SExt32 { rd, mem }, + _ => panic!("Unsupported size in load"), + }); + rd.to_reg() +} + +/// Sign-extend the low `from_bits` bits of `value` to a full u64. +fn sign_extend_to_u64(value: u64, from_bits: u8) -> u64 { + assert!(from_bits <= 64); + if from_bits >= 64 { + value + } else { + (((value << (64 - from_bits)) as i64) >> (64 - from_bits)) as u64 + } +} + +/// Zero-extend the low `from_bits` bits of `value` to a full u64. +fn zero_extend_to_u64(value: u64, from_bits: u8) -> u64 { + assert!(from_bits <= 64); + if from_bits >= 64 { + value + } else { + value & ((1u64 << from_bits) - 1) + } +} + +/// Lower an instruction input to a reg. +/// +/// The given register will be extended appropriately, according to +/// `narrow_mode` and the input's type. 
+fn put_input_in_reg>( + ctx: &mut C, + input: InsnInput, + narrow_mode: NarrowValueMode, +) -> Reg { + let signed = match narrow_mode { + NarrowValueMode::SignExtend32 | NarrowValueMode::SignExtend64 => true, + NarrowValueMode::ZeroExtend32 | NarrowValueMode::ZeroExtend64 => false, + _ => false, + }; + let ty = ctx.input_ty(input.insn, input.input); + let from_bits = ty_bits(ty) as u8; + let ext_ty = match narrow_mode { + NarrowValueMode::None => ty, + NarrowValueMode::ZeroExtend32 | NarrowValueMode::SignExtend32 => types::I32, + NarrowValueMode::ZeroExtend64 | NarrowValueMode::SignExtend64 => types::I64, + }; + let to_bits = ty_bits(ext_ty) as u8; + assert!(to_bits >= from_bits); + + if let Some(c) = input_matches_const(ctx, input) { + let extended = if from_bits == to_bits { + c + } else if signed { + sign_extend_to_u64(c, from_bits) + } else { + zero_extend_to_u64(c, from_bits) + }; + let masked = zero_extend_to_u64(extended, to_bits); + + // Generate constants fresh at each use to minimize long-range register pressure. + let to_reg = ctx.alloc_tmp(ext_ty).only_reg().unwrap(); + for inst in Inst::gen_constant(ValueRegs::one(to_reg), masked as u128, ext_ty, |ty| { + ctx.alloc_tmp(ty).only_reg().unwrap() + }) + .into_iter() + { + ctx.emit(inst); + } + to_reg.to_reg() + } else if to_bits == from_bits { + ctx.put_input_in_regs(input.insn, input.input) + .only_reg() + .unwrap() + } else if let Some(mem) = input_matches_load_insn(ctx, input, Opcode::Load) { + extend_memory_to_reg(ctx, mem, ty, ext_ty, signed) + } else { + let rd = ctx.alloc_tmp(ext_ty).only_reg().unwrap(); + let rn = ctx + .put_input_in_regs(input.insn, input.input) + .only_reg() + .unwrap(); + ctx.emit(Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits, + }); + rd.to_reg() + } +} + +//============================================================================ +// Lowering: addressing mode support. Takes instruction directly, rather +// than an `InsnInput`, to do more introspection. 
+ +/// Lower the address of a load or store. +fn lower_address>( + ctx: &mut C, + addends: &[InsnInput], + offset: i32, + flags: MemFlags, +) -> MemArg { + // Handle one reg and offset. + if addends.len() == 1 { + if offset == 0 { + if let Some(add) = input_matches_insn(ctx, addends[0], Opcode::Iadd) { + debug_assert_eq!(ctx.output_ty(add, 0), types::I64); + let add_inputs = &[ + InsnInput { + insn: add, + input: 0, + }, + InsnInput { + insn: add, + input: 1, + }, + ]; + + let ra = put_input_in_reg(ctx, add_inputs[0], NarrowValueMode::None); + let rb = put_input_in_reg(ctx, add_inputs[1], NarrowValueMode::None); + return MemArg::reg_plus_reg(ra, rb, flags); + } + } + + if let Some(symbol) = input_matches_insn(ctx, addends[0], Opcode::SymbolValue) { + let (extname, dist, ext_offset) = ctx.symbol_value(symbol).unwrap(); + let ext_offset = ext_offset + i64::from(offset); + if dist == RelocDistance::Near && (ext_offset & 1) == 0 { + if let Ok(offset) = i32::try_from(ext_offset) { + return MemArg::Symbol { + name: Box::new(extname.clone()), + offset, + flags, + }; + } + } + } + + let reg = put_input_in_reg(ctx, addends[0], NarrowValueMode::None); + return MemArg::reg_plus_off(reg, offset as i64, flags); + } + + // Handle two regs and a zero offset. + if addends.len() == 2 && offset == 0 { + let ra = put_input_in_reg(ctx, addends[0], NarrowValueMode::None); + let rb = put_input_in_reg(ctx, addends[1], NarrowValueMode::None); + return MemArg::reg_plus_reg(ra, rb, flags); + } + + // Otherwise, generate add instructions. + let addr = ctx.alloc_tmp(types::I64).only_reg().unwrap(); + + // Get the const into a reg. + lower_constant_u64(ctx, addr.clone(), offset as u64); + + // Add each addend to the address. 
+ for addend in addends { + let reg = put_input_in_reg(ctx, *addend, NarrowValueMode::None); + + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::Add64, + rd: addr.clone(), + rn: addr.to_reg(), + rm: reg.clone(), + }); + } + + MemArg::reg(addr.to_reg(), flags) +} + +//============================================================================ +// Lowering: generating constants. + +fn lower_constant_u64>(ctx: &mut C, rd: Writable, value: u64) { + for inst in Inst::load_constant64(rd, value) { + ctx.emit(inst); + } +} + +fn lower_constant_u32>(ctx: &mut C, rd: Writable, value: u32) { + for inst in Inst::load_constant32(rd, value) { + ctx.emit(inst); + } +} + +fn lower_constant_f32>(ctx: &mut C, rd: Writable, value: f32) { + ctx.emit(Inst::load_fp_constant32(rd, value)); +} + +fn lower_constant_f64>(ctx: &mut C, rd: Writable, value: f64) { + ctx.emit(Inst::load_fp_constant64(rd, value)); +} + +//============================================================================= +// Lowering: comparisons + +/// Determines whether this condcode interprets inputs as signed or +/// unsigned. See the documentation for the `icmp` instruction in +/// cranelift-codegen/meta/src/shared/instructions.rs for further insights +/// into this. 
+pub fn condcode_is_signed(cc: IntCC) -> bool { + match cc { + IntCC::Equal => false, + IntCC::NotEqual => false, + IntCC::SignedGreaterThanOrEqual => true, + IntCC::SignedGreaterThan => true, + IntCC::SignedLessThanOrEqual => true, + IntCC::SignedLessThan => true, + IntCC::UnsignedGreaterThanOrEqual => false, + IntCC::UnsignedGreaterThan => false, + IntCC::UnsignedLessThanOrEqual => false, + IntCC::UnsignedLessThan => false, + IntCC::Overflow => true, + IntCC::NotOverflow => true, + } +} + +fn lower_icmp_to_flags>( + ctx: &mut C, + insn: IRInst, + is_signed: bool, + may_sink_memory: bool, +) { + let ty = ctx.input_ty(insn, 0); + let bits = ty_bits(ty); + let narrow_mode = match (bits <= 32, is_signed) { + (true, true) => NarrowValueMode::SignExtend32, + (true, false) => NarrowValueMode::ZeroExtend32, + (false, true) => NarrowValueMode::SignExtend64, + (false, false) => NarrowValueMode::ZeroExtend64, + }; + let inputs = [ + InsnInput { + insn: insn, + input: 0, + }, + InsnInput { + insn: insn, + input: 1, + }, + ]; + let ty = ctx.input_ty(insn, 0); + let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); + if is_signed { + let op = choose_32_64(ty, CmpOp::CmpS32, CmpOp::CmpS64); + // Try matching immedate operand. + if let Some(imm) = input_matches_simm16(ctx, inputs[1]) { + return ctx.emit(Inst::CmpRSImm16 { op, rn, imm }); + } + if let Some(imm) = input_matches_simm32(ctx, inputs[1]) { + return ctx.emit(Inst::CmpRSImm32 { op, rn, imm }); + } + // If sinking memory loads is allowed, try matching memory operand. 
+ if may_sink_memory { + if let Some(mem) = input_matches_mem(ctx, inputs[1]) { + return ctx.emit(Inst::CmpRX { op, rn, mem }); + } + if let Some(mem) = input_matches_sext16_mem(ctx, inputs[1]) { + let op = choose_32_64(ty, CmpOp::CmpS32Ext16, CmpOp::CmpS64Ext16); + return ctx.emit(Inst::CmpRX { op, rn, mem }); + } + if let Some(mem) = input_matches_sext32_mem(ctx, inputs[1]) { + return ctx.emit(Inst::CmpRX { + op: CmpOp::CmpS64Ext32, + rn, + mem, + }); + } + } + // Try matching sign-extension in register. + if let Some(rm) = input_matches_sext32_reg(ctx, inputs[1]) { + return ctx.emit(Inst::CmpRR { + op: CmpOp::CmpS64Ext32, + rn, + rm, + }); + } + // If no special case matched above, fall back to a register compare. + let rm = put_input_in_reg(ctx, inputs[1], narrow_mode); + return ctx.emit(Inst::CmpRR { op, rn, rm }); + } else { + let op = choose_32_64(ty, CmpOp::CmpL32, CmpOp::CmpL64); + // Try matching immedate operand. + if let Some(imm) = input_matches_uimm32(ctx, inputs[1]) { + return ctx.emit(Inst::CmpRUImm32 { op, rn, imm }); + } + // If sinking memory loads is allowed, try matching memory operand. + if may_sink_memory { + if let Some(mem) = input_matches_mem(ctx, inputs[1]) { + return ctx.emit(Inst::CmpRX { op, rn, mem }); + } + if let Some(mem) = input_matches_uext16_mem(ctx, inputs[1]) { + match &mem { + &MemArg::Symbol { .. } => { + let op = choose_32_64(ty, CmpOp::CmpL32Ext16, CmpOp::CmpL64Ext16); + return ctx.emit(Inst::CmpRX { op, rn, mem }); + } + _ => { + let reg_ty = choose_32_64(ty, types::I32, types::I64); + let rm = extend_memory_to_reg(ctx, mem, ty, reg_ty, false); + return ctx.emit(Inst::CmpRR { op, rn, rm }); + } + } + } + if let Some(mem) = input_matches_uext32_mem(ctx, inputs[1]) { + return ctx.emit(Inst::CmpRX { + op: CmpOp::CmpL64Ext32, + rn, + mem, + }); + } + } + // Try matching zero-extension in register. 
+ if let Some(rm) = input_matches_uext32_reg(ctx, inputs[1]) { + return ctx.emit(Inst::CmpRR { + op: CmpOp::CmpL64Ext32, + rn, + rm, + }); + } + // If no special case matched above, fall back to a register compare. + let rm = put_input_in_reg(ctx, inputs[1], narrow_mode); + return ctx.emit(Inst::CmpRR { op, rn, rm }); + } +} + +fn lower_fcmp_to_flags>(ctx: &mut C, insn: IRInst) { + let ty = ctx.input_ty(insn, 0); + let bits = ty_bits(ty); + let inputs = [ + InsnInput { + insn: insn, + input: 0, + }, + InsnInput { + insn: insn, + input: 1, + }, + ]; + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + match bits { + 32 => { + ctx.emit(Inst::FpuCmp32 { rn, rm }); + } + 64 => { + ctx.emit(Inst::FpuCmp64 { rn, rm }); + } + _ => panic!("Unknown float size"), + } +} + +fn lower_boolean_to_flags>(ctx: &mut C, input: InsnInput) -> Cond { + if let Some(icmp_insn) = input_matches_insn_via_conv(ctx, input, Opcode::Icmp, Opcode::Bint) { + // FIXME: If the Icmp (and Bint) only have a single use, we can still allow sinking memory + let may_sink_memory = false; + let condcode = ctx.data(icmp_insn).cond_code().unwrap(); + let is_signed = condcode_is_signed(condcode); + lower_icmp_to_flags(ctx, icmp_insn, is_signed, may_sink_memory); + Cond::from_intcc(condcode) + } else if let Some(fcmp_insn) = + input_matches_insn_via_conv(ctx, input, Opcode::Fcmp, Opcode::Bint) + { + let condcode = ctx.data(fcmp_insn).fp_cond_code().unwrap(); + lower_fcmp_to_flags(ctx, fcmp_insn); + Cond::from_floatcc(condcode) + } else { + let ty = ctx.input_ty(input.insn, input.input); + let narrow_mode = if ty.bits() < 32 { + NarrowValueMode::ZeroExtend32 + } else { + NarrowValueMode::None + }; + let rn = put_input_in_reg(ctx, input, narrow_mode); + let op = choose_32_64(ty, CmpOp::CmpS32, CmpOp::CmpS64); + ctx.emit(Inst::CmpRSImm16 { op, rn, imm: 0 }); + Cond::from_intcc(IntCC::NotEqual) + } +} + +fn 
lower_flags_to_bool_result>( + ctx: &mut C, + cond: Cond, + rd: Writable, + ty: Type, +) { + if ty_bits(ty) == 1 { + lower_constant_u32(ctx, rd, 0); + ctx.emit(Inst::CMov32SImm16 { rd, cond, imm: 1 }); + } else if ty_bits(ty) < 64 { + lower_constant_u32(ctx, rd, 0); + ctx.emit(Inst::CMov32SImm16 { rd, cond, imm: -1 }); + } else { + lower_constant_u64(ctx, rd, 0); + ctx.emit(Inst::CMov64SImm16 { rd, cond, imm: -1 }); + } +} + +//============================================================================ +// Lowering: main entry point for lowering a instruction + +fn lower_insn_to_regs>( + ctx: &mut C, + insn: IRInst, + flags: &Flags, +) -> CodegenResult<()> { + let op = ctx.data(insn).opcode(); + let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn)) + .map(|i| InsnInput { insn, input: i }) + .collect(); + let outputs: SmallVec<[InsnOutput; 2]> = (0..ctx.num_outputs(insn)) + .map(|i| InsnOutput { insn, output: i }) + .collect(); + let ty = if outputs.len() > 0 { + Some(ctx.output_ty(insn, 0)) + } else { + None + }; + + match op { + Opcode::Nop => { + // Nothing. + } + + Opcode::Copy | Opcode::Ireduce | Opcode::Breduce => { + // Smaller ints / bools have the high bits undefined, so any reduce + // operation is simply a copy. 
+ let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let ty = ctx.input_ty(insn, 0); + ctx.emit(Inst::gen_move(rd, rn, ty)); + } + + Opcode::Iconst | Opcode::Bconst | Opcode::Null => { + let value = ctx.get_constant(insn).unwrap(); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let ty = ty.unwrap(); + if ty.bits() <= 32 { + lower_constant_u32(ctx, rd, value as u32); + } else { + lower_constant_u64(ctx, rd, value); + } + } + Opcode::F32const => { + let value = f32::from_bits(ctx.get_constant(insn).unwrap() as u32); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + lower_constant_f32(ctx, rd, value); + } + Opcode::F64const => { + let value = f64::from_bits(ctx.get_constant(insn).unwrap()); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + lower_constant_f64(ctx, rd, value); + } + + Opcode::Iadd => { + let ty = ty.unwrap(); + let alu_op = choose_32_64(ty, ALUOp::Add32, ALUOp::Add64); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + if let Some(imm) = input_matches_simm16(ctx, inputs[1]) { + ctx.emit(Inst::AluRRSImm16 { + alu_op, + rd, + rn, + imm, + }); + } else if let Some(imm) = input_matches_simm32(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRSImm32 { alu_op, rd, imm }); + } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRX { alu_op, rd, mem }); + } else if let Some(mem) = input_matches_sext16_mem(ctx, inputs[1]) { + let alu_op = choose_32_64(ty, ALUOp::Add32Ext16, ALUOp::Add64Ext16); + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRX { alu_op, rd, mem }); + } else if let Some(mem) = input_matches_sext32_mem(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRX { + alu_op: ALUOp::Add64Ext32, + rd, + mem, + 
}); + } else if let Some(rm) = input_matches_sext32_reg(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRR { + alu_op: ALUOp::Add64Ext32, + rd, + rm, + }); + } else { + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm }); + } + } + Opcode::Isub => { + let ty = ty.unwrap(); + let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64); + let neg_op = choose_32_64(ty, ALUOp::Add32, ALUOp::Add64); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + if let Some(imm) = negated_input_matches_simm16(ctx, inputs[1]) { + ctx.emit(Inst::AluRRSImm16 { + alu_op: neg_op, + rd, + rn, + imm, + }); + } else if let Some(imm) = negated_input_matches_simm32(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRSImm32 { + alu_op: neg_op, + rd, + imm, + }); + } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRX { alu_op, rd, mem }); + } else if let Some(mem) = input_matches_sext16_mem(ctx, inputs[1]) { + let alu_op = choose_32_64(ty, ALUOp::Sub32Ext16, ALUOp::Sub64Ext16); + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRX { alu_op, rd, mem }); + } else if let Some(mem) = input_matches_sext32_mem(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRX { + alu_op: ALUOp::Sub64Ext32, + rd, + mem, + }); + } else if let Some(rm) = input_matches_sext32_reg(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRR { + alu_op: ALUOp::Sub64Ext32, + rd, + rm, + }); + } else { + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm }); + } + } + + Opcode::UaddSat | Opcode::SaddSat => unimplemented!(), + Opcode::UsubSat | Opcode::SsubSat => unimplemented!(), + + Opcode::Iabs => { + let ty = ty.unwrap(); + let op = 
choose_32_64(ty, UnaryOp::Abs32, UnaryOp::Abs64); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + if let Some(rn) = input_matches_sext32_reg(ctx, inputs[0]) { + ctx.emit(Inst::UnaryRR { + op: UnaryOp::Abs64Ext32, + rd, + rn, + }); + } else { + let narrow_mode = if ty.bits() < 32 { + NarrowValueMode::SignExtend32 + } else { + NarrowValueMode::None + }; + let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); + ctx.emit(Inst::UnaryRR { op, rd, rn }); + } + } + Opcode::Ineg => { + let ty = ty.unwrap(); + let op = choose_32_64(ty, UnaryOp::Neg32, UnaryOp::Neg64); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + if let Some(rn) = input_matches_sext32_reg(ctx, inputs[0]) { + ctx.emit(Inst::UnaryRR { + op: UnaryOp::Neg64Ext32, + rd, + rn, + }); + } else { + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + ctx.emit(Inst::UnaryRR { op, rd, rn }); + } + } + + Opcode::Imul => { + let ty = ty.unwrap(); + let alu_op = choose_32_64(ty, ALUOp::Mul32, ALUOp::Mul64); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + if let Some(imm) = input_matches_simm16(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRSImm16 { alu_op, rd, imm }); + } else if let Some(imm) = input_matches_simm32(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRSImm32 { alu_op, rd, imm }); + } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRX { alu_op, rd, mem }); + } else if let Some(mem) = input_matches_sext16_mem(ctx, inputs[1]) { + let alu_op = choose_32_64(ty, ALUOp::Mul32Ext16, ALUOp::Mul64Ext16); + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRX { alu_op, rd, mem }); + } else if let Some(mem) = input_matches_sext32_mem(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRX { + alu_op: ALUOp::Mul64Ext32, 
+ rd, + mem, + }); + } else if let Some(rm) = input_matches_sext32_reg(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRR { + alu_op: ALUOp::Mul64Ext32, + rd, + rm, + }); + } else { + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm }); + } + } + + Opcode::Umulhi | Opcode::Smulhi => { + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let is_signed = op == Opcode::Smulhi; + let input_ty = ctx.input_ty(insn, 0); + assert!(ctx.input_ty(insn, 1) == input_ty); + assert!(ctx.output_ty(insn, 0) == input_ty); + + match input_ty { + types::I64 => { + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + + if is_signed { + ctx.emit(Inst::SMulWide { rn, rm }); + ctx.emit(Inst::gen_move(rd, gpr(0), input_ty)); + } else { + ctx.emit(Inst::gen_move(writable_gpr(1), rm, input_ty)); + ctx.emit(Inst::UMulWide { rn }); + ctx.emit(Inst::gen_move(rd, gpr(0), input_ty)); + } + } + types::I32 => { + let narrow_mode = if is_signed { + NarrowValueMode::SignExtend64 + } else { + NarrowValueMode::ZeroExtend64 + }; + let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); + let rm = put_input_in_reg(ctx, inputs[1], narrow_mode); + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::Mul64, + rd, + rn, + rm, + }); + let shift_op = if is_signed { + ShiftOp::AShR64 + } else { + ShiftOp::LShR64 + }; + ctx.emit(Inst::ShiftRR { + shift_op, + rd, + rn: rd.to_reg(), + shift_imm: SImm20::maybe_from_i64(32).unwrap(), + shift_reg: None, + }); + } + types::I16 | types::I8 => { + let narrow_mode = if is_signed { + NarrowValueMode::SignExtend32 + } else { + NarrowValueMode::ZeroExtend32 + }; + let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); + let rm = put_input_in_reg(ctx, inputs[1], narrow_mode); + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::Mul32, + rd, + rn, + rm, + }); + let shift_op = if is_signed { + ShiftOp::AShR32 + } 
else { + ShiftOp::LShR32 + }; + let shift_amt = match input_ty { + types::I16 => 16, + types::I8 => 8, + _ => unreachable!(), + }; + ctx.emit(Inst::ShiftRR { + shift_op, + rd, + rn: rd.to_reg(), + shift_imm: SImm20::maybe_from_i64(shift_amt).unwrap(), + shift_reg: None, + }); + } + _ => { + panic!("Unsupported argument type for umulhi/smulhi: {}", input_ty); + } + } + } + + Opcode::Udiv | Opcode::Urem => { + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let ty = ty.unwrap(); + + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + if ty_bits(ty) <= 32 { + lower_constant_u32(ctx, writable_gpr(0), 0); + if ty_bits(ty) < 32 { + ctx.emit(Inst::Extend { + rd: writable_gpr(1), + rn, + signed: false, + from_bits: ty_bits(ty) as u8, + to_bits: 32, + }); + } else { + ctx.emit(Inst::mov32(writable_gpr(1), rn)); + } + } else { + lower_constant_u64(ctx, writable_gpr(0), 0); + ctx.emit(Inst::mov64(writable_gpr(1), rn)); + } + + let narrow_mode = if ty.bits() < 32 { + NarrowValueMode::ZeroExtend32 + } else { + NarrowValueMode::None + }; + let rm = put_input_in_reg(ctx, inputs[1], narrow_mode); + + if input_maybe_imm(ctx, inputs[1], 0) && flags.avoid_div_traps() { + ctx.emit(Inst::CmpTrapRSImm16 { + op: choose_32_64(ty, CmpOp::CmpS32, CmpOp::CmpS64), + rn: rm, + imm: 0, + cond: Cond::from_intcc(IntCC::Equal), + trap_code: TrapCode::IntegerDivisionByZero, + }); + } + + if ty_bits(ty) <= 32 { + ctx.emit(Inst::UDivMod32 { rn: rm }); + } else { + ctx.emit(Inst::UDivMod64 { rn: rm }); + } + + if op == Opcode::Udiv { + ctx.emit(Inst::gen_move(rd, gpr(1), ty)); + } else { + ctx.emit(Inst::gen_move(rd, gpr(0), ty)); + } + } + + Opcode::Sdiv | Opcode::Srem => { + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let ty = ty.unwrap(); + + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + if ty_bits(ty) < 64 { + ctx.emit(Inst::Extend { + rd: writable_gpr(1), + rn, + signed: true, + from_bits: ty_bits(ty) as u8, + to_bits: 64, 
+ }); + } else { + ctx.emit(Inst::mov64(writable_gpr(1), rn)); + } + + let narrow_mode = if ty.bits() < 32 { + NarrowValueMode::SignExtend32 + } else { + NarrowValueMode::None + }; + let rm = put_input_in_reg(ctx, inputs[1], narrow_mode); + + if input_maybe_imm(ctx, inputs[1], 0) && flags.avoid_div_traps() { + ctx.emit(Inst::CmpTrapRSImm16 { + op: choose_32_64(ty, CmpOp::CmpS32, CmpOp::CmpS64), + rn: rm, + imm: 0, + cond: Cond::from_intcc(IntCC::Equal), + trap_code: TrapCode::IntegerDivisionByZero, + }); + } + + if input_maybe_imm(ctx, inputs[1], 0xffff_ffff_ffff_ffff) { + if op == Opcode::Sdiv { + let tmp = ctx.alloc_tmp(ty).only_reg().unwrap(); + if ty_bits(ty) <= 32 { + lower_constant_u32(ctx, tmp, (1 << (ty_bits(ty) - 1)) - 1); + } else { + lower_constant_u64(ctx, tmp, (1 << (ty_bits(ty) - 1)) - 1); + } + ctx.emit(Inst::AluRRR { + alu_op: choose_32_64(ty, ALUOp::Xor32, ALUOp::Xor64), + rd: tmp, + rn: tmp.to_reg(), + rm: gpr(1), + }); + ctx.emit(Inst::AluRRR { + alu_op: choose_32_64(ty, ALUOp::And32, ALUOp::And64), + rd: tmp, + rn: tmp.to_reg(), + rm, + }); + ctx.emit(Inst::CmpTrapRSImm16 { + op: choose_32_64(ty, CmpOp::CmpS32, CmpOp::CmpS64), + rn: tmp.to_reg(), + imm: -1, + cond: Cond::from_intcc(IntCC::Equal), + trap_code: TrapCode::IntegerOverflow, + }); + } else { + if ty_bits(ty) > 32 { + ctx.emit(Inst::CmpRSImm16 { + op: CmpOp::CmpS64, + rn: rm, + imm: -1, + }); + ctx.emit(Inst::CMov64SImm16 { + rd: writable_gpr(1), + cond: Cond::from_intcc(IntCC::Equal), + imm: 0, + }); + } + } + } + + if ty_bits(ty) <= 32 { + ctx.emit(Inst::SDivMod32 { rn: rm }); + } else { + ctx.emit(Inst::SDivMod64 { rn: rm }); + } + + if op == Opcode::Sdiv { + ctx.emit(Inst::gen_move(rd, gpr(1), ty)); + } else { + ctx.emit(Inst::gen_move(rd, gpr(0), ty)); + } + } + + Opcode::Uextend | Opcode::Sextend => { + let ty = ty.unwrap(); + let to_bits = ty_bits(ty) as u8; + let to_bits = std::cmp::max(32, to_bits); + let narrow_mode = match (op, to_bits) { + (Opcode::Uextend, 32) => 
NarrowValueMode::ZeroExtend32, + (Opcode::Uextend, 64) => NarrowValueMode::ZeroExtend64, + (Opcode::Sextend, 32) => NarrowValueMode::SignExtend32, + (Opcode::Sextend, 64) => NarrowValueMode::SignExtend64, + _ => unreachable!(), + }; + let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + ctx.emit(Inst::gen_move(rd, rn, ty)); + } + + Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => { + let ty = ty.unwrap(); + let size = ty_bits(ty); + let narrow_mode = match (op, size) { + (Opcode::Ishl, _) => NarrowValueMode::None, + (Opcode::Ushr, 64) => NarrowValueMode::ZeroExtend64, + (Opcode::Ushr, _) => NarrowValueMode::ZeroExtend32, + (Opcode::Sshr, 64) => NarrowValueMode::SignExtend64, + (Opcode::Sshr, _) => NarrowValueMode::SignExtend32, + _ => unreachable!(), + }; + let shift_op = match op { + Opcode::Ishl => choose_32_64(ty, ShiftOp::LShL32, ShiftOp::LShL64), + Opcode::Ushr => choose_32_64(ty, ShiftOp::LShR32, ShiftOp::LShR64), + Opcode::Sshr => choose_32_64(ty, ShiftOp::AShR32, ShiftOp::AShR64), + _ => unreachable!(), + }; + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); + if let Some(imm) = input_matches_const(ctx, inputs[1]) { + let imm = imm & if size < 64 { 31 } else { 63 }; + let shift_imm = SImm20::maybe_from_i64(imm as i64).unwrap(); + let shift_reg = None; + ctx.emit(Inst::ShiftRR { + shift_op, + rd, + rn, + shift_imm, + shift_reg, + }); + } else { + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + let shift_imm = SImm20::zero(); + let shift_reg = if size < 64 { + let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); + ctx.emit(Inst::gen_move(tmp, rm, types::I64)); + ctx.emit(Inst::AluRUImm16Shifted { + alu_op: ALUOp::And64, + rd: tmp, + imm: UImm16Shifted::maybe_from_u64(31).unwrap(), + }); + Some(tmp.to_reg()) + } else { + Some(rm) + }; + ctx.emit(Inst::ShiftRR { + shift_op, + rd, + rn, + shift_imm, + 
shift_reg, + }); + } + } + + Opcode::Rotr | Opcode::Rotl => { + // s390x doesn't have a right-rotate instruction, but a right rotation of K places is + // effectively a left rotation of N - K places, if N is the integer's bit size. We + // implement right rotations with this trick. + // + // For a 32-bit or 64-bit rotate-left, we can use the ROR instruction directly. + // + // For a < 32-bit rotate-left, we synthesize this as: + // + // rotr rd, rn, rm + // + // => + // + // zero-extend rn, <32-or-64> + // and tmp_masked_rm, rm, + // sub tmp1, tmp_masked_rm, + // sub tmp1, zero, tmp1 ; neg + // lsr tmp2, rn, tmp_masked_rm + // lsl rd, rn, tmp1 + // orr rd, rd, tmp2 + // + // For a constant amount, we can instead do: + // + // zero-extend rn, <32-or-64> + // lsr tmp2, rn, # + // lsl rd, rn, + // orr rd, rd, tmp2 + + let is_rotr = op == Opcode::Rotr; + + let ty = ty.unwrap(); + let ty_bits_size = ty_bits(ty) as u64; + + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg( + ctx, + inputs[0], + if ty_bits_size <= 32 { + NarrowValueMode::ZeroExtend32 + } else { + NarrowValueMode::ZeroExtend64 + }, + ); + + if ty_bits_size == 32 || ty_bits_size == 64 { + let shift_op = choose_32_64(ty, ShiftOp::RotL32, ShiftOp::RotL64); + if let Some(imm) = input_matches_const(ctx, inputs[1]) { + let shiftcount = imm & (ty_bits_size - 1); + let shiftcount = if is_rotr { + ty_bits_size - shiftcount + } else { + shiftcount + }; + ctx.emit(Inst::ShiftRR { + shift_op, + rd, + rn, + shift_imm: SImm20::maybe_from_i64(shiftcount as i64).unwrap(), + shift_reg: None, + }); + } else { + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + let rm = if is_rotr { + // Really ty_bits_size - rn, but the upper bits of the result are + // ignored (because of the implicit masking done by the instruction), + // so this is equivalent to negating the input. 
+ let op = choose_32_64(ty, UnaryOp::Neg32, UnaryOp::Neg64); + let tmp = ctx.alloc_tmp(ty).only_reg().unwrap(); + ctx.emit(Inst::UnaryRR { + op, + rd: tmp, + rn: rm, + }); + tmp.to_reg() + } else { + rm + }; + ctx.emit(Inst::ShiftRR { + shift_op, + rd, + rn, + shift_imm: SImm20::zero(), + shift_reg: Some(rm), + }); + } + } else { + debug_assert!(ty_bits_size < 32); + + if let Some(imm) = input_matches_const(ctx, inputs[1]) { + let rot_count = imm & (ty_bits_size - 1); + let (lshl_count, lshr_count) = if is_rotr { + (ty_bits_size - rot_count, rot_count) + } else { + (rot_count, ty_bits_size - rot_count) + }; + + let tmp1 = ctx.alloc_tmp(types::I32).only_reg().unwrap(); + ctx.emit(Inst::ShiftRR { + shift_op: ShiftOp::LShL32, + rd: tmp1, + rn, + shift_imm: SImm20::maybe_from_i64(lshl_count as i64).unwrap(), + shift_reg: None, + }); + + let tmp2 = ctx.alloc_tmp(types::I32).only_reg().unwrap(); + ctx.emit(Inst::ShiftRR { + shift_op: ShiftOp::LShR32, + rd: tmp2, + rn, + shift_imm: SImm20::maybe_from_i64(lshr_count as i64).unwrap(), + shift_reg: None, + }); + + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::Orr32, + rd, + rn: tmp1.to_reg(), + rm: tmp2.to_reg(), + }); + } else { + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + let tmp1 = ctx.alloc_tmp(types::I32).only_reg().unwrap(); + let tmp2 = ctx.alloc_tmp(types::I32).only_reg().unwrap(); + + ctx.emit(Inst::mov32(tmp1, rm)); + ctx.emit(Inst::UnaryRR { + op: UnaryOp::Neg32, + rd: tmp2, + rn: rm, + }); + + ctx.emit(Inst::AluRUImm16Shifted { + alu_op: ALUOp::And32, + rd: tmp1, + imm: UImm16Shifted::maybe_from_u64(ty_bits_size - 1).unwrap(), + }); + ctx.emit(Inst::AluRUImm16Shifted { + alu_op: ALUOp::And32, + rd: tmp2, + imm: UImm16Shifted::maybe_from_u64(ty_bits_size - 1).unwrap(), + }); + + let (lshr, lshl) = if is_rotr { (tmp2, tmp1) } else { (tmp1, tmp2) }; + + ctx.emit(Inst::ShiftRR { + shift_op: ShiftOp::LShL32, + rd: lshl, + rn, + shift_imm: SImm20::zero(), + shift_reg: Some(lshl.to_reg()), + }); + + 
ctx.emit(Inst::ShiftRR { + shift_op: ShiftOp::LShR32, + rd: lshr, + rn, + shift_imm: SImm20::zero(), + shift_reg: Some(lshr.to_reg()), + }); + + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::Orr32, + rd, + rn: lshl.to_reg(), + rm: lshr.to_reg(), + }); + } + } + } + + Opcode::Bnot => { + let ty = ty.unwrap(); + let alu_op = choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + ctx.emit(Inst::AluRRR { + alu_op, + rd, + rn, + rm: rn, + }); + } + + Opcode::Band => { + let ty = ty.unwrap(); + let alu_op = choose_32_64(ty, ALUOp::And32, ALUOp::And64); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + if let Some(imm) = input_matches_uimm16shifted_inv(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRUImm16Shifted { alu_op, rd, imm }); + } else if let Some(imm) = input_matches_uimm32shifted_inv(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRUImm32Shifted { alu_op, rd, imm }); + } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRX { alu_op, rd, mem }); + } else { + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm }); + } + } + + Opcode::Bor => { + let ty = ty.unwrap(); + let alu_op = choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + if let Some(imm) = input_matches_uimm16shifted(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRUImm16Shifted { alu_op, rd, imm }); + } else if let Some(imm) = input_matches_uimm32shifted(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRUImm32Shifted { alu_op, 
rd, imm }); + } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRX { alu_op, rd, mem }); + } else { + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm }); + } + } + + Opcode::Bxor => { + let ty = ty.unwrap(); + let alu_op = choose_32_64(ty, ALUOp::Xor32, ALUOp::Xor64); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + if let Some(imm) = input_matches_uimm32shifted(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRUImm32Shifted { alu_op, rd, imm }); + } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRX { alu_op, rd, mem }); + } else { + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm }); + } + } + + Opcode::BandNot | Opcode::BorNot | Opcode::BxorNot => { + let ty = ty.unwrap(); + let alu_op = match op { + Opcode::BandNot => choose_32_64(ty, ALUOp::AndNot32, ALUOp::AndNot64), + Opcode::BorNot => choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64), + Opcode::BxorNot => choose_32_64(ty, ALUOp::XorNot32, ALUOp::XorNot64), + _ => unreachable!(), + }; + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm }); + } + + Opcode::Bitselect => { + let ty = ty.unwrap(); + let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rcond = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + let rm = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None); + ctx.emit(Inst::AluRRR 
{ + alu_op: choose_32_64(ty, ALUOp::And32, ALUOp::And64), + rd: tmp, + rn, + rm: rcond, + }); + ctx.emit(Inst::AluRRR { + alu_op: choose_32_64(ty, ALUOp::AndNot32, ALUOp::AndNot64), + rd, + rn: rm, + rm: rcond, + }); + ctx.emit(Inst::AluRRR { + alu_op: choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64), + rd, + rn: rd.to_reg(), + rm: tmp.to_reg(), + }); + } + + Opcode::Bextend | Opcode::Bmask => { + // Bextend and Bmask both simply sign-extend. This works for: + // - Bextend, because booleans are stored as 0 / -1, so we + // sign-extend the -1 to a -1 in the wider width. + // - Bmask, because the resulting integer mask value must be + // all-ones (-1) if the argument is true. + // + // For a sign-extension from a 1-bit value (Case 1 below), we need + // to do things a bit specially, because the ISA does not have a + // 1-to-N-bit sign extension instruction. For 8-bit or wider + // sources (Case 2 below), we do a sign extension normally. + + let from_ty = ctx.input_ty(insn, 0); + let to_ty = ctx.output_ty(insn, 0); + let from_bits = ty_bits(from_ty); + let to_bits = ty_bits(to_ty); + + assert!( + from_bits <= 64 && to_bits <= 64, + "Vector Bextend not supported yet" + ); + + if from_bits >= to_bits { + // Just a move. + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let ty = ctx.input_ty(insn, 0); + ctx.emit(Inst::gen_move(rd, rn, ty)); + } else if from_bits == 1 { + assert!(to_bits >= 8); + // Case 1: 1-bit to N-bit extension: use a shift-left / + // shift-right sequence to create a 0 / -1 result. 
+ let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let shl_op = choose_32_64(to_ty, ShiftOp::LShL32, ShiftOp::LShL64); + let shr_op = choose_32_64(to_ty, ShiftOp::AShR32, ShiftOp::AShR64); + let count = if to_bits > 32 { 63 } else { 31 }; + ctx.emit(Inst::ShiftRR { + shift_op: shl_op, + rd, + rn, + shift_imm: SImm20::maybe_from_i64(count.into()).unwrap(), + shift_reg: None, + }); + ctx.emit(Inst::ShiftRR { + shift_op: shr_op, + rd, + rn: rd.to_reg(), + shift_imm: SImm20::maybe_from_i64(count.into()).unwrap(), + shift_reg: None, + }); + } else { + // Case 2: 8-or-more-bit to N-bit extension: just sign-extend. A + // `true` (all ones, or `-1`) will be extended to -1 with the + // larger width. + assert!(from_bits >= 8); + let narrow_mode = if to_bits == 64 { + NarrowValueMode::SignExtend64 + } else { + assert!(to_bits <= 32); + NarrowValueMode::SignExtend32 + }; + let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + ctx.emit(Inst::gen_move(rd, rn, to_ty)); + } + } + + Opcode::Bint => { + // Booleans are stored as all-zeroes (0) or all-ones (-1). We AND + // out the LSB to give a 0 / 1-valued integer result. 
+ let ty = ty.unwrap(); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + if ty_bits(ty) <= 16 { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRUImm16Shifted { + alu_op: ALUOp::And32, + rd, + imm: UImm16Shifted::maybe_from_u64(1).unwrap(), + }); + } else if ty_bits(ty) <= 32 { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRUImm32Shifted { + alu_op: ALUOp::And32, + rd, + imm: UImm32Shifted::maybe_from_u64(1).unwrap(), + }); + } else { + let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); + lower_constant_u64(ctx, tmp, 1); + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::And64, + rd, + rn, + rm: tmp.to_reg(), + }); + } + } + + Opcode::Clz => { + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let ty = ty.unwrap(); + let ty_bits_size = ty_bits(ty); + + let rn = if ty_bits_size < 64 { + let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); + ctx.emit(Inst::Extend { + rd: tmp, + rn, + signed: false, + from_bits: ty_bits_size as u8, + to_bits: 64, + }); + tmp.to_reg() + } else { + rn + }; + + ctx.emit(Inst::Flogr { rn }); + ctx.emit(Inst::gen_move(rd, gpr(0), ty)); + + if ty_bits_size < 64 { + ctx.emit(Inst::AluRSImm16 { + alu_op: ALUOp::Add32, + rd, + imm: -(64 - ty_bits_size as i16), + }); + } + } + + Opcode::Cls => { + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let ty = ty.unwrap(); + let ty_bits_size = ty_bits(ty); + + let rn = if ty_bits_size < 64 { + let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); + ctx.emit(Inst::Extend { + rd: tmp, + rn, + signed: true, + from_bits: ty_bits_size as u8, + to_bits: 64, + }); + tmp.to_reg() + } else { + rn + }; + + // tmp = rn ^ ((signed)rn >> 63) + let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); + ctx.emit(Inst::ShiftRR { + 
shift_op: ShiftOp::AShR64, + rd: tmp, + rn, + shift_imm: SImm20::maybe_from_i64(63).unwrap(), + shift_reg: None, + }); + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::Xor64, + rd: tmp, + rn: tmp.to_reg(), + rm: rn, + }); + + ctx.emit(Inst::Flogr { rn }); + ctx.emit(Inst::gen_move(rd, gpr(0), ty)); + + if ty_bits_size < 64 { + ctx.emit(Inst::AluRSImm16 { + alu_op: ALUOp::Add32, + rd, + imm: -(64 - ty_bits_size as i16), + }); + } + } + + Opcode::Ctz => { + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let ty = ty.unwrap(); + let ty_bits_size = ty_bits(ty); + + let rn = if ty_bits_size < 64 { + let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); + ctx.emit(Inst::gen_move(tmp, rn, ty)); + ctx.emit(Inst::AluRUImm16Shifted { + alu_op: ALUOp::Orr64, + rd: tmp, + imm: UImm16Shifted::maybe_from_u64(1u64 << ty_bits_size).unwrap(), + }); + tmp.to_reg() + } else { + rn + }; + + // tmp = rn & -rn + let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); + ctx.emit(Inst::UnaryRR { + op: UnaryOp::Neg64, + rd: tmp, + rn, + }); + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::And64, + rd: tmp, + rn: tmp.to_reg(), + rm: rn, + }); + + ctx.emit(Inst::Flogr { rn: tmp.to_reg() }); + if ty_bits_size == 64 { + ctx.emit(Inst::CMov64SImm16 { + rd: writable_gpr(0), + cond: Cond::from_intcc(IntCC::Equal), + imm: -1, + }); + } + + if ty_bits_size <= 32 { + lower_constant_u32(ctx, rd, 63); + } else { + lower_constant_u64(ctx, rd, 63); + } + let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64); + ctx.emit(Inst::AluRRR { + alu_op, + rd, + rn: rd.to_reg(), + rm: gpr(0), + }); + } + + Opcode::Bitrev => unimplemented!(), + + Opcode::Popcnt => { + let ty = ty.unwrap(); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + if ty_bits(ty) <= 8 { + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + ctx.emit(Inst::UnaryRR { + op: UnaryOp::PopcntByte, + rd, + rn, + }); + } else { + let rn = 
put_input_in_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64); + ctx.emit(Inst::UnaryRR { + op: UnaryOp::PopcntReg, + rd, + rn, + }); + } + } + + Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv => { + let bits = ty_bits(ctx.output_ty(insn, 0)); + let fpu_op = match (op, bits) { + (Opcode::Fadd, 32) => FPUOp2::Add32, + (Opcode::Fadd, 64) => FPUOp2::Add64, + (Opcode::Fsub, 32) => FPUOp2::Sub32, + (Opcode::Fsub, 64) => FPUOp2::Sub64, + (Opcode::Fmul, 32) => FPUOp2::Mul32, + (Opcode::Fmul, 64) => FPUOp2::Mul64, + (Opcode::Fdiv, 32) => FPUOp2::Div32, + (Opcode::Fdiv, 64) => FPUOp2::Div64, + _ => panic!("Unknown op/bits combination"), + }; + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + ctx.emit(Inst::mov64(rd, rn)); + ctx.emit(Inst::FpuRRR { fpu_op, rd, rm }); + } + + Opcode::Fmin | Opcode::Fmax => { + let bits = ty_bits(ctx.output_ty(insn, 0)); + let fpu_op = match (op, bits) { + (Opcode::Fmin, 32) => FPUOp2::Min32, + (Opcode::Fmin, 64) => FPUOp2::Min64, + (Opcode::Fmax, 32) => FPUOp2::Max32, + (Opcode::Fmax, 64) => FPUOp2::Max64, + _ => panic!("Unknown op/bits combination"), + }; + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + ctx.emit(Inst::FpuVecRRR { fpu_op, rd, rn, rm }); + } + + Opcode::Sqrt | Opcode::Fneg | Opcode::Fabs | Opcode::Fpromote | Opcode::Fdemote => { + let bits = ty_bits(ctx.output_ty(insn, 0)); + let fpu_op = match (op, bits) { + (Opcode::Sqrt, 32) => FPUOp1::Sqrt32, + (Opcode::Sqrt, 64) => FPUOp1::Sqrt64, + (Opcode::Fneg, 32) => FPUOp1::Neg32, + (Opcode::Fneg, 64) => FPUOp1::Neg64, + (Opcode::Fabs, 32) => FPUOp1::Abs32, + (Opcode::Fabs, 64) => FPUOp1::Abs64, + (Opcode::Fpromote, 32) => panic!("Cannot promote to 32 bits"), + 
(Opcode::Fpromote, 64) => FPUOp1::Cvt32To64, + (Opcode::Fdemote, 32) => FPUOp1::Cvt64To32, + (Opcode::Fdemote, 64) => panic!("Cannot demote to 64 bits"), + _ => panic!("Unknown op/bits combination"), + }; + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + ctx.emit(Inst::FpuRR { fpu_op, rd, rn }); + } + + Opcode::Ceil | Opcode::Floor | Opcode::Trunc | Opcode::Nearest => { + let bits = ty_bits(ctx.output_ty(insn, 0)); + let op = match (op, bits) { + (Opcode::Ceil, 32) => FpuRoundMode::Plus32, + (Opcode::Ceil, 64) => FpuRoundMode::Plus64, + (Opcode::Floor, 32) => FpuRoundMode::Minus32, + (Opcode::Floor, 64) => FpuRoundMode::Minus64, + (Opcode::Trunc, 32) => FpuRoundMode::Zero32, + (Opcode::Trunc, 64) => FpuRoundMode::Zero64, + (Opcode::Nearest, 32) => FpuRoundMode::Nearest32, + (Opcode::Nearest, 64) => FpuRoundMode::Nearest64, + _ => panic!("Unknown op/bits combination"), + }; + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + ctx.emit(Inst::FpuRound { op, rd, rn }); + } + + Opcode::Fma => { + let bits = ty_bits(ctx.output_ty(insn, 0)); + let fpu_op = match bits { + 32 => FPUOp3::MAdd32, + 64 => FPUOp3::MAdd64, + _ => panic!("Unknown op size"), + }; + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + let ra = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + ctx.emit(Inst::mov64(rd, ra)); + ctx.emit(Inst::FpuRRRR { fpu_op, rd, rn, rm }); + } + + Opcode::Fcopysign => { + let ty = ctx.output_ty(insn, 0); + let bits = ty_bits(ty) as u8; + assert!(bits == 32 || bits == 64); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + let rd = get_output_reg(ctx, 
outputs[0]).only_reg().unwrap(); + + ctx.emit(Inst::FpuCopysign { rd, rn, rm }); + } + + Opcode::FcvtFromUint | Opcode::FcvtFromSint => { + let in_bits = ty_bits(ctx.input_ty(insn, 0)); + let out_bits = ty_bits(ctx.output_ty(insn, 0)); + let signed = op == Opcode::FcvtFromSint; + let op = match (signed, in_bits, out_bits) { + (false, 32, 32) => IntToFpuOp::U32ToF32, + (true, 32, 32) => IntToFpuOp::I32ToF32, + (false, 32, 64) => IntToFpuOp::U32ToF64, + (true, 32, 64) => IntToFpuOp::I32ToF64, + (false, 64, 32) => IntToFpuOp::U64ToF32, + (true, 64, 32) => IntToFpuOp::I64ToF32, + (false, 64, 64) => IntToFpuOp::U64ToF64, + (true, 64, 64) => IntToFpuOp::I64ToF64, + _ => panic!("Unknown input/output-bits combination"), + }; + let narrow_mode = match (signed, in_bits) { + (false, 32) => NarrowValueMode::ZeroExtend32, + (true, 32) => NarrowValueMode::SignExtend32, + (false, 64) => NarrowValueMode::ZeroExtend64, + (true, 64) => NarrowValueMode::SignExtend64, + _ => panic!("Unknown input size"), + }; + let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + ctx.emit(Inst::IntToFpu { op, rd, rn }); + } + + Opcode::FcvtToUint | Opcode::FcvtToSint => { + let in_bits = ty_bits(ctx.input_ty(insn, 0)); + let out_bits = ty_bits(ctx.output_ty(insn, 0)); + let signed = op == Opcode::FcvtToSint; + let op = match (signed, in_bits, out_bits) { + (false, 32, 32) => FpuToIntOp::F32ToU32, + (true, 32, 32) => FpuToIntOp::F32ToI32, + (false, 32, 64) => FpuToIntOp::F32ToU64, + (true, 32, 64) => FpuToIntOp::F32ToI64, + (false, 64, 32) => FpuToIntOp::F64ToU32, + (true, 64, 32) => FpuToIntOp::F64ToI32, + (false, 64, 64) => FpuToIntOp::F64ToU64, + (true, 64, 64) => FpuToIntOp::F64ToI64, + _ => panic!("Unknown input/output-bits combination"), + }; + + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + + // First, check whether the input is a NaN and trap 
if so. + if in_bits == 32 { + ctx.emit(Inst::FpuCmp32 { rn, rm: rn }); + } else { + ctx.emit(Inst::FpuCmp64 { rn, rm: rn }); + } + ctx.emit(Inst::TrapIf { + trap_code: TrapCode::BadConversionToInteger, + cond: Cond::from_floatcc(FloatCC::Unordered), + }); + + // Perform the conversion. If this sets CC 3, we have a + // "special case". Since we already exluded the case where + // the input was a NaN, the only other option is that the + // conversion overflowed the target type. + ctx.emit(Inst::FpuToInt { op, rd, rn }); + ctx.emit(Inst::TrapIf { + trap_code: TrapCode::IntegerOverflow, + cond: Cond::from_floatcc(FloatCC::Unordered), + }); + } + + Opcode::FcvtToUintSat | Opcode::FcvtToSintSat => { + let in_bits = ty_bits(ctx.input_ty(insn, 0)); + let out_bits = ty_bits(ctx.output_ty(insn, 0)); + let signed = op == Opcode::FcvtToSintSat; + let op = match (signed, in_bits, out_bits) { + (false, 32, 32) => FpuToIntOp::F32ToU32, + (true, 32, 32) => FpuToIntOp::F32ToI32, + (false, 32, 64) => FpuToIntOp::F32ToU64, + (true, 32, 64) => FpuToIntOp::F32ToI64, + (false, 64, 32) => FpuToIntOp::F64ToU32, + (true, 64, 32) => FpuToIntOp::F64ToI32, + (false, 64, 64) => FpuToIntOp::F64ToU64, + (true, 64, 64) => FpuToIntOp::F64ToI64, + _ => panic!("Unknown input/output-bits combination"), + }; + + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + + // Perform the conversion. + ctx.emit(Inst::FpuToInt { op, rd, rn }); + + // In most special cases, the Z instruction already yields the + // result expected by Cranelift semantic. The only exception + // it the case where the input was a Nan. We explicitly check + // for that and force the output to 0 in that case. 
+ if in_bits == 32 { + ctx.emit(Inst::FpuCmp32 { rn, rm: rn }); + } else { + ctx.emit(Inst::FpuCmp64 { rn, rm: rn }); + } + let cond = Cond::from_floatcc(FloatCC::Unordered); + if out_bits <= 32 { + ctx.emit(Inst::CMov32SImm16 { rd, cond, imm: 0 }); + } else { + ctx.emit(Inst::CMov64SImm16 { rd, cond, imm: 0 }); + } + } + + Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"), + + Opcode::Bitcast => { + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let input_ty = ctx.input_ty(insn, 0); + let output_ty = ctx.output_ty(insn, 0); + match (input_ty, output_ty) { + (types::I64, types::F64) => { + ctx.emit(Inst::MovToFpr { rd, rn }); + } + (types::F64, types::I64) => { + ctx.emit(Inst::MovFromFpr { rd, rn }); + } + (types::I32, types::F32) => { + let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); + ctx.emit(Inst::ShiftRR { + shift_op: ShiftOp::LShL64, + rd: tmp, + rn, + shift_imm: SImm20::maybe_from_i64(32).unwrap(), + shift_reg: None, + }); + ctx.emit(Inst::MovToFpr { + rd, + rn: tmp.to_reg(), + }); + } + (types::F32, types::I32) => { + let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); + ctx.emit(Inst::MovFromFpr { rd: tmp, rn }); + ctx.emit(Inst::ShiftRR { + shift_op: ShiftOp::LShR64, + rd, + rn: tmp.to_reg(), + shift_imm: SImm20::maybe_from_i64(32).unwrap(), + shift_reg: None, + }); + } + _ => unreachable!("invalid bitcast from {:?} to {:?}", input_ty, output_ty), + } + } + + Opcode::Load + | Opcode::Uload8 + | Opcode::Sload8 + | Opcode::Uload16 + | Opcode::Sload16 + | Opcode::Uload32 + | Opcode::Sload32 + | Opcode::LoadComplex + | Opcode::Uload8Complex + | Opcode::Sload8Complex + | Opcode::Uload16Complex + | Opcode::Sload16Complex + | Opcode::Uload32Complex + | Opcode::Sload32Complex => { + let off = ctx.data(insn).load_store_offset().unwrap(); + let flags = ctx.memflags(insn).unwrap(); + let endianness = flags.endianness(Endianness::Big); + let elem_ty = 
ctx.output_ty(insn, 0); + let is_float = ty_is_float(elem_ty); + let to_bits = ty_bits(elem_ty); + let from_bits = match op { + Opcode::Load | Opcode::LoadComplex => to_bits, + Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => { + 8 + } + Opcode::Sload16 + | Opcode::Uload16 + | Opcode::Sload16Complex + | Opcode::Uload16Complex => 16, + Opcode::Sload32 + | Opcode::Uload32 + | Opcode::Sload32Complex + | Opcode::Uload32Complex => 32, + _ => unreachable!(), + }; + let ext_bits = if to_bits < 32 { 32 } else { to_bits }; + let sign_extend = match op { + Opcode::Sload8 + | Opcode::Sload8Complex + | Opcode::Sload16 + | Opcode::Sload16Complex + | Opcode::Sload32 + | Opcode::Sload32Complex => true, + _ => false, + }; + + let mem = lower_address(ctx, &inputs[..], off, flags); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + + if endianness == Endianness::Big { + ctx.emit(match (ext_bits, from_bits, sign_extend, is_float) { + (32, 32, _, true) => Inst::FpuLoad32 { rd, mem }, + (64, 64, _, true) => Inst::FpuLoad64 { rd, mem }, + (32, 32, _, false) => Inst::Load32 { rd, mem }, + (64, 64, _, false) => Inst::Load64 { rd, mem }, + (32, 8, false, _) => Inst::Load32ZExt8 { rd, mem }, + (32, 8, true, _) => Inst::Load32SExt8 { rd, mem }, + (32, 16, false, _) => Inst::Load32ZExt16 { rd, mem }, + (32, 16, true, _) => Inst::Load32SExt16 { rd, mem }, + (64, 8, false, _) => Inst::Load64ZExt8 { rd, mem }, + (64, 8, true, _) => Inst::Load64SExt8 { rd, mem }, + (64, 16, false, _) => Inst::Load64ZExt16 { rd, mem }, + (64, 16, true, _) => Inst::Load64SExt16 { rd, mem }, + (64, 32, false, _) => Inst::Load64ZExt32 { rd, mem }, + (64, 32, true, _) => Inst::Load64SExt32 { rd, mem }, + _ => panic!("Unsupported size in load"), + }); + } else { + ctx.emit(match (ext_bits, from_bits, sign_extend, is_float) { + (32, 32, _, true) => Inst::FpuLoadRev32 { rd, mem }, + (64, 64, _, true) => Inst::FpuLoadRev64 { rd, mem }, + (_, 16, _, false) => Inst::LoadRev16 
{ rd, mem }, + (_, 32, _, false) => Inst::LoadRev32 { rd, mem }, + (_, 64, _, false) => Inst::LoadRev64 { rd, mem }, + (32, 8, false, _) => Inst::Load32ZExt8 { rd, mem }, + (32, 8, true, _) => Inst::Load32SExt8 { rd, mem }, + (64, 8, false, _) => Inst::Load64ZExt8 { rd, mem }, + (64, 8, true, _) => Inst::Load64SExt8 { rd, mem }, + _ => panic!("Unsupported size in load"), + }); + if to_bits > from_bits && from_bits > 8 { + assert!(is_float == false); + ctx.emit(Inst::Extend { + rd, + rn: rd.to_reg(), + signed: sign_extend, + from_bits: from_bits as u8, + to_bits: to_bits as u8, + }); + } + } + } + + Opcode::Store + | Opcode::Istore8 + | Opcode::Istore16 + | Opcode::Istore32 + | Opcode::StoreComplex + | Opcode::Istore8Complex + | Opcode::Istore16Complex + | Opcode::Istore32Complex => { + let off = ctx.data(insn).load_store_offset().unwrap(); + let flags = ctx.memflags(insn).unwrap(); + let endianness = flags.endianness(Endianness::Big); + let elem_ty = match op { + Opcode::Istore8 | Opcode::Istore8Complex => types::I8, + Opcode::Istore16 | Opcode::Istore16Complex => types::I16, + Opcode::Istore32 | Opcode::Istore32Complex => types::I32, + Opcode::Store | Opcode::StoreComplex => ctx.input_ty(insn, 0), + _ => unreachable!(), + }; + + let mem = lower_address(ctx, &inputs[1..], off, flags); + + if ty_is_float(elem_ty) { + let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + ctx.emit(match (endianness, ty_bits(elem_ty)) { + (Endianness::Big, 32) => Inst::FpuStore32 { rd, mem }, + (Endianness::Big, 64) => Inst::FpuStore64 { rd, mem }, + (Endianness::Little, 32) => Inst::FpuStoreRev32 { rd, mem }, + (Endianness::Little, 64) => Inst::FpuStoreRev64 { rd, mem }, + _ => panic!("Unsupported size in store"), + }); + } else if ty_bits(elem_ty) <= 16 { + if let Some(imm) = input_matches_const(ctx, inputs[0]) { + ctx.emit(match (endianness, ty_bits(elem_ty)) { + (_, 1) | (_, 8) => Inst::StoreImm8 { + imm: imm as u8, + mem, + }, + (Endianness::Big, 16) => 
Inst::StoreImm16 { + imm: imm as i16, + mem, + }, + (Endianness::Little, 16) => Inst::StoreImm16 { + imm: (imm as i16).swap_bytes(), + mem, + }, + _ => panic!("Unsupported size in store"), + }); + } else { + let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + ctx.emit(match (endianness, ty_bits(elem_ty)) { + (_, 1) | (_, 8) => Inst::Store8 { rd, mem }, + (Endianness::Big, 16) => Inst::Store16 { rd, mem }, + (Endianness::Little, 16) => Inst::StoreRev16 { rd, mem }, + _ => panic!("Unsupported size in store"), + }); + } + } else if endianness == Endianness::Big { + if let Some(imm) = input_matches_simm16(ctx, inputs[0]) { + ctx.emit(match ty_bits(elem_ty) { + 32 => Inst::StoreImm32SExt16 { imm, mem }, + 64 => Inst::StoreImm64SExt16 { imm, mem }, + _ => panic!("Unsupported size in store"), + }); + } else { + let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + ctx.emit(match ty_bits(elem_ty) { + 32 => Inst::Store32 { rd, mem }, + 64 => Inst::Store64 { rd, mem }, + _ => panic!("Unsupported size in store"), + }); + } + } else { + let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + ctx.emit(match ty_bits(elem_ty) { + 32 => Inst::StoreRev32 { rd, mem }, + 64 => Inst::StoreRev64 { rd, mem }, + _ => panic!("Unsupported size in store"), + }); + } + } + + Opcode::StackLoad | Opcode::StackStore => { + panic!("Direct stack memory access not supported; should not be used by Wasm"); + } + + Opcode::StackAddr => { + let (stack_slot, offset) = match *ctx.data(insn) { + InstructionData::StackLoad { + opcode: Opcode::StackAddr, + stack_slot, + offset, + } => (stack_slot, offset), + _ => unreachable!(), + }; + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let offset: i32 = offset.into(); + let inst = ctx + .abi() + .stackslot_addr(stack_slot, u32::try_from(offset).unwrap(), rd); + ctx.emit(inst); + } + + Opcode::ConstAddr => unimplemented!(), + + Opcode::FuncAddr => { + let rd = get_output_reg(ctx, 
outputs[0]).only_reg().unwrap(); + let (extname, dist) = ctx.call_target(insn).unwrap(); + let extname = extname.clone(); + if dist == RelocDistance::Near { + ctx.emit(Inst::LoadAddr { + rd, + mem: MemArg::Symbol { + name: Box::new(extname), + offset: 0, + flags: MemFlags::trusted(), + }, + }); + } else { + ctx.emit(Inst::LoadExtNameFar { + rd, + name: Box::new(extname), + offset: 0, + }); + } + } + + Opcode::SymbolValue => { + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let (extname, dist, offset) = ctx.symbol_value(insn).unwrap(); + let extname = extname.clone(); + if dist == RelocDistance::Near && (offset & 1) == 0 && i32::try_from(offset).is_ok() { + ctx.emit(Inst::LoadAddr { + rd, + mem: MemArg::Symbol { + name: Box::new(extname), + offset: i32::try_from(offset).unwrap(), + flags: MemFlags::trusted(), + }, + }); + } else { + ctx.emit(Inst::LoadExtNameFar { + rd, + name: Box::new(extname), + offset, + }); + } + } + + Opcode::HeapAddr => { + panic!("heap_addr should have been removed by legalization!"); + } + + Opcode::TableAddr => { + panic!("table_addr should have been removed by legalization!"); + } + + Opcode::GlobalValue => { + panic!("global_value should have been removed by legalization!"); + } + + Opcode::TlsValue => { + panic!("Thread-local storage support not implemented!"); + } + + Opcode::GetPinnedReg | Opcode::SetPinnedReg => { + panic!("Pinned register support not implemented!"); + } + + Opcode::Icmp => { + let condcode = ctx.data(insn).cond_code().unwrap(); + let cond = Cond::from_intcc(condcode); + let is_signed = condcode_is_signed(condcode); + lower_icmp_to_flags(ctx, insn, is_signed, true); + + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let ty = ctx.output_ty(insn, 0); + lower_flags_to_bool_result(ctx, cond, rd, ty); + } + + Opcode::Fcmp => { + let condcode = ctx.data(insn).fp_cond_code().unwrap(); + let cond = Cond::from_floatcc(condcode); + lower_fcmp_to_flags(ctx, insn); + + let rd = 
get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let ty = ctx.output_ty(insn, 0); + lower_flags_to_bool_result(ctx, cond, rd, ty); + } + + Opcode::IsNull | Opcode::IsInvalid => { + // Null references are represented by the constant value 0; invalid + // references are represented by the constant value -1. + let cond = Cond::from_intcc(IntCC::Equal); + let imm = match op { + Opcode::IsNull => 0, + Opcode::IsInvalid => -1, + _ => unreachable!(), + }; + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + ctx.emit(Inst::CmpRSImm16 { + op: CmpOp::CmpS64, + rn, + imm, + }); + + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let ty = ctx.output_ty(insn, 0); + lower_flags_to_bool_result(ctx, cond, rd, ty); + } + + Opcode::Select => { + let ty = ctx.output_ty(insn, 0); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + let rm = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None); + let cond = lower_boolean_to_flags(ctx, inputs[0]); + ctx.emit(Inst::gen_move(rd, rm, ty)); + if ty_is_float(ty) { + if ty_bits(ty) < 64 { + ctx.emit(Inst::FpuCMov32 { rd, cond, rm: rn }); + } else { + ctx.emit(Inst::FpuCMov64 { rd, cond, rm: rn }); + } + } else { + if ty_bits(ty) < 64 { + ctx.emit(Inst::CMov32 { rd, cond, rm: rn }); + } else { + ctx.emit(Inst::CMov64 { rd, cond, rm: rn }); + } + } + } + + Opcode::SelectifSpectreGuard => { + let ty = ctx.output_ty(insn, 0); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + let rm = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None); + let condcode = ctx.data(insn).cond_code().unwrap(); + let cond = Cond::from_intcc(condcode); + let is_signed = condcode_is_signed(condcode); + + // Verification ensures that the input is always a single-def ifcmp. 
+ let cmp_insn = ctx + .get_input_as_source_or_const(inputs[0].insn, inputs[0].input) + .inst + .unwrap() + .0; + debug_assert_eq!(ctx.data(cmp_insn).opcode(), Opcode::Ifcmp); + lower_icmp_to_flags(ctx, cmp_insn, is_signed, true); + + ctx.emit(Inst::gen_move(rd, rm, ty)); + if ty_is_float(ty) { + if ty_bits(ty) < 64 { + ctx.emit(Inst::FpuCMov32 { rd, cond, rm: rn }); + } else { + ctx.emit(Inst::FpuCMov64 { rd, cond, rm: rn }); + } + } else { + if ty_bits(ty) < 64 { + ctx.emit(Inst::CMov32 { rd, cond, rm: rn }); + } else { + ctx.emit(Inst::CMov64 { rd, cond, rm: rn }); + } + } + } + + Opcode::Trap | Opcode::ResumableTrap => { + let trap_code = ctx.data(insn).trap_code().unwrap(); + ctx.emit_safepoint(Inst::Trap { trap_code }) + } + + Opcode::Trapz | Opcode::Trapnz | Opcode::ResumableTrapnz => { + let cond = lower_boolean_to_flags(ctx, inputs[0]); + let negated = op == Opcode::Trapz; + let cond = if negated { cond.invert() } else { cond }; + let trap_code = ctx.data(insn).trap_code().unwrap(); + ctx.emit_safepoint(Inst::TrapIf { trap_code, cond }); + } + + Opcode::Trapif => { + let condcode = ctx.data(insn).cond_code().unwrap(); + let cond = Cond::from_intcc(condcode); + let is_signed = condcode_is_signed(condcode); + + // Verification ensures that the input is always a single-def ifcmp. 
+ let cmp_insn = ctx + .get_input_as_source_or_const(inputs[0].insn, inputs[0].input) + .inst + .unwrap() + .0; + debug_assert_eq!(ctx.data(cmp_insn).opcode(), Opcode::Ifcmp); + lower_icmp_to_flags(ctx, cmp_insn, is_signed, true); + + let trap_code = ctx.data(insn).trap_code().unwrap(); + ctx.emit_safepoint(Inst::TrapIf { trap_code, cond }); + } + + Opcode::Debugtrap => { + ctx.emit(Inst::Debugtrap); + } + + Opcode::Call | Opcode::CallIndirect => { + let caller_conv = ctx.abi().call_conv(); + let (mut abi, inputs) = match op { + Opcode::Call => { + let (extname, dist) = ctx.call_target(insn).unwrap(); + let extname = extname.clone(); + let sig = ctx.call_sig(insn).unwrap(); + assert!(inputs.len() == sig.params.len()); + assert!(outputs.len() == sig.returns.len()); + ( + S390xABICaller::from_func(sig, &extname, dist, caller_conv, flags)?, + &inputs[..], + ) + } + Opcode::CallIndirect => { + let ptr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64); + let sig = ctx.call_sig(insn).unwrap(); + assert!(inputs.len() - 1 == sig.params.len()); + assert!(outputs.len() == sig.returns.len()); + ( + S390xABICaller::from_ptr(sig, ptr, op, caller_conv, flags)?, + &inputs[1..], + ) + } + _ => unreachable!(), + }; + + assert!(inputs.len() == abi.num_args()); + for (i, input) in inputs.iter().enumerate() { + let arg_reg = put_input_in_reg(ctx, *input, NarrowValueMode::None); + abi.emit_copy_regs_to_arg(ctx, i, ValueRegs::one(arg_reg)); + } + abi.emit_call(ctx); + for (i, output) in outputs.iter().enumerate() { + let retval_reg = get_output_reg(ctx, *output).only_reg().unwrap(); + abi.emit_copy_retval_to_regs(ctx, i, ValueRegs::one(retval_reg)); + } + abi.accumulate_outgoing_args_size(ctx); + } + + Opcode::FallthroughReturn | Opcode::Return => { + for (i, input) in inputs.iter().enumerate() { + let reg = put_input_in_reg(ctx, *input, NarrowValueMode::None); + let retval_reg = ctx.retval(i).only_reg().unwrap(); + let ty = ctx.input_ty(insn, i); + 
ctx.emit(Inst::gen_move(retval_reg, reg, ty)); + } + // N.B.: the Ret itself is generated by the ABI. + } + + Opcode::AtomicRmw + | Opcode::AtomicCas + | Opcode::AtomicLoad + | Opcode::AtomicStore + | Opcode::Fence => { + // TODO + panic!("Atomic operations not implemented"); + } + + Opcode::RawBitcast + | Opcode::Splat + | Opcode::Swizzle + | Opcode::Insertlane + | Opcode::Extractlane + | Opcode::Imin + | Opcode::Umin + | Opcode::Imax + | Opcode::Umax + | Opcode::AvgRound + | Opcode::FminPseudo + | Opcode::FmaxPseudo + | Opcode::Uload8x8 + | Opcode::Uload8x8Complex + | Opcode::Sload8x8 + | Opcode::Sload8x8Complex + | Opcode::Uload16x4 + | Opcode::Uload16x4Complex + | Opcode::Sload16x4 + | Opcode::Sload16x4Complex + | Opcode::Uload32x2 + | Opcode::Uload32x2Complex + | Opcode::Sload32x2 + | Opcode::Sload32x2Complex + | Opcode::Vconst + | Opcode::Shuffle + | Opcode::Vsplit + | Opcode::Vconcat + | Opcode::Vselect + | Opcode::VanyTrue + | Opcode::VallTrue + | Opcode::VhighBits + | Opcode::ScalarToVector + | Opcode::Snarrow + | Opcode::Unarrow + | Opcode::SwidenLow + | Opcode::SwidenHigh + | Opcode::UwidenLow + | Opcode::UwidenHigh + | Opcode::WideningPairwiseDotProductS => { + // TODO + panic!("Vector ops not implemented."); + } + + Opcode::Isplit | Opcode::Iconcat => panic!("Wide integer ops not implemented."), + + Opcode::Spill + | Opcode::Fill + | Opcode::FillNop + | Opcode::Regmove + | Opcode::CopySpecial + | Opcode::CopyToSsa + | Opcode::CopyNop + | Opcode::AdjustSpDown + | Opcode::AdjustSpUpImm + | Opcode::AdjustSpDownImm + | Opcode::DummySargT + | Opcode::IfcmpSp + | Opcode::Regspill + | Opcode::Regfill => { + panic!("Unused opcode should not be encountered."); + } + + Opcode::Ifcmp + | Opcode::Ffcmp + | Opcode::Trapff + | Opcode::Trueif + | Opcode::Trueff + | Opcode::Selectif => { + panic!("Flags opcode should not be encountered."); + } + + Opcode::Jump + | Opcode::Fallthrough + | Opcode::Brz + | Opcode::Brnz + | Opcode::BrIcmp + | Opcode::Brif + | Opcode::Brff 
+ | Opcode::IndirectJumpTableBr + | Opcode::BrTable => { + panic!("Branch opcode reached non-branch lowering logic!"); + } + + Opcode::JumpTableEntry | Opcode::JumpTableBase => { + panic!("Should not appear: we handle BrTable directly"); + } + + Opcode::Safepoint => { + panic!("safepoint instructions not used by new backend's safepoints!"); + } + + Opcode::IaddImm + | Opcode::ImulImm + | Opcode::UdivImm + | Opcode::SdivImm + | Opcode::UremImm + | Opcode::SremImm + | Opcode::IrsubImm + | Opcode::IaddCin + | Opcode::IaddIfcin + | Opcode::IaddCout + | Opcode::IaddIfcout + | Opcode::IaddCarry + | Opcode::IaddIfcarry + | Opcode::IsubBin + | Opcode::IsubIfbin + | Opcode::IsubBout + | Opcode::IsubIfbout + | Opcode::IsubBorrow + | Opcode::IsubIfborrow + | Opcode::BandImm + | Opcode::BorImm + | Opcode::BxorImm + | Opcode::RotlImm + | Opcode::RotrImm + | Opcode::IshlImm + | Opcode::UshrImm + | Opcode::SshrImm + | Opcode::IcmpImm + | Opcode::IfcmpImm => { + panic!("ALU+imm and ALU+carry ops should not appear here!"); + } + + #[cfg(feature = "x86")] + Opcode::X86Udivmodx + | Opcode::X86Sdivmodx + | Opcode::X86Umulx + | Opcode::X86Smulx + | Opcode::X86Cvtt2si + | Opcode::X86Fmin + | Opcode::X86Fmax + | Opcode::X86Push + | Opcode::X86Pop + | Opcode::X86Bsr + | Opcode::X86Bsf + | Opcode::X86Pblendw + | Opcode::X86Pshufd + | Opcode::X86Pshufb + | Opcode::X86Pextr + | Opcode::X86Pinsr + | Opcode::X86Insertps + | Opcode::X86Movsd + | Opcode::X86Movlhps + | Opcode::X86Psll + | Opcode::X86Psrl + | Opcode::X86Psra + | Opcode::X86Ptest + | Opcode::X86Pmaxs + | Opcode::X86Pmaxu + | Opcode::X86Pmins + | Opcode::X86Pminu + | Opcode::X86Pmullq + | Opcode::X86Pmuludq + | Opcode::X86Punpckh + | Opcode::X86Punpckl + | Opcode::X86Vcvtudq2ps + | Opcode::X86Palignr + | Opcode::X86ElfTlsGetAddr + | Opcode::X86MachoTlsGetAddr => { + panic!("x86-specific opcode in supposedly arch-neutral IR!"); + } + } + + Ok(()) +} + +//============================================================================ 
+// Lowering: main entry point for lowering a branch group + +fn lower_branch>( + ctx: &mut C, + branches: &[IRInst], + targets: &[MachLabel], +) -> CodegenResult<()> { + // A block should end with at most two branches. The first may be a + // conditional branch; a conditional branch can be followed only by an + // unconditional branch or fallthrough. Otherwise, if only one branch, + // it may be an unconditional branch, a fallthrough, a return, or a + // trap. These conditions are verified by `is_ebb_basic()` during the + // verifier pass. + assert!(branches.len() <= 2); + + if branches.len() == 2 { + // Must be a conditional branch followed by an unconditional branch. + let op0 = ctx.data(branches[0]).opcode(); + let op1 = ctx.data(branches[1]).opcode(); + + assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough); + let taken = BranchTarget::Label(targets[0]); + let not_taken = BranchTarget::Label(targets[1]); + + match op0 { + Opcode::Brz | Opcode::Brnz => { + let flag_input = InsnInput { + insn: branches[0], + input: 0, + }; + let cond = lower_boolean_to_flags(ctx, flag_input); + let negated = op0 == Opcode::Brz; + let cond = if negated { cond.invert() } else { cond }; + ctx.emit(Inst::CondBr { + taken, + not_taken, + cond, + }); + } + + Opcode::Brif => { + let condcode = ctx.data(branches[0]).cond_code().unwrap(); + let cond = Cond::from_intcc(condcode); + let is_signed = condcode_is_signed(condcode); + + // Verification ensures that the input is always a single-def ifcmp. + let cmp_insn = ctx + .get_input_as_source_or_const(branches[0], 0) + .inst + .unwrap() + .0; + debug_assert_eq!(ctx.data(cmp_insn).opcode(), Opcode::Ifcmp); + lower_icmp_to_flags(ctx, cmp_insn, is_signed, true); + + ctx.emit(Inst::CondBr { + taken, + not_taken, + cond, + }); + } + + Opcode::Brff => unreachable!(), + + _ => unimplemented!(), + } + } else { + // Must be an unconditional branch or an indirect branch. 
+ let op = ctx.data(branches[0]).opcode(); + match op { + Opcode::Jump | Opcode::Fallthrough => { + assert!(branches.len() == 1); + // In the Fallthrough case, the machine-independent driver + // fills in `targets[0]` with our fallthrough block, so this + // is valid for both Jump and Fallthrough. + ctx.emit(Inst::Jump { + dest: BranchTarget::Label(targets[0]), + }); + } + + Opcode::BrTable => { + let jt_size = targets.len() - 1; + assert!(jt_size <= std::u32::MAX as usize); + + // Load up jump table element index. + let ridx = put_input_in_reg( + ctx, + InsnInput { + insn: branches[0], + input: 0, + }, + NarrowValueMode::ZeroExtend64, + ); + + // Temp registers needed by the compound instruction. + let rtmp1 = ctx.alloc_tmp(types::I64).only_reg().unwrap(); + let rtmp2 = ctx.alloc_tmp(types::I64).only_reg().unwrap(); + + // Emit the compound instruction that does: + // + // clgfi %rIdx, + // jghe + // sllg %rTmp2, %rIdx, 2 + // larl %rTmp1, + // lgf %rTmp2, 0(%rTmp2, %rTmp1) + // agrk %rTmp1, %rTmp1, %rTmp2 + // br %rA + // [jt entries] + // + // This must be *one* instruction in the vcode because + // we cannot allow regalloc to insert any spills/fills + // in the middle of the sequence; otherwise, the ADR's + // PC-rel offset to the jumptable would be incorrect. + // (The alternative is to introduce a relocation pass + // for inlined jumptables, which is much worse, IMHO.) + + let default_target = BranchTarget::Label(targets[0]); + let jt_targets: Vec = targets + .iter() + .skip(1) + .map(|bix| BranchTarget::Label(*bix)) + .collect(); + let targets_for_term: Vec = targets.to_vec(); + ctx.emit(Inst::JTSequence { + ridx, + rtmp1, + rtmp2, + info: Box::new(JTSequenceInfo { + default_target, + targets: jt_targets, + targets_for_term: targets_for_term, + }), + }); + } + + _ => panic!("Unknown branch type!"), + } + } + + Ok(()) +} + +//============================================================================= +// Lowering-backend trait implementation. 
+ +impl LowerBackend for S390xBackend { + type MInst = Inst; + + fn lower>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> { + lower_insn_to_regs(ctx, ir_inst, &self.flags) + } + + fn lower_branch_group>( + &self, + ctx: &mut C, + branches: &[IRInst], + targets: &[MachLabel], + ) -> CodegenResult<()> { + lower_branch(ctx, branches, targets) + } +} diff --git a/cranelift/codegen/src/isa/s390x/mod.rs b/cranelift/codegen/src/isa/s390x/mod.rs new file mode 100644 index 0000000000..3a78b54c95 --- /dev/null +++ b/cranelift/codegen/src/isa/s390x/mod.rs @@ -0,0 +1,296 @@ +//! IBM Z 64-bit Instruction Set Architecture. + +use crate::ir::condcodes::IntCC; +use crate::ir::Function; +use crate::isa::s390x::settings as s390x_settings; +use crate::isa::unwind::systemv::RegisterMappingError; +use crate::isa::Builder as IsaBuilder; +use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode}; +use crate::result::CodegenResult; +use crate::settings as shared_settings; + +use alloc::{boxed::Box, vec::Vec}; +use core::hash::{Hash, Hasher}; + +use regalloc::{PrettyPrint, RealRegUniverse, Reg}; +use target_lexicon::{Architecture, Triple}; + +// New backend: +mod abi; +pub(crate) mod inst; +mod lower; +mod settings; + +use inst::create_reg_universe; + +use self::inst::EmitInfo; + +/// A IBM Z backend. +pub struct S390xBackend { + triple: Triple, + flags: shared_settings::Flags, + isa_flags: s390x_settings::Flags, + reg_universe: RealRegUniverse, +} + +impl S390xBackend { + /// Create a new IBM Z backend with the given (shared) flags. + pub fn new_with_flags( + triple: Triple, + flags: shared_settings::Flags, + isa_flags: s390x_settings::Flags, + ) -> S390xBackend { + let reg_universe = create_reg_universe(&flags); + S390xBackend { + triple, + flags, + isa_flags, + reg_universe, + } + } + + /// This performs lowering to VCode, register-allocates the code, computes block layout and + /// finalizes branches. The result is ready for binary emission. 
+ fn compile_vcode( + &self, + func: &Function, + flags: shared_settings::Flags, + ) -> CodegenResult> { + let emit_info = EmitInfo::new(flags.clone()); + let abi = Box::new(abi::S390xABICallee::new(func, flags)?); + compile::compile::(func, self, abi, emit_info) + } +} + +impl MachBackend for S390xBackend { + fn compile_function( + &self, + func: &Function, + want_disasm: bool, + ) -> CodegenResult { + let flags = self.flags(); + let vcode = self.compile_vcode(func, flags.clone())?; + let buffer = vcode.emit(); + let frame_size = vcode.frame_size(); + let value_labels_ranges = vcode.value_labels_ranges(); + let stackslot_offsets = vcode.stackslot_offsets().clone(); + + let disasm = if want_disasm { + Some(vcode.show_rru(Some(&create_reg_universe(flags)))) + } else { + None + }; + + let buffer = buffer.finish(); + + Ok(MachCompileResult { + buffer, + frame_size, + disasm, + value_labels_ranges, + stackslot_offsets, + }) + } + + fn name(&self) -> &'static str { + "s390x" + } + + fn triple(&self) -> Triple { + self.triple.clone() + } + + fn flags(&self) -> &shared_settings::Flags { + &self.flags + } + + fn isa_flags(&self) -> Vec { + self.isa_flags.iter().collect() + } + + fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) { + self.flags.hash(&mut hasher); + self.isa_flags.hash(&mut hasher); + } + + fn reg_universe(&self) -> &RealRegUniverse { + &self.reg_universe + } + + fn unsigned_add_overflow_condition(&self) -> IntCC { + unimplemented!() + } + + fn unsigned_sub_overflow_condition(&self) -> IntCC { + unimplemented!() + } + + #[cfg(feature = "unwind")] + fn emit_unwind_info( + &self, + result: &MachCompileResult, + kind: crate::machinst::UnwindInfoKind, + ) -> CodegenResult> { + use crate::isa::unwind::UnwindInfo; + use crate::machinst::UnwindInfoKind; + Ok(match kind { + UnwindInfoKind::SystemV => { + let mapper = self::inst::unwind::systemv::RegisterMapper; + Some(UnwindInfo::SystemV( + crate::isa::unwind::systemv::create_unwind_info_from_insts( + 
&result.buffer.unwind_info[..], + result.buffer.data.len(), + &mapper, + )?, + )) + } + _ => None, + }) + } + + #[cfg(feature = "unwind")] + fn create_systemv_cie(&self) -> Option { + Some(inst::unwind::systemv::create_cie()) + } + + #[cfg(feature = "unwind")] + fn map_reg_to_dwarf(&self, reg: Reg) -> Result { + inst::unwind::systemv::map_reg(reg).map(|reg| reg.0) + } +} + +/// Create a new `isa::Builder`. +pub fn isa_builder(triple: Triple) -> IsaBuilder { + assert!(triple.architecture == Architecture::S390x); + IsaBuilder { + triple, + setup: s390x_settings::builder(), + constructor: |triple, shared_flags, builder| { + let isa_flags = s390x_settings::Flags::new(&shared_flags, builder); + let backend = S390xBackend::new_with_flags(triple, shared_flags, isa_flags); + Box::new(TargetIsaAdapter::new(backend)) + }, + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::cursor::{Cursor, FuncCursor}; + use crate::ir::types::*; + use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature}; + use crate::isa::CallConv; + use crate::settings; + use crate::settings::Configurable; + use core::str::FromStr; + use target_lexicon::Triple; + + #[test] + fn test_compile_function() { + let name = ExternalName::testcase("test0"); + let mut sig = Signature::new(CallConv::SystemV); + sig.params.push(AbiParam::new(I32)); + sig.returns.push(AbiParam::new(I32)); + let mut func = Function::with_name_signature(name, sig); + + let bb0 = func.dfg.make_block(); + let arg0 = func.dfg.append_block_param(bb0, I32); + + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(bb0); + let v0 = pos.ins().iconst(I32, 0x1234); + let v1 = pos.ins().iadd(arg0, v0); + pos.ins().return_(&[v1]); + + let mut shared_flags_builder = settings::builder(); + shared_flags_builder.set("opt_level", "none").unwrap(); + let shared_flags = settings::Flags::new(shared_flags_builder); + let isa_flags = s390x_settings::Flags::new(&shared_flags, s390x_settings::builder()); + let backend = 
S390xBackend::new_with_flags( + Triple::from_str("s390x").unwrap(), + shared_flags, + isa_flags, + ); + let result = backend + .compile_function(&mut func, /* want_disasm = */ false) + .unwrap(); + let code = &result.buffer.data[..]; + + // ahi %r2, 0x1234 + // br %r14 + let golden = vec![0xa7, 0x2a, 0x12, 0x34, 0x07, 0xfe]; + + assert_eq!(code, &golden[..]); + } + + #[test] + fn test_branch_lowering() { + let name = ExternalName::testcase("test0"); + let mut sig = Signature::new(CallConv::SystemV); + sig.params.push(AbiParam::new(I32)); + sig.returns.push(AbiParam::new(I32)); + let mut func = Function::with_name_signature(name, sig); + + let bb0 = func.dfg.make_block(); + let arg0 = func.dfg.append_block_param(bb0, I32); + let bb1 = func.dfg.make_block(); + let bb2 = func.dfg.make_block(); + let bb3 = func.dfg.make_block(); + + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(bb0); + let v0 = pos.ins().iconst(I32, 0x1234); + let v1 = pos.ins().iadd(arg0, v0); + pos.ins().brnz(v1, bb1, &[]); + pos.ins().jump(bb2, &[]); + pos.insert_block(bb1); + pos.ins().brnz(v1, bb2, &[]); + pos.ins().jump(bb3, &[]); + pos.insert_block(bb2); + let v2 = pos.ins().iadd(v1, v0); + pos.ins().brnz(v2, bb2, &[]); + pos.ins().jump(bb1, &[]); + pos.insert_block(bb3); + let v3 = pos.ins().isub(v1, v0); + pos.ins().return_(&[v3]); + + let mut shared_flags_builder = settings::builder(); + shared_flags_builder.set("opt_level", "none").unwrap(); + let shared_flags = settings::Flags::new(shared_flags_builder); + let isa_flags = s390x_settings::Flags::new(&shared_flags, s390x_settings::builder()); + let backend = S390xBackend::new_with_flags( + Triple::from_str("s390x").unwrap(), + shared_flags, + isa_flags, + ); + let result = backend + .compile_function(&mut func, /* want_disasm = */ false) + .unwrap(); + let code = &result.buffer.data[..]; + + // FIXME: the branching logic should be optimized more + + // ahi %r2, 4660 + // chi %r2, 0 + // jglh label1 ; jg label2 + // jg label6 + 
// jg label3 + // ahik %r3, %r2, 4660 + // chi %r3, 0 + // jglh label4 ; jg label5 + // jg label3 + // jg label6 + // chi %r2, 0 + // jglh label7 ; jg label8 + // jg label3 + // ahi %r2, -4660 + // br %r14 + let golden = vec![ + 167, 42, 18, 52, 167, 46, 0, 0, 192, 100, 0, 0, 0, 11, 236, 50, 18, 52, 0, 216, 167, + 62, 0, 0, 192, 100, 255, 255, 255, 251, 167, 46, 0, 0, 192, 100, 255, 255, 255, 246, + 167, 42, 237, 204, 7, 254, + ]; + + assert_eq!(code, &golden[..]); + } +} diff --git a/cranelift/codegen/src/isa/s390x/settings.rs b/cranelift/codegen/src/isa/s390x/settings.rs new file mode 100644 index 0000000000..69859cee4f --- /dev/null +++ b/cranelift/codegen/src/isa/s390x/settings.rs @@ -0,0 +1,9 @@ +//! S390X Settings. + +use crate::settings::{self, detail, Builder, Value}; +use core::fmt; + +// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a +// public `Flags` struct with an impl for all of the settings defined in +// `cranelift-codegen/meta/src/isa/s390x/settings.rs`. +include!(concat!(env!("OUT_DIR"), "/settings-s390x.rs")); diff --git a/cranelift/codegen/src/isa/unwind.rs b/cranelift/codegen/src/isa/unwind.rs index 7c9718a570..13397c3266 100644 --- a/cranelift/codegen/src/isa/unwind.rs +++ b/cranelift/codegen/src/isa/unwind.rs @@ -225,6 +225,11 @@ pub enum UnwindInst { /// the clobber area. offset_downward_to_clobbers: u32, }, + /// The stack pointer was adjusted to allocate the stack. + StackAlloc { + /// Size to allocate. + size: u32, + }, /// The stack slot at the given offset from the clobber-area base has been /// used to save the given register. 
/// diff --git a/cranelift/codegen/src/isa/unwind/systemv.rs b/cranelift/codegen/src/isa/unwind/systemv.rs index 965603d4e1..da3bfea869 100644 --- a/cranelift/codegen/src/isa/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/unwind/systemv.rs @@ -6,7 +6,6 @@ use crate::isa::unwind::UnwindInst; use crate::result::{CodegenError, CodegenResult}; use alloc::vec::Vec; use gimli::write::{Address, FrameDescriptionEntry}; -use thiserror::Error; #[cfg(feature = "enable-serde")] use serde::{Deserialize, Serialize}; @@ -15,16 +14,32 @@ type Register = u16; /// Enumerate the errors possible in mapping Cranelift registers to their DWARF equivalent. #[allow(missing_docs)] -#[derive(Error, Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq)] pub enum RegisterMappingError { - #[error("unable to find bank for register info")] MissingBank, - #[error("register mapping is currently only implemented for x86_64")] UnsupportedArchitecture, - #[error("unsupported register bank: {0}")] UnsupportedRegisterBank(&'static str), } +// This is manually implementing Error and Display instead of using thiserror to reduce the amount +// of dependencies used by Cranelift. +impl std::error::Error for RegisterMappingError {} + +impl std::fmt::Display for RegisterMappingError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + RegisterMappingError::MissingBank => write!(f, "unable to find bank for register info"), + RegisterMappingError::UnsupportedArchitecture => write!( + f, + "register mapping is currently only implemented for x86_64" + ), + RegisterMappingError::UnsupportedRegisterBank(bank) => { + write!(f, "unsupported register bank: {}", bank) + } + } + } +} + // This mirrors gimli's CallFrameInstruction, but is serializable // This excludes CfaExpression, Expression, ValExpression due to // https://github.com/gimli-rs/gimli/issues/513. @@ -122,8 +137,10 @@ pub(crate) trait RegisterMapper { fn map(&self, reg: Reg) -> Result; /// Gets stack pointer register. 
fn sp(&self) -> Register; - /// Gets the frame pointer register. - fn fp(&self) -> Register; + /// Gets the frame pointer register, if any. + fn fp(&self) -> Option { + None + } /// Gets the link register, if any. fn lr(&self) -> Option { None @@ -151,6 +168,7 @@ pub(crate) fn create_unwind_info_from_insts>( ) -> CodegenResult { let mut instructions = vec![]; + let mut cfa_offset = 0; let mut clobber_offset_to_cfa = 0; for &(instruction_offset, ref inst) in insts { match inst { @@ -163,10 +181,14 @@ pub(crate) fn create_unwind_info_from_insts>( instruction_offset, CallFrameInstruction::CfaOffset(offset_upward_to_caller_sp as i32), )); - // Note that we saved the old FP value on the stack. + // Note that we saved the old FP value on the stack. Use of this + // operation implies that the target defines a FP register. instructions.push(( instruction_offset, - CallFrameInstruction::Offset(mr.fp(), -(offset_upward_to_caller_sp as i32)), + CallFrameInstruction::Offset( + mr.fp().unwrap(), + -(offset_upward_to_caller_sp as i32), + ), )); // If there is a link register on this architecture, note that // we saved it as well. @@ -188,15 +210,29 @@ pub(crate) fn create_unwind_info_from_insts>( // Define CFA in terms of FP. Note that we assume it was already // defined correctly in terms of the current SP, and FP has just // been set to the current SP, so we do not need to change the - // offset, only the register. - instructions.push(( - instruction_offset, - CallFrameInstruction::CfaRegister(mr.fp()), - )); + // offset, only the register. (This is done only if the target + // defines a frame pointer register.) + if let Some(fp) = mr.fp() { + instructions.push((instruction_offset, CallFrameInstruction::CfaRegister(fp))); + } + // Record initial CFA offset. This will be used with later + // StackAlloc calls if we do not have a frame pointer. 
+ cfa_offset = offset_upward_to_caller_sp; // Record distance from CFA downward to clobber area so we can // express clobber offsets later in terms of CFA. clobber_offset_to_cfa = offset_upward_to_caller_sp + offset_downward_to_clobbers; } + &UnwindInst::StackAlloc { size } => { + // If we do not use a frame pointer, we need to update the + // CFA offset whenever the stack pointer changes. + if mr.fp().is_none() { + cfa_offset += size; + instructions.push(( + instruction_offset, + CallFrameInstruction::CfaOffset(cfa_offset as i32), + )); + } + } &UnwindInst::SaveReg { clobber_offset, reg, diff --git a/cranelift/codegen/src/isa/unwind/winx64.rs b/cranelift/codegen/src/isa/unwind/winx64.rs index 1308eb3bdc..1c232f6855 100644 --- a/cranelift/codegen/src/isa/unwind/winx64.rs +++ b/cranelift/codegen/src/isa/unwind/winx64.rs @@ -3,14 +3,11 @@ use crate::isa::unwind::input; use crate::result::{CodegenError, CodegenResult}; use alloc::vec::Vec; -use byteorder::{ByteOrder, LittleEndian}; use log::warn; #[cfg(feature = "enable-serde")] use serde::{Deserialize, Serialize}; -#[cfg(feature = "x64")] use crate::binemit::CodeOffset; -#[cfg(feature = "x64")] use crate::isa::unwind::UnwindInst; /// Maximum (inclusive) size of a "small" stack allocation @@ -33,20 +30,20 @@ impl<'a> Writer<'a> { self.offset += 1; } - fn write_u16(&mut self, v: u16) { - T::write_u16(&mut self.buf[self.offset..(self.offset + 2)], v); + fn write_u16_le(&mut self, v: u16) { + self.buf[self.offset..(self.offset + 2)].copy_from_slice(&v.to_le_bytes()); self.offset += 2; } - fn write_u32(&mut self, v: u32) { - T::write_u32(&mut self.buf[self.offset..(self.offset + 4)], v); + fn write_u32_le(&mut self, v: u32) { + self.buf[self.offset..(self.offset + 4)].copy_from_slice(&v.to_le_bytes()); self.offset += 4; } } /// The supported unwind codes for the x64 Windows ABI. 
/// -/// See: https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64 +/// See: /// Only what is needed to describe the prologues generated by the Cranelift x86 ISA are represented here. /// Note: the Cranelift x86 ISA RU enum matches the Windows unwind GPR encoding values. #[allow(dead_code)] @@ -123,11 +120,11 @@ impl UnwindCode { let scaled_stack_offset = stack_offset / 16; if scaled_stack_offset <= core::u16::MAX as u32 { writer.write_u8((*reg << 4) | (op_small as u8)); - writer.write_u16::(scaled_stack_offset as u16); + writer.write_u16_le(scaled_stack_offset as u16); } else { writer.write_u8((*reg << 4) | (op_large as u8)); - writer.write_u16::(*stack_offset as u16); - writer.write_u16::((stack_offset >> 16) as u16); + writer.write_u16_le(*stack_offset as u16); + writer.write_u16_le((stack_offset >> 16) as u16); } } Self::StackAlloc { @@ -145,10 +142,10 @@ impl UnwindCode { ); } else if *size <= LARGE_ALLOC_16BIT_MAX_SIZE { writer.write_u8(UnwindOperation::LargeStackAlloc as u8); - writer.write_u16::((*size / 8) as u16); + writer.write_u16_le((*size / 8) as u16); } else { writer.write_u8((1 << 4) | (UnwindOperation::LargeStackAlloc as u8)); - writer.write_u32::(*size); + writer.write_u32_le(*size); } } Self::SetFPReg { instruction_offset } => { @@ -195,7 +192,7 @@ pub(crate) trait RegisterMapper { /// Represents Windows x64 unwind information. 
/// /// For information about Windows x64 unwind info, see: -/// https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64 +/// #[derive(Clone, Debug, PartialEq, Eq)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub struct UnwindInfo { @@ -250,7 +247,7 @@ impl UnwindInfo { // To keep a 32-bit alignment, emit 2 bytes of padding if there's an odd number of 16-bit nodes if (node_count & 1) == 1 { - writer.write_u16::(0); + writer.write_u16_le(0); } // Ensure the correct number of bytes was emitted @@ -334,10 +331,8 @@ impl UnwindInfo { } } -#[cfg(feature = "x64")] const UNWIND_RBP_REG: u8 = 5; -#[cfg(feature = "x64")] pub(crate) fn create_unwind_info_from_insts>( insts: &[(CodeOffset, UnwindInst)], ) -> CodegenResult { @@ -360,6 +355,12 @@ pub(crate) fn create_unwind_info_from_insts>( frame_register_offset = ensure_unwind_offset(offset_downward_to_clobbers)?; unwind_codes.push(UnwindCode::SetFPReg { instruction_offset }); } + &UnwindInst::StackAlloc { size } => { + unwind_codes.push(UnwindCode::StackAlloc { + instruction_offset, + size, + }); + } &UnwindInst::SaveReg { clobber_offset, reg, diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 4ae639568d..63a8af7c9b 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -237,10 +237,20 @@ impl ABIMachineSpec for X64ABIMachineSpec { extension: param.extension, }); } else { - // Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte - // stack alignment happens separately after all args.) + // Compute size. For the wasmtime ABI it differs from native + // ABIs in how multiple values are returned, so we take a + // leaf out of arm64's book by not rounding everything up to + // 8 bytes. For all ABI arguments, and other ABI returns, + // though, each slot takes a minimum of 8 bytes. + // + // Note that in all cases 16-byte stack alignment happens + // separately after all args. 
let size = (reg_ty.bits() / 8) as u64; - let size = std::cmp::max(size, 8); + let size = if args_or_rets == ArgsOrRets::Rets && call_conv.extends_wasmtime() { + size + } else { + std::cmp::max(size, 8) + }; // Align. debug_assert!(size.is_power_of_two()); next_stack = align_to(next_stack, size); @@ -490,6 +500,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { flags: &settings::Flags, clobbers: &Set>, fixed_frame_storage_size: u32, + _outgoing_args_size: u32, ) -> (u64, SmallVec<[Self::I; 16]>) { let mut insts = SmallVec::new(); // Find all clobbered registers that are callee-save. @@ -564,6 +575,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { flags: &settings::Flags, clobbers: &Set>, fixed_frame_storage_size: u32, + _outgoing_args_size: u32, ) -> SmallVec<[Self::I; 16]> { let mut insts = SmallVec::new(); @@ -824,15 +836,7 @@ impl From for SyntheticAmode { } fn get_intreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option { - let is_fastcall = match call_conv { - CallConv::Fast - | CallConv::Cold - | CallConv::SystemV - | CallConv::BaldrdashSystemV - | CallConv::Baldrdash2020 => false, - CallConv::WindowsFastcall => true, - _ => panic!("int args only supported for SysV or Fastcall calling convention"), - }; + let is_fastcall = call_conv.extends_windows_fastcall(); // Fastcall counts by absolute argument number; SysV counts by argument of // this (integer) class. 
@@ -853,15 +857,7 @@ fn get_intreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Optio } fn get_fltreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option { - let is_fastcall = match call_conv { - CallConv::Fast - | CallConv::Cold - | CallConv::SystemV - | CallConv::BaldrdashSystemV - | CallConv::Baldrdash2020 => false, - CallConv::WindowsFastcall => true, - _ => panic!("float args only supported for SysV or Fastcall calling convention"), - }; + let is_fastcall = call_conv.extends_windows_fastcall(); // Fastcall counts by absolute argument number; SysV counts by argument of // this (floating-point) class. @@ -894,7 +890,10 @@ fn get_intreg_for_retval( 1 => Some(regs::rdx()), _ => None, }, - CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => { + CallConv::BaldrdashSystemV + | CallConv::Baldrdash2020 + | CallConv::WasmtimeSystemV + | CallConv::WasmtimeFastcall => { if intreg_idx == 0 && retval_idx == 0 { Some(regs::rax()) } else { @@ -907,6 +906,7 @@ fn get_intreg_for_retval( _ => None, }, CallConv::BaldrdashWindows | CallConv::Probestack => todo!(), + CallConv::AppleAarch64 => unreachable!(), } } @@ -921,7 +921,10 @@ fn get_fltreg_for_retval( 1 => Some(regs::xmm1()), _ => None, }, - CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => { + CallConv::BaldrdashSystemV + | CallConv::Baldrdash2020 + | CallConv::WasmtimeFastcall + | CallConv::WasmtimeSystemV => { if fltreg_idx == 0 && retval_idx == 0 { Some(regs::xmm0()) } else { @@ -933,6 +936,7 @@ fn get_fltreg_for_retval( _ => None, }, CallConv::BaldrdashWindows | CallConv::Probestack => todo!(), + CallConv::AppleAarch64 => unreachable!(), } } @@ -990,17 +994,18 @@ fn get_callee_saves(call_conv: &CallConv, regs: &Set>) -> Vec< CallConv::BaldrdashWindows => { todo!("baldrdash windows"); } - CallConv::Fast | CallConv::Cold | CallConv::SystemV => regs + CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::WasmtimeSystemV => regs .iter() .cloned() .filter(|r| 
is_callee_save_systemv(r.to_reg())) .collect(), - CallConv::WindowsFastcall => regs + CallConv::WindowsFastcall | CallConv::WasmtimeFastcall => regs .iter() .cloned() .filter(|r| is_callee_save_fastcall(r.to_reg())) .collect(), CallConv::Probestack => todo!("probestack?"), + CallConv::AppleAarch64 => unreachable!(), }; // Sort registers for deterministic code output. We can do an unstable sort because the // registers will be unique (there are no dups). diff --git a/cranelift/codegen/src/isa/x64/encoding/evex.rs b/cranelift/codegen/src/isa/x64/encoding/evex.rs new file mode 100644 index 0000000000..80a3c86bda --- /dev/null +++ b/cranelift/codegen/src/isa/x64/encoding/evex.rs @@ -0,0 +1,403 @@ +//! Encodes EVEX instructions. These instructions are those added by the AVX-512 extensions. The +//! EVEX encoding requires a 4-byte prefix: +//! +//! Byte 0: 0x62 +//! ┌───┬───┬───┬───┬───┬───┬───┬───┐ +//! Byte 1: │ R │ X │ B │ R'│ 0 │ 0 │ m │ m │ +//! ├───┼───┼───┼───┼───┼───┼───┼───┤ +//! Byte 2: │ W │ v │ v │ v │ v │ 1 │ p │ p │ +//! ├───┼───┼───┼───┼───┼───┼───┼───┤ +//! Byte 3: │ z │ L'│ L │ b │ V'│ a │ a │ a │ +//! └───┴───┴───┴───┴───┴───┴───┴───┘ +//! +//! The prefix is then followeded by the opcode byte, the ModR/M byte, and other optional suffixes +//! (e.g. SIB byte, displacements, immediates) based on the instruction (see section 2.6, Intel +//! Software Development Manual, volume 2A). +use super::rex::{encode_modrm, LegacyPrefixes, OpcodeMap}; +use super::ByteSink; +use core::ops::RangeInclusive; + +/// Constructs an EVEX-encoded instruction using a builder pattern. This approach makes it visually +/// easier to transform something the manual's syntax, `EVEX.256.66.0F38.W1 1F /r` to code: +/// `EvexInstruction::new().length(...).prefix(...).map(...).w(true).opcode(0x1F).reg(...).rm(...)`. 
+pub struct EvexInstruction { + bits: u32, + opcode: u8, + reg: Register, + rm: Register, +} + +/// Because some of the bit flags in the EVEX prefix are reversed and users of `EvexInstruction` may +/// choose to skip setting fields, here we set some sane defaults. Note that: +/// - the first byte is always `0x62` but you will notice it at the end of the default `bits` value +/// implemented--remember the little-endian order +/// - some bits are always set to certain values: bits 10-11 to 0, bit 18 to 1 +/// - the other bits set correspond to reversed bits: R, X, B, R' (byte 1), vvvv (byte 2), V' (byte +/// 3). +/// +/// See the `default_emission` test for what these defaults are equivalent to (e.g. using RAX, +/// unsetting the W bit, etc.) +impl Default for EvexInstruction { + fn default() -> Self { + Self { + bits: 0x08_7C_F0_62, + opcode: 0, + reg: Register::default(), + rm: Register::default(), + } + } +} + +#[allow(non_upper_case_globals)] // This makes it easier to match the bit range names to the manual's names. +impl EvexInstruction { + /// Construct a default EVEX instruction. + pub fn new() -> Self { + Self::default() + } + + /// Set the length of the instruction . Note that there are sets of instructions (i.e. rounding, + /// memory broadcast) that modify the same underlying bits--at some point (TODO) we can add a + /// way to set those context bits and verify that both are not used (e.g. rounding AND length). + /// For now, this method is very convenient. + #[inline(always)] + pub fn length(mut self, length: EvexVectorLength) -> Self { + self.write(Self::LL, EvexContext::Other { length }.bits() as u32); + self + } + + /// Set the legacy prefix byte of the instruction: None | 66 | F0 | F2 | F3. EVEX instructions + /// pack these into the prefix, not as separate bytes. 
+ #[inline(always)] + pub fn prefix(mut self, prefix: LegacyPrefixes) -> Self { + self.write(Self::pp, prefix.bits() as u32); + self + } + + /// Set the opcode map byte of the instruction: None | 0F | 0F38 | 0F3A. EVEX instructions pack + /// these into the prefix, not as separate bytes. + #[inline(always)] + pub fn map(mut self, map: OpcodeMap) -> Self { + self.write(Self::mm, map.bits() as u32); + self + } + + /// Set the W bit, typically used to indicate an instruction using 64 bits of an operand (e.g. + /// 64 bit lanes). EVEX packs this bit in the EVEX prefix; previous encodings used the REX + /// prefix. + #[inline(always)] + pub fn w(mut self, w: bool) -> Self { + self.write(Self::W, w as u32); + self + } + + /// Set the instruction opcode byte. + #[inline(always)] + pub fn opcode(mut self, opcode: u8) -> Self { + self.opcode = opcode; + self + } + + /// Set the register to use for the `reg` bits; many instructions use this as the write operand. + /// Setting this affects both the ModRM byte (`reg` section) and the EVEX prefix (the extension + /// bits for register encodings > 8). + #[inline(always)] + pub fn reg(mut self, reg: impl Into) -> Self { + self.reg = reg.into(); + let r = !(self.reg.0 >> 3) & 1; + let r_ = !(self.reg.0 >> 4) & 1; + self.write(Self::R, r as u32); + self.write(Self::R_, r_ as u32); + self + } + + /// Set the mask to use. See section 2.6 in the Intel Software Developer's Manual, volume 2A for + /// more details. + #[allow(dead_code)] + #[inline(always)] + pub fn mask(mut self, mask: EvexMasking) -> Self { + self.write(Self::aaa, mask.aaa_bits() as u32); + self.write(Self::z, mask.z_bit() as u32); + self + } + + /// Set the `vvvvv` register; some instructions allow using this as a second, non-destructive + /// source register in 3-operand instructions (e.g. 2 read, 1 write). 
+ #[allow(dead_code)] + #[inline(always)] + pub fn vvvvv(mut self, reg: impl Into) -> Self { + let reg = reg.into(); + self.write(Self::vvvv, !(reg.0 as u32) & 0b1111); + self.write(Self::V_, !(reg.0 as u32 >> 4) & 0b1); + self + } + + /// Set the register to use for the `rm` bits; many instructions use this as the "read from + /// register/memory" operand. Currently this does not support memory addressing (TODO).Setting + /// this affects both the ModRM byte (`rm` section) and the EVEX prefix (the extension bits for + /// register encodings > 8). + #[inline(always)] + pub fn rm(mut self, reg: impl Into) -> Self { + self.rm = reg.into(); + let b = !(self.rm.0 >> 3) & 1; + let x = !(self.rm.0 >> 4) & 1; + self.write(Self::X, x as u32); + self.write(Self::B, b as u32); + self + } + + /// Emit the EVEX-encoded instruction to the code sink: + /// - first, the 4-byte EVEX prefix; + /// - then, the opcode byte; + /// - finally, the ModR/M byte. + /// + /// Eventually this method should support encodings of more than just the reg-reg addressing mode (TODO). + pub fn encode(&self, sink: &mut CS) { + sink.put4(self.bits); + sink.put1(self.opcode); + sink.put1(encode_modrm(3, self.reg.0 & 7, self.rm.0 & 7)); + } + + // In order to simplify the encoding of the various bit ranges in the prefix, we specify those + // ranges according to the table below (extracted from the Intel Software Development Manual, + // volume 2A). Remember that, because we pack the 4-byte prefix into a little-endian `u32`, this + // chart should be read from right-to-left, top-to-bottom. Note also that we start ranges at bit + // 8, leaving bits 0-7 for the mandatory `0x62`. 
+ // ┌───┬───┬───┬───┬───┬───┬───┬───┐ + // Byte 1: │ R │ X │ B │ R'│ 0 │ 0 │ m │ m │ + // ├───┼───┼───┼───┼───┼───┼───┼───┤ + // Byte 2: │ W │ v │ v │ v │ v │ 1 │ p │ p │ + // ├───┼───┼───┼───┼───┼───┼───┼───┤ + // Byte 3: │ z │ L'│ L │ b │ V'│ a │ a │ a │ + // └───┴───┴───┴───┴───┴───┴───┴───┘ + + // Byte 1: + const mm: RangeInclusive = 8..=9; + const R_: RangeInclusive = 12..=12; + const B: RangeInclusive = 13..=13; + const X: RangeInclusive = 14..=14; + const R: RangeInclusive = 15..=15; + + // Byte 2: + const pp: RangeInclusive = 16..=17; + const vvvv: RangeInclusive = 19..=22; + const W: RangeInclusive = 23..=23; + + // Byte 3: + const aaa: RangeInclusive = 24..=26; + const V_: RangeInclusive = 27..=27; + #[allow(dead_code)] // Will be used once broadcast and rounding controls are exposed. + const b: RangeInclusive = 28..=28; + const LL: RangeInclusive = 29..=30; + const z: RangeInclusive = 31..=31; + + // A convenience method for writing the `value` bits to the given range in `self.bits`. + #[inline] + fn write(&mut self, range: RangeInclusive, value: u32) { + assert!(ExactSizeIterator::len(&range) > 0); + let size = range.end() - range.start() + 1; // Calculate the number of bits in the range. + let mask: u32 = (1 << size) - 1; // Generate a bit mask. + debug_assert!( + value <= mask, + "The written value should have fewer than {} bits.", + size + ); + let mask_complement = !(mask << *range.start()); // Create the bitwise complement for the clear mask. + self.bits &= mask_complement; // Clear the bits in `range`; otherwise the OR below may allow previously-set bits to slip through. + let value = value << *range.start(); // Place the value in the correct location (assumes `value <= mask`). + self.bits |= value; // Modify the bits in `range`. + } +} + +/// Describe the register index to use. This wrapper is a type-safe way to pass +/// around the registers defined in `inst/regs.rs`. 
+#[derive(Copy, Clone, Default)] +pub struct Register(u8); +impl From for Register { + fn from(reg: u8) -> Self { + debug_assert!(reg < 16); + Self(reg) + } +} +impl Into for Register { + fn into(self) -> u8 { + self.0 + } +} + +/// Defines the EVEX context for the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte). Table 2-36 in +/// section 2.6.10 (Intel Software Development Manual, volume 2A) describes how these bits can be +/// used together for certain classes of instructions; i.e., special care should be taken to ensure +/// that instructions use an applicable correct `EvexContext`. Table 2-39 contains cases where +/// opcodes can result in an #UD. +#[allow(dead_code, missing_docs)] // Rounding and broadcast modes are not yet used. +pub enum EvexContext { + RoundingRegToRegFP { + rc: EvexRoundingControl, + }, + NoRoundingFP { + sae: bool, + length: EvexVectorLength, + }, + MemoryOp { + broadcast: bool, + length: EvexVectorLength, + }, + Other { + length: EvexVectorLength, + }, +} + +impl Default for EvexContext { + fn default() -> Self { + Self::Other { + length: EvexVectorLength::default(), + } + } +} + +impl EvexContext { + /// Encode the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte) for merging with the P2 byte. + pub fn bits(&self) -> u8 { + match self { + Self::RoundingRegToRegFP { rc } => 0b001 | rc.bits() << 1, + Self::NoRoundingFP { sae, length } => (*sae as u8) | length.bits() << 1, + Self::MemoryOp { broadcast, length } => (*broadcast as u8) | length.bits() << 1, + Self::Other { length } => length.bits() << 1, + } + } +} + +/// The EVEX format allows choosing a vector length in the `L'` and `L` bits; see `EvexContext`. +#[allow(dead_code, missing_docs)] // Wider-length vectors are not yet used. +pub enum EvexVectorLength { + V128, + V256, + V512, +} + +impl EvexVectorLength { + /// Encode the `L'` and `L` bits for merging with the P2 byte. 
+ fn bits(&self) -> u8 { + match self { + Self::V128 => 0b00, + Self::V256 => 0b01, + Self::V512 => 0b10, + // 0b11 is reserved (#UD). + } + } +} + +impl Default for EvexVectorLength { + fn default() -> Self { + Self::V128 + } +} + +/// The EVEX format allows defining rounding control in the `L'` and `L` bits; see `EvexContext`. +#[allow(dead_code, missing_docs)] // Rounding controls are not yet used. +pub enum EvexRoundingControl { + RNE, + RD, + RU, + RZ, +} + +impl EvexRoundingControl { + /// Encode the `L'` and `L` bits for merging with the P2 byte. + fn bits(&self) -> u8 { + match self { + Self::RNE => 0b00, + Self::RD => 0b01, + Self::RU => 0b10, + Self::RZ => 0b11, + } + } +} + +/// Defines the EVEX masking behavior; masking support is described in section 2.6.4 of the Intel +/// Software Development Manual, volume 2A. +#[allow(dead_code, missing_docs)] // Masking is not yet used. +pub enum EvexMasking { + None, + Merging { k: u8 }, + Zeroing { k: u8 }, +} + +impl Default for EvexMasking { + fn default() -> Self { + EvexMasking::None + } +} + +impl EvexMasking { + /// Encode the `z` bit for merging with the P2 byte. + pub fn z_bit(&self) -> u8 { + match self { + Self::None | Self::Merging { .. } => 0, + Self::Zeroing { .. } => 1, + } + } + + /// Encode the `aaa` bits for merging with the P2 byte. + pub fn aaa_bits(&self) -> u8 { + match self { + Self::None => 0b000, + Self::Merging { k } | Self::Zeroing { k } => { + debug_assert!(*k <= 7); + *k + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::isa::x64::inst::regs; + use std::vec::Vec; + + // As a sanity test, we verify that the output of `xed-asmparse-main 'vpabsq xmm0{k0}, + // xmm1'` matches this EVEX encoding machinery. 
+ #[test] + fn vpabsq() { + let dst = regs::xmm0(); + let src = regs::xmm1(); + let mut sink0 = Vec::new(); + + EvexInstruction::new() + .prefix(LegacyPrefixes::_66) + .map(OpcodeMap::_0F38) + .w(true) + .opcode(0x1F) + .reg(dst.get_hw_encoding()) + .rm(src.get_hw_encoding()) + .length(EvexVectorLength::V128) + .encode(&mut sink0); + + assert_eq!(sink0, vec![0x62, 0xf2, 0xfd, 0x08, 0x1f, 0xc1]); + } + + /// Verify that the defaults are equivalent to an instruction with a `0x00` opcode using the + /// "0" register (i.e. `rax`), with sane defaults for the various configurable parameters. This + /// test is more interesting than it may appear because some of the parameters have flipped-bit + /// representations (e.g. `vvvvv`) so emitting 0s as a default will not work. + #[test] + fn default_emission() { + let mut sink0 = Vec::new(); + EvexInstruction::new().encode(&mut sink0); + + let mut sink1 = Vec::new(); + EvexInstruction::new() + .length(EvexVectorLength::V128) + .prefix(LegacyPrefixes::None) + .map(OpcodeMap::None) + .w(false) + .opcode(0x00) + .reg(regs::rax().get_hw_encoding()) + .rm(regs::rax().get_hw_encoding()) + .mask(EvexMasking::None) + .encode(&mut sink1); + + assert_eq!(sink0, sink1); + } +} diff --git a/cranelift/codegen/src/isa/x64/encoding/mod.rs b/cranelift/codegen/src/isa/x64/encoding/mod.rs new file mode 100644 index 0000000000..9dd2697649 --- /dev/null +++ b/cranelift/codegen/src/isa/x64/encoding/mod.rs @@ -0,0 +1,60 @@ +//! Contains the encoding machinery for the various x64 instruction formats. +use crate::{isa::x64, machinst::MachBuffer}; +use std::vec::Vec; + +pub mod evex; +pub mod rex; +pub mod vex; + +/// The encoding formats in this module all require a way of placing bytes into +/// a buffer. +pub trait ByteSink { + /// Add 1 byte to the code section. + fn put1(&mut self, _: u8); + + /// Add 2 bytes to the code section. + fn put2(&mut self, _: u16); + + /// Add 4 bytes to the code section. 
+ fn put4(&mut self, _: u32); + + /// Add 8 bytes to the code section. + fn put8(&mut self, _: u64); +} + +impl ByteSink for MachBuffer { + fn put1(&mut self, value: u8) { + self.put1(value) + } + + fn put2(&mut self, value: u16) { + self.put2(value) + } + + fn put4(&mut self, value: u32) { + self.put4(value) + } + + fn put8(&mut self, value: u64) { + self.put8(value) + } +} + +/// Provide a convenient implementation for testing. +impl ByteSink for Vec { + fn put1(&mut self, v: u8) { + self.extend_from_slice(&[v]) + } + + fn put2(&mut self, v: u16) { + self.extend_from_slice(&v.to_le_bytes()) + } + + fn put4(&mut self, v: u32) { + self.extend_from_slice(&v.to_le_bytes()) + } + + fn put8(&mut self, v: u64) { + self.extend_from_slice(&v.to_le_bytes()) + } +} diff --git a/cranelift/codegen/src/isa/x64/encoding/rex.rs b/cranelift/codegen/src/isa/x64/encoding/rex.rs new file mode 100644 index 0000000000..51016fa39e --- /dev/null +++ b/cranelift/codegen/src/isa/x64/encoding/rex.rs @@ -0,0 +1,504 @@ +//! Encodes instructions in the standard x86 encoding mode. This is called IA-32E mode in the Intel +//! manuals but corresponds to the addition of the REX-prefix format (hence the name of this module) +//! that allowed encoding instructions in both compatibility mode (32-bit instructions running on a +//! 64-bit OS) and in 64-bit mode (using the full 64-bit address space). +//! +//! For all of the routines that take both a memory-or-reg operand (sometimes called "E" in the +//! Intel documentation, see the Intel Developer's manual, vol. 2, section A.2) and a reg-only +//! operand ("G" in Intelese), the order is always G first, then E. The term "enc" in the following +//! means "hardware register encoding number". 
+ +use crate::{ + ir::TrapCode, + isa::x64::inst::{ + args::{Amode, OperandSize}, + regs, EmitInfo, EmitState, Inst, LabelUse, + }, + machinst::{MachBuffer, MachInstEmitInfo}, +}; +use regalloc::{Reg, RegClass}; + +pub(crate) fn low8_will_sign_extend_to_64(x: u32) -> bool { + let xs = (x as i32) as i64; + xs == ((xs << 56) >> 56) +} + +pub(crate) fn low8_will_sign_extend_to_32(x: u32) -> bool { + let xs = x as i32; + xs == ((xs << 24) >> 24) +} + +/// Encode the ModR/M byte. +#[inline(always)] +pub fn encode_modrm(m0d: u8, enc_reg_g: u8, rm_e: u8) -> u8 { + debug_assert!(m0d < 4); + debug_assert!(enc_reg_g < 8); + debug_assert!(rm_e < 8); + ((m0d & 3) << 6) | ((enc_reg_g & 7) << 3) | (rm_e & 7) +} + +#[inline(always)] +pub(crate) fn encode_sib(shift: u8, enc_index: u8, enc_base: u8) -> u8 { + debug_assert!(shift < 4); + debug_assert!(enc_index < 8); + debug_assert!(enc_base < 8); + ((shift & 3) << 6) | ((enc_index & 7) << 3) | (enc_base & 7) +} + +/// Get the encoding number of a GPR. +#[inline(always)] +pub(crate) fn int_reg_enc(reg: Reg) -> u8 { + debug_assert!(reg.is_real()); + debug_assert_eq!(reg.get_class(), RegClass::I64); + reg.get_hw_encoding() +} + +/// Get the encoding number of any register. +#[inline(always)] +pub(crate) fn reg_enc(reg: Reg) -> u8 { + debug_assert!(reg.is_real()); + reg.get_hw_encoding() +} + +/// A small bit field to record a REX prefix specification: +/// - bit 0 set to 1 indicates REX.W must be 0 (cleared). +/// - bit 1 set to 1 indicates the REX prefix must always be emitted. +#[repr(transparent)] +#[derive(Clone, Copy)] +pub(crate) struct RexFlags(u8); + +impl RexFlags { + /// By default, set the W field, and don't always emit. + #[inline(always)] + pub(crate) fn set_w() -> Self { + Self(0) + } + /// Creates a new RexPrefix for which the REX.W bit will be cleared. 
+ #[inline(always)] + pub(crate) fn clear_w() -> Self { + Self(1) + } + + #[inline(always)] + pub(crate) fn always_emit(&mut self) -> &mut Self { + self.0 = self.0 | 2; + self + } + + #[inline(always)] + pub(crate) fn always_emit_if_8bit_needed(&mut self, reg: Reg) -> &mut Self { + let enc_reg = int_reg_enc(reg); + if enc_reg >= 4 && enc_reg <= 7 { + self.always_emit(); + } + self + } + + #[inline(always)] + pub(crate) fn must_clear_w(&self) -> bool { + (self.0 & 1) != 0 + } + #[inline(always)] + pub(crate) fn must_always_emit(&self) -> bool { + (self.0 & 2) != 0 + } + + #[inline(always)] + pub(crate) fn emit_two_op(&self, sink: &mut MachBuffer, enc_g: u8, enc_e: u8) { + let w = if self.must_clear_w() { 0 } else { 1 }; + let r = (enc_g >> 3) & 1; + let x = 0; + let b = (enc_e >> 3) & 1; + let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b; + if rex != 0x40 || self.must_always_emit() { + sink.put1(rex); + } + } + + #[inline(always)] + pub fn emit_three_op( + &self, + sink: &mut MachBuffer, + enc_g: u8, + enc_index: u8, + enc_base: u8, + ) { + let w = if self.must_clear_w() { 0 } else { 1 }; + let r = (enc_g >> 3) & 1; + let x = (enc_index >> 3) & 1; + let b = (enc_base >> 3) & 1; + let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b; + if rex != 0x40 || self.must_always_emit() { + sink.put1(rex); + } + } +} + +/// Generate the proper Rex flags for the given operand size. +impl From<OperandSize> for RexFlags { + fn from(size: OperandSize) -> Self { + match size { + OperandSize::Size64 => RexFlags::set_w(), + _ => RexFlags::clear_w(), + } + } +} +/// Generate Rex flags for an OperandSize/register tuple. +impl From<(OperandSize, Reg)> for RexFlags { + fn from((size, reg): (OperandSize, Reg)) -> Self { + let mut rex = RexFlags::from(size); + if size == OperandSize::Size8 { + rex.always_emit_if_8bit_needed(reg); + } + rex + } +} + +/// Allows using the same opcode byte in different "opcode maps" to allow for more instruction +/// encodings. 
See appendix A in the Intel Software Developer's Manual, volume 2A, for more details. +#[allow(missing_docs)] +pub enum OpcodeMap { + None, + _0F, + _0F38, + _0F3A, +} + +impl OpcodeMap { + /// Normally the opcode map is specified as bytes in the instruction, but some x64 encoding + /// formats pack this information as bits in a prefix (e.g. EVEX). + pub(crate) fn bits(&self) -> u8 { + match self { + OpcodeMap::None => 0b00, + OpcodeMap::_0F => 0b01, + OpcodeMap::_0F38 => 0b10, + OpcodeMap::_0F3A => 0b11, + } + } +} + +impl Default for OpcodeMap { + fn default() -> Self { + Self::None + } +} + +/// We may need to include one or more legacy prefix bytes before the REX prefix. This enum +/// covers only the small set of possibilities that we actually need. +pub enum LegacyPrefixes { + /// No prefix bytes. + None, + /// Operand Size Override -- here, denoting "16-bit operation". + _66, + /// The Lock prefix. + _F0, + /// Operand size override and Lock. + _66F0, + /// REPNE, but no specific meaning here -- is just an opcode extension. + _F2, + /// REP/REPE, but no specific meaning here -- is just an opcode extension. + _F3, + /// Operand size override and same effect as F3. + _66F3, +} + +impl LegacyPrefixes { + /// Emit the legacy prefix as bytes (e.g. in REX instructions). + #[inline(always)] + pub(crate) fn emit(&self, sink: &mut MachBuffer) { + match self { + Self::_66 => sink.put1(0x66), + Self::_F0 => sink.put1(0xF0), + Self::_66F0 => { + // I don't think the order matters, but in any case, this is the same order that + // the GNU assembler uses. + sink.put1(0x66); + sink.put1(0xF0); + } + Self::_F2 => sink.put1(0xF2), + Self::_F3 => sink.put1(0xF3), + Self::_66F3 => { + sink.put1(0x66); + sink.put1(0xF3); + } + Self::None => (), + } + } + + /// Emit the legacy prefix as bits (e.g. for EVEX instructions). 
+ #[inline(always)] + pub(crate) fn bits(&self) -> u8 { + match self { + Self::None => 0b00, + Self::_66 => 0b01, + Self::_F3 => 0b10, + Self::_F2 => 0b11, + _ => panic!( + "VEX and EVEX bits can only be extracted from single prefixes: None, 66, F3, F2" + ), + } + } +} + +impl Default for LegacyPrefixes { + fn default() -> Self { + Self::None + } +} + +/// This is the core 'emit' function for instructions that reference memory. +/// +/// For an instruction that has as operands a reg encoding `enc_g` and a memory address `mem_e`, +/// create and emit: +/// - first the legacy prefixes, if any +/// - then the REX prefix, if needed +/// - then caller-supplied opcode byte(s) (`opcodes` and `num_opcodes`), +/// - then the MOD/RM byte, +/// - then optionally, a SIB byte, +/// - and finally optionally an immediate that will be derived from the `mem_e` operand. +/// +/// For most instructions up to and including SSE4.2, that will be the whole instruction: this is +/// what we call "standard" instructions, and abbreviate "std" in the name here. VEX-prefixed +/// instructions will require their own emitter functions. +/// +/// This will also work for 32-bits x86 instructions, assuming no REX prefix is provided. +/// +/// The opcodes are written bigendianly for the convenience of callers. For example, if the opcode +/// bytes to be emitted are, in this order, F3 0F 27, then the caller should pass `opcodes` == +/// 0xF3_0F_27 and `num_opcodes` == 3. +/// +/// The register operand is represented here not as a `Reg` but as its hardware encoding, `enc_g`. +/// `rex` can specify special handling for the REX prefix. By default, the REX prefix will +/// indicate a 64-bit operation and will be deleted if it is redundant (0x40). Note that for a +/// 64-bit operation, the REX prefix will normally never be redundant, since REX.W must be 1 to +/// indicate a 64-bit operation. 
+pub(crate) fn emit_std_enc_mem( + sink: &mut MachBuffer, + state: &EmitState, + info: &EmitInfo, + prefixes: LegacyPrefixes, + opcodes: u32, + mut num_opcodes: usize, + enc_g: u8, + mem_e: &Amode, + rex: RexFlags, +) { + // General comment for this function: the registers in `mem_e` must be + // 64-bit integer registers, because they are part of an address + // expression. But `enc_g` can be derived from a register of any class. + + let srcloc = state.cur_srcloc(); + let can_trap = mem_e.can_trap(); + if can_trap { + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + + prefixes.emit(sink); + + match mem_e { + Amode::ImmReg { simm32, base, .. } => { + // If this is an access based off of RSP, it may trap with a stack overflow if it's the + // first touch of a new stack page. + if *base == regs::rsp() && !can_trap && info.flags().enable_probestack() { + sink.add_trap(srcloc, TrapCode::StackOverflow); + } + + // First, the REX byte. + let enc_e = int_reg_enc(*base); + rex.emit_two_op(sink, enc_g, enc_e); + + // Now the opcode(s). These include any other prefixes the caller + // hands to us. + while num_opcodes > 0 { + num_opcodes -= 1; + sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8); + } + + // Now the mod/rm and associated immediates. This is + // significantly complicated due to the multiple special cases. + if *simm32 == 0 + && enc_e != regs::ENC_RSP + && enc_e != regs::ENC_RBP + && enc_e != regs::ENC_R12 + && enc_e != regs::ENC_R13 + { + // FIXME JRS 2020Feb11: those four tests can surely be + // replaced by a single mask-and-compare check. We should do + // that because this routine is likely to be hot. 
+ sink.put1(encode_modrm(0, enc_g & 7, enc_e & 7)); + } else if *simm32 == 0 && (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12) { + sink.put1(encode_modrm(0, enc_g & 7, 4)); + sink.put1(0x24); + } else if low8_will_sign_extend_to_32(*simm32) + && enc_e != regs::ENC_RSP + && enc_e != regs::ENC_R12 + { + sink.put1(encode_modrm(1, enc_g & 7, enc_e & 7)); + sink.put1((simm32 & 0xFF) as u8); + } else if enc_e != regs::ENC_RSP && enc_e != regs::ENC_R12 { + sink.put1(encode_modrm(2, enc_g & 7, enc_e & 7)); + sink.put4(*simm32); + } else if (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12) + && low8_will_sign_extend_to_32(*simm32) + { + // REX.B distinguishes RSP from R12 + sink.put1(encode_modrm(1, enc_g & 7, 4)); + sink.put1(0x24); + sink.put1((simm32 & 0xFF) as u8); + } else if enc_e == regs::ENC_R12 || enc_e == regs::ENC_RSP { + //.. wait for test case for RSP case + // REX.B distinguishes RSP from R12 + sink.put1(encode_modrm(2, enc_g & 7, 4)); + sink.put1(0x24); + sink.put4(*simm32); + } else { + unreachable!("ImmReg"); + } + } + + Amode::ImmRegRegShift { + simm32, + base: reg_base, + index: reg_index, + shift, + .. + } => { + // If this is an access based off of RSP, it may trap with a stack overflow if it's the + // first touch of a new stack page. + if *reg_base == regs::rsp() && !can_trap && info.flags().enable_probestack() { + sink.add_trap(srcloc, TrapCode::StackOverflow); + } + + let enc_base = int_reg_enc(*reg_base); + let enc_index = int_reg_enc(*reg_index); + + // The rex byte. + rex.emit_three_op(sink, enc_g, enc_index, enc_base); + + // All other prefixes and opcodes. + while num_opcodes > 0 { + num_opcodes -= 1; + sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8); + } + + // modrm, SIB, immediates. 
+ if low8_will_sign_extend_to_32(*simm32) && enc_index != regs::ENC_RSP { + sink.put1(encode_modrm(1, enc_g & 7, 4)); + sink.put1(encode_sib(*shift, enc_index & 7, enc_base & 7)); + sink.put1(*simm32 as u8); + } else if enc_index != regs::ENC_RSP { + sink.put1(encode_modrm(2, enc_g & 7, 4)); + sink.put1(encode_sib(*shift, enc_index & 7, enc_base & 7)); + sink.put4(*simm32); + } else { + panic!("ImmRegRegShift"); + } + } + + Amode::RipRelative { ref target } => { + // First, the REX byte, with REX.B = 0. + rex.emit_two_op(sink, enc_g, 0); + + // Now the opcode(s). These include any other prefixes the caller + // hands to us. + while num_opcodes > 0 { + num_opcodes -= 1; + sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8); + } + + // RIP-relative is mod=00, rm=101. + sink.put1(encode_modrm(0, enc_g & 7, 0b101)); + + let offset = sink.cur_offset(); + sink.use_label_at_offset(offset, *target, LabelUse::JmpRel32); + sink.put4(0); + } + } +} + +/// This is the core 'emit' function for instructions that do not reference memory. +/// +/// This is conceptually the same as emit_modrm_sib_enc_ge, except it is for the case where the E +/// operand is a register rather than memory. Hence it is much simpler. +pub(crate) fn emit_std_enc_enc( + sink: &mut MachBuffer, + prefixes: LegacyPrefixes, + opcodes: u32, + mut num_opcodes: usize, + enc_g: u8, + enc_e: u8, + rex: RexFlags, +) { + // EncG and EncE can be derived from registers of any class, and they + // don't even have to be from the same class. For example, for an + // integer-to-FP conversion insn, one might be RegClass::I64 and the other + // RegClass::V128. + + // The legacy prefixes. + prefixes.emit(sink); + + // The rex byte. + rex.emit_two_op(sink, enc_g, enc_e); + + // All other prefixes and opcodes. + while num_opcodes > 0 { + num_opcodes -= 1; + sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8); + } + + // Now the mod/rm byte. 
The instruction we're generating doesn't access + // memory, so there is no SIB byte or immediate -- we're done. + sink.put1(encode_modrm(3, enc_g & 7, enc_e & 7)); +} + +// These are merely wrappers for the above two functions that facilitate passing +// actual `Reg`s rather than their encodings. + +pub(crate) fn emit_std_reg_mem( + sink: &mut MachBuffer, + state: &EmitState, + info: &EmitInfo, + prefixes: LegacyPrefixes, + opcodes: u32, + num_opcodes: usize, + reg_g: Reg, + mem_e: &Amode, + rex: RexFlags, +) { + let enc_g = reg_enc(reg_g); + emit_std_enc_mem( + sink, + state, + info, + prefixes, + opcodes, + num_opcodes, + enc_g, + mem_e, + rex, + ); +} + +pub(crate) fn emit_std_reg_reg( + sink: &mut MachBuffer, + prefixes: LegacyPrefixes, + opcodes: u32, + num_opcodes: usize, + reg_g: Reg, + reg_e: Reg, + rex: RexFlags, +) { + let enc_g = reg_enc(reg_g); + let enc_e = reg_enc(reg_e); + emit_std_enc_enc(sink, prefixes, opcodes, num_opcodes, enc_g, enc_e, rex); +} + +/// Write a suitable number of bits from an imm64 to the sink. +pub(crate) fn emit_simm(sink: &mut MachBuffer, size: u8, simm32: u32) { + match size { + 8 | 4 => sink.put4(simm32), + 2 => sink.put2(simm32 as u16), + 1 => sink.put1(simm32 as u8), + _ => unreachable!(), + } +} diff --git a/cranelift/codegen/src/isa/x64/encoding/vex.rs b/cranelift/codegen/src/isa/x64/encoding/vex.rs new file mode 100644 index 0000000000..f2f3feebba --- /dev/null +++ b/cranelift/codegen/src/isa/x64/encoding/vex.rs @@ -0,0 +1,2 @@ +//! Encodes VEX instructions. These instructions are those added by the Advanced Vector Extensions +//! (AVX). 
diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index f2c6feb3c1..b54f1b6126 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -10,6 +10,7 @@ use regalloc::{ PrettyPrint, PrettyPrintSized, RealRegUniverse, Reg, RegClass, RegUsageCollector, RegUsageMapper, Writable, }; +use smallvec::{smallvec, SmallVec}; use std::fmt; use std::string::String; @@ -411,12 +412,12 @@ pub enum UnaryRmROpcode { } impl UnaryRmROpcode { - pub(crate) fn available_from(&self) -> Option { + pub(crate) fn available_from(&self) -> SmallVec<[InstructionSet; 2]> { match self { - UnaryRmROpcode::Bsr | UnaryRmROpcode::Bsf => None, - UnaryRmROpcode::Lzcnt => Some(InstructionSet::Lzcnt), - UnaryRmROpcode::Tzcnt => Some(InstructionSet::BMI1), - UnaryRmROpcode::Popcnt => Some(InstructionSet::Popcnt), + UnaryRmROpcode::Bsr | UnaryRmROpcode::Bsf => smallvec![], + UnaryRmROpcode::Lzcnt => smallvec![InstructionSet::Lzcnt], + UnaryRmROpcode::Tzcnt => smallvec![InstructionSet::BMI1], + UnaryRmROpcode::Popcnt => smallvec![InstructionSet::Popcnt], } } } @@ -447,6 +448,7 @@ pub enum CmpOpcode { Test, } +#[derive(Debug)] pub(crate) enum InstructionSet { SSE, SSE2, @@ -458,10 +460,13 @@ pub(crate) enum InstructionSet { BMI1, #[allow(dead_code)] // never constructed (yet). BMI2, + AVX512F, + AVX512VL, } /// Some SSE operations requiring 2 operands r/m and r. 
#[derive(Clone, Copy, PartialEq)] +#[allow(dead_code)] // some variants here aren't used just yet pub enum SseOpcode { Addps, Addpd, @@ -479,6 +484,7 @@ pub enum SseOpcode { Cmpss, Cmpsd, Cvtdq2ps, + Cvtdq2pd, Cvtsd2ss, Cvtsd2si, Cvtsi2ss, @@ -672,6 +678,7 @@ impl SseOpcode { | SseOpcode::Cmpsd | SseOpcode::Comisd | SseOpcode::Cvtdq2ps + | SseOpcode::Cvtdq2pd | SseOpcode::Cvtsd2ss | SseOpcode::Cvtsd2si | SseOpcode::Cvtsi2sd @@ -827,6 +834,7 @@ impl fmt::Debug for SseOpcode { SseOpcode::Comiss => "comiss", SseOpcode::Comisd => "comisd", SseOpcode::Cvtdq2ps => "cvtdq2ps", + SseOpcode::Cvtdq2pd => "cvtdq2pd", SseOpcode::Cvtsd2ss => "cvtsd2ss", SseOpcode::Cvtsd2si => "cvtsd2si", SseOpcode::Cvtsi2ss => "cvtsi2ss", @@ -983,6 +991,35 @@ impl fmt::Display for SseOpcode { } } +#[derive(Clone)] +pub enum Avx512Opcode { + Vpabsq, +} + +impl Avx512Opcode { + /// Which `InstructionSet`s support the opcode? + pub(crate) fn available_from(&self) -> SmallVec<[InstructionSet; 2]> { + match self { + Avx512Opcode::Vpabsq => smallvec![InstructionSet::AVX512F, InstructionSet::AVX512VL], + } + } +} + +impl fmt::Debug for Avx512Opcode { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + let name = match self { + Avx512Opcode::Vpabsq => "vpabsq", + }; + write!(fmt, "{}", name) + } +} + +impl fmt::Display for Avx512Opcode { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(self, f) + } +} + /// This defines the ways a value can be extended: either signed- or zero-extension, or none for /// types that are not extended. Contrast with [ExtMode], which defines the widths from and to which /// values can be extended. 
diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 2e8e7d9d15..0bd74ecd8b 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -2,449 +2,18 @@ use crate::binemit::{Addend, Reloc}; use crate::ir::immediates::{Ieee32, Ieee64}; use crate::ir::LibCall; use crate::ir::TrapCode; +use crate::isa::x64::encoding::evex::{EvexInstruction, EvexVectorLength}; +use crate::isa::x64::encoding::rex::{ + emit_simm, emit_std_enc_enc, emit_std_enc_mem, emit_std_reg_mem, emit_std_reg_reg, int_reg_enc, + low8_will_sign_extend_to_32, low8_will_sign_extend_to_64, reg_enc, LegacyPrefixes, OpcodeMap, + RexFlags, +}; use crate::isa::x64::inst::args::*; use crate::isa::x64::inst::*; use crate::machinst::{inst_common, MachBuffer, MachInstEmit, MachLabel}; use core::convert::TryInto; use log::debug; -use regalloc::{Reg, RegClass, Writable}; - -fn low8_will_sign_extend_to_64(x: u32) -> bool { - let xs = (x as i32) as i64; - xs == ((xs << 56) >> 56) -} - -fn low8_will_sign_extend_to_32(x: u32) -> bool { - let xs = x as i32; - xs == ((xs << 24) >> 24) -} - -//============================================================================= -// Instructions and subcomponents: emission - -// For all of the routines that take both a memory-or-reg operand (sometimes -// called "E" in the Intel documentation) and a reg-only operand ("G" in -// Intelese), the order is always G first, then E. -// -// "enc" in the following means "hardware register encoding number". 
- -#[inline(always)] -fn encode_modrm(m0d: u8, enc_reg_g: u8, rm_e: u8) -> u8 { - debug_assert!(m0d < 4); - debug_assert!(enc_reg_g < 8); - debug_assert!(rm_e < 8); - ((m0d & 3) << 6) | ((enc_reg_g & 7) << 3) | (rm_e & 7) -} - -#[inline(always)] -fn encode_sib(shift: u8, enc_index: u8, enc_base: u8) -> u8 { - debug_assert!(shift < 4); - debug_assert!(enc_index < 8); - debug_assert!(enc_base < 8); - ((shift & 3) << 6) | ((enc_index & 7) << 3) | (enc_base & 7) -} - -/// Get the encoding number of a GPR. -#[inline(always)] -fn int_reg_enc(reg: Reg) -> u8 { - debug_assert!(reg.is_real()); - debug_assert_eq!(reg.get_class(), RegClass::I64); - reg.get_hw_encoding() -} - -/// Get the encoding number of any register. -#[inline(always)] -fn reg_enc(reg: Reg) -> u8 { - debug_assert!(reg.is_real()); - reg.get_hw_encoding() -} - -/// A small bit field to record a REX prefix specification: -/// - bit 0 set to 1 indicates REX.W must be 0 (cleared). -/// - bit 1 set to 1 indicates the REX prefix must always be emitted. -#[repr(transparent)] -#[derive(Clone, Copy)] -struct RexFlags(u8); - -impl RexFlags { - /// By default, set the W field, and don't always emit. - #[inline(always)] - fn set_w() -> Self { - Self(0) - } - /// Creates a new RexPrefix for which the REX.W bit will be cleared. 
- #[inline(always)] - fn clear_w() -> Self { - Self(1) - } - - #[inline(always)] - fn always_emit(&mut self) -> &mut Self { - self.0 = self.0 | 2; - self - } - - #[inline(always)] - fn always_emit_if_8bit_needed(&mut self, reg: Reg) -> &mut Self { - let enc_reg = int_reg_enc(reg); - if enc_reg >= 4 && enc_reg <= 7 { - self.always_emit(); - } - self - } - - #[inline(always)] - fn must_clear_w(&self) -> bool { - (self.0 & 1) != 0 - } - #[inline(always)] - fn must_always_emit(&self) -> bool { - (self.0 & 2) != 0 - } - - #[inline(always)] - fn emit_two_op(&self, sink: &mut MachBuffer, enc_g: u8, enc_e: u8) { - let w = if self.must_clear_w() { 0 } else { 1 }; - let r = (enc_g >> 3) & 1; - let x = 0; - let b = (enc_e >> 3) & 1; - let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b; - if rex != 0x40 || self.must_always_emit() { - sink.put1(rex); - } - } - - #[inline(always)] - fn emit_three_op(&self, sink: &mut MachBuffer, enc_g: u8, enc_index: u8, enc_base: u8) { - let w = if self.must_clear_w() { 0 } else { 1 }; - let r = (enc_g >> 3) & 1; - let x = (enc_index >> 3) & 1; - let b = (enc_base >> 3) & 1; - let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b; - if rex != 0x40 || self.must_always_emit() { - sink.put1(rex); - } - } -} - -/// Generate the proper Rex flags for the given operand size. -impl From for RexFlags { - fn from(size: OperandSize) -> Self { - match size { - OperandSize::Size64 => RexFlags::set_w(), - _ => RexFlags::clear_w(), - } - } -} -/// Generate Rex flags for an OperandSize/register tuple. -impl From<(OperandSize, Reg)> for RexFlags { - fn from((size, reg): (OperandSize, Reg)) -> Self { - let mut rex = RexFlags::from(size); - if size == OperandSize::Size8 { - rex.always_emit_if_8bit_needed(reg); - } - rex - } -} - -/// We may need to include one or more legacy prefix bytes before the REX prefix. This enum -/// covers only the small set of possibilities that we actually need. -enum LegacyPrefixes { - /// No prefix bytes. 
- None, - /// Operand Size Override -- here, denoting "16-bit operation". - _66, - /// The Lock prefix. - _F0, - /// Operand size override and Lock. - _66F0, - /// REPNE, but no specific meaning here -- is just an opcode extension. - _F2, - /// REP/REPE, but no specific meaning here -- is just an opcode extension. - _F3, - /// Operand size override and same effect as F3. - _66F3, -} - -impl LegacyPrefixes { - #[inline(always)] - fn emit(&self, sink: &mut MachBuffer) { - match self { - LegacyPrefixes::_66 => sink.put1(0x66), - LegacyPrefixes::_F0 => sink.put1(0xF0), - LegacyPrefixes::_66F0 => { - // I don't think the order matters, but in any case, this is the same order that - // the GNU assembler uses. - sink.put1(0x66); - sink.put1(0xF0); - } - LegacyPrefixes::_F2 => sink.put1(0xF2), - LegacyPrefixes::_F3 => sink.put1(0xF3), - LegacyPrefixes::_66F3 => { - sink.put1(0x66); - sink.put1(0xF3); - } - LegacyPrefixes::None => (), - } - } -} - -/// This is the core 'emit' function for instructions that reference memory. -/// -/// For an instruction that has as operands a reg encoding `enc_g` and a memory address `mem_e`, -/// create and emit: -/// - first the legacy prefixes, if any -/// - then the REX prefix, if needed -/// - then caller-supplied opcode byte(s) (`opcodes` and `num_opcodes`), -/// - then the MOD/RM byte, -/// - then optionally, a SIB byte, -/// - and finally optionally an immediate that will be derived from the `mem_e` operand. -/// -/// For most instructions up to and including SSE4.2, that will be the whole instruction: this is -/// what we call "standard" instructions, and abbreviate "std" in the name here. VEX-prefixed -/// instructions will require their own emitter functions. -/// -/// This will also work for 32-bits x86 instructions, assuming no REX prefix is provided. -/// -/// The opcodes are written bigendianly for the convenience of callers. 
For example, if the opcode -/// bytes to be emitted are, in this order, F3 0F 27, then the caller should pass `opcodes` == -/// 0xF3_0F_27 and `num_opcodes` == 3. -/// -/// The register operand is represented here not as a `Reg` but as its hardware encoding, `enc_g`. -/// `rex` can specify special handling for the REX prefix. By default, the REX prefix will -/// indicate a 64-bit operation and will be deleted if it is redundant (0x40). Note that for a -/// 64-bit operation, the REX prefix will normally never be redundant, since REX.W must be 1 to -/// indicate a 64-bit operation. -fn emit_std_enc_mem( - sink: &mut MachBuffer, - state: &EmitState, - info: &EmitInfo, - prefixes: LegacyPrefixes, - opcodes: u32, - mut num_opcodes: usize, - enc_g: u8, - mem_e: &Amode, - rex: RexFlags, -) { - // General comment for this function: the registers in `mem_e` must be - // 64-bit integer registers, because they are part of an address - // expression. But `enc_g` can be derived from a register of any class. - - let srcloc = state.cur_srcloc(); - let can_trap = mem_e.can_trap(); - if can_trap { - sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); - } - - prefixes.emit(sink); - - match mem_e { - Amode::ImmReg { simm32, base, .. } => { - // If this is an access based off of RSP, it may trap with a stack overflow if it's the - // first touch of a new stack page. - if *base == regs::rsp() && !can_trap && info.flags().enable_probestack() { - sink.add_trap(srcloc, TrapCode::StackOverflow); - } - - // First, the REX byte. - let enc_e = int_reg_enc(*base); - rex.emit_two_op(sink, enc_g, enc_e); - - // Now the opcode(s). These include any other prefixes the caller - // hands to us. - while num_opcodes > 0 { - num_opcodes -= 1; - sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8); - } - - // Now the mod/rm and associated immediates. This is - // significantly complicated due to the multiple special cases. 
- if *simm32 == 0 - && enc_e != regs::ENC_RSP - && enc_e != regs::ENC_RBP - && enc_e != regs::ENC_R12 - && enc_e != regs::ENC_R13 - { - // FIXME JRS 2020Feb11: those four tests can surely be - // replaced by a single mask-and-compare check. We should do - // that because this routine is likely to be hot. - sink.put1(encode_modrm(0, enc_g & 7, enc_e & 7)); - } else if *simm32 == 0 && (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12) { - sink.put1(encode_modrm(0, enc_g & 7, 4)); - sink.put1(0x24); - } else if low8_will_sign_extend_to_32(*simm32) - && enc_e != regs::ENC_RSP - && enc_e != regs::ENC_R12 - { - sink.put1(encode_modrm(1, enc_g & 7, enc_e & 7)); - sink.put1((simm32 & 0xFF) as u8); - } else if enc_e != regs::ENC_RSP && enc_e != regs::ENC_R12 { - sink.put1(encode_modrm(2, enc_g & 7, enc_e & 7)); - sink.put4(*simm32); - } else if (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12) - && low8_will_sign_extend_to_32(*simm32) - { - // REX.B distinguishes RSP from R12 - sink.put1(encode_modrm(1, enc_g & 7, 4)); - sink.put1(0x24); - sink.put1((simm32 & 0xFF) as u8); - } else if enc_e == regs::ENC_R12 || enc_e == regs::ENC_RSP { - //.. wait for test case for RSP case - // REX.B distinguishes RSP from R12 - sink.put1(encode_modrm(2, enc_g & 7, 4)); - sink.put1(0x24); - sink.put4(*simm32); - } else { - unreachable!("ImmReg"); - } - } - - Amode::ImmRegRegShift { - simm32, - base: reg_base, - index: reg_index, - shift, - .. - } => { - // If this is an access based off of RSP, it may trap with a stack overflow if it's the - // first touch of a new stack page. - if *reg_base == regs::rsp() && !can_trap && info.flags().enable_probestack() { - sink.add_trap(srcloc, TrapCode::StackOverflow); - } - - let enc_base = int_reg_enc(*reg_base); - let enc_index = int_reg_enc(*reg_index); - - // The rex byte. - rex.emit_three_op(sink, enc_g, enc_index, enc_base); - - // All other prefixes and opcodes. 
- while num_opcodes > 0 { - num_opcodes -= 1; - sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8); - } - - // modrm, SIB, immediates. - if low8_will_sign_extend_to_32(*simm32) && enc_index != regs::ENC_RSP { - sink.put1(encode_modrm(1, enc_g & 7, 4)); - sink.put1(encode_sib(*shift, enc_index & 7, enc_base & 7)); - sink.put1(*simm32 as u8); - } else if enc_index != regs::ENC_RSP { - sink.put1(encode_modrm(2, enc_g & 7, 4)); - sink.put1(encode_sib(*shift, enc_index & 7, enc_base & 7)); - sink.put4(*simm32); - } else { - panic!("ImmRegRegShift"); - } - } - - Amode::RipRelative { ref target } => { - // First, the REX byte, with REX.B = 0. - rex.emit_two_op(sink, enc_g, 0); - - // Now the opcode(s). These include any other prefixes the caller - // hands to us. - while num_opcodes > 0 { - num_opcodes -= 1; - sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8); - } - - // RIP-relative is mod=00, rm=101. - sink.put1(encode_modrm(0, enc_g & 7, 0b101)); - - let offset = sink.cur_offset(); - sink.use_label_at_offset(offset, *target, LabelUse::JmpRel32); - sink.put4(0); - } - } -} - -/// This is the core 'emit' function for instructions that do not reference memory. -/// -/// This is conceptually the same as emit_modrm_sib_enc_ge, except it is for the case where the E -/// operand is a register rather than memory. Hence it is much simpler. -fn emit_std_enc_enc( - sink: &mut MachBuffer, - prefixes: LegacyPrefixes, - opcodes: u32, - mut num_opcodes: usize, - enc_g: u8, - enc_e: u8, - rex: RexFlags, -) { - // EncG and EncE can be derived from registers of any class, and they - // don't even have to be from the same class. For example, for an - // integer-to-FP conversion insn, one might be RegClass::I64 and the other - // RegClass::V128. - - // The legacy prefixes. - prefixes.emit(sink); - - // The rex byte. - rex.emit_two_op(sink, enc_g, enc_e); - - // All other prefixes and opcodes. 
- while num_opcodes > 0 { - num_opcodes -= 1; - sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8); - } - - // Now the mod/rm byte. The instruction we're generating doesn't access - // memory, so there is no SIB byte or immediate -- we're done. - sink.put1(encode_modrm(3, enc_g & 7, enc_e & 7)); -} - -// These are merely wrappers for the above two functions that facilitate passing -// actual `Reg`s rather than their encodings. - -fn emit_std_reg_mem( - sink: &mut MachBuffer, - state: &EmitState, - info: &EmitInfo, - prefixes: LegacyPrefixes, - opcodes: u32, - num_opcodes: usize, - reg_g: Reg, - mem_e: &Amode, - rex: RexFlags, -) { - let enc_g = reg_enc(reg_g); - emit_std_enc_mem( - sink, - state, - info, - prefixes, - opcodes, - num_opcodes, - enc_g, - mem_e, - rex, - ); -} - -fn emit_std_reg_reg( - sink: &mut MachBuffer, - prefixes: LegacyPrefixes, - opcodes: u32, - num_opcodes: usize, - reg_g: Reg, - reg_e: Reg, - rex: RexFlags, -) { - let enc_g = reg_enc(reg_g); - let enc_e = reg_enc(reg_e); - emit_std_enc_enc(sink, prefixes, opcodes, num_opcodes, enc_g, enc_e, rex); -} - -/// Write a suitable number of bits from an imm64 to the sink. -fn emit_simm(sink: &mut MachBuffer, size: u8, simm32: u32) { - match size { - 8 | 4 => sink.put4(simm32), - 2 => sink.put2(simm32 as u16), - 1 => sink.put1(simm32 as u8), - _ => unreachable!(), - } -} +use regalloc::{Reg, Writable}; /// A small helper to generate a signed conversion instruction. fn emit_signed_cvt( @@ -546,18 +115,30 @@ pub(crate) fn emit( info: &EmitInfo, state: &mut EmitState, ) { - if let Some(iset_requirement) = inst.isa_requirement() { + let matches_isa_flags = |iset_requirement: &InstructionSet| -> bool { match iset_requirement { // Cranelift assumes SSE2 at least. 
- InstructionSet::SSE | InstructionSet::SSE2 => {} - InstructionSet::SSSE3 => assert!(info.isa_flags.use_ssse3()), - InstructionSet::SSE41 => assert!(info.isa_flags.use_sse41()), - InstructionSet::SSE42 => assert!(info.isa_flags.use_sse42()), - InstructionSet::Popcnt => assert!(info.isa_flags.use_popcnt()), - InstructionSet::Lzcnt => assert!(info.isa_flags.use_lzcnt()), - InstructionSet::BMI1 => assert!(info.isa_flags.use_bmi1()), - InstructionSet::BMI2 => assert!(info.isa_flags.has_bmi2()), + InstructionSet::SSE | InstructionSet::SSE2 => true, + InstructionSet::SSSE3 => info.isa_flags.use_ssse3(), + InstructionSet::SSE41 => info.isa_flags.use_sse41(), + InstructionSet::SSE42 => info.isa_flags.use_sse42(), + InstructionSet::Popcnt => info.isa_flags.use_popcnt(), + InstructionSet::Lzcnt => info.isa_flags.use_lzcnt(), + InstructionSet::BMI1 => info.isa_flags.use_bmi1(), + InstructionSet::BMI2 => info.isa_flags.has_bmi2(), + InstructionSet::AVX512F => info.isa_flags.has_avx512f(), + InstructionSet::AVX512VL => info.isa_flags.has_avx512vl(), } + }; + + // Certain instructions may be present in more than one ISA feature set; we must at least match + // one of them in the target CPU. + let isa_requirements = inst.available_in_any_isa(); + if !isa_requirements.is_empty() && !isa_requirements.iter().any(matches_isa_flags) { + panic!( + "Cannot emit inst '{:?}' for target; failed to match ISA requirements: {:?}", + inst, isa_requirements + ) } match inst { @@ -887,7 +468,6 @@ pub(crate) fn emit( // idiv %divisor // // $done: - debug_assert!(info.flags().avoid_div_traps()); // Check if the divisor is zero, first. let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0), divisor.to_reg()); @@ -911,7 +491,7 @@ pub(crate) fn emit( // x % -1 = 0; put the result into the destination, $rdx. 
let done_label = sink.get_label(); - let inst = Inst::imm(*size, 0, Writable::from_reg(regs::rdx())); + let inst = Inst::imm(OperandSize::Size64, 0, Writable::from_reg(regs::rdx())); inst.emit(sink, info, state); let inst = Inst::jmp_known(done_label); @@ -951,11 +531,6 @@ pub(crate) fn emit( sink.bind_label(do_op); } - assert!( - *size != OperandSize::Size8, - "CheckedDivOrRemSeq for i8 is not yet implemented" - ); - // Fill in the high parts: if kind.is_signed() { // sign-extend the sign-bit of rax into rdx, for signed opcodes. @@ -1769,6 +1344,7 @@ pub(crate) fn emit( let rex = RexFlags::clear_w(); let (prefix, opcode, num_opcodes) = match op { + SseOpcode::Cvtdq2pd => (LegacyPrefixes::_F3, 0x0FE6, 2), SseOpcode::Cvtss2sd => (LegacyPrefixes::_F3, 0x0F5A, 2), SseOpcode::Cvtsd2ss => (LegacyPrefixes::_F2, 0x0F5A, 2), SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F28, 2), @@ -1830,6 +1406,24 @@ pub(crate) fn emit( }; } + Inst::XmmUnaryRmREvex { op, src, dst } => { + let opcode = match op { + Avx512Opcode::Vpabsq => 0x1f, + }; + match src { + RegMem::Reg { reg: src } => EvexInstruction::new() + .length(EvexVectorLength::V128) + .prefix(LegacyPrefixes::_66) + .map(OpcodeMap::_0F38) + .w(true) + .opcode(opcode) + .reg(dst.to_reg().get_hw_encoding()) + .rm(src.get_hw_encoding()) + .encode(sink), + _ => todo!(), + }; + } + Inst::XmmRmR { op, src: src_e, diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index e407910c2d..f03762b97b 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -3859,6 +3859,18 @@ fn test_x64_emit() { "pabsd %xmm10, %xmm11", )); + insns.push(( + Inst::xmm_unary_rm_r(SseOpcode::Cvtdq2pd, RegMem::reg(xmm2), w_xmm8), + "F3440FE6C2", + "cvtdq2pd %xmm2, %xmm8", + )); + + insns.push(( + Inst::xmm_unary_rm_r_evex(Avx512Opcode::Vpabsq, RegMem::reg(xmm2), w_xmm8), + "6272FD081FC2", + "vpabsq %xmm2, %xmm8", + )); + // Xmm to int 
conversions, and conversely. insns.push(( @@ -4270,6 +4282,7 @@ fn test_x64_emit() { let mut isa_flag_builder = x64::settings::builder(); isa_flag_builder.enable("has_ssse3").unwrap(); isa_flag_builder.enable("has_sse41").unwrap(); + isa_flag_builder.enable("has_avx512f").unwrap(); let isa_flags = x64::settings::Flags::new(&flags, isa_flag_builder); let rru = regs::create_reg_universe_systemv(&flags); diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 5a1b5eeaad..fe89ac4c90 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -14,7 +14,7 @@ use regalloc::{ PrettyPrint, PrettyPrintSized, RealRegUniverse, Reg, RegClass, RegUsageCollector, RegUsageMapper, SpillSlot, VirtualReg, Writable, }; -use smallvec::SmallVec; +use smallvec::{smallvec, SmallVec}; use std::fmt; use std::string::{String, ToString}; @@ -224,6 +224,12 @@ pub enum Inst { dst: Writable, }, + XmmUnaryRmREvex { + op: Avx512Opcode, + src: RegMem, + dst: Writable, + }, + /// XMM (scalar or vector) unary op (from xmm to reg/mem): stores, movd, movq XmmMovRM { op: SseOpcode, @@ -501,7 +507,11 @@ pub(crate) fn low32_will_sign_extend_to_64(x: u64) -> bool { } impl Inst { - fn isa_requirement(&self) -> Option { + /// Retrieve a list of ISA feature sets in which the instruction is available. An empty list + /// indicates that the instruction is available in the baseline feature set (i.e. SSE2 and + /// below); more than one `InstructionSet` in the list indicates that the instruction is present + /// *any* of the included ISA feature sets. + fn available_in_any_isa(&self) -> SmallVec<[InstructionSet; 2]> { match self { // These instructions are part of SSE2, which is a basic requirement in Cranelift, and // don't have to be checked. @@ -554,7 +564,7 @@ impl Inst { | Inst::ElfTlsGetAddr { .. } | Inst::MachOTlsGetAddr { .. } | Inst::ValueLabelMarker { .. } - | Inst::Unwind { .. } => None, + | Inst::Unwind { .. 
} => smallvec![], Inst::UnaryRmR { op, .. } => op.available_from(), @@ -565,7 +575,9 @@ impl Inst { | Inst::XmmRmR { op, .. } | Inst::XmmRmRImm { op, .. } | Inst::XmmToGpr { op, .. } - | Inst::XmmUnaryRmR { op, .. } => Some(op.available_from()), + | Inst::XmmUnaryRmR { op, .. } => smallvec![op.available_from()], + + Inst::XmmUnaryRmREvex { op, .. } => op.available_from(), } } } @@ -700,6 +712,12 @@ impl Inst { Inst::XmmUnaryRmR { op, src, dst } } + pub(crate) fn xmm_unary_rm_r_evex(op: Avx512Opcode, src: RegMem, dst: Writable) -> Inst { + src.assert_regclass_is(RegClass::V128); + debug_assert!(dst.to_reg().get_class() == RegClass::V128); + Inst::XmmUnaryRmREvex { op, src, dst } + } + pub(crate) fn xmm_rm_r(op: SseOpcode, src: RegMem, dst: Writable) -> Self { src.assert_regclass_is(RegClass::V128); debug_assert!(dst.to_reg().get_class() == RegClass::V128); @@ -1121,11 +1139,7 @@ impl Inst { pub(crate) fn store(ty: Type, from_reg: Reg, to_addr: impl Into) -> Inst { let rc = from_reg.get_class(); match rc { - RegClass::I64 => { - // Always store the full register, to ensure that the high bits are properly set - // when doing a full reload. - Inst::mov_r_m(OperandSize::Size64, from_reg, to_addr) - } + RegClass::I64 => Inst::mov_r_m(OperandSize::from_ty(ty), from_reg, to_addr), RegClass::V128 => { let opcode = match ty { types::F32 => SseOpcode::Movss, @@ -1390,6 +1404,13 @@ impl PrettyPrint for Inst { show_ireg_sized(dst.to_reg(), mb_rru, 8), ), + Inst::XmmUnaryRmREvex { op, src, dst, .. } => format!( + "{} {}, {}", + ljustify(op.to_string()), + src.show_rru_sized(mb_rru, 8), + show_ireg_sized(dst.to_reg(), mb_rru, 8), + ), + Inst::XmmMovRM { op, src, dst, .. } => format!( "{} {}, {}", ljustify(op.to_string()), @@ -1862,7 +1883,9 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { collector.add_def(Writable::from_reg(regs::rdx())); } }, - Inst::UnaryRmR { src, dst, .. } | Inst::XmmUnaryRmR { src, dst, .. } => { + Inst::UnaryRmR { src, dst, .. 
} + | Inst::XmmUnaryRmR { src, dst, .. } + | Inst::XmmUnaryRmREvex { src, dst, .. } => { src.get_regs_as_uses(collector); collector.add_def(*dst); } @@ -2209,6 +2232,11 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { ref mut dst, .. } + | Inst::XmmUnaryRmREvex { + ref mut src, + ref mut dst, + .. + } | Inst::UnaryRmR { ref mut src, ref mut dst, @@ -2827,7 +2855,7 @@ impl EmitState { self.stack_map = None; } - fn cur_srcloc(&self) -> SourceLoc { + pub(crate) fn cur_srcloc(&self) -> SourceLoc { self.cur_srcloc } } diff --git a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs index d57f178dc2..9115db0671 100644 --- a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs @@ -89,8 +89,8 @@ impl crate::isa::unwind::systemv::RegisterMapper for RegisterMapper { fn sp(&self) -> u16 { X86_64::RSP.0 } - fn fp(&self) -> u16 { - X86_64::RBP.0 + fn fp(&self) -> Option { + Some(X86_64::RBP.0) } } @@ -109,6 +109,7 @@ mod tests { use target_lexicon::triple; #[test] + #[cfg_attr(feature = "old-x86-backend", ignore)] fn test_simple_func() { let isa = lookup(triple!("x86_64")) .expect("expect x86 ISA") @@ -151,6 +152,7 @@ mod tests { } #[test] + #[cfg_attr(feature = "old-x86-backend", ignore)] fn test_multi_return_func() { let isa = lookup(triple!("x86_64")) .expect("expect x86 ISA") diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 190462caaf..a1969d5642 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -204,6 +204,7 @@ enum ExtSpec { ZeroExtendTo32, ZeroExtendTo64, SignExtendTo32, + #[allow(dead_code)] // not used just yet but may be used in the future! 
SignExtendTo64, } @@ -1854,25 +1855,29 @@ fn lower_insn_to_regs>( let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let ty = ty.unwrap(); if ty == types::I64X2 { - // This lowering could be a single instruction with AVX512F/VL's VPABSQ instruction. - // Instead, we use a separate register, `tmp`, to contain the results of `0 - src` - // and then blend in those results with `BLENDVPD` if the MSB of `tmp` was set to 1 - // (i.e. if `tmp` was negative or, conversely, if `src` was originally positive). + if isa_flags.use_avx512f_simd() || isa_flags.use_avx512vl_simd() { + ctx.emit(Inst::xmm_unary_rm_r_evex(Avx512Opcode::Vpabsq, src, dst)); + } else { + // If `VPABSQ` from AVX512 is unavailable, we use a separate register, `tmp`, to + // contain the results of `0 - src` and then blend in those results with + // `BLENDVPD` if the MSB of `tmp` was set to 1 (i.e. if `tmp` was negative or, + // conversely, if `src` was originally positive). - // Emit all 0s into the `tmp` register. - let tmp = ctx.alloc_tmp(ty).only_reg().unwrap(); - ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), tmp)); - // Subtract the lanes from 0 and set up `dst`. - ctx.emit(Inst::xmm_rm_r(SseOpcode::Psubq, src.clone(), tmp)); - ctx.emit(Inst::gen_move(dst, tmp.to_reg(), ty)); - // Choose the subtracted lanes when `tmp` has an MSB of 1. BLENDVPD's semantics - // require the "choice" mask to be in XMM0. - ctx.emit(Inst::gen_move( - Writable::from_reg(regs::xmm0()), - tmp.to_reg(), - ty, - )); - ctx.emit(Inst::xmm_rm_r(SseOpcode::Blendvpd, src, dst)); + // Emit all 0s into the `tmp` register. + let tmp = ctx.alloc_tmp(ty).only_reg().unwrap(); + ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), tmp)); + // Subtract the lanes from 0 and set up `dst`. + ctx.emit(Inst::xmm_rm_r(SseOpcode::Psubq, src.clone(), tmp)); + ctx.emit(Inst::gen_move(dst, tmp.to_reg(), ty)); + // Choose the subtracted lanes when `tmp` has an MSB of 1. 
BLENDVPD's semantics + // require the "choice" mask to be in XMM0. + ctx.emit(Inst::gen_move( + Writable::from_reg(regs::xmm0()), + tmp.to_reg(), + ty, + )); + ctx.emit(Inst::xmm_rm_r(SseOpcode::Blendvpd, src, dst)); + } } else if ty.is_vector() { let opcode = match ty { types::I8X16 => SseOpcode::Pabsb, @@ -2041,7 +2046,7 @@ fn lower_insn_to_regs>( } ctx.emit(Inst::shift_r(size, shift_kind, count, dst)); } else if dst_ty == types::I128 { - let amt_src = put_input_in_reg(ctx, inputs[1]); + let amt_src = put_input_in_regs(ctx, inputs[1]).regs()[0]; let src = put_input_in_regs(ctx, inputs[0]); let dst = get_output_reg(ctx, outputs[0]); @@ -3914,7 +3919,15 @@ fn lower_insn_to_regs>( ctx.emit(Inst::xmm_rm_r(opcode, RegMem::from(dst), dst)); } } - + Opcode::FcvtLowFromSint => { + let src = RegMem::reg(put_input_in_reg(ctx, inputs[0])); + let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + ctx.emit(Inst::xmm_unary_rm_r( + SseOpcode::Cvtdq2pd, + RegMem::from(src), + dst, + )); + } Opcode::FcvtFromUint => { let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let ty = ty.unwrap(); @@ -4813,28 +4826,11 @@ fn lower_insn_to_regs>( if elem_ty == types::I128 { let srcs = put_input_in_regs(ctx, inputs[0]); - ctx.emit(Inst::mov_r_m( - OperandSize::Size64, - srcs.regs()[0], - addr.clone(), - )); - ctx.emit(Inst::mov_r_m( - OperandSize::Size64, - srcs.regs()[1], - addr.offset(8), - )); + ctx.emit(Inst::store(types::I64, srcs.regs()[0], addr.clone())); + ctx.emit(Inst::store(types::I64, srcs.regs()[1], addr.offset(8))); } else { let src = put_input_in_reg(ctx, inputs[0]); - - ctx.emit(match elem_ty { - types::F32 => Inst::xmm_mov_r_m(SseOpcode::Movss, src, addr), - types::F64 => Inst::xmm_mov_r_m(SseOpcode::Movsd, src, addr), - _ if elem_ty.is_vector() && elem_ty.bits() == 128 => { - // TODO Specialize for different types: MOVUPD, MOVDQU, etc. 
- Inst::xmm_mov_r_m(SseOpcode::Movups, src, addr) - } - _ => Inst::mov_r_m(OperandSize::from_ty(elem_ty), src, addr), - }); + ctx.emit(Inst::store(elem_ty, src, addr)); } } @@ -4938,7 +4934,7 @@ fn lower_insn_to_regs>( let ty_access = ctx.input_ty(insn, 0); assert!(is_valid_atomic_transaction_ty(ty_access)); - ctx.emit(Inst::mov_r_m(OperandSize::from_ty(ty_access), data, addr)); + ctx.emit(Inst::store(ty_access, data, addr)); ctx.emit(Inst::Fence { kind: FenceKind::MFence, }); @@ -5181,7 +5177,8 @@ fn lower_insn_to_regs>( input_ty, )); - if flags.avoid_div_traps() { + // Always do explicit checks for `srem`: otherwise, INT_MIN % -1 is not handled properly. + if flags.avoid_div_traps() || op == Opcode::Srem { // A vcode meta-instruction is used to lower the inline checks, since they embed // pc-relative offsets that must not change, thus requiring regalloc to not // interfere by introducing spills and reloads. diff --git a/cranelift/codegen/src/isa/x64/mod.rs b/cranelift/codegen/src/isa/x64/mod.rs index da4065f2d0..6ca4a7cec4 100644 --- a/cranelift/codegen/src/isa/x64/mod.rs +++ b/cranelift/codegen/src/isa/x64/mod.rs @@ -9,7 +9,7 @@ use crate::isa::Builder as IsaBuilder; use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode}; use crate::result::CodegenResult; use crate::settings::{self as shared_settings, Flags}; -use alloc::boxed::Box; +use alloc::{boxed::Box, vec::Vec}; use core::hash::{Hash, Hasher}; use regalloc::{PrettyPrint, RealRegUniverse, Reg}; use target_lexicon::Triple; @@ -18,6 +18,7 @@ use target_lexicon::Triple; use crate::isa::unwind::systemv; mod abi; +pub mod encoding; mod inst; mod lower; mod settings; @@ -85,6 +86,10 @@ impl MachBackend for X64Backend { &self.flags } + fn isa_flags(&self) -> Vec { + self.x64_flags.iter().collect() + } + fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) { self.flags.hash(&mut hasher); self.x64_flags.hash(&mut hasher); diff --git a/cranelift/codegen/src/isa/x64/settings.rs 
b/cranelift/codegen/src/isa/x64/settings.rs index c5371bb132..501e153b46 100644 --- a/cranelift/codegen/src/isa/x64/settings.rs +++ b/cranelift/codegen/src/isa/x64/settings.rs @@ -1,6 +1,6 @@ //! x86 Settings. -use crate::settings::{self, detail, Builder}; +use crate::settings::{self, detail, Builder, Value}; use core::fmt; // Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a diff --git a/cranelift/codegen/src/isa/x86/abi.rs b/cranelift/codegen/src/isa/x86/abi.rs index 0117b8918b..d56d066e84 100644 --- a/cranelift/codegen/src/isa/x86/abi.rs +++ b/cranelift/codegen/src/isa/x86/abi.rs @@ -503,15 +503,18 @@ fn callee_saved_regs_used(isa: &dyn TargetIsa, func: &ir::Function) -> RegisterS pub fn prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> { match func.signature.call_conv { // For now, just translate fast and cold as system_v. - CallConv::Fast | CallConv::Cold | CallConv::SystemV => { + CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::WasmtimeSystemV => { system_v_prologue_epilogue(func, isa) } - CallConv::WindowsFastcall => fastcall_prologue_epilogue(func, isa), + CallConv::WindowsFastcall | CallConv::WasmtimeFastcall => { + fastcall_prologue_epilogue(func, isa) + } CallConv::BaldrdashSystemV | CallConv::BaldrdashWindows => { baldrdash_prologue_epilogue(func, isa) } CallConv::Probestack => unimplemented!("probestack calling convention"), CallConv::Baldrdash2020 => unimplemented!("Baldrdash ABI 2020"), + CallConv::AppleAarch64 => unreachable!(), } } @@ -1083,16 +1086,17 @@ pub fn create_unwind_info( isa: &dyn TargetIsa, ) -> CodegenResult> { use crate::isa::unwind::UnwindInfo; + use crate::machinst::UnwindInfoKind; // Assumption: RBP is being used as the frame pointer for both calling conventions // In the future, we should be omitting frame pointer as an optimization, so this will change - Ok(match func.signature.call_conv { - CallConv::Fast | CallConv::Cold | 
CallConv::SystemV => { + Ok(match isa.unwind_info_kind() { + UnwindInfoKind::SystemV => { super::unwind::systemv::create_unwind_info(func, isa)?.map(|u| UnwindInfo::SystemV(u)) } - CallConv::WindowsFastcall => { + UnwindInfoKind::Windows => { super::unwind::winx64::create_unwind_info(func, isa)?.map(|u| UnwindInfo::WindowsX64(u)) } - _ => None, + UnwindInfoKind::None => None, }) } diff --git a/cranelift/codegen/src/isa/x86/mod.rs b/cranelift/codegen/src/isa/x86/mod.rs index 272c3dfe5d..54efe7fcfd 100644 --- a/cranelift/codegen/src/isa/x86/mod.rs +++ b/cranelift/codegen/src/isa/x86/mod.rs @@ -21,8 +21,7 @@ use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa}; use crate::regalloc; use crate::result::CodegenResult; use crate::timing; -use alloc::borrow::Cow; -use alloc::boxed::Box; +use alloc::{borrow::Cow, boxed::Box, vec::Vec}; use core::any::Any; use core::fmt; use core::hash::{Hash, Hasher}; @@ -79,6 +78,10 @@ impl TargetIsa for Isa { &self.shared_flags } + fn isa_flags(&self) -> Vec { + self.isa_flags.iter().collect() + } + fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) { self.shared_flags.hash(&mut hasher); self.isa_flags.hash(&mut hasher); diff --git a/cranelift/codegen/src/isa/x86/settings.rs b/cranelift/codegen/src/isa/x86/settings.rs index 2d3a3f6698..f13431c1a2 100644 --- a/cranelift/codegen/src/isa/x86/settings.rs +++ b/cranelift/codegen/src/isa/x86/settings.rs @@ -1,6 +1,6 @@ //! x86 Settings. -use crate::settings::{self, detail, Builder}; +use crate::settings::{self, detail, Builder, Value}; use core::fmt; // Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. 
This file contains a diff --git a/cranelift/codegen/src/isa/x86/unwind/systemv.rs b/cranelift/codegen/src/isa/x86/unwind/systemv.rs index f3e1cbea84..31fc64c9fb 100644 --- a/cranelift/codegen/src/isa/x86/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/x86/unwind/systemv.rs @@ -3,7 +3,7 @@ use crate::ir::Function; use crate::isa::{ unwind::systemv::{RegisterMappingError, UnwindInfo}, - CallConv, RegUnit, TargetIsa, + RegUnit, TargetIsa, }; use crate::result::CodegenResult; use gimli::{write::CommonInformationEntry, Encoding, Format, Register, X86_64}; @@ -97,8 +97,8 @@ pub(crate) fn create_unwind_info( isa: &dyn TargetIsa, ) -> CodegenResult> { // Only System V-like calling conventions are supported - match func.signature.call_conv { - CallConv::Fast | CallConv::Cold | CallConv::SystemV => {} + match isa.unwind_info_kind() { + crate::machinst::UnwindInfoKind::SystemV => {} _ => return Ok(None), } @@ -121,8 +121,8 @@ pub(crate) fn create_unwind_info( fn sp(&self) -> u16 { X86_64::RSP.0 } - fn fp(&self) -> u16 { - X86_64::RBP.0 + fn fp(&self) -> Option { + Some(X86_64::RBP.0) } } let map = RegisterMapper(isa); diff --git a/cranelift/codegen/src/isa/x86/unwind/winx64.rs b/cranelift/codegen/src/isa/x86/unwind/winx64.rs index b2da0bc8b9..33e5463bb8 100644 --- a/cranelift/codegen/src/isa/x86/unwind/winx64.rs +++ b/cranelift/codegen/src/isa/x86/unwind/winx64.rs @@ -2,7 +2,7 @@ use crate::ir::Function; use crate::isa::x86::registers::{FPR, GPR}; -use crate::isa::{unwind::winx64::UnwindInfo, CallConv, RegUnit, TargetIsa}; +use crate::isa::{unwind::winx64::UnwindInfo, RegUnit, TargetIsa}; use crate::result::CodegenResult; pub(crate) fn create_unwind_info( @@ -10,7 +10,7 @@ pub(crate) fn create_unwind_info( isa: &dyn TargetIsa, ) -> CodegenResult> { // Only Windows fastcall is supported for unwind information - if func.signature.call_conv != CallConv::WindowsFastcall || func.prologue_end.is_none() { + if !func.signature.call_conv.extends_windows_fastcall() || 
func.prologue_end.is_none() { return Ok(None); } diff --git a/cranelift/codegen/src/lib.rs b/cranelift/codegen/src/lib.rs index 5b80073b7f..331c8f81b7 100644 --- a/cranelift/codegen/src/lib.rs +++ b/cranelift/codegen/src/lib.rs @@ -97,6 +97,7 @@ mod inst_predicates; mod iterators; mod legalizer; mod licm; +mod log; mod nan_canonicalization; mod partition_slice; mod postopt; diff --git a/cranelift/codegen/src/log.rs b/cranelift/codegen/src/log.rs new file mode 100644 index 0000000000..c5bd59aa58 --- /dev/null +++ b/cranelift/codegen/src/log.rs @@ -0,0 +1,39 @@ +//! This module implements deferred display helpers. +//! +//! These are particularly useful in logging contexts, where the maximum logging level filter might +//! be enabled, but we don't want the arguments to be evaluated early: +//! +//! ``` +//! log::set_max_level(log::LevelFilter::max()); +//! fn expensive_calculation() -> String { +//! "a string that is very slow to generate".into() +//! } +//! log::debug!("{}", expensive_calculation()); +//! ``` +//! +//! If the associated log implementation filters out log debug entries, the expensive calculation +//! would have been spurious. In this case, we can wrap the expensive computation within an +//! `DeferredDisplay`, so that the computation only happens when the actual `fmt` function is +//! called. 
+ +use core::fmt; + +pub(crate) struct DeferredDisplay(F); + +impl T, T: fmt::Display> DeferredDisplay { + pub(crate) fn new(f: F) -> Self { + Self(f) + } +} + +impl T, T: fmt::Display> fmt::Display for DeferredDisplay { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0().fmt(f) + } +} + +impl T, T: fmt::Debug> fmt::Debug for DeferredDisplay { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0().fmt(f) + } +} diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index 8c7322736c..7af9b087c8 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -30,6 +30,12 @@ pub trait ABICallee { /// Access the (possibly legalized) signature. fn signature(&self) -> &Signature; + /// Accumulate outgoing arguments. This ensures that at least SIZE bytes + /// are allocated in the prologue to be available for use in function calls + /// to hold arguments and/or return values. If this function is called + /// multiple times, the maximum of all SIZE values will be available. + fn accumulate_outgoing_args_size(&mut self, size: u32); + /// Get the settings controlling this function's compilation. fn flags(&self) -> &settings::Flags; @@ -189,9 +195,6 @@ pub trait ABICallee { from_slot: SpillSlot, ty: Option, ) -> Self::I; - - /// Desired unwind info type. - fn unwind_info_kind(&self) -> UnwindInfoKind; } /// Trait implemented by an object that tracks ABI-related state and can @@ -245,6 +248,13 @@ pub trait ABICaller { /// Emit code to post-adjust the satck, after call return and return-value copies. fn emit_stack_post_adjust>(&self, ctx: &mut C); + /// Accumulate outgoing arguments. This ensures that the caller (as + /// identified via the CTX argument) allocates enough space in the + /// prologue to hold all arguments and return values for this call. + /// There is no code emitted at the call site, everything is done + /// in the caller's function prologue. 
+ fn accumulate_outgoing_args_size>(&self, ctx: &mut C); + /// Emit the call itself. /// /// The returned instruction should have proper use- and def-sets according diff --git a/cranelift/codegen/src/machinst/abi_impl.rs b/cranelift/codegen/src/machinst/abi_impl.rs index f240c9a238..56ebec48f0 100644 --- a/cranelift/codegen/src/machinst/abi_impl.rs +++ b/cranelift/codegen/src/machinst/abi_impl.rs @@ -102,7 +102,7 @@ //! support the SpiderMonkey Wasm ABI. For details of the multi-value return //! ABI, see: //! -//! https://searchfox.org/mozilla-central/rev/bc3600def806859c31b2c7ac06e3d69271052a89/js/src/wasm/WasmStubs.h#134 +//! //! //! In brief: //! - Return values are processed in *reverse* order. @@ -444,6 +444,7 @@ pub trait ABIMachineSpec { flags: &settings::Flags, clobbers: &Set>, fixed_frame_storage_size: u32, + outgoing_args_size: u32, ) -> (u64, SmallVec<[Self::I; 16]>); /// Generate a clobber-restore sequence. This sequence should perform the @@ -455,6 +456,7 @@ pub trait ABIMachineSpec { flags: &settings::Flags, clobbers: &Set>, fixed_frame_storage_size: u32, + outgoing_args_size: u32, ) -> SmallVec<[Self::I; 16]>; /// Generate a call instruction/sequence. This method is provided one @@ -576,6 +578,8 @@ pub struct ABICalleeImpl { stackslots: PrimaryMap, /// Total stack size of all stackslots. stackslots_size: u32, + /// Stack size to be reserved for outgoing arguments. + outgoing_args_size: u32, /// Clobbered registers, from regalloc. clobbered: Set>, /// Total number of spillslots, from regalloc. 
@@ -646,7 +650,9 @@ impl ABICalleeImpl { || call_conv == isa::CallConv::Fast || call_conv == isa::CallConv::Cold || call_conv.extends_baldrdash() - || call_conv.extends_windows_fastcall(), + || call_conv.extends_windows_fastcall() + || call_conv == isa::CallConv::AppleAarch64 + || call_conv == isa::CallConv::WasmtimeSystemV, "Unsupported calling convention: {:?}", call_conv ); @@ -689,6 +695,7 @@ impl ABICalleeImpl { sig, stackslots, stackslots_size: stack_offset, + outgoing_args_size: 0, clobbered: Set::empty(), spillslots: None, fixed_frame_storage_size: 0, @@ -915,6 +922,12 @@ impl ABICallee for ABICalleeImpl { } } + fn accumulate_outgoing_args_size(&mut self, size: u32) { + if size > self.outgoing_args_size { + self.outgoing_args_size = size; + } + } + fn flags(&self) -> &settings::Flags { &self.flags } @@ -1196,6 +1209,15 @@ impl ABICallee for ABICalleeImpl { let spill_off = islot * M::word_bytes() as i64; let sp_off = self.stackslots_size as i64 + spill_off; trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off); + + // Integer types smaller than word size have been spilled as words below, + // and therefore must be reloaded in the same type. + let ty = if ty.is_int() && ty.bytes() < M::word_bytes() { + M::word_type() + } else { + ty + }; + gen_load_stack_multi::(StackAMode::NominalSPOffset(sp_off, ty), into_regs, ty) } @@ -1211,6 +1233,19 @@ impl ABICallee for ABICalleeImpl { let spill_off = islot * M::word_bytes() as i64; let sp_off = self.stackslots_size as i64 + spill_off; trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off); + + // When reloading from a spill slot, we might have lost information about real integer + // types. For instance, on the x64 backend, a zero-extension can become spurious and + // optimized into a move, causing vregs of types I32 and I64 to share the same coalescing + // equivalency class. 
As a matter of fact, such a value can be spilled as an I32 and later + // reloaded as an I64; to make sure the high bits are always defined, do a word-sized store + // all the time, in this case. + let ty = if ty.is_int() && ty.bytes() < M::word_bytes() { + M::word_type() + } else { + ty + }; + gen_store_stack_multi::(StackAMode::NominalSPOffset(sp_off, ty), from_regs, ty) } @@ -1283,11 +1318,12 @@ impl ABICallee for ABICalleeImpl { } // Save clobbered registers. - let (_, clobber_insts) = M::gen_clobber_save( + let (clobber_size, clobber_insts) = M::gen_clobber_save( self.call_conv, &self.flags, &self.clobbered, self.fixed_frame_storage_size, + self.outgoing_args_size, ); insts.extend(clobber_insts); @@ -1302,7 +1338,7 @@ impl ABICallee for ABICalleeImpl { // [crate::machinst::abi_impl](this module) for more details // on stackframe layout and nominal SP maintenance. - self.total_frame_size = Some(total_stacksize); + self.total_frame_size = Some(total_stacksize + clobber_size as u32); insts } @@ -1315,6 +1351,7 @@ impl ABICallee for ABICalleeImpl { &self.flags, &self.clobbered, self.fixed_frame_storage_size, + self.outgoing_args_size, )); // N.B.: we do *not* emit a nominal SP adjustment here, because (i) there will be no @@ -1369,18 +1406,6 @@ impl ABICallee for ABICalleeImpl { .next() .unwrap() } - - fn unwind_info_kind(&self) -> UnwindInfoKind { - match self.sig.call_conv { - #[cfg(feature = "unwind")] - isa::CallConv::Fast | isa::CallConv::Cold | isa::CallConv::SystemV => { - UnwindInfoKind::SystemV - } - #[cfg(feature = "unwind")] - isa::CallConv::WindowsFastcall => UnwindInfoKind::Windows, - _ => UnwindInfoKind::None, - } - } } fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec, Vec>) { @@ -1529,6 +1554,11 @@ impl ABICaller for ABICallerImpl { } } + fn accumulate_outgoing_args_size>(&self, ctx: &mut C) { + let off = self.sig.stack_arg_space + self.sig.stack_ret_space; + ctx.abi().accumulate_outgoing_args_size(off as u32); + } + fn emit_stack_pre_adjust>(&self, 
ctx: &mut C) { let off = self.sig.stack_arg_space + self.sig.stack_ret_space; adjust_stack_and_nominal_sp::(ctx, off as i32, /* is_sub = */ true) diff --git a/cranelift/codegen/src/machinst/adapter.rs b/cranelift/codegen/src/machinst/adapter.rs index eb4760fae5..c0bc76417f 100644 --- a/cranelift/codegen/src/machinst/adapter.rs +++ b/cranelift/codegen/src/machinst/adapter.rs @@ -2,10 +2,12 @@ use crate::binemit; use crate::ir; -use crate::isa::{EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa}; +use crate::isa::{ + BackendVariant, EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa, +}; use crate::machinst::*; use crate::regalloc::RegisterSet; -use crate::settings::Flags; +use crate::settings::{self, Flags}; #[cfg(feature = "testing_hooks")] use crate::regalloc::RegDiversions; @@ -14,7 +16,6 @@ use crate::regalloc::RegDiversions; use crate::isa::unwind::systemv::RegisterMappingError; use core::any::Any; -use core::hash::Hasher; use std::borrow::Cow; use std::fmt; use target_lexicon::Triple; @@ -59,8 +60,16 @@ impl TargetIsa for TargetIsaAdapter { self.backend.flags() } + fn isa_flags(&self) -> Vec { + self.backend.isa_flags() + } + + fn variant(&self) -> BackendVariant { + BackendVariant::MachInst + } + fn hash_all_flags(&self, hasher: &mut dyn Hasher) { - self.backend.hash_all_flags(hasher) + self.backend.hash_all_flags(hasher); } fn register_info(&self) -> RegInfo { diff --git a/cranelift/codegen/src/machinst/compile.rs b/cranelift/codegen/src/machinst/compile.rs index 9a00cee805..2dfbb85785 100644 --- a/cranelift/codegen/src/machinst/compile.rs +++ b/cranelift/codegen/src/machinst/compile.rs @@ -1,6 +1,7 @@ //! Compilation backend pipeline: optimized IR to VCode / binemit. use crate::ir::Function; +use crate::log::DeferredDisplay; use crate::machinst::*; use crate::settings; use crate::timing; @@ -29,9 +30,11 @@ where lower.lower(b)? 
}; + // Creating the vcode string representation may be costly for large functions, so defer its + // rendering. debug!( "vcode from lowering: \n{}", - vcode.show_rru(Some(b.reg_universe())) + DeferredDisplay::new(|| vcode.show_rru(Some(b.reg_universe()))) ); // Perform register allocation. @@ -103,7 +106,7 @@ where debug!( "vcode after regalloc: final version:\n{}", - vcode.show_rru(Some(b.reg_universe())) + DeferredDisplay::new(|| vcode.show_rru(Some(b.reg_universe()))) ); Ok(vcode) diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index d7835a98f7..401863cbd8 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -64,18 +64,18 @@ use crate::binemit::{CodeInfo, CodeOffset, StackMap}; use crate::ir::condcodes::IntCC; use crate::ir::{Function, SourceLoc, StackSlot, Type, ValueLabel}; use crate::result::CodegenResult; -use crate::settings::Flags; +use crate::settings::{self, Flags}; use crate::value_label::ValueLabelsRanges; use alloc::boxed::Box; use alloc::vec::Vec; use core::fmt::Debug; +use core::hash::Hasher; use cranelift_entity::PrimaryMap; use regalloc::RegUsageCollector; use regalloc::{ RealReg, RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, VirtualReg, Writable, }; use smallvec::{smallvec, SmallVec}; -use std::hash::Hasher; use std::string::String; use target_lexicon::Triple; @@ -368,8 +368,10 @@ pub trait MachBackend { /// Return flags for this backend. fn flags(&self) -> &Flags; - /// Hashes all flags, both ISA-independent and ISA-specific, into the - /// specified hasher. + /// Get the ISA-dependent flag values that were used to make this trait object. + fn isa_flags(&self) -> Vec; + + /// Hashes all flags, both ISA-independent and ISA-dependent, into the specified hasher. fn hash_all_flags(&self, hasher: &mut dyn Hasher); /// Return triple for this backend. 
diff --git a/cranelift/codegen/src/regalloc/liverange.rs b/cranelift/codegen/src/regalloc/liverange.rs index e9b3db5bb0..91cff53b03 100644 --- a/cranelift/codegen/src/regalloc/liverange.rs +++ b/cranelift/codegen/src/regalloc/liverange.rs @@ -66,7 +66,7 @@ //! Our current implementation uses a sorted array of compressed intervals, represented by their //! boundaries (Block, Inst), sorted by Block. This is a simple data structure, enables coalescing of //! intervals easily, and shows some nice performance behavior. See -//! https://github.com/bytecodealliance/cranelift/issues/1084 for benchmarks against using a +//! for benchmarks against using a //! bforest::Map. //! //! ## block ordering @@ -112,7 +112,7 @@ //! the necessary API to make coalescing easy, nor does it optimize for our types' sizes. //! //! Even the specialized `bforest::Map` implementation is slower than a plain sorted -//! array, see https://github.com/bytecodealliance/cranelift/issues/1084 for details. +//! array, see for details. use crate::entity::SparseMapValue; use crate::ir::{Block, ExpandedProgramPoint, Inst, Layout, ProgramOrder, ProgramPoint, Value}; diff --git a/cranelift/codegen/src/result.rs b/cranelift/codegen/src/result.rs index 493545c151..3178cd5ba9 100644 --- a/cranelift/codegen/src/result.rs +++ b/cranelift/codegen/src/result.rs @@ -2,19 +2,17 @@ use crate::verifier::VerifierErrors; use std::string::String; -use thiserror::Error; /// A compilation error. /// /// When Cranelift fails to compile a function, it will return one of these error codes. -#[derive(Error, Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq)] pub enum CodegenError { /// A list of IR verifier errors. /// /// This always represents a bug, either in the code that generated IR for Cranelift, or a bug /// in Cranelift itself. - #[error("Verifier errors")] - Verifier(#[from] VerifierErrors), + Verifier(VerifierErrors), /// An implementation limit was exceeded. 
/// @@ -22,27 +20,57 @@ pub enum CodegenError { /// limits][limits] that cause compilation to fail when they are exceeded. /// /// [limits]: https://github.com/bytecodealliance/wasmtime/blob/main/cranelift/docs/ir.md#implementation-limits - #[error("Implementation limit exceeded")] ImplLimitExceeded, /// The code size for the function is too large. /// /// Different target ISAs may impose a limit on the size of a compiled function. If that limit /// is exceeded, compilation fails. - #[error("Code for function is too large")] CodeTooLarge, /// Something is not supported by the code generator. This might be an indication that a /// feature is used without explicitly enabling it, or that something is temporarily /// unsupported by a given target backend. - #[error("Unsupported feature: {0}")] Unsupported(String), /// A failure to map Cranelift register representation to a DWARF register representation. #[cfg(feature = "unwind")] - #[error("Register mapping error")] RegisterMappingError(crate::isa::unwind::systemv::RegisterMappingError), } /// A convenient alias for a `Result` that uses `CodegenError` as the error type. pub type CodegenResult = Result; + +// This is manually implementing Error and Display instead of using thiserror to reduce the amount +// of dependencies used by Cranelift. +impl std::error::Error for CodegenError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + CodegenError::Verifier(source) => Some(source), + CodegenError::ImplLimitExceeded { .. } + | CodegenError::CodeTooLarge { .. } + | CodegenError::Unsupported { .. } => None, + #[cfg(feature = "unwind")] + CodegenError::RegisterMappingError { .. 
} => None, + } + } +} + +impl std::fmt::Display for CodegenError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + CodegenError::Verifier(_) => write!(f, "Verifier errors"), + CodegenError::ImplLimitExceeded => write!(f, "Implementation limit exceeded"), + CodegenError::CodeTooLarge => write!(f, "Code for function is too large"), + CodegenError::Unsupported(feature) => write!(f, "Unsupported feature: {}", feature), + #[cfg(feature = "unwind")] + CodegenError::RegisterMappingError(_0) => write!(f, "Register mapping error"), + } + } +} + +impl From for CodegenError { + fn from(source: VerifierErrors) -> Self { + CodegenError::Verifier { 0: source } + } +} diff --git a/cranelift/codegen/src/settings.rs b/cranelift/codegen/src/settings.rs index 11c3639d6f..0f36db82a9 100644 --- a/cranelift/codegen/src/settings.rs +++ b/cranelift/codegen/src/settings.rs @@ -26,7 +26,6 @@ use alloc::boxed::Box; use alloc::string::{String, ToString}; use core::fmt; use core::str; -use thiserror::Error; /// A string-based configurator for settings groups. /// @@ -44,6 +43,78 @@ pub trait Configurable { fn enable(&mut self, name: &str) -> SetResult<()>; } +/// Represents the kind of setting. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum SettingKind { + /// The setting is an enumeration. + Enum, + /// The setting is a number. + Num, + /// The setting is a boolean. + Bool, + /// The setting is a preset. + Preset, +} + +/// Represents an available builder setting. +/// +/// This is used for iterating settings in a builder. +#[derive(Clone, Copy, Debug)] +pub struct Setting { + /// The name of the setting. + pub name: &'static str, + /// The description of the setting. + pub description: &'static str, + /// The kind of the setting. + pub kind: SettingKind, + /// The supported values of the setting (for enum values). + pub values: Option<&'static [&'static str]>, +} + +/// Represents a setting value. 
+/// +/// This is used for iterating values in `Flags`. +pub struct Value { + /// The name of the setting associated with this value. + pub name: &'static str, + pub(crate) detail: detail::Detail, + pub(crate) values: Option<&'static [&'static str]>, + pub(crate) value: u8, +} + +impl Value { + /// Gets the kind of setting. + pub fn kind(&self) -> SettingKind { + match &self.detail { + detail::Detail::Enum { .. } => SettingKind::Enum, + detail::Detail::Num => SettingKind::Num, + detail::Detail::Bool { .. } => SettingKind::Bool, + detail::Detail::Preset => unreachable!(), + } + } + + /// Gets the enum value if the value is from an enum setting. + pub fn as_enum(&self) -> Option<&'static str> { + self.values.map(|v| v[self.value as usize]) + } + + /// Gets the numerical value if the value is from a num setting. + pub fn as_num(&self) -> Option { + match &self.detail { + detail::Detail::Num => Some(self.value), + _ => None, + } + } + + /// Gets the boolean value if the value is from a boolean setting. + pub fn as_bool(&self) -> Option { + match &self.detail { + detail::Detail::Bool { bit } => Some(self.value & (1 << bit) != 0), + _ => None, + } + } +} + /// Collect settings values based on a template. #[derive(Clone, Hash)] pub struct Builder { @@ -66,6 +137,30 @@ impl Builder { self.bytes } + /// Iterates the available settings in the builder. + pub fn iter(&self) -> impl Iterator { + let template = self.template; + + template.descriptors.iter().map(move |d| { + let (kind, values) = match d.detail { + detail::Detail::Enum { last, enumerators } => { + let values = template.enums(last, enumerators); + (SettingKind::Enum, Some(values)) + } + detail::Detail::Num => (SettingKind::Num, None), + detail::Detail::Bool { .. } => (SettingKind::Bool, None), + detail::Detail::Preset => (SettingKind::Preset, None), + }; + + Setting { + name: d.name, + description: d.description, + kind, + values, + } + }) + } + /// Set the value of a single bit. 
fn set_bit(&mut self, offset: usize, bit: u8, value: bool) { let byte = &mut self.bytes[offset]; @@ -165,21 +260,34 @@ impl Configurable for Builder { } /// An error produced when changing a setting. -#[derive(Error, Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq)] pub enum SetError { /// No setting by this name exists. - #[error("No existing setting named '{0}'")] BadName(String), /// Type mismatch for setting (e.g., setting an enum setting as a bool). - #[error("Trying to set a setting with the wrong type")] BadType, /// This is not a valid value for this setting. - #[error("Unexpected value for a setting, expected {0}")] BadValue(String), } +impl std::error::Error for SetError {} + +impl fmt::Display for SetError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + SetError::BadName(name) => write!(f, "No existing setting named '{}'", name), + SetError::BadType => { + write!(f, "Trying to set a setting with the wrong type") + } + SetError::BadValue(value) => { + write!(f, "Unexpected value for a setting, expected {}", value) + } + } + } +} + /// A result returned when changing a setting. pub type SetResult = Result; @@ -288,6 +396,9 @@ pub mod detail { /// Lower snake-case name of setting as defined in meta. pub name: &'static str, + /// The description of the setting. + pub description: &'static str, + /// Offset of byte containing this setting. pub offset: u32, diff --git a/cranelift/codegen/src/verifier/mod.rs b/cranelift/codegen/src/verifier/mod.rs index e20570c951..1d1801016b 100644 --- a/cranelift/codegen/src/verifier/mod.rs +++ b/cranelift/codegen/src/verifier/mod.rs @@ -80,7 +80,6 @@ use alloc::vec::Vec; use core::cmp::Ordering; use core::fmt::{self, Display, Formatter, Write}; use log::debug; -use thiserror::Error; pub use self::cssa::verify_cssa; pub use self::liveness::verify_liveness; @@ -92,8 +91,7 @@ mod liveness; mod locations; /// A verifier error. 
-#[derive(Error, Debug, PartialEq, Eq, Clone)] -#[error("{}{}: {}", .location, format_context(.context), .message)] +#[derive(Debug, PartialEq, Eq, Clone)] pub struct VerifierError { /// The entity causing the verifier error. pub location: AnyEntity, @@ -104,11 +102,16 @@ pub struct VerifierError { pub message: String, } -/// Helper for formatting Verifier::Error context. -fn format_context(context: &Option) -> String { - match context { - None => "".to_string(), - Some(c) => format!(" ({})", c), +// This is manually implementing Error and Display instead of using thiserror to reduce the amount +// of dependencies used by Cranelift. +impl std::error::Error for VerifierError {} + +impl Display for VerifierError { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match &self.context { + None => write!(f, "{}: {}", self.location, self.message), + Some(context) => write!(f, "{} ({}): {}", self.location, context, self.message), + } } } @@ -175,9 +178,13 @@ pub type VerifierStepResult = Result; pub type VerifierResult = Result; /// List of verifier errors. -#[derive(Error, Debug, Default, PartialEq, Eq, Clone)] +#[derive(Debug, Default, PartialEq, Eq, Clone)] pub struct VerifierErrors(pub Vec); +// This is manually implementing Error and Display instead of using thiserror to reduce the amount +// of dependencies used by Cranelift. +impl std::error::Error for VerifierErrors {} + impl VerifierErrors { /// Return a new `VerifierErrors` struct. 
#[inline] diff --git a/cranelift/entity/Cargo.toml b/cranelift/entity/Cargo.toml index bdc2e93fed..891550ffc5 100644 --- a/cranelift/entity/Cargo.toml +++ b/cranelift/entity/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["The Cranelift Project Developers"] name = "cranelift-entity" -version = "0.72.0" +version = "0.73.0" description = "Data structures using entity references as mapping keys" license = "Apache-2.0 WITH LLVM-exception" documentation = "https://docs.rs/cranelift-entity" diff --git a/cranelift/entity/src/lib.rs b/cranelift/entity/src/lib.rs index 3baba3a1d5..09054719e0 100644 --- a/cranelift/entity/src/lib.rs +++ b/cranelift/entity/src/lib.rs @@ -70,21 +70,25 @@ macro_rules! entity_impl { // Basic traits. ($entity:ident) => { impl $crate::EntityRef for $entity { + #[inline] fn new(index: usize) -> Self { debug_assert!(index < ($crate::__core::u32::MAX as usize)); $entity(index as u32) } + #[inline] fn index(self) -> usize { self.0 as usize } } impl $crate::packed_option::ReservedValue for $entity { + #[inline] fn reserved_value() -> $entity { $entity($crate::__core::u32::MAX) } + #[inline] fn is_reserved_value(&self) -> bool { self.0 == $crate::__core::u32::MAX } @@ -93,6 +97,7 @@ macro_rules! entity_impl { impl $entity { /// Create a new instance from a `u32`. #[allow(dead_code)] + #[inline] pub fn from_u32(x: u32) -> Self { debug_assert!(x < $crate::__core::u32::MAX); $entity(x) @@ -100,6 +105,7 @@ macro_rules! entity_impl { /// Return the underlying index value as a `u32`. 
#[allow(dead_code)] + #[inline] pub fn as_u32(self) -> u32 { self.0 } diff --git a/cranelift/entity/src/primary.rs b/cranelift/entity/src/primary.rs index 9f43f088ce..f35c6f44a6 100644 --- a/cranelift/entity/src/primary.rs +++ b/cranelift/entity/src/primary.rs @@ -148,6 +148,28 @@ where pub fn into_boxed_slice(self) -> BoxedSlice { unsafe { BoxedSlice::::from_raw(Box::<[V]>::into_raw(self.elems.into_boxed_slice())) } } + + /// Performs a binary search on the values with a key extraction function. + /// + /// Assumes that the values are sorted by the key extracted by the function. + /// + /// If the value is found then `Ok(K)` is returned, containing the entity key + /// of the matching value. + /// + /// If there are multiple matches, then any one of the matches could be returned. + /// + /// If the value is not found then Err(K) is returned, containing the entity key + /// where a matching element could be inserted while maintaining sorted order. + pub fn binary_search_values_by_key<'a, B, F>(&'a self, b: &B, f: F) -> Result + where + F: FnMut(&'a V) -> B, + B: Ord, + { + self.elems + .binary_search_by_key(b, f) + .map(|i| K::new(i)) + .map_err(|i| K::new(i)) + } } impl Default for PrimaryMap diff --git a/cranelift/filetests/Cargo.toml b/cranelift/filetests/Cargo.toml index 40d643a708..dd44f72964 100644 --- a/cranelift/filetests/Cargo.toml +++ b/cranelift/filetests/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "cranelift-filetests" authors = ["The Cranelift Project Developers"] -version = "0.66.0" +version = "0.73.0" description = "Test driver and implementations of the filetest commands" license = "Apache-2.0 WITH LLVM-exception" documentation = "https://docs.rs/cranelift-filetests" @@ -10,24 +10,22 @@ publish = false edition = "2018" [dependencies] -cranelift-codegen = { path = "../codegen", version = "0.72.0", features = ["testing_hooks"] } -cranelift-frontend = { path = "../frontend", version = "0.72.0" } -cranelift-interpreter = { path = "../interpreter", 
version = "0.72.0" } -cranelift-native = { path = "../native", version = "0.72.0" } -cranelift-reader = { path = "../reader", version = "0.72.0" } -cranelift-preopt = { path = "../preopt", version = "0.72.0" } -byteorder = { version = "1.3.2", default-features = false } +cranelift-codegen = { path = "../codegen", version = "0.73.0", features = ["testing_hooks"] } +cranelift-frontend = { path = "../frontend", version = "0.73.0" } +cranelift-interpreter = { path = "../interpreter", version = "0.73.0" } +cranelift-native = { path = "../native", version = "0.73.0" } +cranelift-reader = { path = "../reader", version = "0.73.0" } +cranelift-preopt = { path = "../preopt", version = "0.73.0" } file-per-thread-logger = "0.1.2" filecheck = "0.5.0" -gimli = { version = "0.23.0", default-features = false, features = ["read"] } +gimli = { version = "0.24.0", default-features = false, features = ["read"] } log = "0.4.6" memmap2 = "0.2.1" num_cpus = "1.8.0" -target-lexicon = "0.11" +target-lexicon = "0.12" thiserror = "1.0.15" anyhow = "1.0.32" [features] enable-peepmatic = [] experimental_arm32 = [] -experimental_x64 = [] diff --git a/cranelift/filetests/filetests/isa/aarch64/prologue.clif b/cranelift/filetests/filetests/isa/aarch64/prologue.clif index 20d90c4a68..40934abd65 100644 --- a/cranelift/filetests/filetests/isa/aarch64/prologue.clif +++ b/cranelift/filetests/filetests/isa/aarch64/prologue.clif @@ -77,22 +77,72 @@ block0(v0: f64): ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp -; nextln: str q8, [sp, #-16]! -; nextln: str q9, [sp, #-16]! -; nextln: str q10, [sp, #-16]! -; nextln: str q11, [sp, #-16]! -; nextln: str q12, [sp, #-16]! -; nextln: str q13, [sp, #-16]! -; nextln: str q14, [sp, #-16]! -; nextln: str q15, [sp, #-16]! +; nextln: stp d14, d15, [sp, #-16]! +; nextln: stp d12, d13, [sp, #-16]! +; nextln: stp d10, d11, [sp, #-16]! +; nextln: stp d8, d9, [sp, #-16]! 
-; check: ldr q15, [sp], #16 -; nextln: ldr q14, [sp], #16 -; nextln: ldr q13, [sp], #16 -; nextln: ldr q12, [sp], #16 -; nextln: ldr q11, [sp], #16 -; nextln: ldr q10, [sp], #16 -; nextln: ldr q9, [sp], #16 -; nextln: ldr q8, [sp], #16 +; check: ldp d8, d9, [sp], #16 +; nextln: ldp d10, d11, [sp], #16 +; nextln: ldp d12, d13, [sp], #16 +; nextln: ldp d14, d15, [sp], #16 +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret + +function %f2(i64) -> i64 { +block0(v0: i64): + v1 = iadd.i64 v0, v0 + v2 = iadd.i64 v0, v1 + v3 = iadd.i64 v0, v2 + v4 = iadd.i64 v0, v3 + v5 = iadd.i64 v0, v4 + v6 = iadd.i64 v0, v5 + v7 = iadd.i64 v0, v6 + v8 = iadd.i64 v0, v7 + v9 = iadd.i64 v0, v8 + v10 = iadd.i64 v0, v9 + v11 = iadd.i64 v0, v10 + v12 = iadd.i64 v0, v11 + v13 = iadd.i64 v0, v12 + v14 = iadd.i64 v0, v13 + v15 = iadd.i64 v0, v14 + v16 = iadd.i64 v0, v15 + v17 = iadd.i64 v0, v16 + v18 = iadd.i64 v0, v17 + + v19 = iadd.i64 v0, v1 + v20 = iadd.i64 v2, v3 + v21 = iadd.i64 v4, v5 + v22 = iadd.i64 v6, v7 + v23 = iadd.i64 v8, v9 + v24 = iadd.i64 v10, v11 + v25 = iadd.i64 v12, v13 + v26 = iadd.i64 v14, v15 + v27 = iadd.i64 v16, v17 + + v28 = iadd.i64 v18, v19 + v29 = iadd.i64 v20, v21 + v30 = iadd.i64 v22, v23 + v31 = iadd.i64 v24, v25 + v32 = iadd.i64 v26, v27 + + v33 = iadd.i64 v28, v29 + v34 = iadd.i64 v30, v31 + + v35 = iadd.i64 v32, v33 + v36 = iadd.i64 v34, v35 + + return v36 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: str x22, [sp, #-16]! +; nextln: stp x19, x20, [sp, #-16]! 
+; nextln: add x1, x0, x0 + +; check: add x0, x1, x0 +; nextln: ldp x19, x20, [sp], #16 +; nextln: ldr x22, [sp], #16 ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret diff --git a/cranelift/filetests/filetests/isa/s390x/arithmetic.clif b/cranelift/filetests/filetests/isa/s390x/arithmetic.clif new file mode 100644 index 0000000000..479268ec0c --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/arithmetic.clif @@ -0,0 +1,1136 @@ +test compile +target s390x + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; IADD +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %iadd_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = iadd.i64 v0, v1 + return v2 +} + +; check: agr %r2, %r3 +; nextln: br %r14 + +function %iadd_i64_ext32(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = sextend.i64 v1 + v3 = iadd.i64 v0, v2 + return v3 +} + +; check: agfr %r2, %r3 +; nextln: br %r14 + +function %iadd_i64_imm16(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 1 + v2 = iadd.i64 v0, v1 + return v2 +} + +; check: aghi %r2, 1 +; nextln: br %r14 + +function %iadd_i64_imm32(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 32768 + v2 = iadd.i64 v0, v1 + return v2 +} + +; check: agfi %r2, 32768 +; nextln: br %r14 + +function %iadd_i64_mem(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = load.i64 v1 + v3 = iadd.i64 v0, v2 + return v3 +} + +; check: ag %r2, 0(%r3) +; nextln: br %r14 + +function %iadd_i64_mem_ext16(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = sload16.i64 v1 + v3 = iadd.i64 v0, v2 + return v3 +} + +; check: agh %r2, 0(%r3) +; nextln: br %r14 + +function %iadd_i64_mem_ext32(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = sload32.i64 v1 + v3 = iadd.i64 v0, v2 + return v3 +} + +; check: agf %r2, 0(%r3) +; nextln: br %r14 + +function %iadd_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = iadd.i32 v0, v1 + return v2 +} + +; check: ar %r2, %r3 +; nextln: br %r14 + +function %iadd_i32_imm16(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 1 + 
v2 = iadd.i32 v0, v1 + return v2 +} + +; check: ahi %r2, 1 +; nextln: br %r14 + +function %iadd_i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 32768 + v2 = iadd.i32 v0, v1 + return v2 +} + +; check: afi %r2, 32768 +; nextln: br %r14 + +function %iadd_i32_mem(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1 + v3 = iadd.i32 v0, v2 + return v3 +} + +; check: a %r2, 0(%r3) +; nextln: br %r14 + +function %iadd_i32_memoff(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1+4096 + v3 = iadd.i32 v0, v2 + return v3 +} + +; check: ay %r2, 4096(%r3) +; nextln: br %r14 + +function %iadd_i32_mem_ext16(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = sload16.i32 v1 + v3 = iadd.i32 v0, v2 + return v3 +} + +; check: ah %r2, 0(%r3) +; nextln: br %r14 + +function %iadd_i32_memoff_ext16(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = sload16.i32 v1+4096 + v3 = iadd.i32 v0, v2 + return v3 +} + +; check: ahy %r2, 4096(%r3) +; nextln: br %r14 + +function %iadd_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = iadd.i16 v0, v1 + return v2 +} + +; check: ar %r2, %r3 +; nextln: br %r14 + +function %iadd_i16_imm(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i16 1 + v2 = iadd.i16 v0, v1 + return v2 +} + +; check: ahi %r2, 1 +; nextln: br %r14 + +function %iadd_i16_mem(i16, i64) -> i16 { +block0(v0: i16, v1: i64): + v2 = load.i16 v1 + v3 = iadd.i16 v0, v2 + return v3 +} + +; check: ah %r2, 0(%r3) +; nextln: br %r14 + +function %iadd_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = iadd.i8 v0, v1 + return v2 +} + +; check: ar %r2, %r3 +; nextln: br %r14 + +function %iadd_i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 1 + v2 = iadd.i8 v0, v1 + return v2 +} + +; check: ahi %r2, 1 +; nextln: br %r14 + +function %iadd_i8_mem(i8, i64) -> i8 { +block0(v0: i8, v1: i64): + v2 = load.i8 v1 + v3 = iadd.i8 v0, v2 + return v3 +} + +; check: llc %r3, 0(%r3) +; nextln: ar %r2, %r3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 
+;; ISUB +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %isub_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = isub.i64 v0, v1 + return v2 +} + +; check: sgr %r2, %r3 +; nextln: br %r14 + +function %isub_i64_ext32(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = sextend.i64 v1 + v3 = isub.i64 v0, v2 + return v3 +} + +; check: sgfr %r2, %r3 +; nextln: br %r14 + +function %isub_i64_imm16(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 1 + v2 = isub.i64 v0, v1 + return v2 +} + +; check: aghi %r2, -1 +; nextln: br %r14 + +function %isub_i64_imm32(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 32769 + v2 = isub.i64 v0, v1 + return v2 +} + +; check: agfi %r2, -32769 +; nextln: br %r14 + +function %isub_i64_mem(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = load.i64 v1 + v3 = isub.i64 v0, v2 + return v3 +} + +; check: sg %r2, 0(%r3) +; nextln: br %r14 + +function %isub_i64_mem_ext16(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = sload16.i64 v1 + v3 = isub.i64 v0, v2 + return v3 +} + +; check: sgh %r2, 0(%r3) +; nextln: br %r14 + +function %isub_i64_mem_ext32(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = sload32.i64 v1 + v3 = isub.i64 v0, v2 + return v3 +} + +; check: sgf %r2, 0(%r3) +; nextln: br %r14 + +function %isub_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = isub.i32 v0, v1 + return v2 +} + +; check: sr %r2, %r3 +; nextln: br %r14 + +function %isub_i32_imm16(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 1 + v2 = isub.i32 v0, v1 + return v2 +} + +; check: ahi %r2, -1 +; nextln: br %r14 + +function %isub_i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 32769 + v2 = isub.i32 v0, v1 + return v2 +} + +; check: afi %r2, -32769 +; nextln: br %r14 + +function %isub_i32_mem(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1 + v3 = isub.i32 v0, v2 + return v3 +} + +; check: s %r2, 0(%r3) +; nextln: br %r14 + +function %isub_i32_memoff(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = 
load.i32 v1+4096 + v3 = isub.i32 v0, v2 + return v3 +} + +; check: sy %r2, 4096(%r3) +; nextln: br %r14 + +function %isub_i32_mem_ext16(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = sload16.i32 v1 + v3 = isub.i32 v0, v2 + return v3 +} + +; check: sh %r2, 0(%r3) +; nextln: br %r14 + +function %isub_i32_memoff_ext16(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = sload16.i32 v1+4096 + v3 = isub.i32 v0, v2 + return v3 +} + +; check: shy %r2, 4096(%r3) +; nextln: br %r14 + +function %isub_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = isub.i16 v0, v1 + return v2 +} + +; check: sr %r2, %r3 +; nextln: br %r14 + +function %isub_i16_imm(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i16 1 + v2 = isub.i16 v0, v1 + return v2 +} + +; check: ahi %r2, -1 +; nextln: br %r14 + +function %isub_i16_mem(i16, i64) -> i16 { +block0(v0: i16, v1: i64): + v2 = load.i16 v1 + v3 = isub.i16 v0, v2 + return v3 +} + +; check: sh %r2, 0(%r3) +; nextln: br %r14 + +function %isub_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = isub.i8 v0, v1 + return v2 +} + +; check: sr %r2, %r3 +; nextln: br %r14 + +function %isub_i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 1 + v2 = isub.i8 v0, v1 + return v2 +} + +; check: ahi %r2, -1 +; nextln: br %r14 + +function %isub_i8_mem(i8, i64) -> i8 { +block0(v0: i8, v1: i64): + v2 = load.i8 v1 + v3 = isub.i8 v0, v2 + return v3 +} + +; check: llc %r3, 0(%r3) +; nextln: sr %r2, %r3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; IABS +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %iabs_i64(i64) -> i64 { +block0(v0: i64): + v1 = iabs.i64 v0 + return v1 +} + +; check: lpgr %r2, %r2 +; nextln: br %r14 + +function %iabs_i64_ext32(i32) -> i64 { +block0(v0: i32): + v1 = sextend.i64 v0 + v2 = iabs.i64 v1 + return v2 +} + +; check: lpgfr %r2, %r2 +; nextln: br %r14 + +function %iabs_i32(i32) -> i32 { +block0(v0: i32): + v1 = iabs.i32 v0 + return v1 +} + +; check: lpr %r2, %r2 +; nextln: br %r14 + +function 
%iabs_i16(i16) -> i16 { +block0(v0: i16): + v1 = iabs.i16 v0 + return v1 +} + +; check: lhr %r2, %r2 +; nextln: lpr %r2, %r2 +; nextln: br %r14 + +function %iabs_i8(i8) -> i8 { +block0(v0: i8): + v1 = iabs.i8 v0 + return v1 +} + +; check: lbr %r2, %r2 +; nextln: lpr %r2, %r2 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; INEG +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; FIXME: neg-abs combination not yet supported + +function %ineg_i64(i64) -> i64 { +block0(v0: i64): + v1 = ineg.i64 v0 + return v1 +} + +; check: lcgr %r2, %r2 +; nextln: br %r14 + +function %ineg_i64_ext32(i32) -> i64 { +block0(v0: i32): + v1 = sextend.i64 v0 + v2 = ineg.i64 v1 + return v2 +} + +; check: lcgfr %r2, %r2 +; nextln: br %r14 + +function %ineg_i32(i32) -> i32 { +block0(v0: i32): + v1 = ineg.i32 v0 + return v1 +} + +; check: lcr %r2, %r2 +; nextln: br %r14 + +function %ineg_i16(i16) -> i16 { +block0(v0: i16): + v1 = ineg.i16 v0 + return v1 +} + +; check: lcr %r2, %r2 +; nextln: br %r14 + +function %ineg_i8(i8) -> i8 { +block0(v0: i8): + v1 = ineg.i8 v0 + return v1 +} + +; check: lcr %r2, %r2 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; IMUL +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %imul_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = imul.i64 v0, v1 + return v2 +} + +; check: msgr %r2, %r3 +; nextln: br %r14 + +function %imul_i64_imm16(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 3 + v2 = imul.i64 v0, v1 + return v2 +} + +; check: mghi %r2, 3 +; nextln: br %r14 + +function %imul_i64_imm32(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 32769 + v2 = imul.i64 v0, v1 + return v2 +} + +; check: msgfi %r2, 32769 +; nextln: br %r14 + +function %imul_i64_mem(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = load.i64 v1 + v3 = imul.i64 v0, v2 + return v3 +} + +; check: msg %r2, 0(%r3) +; nextln: br %r14 + +function %imul_i64_mem_ext16(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = sload16.i64 v1 + v3 = 
imul.i64 v0, v2 + return v3 +} + +; check: mgh %r2, 0(%r3) +; nextln: br %r14 + +function %imul_i64_mem_ext32(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = sload32.i64 v1 + v3 = imul.i64 v0, v2 + return v3 +} + +; check: msgf %r2, 0(%r3) +; nextln: br %r14 + +function %imul_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = imul.i32 v0, v1 + return v2 +} + +; check: msr %r2, %r3 +; nextln: br %r14 + +function %imul_i32_imm16(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 3 + v2 = imul.i32 v0, v1 + return v2 +} + +; check: mhi %r2, 3 +; nextln: br %r14 + +function %imul_i32_imm32(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 32769 + v2 = imul.i32 v0, v1 + return v2 +} + +; check: msfi %r2, 32769 +; nextln: br %r14 + +function %imul_i32_mem(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1 + v3 = imul.i32 v0, v2 + return v3 +} + +; check: ms %r2, 0(%r3) +; nextln: br %r14 + +function %imul_i32_memoff(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1+4096 + v3 = imul.i32 v0, v2 + return v3 +} + +; check: msy %r2, 4096(%r3) +; nextln: br %r14 + +function %imul_i32_mem_ext16(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = sload16.i32 v1 + v3 = imul.i32 v0, v2 + return v3 +} + +; check: mh %r2, 0(%r3) +; nextln: br %r14 + +function %imul_i32_memoff_ext16(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = sload16.i32 v1+4096 + v3 = imul.i32 v0, v2 + return v3 +} + +; check: mhy %r2, 4096(%r3) +; nextln: br %r14 + +function %imul_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = imul.i16 v0, v1 + return v2 +} + +; check: msr %r2, %r3 +; nextln: br %r14 + +function %imul_i16_imm(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i16 3 + v2 = imul.i16 v0, v1 + return v2 +} + +; check: mhi %r2, 3 +; nextln: br %r14 + +function %imul_i16_mem(i16, i64) -> i16 { +block0(v0: i16, v1: i64): + v2 = load.i16 v1 + v3 = imul.i16 v0, v2 + return v3 +} + +; check: mh %r2, 0(%r3) +; nextln: br %r14 + +function %imul_i8(i8, i8) -> 
i8 { +block0(v0: i8, v1: i8): + v2 = imul.i8 v0, v1 + return v2 +} + +; check: msr %r2, %r3 +; nextln: br %r14 + +function %imul_i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 3 + v2 = imul.i8 v0, v1 + return v2 +} + +; check: mhi %r2, 3 +; nextln: br %r14 + +function %imul_i8_mem(i8, i64) -> i8 { +block0(v0: i8, v1: i64): + v2 = load.i8 v1 + v3 = imul.i8 v0, v2 + return v3 +} + +; check: llc %r3, 0(%r3) +; nextln: msr %r2, %r3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; UMULHI +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %umulhi_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = umulhi.i64 v0, v1 + return v2 +} + +; check: lgr %r1, %r3 +; nextln: mlgr %r0, %r2 +; nextln: lgr %r2, %r0 +; nextln: br %r14 + +function %umulhi_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = umulhi.i32 v0, v1 + return v2 +} + +; check: llgfr %r2, %r2 +; nextln: llgfr %r3, %r3 +; nextln: msgr %r2, %r3 +; nextln: srlg %r2, %r2, 32 +; nextln: br %r14 + +function %umulhi_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = umulhi.i16 v0, v1 + return v2 +} + +; check: llhr %r2, %r2 +; nextln: llhr %r3, %r3 +; nextln: msr %r2, %r3 +; nextln: srlk %r2, %r2, 16 +; nextln: br %r14 + +function %umulhi_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = umulhi.i8 v0, v1 + return v2 +} + +; check: llcr %r2, %r2 +; nextln: llcr %r3, %r3 +; nextln: msr %r2, %r3 +; nextln: srlk %r2, %r2, 8 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; SMULHI +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %smulhi_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = smulhi.i64 v0, v1 + return v2 +} + +; check: mgrk %r0, %r2, %r3 +; nextln: lgr %r2, %r0 +; nextln: br %r14 + +function %smulhi_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = smulhi.i32 v0, v1 + return v2 +} + +; check: lgfr %r2, %r2 +; nextln: lgfr %r3, %r3 +; nextln: msgr %r2, %r3 +; nextln: srag %r2, %r2, 32 +; nextln: br %r14 + +function 
%smulhi_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = smulhi.i16 v0, v1 + return v2 +} + +; check: lhr %r2, %r2 +; nextln: lhr %r3, %r3 +; nextln: msr %r2, %r3 +; nextln: srak %r2, %r2, 16 +; nextln: br %r14 + +function %smulhi_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = smulhi.i8 v0, v1 + return v2 +} + +; check: lbr %r2, %r2 +; nextln: lbr %r3, %r3 +; nextln: msr %r2, %r3 +; nextln: srak %r2, %r2, 8 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; SDIV +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %sdiv_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = sdiv.i64 v0, v1 + return v2 +} + +; check: lgr %r1, %r2 +; nextln: llihf %r2, 2147483647 +; nextln: iilf %r2, 4294967295 +; nextln: xgr %r2, %r1 +; nextln: ngr %r2, %r3 +; nextln: cgite %r2, -1 +; nextln: dsgr %r0, %r3 +; nextln: lgr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i64_imm(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 2 + v2 = sdiv.i64 v0, v1 + return v2 +} + +; check: lgr %r1, %r2 +; nextln: lghi %r2, 2 +; nextln: dsgr %r0, %r2 +; nextln: lgr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = sdiv.i32 v0, v1 + return v2 +} + +; check: lgfr %r1, %r2 +; nextln: iilf %r2, 2147483647 +; nextln: xr %r2, %r1 +; nextln: nr %r2, %r3 +; nextln: cite %r2, -1 +; nextln: dsgfr %r0, %r3 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = sdiv.i32 v0, v1 + return v2 +} + +; check: lgfr %r1, %r2 +; nextln: lhi %r2, 2 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = sdiv.i16 v0, v1 + return v2 +} + +; check: lghr %r1, %r2 +; nextln: lhr %r2, %r3 +; nextln: lhi %r3, 32767 +; nextln: xr %r3, %r1 +; nextln: nr %r3, %r2 +; nextln: cite %r3, -1 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i16_imm(i16) -> 
i16 { +block0(v0: i16): + v1 = iconst.i16 2 + v2 = sdiv.i16 v0, v1 + return v2 +} + +; check: lghr %r1, %r2 +; nextln: lhi %r2, 2 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = sdiv.i8 v0, v1 + return v2 +} + +; check: lgbr %r1, %r2 +; nextln: lbr %r2, %r3 +; nextln: lhi %r3, 127 +; nextln: xr %r3, %r1 +; nextln: nr %r3, %r2 +; nextln: cite %r3, -1 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 2 + v2 = sdiv.i8 v0, v1 + return v2 +} + +; check: lgbr %r1, %r2 +; nextln: lhi %r2, 2 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; UDIV +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %udiv_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = udiv.i64 v0, v1 + return v2 +} + +; check: lghi %r0, 0 +; nextln: lgr %r1, %r2 +; nextln: dlgr %r0, %r3 +; nextln: lgr %r2, %r1 +; nextln: br %r14 + +function %udiv_i64_imm(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 2 + v2 = udiv.i64 v0, v1 + return v2 +} + +; check: lghi %r0, 0 +; nextln: lgr %r1, %r2 +; nextln: lghi %r2, 2 +; nextln: dlgr %r0, %r2 +; nextln: lgr %r2, %r1 +; nextln: br %r14 + +function %udiv_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = udiv.i32 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: lr %r1, %r2 +; nextln: dlr %r0, %r3 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %udiv_i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = udiv.i32 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: lr %r1, %r2 +; nextln: lhi %r2, 2 +; nextln: dlr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %udiv_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = udiv.i16 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: llhr %r1, %r2 +; nextln: llhr %r2, %r3 +; nextln: dlr %r0, %r2 +; nextln: lr 
%r2, %r1 +; nextln: br %r14 + +function %udiv_i16_imm(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i16 2 + v2 = udiv.i16 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: llhr %r1, %r2 +; nextln: lhi %r2, 2 +; nextln: dlr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %udiv_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = udiv.i8 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: llcr %r1, %r2 +; nextln: llcr %r2, %r3 +; nextln: dlr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %udiv_i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 2 + v2 = udiv.i8 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: llcr %r1, %r2 +; nextln: lhi %r2, 2 +; nextln: dlr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; SREM +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %srem_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = srem.i64 v0, v1 + return v2 +} + +; check: lgr %r1, %r2 +; nextln: cghi %r3, -1 +; nextln: locghie %r1, 0 +; nextln: dsgr %r0, %r3 +; nextln: lgr %r2, %r0 +; nextln: br %r14 + +function %srem_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = srem.i32 v0, v1 + return v2 +} + +; check: lgfr %r1, %r2 +; nextln: dsgfr %r0, %r3 +; nextln: lr %r2, %r0 +; nextln: br %r14 + +function %srem_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = srem.i16 v0, v1 + return v2 +} + +; check: lghr %r1, %r2 +; nextln: lhr %r2, %r3 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r0 +; nextln: br %r14 + +function %srem_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = srem.i8 v0, v1 + return v2 +} + +; check: lgbr %r1, %r2 +; nextln: lbr %r2, %r3 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r0 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; UREM +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %urem_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = urem.i64 v0, v1 + return v2 +} + +; check: lghi %r0, 0 +; nextln: 
lgr %r1, %r2 +; nextln: dlgr %r0, %r3 +; nextln: lgr %r2, %r0 +; nextln: br %r14 + +function %urem_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = urem.i32 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: lr %r1, %r2 +; nextln: dlr %r0, %r3 +; nextln: lr %r2, %r0 +; nextln: br %r14 + +function %urem_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = urem.i16 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; check: llhr %r1, %r2 +; nextln: llhr %r2, %r3 +; nextln: dlr %r0, %r2 +; nextln: lr %r2, %r0 +; nextln: br %r14 + +function %urem_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = urem.i8 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; check: llcr %r1, %r2 +; nextln: llcr %r2, %r3 +; nextln: dlr %r0, %r2 +; nextln: lr %r2, %r0 +; nextln: br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/bitops.clif b/cranelift/filetests/filetests/isa/s390x/bitops.clif new file mode 100644 index 0000000000..e4ab2f1f4b --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/bitops.clif @@ -0,0 +1,243 @@ +test compile +target s390x + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BITREV +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; FIXME: bitrev not yet implemented + +;function %bitrev_i64(i64) -> i64 { +;block0(v0: i64): +; v1 = bitrev v0 +; return v1 +;} +; +;function %bitrev_i32(i32) -> i32 { +;block0(v0: i32): +; v1 = bitrev v0 +; return v1 +;} +; +;function %bitrev_i16(i16) -> i16 { +;block0(v0: i16): +; v1 = bitrev v0 +; return v1 +;} +; +;function %bitrev_i8(i8) -> i8 { +;block0(v0: i8): +; v1 = bitrev v0 +; return v1 +;} +; + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; CLZ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %clz_i64(i64) -> i64 { +block0(v0: i64): + v1 = clz v0 + return v1 +} + +; check: flogr %r0, %r2 +; nextln: lgr %r2, %r0 +; nextln: br %r14 + +function %clz_i32(i32) -> i32 { +block0(v0: i32): + v1 = clz v0 + return v1 +} + +; check: llgfr %r2, %r2 +; nextln: flogr %r0, %r2 +; nextln: lr %r2, %r0 +; 
nextln: ahi %r2, -32 +; nextln: br %r14 + +function %clz_i16(i16) -> i16 { +block0(v0: i16): + v1 = clz v0 + return v1 +} + +; check: llghr %r2, %r2 +; nextln: flogr %r0, %r2 +; nextln: lr %r2, %r0 +; nextln: ahi %r2, -48 +; nextln: br %r14 + +function %clz_i8(i8) -> i8 { +block0(v0: i8): + v1 = clz v0 + return v1 +} + +; check: llgcr %r2, %r2 +; nextln: flogr %r0, %r2 +; nextln: lr %r2, %r0 +; nextln: ahi %r2, -56 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; CLS +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %cls_i64(i64) -> i64 { +block0(v0: i64): + v1 = cls v0 + return v1 +} + +; check: srag %r3, %r2, 63 +; nextln: xgr %r3, %r2 +; nextln: flogr %r0, %r2 +; nextln: lgr %r2, %r0 +; nextln: br %r14 + +function %cls_i32(i32) -> i32 { +block0(v0: i32): + v1 = cls v0 + return v1 +} + +; check: lgfr %r2, %r2 +; nextln: srag %r3, %r2, 63 +; nextln: xgr %r3, %r2 +; nextln: flogr %r0, %r2 +; nextln: lr %r2, %r0 +; nextln: ahi %r2, -32 +; nextln: br %r14 + +function %cls_i16(i16) -> i16 { +block0(v0: i16): + v1 = cls v0 + return v1 +} + +; check: lghr %r2, %r2 +; nextln: srag %r3, %r2, 63 +; nextln: xgr %r3, %r2 +; nextln: flogr %r0, %r2 +; nextln: lr %r2, %r0 +; nextln: ahi %r2, -48 +; nextln: br %r14 + +function %cls_i8(i8) -> i8 { +block0(v0: i8): + v1 = cls v0 + return v1 +} + +; check: lgbr %r2, %r2 +; nextln: srag %r3, %r2, 63 +; nextln: xgr %r3, %r2 +; nextln: flogr %r0, %r2 +; nextln: lr %r2, %r0 +; nextln: ahi %r2, -56 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; CTZ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %ctz_i64(i64) -> i64 { +block0(v0: i64): + v1 = ctz v0 + return v1 +} + +; check: lcgr %r3, %r2 +; nextln: ngrk %r2, %r3, %r2 +; nextln: flogr %r0, %r2 +; nextln: locghie %r0, -1 +; nextln: lghi %r2, 63 +; nextln: sgr %r2, %r0 +; nextln: br %r14 + +function %ctz_i32(i32) -> i32 { +block0(v0: i32): + v1 = ctz v0 + return v1 +} + +; check: oihl %r2, 1 +; nextln: lcgr %r3, %r2 +; nextln: 
ngrk %r2, %r3, %r2 +; nextln: flogr %r0, %r2 +; nextln: lhi %r2, 63 +; nextln: sr %r2, %r0 +; nextln: br %r14 + +function %ctz_i16(i16) -> i16 { +block0(v0: i16): + v1 = ctz v0 + return v1 +} + +; check: oilh %r2, 1 +; nextln: lcgr %r3, %r2 +; nextln: ngrk %r2, %r3, %r2 +; nextln: flogr %r0, %r2 +; nextln: lhi %r2, 63 +; nextln: sr %r2, %r0 +; nextln: br %r14 + +function %ctz_i8(i8) -> i8 { +block0(v0: i8): + v1 = ctz v0 + return v1 +} + +; check: oill %r2, 256 +; nextln: lcgr %r3, %r2 +; nextln: ngrk %r2, %r3, %r2 +; nextln: flogr %r0, %r2 +; nextln: lhi %r2, 63 +; nextln: sr %r2, %r0 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; POPCNT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %popcnt_i64(i64) -> i64 { +block0(v0: i64): + v1 = popcnt v0 + return v1 +} + +; check: popcnt %r2, %r2, 8 +; nextln: br %r14 + +function %popcnt_i32(i32) -> i32 { +block0(v0: i32): + v1 = popcnt v0 + return v1 +} + +; check: llgfr %r2, %r2 +; nextln: popcnt %r2, %r2, 8 +; nextln: br %r14 + +function %popcnt_i16(i16) -> i16 { +block0(v0: i16): + v1 = popcnt v0 + return v1 +} + +; check: llghr %r2, %r2 +; nextln: popcnt %r2, %r2, 8 +; nextln: br %r14 + +function %popcnt_i8(i8) -> i8 { +block0(v0: i8): + v1 = popcnt v0 + return v1 +} + +; check: popcnt %r2, %r2 +; nextln: br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/bitwise.clif b/cranelift/filetests/filetests/isa/s390x/bitwise.clif new file mode 100644 index 0000000000..dd1e75cf2f --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/bitwise.clif @@ -0,0 +1,490 @@ + +test compile +target s390x + +; FIXME: add immediate operand versions + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BAND +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %band_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = band.i64 v0, v1 + return v2 +} + +; check: ngr %r2, %r3 +; nextln: br %r14 + +function %band_i64_mem(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = load.i64 v1 + v3 = 
band.i64 v0, v2 + return v3 +} + +; check: ng %r2, 0(%r3) +; nextln: br %r14 + +function %band_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = band.i32 v0, v1 + return v2 +} + +; check: nr %r2, %r3 +; nextln: br %r14 + +function %band_i32_mem(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1 + v3 = band.i32 v0, v2 + return v3 +} + +; check: n %r2, 0(%r3) +; nextln: br %r14 + +function %band_i32_memoff(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1+4096 + v3 = band.i32 v0, v2 + return v3 +} + +; check: ny %r2, 4096(%r3) +; nextln: br %r14 + +function %band_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = band.i16 v0, v1 + return v2 +} + +; check: nr %r2, %r3 +; nextln: br %r14 + +function %band_i16_mem(i16, i64) -> i16 { +block0(v0: i16, v1: i64): + v2 = load.i16 v1 + v3 = band.i16 v0, v2 + return v3 +} + +; check: llh %r3, 0(%r3) +; nextln: nr %r2, %r3 +; nextln: br %r14 + +function %band_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = band.i8 v0, v1 + return v2 +} + +; check: nr %r2, %r3 +; nextln: br %r14 + +function %band_i8_mem(i8, i64) -> i8 { +block0(v0: i8, v1: i64): + v2 = load.i8 v1 + v3 = band.i8 v0, v2 + return v3 +} + +; check: llc %r3, 0(%r3) +; nextln: nr %r2, %r3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BOR +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %bor_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = bor.i64 v0, v1 + return v2 +} + +; check: ogr %r2, %r3 +; nextln: br %r14 + +function %bor_i64_mem(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = load.i64 v1 + v3 = bor.i64 v0, v2 + return v3 +} + +; check: og %r2, 0(%r3) +; nextln: br %r14 + +function %bor_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bor.i32 v0, v1 + return v2 +} + +; check: or %r2, %r3 +; nextln: br %r14 + +function %bor_i32_mem(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1 + v3 = bor.i32 v0, v2 + return v3 +} + +; check: o %r2, 0(%r3) +; 
nextln: br %r14 + +function %bor_i32_memoff(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1+4096 + v3 = bor.i32 v0, v2 + return v3 +} + +; check: oy %r2, 4096(%r3) +; nextln: br %r14 + +function %bor_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = bor.i16 v0, v1 + return v2 +} + +; check: or %r2, %r3 +; nextln: br %r14 + +function %bor_i16_mem(i16, i64) -> i16 { +block0(v0: i16, v1: i64): + v2 = load.i16 v1 + v3 = bor.i16 v0, v2 + return v3 +} + +; check: llh %r3, 0(%r3) +; nextln: or %r2, %r3 +; nextln: br %r14 + +function %bor_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = bor.i8 v0, v1 + return v2 +} + +; check: or %r2, %r3 +; nextln: br %r14 + +function %bor_i8_mem(i8, i64) -> i8 { +block0(v0: i8, v1: i64): + v2 = load.i8 v1 + v3 = bor.i8 v0, v2 + return v3 +} + +; check: llc %r3, 0(%r3) +; nextln: or %r2, %r3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BXOR +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %bxor_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = bxor.i64 v0, v1 + return v2 +} + +; check: xgr %r2, %r3 +; nextln: br %r14 + +function %bxor_i64_mem(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = load.i64 v1 + v3 = bxor.i64 v0, v2 + return v3 +} + +; check: xg %r2, 0(%r3) +; nextln: br %r14 + +function %bxor_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bxor.i32 v0, v1 + return v2 +} + +; check: xr %r2, %r3 +; nextln: br %r14 + +function %bxor_i32_mem(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1 + v3 = bxor.i32 v0, v2 + return v3 +} + +; check: x %r2, 0(%r3) +; nextln: br %r14 + +function %bxor_i32_memoff(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1+4096 + v3 = bxor.i32 v0, v2 + return v3 +} + +; check: xy %r2, 4096(%r3) +; nextln: br %r14 + +function %bxor_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = bxor.i16 v0, v1 + return v2 +} + +; check: xr %r2, %r3 +; nextln: br %r14 + +function %bxor_i16_mem(i16, i64) -> 
i16 { +block0(v0: i16, v1: i64): + v2 = load.i16 v1 + v3 = bxor.i16 v0, v2 + return v3 +} + +; check: llh %r3, 0(%r3) +; nextln: xr %r2, %r3 +; nextln: br %r14 + +function %bxor_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = bxor.i8 v0, v1 + return v2 +} + +; check: xr %r2, %r3 +; nextln: br %r14 + +function %bxor_i8_mem(i8, i64) -> i8 { +block0(v0: i8, v1: i64): + v2 = load.i8 v1 + v3 = bxor.i8 v0, v2 + return v3 +} + +; check: llc %r3, 0(%r3) +; nextln: xr %r2, %r3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BAND_NOT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %band_not_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = band_not.i64 v0, v1 + return v2 +} + +; check: nngrk %r2, %r2, %r3 +; nextln: br %r14 + +function %band_not_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = band_not.i32 v0, v1 + return v2 +} + +; check: nnrk %r2, %r2, %r3 +; nextln: br %r14 + +function %band_not_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = band_not.i16 v0, v1 + return v2 +} + +; check: nnrk %r2, %r2, %r3 +; nextln: br %r14 + +function %band_not_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = band_not.i8 v0, v1 + return v2 +} + +; check: nnrk %r2, %r2, %r3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BOR_NOT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %bor_not_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = bor_not.i64 v0, v1 + return v2 +} + +; check: nogrk %r2, %r2, %r3 +; nextln: br %r14 + +function %bor_not_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bor_not.i32 v0, v1 + return v2 +} + +; check: nork %r2, %r2, %r3 +; nextln: br %r14 + +function %bor_not_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = bor_not.i16 v0, v1 + return v2 +} + +; check: nork %r2, %r2, %r3 +; nextln: br %r14 + +function %bor_not_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = bor_not.i8 v0, v1 + return v2 +} + +; check: nork %r2, %r2, %r3 +; nextln: br %r14 + 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BXOR_NOT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %bxor_not_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = bxor_not.i64 v0, v1 + return v2 +} + +; check: nxgrk %r2, %r2, %r3 +; nextln: br %r14 + +function %bxor_not_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bxor_not.i32 v0, v1 + return v2 +} + +; check: nxrk %r2, %r2, %r3 +; nextln: br %r14 + +function %bxor_not_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = bxor_not.i16 v0, v1 + return v2 +} + +; check: nxrk %r2, %r2, %r3 +; nextln: br %r14 + +function %bxor_not_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = bxor_not.i8 v0, v1 + return v2 +} + +; check: nxrk %r2, %r2, %r3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BNOT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %bnot_i64(i64) -> i64 { +block0(v0: i64): + v1 = bnot.i64 v0 + return v1 +} + +; check: nogrk %r2, %r2, %r2 +; nextln: br %r14 + +function %bnot_i32(i32) -> i32 { +block0(v0: i32): + v1 = bnot.i32 v0 + return v1 +} + +; check: nork %r2, %r2, %r2 +; nextln: br %r14 + +function %bnot_i16(i16) -> i16 { +block0(v0: i16): + v1 = bnot.i16 v0 + return v1 +} + +; check: nork %r2, %r2, %r2 +; nextln: br %r14 + +function %bnot_i8(i8) -> i8 { +block0(v0: i8): + v1 = bnot.i8 v0 + return v1 +} + +; check: nork %r2, %r2, %r2 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BITSELECT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %bitselect_i64(i64, i64, i64) -> i64 { +block0(v0: i64, v1: i64, v2: i64): + v3 = bitselect.i64 v0, v1, v2 + return v3 +} + +; check: ngr %r3, %r2 +; nextln: nngrk %r2, %r4, %r2 +; nextln: ogr %r2, %r3 +; nextln: br %r14 + +function %bitselect_i32(i32, i32, i32) -> i32 { +block0(v0: i32, v1: i32, v2: i32): + v3 = bitselect.i32 v0, v1, v2 + return v3 +} + +; check: nr %r3, %r2 +; nextln: nnrk %r2, %r4, %r2 +; nextln: or %r2, %r3 +; nextln: br %r14 + +function 
%bitselect_i16(i16, i16, i16) -> i16 { +block0(v0: i16, v1: i16, v2: i16): + v3 = bitselect.i16 v0, v1, v2 + return v3 +} + +; check: nr %r3, %r2 +; nextln: nnrk %r2, %r4, %r2 +; nextln: or %r2, %r3 +; nextln: br %r14 + +function %bitselect_i8(i8, i8, i8) -> i8 { +block0(v0: i8, v1: i8, v2: i8): + v3 = bitselect.i8 v0, v1, v2 + return v3 +} + +; check: nr %r3, %r2 +; nextln: nnrk %r2, %r4, %r2 +; nextln: or %r2, %r3 +; nextln: br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/call.clif b/cranelift/filetests/filetests/isa/s390x/call.clif new file mode 100644 index 0000000000..4fee8cf9f8 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/call.clif @@ -0,0 +1,113 @@ +test compile +target s390x + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; CALL +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %call(i64) -> i64 { + fn0 = %g(i64) -> i64 + +block0(v0: i64): + v1 = call fn0(v0) + return v1 +} + +; check: stmg %r14, %r15, 112(%r15) +; nextln: aghi %r15, -160 +; nextln: virtual_sp_offset_adjust 160 +; nextln: bras %r1, 12 ; data %g + 0 ; lg %r3, 0(%r1) +; nextln: basr %r14, %r3 +; nextln: lmg %r14, %r15, 272(%r15) +; nextln: br %r14 + +function %call_uext(i32) -> i64 { + fn0 = %g(i32 uext) -> i64 + +block0(v0: i32): + v1 = call fn0(v0) + return v1 +} + +; check: stmg %r14, %r15, 112(%r15) +; nextln: aghi %r15, -160 +; nextln: virtual_sp_offset_adjust 160 +; nextln: llgfr %r2, %r2 +; nextln: bras %r1, 12 ; data %g + 0 ; lg %r3, 0(%r1) +; nextln: basr %r14, %r3 +; nextln: lmg %r14, %r15, 272(%r15) +; nextln: br %r14 + +function %ret_uext(i32) -> i32 uext { +block0(v0: i32): + return v0 +} + +; check: llgfr %r2, %r2 +; nextln: br %r14 + +function %call_uext(i32) -> i64 { + fn0 = %g(i32 sext) -> i64 + +block0(v0: i32): + v1 = call fn0(v0) + return v1 +} + +; check: stmg %r14, %r15, 112(%r15) +; nextln: aghi %r15, -160 +; nextln: virtual_sp_offset_adjust 160 +; nextln: lgfr %r2, %r2 +; nextln: bras %r1, 12 ; data %g + 0 ; lg %r3, 0(%r1) +; 
nextln: basr %r14, %r3 +; nextln: lmg %r14, %r15, 272(%r15) +; nextln: br %r14 + +function %ret_uext(i32) -> i32 sext { +block0(v0: i32): + return v0 +} + +; check: lgfr %r2, %r2 +; nextln: br %r14 + +function %call_colocated(i64) -> i64 { + fn0 = colocated %g(i64) -> i64 + +block0(v0: i64): + v1 = call fn0(v0) + return v1 +} + +; check: stmg %r14, %r15, 112(%r15) +; nextln: aghi %r15, -160 +; nextln: virtual_sp_offset_adjust 160 +; nextln: brasl %r14, %g +; nextln: lmg %r14, %r15, 272(%r15) +; nextln: br %r14 + +function %f2(i32) -> i64 { + fn0 = %g(i32 uext) -> i64 + +block0(v0: i32): + v1 = call fn0(v0) + return v1 +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; CALL_INDIRECT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %call_indirect(i64, i64) -> i64 { + sig0 = (i64) -> i64 +block0(v0: i64, v1: i64): + v2 = call_indirect.i64 sig0, v1(v0) + return v2 +} + +; check: stmg %r14, %r15, 112(%r15) +; nextln: aghi %r15, -160 +; nextln: virtual_sp_offset_adjust 160 +; nextln: basr %r14, %r3 +; nextln: lmg %r14, %r15, 272(%r15) +; nextln: br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/condbr.clif b/cranelift/filetests/filetests/isa/s390x/condbr.clif new file mode 100644 index 0000000000..12b81b705c --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/condbr.clif @@ -0,0 +1,62 @@ +test compile +target s390x + +function %f(i64, i64) -> b1 { +block0(v0: i64, v1: i64): + v2 = icmp eq v0, v1 + return v2 +} + +; check: clgr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochie %r2, 1 +; nextln: br %r14 + +function %f(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = icmp eq v0, v1 + brnz v2, block1 + jump block2 + +block1: + v4 = iconst.i64 1 + return v4 + +block2: + v5 = iconst.i64 2 + return v5 +} + +; check: Block 0: +; check: clgr %r2, %r3 +; nextln: jge label1 ; jg label2 +; check: Block 1: +; check: lghi %r2, 1 +; nextln: br %r14 +; check: Block 2: +; check: lghi %r2, 2 +; nextln: br %r14 + +function %f(i64, i64) -> i64 { +block0(v0: 
i64, v1: i64): + v2 = icmp eq v0, v1 + brnz v2, block1 + jump block1 + +block1: + v4 = iconst.i64 1 + return v4 +} + +; FIXME: Should optimize away branches + +; check: Block 0: +; check: clgr %r2, %r3 +; nextln: jge label1 ; jg label2 +; check: Block 1: +; check: jg label3 +; check: Block 2: +; check: jg label3 +; check: Block 3: +; check: lghi %r2, 1 +; nextln: br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/condops.clif b/cranelift/filetests/filetests/isa/s390x/condops.clif new file mode 100644 index 0000000000..aaf1c3ba54 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/condops.clif @@ -0,0 +1,43 @@ +test compile +target s390x + +function %f(i8, i64, i64) -> i64 { +block0(v0: i8, v1: i64, v2: i64): + v3 = iconst.i8 42 + v4 = icmp eq v0, v3 + v5 = select.i64 v4, v1, v2 + return v5 +} + +; check: llcr %r2, %r2 +; nextln: clfi %r2, 42 +; nextln: locgre %r4, %r3 +; nextln: lgr %r2, %r4 +; nextln: br %r14 + +function %g(b1, i8, i8) -> i8 { +block0(v0: b1, v1: i8, v2: i8): + v3 = select.i8 v0, v1, v2 + return v3 +} + +; FIXME: optimize i8/i16 compares + +; check: llcr %r2, %r2 +; nextln: chi %r2, 0 +; nextln: locrlh %r4, %r3 +; nextln: lr %r2, %r4 +; nextln: br %r14 + +function %i(i32, i8, i8) -> i8 { +block0(v0: i32, v1: i8, v2: i8): + v3 = iconst.i32 42 + v4 = icmp.i32 eq v0, v3 + v5 = select.i8 v4, v1, v2 + return v5 +} + +; check: clfi %r2, 42 +; nextln: locre %r4, %r3 +; nextln: lr %r2, %r4 +; nextln: br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/constants.clif b/cranelift/filetests/filetests/isa/s390x/constants.clif new file mode 100644 index 0000000000..96effdecde --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/constants.clif @@ -0,0 +1,113 @@ +test compile +target s390x + +function %f() -> b8 { +block0: + v0 = bconst.b8 true + return v0 +} + +; check: lhi %r2, 255 +; nextln: br %r14 + +function %f() -> b16 { +block0: + v0 = bconst.b16 false + return v0 +} + +; check: lhi %r2, 0 +; nextln: br %r14 + +function 
%f() -> i64 { +block0: + v0 = iconst.i64 0 + return v0 +} + +; check: lghi %r2, 0 +; nextln: br %r14 + +function %f() -> i64 { +block0: + v0 = iconst.i64 0xffff + return v0 +} + +; check: lgfi %r2, 65535 +; nextln: br %r14 + +function %f() -> i64 { +block0: + v0 = iconst.i64 0xffff0000 + return v0 +} + +; check: llilh %r2, 65535 +; nextln: br %r14 + +function %f() -> i64 { +block0: + v0 = iconst.i64 0xffff00000000 + return v0 +} + +; check: llihl %r2, 65535 +; nextln: br %r14 + +function %f() -> i64 { +block0: + v0 = iconst.i64 0xffff000000000000 + return v0 +} + +; check: llihh %r2, 65535 +; nextln: br %r14 + +function %f() -> i64 { +block0: + v0 = iconst.i64 0xffffffffffffffff + return v0 +} + +; check: lghi %r2, -1 +; nextln: br %r14 + +function %f() -> i64 { +block0: + v0 = iconst.i64 0xffffffffffff0000 + return v0 +} + +; check: lgfi %r2, -65536 +; nextln: br %r14 + +function %f() -> i64 { +block0: + v0 = iconst.i64 0xf34bf0a31212003a ; random digits + return v0 +} + +; check: llihf %r2, 4081840291 +; nextln: iilf %r2, 303169594 +; nextln: br %r14 + +function %f() -> i64 { +block0: + v0 = iconst.i64 0x12e900001ef40000 ; random digits with 2 clear half words + return v0 +} + +; check: llihh %r2, 4841 +; nextln: iilh %r2, 7924 +; nextln: br %r14 + +function %f() -> i32 { +block0: + v0 = iconst.i32 -1 + return v0 +} + +; check: lhi %r2, -1 +; nextln: br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/conversions.clif b/cranelift/filetests/filetests/isa/s390x/conversions.clif new file mode 100644 index 0000000000..5d57c8881a --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/conversions.clif @@ -0,0 +1,748 @@ +test compile +target s390x + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; UEXTEND +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %uextend_i32_i64(i32) -> i64 { +block0(v0: i32): + v1 = uextend.i64 v0 + return v1 +} + +; check: llgfr %r2, %r2 +; nextln: br %r14 + +function %uextend_i16_i64(i16) -> i64 { +block0(v0: i16): + 
v1 = uextend.i64 v0 + return v1 +} + +; check: llghr %r2, %r2 +; nextln: br %r14 + +function %uextend_i16_i32(i16) -> i32 { +block0(v0: i16): + v1 = uextend.i32 v0 + return v1 +} + +; check: llhr %r2, %r2 +; nextln: br %r14 + +function %uextend_i8_i64(i8) -> i64 { +block0(v0: i8): + v1 = uextend.i64 v0 + return v1 +} + +; check: llgcr %r2, %r2 +; nextln: br %r14 + +function %uextend_i8_i32(i8) -> i32 { +block0(v0: i8): + v1 = uextend.i32 v0 + return v1 +} + +; check: llcr %r2, %r2 +; nextln: br %r14 + +function %uextend_i8_i16(i8) -> i16 { +block0(v0: i8): + v1 = uextend.i16 v0 + return v1 +} + +; check: llcr %r2, %r2 +; nextln: br %r14 + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; SEXTEND +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %sextend_i32_i64(i32) -> i64 { +block0(v0: i32): + v1 = sextend.i64 v0 + return v1 +} + +; check: lgfr %r2, %r2 +; nextln: br %r14 + +function %sextend_i16_i64(i16) -> i64 { +block0(v0: i16): + v1 = sextend.i64 v0 + return v1 +} + +; check: lghr %r2, %r2 +; nextln: br %r14 + +function %sextend_i16_i32(i16) -> i32 { +block0(v0: i16): + v1 = sextend.i32 v0 + return v1 +} + +; check: lhr %r2, %r2 +; nextln: br %r14 + +function %sextend_i8_i64(i8) -> i64 { +block0(v0: i8): + v1 = sextend.i64 v0 + return v1 +} + +; check: lgbr %r2, %r2 +; nextln: br %r14 + +function %sextend_i8_i32(i8) -> i32 { +block0(v0: i8): + v1 = sextend.i32 v0 + return v1 +} + +; check: lbr %r2, %r2 +; nextln: br %r14 + +function %sextend_i8_i16(i8) -> i16 { +block0(v0: i8): + v1 = sextend.i16 v0 + return v1 +} + +; check: lbr %r2, %r2 +; nextln: br %r14 + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; IREDUCE +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %ireduce_i64_i32(i64, i64) -> i32 { +block0(v0: i64, v1: i64): + v2 = ireduce.i32 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %ireduce_i64_i16(i64, i64) -> i16 { +block0(v0: i64, v1: i64): + v2 = ireduce.i16 v1 + return v2 +} + +; check: lr %r2, %r3 +; 
nextln: br %r14 + +function %ireduce_i64_i8(i64, i64) -> i8 { +block0(v0: i64, v1: i64): + v2 = ireduce.i8 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %ireduce_i32_i16(i32, i32) -> i16 { +block0(v0: i32, v1: i32): + v2 = ireduce.i16 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %ireduce_i32_i8(i32, i32) -> i8 { +block0(v0: i32, v1: i32): + v2 = ireduce.i8 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %ireduce_i16_i8(i16, i16) -> i8 { +block0(v0: i16, v1: i16): + v2 = ireduce.i8 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BEXTEND +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %bextend_b32_b64(b32) -> b64 { +block0(v0: b32): + v1 = bextend.b64 v0 + return v1 +} + +; check: lgfr %r2, %r2 +; nextln: br %r14 + +function %bextend_b16_b64(b16) -> b64 { +block0(v0: b16): + v1 = bextend.b64 v0 + return v1 +} + +; check: lghr %r2, %r2 +; nextln: br %r14 + +function %bextend_b16_b32(b16) -> b32 { +block0(v0: b16): + v1 = bextend.b32 v0 + return v1 +} + +; check: lhr %r2, %r2 +; nextln: br %r14 + +function %bextend_b8_b64(b8) -> b64 { +block0(v0: b8): + v1 = bextend.b64 v0 + return v1 +} + +; check: lgbr %r2, %r2 +; nextln: br %r14 + +function %bextend_b8_b32(b8) -> b32 { +block0(v0: b8): + v1 = bextend.b32 v0 + return v1 +} + +; check: lbr %r2, %r2 +; nextln: br %r14 + +function %bextend_b8_b16(b8) -> b16 { +block0(v0: b8): + v1 = bextend.b16 v0 + return v1 +} + +; check: lbr %r2, %r2 +; nextln: br %r14 + +function %bextend_b1_b64(b1) -> b64 { +block0(v0: b1): + v1 = bextend.b64 v0 + return v1 +} + +; check: sllg %r2, %r2, 63 +; nextln: srag %r2, %r2, 63 +; nextln: br %r14 + +function %bextend_b1_b32(b1) -> b32 { +block0(v0: b1): + v1 = bextend.b32 v0 + return v1 +} + +; check: sllk %r2, %r2, 31 +; nextln: srak %r2, %r2, 31 +; nextln: br %r14 + +function %bextend_b1_b16(b1) -> b16 { +block0(v0: b1): + v1 = bextend.b16 
v0 + return v1 +} + +; check: sllk %r2, %r2, 31 +; nextln: srak %r2, %r2, 31 +; nextln: br %r14 + +function %bextend_b1_b8(b1) -> b8 { +block0(v0: b1): + v1 = bextend.b8 v0 + return v1 +} + +; check: sllk %r2, %r2, 31 +; nextln: srak %r2, %r2, 31 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BREDUCE +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %breduce_b64_b32(b64, b64) -> b32 { +block0(v0: b64, v1: b64): + v2 = breduce.b32 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %breduce_b64_b16(b64, b64) -> b16 { +block0(v0: b64, v1: b64): + v2 = breduce.b16 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %breduce_b64_b8(b64, b64) -> b8 { +block0(v0: b64, v1: b64): + v2 = breduce.b8 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %breduce_b64_b1(b64, b64) -> b1 { +block0(v0: b64, v1: b64): + v2 = breduce.b1 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %breduce_b32_b16(b32, b32) -> b16 { +block0(v0: b32, v1: b32): + v2 = breduce.b16 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %breduce_b32_b8(b32, b32) -> b8 { +block0(v0: b32, v1: b32): + v2 = breduce.b8 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %breduce_b32_b1(b32, b32) -> b1 { +block0(v0: b32, v1: b32): + v2 = breduce.b1 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %breduce_b16_b8(b16, b16) -> b8 { +block0(v0: b16, v1: b16): + v2 = breduce.b8 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %breduce_b16_b1(b16, b16) -> b1 { +block0(v0: b16, v1: b16): + v2 = breduce.b1 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %breduce_b8_b1(b8, b8) -> b1 { +block0(v0: b8, v1: b8): + v2 = breduce.b1 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BMASK +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + 
+function %bmask_b64_i64(b64, b64) -> i64 { +block0(v0: b64, v1: b64): + v2 = bmask.i64 v1 + return v2 +} + +; check: lgr %r2, %r3 +; nextln: br %r14 + +function %bmask_b64_i32(b64, b64) -> i32 { +block0(v0: b64, v1: b64): + v2 = bmask.i32 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %bmask_b64_i16(b64, b64) -> i16 { +block0(v0: b64, v1: b64): + v2 = bmask.i16 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %bmask_b64_i8(b64, b64) -> i8 { +block0(v0: b64, v1: b64): + v2 = bmask.i8 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %bmask_b32_i64(b32, b32) -> i64 { +block0(v0: b32, v1: b32): + v2 = bmask.i64 v1 + return v2 +} + +; check: lgfr %r2, %r3 +; nextln: br %r14 + +function %bmask_b32_i32(b32, b32) -> i32 { +block0(v0: b32, v1: b32): + v2 = bmask.i32 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %bmask_b32_i16(b32, b32) -> i16 { +block0(v0: b32, v1: b32): + v2 = bmask.i16 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %bmask_b32_i8(b32, b32) -> i8 { +block0(v0: b32, v1: b32): + v2 = bmask.i8 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %bmask_b16_i64(b16, b16) -> i64 { +block0(v0: b16, v1: b16): + v2 = bmask.i64 v1 + return v2 +} + +; check: lghr %r2, %r3 +; nextln: br %r14 + +function %bmask_b16_i32(b16, b16) -> i32 { +block0(v0: b16, v1: b16): + v2 = bmask.i32 v1 + return v2 +} + +; check: lhr %r2, %r3 +; nextln: br %r14 + +function %bmask_b16_i16(b16, b16) -> i16 { +block0(v0: b16, v1: b16): + v2 = bmask.i16 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %bmask_b16_i8(b16, b16) -> i8 { +block0(v0: b16, v1: b16): + v2 = bmask.i8 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %bmask_b8_i64(b8, b8) -> i64 { +block0(v0: b8, v1: b8): + v2 = bmask.i64 v1 + return v2 +} + +; check: lgbr %r2, %r3 +; nextln: br %r14 + +function %bmask_b8_i32(b8, b8) -> i32 { 
+block0(v0: b8, v1: b8): + v2 = bmask.i32 v1 + return v2 +} + +; check: lbr %r2, %r3 +; nextln: br %r14 + +function %bmask_b8_i16(b8, b8) -> i16 { +block0(v0: b8, v1: b8): + v2 = bmask.i16 v1 + return v2 +} + +; check: lbr %r2, %r3 +; nextln: br %r14 + +function %bmask_b8_i8(b8, b8) -> i8 { +block0(v0: b8, v1: b8): + v2 = bmask.i8 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %bmask_b1_i64(b1, b1) -> i64 { +block0(v0: b1, v1: b1): + v2 = bmask.i64 v1 + return v2 +} + +; check: sllg %r2, %r3, 63 +; nextln: srag %r2, %r2, 63 +; nextln: br %r14 + +function %bmask_b1_i32(b1, b1) -> i32 { +block0(v0: b1, v1: b1): + v2 = bmask.i32 v1 + return v2 +} + +; check: sllk %r2, %r3, 31 +; nextln: srak %r2, %r2, 31 +; nextln: br %r14 + +function %bmask_b1_i16(b1, b1) -> i16 { +block0(v0: b1, v1: b1): + v2 = bmask.i16 v1 + return v2 +} + +; check: sllk %r2, %r3, 31 +; nextln: srak %r2, %r2, 31 +; nextln: br %r14 + +function %bmask_b1_i8(b1, b1) -> i8 { +block0(v0: b1, v1: b1): + v2 = bmask.i8 v1 + return v2 +} + +; check: sllk %r2, %r3, 31 +; nextln: srak %r2, %r2, 31 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BINT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %bint_b64_i64(b64) -> i64 { +block0(v0: b64): + v1 = bint.i64 v0 + return v1 +} + +; check: lghi %r3, 1 +; nextln: ngr %r2, %r3 +; nextln: br %r14 + +function %bint_b64_i32(b64) -> i32 { +block0(v0: b64): + v1 = bint.i32 v0 + return v1 +} + +; check: nilf %r2, 1 +; nextln: br %r14 + +function %bint_b64_i16(b64) -> i16 { +block0(v0: b64): + v1 = bint.i16 v0 + return v1 +} + +; check: nill %r2, 1 +; nextln: br %r14 + +function %bint_b64_i8(b64) -> i8 { +block0(v0: b64): + v1 = bint.i8 v0 + return v1 +} + +; check: nill %r2, 1 +; nextln: br %r14 + +function %bint_b32_i64(b32) -> i64 { +block0(v0: b32): + v1 = bint.i64 v0 + return v1 +} + +; check: lghi %r3, 1 +; nextln: ngr %r2, %r3 +; nextln: br %r14 + +function %bint_b32_i32(b32) -> i32 { +block0(v0: b32): + v1 
= bint.i32 v0 + return v1 +} + +; check: nilf %r2, 1 +; nextln: br %r14 + +function %bint_b32_i16(b32) -> i16 { +block0(v0: b32): + v1 = bint.i16 v0 + return v1 +} + +; check: nill %r2, 1 +; nextln: br %r14 + +function %bint_b32_i8(b32) -> i8 { +block0(v0: b32): + v1 = bint.i8 v0 + return v1 +} + +; check: nill %r2, 1 +; nextln: br %r14 + +function %bint_b16_i64(b16) -> i64 { +block0(v0: b16): + v1 = bint.i64 v0 + return v1 +} + +; check: lghi %r3, 1 +; nextln: ngr %r2, %r3 +; nextln: br %r14 + +function %bint_b16_i32(b16) -> i32 { +block0(v0: b16): + v1 = bint.i32 v0 + return v1 +} + +; check: nilf %r2, 1 +; nextln: br %r14 + +function %bint_b16_i16(b16) -> i16 { +block0(v0: b16): + v1 = bint.i16 v0 + return v1 +} + +; check: nill %r2, 1 +; nextln: br %r14 + +function %bint_b16_i8(b16) -> i8 { +block0(v0: b16): + v1 = bint.i8 v0 + return v1 +} + +; check: nill %r2, 1 +; nextln: br %r14 + +function %bint_b8_i64(b8) -> i64 { +block0(v0: b8): + v1 = bint.i64 v0 + return v1 +} + +; check: lghi %r3, 1 +; nextln: ngr %r2, %r3 +; nextln: br %r14 + +function %bint_b8_i32(b8) -> i32 { +block0(v0: b8): + v1 = bint.i32 v0 + return v1 +} + +; check: nilf %r2, 1 +; nextln: br %r14 + +function %bint_b8_i16(b8) -> i16 { +block0(v0: b8): + v1 = bint.i16 v0 + return v1 +} + +; check: nill %r2, 1 +; nextln: br %r14 + +function %bint_b8_i8(b8) -> i8 { +block0(v0: b8): + v1 = bint.i8 v0 + return v1 +} + +; check: nill %r2, 1 +; nextln: br %r14 + +function %bint_b1_i64(b1) -> i64 { +block0(v0: b1): + v1 = bint.i64 v0 + return v1 +} + +; check: lghi %r3, 1 +; nextln: ngr %r2, %r3 +; nextln: br %r14 + +function %bint_b1_i32(b1) -> i32 { +block0(v0: b1): + v1 = bint.i32 v0 + return v1 +} + +; check: nilf %r2, 1 +; nextln: br %r14 + +function %bint_b1_i16(b1) -> i16 { +block0(v0: b1): + v1 = bint.i16 v0 + return v1 +} + +; check: nill %r2, 1 +; nextln: br %r14 + +function %bint_b1_i8(b1) -> i8 { +block0(v0: b1): + v1 = bint.i8 v0 + return v1 +} + +; check: nill %r2, 1 +; nextln: br %r14 + 
diff --git a/cranelift/filetests/filetests/isa/s390x/div-traps.clif b/cranelift/filetests/filetests/isa/s390x/div-traps.clif new file mode 100644 index 0000000000..2d7428b50d --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/div-traps.clif @@ -0,0 +1,355 @@ +test compile +set avoid_div_traps=1 +target s390x + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; SDIV +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %sdiv_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = sdiv.i64 v0, v1 + return v2 +} + +; check: lgr %r1, %r2 +; nextln: cgite %r3, 0 +; nextln: llihf %r2, 2147483647 +; nextln: iilf %r2, 4294967295 +; nextln: xgr %r2, %r1 +; nextln: ngr %r2, %r3 +; nextln: cgite %r2, -1 +; nextln: dsgr %r0, %r3 +; nextln: lgr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i64_imm(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 2 + v2 = sdiv.i64 v0, v1 + return v2 +} + +; check: lgr %r1, %r2 +; nextln: lghi %r2, 2 +; nextln: dsgr %r0, %r2 +; nextln: lgr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = sdiv.i32 v0, v1 + return v2 +} + +; check: lgfr %r1, %r2 +; nextln: cite %r3, 0 +; nextln: iilf %r2, 2147483647 +; nextln: xr %r2, %r1 +; nextln: nr %r2, %r3 +; nextln: cite %r2, -1 +; nextln: dsgfr %r0, %r3 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = sdiv.i32 v0, v1 + return v2 +} + +; check: lgfr %r1, %r2 +; nextln: lhi %r2, 2 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = sdiv.i16 v0, v1 + return v2 +} + +; check: lghr %r1, %r2 +; nextln: lhr %r2, %r3 +; nextln: cite %r2, 0 +; nextln: lhi %r3, 32767 +; nextln: xr %r3, %r1 +; nextln: nr %r3, %r2 +; nextln: cite %r3, -1 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i16_imm(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i16 2 + v2 = 
sdiv.i16 v0, v1 + return v2 +} + +; check: lghr %r1, %r2 +; nextln: lhi %r2, 2 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = sdiv.i8 v0, v1 + return v2 +} + +; check: lgbr %r1, %r2 +; nextln: lbr %r2, %r3 +; nextln: cite %r2, 0 +; nextln: lhi %r3, 127 +; nextln: xr %r3, %r1 +; nextln: nr %r3, %r2 +; nextln: cite %r3, -1 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 2 + v2 = sdiv.i8 v0, v1 + return v2 +} + +; check: lgbr %r1, %r2 +; nextln: lhi %r2, 2 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; UDIV +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %udiv_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = udiv.i64 v0, v1 + return v2 +} + +; check: lghi %r0, 0 +; nextln: lgr %r1, %r2 +; nextln: cgite %r3, 0 +; nextln: dlgr %r0, %r3 +; nextln: lgr %r2, %r1 +; nextln: br %r14 + +function %udiv_i64_imm(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 2 + v2 = udiv.i64 v0, v1 + return v2 +} + +; check: lghi %r0, 0 +; nextln: lgr %r1, %r2 +; nextln: lghi %r2, 2 +; nextln: dlgr %r0, %r2 +; nextln: lgr %r2, %r1 +; nextln: br %r14 + +function %udiv_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = udiv.i32 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: lr %r1, %r2 +; nextln: cite %r3, 0 +; nextln: dlr %r0, %r3 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %udiv_i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = udiv.i32 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: lr %r1, %r2 +; nextln: lhi %r2, 2 +; nextln: dlr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %udiv_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = udiv.i16 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: llhr %r1, %r2 +; nextln: llhr %r2, %r3 +; nextln: cite %r2, 
0 +; nextln: dlr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %udiv_i16_imm(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i16 2 + v2 = udiv.i16 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: llhr %r1, %r2 +; nextln: lhi %r2, 2 +; nextln: dlr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %udiv_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = udiv.i8 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: llcr %r1, %r2 +; nextln: llcr %r2, %r3 +; nextln: cite %r2, 0 +; nextln: dlr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %udiv_i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 2 + v2 = udiv.i8 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: llcr %r1, %r2 +; nextln: lhi %r2, 2 +; nextln: dlr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; SREM +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %srem_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = srem.i64 v0, v1 + return v2 +} + +; check: lgr %r1, %r2 +; nextln: cgite %r3, 0 +; nextln: cghi %r3, -1 +; nextln: locghie %r1, 0 +; nextln: dsgr %r0, %r3 +; nextln: lgr %r2, %r0 +; nextln: br %r14 + +function %srem_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = srem.i32 v0, v1 + return v2 +} + +; check: lgfr %r1, %r2 +; nextln: cite %r3, 0 +; nextln: dsgfr %r0, %r3 +; nextln: lr %r2, %r0 +; nextln: br %r14 + +function %srem_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = srem.i16 v0, v1 + return v2 +} + +; check: lghr %r1, %r2 +; nextln: lhr %r2, %r3 +; nextln: cite %r2, 0 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r0 +; nextln: br %r14 + +function %srem_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = srem.i8 v0, v1 + return v2 +} + +; check: lgbr %r1, %r2 +; nextln: lbr %r2, %r3 +; nextln: cite %r2, 0 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r0 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; UREM 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %urem_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = urem.i64 v0, v1 + return v2 +} + +; check: lghi %r0, 0 +; nextln: lgr %r1, %r2 +; nextln: cgite %r3, 0 +; nextln: dlgr %r0, %r3 +; nextln: lgr %r2, %r0 +; nextln: br %r14 + +function %urem_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = urem.i32 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: lr %r1, %r2 +; nextln: cite %r3, 0 +; nextln: dlr %r0, %r3 +; nextln: lr %r2, %r0 +; nextln: br %r14 + +function %urem_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = urem.i16 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: llhr %r1, %r2 +; nextln: llhr %r2, %r3 +; nextln: cite %r2, 0 +; nextln: dlr %r0, %r2 +; nextln: lr %r2, %r0 +; nextln: br %r14 + +function %urem_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = urem.i8 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: llcr %r1, %r2 +; nextln: llcr %r2, %r3 +; nextln: cite %r2, 0 +; nextln: dlr %r0, %r2 +; nextln: lr %r2, %r0 +; nextln: br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/floating-point.clif b/cranelift/filetests/filetests/isa/s390x/floating-point.clif new file mode 100644 index 0000000000..6f95c82487 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/floating-point.clif @@ -0,0 +1,711 @@ +test compile +target s390x + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; F32CONST/F64CONST +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; FIXME: should use FZERO instruction +; FIXME: should use out-of-line literal pool + +function %f32const_zero() -> f32 { +block0: + v1 = f32const 0x0.0 + return v1 +} + +; check: bras %r1, 8 ; data.f32 0 ; le %f0, 0(%r1) +; nextln: br %r14 + +function %f64const_zero() -> f64 { +block0: + v1 = f64const 0x0.0 + return v1 +} + +; check: bras %r1, 12 ; data.f64 0 ; ld %f0, 0(%r1) +; nextln: br %r14 + +function %f32const_one() -> f32 { +block0: + v1 = f32const 0x1.0 + return v1 +} + +; check: bras %r1, 8 ; 
data.f32 1 ; le %f0, 0(%r1) +; nextln: br %r14 + +function %f64const_one() -> f64 { +block0: + v1 = f64const 0x1.0 + return v1 +} + +; check: bras %r1, 12 ; data.f64 1 ; ld %f0, 0(%r1) +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FADD +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %fadd_f32(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fadd v0, v1 + return v2 +} + +; check: aebr %f0, %f2 +; nextln: br %r14 + +function %fadd_f64(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fadd v0, v1 + return v2 +} + +; check: adbr %f0, %f2 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FSUB +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %fsub_f32(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fsub v0, v1 + return v2 +} + +; check: sebr %f0, %f2 +; nextln: br %r14 + +function %fsub_f64(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fsub v0, v1 + return v2 +} + +; check: sdbr %f0, %f2 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FMUL +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %fmul_f32(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fmul v0, v1 + return v2 +} + +; check: meebr %f0, %f2 +; nextln: br %r14 + +function %fmul_f64(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fmul v0, v1 + return v2 +} + +; check: mdbr %f0, %f2 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FDIV +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %fdiv_f32(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fdiv v0, v1 + return v2 +} + +; check: debr %f0, %f2 +; nextln: br %r14 + +function %fdiv_f64(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fdiv v0, v1 + return v2 +} + +; check: ddbr %f0, %f2 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FMIN +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %fmin_f32(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fmin v0, v1 + return v2 +} + +; check: 
wfminsb %f0, %f0, %f2, 1 +; nextln: br %r14 + +function %fmin_f64(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fmin v0, v1 + return v2 +} + +; check: wfmindb %f0, %f0, %f2, 1 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FMAX +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %fmax_f32(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fmax v0, v1 + return v2 +} + +; check: wfmaxsb %f0, %f0, %f2, 1 +; nextln: br %r14 + +function %fmax_f64(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fmax v0, v1 + return v2 +} + +; check: wfmaxdb %f0, %f0, %f2, 1 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; SQRT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %sqrt_f32(f32) -> f32 { +block0(v0: f32): + v1 = sqrt v0 + return v1 +} + +; check: sqebr %f0, %f0 +; nextln: br %r14 + +function %sqrt_f64(f64) -> f64 { +block0(v0: f64): + v1 = sqrt v0 + return v1 +} + +; check: sqdbr %f0, %f0 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FABS +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %fabs_f32(f32) -> f32 { +block0(v0: f32): + v1 = fabs v0 + return v1 +} + +; check: lpebr %f0, %f0 +; nextln: br %r14 + +function %fabs_f64(f64) -> f64 { +block0(v0: f64): + v1 = fabs v0 + return v1 +} + +; check: lpdbr %f0, %f0 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FNEG +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %fneg_f32(f32) -> f32 { +block0(v0: f32): + v1 = fneg v0 + return v1 +} + +; check: lcebr %f0, %f0 +; nextln: br %r14 + +function %fneg_f64(f64) -> f64 { +block0(v0: f64): + v1 = fneg v0 + return v1 +} + +; check: lcdbr %f0, %f0 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FPROMOTE/FDEMOTE +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %fpromote_f32(f32) -> f64 { +block0(v0: f32): + v1 = fpromote.f64 v0 + return v1 +} + +; check: ldebr %f0, %f0 +; nextln: br %r14 + +function %fdemote_f64(f64) -> f32 { 
+block0(v0: f64): + v1 = fdemote.f32 v0 + return v1 +} + +; check: ledbr %f0, %f0 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; CEIL +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %ceil_f32(f32) -> f32 { +block0(v0: f32): + v1 = ceil v0 + return v1 +} + +; check: fiebr %f0, %f0, 6 +; nextln: br %r14 + +function %ceil_f64(f64) -> f64 { +block0(v0: f64): + v1 = ceil v0 + return v1 +} + +; check: fidbr %f0, %f0, 6 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FLOOR +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %floor_f32(f32) -> f32 { +block0(v0: f32): + v1 = floor v0 + return v1 +} + +; check: fiebr %f0, %f0, 7 +; nextln: br %r14 + +function %floor_f64(f64) -> f64 { +block0(v0: f64): + v1 = floor v0 + return v1 +} + +; check: fidbr %f0, %f0, 7 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; TRUNC +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %trunc_f32(f32) -> f32 { +block0(v0: f32): + v1 = trunc v0 + return v1 +} + +; check: fiebr %f0, %f0, 5 +; nextln: br %r14 + +function %trunc_f64(f64) -> f64 { +block0(v0: f64): + v1 = trunc v0 + return v1 +} + +; check: fidbr %f0, %f0, 5 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; NEAREST +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %nearest_f32(f32) -> f32 { +block0(v0: f32): + v1 = nearest v0 + return v1 +} + +; check: fiebr %f0, %f0, 4 +; nextln: br %r14 + +function %nearest_f64(f64) -> f64 { +block0(v0: f64): + v1 = nearest v0 + return v1 +} + +; check: fidbr %f0, %f0, 4 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FMA +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %fma_f32(f32, f32, f32) -> f32 { +block0(v0: f32, v1: f32, v2: f32): + v3 = fma v0, v1, v2 + return v3 +} + +; FIXME: regalloc + +; check: maebr %f4, %f0, %f2 +; nextln: ler %f0, %f4 +; nextln: br %r14 + +function %fma_f64(f64, f64, f64) -> f64 { +block0(v0: f64, v1: f64, v2: f64): + v3 = fma 
v0, v1, v2 + return v3 +} + +; check: madbr %f4, %f0, %f2 +; nextln: ldr %f0, %f4 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FCOPYSIGN +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %fcopysign_f32(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fcopysign v0, v1 + return v2 +} + +; check: cpsdr %f0, %f2, %f0 +; nextln: br %r14 + +function %fcopysign_f64(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fcopysign v0, v1 + return v2 +} + +; check: cpsdr %f0, %f2, %f0 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FCVT_TO_UINT/FCVT_TO_SINT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %fcvt_to_uint_f32_i32(f32) -> i32 { +block0(v0: f32): + v1 = fcvt_to_uint.i32 v0 + return v1 +} + +; check: cebr %f0, %f0 +; nextln: jno 6 ; trap +; nextln: clfebr %r2, 5, %f0, 0 +; nextln: jno 6 ; trap +; nextln: br %r14 + +function %fcvt_to_sint_f32_i32(f32) -> i32 { +block0(v0: f32): + v1 = fcvt_to_sint.i32 v0 + return v1 +} + +; check: cebr %f0, %f0 +; nextln: jno 6 ; trap +; nextln: cfebra %r2, 5, %f0, 0 +; nextln: jno 6 ; trap +; nextln: br %r14 + +function %fcvt_to_uint_f32_i64(f32) -> i64 { +block0(v0: f32): + v1 = fcvt_to_uint.i64 v0 + return v1 +} + +; check: cebr %f0, %f0 +; nextln: jno 6 ; trap +; nextln: clgebr %r2, 5, %f0, 0 +; nextln: jno 6 ; trap +; nextln: br %r14 + +function %fcvt_to_sint_f32_i64(f32) -> i64 { +block0(v0: f32): + v1 = fcvt_to_sint.i64 v0 + return v1 +} + +; check: cebr %f0, %f0 +; nextln: jno 6 ; trap +; nextln: cgebra %r2, 5, %f0, 0 +; nextln: jno 6 ; trap +; nextln: br %r14 + +function %fcvt_to_uint_f64_i32(f64) -> i32 { +block0(v0: f64): + v1 = fcvt_to_uint.i32 v0 + return v1 +} + +; check: cdbr %f0, %f0 +; nextln: jno 6 ; trap +; nextln: clfdbr %r2, 5, %f0, 0 +; nextln: jno 6 ; trap +; nextln: br %r14 + +function %fcvt_to_sint_f64_i32(f64) -> i32 { +block0(v0: f64): + v1 = fcvt_to_sint.i32 v0 + return v1 +} + +; check: cdbr %f0, %f0 +; nextln: jno 6 ; trap +; nextln: 
cfdbra %r2, 5, %f0, 0 +; nextln: jno 6 ; trap +; nextln: br %r14 + +function %fcvt_to_uint_f64_i64(f64) -> i64 { +block0(v0: f64): + v1 = fcvt_to_uint.i64 v0 + return v1 +} + +; check: cdbr %f0, %f0 +; nextln: jno 6 ; trap +; nextln: clgdbr %r2, 5, %f0, 0 +; nextln: jno 6 ; trap +; nextln: br %r14 + +function %fcvt_to_sint_f64_i64(f64) -> i64 { +block0(v0: f64): + v1 = fcvt_to_sint.i64 v0 + return v1 +} + +; check: cdbr %f0, %f0 +; nextln: jno 6 ; trap +; nextln: cgdbra %r2, 5, %f0, 0 +; nextln: jno 6 ; trap +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FCVT_FROM_UINT/FCVT_FROM_SINT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %fcvt_from_uint_i32_f32(i32) -> f32 { +block0(v0: i32): + v1 = fcvt_from_uint.f32 v0 + return v1 +} + +; check: celfbr %f0, 0, %r2, 0 +; nextln: br %r14 + +function %fcvt_from_sint_i32_f32(i32) -> f32 { +block0(v0: i32): + v1 = fcvt_from_sint.f32 v0 + return v1 +} + +; check: cefbra %f0, 0, %r2, 0 +; nextln: br %r14 + +function %fcvt_from_uint_i64_f32(i64) -> f32 { +block0(v0: i64): + v1 = fcvt_from_uint.f32 v0 + return v1 +} + +; check: celgbr %f0, 0, %r2, 0 +; nextln: br %r14 + +function %fcvt_from_sint_i64_f32(i64) -> f32 { +block0(v0: i64): + v1 = fcvt_from_sint.f32 v0 + return v1 +} + +; check: cegbra %f0, 0, %r2, 0 +; nextln: br %r14 + +function %fcvt_from_uint_i32_f64(i32) -> f64 { +block0(v0: i32): + v1 = fcvt_from_uint.f64 v0 + return v1 +} + +; check: cdlfbr %f0, 0, %r2, 0 +; nextln: br %r14 + +function %fcvt_from_sint_i32_f64(i32) -> f64 { +block0(v0: i32): + v1 = fcvt_from_sint.f64 v0 + return v1 +} + +; check: cdfbra %f0, 0, %r2, 0 +; nextln: br %r14 + +function %fcvt_from_uint_i64_f64(i64) -> f64 { +block0(v0: i64): + v1 = fcvt_from_uint.f64 v0 + return v1 +} + +; check: cdlgbr %f0, 0, %r2, 0 +; nextln: br %r14 + +function %fcvt_from_sint_i64_f64(i64) -> f64 { +block0(v0: i64): + v1 = fcvt_from_sint.f64 v0 + return v1 +} + +; check: cdgbra %f0, 0, %r2, 0 +; nextln: br %r14 + 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FCVT_TO_UINT_SAT/FCVT_TO_SINT_SAT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %fcvt_to_uint_sat_f32_i32(f32) -> i32 { +block0(v0: f32): + v1 = fcvt_to_uint_sat.i32 v0 + return v1 +} + +; check: clfebr %r2, 5, %f0, 0 +; nextln: cebr %f0, %f0 +; nextln: lochio %r2, 0 +; nextln: br %r14 + +function %fcvt_to_sint_sat_f32_i32(f32) -> i32 { +block0(v0: f32): + v1 = fcvt_to_sint_sat.i32 v0 + return v1 +} + +; check: cfebra %r2, 5, %f0, 0 +; nextln: cebr %f0, %f0 +; nextln: lochio %r2, 0 +; nextln: br %r14 + +function %fcvt_to_uint_sat_f32_i64(f32) -> i64 { +block0(v0: f32): + v1 = fcvt_to_uint_sat.i64 v0 + return v1 +} + +; check: clgebr %r2, 5, %f0, 0 +; nextln: cebr %f0, %f0 +; nextln: locghio %r2, 0 +; nextln: br %r14 + +function %fcvt_to_sint_sat_f32_i64(f32) -> i64 { +block0(v0: f32): + v1 = fcvt_to_sint_sat.i64 v0 + return v1 +} + +; check: cgebra %r2, 5, %f0, 0 +; nextln: cebr %f0, %f0 +; nextln: locghio %r2, 0 +; nextln: br %r14 + +function %fcvt_to_uint_sat_f64_i32(f64) -> i32 { +block0(v0: f64): + v1 = fcvt_to_uint_sat.i32 v0 + return v1 +} + +; check: clfdbr %r2, 5, %f0, 0 +; nextln: cdbr %f0, %f0 +; nextln: lochio %r2, 0 +; nextln: br %r14 + +function %fcvt_to_sint_sat_f64_i32(f64) -> i32 { +block0(v0: f64): + v1 = fcvt_to_sint_sat.i32 v0 + return v1 +} + +; check: cfdbra %r2, 5, %f0, 0 +; nextln: cdbr %f0, %f0 +; nextln: lochio %r2, 0 +; nextln: br %r14 + +function %fcvt_to_uint_sat_f64_i64(f64) -> i64 { +block0(v0: f64): + v1 = fcvt_to_uint_sat.i64 v0 + return v1 +} + +; check: clgdbr %r2, 5, %f0, 0 +; nextln: cdbr %f0, %f0 +; nextln: locghio %r2, 0 +; nextln: br %r14 + +function %fcvt_to_sint_sat_f64_i64(f64) -> i64 { +block0(v0: f64): + v1 = fcvt_to_sint_sat.i64 v0 + return v1 +} + +; check: cgdbra %r2, 5, %f0, 0 +; nextln: cdbr %f0, %f0 +; nextln: locghio %r2, 0 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BITCAST +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function 
%bitcast_i64_f64(i64) -> f64 { +block0(v0: i64): + v1 = bitcast.f64 v0 + return v1 +} + +; check: ldgr %f0, %r2 +; nextln: br %r14 + +function %bitcast_f64_i64(f64) -> i64 { +block0(v0: f64): + v1 = bitcast.i64 v0 + return v1 +} + +; check: lgdr %r2, %f0 +; nextln: br %r14 + +function %bitcast_i32_f32(i32) -> f32 { +block0(v0: i32): + v1 = bitcast.f32 v0 + return v1 +} + +; check: sllg %r2, %r2, 32 +; nextln: ldgr %f0, %r2 +; nextln: br %r14 + +function %bitcast_f32_i32(f32) -> i32 { +block0(v0: f32): + v1 = bitcast.i32 v0 + return v1 +} + +; check: lgdr %r2, %f0 +; nextln: srlg %r2, %r2, 32 +; nextln: br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/heap_addr.clif b/cranelift/filetests/filetests/isa/s390x/heap_addr.clif new file mode 100644 index 0000000000..659ba71ae6 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/heap_addr.clif @@ -0,0 +1,49 @@ +test compile +target s390x + +function %dynamic_heap_check(i64 vmctx, i32) -> i64 { + gv0 = vmctx + gv1 = load.i32 notrap aligned gv0 + heap0 = dynamic gv0, bound gv1, offset_guard 0x1000, index_type i32 + +block0(v0: i64, v1: i32): + v2 = heap_addr.i64 heap0, v1, 0 + return v2 +} + +; check: Block 0: +; check: l %r4, 0(%r2) +; nextln: ahi %r4, 0 +; nextln: clr %r3, %r4 +; nextln: jgnh label1 ; jg label2 +; check: Block 1: +; check: llgfr %r5, %r3 +; nextln: agr %r2, %r5 +; nextln: lghi %r5, 0 +; nextln: clr %r3, %r4 +; nextln: locgrh %r2, %r5 +; nextln: br %r14 +; check: Block 2: +; check: trap + +function %static_heap_check(i64 vmctx, i32) -> i64 { + gv0 = vmctx + heap0 = static gv0, bound 0x1_0000, offset_guard 0x1000, index_type i32 + +block0(v0: i64, v1: i32): + v2 = heap_addr.i64 heap0, v1, 0 + return v2 +} + +; check: Block 0: +; check: clfi %r3, 65536 +; nextln: jgnh label1 ; jg label2 +; check: Block 1: +; check: llgfr %r4, %r3 +; nextln: agr %r2, %r4 +; nextln: lghi %r4, 0 +; nextln: clfi %r3, 65536 +; nextln: locgrh %r2, %r4 +; nextln: br %r14 +; check: Block 2: +; check: trap diff 
--git a/cranelift/filetests/filetests/isa/s390x/icmp.clif b/cranelift/filetests/filetests/isa/s390x/icmp.clif new file mode 100644 index 0000000000..ef792f34cf --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/icmp.clif @@ -0,0 +1,604 @@ +test compile +target s390x + +function %icmp_slt_i64(i64, i64) -> b1 { +block0(v0: i64, v1: i64): + v2 = icmp.i64 slt v0, v1 + return v2 +} + +; check: cgr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i64_ext32(i64, i32) -> b1 { +block0(v0: i64, v1: i32): + v2 = sextend.i64 v1 + v3 = icmp.i64 slt v0, v2 + return v3 +} + +; check: cgfr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i64_imm16(i64) -> b1 { +block0(v0: i64): + v1 = iconst.i64 1 + v2 = icmp.i64 slt v0, v1 + return v2 +} + +; check: cghi %r2, 1 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i64_imm32(i64) -> b1 { +block0(v0: i64): + v1 = iconst.i64 32768 + v2 = icmp.i64 slt v0, v1 + return v2 +} + +; check: cgfi %r2, 32768 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i64_mem(i64, i64) -> b1 { +block0(v0: i64, v1: i64): + v2 = load.i64 v1 + v3 = icmp.i64 slt v0, v2 + return v3 +} + +; check: cg %r2, 0(%r3) +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i64_sym(i64) -> b1 { + gv0 = symbol colocated %sym +block0(v0: i64): + v1 = symbol_value.i64 gv0 + v2 = load.i64 v1 + v3 = icmp.i64 slt v0, v2 + return v3 +} + +; check: cgrl %r2, %sym + 0 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i64_mem_ext16(i64, i64) -> b1 { +block0(v0: i64, v1: i64): + v2 = sload16.i64 v1 + v3 = icmp.i64 slt v0, v2 + return v3 +} + +; check: cgh %r2, 0(%r3) +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i64_sym_ext16(i64) -> b1 { + gv0 = symbol colocated %sym +block0(v0: i64): + v1 = 
symbol_value.i64 gv0 + v2 = sload16.i64 v1 + v3 = icmp.i64 slt v0, v2 + return v3 +} + +; check: cghrl %r2, %sym + 0 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i64_mem_ext32(i64, i64) -> b1 { +block0(v0: i64, v1: i64): + v2 = sload32.i64 v1 + v3 = icmp.i64 slt v0, v2 + return v3 +} + +; check: cgf %r2, 0(%r3) +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i64_sym_ext32(i64) -> b1 { + gv0 = symbol colocated %sym +block0(v0: i64): + v1 = symbol_value.i64 gv0 + v2 = sload32.i64 v1 + v3 = icmp.i64 slt v0, v2 + return v3 +} + +; check: cgfrl %r2, %sym + 0 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i32(i32, i32) -> b1 { +block0(v0: i32, v1: i32): + v2 = icmp.i32 slt v0, v1 + return v2 +} + +; check: cr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i32_imm16(i32) -> b1 { +block0(v0: i32): + v1 = iconst.i32 1 + v2 = icmp.i32 slt v0, v1 + return v2 +} + +; check: chi %r2, 1 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i32_imm(i32) -> b1 { +block0(v0: i32): + v1 = iconst.i32 32768 + v2 = icmp.i32 slt v0, v1 + return v2 +} + +; check: cfi %r2, 32768 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i32_mem(i32, i64) -> b1 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1 + v3 = icmp.i32 slt v0, v2 + return v3 +} + +; check: c %r2, 0(%r3) +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i32_memoff(i32, i64) -> b1 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1+4096 + v3 = icmp.i32 slt v0, v2 + return v3 +} + +; check: cy %r2, 4096(%r3) +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i32_sym(i32) -> b1 { + gv0 = symbol colocated %sym +block0(v0: i32): + v1 = symbol_value.i64 gv0 + v2 = load.i32 v1 + v3 = icmp.i32 slt v0, v2 + return v3 +} + +; 
check: crl %r2, %sym + 0 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i32_mem_ext16(i32, i64) -> b1 { +block0(v0: i32, v1: i64): + v2 = sload16.i32 v1 + v3 = icmp.i32 slt v0, v2 + return v3 +} + +; check: ch %r2, 0(%r3) +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i32_memoff_ext16(i32, i64) -> b1 { +block0(v0: i32, v1: i64): + v2 = sload16.i32 v1+4096 + v3 = icmp.i32 slt v0, v2 + return v3 +} + +; check: chy %r2, 4096(%r3) +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i32_sym_ext16(i32) -> b1 { + gv0 = symbol colocated %sym +block0(v0: i32): + v1 = symbol_value.i64 gv0 + v2 = sload16.i32 v1 + v3 = icmp.i32 slt v0, v2 + return v3 +} + +; check: chrl %r2, %sym + 0 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i16(i16, i16) -> b1 { +block0(v0: i16, v1: i16): + v2 = icmp.i16 slt v0, v1 + return v2 +} + +; check: lhr %r2, %r2 +; nextln: lhr %r3, %r3 +; nextln: cr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i16_imm(i16) -> b1 { +block0(v0: i16): + v1 = iconst.i16 1 + v2 = icmp.i16 slt v0, v1 + return v2 +} + +; check: lhr %r2, %r2 +; nextln: chi %r2, 1 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i16_mem(i16, i64) -> b1 { +block0(v0: i16, v1: i64): + v2 = load.i16 v1 + v3 = icmp.i16 slt v0, v2 + return v3 +} + +; check: lhr %r2, %r2 +; nextln: ch %r2, 0(%r3) +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i16_sym(i16) -> b1 { + gv0 = symbol colocated %sym +block0(v0: i16): + v1 = symbol_value.i64 gv0 + v2 = load.i16 v1 + v3 = icmp.i16 slt v0, v2 + return v3 +} + +; check: lhr %r2, %r2 +; nextln: chrl %r2, %sym + 0 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i8(i8, i8) -> b1 { +block0(v0: i8, v1: i8): + v2 = icmp.i8 slt v0, v1 + 
return v2 +} + +; check: lbr %r2, %r2 +; nextln: lbr %r3, %r3 +; nextln: cr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i8_imm(i8) -> b1 { +block0(v0: i8): + v1 = iconst.i8 1 + v2 = icmp.i8 slt v0, v1 + return v2 +} + +; check: lbr %r2, %r2 +; nextln: chi %r2, 1 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i8_mem(i8, i64) -> b1 { +block0(v0: i8, v1: i64): + v2 = load.i8 v1 + v3 = icmp.i8 slt v0, v2 + return v3 +} + +; check: lbr %r2, %r2 +; nextln: lb %r3, 0(%r3) +; nextln: cr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i64(i64, i64) -> b1 { +block0(v0: i64, v1: i64): + v2 = icmp.i64 ult v0, v1 + return v2 +} + +; check: clgr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i64_ext32(i64, i32) -> b1 { +block0(v0: i64, v1: i32): + v2 = uextend.i64 v1 + v3 = icmp.i64 ult v0, v2 + return v3 +} + +; check: clgfr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i64_imm(i64) -> b1 { +block0(v0: i64): + v1 = iconst.i64 1 + v2 = icmp.i64 ult v0, v1 + return v2 +} + +; check: clgfi %r2, 1 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i64_mem(i64, i64) -> b1 { +block0(v0: i64, v1: i64): + v2 = load.i64 v1 + v3 = icmp.i64 ult v0, v2 + return v3 +} + +; check: clg %r2, 0(%r3) +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i64_sym(i64) -> b1 { + gv0 = symbol colocated %sym +block0(v0: i64): + v1 = symbol_value.i64 gv0 + v2 = load.i64 v1 + v3 = icmp.i64 ult v0, v2 + return v3 +} + +; check: clgrl %r2, %sym + 0 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i64_mem_ext32(i64, i64) -> b1 { +block0(v0: i64, v1: i64): + v2 = uload32.i64 v1 + v3 = icmp.i64 ult v0, v2 + return v3 +} + +; check: clgf %r2, 0(%r3) +; nextln: 
lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i64_sym_ext32(i64) -> b1 { + gv0 = symbol colocated %sym +block0(v0: i64): + v1 = symbol_value.i64 gv0 + v2 = uload32.i64 v1 + v3 = icmp.i64 ult v0, v2 + return v3 +} + +; check: clgfrl %r2, %sym + 0 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i64_sym_ext16(i64) -> b1 { + gv0 = symbol colocated %sym +block0(v0: i64): + v1 = symbol_value.i64 gv0 + v2 = uload16.i64 v1 + v3 = icmp.i64 ult v0, v2 + return v3 +} + +; check: clghrl %r2, %sym + 0 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i32(i32, i32) -> b1 { +block0(v0: i32, v1: i32): + v2 = icmp.i32 ult v0, v1 + return v2 +} + +; check: clr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i32_imm(i32) -> b1 { +block0(v0: i32): + v1 = iconst.i32 1 + v2 = icmp.i32 ult v0, v1 + return v2 +} + +; check: clfi %r2, 1 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i32_mem(i32, i64) -> b1 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1 + v3 = icmp.i32 ult v0, v2 + return v3 +} + +; check: cl %r2, 0(%r3) +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i32_memoff(i32, i64) -> b1 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1+4096 + v3 = icmp.i32 ult v0, v2 + return v3 +} + +; check: cly %r2, 4096(%r3) +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i32_sym(i32) -> b1 { + gv0 = symbol colocated %sym +block0(v0: i32): + v1 = symbol_value.i64 gv0 + v2 = load.i32 v1 + v3 = icmp.i32 ult v0, v2 + return v3 +} + +; check: clrl %r2, %sym + 0 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i32_sym_ext16(i32) -> b1 { + gv0 = symbol colocated %sym +block0(v0: i32): + v1 = symbol_value.i64 gv0 + v2 = uload16.i32 v1 + v3 = icmp.i32 ult v0, v2 + return v3 +} + +; check: 
clhrl %r2, %sym + 0 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i16(i16, i16) -> b1 { +block0(v0: i16, v1: i16): + v2 = icmp.i16 ult v0, v1 + return v2 +} + +; check: llhr %r2, %r2 +; nextln: llhr %r3, %r3 +; nextln: clr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i16_imm(i16) -> b1 { +block0(v0: i16): + v1 = iconst.i16 1 + v2 = icmp.i16 ult v0, v1 + return v2 +} + +; check: llhr %r2, %r2 +; nextln: clfi %r2, 1 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i16_mem(i16, i64) -> b1 { +block0(v0: i16, v1: i64): + v2 = load.i16 v1 + v3 = icmp.i16 ult v0, v2 + return v3 +} + +; check: llhr %r2, %r2 +; nextln: llh %r3, 0(%r3) +; nextln: clr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i16_mem(i16) -> b1 { + gv0 = symbol colocated %sym +block0(v0: i16): + v1 = symbol_value.i64 gv0 + v2 = load.i16 v1 + v3 = icmp.i16 ult v0, v2 + return v3 +} + +; check: llhr %r2, %r2 +; nextln: clhrl %r2, %sym + 0 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i8(i8, i8) -> b1 { +block0(v0: i8, v1: i8): + v2 = icmp.i8 ult v0, v1 + return v2 +} + +; check: llcr %r2, %r2 +; nextln: llcr %r3, %r3 +; nextln: clr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i8_imm(i8) -> b1 { +block0(v0: i8): + v1 = iconst.i8 1 + v2 = icmp.i8 ult v0, v1 + return v2 +} + +; check: llcr %r2, %r2 +; nextln: clfi %r2, 1 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i8_mem(i8, i64) -> b1 { +block0(v0: i8, v1: i64): + v2 = load.i8 v1 + v3 = icmp.i8 ult v0, v2 + return v3 +} + +; check: llcr %r2, %r2 +; nextln: llc %r3, 0(%r3) +; nextln: clr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/jumptable.clif 
b/cranelift/filetests/filetests/isa/s390x/jumptable.clif new file mode 100644 index 0000000000..1930c0367a --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/jumptable.clif @@ -0,0 +1,45 @@ +test compile +target s390x + +function %f(i64) -> i64 { + jt0 = jump_table [block1, block2, block3] + +block0(v0: i64): + br_table v0, block4, jt0 + +block1: + v1 = iconst.i64 1 + jump block5(v1) + +block2: + v2 = iconst.i64 2 + jump block5(v2) + +block3: + v3 = iconst.i64 3 + jump block5(v3) + +block4: + v4 = iconst.i64 4 + jump block5(v4) + +block5(v5: i64): + v6 = iadd.i64 v0, v5 + return v6 +} + +; check: clgfi %r2, 3 ; jghe label1 ; sllg %r4, %r2, 2 ; larl %r3, 18 ; lgf %r4, 0(%r4, %r3) ; agrk %r3, %r3, %r4 ; br %r3 ; jt_entries + +; check: lghi %r3, 1 +; nextln: jg + +; check: lghi %r3, 2 +; nextln: jg + +; check: lghi %r3, 3 +; nextln: jg + +; check: agr %r2, %r3 +; nextln: br %r14 + + diff --git a/cranelift/filetests/filetests/isa/s390x/load-little.clif b/cranelift/filetests/filetests/isa/s390x/load-little.clif new file mode 100644 index 0000000000..6561863d73 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/load-little.clif @@ -0,0 +1,258 @@ +test compile +target s390x + +function %load_i64(i64) -> i64 { +block0(v0: i64): + v1 = load.i64 little v0 + return v1 +} + +; check: lrvg %r2, 0(%r2) +; nextln: br %r14 + +function %load_i64_sym() -> i64 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = load.i64 little v0 + return v1 +} + +; check: larl %r1, %sym + 0 ; lrvg %r2, 0(%r1) +; nextln: br %r14 + +function %uload8_i64(i64) -> i64 { +block0(v0: i64): + v1 = uload8.i64 little v0 + return v1 +} + +; check: llgc %r2, 0(%r2) +; nextln: br %r14 + +function %sload8_i64(i64) -> i64 { +block0(v0: i64): + v1 = sload8.i64 little v0 + return v1 +} + +; check: lgb %r2, 0(%r2) +; nextln: br %r14 + +function %uload16_i64(i64) -> i64 { +block0(v0: i64): + v1 = uload16.i64 little v0 + return v1 +} + +; check: lrvh %r2, 0(%r2) +; nextln: 
llghr %r2, %r2 +; nextln: br %r14 + +function %uload16_i64_sym() -> i64 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = uload16.i64 little v0 + return v1 +} + +; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1) +; nextln: llghr %r2, %r2 +; nextln: br %r14 + +function %sload16_i64(i64) -> i64 { +block0(v0: i64): + v1 = sload16.i64 little v0 + return v1 +} + +; check: lrvh %r2, 0(%r2) +; nextln: lghr %r2, %r2 +; nextln: br %r14 + +function %sload16_i64_sym() -> i64 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = sload16.i64 little v0 + return v1 +} + +; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1) +; nextln: lghr %r2, %r2 +; nextln: br %r14 + +function %uload32_i64(i64) -> i64 { +block0(v0: i64): + v1 = uload32.i64 little v0 + return v1 +} + +; check: lrv %r2, 0(%r2) +; nextln: llgfr %r2, %r2 +; nextln: br %r14 + +function %uload32_i64_sym() -> i64 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = uload32.i64 little v0 + return v1 +} + +; check: larl %r1, %sym + 0 ; lrv %r2, 0(%r1) +; nextln: llgfr %r2, %r2 +; nextln: br %r14 + +function %sload32_i64(i64) -> i64 { +block0(v0: i64): + v1 = sload32.i64 little v0 + return v1 +} + +; check: lrv %r2, 0(%r2) +; nextln: lgfr %r2, %r2 +; nextln: br %r14 + +function %sload32_i64_sym() -> i64 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = sload32.i64 little v0 + return v1 +} + +; check: larl %r1, %sym + 0 ; lrv %r2, 0(%r1) +; nextln: lgfr %r2, %r2 +; nextln: br %r14 + +function %load_i32(i64) -> i32 { +block0(v0: i64): + v1 = load.i32 little v0 + return v1 +} + +; check: lrv %r2, 0(%r2) +; nextln: br %r14 + +function %load_i32_sym() -> i32 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = load.i32 little v0 + return v1 +} + +; check: larl %r1, %sym + 0 ; lrv %r2, 0(%r1) +; nextln: br %r14 + +function %uload8_i32(i64) -> i32 { +block0(v0: i64): + v1 = uload8.i32 little v0 + return v1 +} + +; 
check: llc %r2, 0(%r2) +; nextln: br %r14 + +function %sload8_i32(i64) -> i32 { +block0(v0: i64): + v1 = sload8.i32 little v0 + return v1 +} + +; check: lb %r2, 0(%r2) +; nextln: br %r14 + +function %uload16_i32(i64) -> i32 { +block0(v0: i64): + v1 = uload16.i32 little v0 + return v1 +} + +; check: lrvh %r2, 0(%r2) +; nextln: llhr %r2, %r2 +; nextln: br %r14 + +function %uload16_i32_sym() -> i32 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = uload16.i32 little v0 + return v1 +} + +; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1) +; nextln: llhr %r2, %r2 +; nextln: br %r14 + +function %sload16_i32(i64) -> i32 { +block0(v0: i64): + v1 = sload16.i32 little v0 + return v1 +} + +; check: lrvh %r2, 0(%r2) +; nextln: lhr %r2, %r2 +; nextln: br %r14 + +function %sload16_i32_sym() -> i32 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = sload16.i32 little v0 + return v1 +} + +; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1) +; nextln: lhr %r2, %r2 +; nextln: br %r14 + +function %load_i16(i64) -> i16 { +block0(v0: i64): + v1 = load.i16 little v0 + return v1 +} + +; check: lrvh %r2, 0(%r2) +; nextln: br %r14 + +function %load_i16_sym() -> i16 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = load.i16 little v0 + return v1 +} + +; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1) +; nextln: br %r14 + +function %uload8_i16(i64) -> i16 { +block0(v0: i64): + v1 = uload8.i16 little v0 + return v1 +} + +; check: llc %r2, 0(%r2) +; nextln: br %r14 + +function %sload8_i16(i64) -> i16 { +block0(v0: i64): + v1 = sload8.i16 little v0 + return v1 +} + +; check: lb %r2, 0(%r2) +; nextln: br %r14 + +function %load_i8(i64) -> i8 { +block0(v0: i64): + v1 = load.i8 little v0 + return v1 +} + +; check: llc %r2, 0(%r2) +; nextln: br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/load.clif b/cranelift/filetests/filetests/isa/s390x/load.clif new file mode 100644 index 0000000000..8d46fe0867 --- /dev/null +++ 
b/cranelift/filetests/filetests/isa/s390x/load.clif @@ -0,0 +1,264 @@ +test compile +target s390x + +function %load_i64(i64) -> i64 { +block0(v0: i64): + v1 = load.i64 v0 + return v1 +} + +; check: lg %r2, 0(%r2) +; nextln: br %r14 + +function %load_i64_sym() -> i64 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = load.i64 v0 + return v1 +} + +; check: lgrl %r2, %sym + 0 +; nextln: br %r14 + +function %uload8_i64(i64) -> i64 { +block0(v0: i64): + v1 = uload8.i64 v0 + return v1 +} + +; check: llgc %r2, 0(%r2) +; nextln: br %r14 + +function %sload8_i64(i64) -> i64 { +block0(v0: i64): + v1 = sload8.i64 v0 + return v1 +} + +; check: lgb %r2, 0(%r2) +; nextln: br %r14 + +function %uload16_i64(i64) -> i64 { +block0(v0: i64): + v1 = uload16.i64 v0 + return v1 +} + +; check: llgh %r2, 0(%r2) +; nextln: br %r14 + +function %uload16_i64_sym() -> i64 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = uload16.i64 v0 + return v1 +} + +; check: llghrl %r2, %sym + 0 +; nextln: br %r14 + +function %sload16_i64(i64) -> i64 { +block0(v0: i64): + v1 = sload16.i64 v0 + return v1 +} + +; check: lgh %r2, 0(%r2) +; nextln: br %r14 + +function %sload16_i64_sym() -> i64 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = sload16.i64 v0 + return v1 +} + +; check: lghrl %r2, %sym + 0 +; nextln: br %r14 + +function %uload32_i64(i64) -> i64 { +block0(v0: i64): + v1 = uload32.i64 v0 + return v1 +} + +; check: llgf %r2, 0(%r2) +; nextln: br %r14 + +function %uload32_i64_sym() -> i64 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = uload32.i64 v0 + return v1 +} + +; check: llgfrl %r2, %sym + 0 +; nextln: br %r14 + +function %sload32_i64(i64) -> i64 { +block0(v0: i64): + v1 = sload32.i64 v0 + return v1 +} + +; check: lgf %r2, 0(%r2) +; nextln: br %r14 + +function %sload32_i64_sym() -> i64 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = sload32.i64 v0 + return v1 +} + 
+; check: lgfrl %r2, %sym + 0 +; nextln: br %r14 + +function %load_i32(i64) -> i32 { +block0(v0: i64): + v1 = load.i32 v0 + return v1 +} + +; check: l %r2, 0(%r2) +; nextln: br %r14 + +function %load_i32_sym() -> i32 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = load.i32 v0 + return v1 +} + +; check: lrl %r2, %sym + 0 +; nextln: br %r14 + +function %load_i32_off(i64) -> i32 { +block0(v0: i64): + v1 = load.i32 v0+4096 + return v1 +} + +; check: ly %r2, 4096(%r2) +; nextln: br %r14 + +function %uload8_i32(i64) -> i32 { +block0(v0: i64): + v1 = uload8.i32 v0 + return v1 +} + +; check: llc %r2, 0(%r2) +; nextln: br %r14 + +function %sload8_i32(i64) -> i32 { +block0(v0: i64): + v1 = sload8.i32 v0 + return v1 +} + +; check: lb %r2, 0(%r2) +; nextln: br %r14 + +function %uload16_i32(i64) -> i32 { +block0(v0: i64): + v1 = uload16.i32 v0 + return v1 +} + +; check: llh %r2, 0(%r2) +; nextln: br %r14 + +function %uload16_i32_sym() -> i32 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = uload16.i32 v0 + return v1 +} + +; check: llhrl %r2, %sym + 0 +; nextln: br %r14 + +function %sload16_i32(i64) -> i32 { +block0(v0: i64): + v1 = sload16.i32 v0 + return v1 +} + +; check: lh %r2, 0(%r2) +; nextln: br %r14 + +function %sload16_i32_off(i64) -> i32 { +block0(v0: i64): + v1 = sload16.i32 v0+4096 + return v1 +} + +; check: lhy %r2, 4096(%r2) +; nextln: br %r14 + +function %sload16_i32_sym() -> i32 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = sload16.i32 v0 + return v1 +} + +; check: lhrl %r2, %sym + 0 +; nextln: br %r14 + +function %load_i16(i64) -> i16 { +block0(v0: i64): + v1 = load.i16 v0 + return v1 +} + +; check: llh %r2, 0(%r2) +; nextln: br %r14 + +function %load_i16_sym() -> i16 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = load.i16 v0 + return v1 +} + +; check: llhrl %r2, %sym + 0 +; nextln: br %r14 + +function %uload8_i16(i64) -> i16 { +block0(v0: i64): + v1 
= uload8.i16 v0 + return v1 +} + +; check: llc %r2, 0(%r2) +; nextln: br %r14 + +function %sload8_i16(i64) -> i16 { +block0(v0: i64): + v1 = sload8.i16 v0 + return v1 +} + +; check: lb %r2, 0(%r2) +; nextln: br %r14 + +function %load_i8(i64) -> i8 { +block0(v0: i64): + v1 = load.i8 v0 + return v1 +} + +; check: llc %r2, 0(%r2) +; nextln: br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/multivalue-ret.clif b/cranelift/filetests/filetests/isa/s390x/multivalue-ret.clif new file mode 100644 index 0000000000..d9197d3072 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/multivalue-ret.clif @@ -0,0 +1,79 @@ +test compile +target s390x + +;; Test default (non-SpiderMonkey) ABI. +function %f1() -> i64, i64, i64, i64 { +block1: + v0 = iconst.i64 1 + v1 = iconst.i64 2 + v2 = iconst.i64 3 + v3 = iconst.i64 4 + return v0, v1, v2, v3 +} + +; check: lghi %r2, 1 +; nextln: lghi %r3, 2 +; nextln: lghi %r4, 3 +; nextln: lghi %r5, 4 +; nextln: br %r14 + +function %f1() -> i64, i64, i64, i64, i64, i64 { +block1: + v0 = iconst.i64 1 + v1 = iconst.i64 2 + v2 = iconst.i64 3 + v3 = iconst.i64 4 + v4 = iconst.i64 5 + v5 = iconst.i64 6 + return v0, v1, v2, v3, v4, v5 +} + +; check: stmg %r12, %r15, 96(%r15) +; nextln: lgr %r14, %r2 +; nextln: lghi %r2, 1 +; nextln: lghi %r3, 2 +; nextln: lghi %r4, 3 +; nextln: lghi %r5, 4 +; nextln: lghi %r13, 5 +; nextln: lghi %r12, 6 +; nextln: stg %r13, 0(%r14) +; nextln: stg %r12, 8(%r14) +; nextln: lmg %r12, %r15, 96(%r15) +; nextln: br %r14 + +;; Test default (non-SpiderMonkey) ABI. 
+function %f3() -> f64, f64, f64, f64 { +block1: + v0 = f64const 0x0.0 + v1 = f64const 0x1.0 + v2 = f64const 0x2.0 + v3 = f64const 0x3.0 + return v0, v1, v2, v3 +} + +; check: bras %r1, 12 ; data.f64 0 ; ld %f0, 0(%r1) +; nextln: bras %r1, 12 ; data.f64 1 ; ld %f2, 0(%r1) +; nextln: bras %r1, 12 ; data.f64 2 ; ld %f4, 0(%r1) +; nextln: bras %r1, 12 ; data.f64 3 ; ld %f6, 0(%r1) +; nextln: br %r14 + +function %f4() -> f64, f64, f64, f64, f64, f64 { +block1: + v0 = f64const 0x0.0 + v1 = f64const 0x1.0 + v2 = f64const 0x2.0 + v3 = f64const 0x3.0 + v4 = f64const 0x4.0 + v5 = f64const 0x5.0 + return v0, v1, v2, v3, v4, v5 +} + +; check: bras %r1, 12 ; data.f64 0 ; ld %f0, 0(%r1) +; nextln: bras %r1, 12 ; data.f64 1 ; ld %f2, 0(%r1) +; nextln: bras %r1, 12 ; data.f64 2 ; ld %f4, 0(%r1) +; nextln: bras %r1, 12 ; data.f64 3 ; ld %f6, 0(%r1) +; nextln: bras %r1, 12 ; data.f64 4 ; ld %f1, 0(%r1) +; nextln: bras %r1, 12 ; data.f64 5 ; ld %f3, 0(%r1) +; nextln: std %f1, 0(%r2) +; nextln: std %f3, 8(%r2) +; nextln: br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/reftypes.clif b/cranelift/filetests/filetests/isa/s390x/reftypes.clif new file mode 100644 index 0000000000..0ffdcab20e --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/reftypes.clif @@ -0,0 +1,101 @@ +test compile +target s390x + +function %f0(r64, r64) -> r64 { +block0(v0: r64, v1: r64): + return v1 +} + +; check: lgr %r2, %r3 +; nextln: br %r14 + +function %f1(r64) -> b1 { +block0(v0: r64): + v1 = is_null v0 + return v1 +} + +; check: cghi %r2, 0 +; nextln: lhi %r2, 0 +; nextln: lochie %r2, 1 +; nextln: br %r14 + +function %f2(r64) -> b1 { +block0(v0: r64): + v1 = is_invalid v0 + return v1 +} + +; check: cghi %r2, -1 +; nextln: lhi %r2, 0 +; nextln: lochie %r2, 1 +; nextln: br %r14 + +function %f3() -> r64 { +block0: + v0 = null.r64 + return v0 +} + +; check: lghi %r2, 0 +; nextln: br %r14 + +function %f4(r64, r64) -> r64, r64, r64 { + fn0 = %f(r64) -> b1 + ss0 = explicit_slot 8 + +block0(v0: 
r64, v1: r64): + v2 = call fn0(v0) + stack_store.r64 v0, ss0 + brz v2, block1(v1, v0) + jump block2(v0, v1) + +block1(v3: r64, v4: r64): + jump block3(v3, v4) + +block2(v5: r64, v6: r64): + jump block3(v5, v6) + +block3(v7: r64, v8: r64): + v9 = stack_load.r64 ss0 + return v7, v8, v9 +} + +; check: Block 0: +; check: stmg %r12, %r15, 96(%r15) +; nextln: aghi %r15, -192 +; nextln: virtual_sp_offset_adjust 160 +; nextln: lgr %r13, %r2 +; nextln: lgr %r12, %r3 +; nextln: lgr %r2, %r13 +; nextln: bras %r1, 12 ; data %f + 0 ; lg %r3, 0(%r1) +; nextln: stg %r2, 168(%r15) +; nextln: stg %r13, 176(%r15) +; nextln: stg %r12, 184(%r15) +; nextln: (safepoint: slots [S0, S1, S2] +; nextln: basr %r14, %r3 +; nextln: lg %r13, 176(%r15) +; nextln: lg %r12, 184(%r15) +; nextln: la %r3, 160(%r15) +; nextln: stg %r13, 0(%r3) +; nextln: llcr %r2, %r2 +; nextln: chi %r2, 0 +; nextln: jgnlh label1 ; jg label3 +; check: Block 1: +; check: jg label2 +; check: Block 2: +; check: lgr %r2, %r12 +; nextln: jg label5 +; check: Block 3: +; check: jg label4 +; check: Block 4: +; check: lgr %r2, %r13 +; nextln: lgr %r13, %r12 +; nextln: jg label5 +; check: Block 5: +; check: la %r3, 160(%r15) +; nextln: lg %r3, 0(%r3) +; nextln: lgr %r4, %r3 +; nextln: lgr %r3, %r13 +; nextln: lmg %r12, %r15, 288(%r15) +; nextln: br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/saturating-ops.clif b/cranelift/filetests/filetests/isa/s390x/saturating-ops.clif new file mode 100644 index 0000000000..193a02eaad --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/saturating-ops.clif @@ -0,0 +1,12 @@ +test compile +target s390x + +; FIXME: not yet supported + +function %uaddsat64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): +; v2 = uadd_sat.i64 v0, v1 + v2 = iconst.i64 0 + return v2 +} + diff --git a/cranelift/filetests/filetests/isa/s390x/shift-rotate.clif b/cranelift/filetests/filetests/isa/s390x/shift-rotate.clif new file mode 100644 index 0000000000..a275b997c8 --- /dev/null +++ 
b/cranelift/filetests/filetests/isa/s390x/shift-rotate.clif @@ -0,0 +1,461 @@ +test compile +target s390x + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ROTR +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %rotr_i64_reg(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = rotr.i64 v0, v1 + return v2 +} + +; check: lcgr %r3, %r3 +; nextln: rllg %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %rotr_i64_imm(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i32 17 + v2 = rotr.i64 v0, v1 + return v2 +} + +; check: rllg %r2, %r2, 47 +; nextln: br %r14 + +function %rotr_i32_reg(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = rotr.i32 v0, v1 + return v2 +} + +; check: lcr %r3, %r3 +; nextln: rll %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %rotr_i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 17 + v2 = rotr.i32 v0, v1 + return v2 +} + +; check: rll %r2, %r2, 15 +; nextln: br %r14 + +function %rotr_i16_reg(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = rotr.i16 v0, v1 + return v2 +} + +; check: llhr %r2, %r2 +; nextln: lr %r5, %r3 +; nextln: lcr %r4, %r3 +; nextln: nill %r5, 15 +; nextln: nill %r4, 15 +; nextln: sllk %r3, %r2, 0(%r5) +; nextln: srlk %r2, %r2, 0(%r4) +; nextln: ork %r2, %r3, %r2 +; nextln: br %r14 + +function %rotr_i16_imm(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i32 10 + v2 = rotr.i16 v0, v1 + return v2 +} + +; check: llhr %r2, %r2 +; nextln: sllk %r3, %r2, 6 +; nextln: srlk %r2, %r2, 10 +; nextln: ork %r2, %r3, %r2 +; nextln: br %r14 + +function %rotr_i8_reg(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = rotr.i8 v0, v1 + return v2 +} + +; check: llcr %r2, %r2 +; nextln: lr %r5, %r3 +; nextln: lcr %r4, %r3 +; nextln: nill %r5, 7 +; nextln: nill %r4, 7 +; nextln: sllk %r3, %r2, 0(%r5) +; nextln: srlk %r2, %r2, 0(%r4) +; nextln: ork %r2, %r3, %r2 +; nextln: br %r14 + +function %rotr_i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i32 3 + v2 = rotr.i8 v0, v1 + return v2 +} + +; check: llcr %r2, %r2 +; nextln: sllk %r3, 
%r2, 5 +; nextln: srlk %r2, %r2, 3 +; nextln: ork %r2, %r3, %r2 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ROTL +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %rotl_i64_reg(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = rotl.i64 v0, v1 + return v2 +} + +; check: rllg %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %rotl_i64_imm(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i32 17 + v2 = rotl.i64 v0, v1 + return v2 +} + +; check: rllg %r2, %r2, 17 +; nextln: br %r14 + +function %rotl_i32_reg(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = rotl.i32 v0, v1 + return v2 +} + +; check: rll %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %rotl_i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 17 + v2 = rotl.i32 v0, v1 + return v2 +} + +; check: rll %r2, %r2, 17 +; nextln: br %r14 + +function %rotl_i16_reg(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = rotl.i16 v0, v1 + return v2 +} + +; check: llhr %r2, %r2 +; nextln: lr %r4, %r3 +; nextln: lcr %r3, %r3 +; nextln: nill %r4, 15 +; nextln: nill %r3, 15 +; nextln: sllk %r3, %r2, 0(%r3) +; nextln: srlk %r2, %r2, 0(%r4) +; nextln: ork %r2, %r3, %r2 +; nextln: br %r14 + +function %rotl_i16_imm(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i32 10 + v2 = rotl.i16 v0, v1 + return v2 +} + +; check: llhr %r2, %r2 +; nextln: sllk %r3, %r2, 10 +; nextln: srlk %r2, %r2, 6 +; nextln: ork %r2, %r3, %r2 +; nextln: br %r14 + +function %rotl_i8_reg(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = rotl.i8 v0, v1 + return v2 +} + +; check: llcr %r2, %r2 +; nextln: lr %r4, %r3 +; nextln: lcr %r3, %r3 +; nextln: nill %r4, 7 +; nextln: nill %r3, 7 +; nextln: sllk %r3, %r2, 0(%r3) +; nextln: srlk %r2, %r2, 0(%r4) +; nextln: ork %r2, %r3, %r2 +; nextln: br %r14 + +function %rotr_i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i32 3 + v2 = rotl.i8 v0, v1 + return v2 +} + +; check: llcr %r2, %r2 +; nextln: sllk %r3, %r2, 3 +; nextln: srlk %r2, %r2, 5 +; nextln: ork %r2, %r3, %r2 +; 
nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; USHR +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %ushr_i64_reg(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = ushr.i64 v0, v1 + return v2 +} + +; check: srlg %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %ushr_i64_imm(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i32 17 + v2 = ushr.i64 v0, v1 + return v2 +} + +; check: srlg %r2, %r2, 17 +; nextln: br %r14 + +function %ushr_i32_reg(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = ushr.i32 v0, v1 + return v2 +} + +; check: srlk %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %ushr_i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 17 + v2 = ushr.i32 v0, v1 + return v2 +} + +; check: srlk %r2, %r2, 17 +; nextln: br %r14 + +function %ushr_i16_reg(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = ushr.i16 v0, v1 + return v2 +} + +; FIXME: check shift count ? + +; check: llhr %r2, %r2 +; nextln: nill %r3, 31 +; nextln: srlk %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %ushr_i16_imm(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i32 10 + v2 = ushr.i16 v0, v1 + return v2 +} + +; check: llhr %r2, %r2 +; nextln: srlk %r2, %r2, 10 +; nextln: br %r14 + +function %ushr_i8_reg(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = ushr.i8 v0, v1 + return v2 +} + +; check: llcr %r2, %r2 +; nextln: nill %r3, 31 +; nextln: srlk %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %ushr_i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i32 3 + v2 = ushr.i8 v0, v1 + return v2 +} + +; check: llcr %r2, %r2 +; nextln: srlk %r2, %r2, 3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ISHL +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %ishl_i64_reg(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = ishl.i64 v0, v1 + return v2 +} + +; check: sllg %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %ishl_i64_imm(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i32 17 + v2 = ishl.i64 v0, v1 + return v2 +} + +; check: sllg 
%r2, %r2, 17 +; nextln: br %r14 + +function %ishl_i32_reg(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = ishl.i32 v0, v1 + return v2 +} + +; check: sllk %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %ishl_i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 17 + v2 = ishl.i32 v0, v1 + return v2 +} + +; check: sllk %r2, %r2, 17 +; nextln: br %r14 + +function %ishl_i16_reg(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = ishl.i16 v0, v1 + return v2 +} + +; check: nill %r3, 31 +; nextln: sllk %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %ishl_i16_imm(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i32 10 + v2 = ishl.i16 v0, v1 + return v2 +} + +; check: sllk %r2, %r2, 10 +; nextln: br %r14 + +function %ishl_i8_reg(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = ishl.i8 v0, v1 + return v2 +} + +; check: nill %r3, 31 +; nextln: sllk %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %ishl_i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i32 3 + v2 = ishl.i8 v0, v1 + return v2 +} + +; check: sllk %r2, %r2, 3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; SSHR +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %sshr_i64_reg(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = sshr.i64 v0, v1 + return v2 +} + +; check: srag %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %sshr_i64_imm(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i32 17 + v2 = sshr.i64 v0, v1 + return v2 +} + +; check: srag %r2, %r2, 17 +; nextln: br %r14 + +function %sshr_i32_reg(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = sshr.i32 v0, v1 + return v2 +} + +; check: srak %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %sshr_i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 17 + v2 = sshr.i32 v0, v1 + return v2 +} + +; check: srak %r2, %r2, 17 +; nextln: br %r14 + +function %sshr_i16_reg(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = sshr.i16 v0, v1 + return v2 +} + +; check: lhr %r2, %r2 +; nextln: nill %r3, 31 +; nextln: srak %r2, %r2, 
0(%r3) +; nextln: br %r14 + +function %sshr_i16_imm(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i32 10 + v2 = sshr.i16 v0, v1 + return v2 +} + +; check: lhr %r2, %r2 +; nextln: srak %r2, %r2, 10 +; nextln: br %r14 + +function %sshr_i8_reg(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = sshr.i8 v0, v1 + return v2 +} + +; check: lbr %r2, %r2 +; nextln: nill %r3, 31 +; nextln: srak %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %sshr_i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i32 3 + v2 = sshr.i8 v0, v1 + return v2 +} + +; check: lbr %r2, %r2 +; nextln: srak %r2, %r2, 3 +; nextln: br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/stack-limit.clif b/cranelift/filetests/filetests/isa/s390x/stack-limit.clif new file mode 100644 index 0000000000..0ef7320340 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/stack-limit.clif @@ -0,0 +1,175 @@ +test compile +target s390x + +function %foo() { +block0: + return +} + +function %stack_limit_leaf_zero(i64 stack_limit) { +block0(v0: i64): + return +} + +; check: br %r14 + +function %stack_limit_gv_leaf_zero(i64 vmctx) { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0 + gv2 = load.i64 notrap aligned gv1+4 + stack_limit = gv2 +block0(v0: i64): + return +} + +; check: br %r14 + + +function %stack_limit_call_zero(i64 stack_limit) { + fn0 = %foo() +block0(v0: i64): + call fn0() + return +} + +; check: clgrtle %r15, %r2 +; nextln: stmg %r14, %r15, 112(%r15) +; nextln: aghi %r15, -160 +; nextln: virtual_sp_offset_adjust 160 +; nextln: bras %r1, 12 ; data %foo + 0 ; lg %r2, 0(%r1) +; nextln: basr %r14, %r2 +; nextln: lmg %r14, %r15, 272(%r15) +; nextln: br %r14 + +function %stack_limit_gv_call_zero(i64 vmctx) { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0 + gv2 = load.i64 notrap aligned gv1+4 + stack_limit = gv2 + fn0 = %foo() +block0(v0: i64): + call fn0() + return +} + +; check: lg %r1, 0(%r2) +; nextln: lg %r1, 4(%r1) +; nextln: clgrtle %r15, %r1 +; nextln: stmg %r14, %r15, 112(%r15) +; nextln: 
aghi %r15, -160 +; nextln: virtual_sp_offset_adjust 160 +; nextln: bras %r1, 12 ; data %foo + 0 ; lg %r2, 0(%r1) +; nextln: basr %r14, %r2 +; nextln: lmg %r14, %r15, 272(%r15) +; nextln: br %r14 + +function %stack_limit(i64 stack_limit) { + ss0 = explicit_slot 168 +block0(v0: i64): + return +} + +; check: la %r1, 168(%r2) +; nextln: clgrtle %r15, %r1 +; nextln: aghi %r15, -168 +; nextln: aghi %r15, 168 +; nextln: br %r14 + +function %large_stack_limit(i64 stack_limit) { + ss0 = explicit_slot 400000 +block0(v0: i64): + return +} + +; check: clgrtle %r15, %r2 +; nextln: lay %r1, 400000(%r2) +; nextln: clgrtle %r15, %r1 +; nextln: agfi %r15, -400000 +; nextln: agfi %r15, 400000 +; nextln: br %r14 + +function %huge_stack_limit(i64 stack_limit) { + ss0 = explicit_slot 4000000 +block0(v0: i64): + return +} + +; check: clgrtle %r15, %r2 +; nextln: lgr %r1, %r2 +; nextln: algfi %r1, 4000000 +; nextln: clgrtle %r15, %r1 +; nextln: agfi %r15, -4000000 +; nextln: agfi %r15, 4000000 +; nextln: br %r14 + +function %limit_preamble(i64 vmctx) { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0 + gv2 = load.i64 notrap aligned gv1+4 + stack_limit = gv2 + ss0 = explicit_slot 20 +block0(v0: i64): + return +} + +; check: lg %r1, 0(%r2) +; nextln: lg %r1, 4(%r1) +; nextln: la %r1, 24(%r1) +; nextln: clgrtle %r15, %r1 +; nextln: aghi %r15, -24 +; nextln: aghi %r15, 24 +; nextln: br %r14 + +function %limit_preamble_large(i64 vmctx) { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0 + gv2 = load.i64 notrap aligned gv1+4 + stack_limit = gv2 + ss0 = explicit_slot 400000 +block0(v0: i64): + return +} + +; check: lg %r1, 0(%r2) +; nextln: lg %r1, 4(%r1) +; nextln: clgrtle %r15, %r1 +; nextln: lay %r1, 400000(%r1) +; nextln: clgrtle %r15, %r1 +; nextln: agfi %r15, -400000 +; nextln: agfi %r15, 400000 +; nextln: br %r14 + +function %limit_preamble_huge(i64 vmctx) { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0 + gv2 = load.i64 notrap aligned gv1+4 + stack_limit = gv2 + ss0 = explicit_slot 
4000000 +block0(v0: i64): + return +} + +; check: lg %r1, 0(%r2) +; nextln: lg %r1, 4(%r1) +; nextln: clgrtle %r15, %r1 +; nextln: algfi %r1, 4000000 +; nextln: clgrtle %r15, %r1 +; nextln: agfi %r15, -4000000 +; nextln: agfi %r15, 4000000 +; nextln: br %r14 + +function %limit_preamble_huge_offset(i64 vmctx) { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0+1000000 + stack_limit = gv1 + ss0 = explicit_slot 20 +block0(v0: i64): + return +} + +; check: lgfi %r1, 1000000 ; lg %r1, 0(%r1,%r2) +; nextln: la %r1, 24(%r1) +; nextln: clgrtle %r15, %r1 +; nextln: aghi %r15, -24 +; nextln: aghi %r15, 24 +; nextln: br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/stack.clif b/cranelift/filetests/filetests/isa/s390x/stack.clif new file mode 100644 index 0000000000..1ac80b9fd0 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/stack.clif @@ -0,0 +1,93 @@ +test compile +target s390x + +; FIXME: Should allocate register save area. + +function %stack_addr_small() -> i64 { +ss0 = explicit_slot 8 + +block0: + v0 = stack_addr.i64 ss0 + return v0 +} + +; check: aghi %r15, -8 +; nextln: la %r2, 0(%r15) +; nextln: aghi %r15, 8 +; nextln: br %r14 + +function %stack_addr_big() -> i64 { +ss0 = explicit_slot 100000 +ss1 = explicit_slot 8 + +block0: + v0 = stack_addr.i64 ss0 + return v0 +} + +; check: agfi %r15, -100008 +; nextln: la %r2, 0(%r15) +; nextln: agfi %r15, 100008 +; nextln: br %r14 + +; FIXME: don't use stack_addr legalization for stack_load and stack_store + +function %stack_load_small() -> i64 { +ss0 = explicit_slot 8 + +block0: + v0 = stack_load.i64 ss0 + return v0 +} + +; check: aghi %r15, -8 +; nextln: la %r2, 0(%r15) +; nextln: lg %r2, 0(%r2) +; nextln: aghi %r15, 8 +; nextln: br %r14 + +function %stack_load_big() -> i64 { +ss0 = explicit_slot 100000 +ss1 = explicit_slot 8 + +block0: + v0 = stack_load.i64 ss0 + return v0 +} + +; check: agfi %r15, -100008 +; nextln: la %r2, 0(%r15) +; nextln: lg %r2, 0(%r2) +; nextln: agfi %r15, 100008 +; nextln: br 
%r14 + + +function %stack_store_small(i64) { +ss0 = explicit_slot 8 + +block0(v0: i64): + stack_store.i64 v0, ss0 + return +} + +; check: aghi %r15, -8 +; nextln: la %r3, 0(%r15) +; nextln: stg %r2, 0(%r3) +; nextln: aghi %r15, 8 +; nextln: br %r14 + +function %stack_store_big(i64) { +ss0 = explicit_slot 100000 +ss1 = explicit_slot 8 + +block0(v0: i64): + stack_store.i64 v0, ss0 + return +} + +; check: agfi %r15, -100008 +; nextln: la %r3, 0(%r15) +; nextln: stg %r2, 0(%r3) +; nextln: agfi %r15, 100008 +; nextln: br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/store-little.clif b/cranelift/filetests/filetests/isa/s390x/store-little.clif new file mode 100644 index 0000000000..65c9ffab93 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/store-little.clif @@ -0,0 +1,281 @@ +test compile +target s390x + +function %store_i64(i64, i64) { +block0(v0: i64, v1: i64): + store.i64 little v0, v1 + return +} + +; check: strvg %r2, 0(%r3) +; nextln: br %r14 + +function %store_i64_sym(i64) { + gv0 = symbol colocated %sym +block0(v0: i64): + v1 = symbol_value.i64 gv0 + store.i64 little v0, v1 + return +} + +; check: larl %r1, %sym + 0 ; strvg %r2, 0(%r1) +; nextln: br %r14 + +function %store_imm_i64(i64) { +block0(v0: i64): + v1 = iconst.i64 12345 + store.i64 little v1, v0 + return +} + +; check: lghi %r3, 12345 +; nextln: strvg %r3, 0(%r2) +; nextln: br %r14 + +function %istore8_i64(i64, i64) { +block0(v0: i64, v1: i64): + istore8.i64 little v0, v1 + return +} + +; check: stc %r2, 0(%r3) +; nextln: br %r14 + +function %istore8_imm_i64(i64) { +block0(v0: i64): + v1 = iconst.i64 123 + istore8.i64 little v1, v0 + return +} + +; check: mvi 0(%r2), 123 +; nextln: br %r14 + +function %istore16_i64(i64, i64) { +block0(v0: i64, v1: i64): + istore16.i64 little v0, v1 + return +} + +; check: strvh %r2, 0(%r3) +; nextln: br %r14 + +function %istore16_i64_sym(i64) { + gv0 = symbol colocated %sym +block0(v0: i64): + v1 = symbol_value.i64 gv0 + istore16.i64 little v0, 
v1 + return +} + +; check: larl %r1, %sym + 0 ; strvh %r2, 0(%r1) +; nextln: br %r14 + +function %istore16_imm_i64(i64) { +block0(v0: i64): + v1 = iconst.i64 12345 + istore16.i64 little v1, v0 + return +} + +; check: mvhhi 0(%r2), 14640 +; nextln: br %r14 + +function %istore32_i64(i64, i64) { +block0(v0: i64, v1: i64): + istore32.i64 little v0, v1 + return +} + +; check: strv %r2, 0(%r3) +; nextln: br %r14 + +function %istore32_i64_sym(i64) { + gv0 = symbol colocated %sym +block0(v0: i64): + v1 = symbol_value.i64 gv0 + istore32.i64 little v0, v1 + return +} + +; check: larl %r1, %sym + 0 ; strv %r2, 0(%r1) +; nextln: br %r14 + +function %istore32_imm_i64(i64) { +block0(v0: i64): + v1 = iconst.i64 12345 + istore32.i64 little v1, v0 + return +} + +; check: lghi %r3, 12345 +; nextln: strv %r3, 0(%r2) +; nextln: br %r14 + +function %store_i32(i32, i64) { +block0(v0: i32, v1: i64): + store.i32 little v0, v1 + return +} + +; check: strv %r2, 0(%r3) +; nextln: br %r14 + +function %store_i32_sym(i32) { + gv0 = symbol colocated %sym +block0(v0: i32): + v1 = symbol_value.i64 gv0 + store.i32 little v0, v1 + return +} + +; check: larl %r1, %sym + 0 ; strv %r2, 0(%r1) +; nextln: br %r14 + +function %store_imm_i32(i64) { +block0(v0: i64): + v1 = iconst.i32 12345 + store.i32 little v1, v0 + return +} + +; check: lhi %r3, 12345 +; nextln: strv %r3, 0(%r2) +; nextln: br %r14 + +function %istore8_i32(i32, i64) { +block0(v0: i32, v1: i64): + istore8.i32 little v0, v1 + return +} + +; check: stc %r2, 0(%r3) +; nextln: br %r14 + +function %istore8_imm_i32(i64) { +block0(v0: i64): + v1 = iconst.i32 123 + istore8.i32 little v1, v0 + return +} + +; check: mvi 0(%r2), 123 +; nextln: br %r14 + +function %istore16_i32(i32, i64) { +block0(v0: i32, v1: i64): + istore16.i32 little v0, v1 + return +} + +; check: strvh %r2, 0(%r3) +; nextln: br %r14 + +function %istore16_i32_sym(i32) { + gv0 = symbol colocated %sym +block0(v0: i32): + v1 = symbol_value.i64 gv0 + istore16.i32 little v0, v1 + 
return +} + +; check: larl %r1, %sym + 0 ; strvh %r2, 0(%r1) +; nextln: br %r14 + +function %istore16_imm_i32(i64) { +block0(v0: i64): + v1 = iconst.i32 12345 + istore16.i32 little v1, v0 + return +} + +; check: mvhhi 0(%r2), 14640 +; nextln: br %r14 + +function %store_i16(i16, i64) { +block0(v0: i16, v1: i64): + store.i16 little v0, v1 + return +} + +; check: strvh %r2, 0(%r3) +; nextln: br %r14 + +function %store_i16_sym(i16) { + gv0 = symbol colocated %sym +block0(v0: i16): + v1 = symbol_value.i64 gv0 + store.i16 little v0, v1 + return +} + +; check: larl %r1, %sym + 0 ; strvh %r2, 0(%r1) +; nextln: br %r14 + +function %store_imm_i16(i64) { +block0(v0: i64): + v1 = iconst.i16 12345 + store.i16 little v1, v0 + return +} + +; check: mvhhi 0(%r2), 14640 +; nextln: br %r14 + +function %istore8_i16(i16, i64) { +block0(v0: i16, v1: i64): + istore8.i16 little v0, v1 + return +} + +; check: stc %r2, 0(%r3) +; nextln: br %r14 + +function %istore8_imm_i16(i64) { +block0(v0: i64): + v1 = iconst.i16 123 + istore8.i16 little v1, v0 + return +} + +; check: mvi 0(%r2), 123 +; nextln: br %r14 + +function %store_i8(i8, i64) { +block0(v0: i8, v1: i64): + store.i8 little v0, v1 + return +} + +; check: stc %r2, 0(%r3) +; nextln: br %r14 + +function %store_i8_off(i8, i64) { +block0(v0: i8, v1: i64): + store.i8 little v0, v1+4096 + return +} + +; check: stcy %r2, 4096(%r3) +; nextln: br %r14 + +function %store_imm_i8(i64) { +block0(v0: i64): + v1 = iconst.i8 123 + store.i8 little v1, v0 + return +} + +; check: mvi 0(%r2), 123 +; nextln: br %r14 + +function %store_imm_i8_off(i64) { +block0(v0: i64): + v1 = iconst.i8 123 + store.i8 little v1, v0+4096 + return +} + +; check: mviy 4096(%r2), 123 +; nextln: br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/store.clif b/cranelift/filetests/filetests/isa/s390x/store.clif new file mode 100644 index 0000000000..c7f896ae29 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/store.clif @@ -0,0 +1,296 @@ +test compile 
+target s390x + +function %store_i64(i64, i64) { +block0(v0: i64, v1: i64): + store.i64 v0, v1 + return +} + +; check: stg %r2, 0(%r3) +; nextln: br %r14 + +function %store_i64_sym(i64) { + gv0 = symbol colocated %sym +block0(v0: i64): + v1 = symbol_value.i64 gv0 + store.i64 v0, v1 + return +} + +; check: stgrl %r2, %sym + 0 +; nextln: br %r14 + +function %store_imm_i64(i64) { +block0(v0: i64): + v1 = iconst.i64 12345 + store.i64 v1, v0 + return +} + +; check: mvghi 0(%r2), 12345 +; nextln: br %r14 + +function %istore8_i64(i64, i64) { +block0(v0: i64, v1: i64): + istore8.i64 v0, v1 + return +} + +; check: stc %r2, 0(%r3) +; nextln: br %r14 + +function %istore8_imm_i64(i64) { +block0(v0: i64): + v1 = iconst.i64 123 + istore8.i64 v1, v0 + return +} + +; check: mvi 0(%r2), 123 +; nextln: br %r14 + +function %istore16_i64(i64, i64) { +block0(v0: i64, v1: i64): + istore16.i64 v0, v1 + return +} + +; check: sth %r2, 0(%r3) +; nextln: br %r14 + +function %istore16_i64_sym(i64) { + gv0 = symbol colocated %sym +block0(v0: i64): + v1 = symbol_value.i64 gv0 + istore16.i64 v0, v1 + return +} + +; check: sthrl %r2, %sym + 0 +; nextln: br %r14 + +function %istore16_imm_i64(i64) { +block0(v0: i64): + v1 = iconst.i64 12345 + istore16.i64 v1, v0 + return +} + +; check: mvhhi 0(%r2), 12345 +; nextln: br %r14 + +function %istore32_i64(i64, i64) { +block0(v0: i64, v1: i64): + istore32.i64 v0, v1 + return +} + +; check: st %r2, 0(%r3) +; nextln: br %r14 + +function %istore32_i64_sym(i64) { + gv0 = symbol colocated %sym +block0(v0: i64): + v1 = symbol_value.i64 gv0 + istore32.i64 v0, v1 + return +} + +; check: strl %r2, %sym + 0 +; nextln: br %r14 + +function %istore32_imm_i64(i64) { +block0(v0: i64): + v1 = iconst.i64 12345 + istore32.i64 v1, v0 + return +} + +; check: mvhi 0(%r2), 12345 +; nextln: br %r14 + +function %store_i32(i32, i64) { +block0(v0: i32, v1: i64): + store.i32 v0, v1 + return +} + +; check: st %r2, 0(%r3) +; nextln: br %r14 + +function %store_i32_sym(i32) { + gv0 = 
symbol colocated %sym +block0(v0: i32): + v1 = symbol_value.i64 gv0 + store.i32 v0, v1 + return +} + +; check: strl %r2, %sym + 0 +; nextln: br %r14 + +function %store_i32_off(i32, i64) { +block0(v0: i32, v1: i64): + store.i32 v0, v1+4096 + return +} + +; check: sty %r2, 4096(%r3) +; nextln: br %r14 + +function %store_imm_i32(i64) { +block0(v0: i64): + v1 = iconst.i32 12345 + store.i32 v1, v0 + return +} + +; check: mvhi 0(%r2), 12345 +; nextln: br %r14 + +function %istore8_i32(i32, i64) { +block0(v0: i32, v1: i64): + istore8.i32 v0, v1 + return +} + +; check: stc %r2, 0(%r3) +; nextln: br %r14 + +function %istore8_imm_i32(i64) { +block0(v0: i64): + v1 = iconst.i32 123 + istore8.i32 v1, v0 + return +} + +; check: mvi 0(%r2), 123 +; nextln: br %r14 + +function %istore16_i32(i32, i64) { +block0(v0: i32, v1: i64): + istore16.i32 v0, v1 + return +} + +; check: sth %r2, 0(%r3) +; nextln: br %r14 + +function %istore16_i32_sym(i32) { + gv0 = symbol colocated %sym +block0(v0: i32): + v1 = symbol_value.i64 gv0 + istore16.i32 v0, v1 + return +} + +; check: sthrl %r2, %sym + 0 +; nextln: br %r14 + +function %istore16_imm_i32(i64) { +block0(v0: i64): + v1 = iconst.i32 12345 + istore16.i32 v1, v0 + return +} + +; check: mvhhi 0(%r2), 12345 +; nextln: br %r14 + +function %store_i16(i16, i64) { +block0(v0: i16, v1: i64): + store.i16 v0, v1 + return +} + +; check: sth %r2, 0(%r3) +; nextln: br %r14 + +function %store_i16_sym(i16) { + gv0 = symbol colocated %sym +block0(v0: i16): + v1 = symbol_value.i64 gv0 + store.i16 v0, v1 + return +} + +; check: sthrl %r2, %sym + 0 +; nextln: br %r14 + +function %store_i16_off(i16, i64) { +block0(v0: i16, v1: i64): + store.i16 v0, v1+4096 + return +} + +; check: sthy %r2, 4096(%r3) +; nextln: br %r14 + +function %store_imm_i16(i64) { +block0(v0: i64): + v1 = iconst.i16 12345 + store.i16 v1, v0 + return +} + +; check: mvhhi 0(%r2), 12345 +; nextln: br %r14 + +function %istore8_i16(i16, i64) { +block0(v0: i16, v1: i64): + istore8.i16 v0, v1 + 
return +} + +; check: stc %r2, 0(%r3) +; nextln: br %r14 + +function %istore8_imm_i16(i64) { +block0(v0: i64): + v1 = iconst.i16 123 + istore8.i16 v1, v0 + return +} + +; check: mvi 0(%r2), 123 +; nextln: br %r14 + +function %store_i8(i8, i64) { +block0(v0: i8, v1: i64): + store.i8 v0, v1 + return +} + +; check: stc %r2, 0(%r3) +; nextln: br %r14 + +function %store_i8_off(i8, i64) { +block0(v0: i8, v1: i64): + store.i8 v0, v1+4096 + return +} + +; check: stcy %r2, 4096(%r3) +; nextln: br %r14 + +function %store_imm_i8(i64) { +block0(v0: i64): + v1 = iconst.i8 123 + store.i8 v1, v0 + return +} + +; check: mvi 0(%r2), 123 +; nextln: br %r14 + +function %store_imm_i8_off(i64) { +block0(v0: i64): + v1 = iconst.i8 123 + store.i8 v1, v0+4096 + return +} + +; check: mviy 4096(%r2), 123 +; nextln: br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/symbols.clif b/cranelift/filetests/filetests/isa/s390x/symbols.clif new file mode 100644 index 0000000000..98d0cc8e30 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/symbols.clif @@ -0,0 +1,54 @@ +test compile +target s390x + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; SYMBOL_VALUE +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %symbol_value() -> i64 { + gv0 = symbol %my_global + +block0: + v0 = symbol_value.i64 gv0 + return v0 +} + +; check: bras %r1, 12 ; data %my_global + 0 ; lg %r2, 0(%r1) +; nextln: br %r14 + +function %symbol_value_colocated() -> i64 { + gv0 = symbol colocated %my_global_colo + +block0: + v0 = symbol_value.i64 gv0 + return v0 +} + +; check: larl %r2, %my_global_colo + 0 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FUNC_ADDR +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %func_addr() -> i64 { + fn0 = %my_func(i64) -> i64 + +block0: + v0 = func_addr.i64 fn0 + return v0 +} + +; check: bras %r1, 12 ; data %my_func + 0 ; lg %r2, 0(%r1) +; nextln: br %r14 + +function %func_addr_colocated() -> i64 { + fn0 = colocated %my_func_colo(i64) -> 
i64 + +block0: + v0 = func_addr.i64 fn0 + return v0 +} + +; check: larl %r2, %my_func_colo + 0 +; nextln: br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/traps.clif b/cranelift/filetests/filetests/isa/s390x/traps.clif new file mode 100644 index 0000000000..a6b70cecea --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/traps.clif @@ -0,0 +1,91 @@ +test compile +target s390x + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; TRAP/RESUMABLE_TRAP +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %trap() { +block0: + trap user0 +} + +; check: trap + +function %resumable_trap() { +block0: + trap user0 +} + +; check: trap + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; TRAPZ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %trapz(i64) { +block0(v0: i64): + v1 = iconst.i64 42 + v2 = icmp eq v0, v1 + trapz v2, user0 + return +} + +; FIXME: Does not use TrapIf internally as trapz is expanded. +; check: Block 0 +; check: clgfi %r2, 42 +; nextln: jge label1 ; jg label2 +; check: Block 1: +; check: br %r14 +; check: Block 2: +; check: trap + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; TRAPNZ/RESUMABLE_TRAPNZ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %trapnz(i64) { +block0(v0: i64): + v1 = iconst.i64 42 + v2 = icmp eq v0, v1 + trapnz v2, user0 + return +} + +; FIXME: Does not use TrapIf internally as trapnz is expanded. +; check: Block 0 +; check: clgfi %r2, 42 +; nextln: jgne label1 ; jg label2 +; check: Block 1: +; check: br %r14 +; check: Block 2: +; check: trap + +function %resumable_trapnz(i64) { +block0(v0: i64): + v1 = iconst.i64 42 + v2 = icmp eq v0, v1 + trapnz v2, user0 + return +} + +; FIXME: Does not use TrapIf internally as resumable_trapnz is expanded. 
+; check: Block 0 +; check: clgfi %r2, 42 +; nextln: jgne label1 ; jg label2 +; check: Block 1: +; check: br %r14 +; check: Block 2: +; check: trap + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; DEBUGTRAP +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %h() { +block0: + debugtrap + return +} + +; check: debugtrap diff --git a/cranelift/filetests/filetests/isa/x64/amode-opt.clif b/cranelift/filetests/filetests/isa/x64/amode-opt.clif index bfe0198753..f5789b67ee 100644 --- a/cranelift/filetests/filetests/isa/x64/amode-opt.clif +++ b/cranelift/filetests/filetests/isa/x64/amode-opt.clif @@ -1,6 +1,5 @@ test compile -target x86_64 -feature "experimental_x64" +target x86_64 machinst function %amode_add(i64, i64) -> i64 { block0(v0: i64, v1: i64): diff --git a/cranelift/filetests/filetests/isa/x64/b1.clif b/cranelift/filetests/filetests/isa/x64/b1.clif index 7b65fa4e55..cbd265a9ea 100644 --- a/cranelift/filetests/filetests/isa/x64/b1.clif +++ b/cranelift/filetests/filetests/isa/x64/b1.clif @@ -1,6 +1,5 @@ test compile -target x86_64 -feature "experimental_x64" +target x86_64 machinst function %f0(b1, i32, i32) -> i32 { ; check: pushq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/basic.clif b/cranelift/filetests/filetests/isa/x64/basic.clif index 17897d2bb5..8b43d70c7c 100644 --- a/cranelift/filetests/filetests/isa/x64/basic.clif +++ b/cranelift/filetests/filetests/isa/x64/basic.clif @@ -1,6 +1,5 @@ test compile -target x86_64 -feature "experimental_x64" +target x86_64 machinst function %f(i32, i32) -> i32 { block0(v0: i32, v1: i32): diff --git a/cranelift/filetests/filetests/isa/x64/bitops-i128-run.clif b/cranelift/filetests/filetests/isa/x64/bitops-i128-run.clif index 5795900438..531cde54f8 100644 --- a/cranelift/filetests/filetests/isa/x64/bitops-i128-run.clif +++ b/cranelift/filetests/filetests/isa/x64/bitops-i128-run.clif @@ -1,6 +1,5 @@ test run -target x86_64 -feature "experimental_x64" +target x86_64 machinst function %ctz(i64, i64) -> i8 { 
block0(v0: i64, v1: i64): diff --git a/cranelift/filetests/filetests/isa/x64/bitrev-i128-run.clif b/cranelift/filetests/filetests/isa/x64/bitrev-i128-run.clif index 64ea96716c..5e494354b5 100644 --- a/cranelift/filetests/filetests/isa/x64/bitrev-i128-run.clif +++ b/cranelift/filetests/filetests/isa/x64/bitrev-i128-run.clif @@ -1,6 +1,5 @@ test run -target x86_64 -feature "experimental_x64" +target x86_64 machinst function %reverse_bits_zero() -> b1 { block0: diff --git a/cranelift/filetests/filetests/isa/x64/branches.clif b/cranelift/filetests/filetests/isa/x64/branches.clif index 79461ba2cb..99aec088ac 100644 --- a/cranelift/filetests/filetests/isa/x64/branches.clif +++ b/cranelift/filetests/filetests/isa/x64/branches.clif @@ -1,6 +1,5 @@ test compile -target x86_64 -feature "experimental_x64" +target x86_64 machinst function %f0(i32, i32) -> i32 { block0(v0: i32, v1: i32): diff --git a/cranelift/filetests/filetests/isa/x64/call-conv.clif b/cranelift/filetests/filetests/isa/x64/call-conv.clif new file mode 100644 index 0000000000..c705193c6b --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/call-conv.clif @@ -0,0 +1,344 @@ +test compile +target x86_64 machinst + +;; system_v has first param in %rdi, fascall in %rcx +function %one_arg(i32) system_v { + sig0 = (i32) windows_fastcall +block0(v0: i32): + ; check: movq %rdi, %rcx + ; nextln: call *%rdi + call_indirect sig0, v0(v0) + return +} + +;; system_v has params in %rdi, %xmm0, fascall in %rcx, %xmm1 +function %two_args(i32, f32) system_v { + sig0 = (i32, f32) windows_fastcall + sig1 = (i32, f32) system_v +block0(v0: i32, v1: f32): + ; check: movq %rdi, %rsi + ; check: movaps %xmm0, %xmm6 + ; check: movq %rsi, %rcx + ; nextln: movaps %xmm6, %xmm1 + ; nextln: call *%rsi + call_indirect sig0, v0(v0, v1) + + ; check: movq %rsi, %rdi + ; nextln: movaps %xmm6, %xmm0 + ; nextln: call *%rsi + call_indirect sig1, v0(v0, v1) + return +} + +;; fastcall preserves xmm6+, rbx, rbp, rdi, rsi, r12-r15 +;; system_v 
preserves no xmm registers, rbx, rbp, r12-r15 +function %fastcall_to_systemv(i32) windows_fastcall { + sig0 = () system_v +block0(v0: i32): + ; check: pushq %rbp + ; nextln: movq %rsp, %rbp + ; nextln: subq $$176, %rsp + ; nextln: movdqu %xmm6, 0(%rsp) + ; nextln: movdqu %xmm7, 16(%rsp) + ; nextln: movdqu %xmm8, 32(%rsp) + ; nextln: movdqu %xmm9, 48(%rsp) + ; nextln: movdqu %xmm10, 64(%rsp) + ; nextln: movdqu %xmm11, 80(%rsp) + ; nextln: movdqu %xmm12, 96(%rsp) + ; nextln: movdqu %xmm13, 112(%rsp) + ; nextln: movdqu %xmm14, 128(%rsp) + ; nextln: movdqu %xmm15, 144(%rsp) + ; nextln: movq %rsi, 160(%rsp) + ; nextln: movq %rdi, 168(%rsp) + ; nextln: call *%rcx + ; nextln: movdqu 0(%rsp), %xmm6 + ; nextln: movdqu 16(%rsp), %xmm7 + ; nextln: movdqu 32(%rsp), %xmm8 + ; nextln: movdqu 48(%rsp), %xmm9 + ; nextln: movdqu 64(%rsp), %xmm10 + ; nextln: movdqu 80(%rsp), %xmm11 + ; nextln: movdqu 96(%rsp), %xmm12 + ; nextln: movdqu 112(%rsp), %xmm13 + ; nextln: movdqu 128(%rsp), %xmm14 + ; nextln: movdqu 144(%rsp), %xmm15 + ; nextln: movq 160(%rsp), %rsi + ; nextln: movq 168(%rsp), %rdi + ; nextln: addq $$176, %rsp + ; nextln: movq %rbp, %rsp + ; nextln: popq %rbp + ; nextln: ret + call_indirect sig0, v0() + return +} + +function %many_args( + ;; rdi, rsi, rdx, rcx, r8, r9, + i64, i64, i64, i64, i64, i64, + + ;; xmm0-7 + f64, f64, f64, f64, f64, f64, f64, f64, + + ;; stack args + i64, i32, f32, f64 +) system_v { + sig0 = ( + i64, i64, i64, i64, i64, i64, f64, f64, f64, f64, f64, f64, f64, f64, i64, + i32, f32, f64 + ) windows_fastcall +block0( + v0: i64, v1:i64, v2:i64, v3:i64, + v4:i64, v5:i64, + v6: f64, v7: f64, v8:f64, v9:f64, v10:f64, v11:f64, v12:f64, v13:f64, + v14:i64, v15:i32, v16:f32, v17:f64 +): + ; check: pushq %rbp + ; nextln: movq %rsp, %rbp + ; nextln: subq $$32, %rsp + ; nextln: movq %r12, 0(%rsp) + ; nextln: movq %r13, 8(%rsp) + ; nextln: movq %r14, 16(%rsp) + ; nextln: movq %rdx, %rax + ; nextln: movq %rcx, %r10 + ; nextln: movq %r8, %r11 + ; nextln: movq %r9, 
%r12 + ; nextln: movq 16(%rbp), %r13 + ; nextln: movslq 24(%rbp), %r14 + ; nextln: movss 32(%rbp), %xmm8 + ; nextln: movsd 40(%rbp), %xmm9 + ; nextln: subq $$144, %rsp + ; nextln: virtual_sp_offset_adjust 144 + ; nextln: movq %rdi, %rcx + ; nextln: movq %rsi, %rdx + ; nextln: movq %rax, %r8 + ; nextln: movq %r10, %r9 + ; nextln: movq %r11, 32(%rsp) + ; nextln: movq %r12, 40(%rsp) + ; nextln: movsd %xmm0, 48(%rsp) + ; nextln: movsd %xmm1, 56(%rsp) + ; nextln: movsd %xmm2, 64(%rsp) + ; nextln: movsd %xmm3, 72(%rsp) + ; nextln: movsd %xmm4, 80(%rsp) + ; nextln: movsd %xmm5, 88(%rsp) + ; nextln: movsd %xmm6, 96(%rsp) + ; nextln: movsd %xmm7, 104(%rsp) + ; nextln: movq %r13, 112(%rsp) + ; nextln: movl %r14d, 120(%rsp) + ; nextln: movss %xmm8, 128(%rsp) + ; nextln: movsd %xmm9, 136(%rsp) + ; nextln: call *%rdi + ; nextln: addq $$144, %rsp + ; nextln: virtual_sp_offset_adjust -144 + ; nextln: movq 0(%rsp), %r12 + ; nextln: movq 8(%rsp), %r13 + ; nextln: movq 16(%rsp), %r14 + ; nextln: addq $$32, %rsp + ; nextln: movq %rbp, %rsp + ; nextln: popq %rbp + ; nextln: ret + call_indirect sig0, v0( + v0, v1, v2, v3, + v4, v5, v6, v7, + v8, v9, v10, v11, + v12, v13, v14, v15, + v16, v17 + ) + return +} + +; rdi => rcx +; rsi => rdx +; rdx => r8 +; rcx => r9 +; r8 => stack +function %many_ints(i64, i64, i64, i64, i64) system_v { + sig0 = (i64, i64, i64, i64, i64) windows_fastcall +block0(v0: i64, v1:i64, v2:i64, v3:i64, v4:i64): + ; check: pushq %rbp + ; nextln: movq %rsp, %rbp + ; nextln: movq %rdx, %rax + ; nextln: movq %rcx, %r9 + ; nextln: movq %r8, %r10 + ; nextln: subq $$48, %rsp + ; nextln: virtual_sp_offset_adjust 48 + ; nextln: movq %rdi, %rcx + ; nextln: movq %rsi, %rdx + ; nextln: movq %rax, %r8 + ; nextln: movq %r10, 32(%rsp) + ; nextln: call *%rdi + ; nextln: addq $$48, %rsp + ; nextln: virtual_sp_offset_adjust -48 + ; nextln: movq %rbp, %rsp + ; nextln: popq %rbp + ; nextln: ret + call_indirect sig0, v0(v0, v1, v2, v3, v4) + return +} + +function %many_args2(i32, f32, 
i64, f64, i32, i32, i32, f32, f64, f32, f64) system_v { + sig0 = (i32, f32, i64, f64, i32, i32, i32, f32, f64, f32, f64) windows_fastcall +block0(v0: i32, v1: f32, v2: i64, v3: f64, v4: i32, v5: i32, v6: i32, v7: f32, v8: f64, v9: f32, v10: f64): + ; check: pushq %rbp + ; nextln: movq %rsp, %rbp + ; nextln: movaps %xmm1, %xmm6 + ; nextln: movq %rcx, %rax + ; nextln: movq %r8, %r9 + ; nextln: movaps %xmm3, %xmm7 + ; nextln: subq $$96, %rsp + ; nextln: virtual_sp_offset_adjust 96 + ; nextln: movq %rdi, %rcx + ; nextln: movaps %xmm0, %xmm1 + ; nextln: movq %rsi, %r8 + ; nextln: movaps %xmm6, %xmm3 + ; nextln: movl %edx, 32(%rsp) + ; nextln: movl %eax, 40(%rsp) + ; nextln: movl %r9d, 48(%rsp) + ; nextln: movss %xmm2, 56(%rsp) + ; nextln: movsd %xmm7, 64(%rsp) + ; nextln: movss %xmm4, 72(%rsp) + ; nextln: movsd %xmm5, 80(%rsp) + ; nextln: call *%rdi + ; nextln: addq $$96, %rsp + ; nextln: virtual_sp_offset_adjust -96 + ; nextln: movq %rbp, %rsp + ; nextln: popq %rbp + ; nextln: ret + call_indirect sig0, v0(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10) + return +} + +function %wasmtime_mix1(i32) wasmtime_system_v { + sig0 = (i32) system_v +block0(v0: i32): + ; check: movq %rdi, %rsi + ; nextln: movq %rsi, %rdi + ; nextln: call *%rsi + call_indirect sig0, v0(v0) + return +} + +function %wasmtime_mix2(i32) system_v { + sig0 = (i32) wasmtime_system_v +block0(v0: i32): + ; check: movq %rdi, %rsi + ; nextln: movq %rsi, %rdi + ; nextln: call *%rsi + call_indirect sig0, v0(v0) + return +} + +function %wasmtime_mix2() -> i32, i32 system_v { + sig0 = () -> i32, i32 wasmtime_system_v +block0: + ; check: pushq %rbp + ; nextln: movq %rsp, %rbp + ; nextln: movl $$1, %esi + ; nextln: subq $$16, %rsp + ; nextln: virtual_sp_offset_adjust 16 + ; nextln: lea 0(%rsp), %rdi + ; nextln: call *%rsi + ; nextln: movslq 0(%rsp), %rsi + ; nextln: addq $$16, %rsp + ; nextln: virtual_sp_offset_adjust -16 + ; nextln: movq %rsi, %rdx + ; nextln: movq %rbp, %rsp + ; nextln: popq %rbp + ; nextln: ret + 
v2 = iconst.i32 1 + v0, v1 = call_indirect sig0, v2() + return v0, v1 +} + +function %wasmtime_mix3() -> i32, i32 wasmtime_system_v { + sig0 = () -> i32, i32 system_v +block0: + ; check: pushq %rbp + ; nextln: movq %rsp, %rbp + ; nextln: subq $$16, %rsp + ; nextln: movq %r12, 0(%rsp) + ; nextln: movq %rdi, %r12 + ; nextln: movl $$1, %esi + ; nextln: call *%rsi + ; nextln: movl %edx, 0(%r12) + ; nextln: movq 0(%rsp), %r12 + ; nextln: addq $$16, %rsp + ; nextln: movq %rbp, %rsp + ; nextln: popq %rbp + ; nextln: ret + v2 = iconst.i32 1 + v0, v1 = call_indirect sig0, v2() + return v0, v1 +} + +function %wasmtime_mix4() -> i32, i64, i32 wasmtime_system_v { + sig0 = () -> i32, i64, i32 system_v +block0: + ; check: pushq %rbp + ; nextln: movq %rsp, %rbp + ; nextln: subq $$16, %rsp + ; nextln: movq %r12, 0(%rsp) + ; nextln: movq %rdi, %r12 + ; nextln: movl $$1, %esi + ; nextln: subq $$16, %rsp + ; nextln: virtual_sp_offset_adjust 16 + ; nextln: lea 0(%rsp), %rdi + ; nextln: call *%rsi + ; nextln: movslq 0(%rsp), %rsi + ; nextln: addq $$16, %rsp + ; nextln: virtual_sp_offset_adjust -16 + ; nextln: movq %rdx, 0(%r12) + ; nextln: movl %esi, 8(%r12) + ; nextln: movq 0(%rsp), %r12 + ; nextln: addq $$16, %rsp + ; nextln: movq %rbp, %rsp + ; nextln: popq %rbp + ; nextln: ret + v3 = iconst.i32 1 + v0, v1, v2 = call_indirect sig0, v3() + return v0, v1, v2 +} + +function %wasmtime_mix5() -> f32, i64, i32, f32 wasmtime_system_v { + sig0 = () -> f32, i64, i32, f32 system_v +block0: + ; check: pushq %rbp + ; nextln: movq %rsp, %rbp + ; nextln: subq $$16, %rsp + ; nextln: movq %r12, 0(%rsp) + ; nextln: movq %rdi, %r12 + ; nextln: movl $$1, %esi + ; nextln: call *%rsi + ; nextln: movq %rax, 0(%r12) + ; nextln: movl %edx, 8(%r12) + ; nextln: movss %xmm1, 12(%r12) + ; nextln: movq 0(%rsp), %r12 + ; nextln: addq $$16, %rsp + ; nextln: movq %rbp, %rsp + ; nextln: popq %rbp + ; nextln: ret + v5 = iconst.i32 1 + v0, v1, v2, v3 = call_indirect sig0, v5() + return v0, v1, v2, v3 +} + +function 
%wasmtime_mix6(f32, i64, i32, f32) -> f32, i64, i32, f32 wasmtime_system_v { + sig0 = (f32, i64, i32, f32) -> f32, i64, i32, f32 system_v +block0(v0: f32, v1: i64, v2: i32, v3: f32): + ; check: pushq %rbp + ; nextln: movq %rsp, %rbp + ; nextln: subq $$16, %rsp + ; nextln: movq %r12, 0(%rsp) + ; nextln: movq %rdx, %r12 + ; nextln: movl $$1, %eax + ; nextln: call *%rax + ; nextln: movq %rax, 0(%r12) + ; nextln: movl %edx, 8(%r12) + ; nextln: movss %xmm1, 12(%r12) + ; nextln: movq 0(%rsp), %r12 + ; nextln: addq $$16, %rsp + ; nextln: movq %rbp, %rsp + ; nextln: popq %rbp + ; nextln: ret + v4 = iconst.i32 1 + v5, v6, v7, v8 = call_indirect sig0, v4(v0, v1, v2, v3) + return v5, v6, v7, v8 +} diff --git a/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif b/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif index ac0df03384..f36caed88a 100644 --- a/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif +++ b/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif @@ -1,6 +1,5 @@ test compile -target x86_64 has_lzcnt -feature "experimental_x64" +target x86_64 machinst has_lzcnt function %clz(i64) -> i64 { block0(v0: i64): diff --git a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif index 9d05e04b04..fe43a2ce0d 100644 --- a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif +++ b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif @@ -1,6 +1,5 @@ test compile -target x86_64 -feature "experimental_x64" +target x86_64 machinst function %f0(i64, i64) -> i64, i64 { block0(v0: i64, v1: i64): diff --git a/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif b/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif index b50b10107a..5931451e11 100644 --- a/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif +++ b/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif @@ -1,6 +1,5 @@ test compile -target x86_64 has_bmi1 -feature "experimental_x64" +target x86_64 machinst has_bmi1 function %ctz(i64) -> i64 { block0(v0: 
i64): diff --git a/cranelift/filetests/filetests/isa/x64/div-checks-run.clif b/cranelift/filetests/filetests/isa/x64/div-checks-run.clif new file mode 100644 index 0000000000..5c87d9e8b5 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/div-checks-run.clif @@ -0,0 +1,32 @@ +test run +set avoid_div_traps=false +target x86_64 machinst + +function %i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = srem.i8 v0, v1 + return v2 +} +; run: %i8(0x80, 0xff) == 0 +; run: %i8(0x2, 0x7) == 0x2 + +function %i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = srem.i16 v0, v1 + return v2 +} +; run: %i16(0x8000, 0xffff) == 0 + +function %i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = srem.i32 v0, v1 + return v2 +} +; run: %i32(0x80000000, 0xffffffff) == 0 + +function %i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = srem.i64 v0, v1 + return v2 +} +; run: %i32(0x800000000000000, 0xffffffffffffffff) == 0 diff --git a/cranelift/filetests/filetests/isa/x64/div-checks.clif b/cranelift/filetests/filetests/isa/x64/div-checks.clif new file mode 100644 index 0000000000..3984aba42f --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/div-checks.clif @@ -0,0 +1,52 @@ +test compile +set avoid_div_traps=false +target x86_64 machinst + +;; We should get the checked-div/rem sequence (`srem` pseudoinst below) even +;; when `avoid_div_traps` above is false (i.e. even when the host is normally +;; willing to accept SIGFPEs as Wasm traps). The machine will SIGFPE in some +;; cases when `srem` is valid (specifically -INT_MIN % -1). 
+ +function %i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = srem.i8 v0, v1 +; check: movq %rdi, %rax +; nextln: movl $$0, %edx +; nextln: srem $$rax:$$rdx, %sil +; nextln: shrq $$8, %rax + + return v2 +} + +function %i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = srem.i16 v0, v1 +; check: movq %rdi, %rax +; nextln: movl $$0, %edx +; nextln: srem $$rax:$$rdx, %si +; nextln: movq %rdx, %rax + + return v2 +} + +function %i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = srem.i32 v0, v1 +; check: movq %rdi, %rax +; nextln: movl $$0, %edx +; nextln: srem $$rax:$$rdx, %esi +; nextln: movq %rdx, %rax + + return v2 +} + +function %i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = srem.i64 v0, v1 +; check: movq %rdi, %rax +; nextln: movl $$0, %edx +; nextln: srem $$rax:$$rdx, %rsi +; nextln: movq %rdx, %rax + + return v2 +} diff --git a/cranelift/filetests/filetests/isa/x64/fastcall.clif b/cranelift/filetests/filetests/isa/x64/fastcall.clif index f0fd1a166e..fc8d3a801d 100644 --- a/cranelift/filetests/filetests/isa/x64/fastcall.clif +++ b/cranelift/filetests/filetests/isa/x64/fastcall.clif @@ -1,8 +1,7 @@ test compile set enable_llvm_abi_extensions=true set unwind_info=true -target x86_64 -feature "experimental_x64" +target x86_64 machinst function %f0(i64, i64, i64, i64) -> i64 windows_fastcall { block0(v0: i64, v1: i64, v2: i64, v3: i64): diff --git a/cranelift/filetests/filetests/isa/x64/floating-point.clif b/cranelift/filetests/filetests/isa/x64/floating-point.clif index b3b5907210..c1e30a3b19 100644 --- a/cranelift/filetests/filetests/isa/x64/floating-point.clif +++ b/cranelift/filetests/filetests/isa/x64/floating-point.clif @@ -1,6 +1,5 @@ test compile -target x86_64 -feature "experimental_x64" +target x86_64 machinst function %f(f64) -> f64 { block0(v0: f64): diff --git a/cranelift/filetests/filetests/isa/x64/heap.clif b/cranelift/filetests/filetests/isa/x64/heap.clif index c547582008..35660c826a 100644 --- 
a/cranelift/filetests/filetests/isa/x64/heap.clif +++ b/cranelift/filetests/filetests/isa/x64/heap.clif @@ -1,6 +1,5 @@ test compile -target x86_64 -feature "experimental_x64" +target x86_64 machinst function %f(i32, i64 vmctx) -> i64 { gv0 = vmctx diff --git a/cranelift/filetests/filetests/isa/x64/i128.clif b/cranelift/filetests/filetests/isa/x64/i128.clif index 03260a9833..c480b857bb 100644 --- a/cranelift/filetests/filetests/isa/x64/i128.clif +++ b/cranelift/filetests/filetests/isa/x64/i128.clif @@ -1,7 +1,6 @@ test compile set enable_llvm_abi_extensions=true -target x86_64 -feature "experimental_x64" +target x86_64 machinst function %f0(i128, i128) -> i128 { ; check: pushq %rbp @@ -891,3 +890,219 @@ block0(v0: i8, v1: i128): ; nextln: movq %rbp, %rsp ; nextln: popq %rbp ; nextln: ret + +function %f30(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = ishl v0, v1 + return v2 +} + +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: movq %rsi, %rax +; nextln: movq %rdi, %rsi +; nextln: movq %rdx, %rcx +; nextln: shlq %cl, %rsi +; nextln: movq %rdx, %rcx +; nextln: shlq %cl, %rax +; nextln: movl $$64, %ecx +; nextln: subq %rdx, %rcx +; nextln: shrq %cl, %rdi +; nextln: xorq %rcx, %rcx +; nextln: testq $$127, %rdx +; nextln: cmovzq %rcx, %rdi +; nextln: orq %rax, %rdi +; nextln: xorq %rax, %rax +; nextln: andq $$64, %rdx +; nextln: cmovzq %rdi, %rax +; nextln: cmovzq %rsi, %rcx +; nextln: cmovnzq %rsi, %rax +; nextln: movq %rax, %rdx +; nextln: movq %rcx, %rax +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +function %f31(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = ushr v0, v1 + return v2 +} + +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: movq %rdi, %rax +; nextln: movq %rsi, %rdi +; nextln: movq %rdi, %rsi +; nextln: movq %rdx, %rcx +; nextln: shrq %cl, %rsi +; nextln: movq %rdx, %rcx +; nextln: shrq %cl, %rax +; nextln: movl $$64, %ecx +; nextln: subq %rdx, %rcx +; nextln: shlq %cl, %rdi +; nextln: xorq 
%rcx, %rcx +; nextln: testq $$127, %rdx +; nextln: cmovzq %rcx, %rdi +; nextln: orq %rax, %rdi +; nextln: xorq %rax, %rax +; nextln: xorq %rcx, %rcx +; nextln: andq $$64, %rdx +; nextln: cmovzq %rsi, %rax +; nextln: cmovzq %rdi, %rcx +; nextln: cmovnzq %rsi, %rcx +; nextln: movq %rax, %rdx +; nextln: movq %rcx, %rax +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +function %f32(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = sshr v0, v1 + return v2 +} + +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: movq %rdi, %r8 +; nextln: movq %rsi, %rdi +; nextln: movq %rdi, %rsi +; nextln: movq %rdx, %rcx +; nextln: sarq %cl, %rsi +; nextln: movq %rdx, %rcx +; nextln: shrq %cl, %r8 +; nextln: movl $$64, %ecx +; nextln: subq %rdx, %rcx +; nextln: movq %rdi, %rax +; nextln: shlq %cl, %rax +; nextln: xorq %rcx, %rcx +; nextln: testq $$127, %rdx +; nextln: cmovzq %rcx, %rax +; nextln: orq %r8, %rax +; nextln: sarq $$63, %rdi +; nextln: xorq %rcx, %rcx +; nextln: andq $$64, %rdx +; nextln: cmovzq %rsi, %rdi +; nextln: cmovzq %rax, %rcx +; nextln: cmovnzq %rsi, %rcx +; nextln: movq %rcx, %rax +; nextln: movq %rdi, %rdx +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +function %f33(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = rotl v0, v1 + return v2 +} + +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: movq %rdi, %r8 +; nextln: movq %r8, %r9 +; nextln: movq %rdx, %rcx +; nextln: shlq %cl, %r9 +; nextln: movq %rsi, %rax +; nextln: movq %rdx, %rcx +; nextln: shlq %cl, %rax +; nextln: movl $$64, %ecx +; nextln: subq %rdx, %rcx +; nextln: movq %r8, %r10 +; nextln: shrq %cl, %r10 +; nextln: xorq %rdi, %rdi +; nextln: testq $$127, %rdx +; nextln: cmovzq %rdi, %r10 +; nextln: orq %rax, %r10 +; nextln: xorq %rax, %rax +; nextln: movq %rdx, %rcx +; nextln: andq $$64, %rcx +; nextln: cmovzq %r10, %rax +; nextln: cmovzq %r9, %rdi +; nextln: cmovnzq %r9, %rax +; nextln: movl $$128, %r9d +; nextln: subq %rdx, %r9 +; 
nextln: movq %rsi, %rdx +; nextln: movq %r9, %rcx +; nextln: shrq %cl, %rdx +; nextln: movq %r9, %rcx +; nextln: shrq %cl, %r8 +; nextln: movl $$64, %ecx +; nextln: subq %r9, %rcx +; nextln: shlq %cl, %rsi +; nextln: xorq %rcx, %rcx +; nextln: testq $$127, %r9 +; nextln: cmovzq %rcx, %rsi +; nextln: orq %r8, %rsi +; nextln: xorq %rcx, %rcx +; nextln: xorq %r8, %r8 +; nextln: andq $$64, %r9 +; nextln: cmovzq %rdx, %rcx +; nextln: cmovzq %rsi, %r8 +; nextln: cmovnzq %rdx, %r8 +; nextln: orq %rdi, %r8 +; nextln: orq %rax, %rcx +; nextln: movq %r8, %rax +; nextln: movq %rcx, %rdx +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + + +function %f34(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = rotr v0, v1 + return v2 +} + +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: movq %rsi, %r9 +; nextln: movq %rdx, %rcx +; nextln: shrq %cl, %r9 +; nextln: movq %rdi, %rax +; nextln: movq %rdx, %rcx +; nextln: shrq %cl, %rax +; nextln: movl $$64, %ecx +; nextln: subq %rdx, %rcx +; nextln: movq %rsi, %r10 +; nextln: shlq %cl, %r10 +; nextln: xorq %rcx, %rcx +; nextln: testq $$127, %rdx +; nextln: cmovzq %rcx, %r10 +; nextln: orq %rax, %r10 +; nextln: xorq %rax, %rax +; nextln: xorq %r8, %r8 +; nextln: movq %rdx, %rcx +; nextln: andq $$64, %rcx +; nextln: cmovzq %r9, %rax +; nextln: cmovzq %r10, %r8 +; nextln: cmovnzq %r9, %r8 +; nextln: movl $$128, %r9d +; nextln: subq %rdx, %r9 +; nextln: movq %rdi, %rdx +; nextln: movq %r9, %rcx +; nextln: shlq %cl, %rdx +; nextln: movq %rsi, %r10 +; nextln: movq %r9, %rcx +; nextln: shlq %cl, %r10 +; nextln: movl $$64, %ecx +; nextln: subq %r9, %rcx +; nextln: shrq %cl, %rdi +; nextln: xorq %rsi, %rsi +; nextln: testq $$127, %r9 +; nextln: cmovzq %rsi, %rdi +; nextln: orq %r10, %rdi +; nextln: xorq %rcx, %rcx +; nextln: andq $$64, %r9 +; nextln: cmovzq %rdi, %rcx +; nextln: cmovzq %rdx, %rsi +; nextln: cmovnzq %rdx, %rcx +; nextln: orq %r8, %rsi +; nextln: orq %rax, %rcx +; nextln: movq %rsi, %rax +; nextln: 
movq %rcx, %rdx +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret \ No newline at end of file diff --git a/cranelift/filetests/filetests/isa/x64/icmp-i128-run.clif b/cranelift/filetests/filetests/isa/x64/icmp-i128-run.clif index 2f9e2d5331..5ebf051682 100644 --- a/cranelift/filetests/filetests/isa/x64/icmp-i128-run.clif +++ b/cranelift/filetests/filetests/isa/x64/icmp-i128-run.clif @@ -1,6 +1,5 @@ test run -target x86_64 -feature "experimental_x64" +target x86_64 machinst function %test_icmp_eq_i128() -> b1 { block0: diff --git a/cranelift/filetests/filetests/isa/x64/load-op.clif b/cranelift/filetests/filetests/isa/x64/load-op.clif index 8fefaf6d42..a4069b20ca 100644 --- a/cranelift/filetests/filetests/isa/x64/load-op.clif +++ b/cranelift/filetests/filetests/isa/x64/load-op.clif @@ -1,6 +1,5 @@ test compile -target x86_64 -feature "experimental_x64" +target x86_64 machinst function %add_from_mem_u32_1(i64, i32) -> i32 { block0(v0: i64, v1: i32): diff --git a/cranelift/filetests/filetests/isa/x64/move-elision.clif b/cranelift/filetests/filetests/isa/x64/move-elision.clif index 45f07b98d4..5b23afb8d3 100644 --- a/cranelift/filetests/filetests/isa/x64/move-elision.clif +++ b/cranelift/filetests/filetests/isa/x64/move-elision.clif @@ -1,7 +1,6 @@ test compile set enable_simd -target x86_64 skylake -feature "experimental_x64" +target x86_64 machinst skylake function %move_registers(i32x4) -> b8x16 { block0(v0: i32x4): diff --git a/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif b/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif index 4e49cd6d4f..2049f53962 100644 --- a/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif +++ b/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif @@ -1,6 +1,5 @@ test compile -target x86_64 has_popcnt has_sse42 -feature "experimental_x64" +target x86_64 machinst has_popcnt has_sse42 function %popcnt(i64) -> i64 { block0(v0: i64): diff --git 
a/cranelift/filetests/filetests/isa/x64/popcnt.clif b/cranelift/filetests/filetests/isa/x64/popcnt.clif index a06f5a27ce..df68f6b4b7 100644 --- a/cranelift/filetests/filetests/isa/x64/popcnt.clif +++ b/cranelift/filetests/filetests/isa/x64/popcnt.clif @@ -1,8 +1,5 @@ test compile -target x86_64 -feature "experimental_x64" - -; TODO: test with popcnt feature available too, once new backend supports that. +target x86_64 machinst function %popcnt64(i64) -> i64 { block0(v0: i64): diff --git a/cranelift/filetests/filetests/isa/x64/probestack.clif b/cranelift/filetests/filetests/isa/x64/probestack.clif index 135587d355..8e8b424c70 100644 --- a/cranelift/filetests/filetests/isa/x64/probestack.clif +++ b/cranelift/filetests/filetests/isa/x64/probestack.clif @@ -1,7 +1,6 @@ test compile set enable_probestack=true -target x86_64 -feature "experimental_x64" +target x86_64 machinst function %f1() -> i64 { ss0 = explicit_slot 100000 diff --git a/cranelift/filetests/filetests/isa/x64/run-const.clif b/cranelift/filetests/filetests/isa/x64/run-const.clif index af0a85353c..c85196018c 100644 --- a/cranelift/filetests/filetests/isa/x64/run-const.clif +++ b/cranelift/filetests/filetests/isa/x64/run-const.clif @@ -1,6 +1,5 @@ test run -target x86_64 -feature "experimental_x64" +target x86_64 machinst function %test_compare_i32() -> b1 { block0: diff --git a/cranelift/filetests/filetests/isa/x64/select-i128.clif b/cranelift/filetests/filetests/isa/x64/select-i128.clif index 97ec0142f5..af6996f85f 100644 --- a/cranelift/filetests/filetests/isa/x64/select-i128.clif +++ b/cranelift/filetests/filetests/isa/x64/select-i128.clif @@ -1,7 +1,6 @@ test compile set enable_llvm_abi_extensions=true -target x86_64 -feature "experimental_x64" +target x86_64 machinst function %f0(i32, i128, i128) -> i128 { ; check: pushq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/shift-i128-run.clif b/cranelift/filetests/filetests/isa/x64/shift-i128-run.clif index 753a6f658f..029444eea0 100644 --- 
a/cranelift/filetests/filetests/isa/x64/shift-i128-run.clif +++ b/cranelift/filetests/filetests/isa/x64/shift-i128-run.clif @@ -1,6 +1,5 @@ test run -target x86_64 -feature "experimental_x64" +target x86_64 machinst function %ishl(i64, i64, i8) -> i64, i64 { block0(v0: i64, v1: i64, v2: i8): @@ -72,3 +71,96 @@ block0(v0: i64, v1: i64, v2: i8): ; run: %rotr(0x01010101_01010101, 0x01010101_01010101, 73) == [0x80808080_80808080, 0x80808080_80808080] ; run: %rotr(0x01010101_01010101, 0x02020202_02020202, 0) == [0x01010101_01010101, 0x02020202_02020202] ; run: %rotr(0x01010101_01010101, 0x03030303_03030303, 128) == [0x01010101_01010101, 0x03030303_03030303] + + +; i128 amount operand tests +function %ishl_amt_i128(i64, i64, i8) -> i64, i64 { +block0(v0: i64, v1: i64, v2: i8): + v3 = uextend.i64 v2 + v4 = iconcat v3, v3 + + v5 = iconcat v0, v1 + + v6 = ishl.i128 v5, v4 + v7, v8 = isplit v6 + return v7, v8 +} +; run: %ishl_amt_i128(0x01010101_01010101, 0x01010101_01010101, 2) == [0x04040404_04040404, 0x04040404_04040404] +; run: %ishl_amt_i128(0x01010101_01010101, 0x01010101_01010101, 9) == [0x02020202_02020200, 0x02020202_02020202] +; run: %ishl_amt_i128(0x01010101_01010101, 0xffffffff_ffffffff, 66) == [0x00000000_00000000, 0x04040404_04040404] +; run: %ishl_amt_i128(0x01010101_01010101, 0x01010101_01010101, 0) == [0x01010101_01010101, 0x01010101_01010101] +; run: %ishl_amt_i128(0x01010101_01010101, 0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101] +; run: %ishl_amt_i128(0x00000000_00000001, 0x00000000_00000000, 0) == [0x00000000_00000001, 0x00000000_00000000] +; run: %ishl_amt_i128(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001] +; run: %ishl_amt_i128(0x12340000_00000000, 0x56780000_00000000, 0) == [0x12340000_00000000, 0x56780000_00000000] +; run: %ishl_amt_i128(0x12340000_00000000, 0x56780000_00000000, 64) == [0x00000000_00000000, 0x12340000_00000000] +; run: %ishl_amt_i128(0x12340000_00000000, 
0x56780000_00000000, 32) == [0x00000000_00000000, 0x00000000_12340000] + +function %ushr_amt_i128(i64, i64, i8) -> i64, i64 { +block0(v0: i64, v1: i64, v2: i8): + v3 = uextend.i64 v2 + v4 = iconcat v3, v3 + + v5 = iconcat v0, v1 + + v6 = ushr.i128 v5, v4 + v7, v8 = isplit v6 + return v7, v8 +} +; run: %ushr_amt_i128(0x01010101_01010101, 0x01010101_01010101, 2) == [0x40404040_40404040, 0x00404040_40404040] +; run: %ushr_amt_i128(0x01010101_01010101, 0x01010101_01010101, 66) == [0x00404040_40404040, 0x00000000_00000000] +; run: %ushr_amt_i128(0x01010101_01010101, 0x01010101_01010101, 0) == [0x01010101_01010101, 0x01010101_01010101] +; run: %ushr_amt_i128(0x01010101_01010101, 0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101] +; run: %ushr_amt_i128(0x00000000_00000001, 0x00000000_00000000, 0) == [0x00000000_00000001, 0x00000000_00000000] +; run: %ushr_amt_i128(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001] +; run: %ushr_amt_i128(0x12340000_00000000, 0x56780000_00000000, 0) == [0x12340000_00000000, 0x56780000_00000000] +; run: %ushr_amt_i128(0x12340000_00000000, 0x56780000_00000000, 64) == [0x56780000_00000000, 0x00000000_00000000] +; run: %ushr_amt_i128(0x12340000_00000000, 0x56780000_00000000, 32) == [0x00000000_12340000, 0x00000000_56780000] + +function %sshr_amt_i128(i64, i64, i8) -> i64, i64 { +block0(v0: i64, v1: i64, v2: i8): + v3 = uextend.i64 v2 + v4 = iconcat v3, v3 + + v5 = iconcat v0, v1 + + v6 = sshr.i128 v5, v4 + v7, v8 = isplit v6 + return v7, v8 +} +; run: %sshr_amt_i128(0x01010101_01010101, 0x81010101_01010101, 2) == [0x40404040_40404040, 0xe0404040_40404040] +; run: %sshr_amt_i128(0x12345678_9abcdef0, 0x80101010_10101010, 66) == [0xe0040404_04040404, 0xffffffff_ffffffff] +; run: %sshr_amt_i128(0x12345678_9abcdef0, 0x80101010_10101010, 0) == [0x12345678_9abcdef0, 0x80101010_10101010] +; run: %sshr_amt_i128(0x12345678_9abcdef0, 0x80101010_10101010, 128) == [0x12345678_9abcdef0, 
0x80101010_10101010] + +function %rotl_amt_i128(i64, i64, i8) -> i64, i64 { +block0(v0: i64, v1: i64, v2: i8): + v3 = uextend.i64 v2 + v4 = iconcat v3, v3 + + v5 = iconcat v0, v1 + + v6 = rotl.i128 v5, v4 + v7, v8 = isplit v6 + return v7, v8 +} +; run: %rotl_amt_i128(0x01010101_01010101, 0x01010101_01010101, 9) == [0x02020202_02020202, 0x02020202_02020202] +; run: %rotl_amt_i128(0x01010101_01010101, 0x01010101_01010101, 73) == [0x02020202_02020202, 0x02020202_02020202] +; run: %rotl_amt_i128(0x01010101_01010101, 0x02020202_02020202, 0) == [0x01010101_01010101, 0x02020202_02020202] +; run: %rotl_amt_i128(0x01010101_01010101, 0x03030303_03030303, 128) == [0x01010101_01010101, 0x03030303_03030303] + +function %rotr_amt_i128(i64, i64, i8) -> i64, i64 { +block0(v0: i64, v1: i64, v2: i8): + v3 = uextend.i64 v2 + v4 = iconcat v3, v3 + + v5 = iconcat v0, v1 + + v6 = rotr.i128 v5, v4 + v7, v8 = isplit v6 + return v7, v8 +} +; run: %rotr_amt_i128(0x01010101_01010101, 0x01010101_01010101, 9) == [0x80808080_80808080, 0x80808080_80808080] +; run: %rotr_amt_i128(0x01010101_01010101, 0x01010101_01010101, 73) == [0x80808080_80808080, 0x80808080_80808080] +; run: %rotr_amt_i128(0x01010101_01010101, 0x02020202_02020202, 0) == [0x01010101_01010101, 0x02020202_02020202] +; run: %rotr_amt_i128(0x01010101_01010101, 0x03030303_03030303, 128) == [0x01010101_01010101, 0x03030303_03030303] diff --git a/cranelift/filetests/filetests/isa/x64/simd-arithmetic-run.clif b/cranelift/filetests/filetests/isa/x64/simd-arithmetic-run.clif index 1ea7d3f945..916f80f278 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-arithmetic-run.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-arithmetic-run.clif @@ -1,7 +1,6 @@ test run set enable_simd -target x86_64 skylake -feature "experimental_x64" +target x86_64 machinst skylake function %iadd_i32x4(i32x4, i32x4) -> i32x4 { block0(v0:i32x4, v1:i32x4): diff --git a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif 
b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif index 65e1b5df7e..f34b61f5e3 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif @@ -1,7 +1,6 @@ test compile set enable_simd -target x86_64 skylake -feature "experimental_x64" +target x86_64 machinst skylake function %bitselect_i16x8() -> i16x8 { block0: diff --git a/cranelift/filetests/filetests/isa/x64/simd-bitwise-run.clif b/cranelift/filetests/filetests/isa/x64/simd-bitwise-run.clif index 8ab624d6c2..3dad7ac772 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-bitwise-run.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-bitwise-run.clif @@ -1,7 +1,6 @@ test run set enable_simd -target x86_64 skylake -feature "experimental_x64" +target x86_64 machinst skylake function %bitselect_i8x16(i8x16, i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16, v2: i8x16): diff --git a/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif b/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif index 98dcf9a1a2..b50ff6328d 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif @@ -1,7 +1,6 @@ test compile set enable_simd -target x86_64 skylake -feature "experimental_x64" +target x86_64 machinst skylake function %icmp_ne_32x4(i32x4, i32x4) -> b32x4 { block0(v0: i32x4, v1: i32x4): diff --git a/cranelift/filetests/filetests/isa/x64/simd-comparison-run.clif b/cranelift/filetests/filetests/isa/x64/simd-comparison-run.clif index c22c56201a..6b34c2f533 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-comparison-run.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-comparison-run.clif @@ -1,7 +1,6 @@ test run set enable_simd -target x86_64 -feature "experimental_x64" +target x86_64 machinst function %icmp_eq_i8x16() -> b8 { block0: diff --git 
a/cranelift/filetests/filetests/isa/x64/simd-conversion-run.clif b/cranelift/filetests/filetests/isa/x64/simd-conversion-run.clif index 3e33b27670..d6f353eee2 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-conversion-run.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-conversion-run.clif @@ -1,7 +1,6 @@ test run set enable_simd -target x86_64 -feature "experimental_x64" +target x86_64 machinst function %fcvt_from_sint() -> b1 { block0: diff --git a/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif index f451bd2a25..2f6a8c7dfd 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif @@ -1,7 +1,6 @@ test compile set enable_simd -target x86_64 has_ssse3 has_sse41 -feature "experimental_x64" +target x86_64 machinst has_ssse3 has_sse41 ;; shuffle diff --git a/cranelift/filetests/filetests/isa/x64/simd-lane-access-run.clif b/cranelift/filetests/filetests/isa/x64/simd-lane-access-run.clif index 2ed0aed5d9..7d0014a20d 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-lane-access-run.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-lane-access-run.clif @@ -1,7 +1,6 @@ test run set enable_simd -target x86_64 -feature "experimental_x64" +target x86_64 machinst ;; shuffle diff --git a/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif index d03aa0b204..72249faaef 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif @@ -1,7 +1,6 @@ test compile set enable_simd -target x86_64 skylake -feature "experimental_x64" +target x86_64 machinst skylake function %bnot_b32x4(b32x4) -> b32x4 { block0(v0: b32x4): diff --git a/cranelift/filetests/filetests/isa/x64/simd-logical-run.clif 
b/cranelift/filetests/filetests/isa/x64/simd-logical-run.clif index ff0231e459..13fc1182b4 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-logical-run.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-logical-run.clif @@ -1,7 +1,6 @@ test run set enable_simd -target x86_64 -feature "experimental_x64" +target x86_64 machinst function %bnot() -> b32 { block0: diff --git a/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif b/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif new file mode 100644 index 0000000000..31edd7bdca --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif @@ -0,0 +1,127 @@ +test compile +target x86_64 machinst + +;; The goal of this test is to ensure that stack spills of an integer value, +;; which width is less than the machine word's size, cause the full word to be +;; stored, and not only the lower bits. + +;; Because of unsigned extensions which can be transformed into simple moves, +;; the source vreg of the extension operation can be coalesced with its +;; destination vreg, and if it happens to be spill, then the reload may use a +;; reload of a different, larger size. + +function %f0(i32, i32, i32) -> i64 { + fn0 = %g(i32) -> i64 + +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: subq $$64, %rsp + +;; Stash all the callee-saved registers. + +; nextln: movq %r12, 16(%rsp) +; nextln: movq %r13, 24(%rsp) +; nextln: movq %r14, 32(%rsp) +; nextln: movq %rbx, 40(%rsp) +; nextln: movq %r15, 48(%rsp) + +block0(v0: i32, v1: i32, v2: i32): + ;; First, create enough virtual registers so that the call instructions + ;; causes at least one of them to be spilled onto the stack. 
+ + v3 = iadd.i32 v0, v1 + v4 = iadd.i32 v1, v2 + v5 = iadd.i32 v0, v2 + v6 = iadd.i32 v3, v0 + v7 = iadd.i32 v4, v0 + v8 = iadd.i32 v5, v0 + +; nextln: movq %rdi, %r12 +; nextln: addl %esi, %r12d +; nextln: movq %rsi, %r13 +; nextln: addl %edx, %r13d +; nextln: movq %rdi, %r14 +; nextln: addl %edx, %r14d +; nextln: movq %r12, %rbx +; nextln: addl %edi, %ebx +; nextln: movq %r13, %r15 +; nextln: addl %edi, %r15d +; nextln: movq %r14, %rsi + +;; This should be movq below, not movl. +; nextln: movq %rsi, rsp(0 + virtual offset) + +; nextln: movq rsp(0 + virtual offset), %rsi +; nextln: addl %edi, %esi + + ;; Put an effectful instruction so that the live-ranges of the adds and + ;; uextends are split here, and to prevent the uextend to be emitted + ;; before the call. This will effectively causing the above i32 to be + ;; spilled as an i32, and not a full i64. + + v300 = call fn0(v0) + +;; This should be movq below, not movl. +; nextln: movq %rsi, rsp(0 + virtual offset) + +; nextln: load_ext_name %g+0, %rsi +; nextln: call *%rsi + + v31 = uextend.i64 v3 + v41 = uextend.i64 v4 + v51 = uextend.i64 v5 + v61 = uextend.i64 v6 + v71 = uextend.i64 v7 + v81 = uextend.i64 v8 + + ;; None of the uextends are generated here yet. + + ;; At this point, I'd expect that this second call below would be not + ;; necessary, but if it is removed, the uextend is applied before the call, + ;; and the i64 is spilled (then reloaded), causing the bug to not appear. So + ;; an additional call it is! + + v100 = call fn0(v3) + +; nextln: movq %r12, %rsi +; nextln: movq %rsi, rsp(8 + virtual offset) +; nextln: nop len=0 +; nextln: movq %r12, %rdi +; nextln: load_ext_name %g+0, %rsi +; nextln: call *%rsi + + ;; Cause reloads of all the values. Most are in registers, but one of them + ;; is on the stack. Make sure they're all used in the final computation. 
+ + v101 = iadd.i64 v100, v31 + v102 = iadd.i64 v101, v41 + v103 = iadd.i64 v102, v51 + v104 = iadd.i64 v103, v61 + v105 = iadd.i64 v104, v71 + v200 = iadd.i64 v105, v81 + +; nextln: movq %rax, %rsi +; nextln: movq rsp(8 + virtual offset), %rdi +; nextln: addq %rdi, %rsi +; nextln: addq %r13, %rsi +; nextln: addq %r14, %rsi +; nextln: addq %rbx, %rsi +; nextln: addq %r15, %rsi + +;; The reload operates on a full word, so uses movq. +; nextln: movq rsp(0 + virtual offset), %rdi + +; nextln: addq %rdi, %rsi +; nextln: movq %rsi, %rax +; nextln: movq 16(%rsp), %r12 +; nextln: movq 24(%rsp), %r13 +; nextln: movq 32(%rsp), %r14 +; nextln: movq 40(%rsp), %rbx +; nextln: movq 48(%rsp), %r15 +; nextln: addq $$64, %rsp + + return v200 +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret +} diff --git a/cranelift/filetests/filetests/isa/x64/struct-arg.clif b/cranelift/filetests/filetests/isa/x64/struct-arg.clif index 2f8b865420..e9001c5393 100644 --- a/cranelift/filetests/filetests/isa/x64/struct-arg.clif +++ b/cranelift/filetests/filetests/isa/x64/struct-arg.clif @@ -1,6 +1,5 @@ test compile -target x86_64 -feature "experimental_x64" +target x86_64 machinst function u0:0(i64 sarg(64)) -> i8 system_v { block0(v0: i64): diff --git a/cranelift/filetests/filetests/isa/x64/struct-ret.clif b/cranelift/filetests/filetests/isa/x64/struct-ret.clif index 05ebbd100b..ee59ff4963 100644 --- a/cranelift/filetests/filetests/isa/x64/struct-ret.clif +++ b/cranelift/filetests/filetests/isa/x64/struct-ret.clif @@ -1,6 +1,5 @@ test compile -target x86_64 -feature "experimental_x64" +target x86_64 machinst function %f0(i64 sret) { block0(v0: i64): diff --git a/cranelift/filetests/filetests/isa/x64/tls_elf.clif b/cranelift/filetests/filetests/isa/x64/tls_elf.clif index 6136e98e7d..37a4698619 100644 --- a/cranelift/filetests/filetests/isa/x64/tls_elf.clif +++ b/cranelift/filetests/filetests/isa/x64/tls_elf.clif @@ -1,7 +1,6 @@ test compile set tls_model=elf_gd -target x86_64 -feature 
"experimental_x64" +target x86_64 machinst function u0:0(i32) -> i64 { gv0 = symbol colocated tls u1:0 diff --git a/cranelift/filetests/filetests/isa/x64/uextend-elision.clif b/cranelift/filetests/filetests/isa/x64/uextend-elision.clif index aed6068d42..ef43c3dd03 100644 --- a/cranelift/filetests/filetests/isa/x64/uextend-elision.clif +++ b/cranelift/filetests/filetests/isa/x64/uextend-elision.clif @@ -1,6 +1,5 @@ test compile -target x86_64 -feature "experimental_x64" +target x86_64 machinst function %elide_uextend_add(i32, i32) -> i64 { block0(v0: i32, v1: i32): diff --git a/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif b/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif index 96478ba8ce..5ddd4b20d3 100644 --- a/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif +++ b/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif @@ -1,6 +1,5 @@ test compile -target x86_64 -feature "experimental_x64" +target x86_64 machinst ;; From: https://github.com/bytecodealliance/wasmtime/issues/2670 diff --git a/cranelift/filetests/filetests/isa/x86/abcd.clif b/cranelift/filetests/filetests/isa/x86/abcd.clif index 658ba66ca4..bfea325055 100644 --- a/cranelift/filetests/filetests/isa/x86/abcd.clif +++ b/cranelift/filetests/filetests/isa/x86/abcd.clif @@ -1,5 +1,5 @@ test regalloc -target i686 +target i686 legacy ; %rdi can't be used in a movsbl instruction, so test that the register ; allocator can move it to a register that can be. 
diff --git a/cranelift/filetests/filetests/isa/x86/abi-bool.clif b/cranelift/filetests/filetests/isa/x86/abi-bool.clif index 2083250a91..5286de3c18 100644 --- a/cranelift/filetests/filetests/isa/x86/abi-bool.clif +++ b/cranelift/filetests/filetests/isa/x86/abi-bool.clif @@ -1,5 +1,5 @@ test compile -target x86_64 haswell +target x86_64 legacy haswell function %foo(i64, i64, i64, i32) -> b1 system_v { block3(v0: i64, v1: i64, v2: i64, v3: i32): diff --git a/cranelift/filetests/filetests/isa/x86/abi32.clif b/cranelift/filetests/filetests/isa/x86/abi32.clif index 155d0efc4c..8ca530a695 100644 --- a/cranelift/filetests/filetests/isa/x86/abi32.clif +++ b/cranelift/filetests/filetests/isa/x86/abi32.clif @@ -1,6 +1,6 @@ ; Test the legalization of function signatures. test legalizer -target i686 +target i686 legacy ; regex: V=v\d+ diff --git a/cranelift/filetests/filetests/isa/x86/abi64.clif b/cranelift/filetests/filetests/isa/x86/abi64.clif index d99761a4dc..0da2aad424 100644 --- a/cranelift/filetests/filetests/isa/x86/abi64.clif +++ b/cranelift/filetests/filetests/isa/x86/abi64.clif @@ -1,6 +1,6 @@ ; Test the legalization of function signatures. 
test legalizer -target x86_64 +target x86_64 legacy ; regex: V=v\d+ diff --git a/cranelift/filetests/filetests/isa/x86/allones_funcaddrs32.clif b/cranelift/filetests/filetests/isa/x86/allones_funcaddrs32.clif index 744b936c83..ca0ace1342 100644 --- a/cranelift/filetests/filetests/isa/x86/allones_funcaddrs32.clif +++ b/cranelift/filetests/filetests/isa/x86/allones_funcaddrs32.clif @@ -2,7 +2,7 @@ test binemit set opt_level=speed_and_size set emit_all_ones_funcaddrs -target i686 haswell +target i686 legacy haswell ; The binary encodings can be verified with the command: ; diff --git a/cranelift/filetests/filetests/isa/x86/allones_funcaddrs64.clif b/cranelift/filetests/filetests/isa/x86/allones_funcaddrs64.clif index 623e96c9d3..7fbb670df2 100644 --- a/cranelift/filetests/filetests/isa/x86/allones_funcaddrs64.clif +++ b/cranelift/filetests/filetests/isa/x86/allones_funcaddrs64.clif @@ -2,7 +2,7 @@ test binemit set opt_level=speed_and_size set emit_all_ones_funcaddrs -target x86_64 haswell +target x86_64 legacy haswell ; The binary encodings can be verified with the command: ; diff --git a/cranelift/filetests/filetests/isa/x86/baldrdash-table-sig-reg.clif b/cranelift/filetests/filetests/isa/x86/baldrdash-table-sig-reg.clif index e8dc4393ca..d2713829cd 100644 --- a/cranelift/filetests/filetests/isa/x86/baldrdash-table-sig-reg.clif +++ b/cranelift/filetests/filetests/isa/x86/baldrdash-table-sig-reg.clif @@ -1,6 +1,6 @@ test compile set enable_probestack=false -target i686 +target i686 legacy function u0:0(i32 vmctx) baldrdash_system_v { sig0 = (i32 vmctx, i32 sigid) baldrdash_system_v diff --git a/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount.clif b/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount.clif index 7aca619d09..9099315878 100644 --- a/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount.clif +++ b/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount.clif @@ -1,5 +1,5 @@ test compile -target x86_64 baseline 
+target x86_64 legacy baseline ; clz/ctz on 64 bit operands diff --git a/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif b/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif index cbe18d904c..b2f36ff148 100644 --- a/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif +++ b/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif @@ -1,6 +1,6 @@ test binemit set opt_level=speed_and_size -target x86_64 baseline +target x86_64 legacy baseline ; The binary encodings can be verified with the command: ; diff --git a/cranelift/filetests/filetests/isa/x86/binary32-float.clif b/cranelift/filetests/filetests/isa/x86/binary32-float.clif index dc65a1f234..cfac85f7b2 100644 --- a/cranelift/filetests/filetests/isa/x86/binary32-float.clif +++ b/cranelift/filetests/filetests/isa/x86/binary32-float.clif @@ -1,6 +1,6 @@ ; Binary emission of 32-bit floating point code. test binemit -target i686 haswell +target i686 legacy haswell ; The binary encodings can be verified with the command: ; diff --git a/cranelift/filetests/filetests/isa/x86/binary32.clif b/cranelift/filetests/filetests/isa/x86/binary32.clif index abe99ce0ea..11268d5c4e 100644 --- a/cranelift/filetests/filetests/isa/x86/binary32.clif +++ b/cranelift/filetests/filetests/isa/x86/binary32.clif @@ -1,7 +1,7 @@ ; binary emission of x86-32 code. test binemit set opt_level=speed_and_size -target i686 haswell +target i686 legacy haswell ; The binary encodings can be verified with the command: ; diff --git a/cranelift/filetests/filetests/isa/x86/binary64-float.clif b/cranelift/filetests/filetests/isa/x86/binary64-float.clif index 2ec733b2c0..6bf6f325b1 100644 --- a/cranelift/filetests/filetests/isa/x86/binary64-float.clif +++ b/cranelift/filetests/filetests/isa/x86/binary64-float.clif @@ -1,7 +1,7 @@ ; Binary emission of 64-bit floating point code. 
test binemit set opt_level=speed_and_size -target x86_64 haswell +target x86_64 legacy haswell ; The binary encodings can be verified with the command: ; diff --git a/cranelift/filetests/filetests/isa/x86/binary64-pic.clif b/cranelift/filetests/filetests/isa/x86/binary64-pic.clif index 5a2443adce..4f2c650592 100644 --- a/cranelift/filetests/filetests/isa/x86/binary64-pic.clif +++ b/cranelift/filetests/filetests/isa/x86/binary64-pic.clif @@ -2,7 +2,7 @@ test binemit set opt_level=speed_and_size set is_pic -target x86_64 haswell +target x86_64 legacy haswell ; The binary encodings can be verified with the command: ; diff --git a/cranelift/filetests/filetests/isa/x86/binary64-run.clif b/cranelift/filetests/filetests/isa/x86/binary64-run.clif index 6e6e1071c5..f1f262c394 100644 --- a/cranelift/filetests/filetests/isa/x86/binary64-run.clif +++ b/cranelift/filetests/filetests/isa/x86/binary64-run.clif @@ -1,5 +1,5 @@ test run -target x86_64 +target x86_64 legacy ; this verifies that returning b64 immediates does not result in a segmentation fault, see https://github.com/bytecodealliance/cranelift/issues/911 function %test_b64() -> b64 { diff --git a/cranelift/filetests/filetests/isa/x86/binary64.clif b/cranelift/filetests/filetests/isa/x86/binary64.clif index ab5d516b40..c5e1cf5099 100644 --- a/cranelift/filetests/filetests/isa/x86/binary64.clif +++ b/cranelift/filetests/filetests/isa/x86/binary64.clif @@ -1,7 +1,7 @@ ; binary emission of x86-64 code. 
test binemit set opt_level=speed_and_size -target x86_64 haswell +target x86_64 legacy haswell ; The binary encodings can be verified with the command: ; diff --git a/cranelift/filetests/filetests/isa/x86/bitrev-i128-run.clif b/cranelift/filetests/filetests/isa/x86/bitrev-i128-run.clif index 4d3fe2ef57..45768b8cd9 100644 --- a/cranelift/filetests/filetests/isa/x86/bitrev-i128-run.clif +++ b/cranelift/filetests/filetests/isa/x86/bitrev-i128-run.clif @@ -1,5 +1,5 @@ test run -target x86_64 +target x86_64 legacy function %reverse_bits_zero() -> b1 { block0: diff --git a/cranelift/filetests/filetests/isa/x86/bnot-b1.clif b/cranelift/filetests/filetests/isa/x86/bnot-b1.clif index ef3736c54c..405519fff7 100644 --- a/cranelift/filetests/filetests/isa/x86/bnot-b1.clif +++ b/cranelift/filetests/filetests/isa/x86/bnot-b1.clif @@ -1,7 +1,7 @@ test binemit test run -target x86_64 +target x86_64 legacy function u0:323() -> b1 { block0: diff --git a/cranelift/filetests/filetests/isa/x86/br-i128-run.clif b/cranelift/filetests/filetests/isa/x86/br-i128-run.clif index 95a1de81cf..804ab6b908 100644 --- a/cranelift/filetests/filetests/isa/x86/br-i128-run.clif +++ b/cranelift/filetests/filetests/isa/x86/br-i128-run.clif @@ -1,5 +1,5 @@ test run -target x86_64 +target x86_64 legacy function %br_false() -> b1 { block0: diff --git a/cranelift/filetests/filetests/isa/x86/br-i128.clif b/cranelift/filetests/filetests/isa/x86/br-i128.clif index a1778f4cf5..fccc691aa3 100644 --- a/cranelift/filetests/filetests/isa/x86/br-i128.clif +++ b/cranelift/filetests/filetests/isa/x86/br-i128.clif @@ -1,5 +1,5 @@ test compile -target x86_64 +target x86_64 legacy function u0:0(i128) -> i8 fast { block0(v0: i128): diff --git a/cranelift/filetests/filetests/isa/x86/brz-i8-run.clif b/cranelift/filetests/filetests/isa/x86/brz-i8-run.clif index c8520830e6..1751eb5b73 100644 --- a/cranelift/filetests/filetests/isa/x86/brz-i8-run.clif +++ b/cranelift/filetests/filetests/isa/x86/brz-i8-run.clif @@ -1,5 +1,5 @@ 
test run -target x86_64 +target x86_64 legacy function u0:0() -> b1 { block0: diff --git a/cranelift/filetests/filetests/isa/x86/brz-i8.clif b/cranelift/filetests/filetests/isa/x86/brz-i8.clif index 6c2f95c359..fda005bc81 100644 --- a/cranelift/filetests/filetests/isa/x86/brz-i8.clif +++ b/cranelift/filetests/filetests/isa/x86/brz-i8.clif @@ -1,5 +1,5 @@ test compile -target x86_64 +target x86_64 legacy function u0:0() -> b1 { block0: diff --git a/cranelift/filetests/filetests/isa/x86/brz-x86_32-i64.clif b/cranelift/filetests/filetests/isa/x86/brz-x86_32-i64.clif index 51d19b7b8f..eb537d7c1a 100644 --- a/cranelift/filetests/filetests/isa/x86/brz-x86_32-i64.clif +++ b/cranelift/filetests/filetests/isa/x86/brz-x86_32-i64.clif @@ -1,5 +1,5 @@ test compile -target i686 +target i686 legacy function u0:0(i32, i32) -> i32 { block0(v0: i32, v1: i32): diff --git a/cranelift/filetests/filetests/isa/x86/extend-i128-run.clif b/cranelift/filetests/filetests/isa/x86/extend-i128-run.clif index 3626e5ebf4..8617d50693 100644 --- a/cranelift/filetests/filetests/isa/x86/extend-i128-run.clif +++ b/cranelift/filetests/filetests/isa/x86/extend-i128-run.clif @@ -1,5 +1,5 @@ test run -target x86_64 +target x86_64 legacy function u0:0() -> b1 { block0: diff --git a/cranelift/filetests/filetests/isa/x86/extend-i128.clif b/cranelift/filetests/filetests/isa/x86/extend-i128.clif index db2b53276a..e7da3f0387 100644 --- a/cranelift/filetests/filetests/isa/x86/extend-i128.clif +++ b/cranelift/filetests/filetests/isa/x86/extend-i128.clif @@ -1,5 +1,5 @@ test compile -target x86_64 +target x86_64 legacy function u0:0() -> b1 { block0: diff --git a/cranelift/filetests/filetests/isa/x86/extend-i64-run.clif b/cranelift/filetests/filetests/isa/x86/extend-i64-run.clif index 061c8d0fe6..8cfabcad93 100644 --- a/cranelift/filetests/filetests/isa/x86/extend-i64-run.clif +++ b/cranelift/filetests/filetests/isa/x86/extend-i64-run.clif @@ -1,5 +1,5 @@ test run -target i686 +target i686 legacy function u0:0() 
-> b1 { block0: diff --git a/cranelift/filetests/filetests/isa/x86/extend-i64.clif b/cranelift/filetests/filetests/isa/x86/extend-i64.clif index db123a9cc8..a3d892c488 100644 --- a/cranelift/filetests/filetests/isa/x86/extend-i64.clif +++ b/cranelift/filetests/filetests/isa/x86/extend-i64.clif @@ -1,5 +1,5 @@ test compile -target i686 +target i686 legacy function u0:0() -> b1 { block0: diff --git a/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants-32bit.clif b/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants-32bit.clif index 4d736287e0..3bc9adf5bc 100644 --- a/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants-32bit.clif +++ b/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants-32bit.clif @@ -1,6 +1,6 @@ ; Check that floating-point and integer constants equal to zero are optimized correctly. test binemit -target i686 +target i686 legacy function %foo() -> f32 fast { block0: diff --git a/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants.clif b/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants.clif index 25cd686996..6fff51c7b1 100644 --- a/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants.clif +++ b/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants.clif @@ -1,6 +1,6 @@ ; Check that floating-point constants equal to zero are optimized correctly. 
test binemit -target x86_64 +target x86_64 legacy function %zero_const_32bit_no_rex() -> f32 fast { block0: diff --git a/cranelift/filetests/filetests/isa/x86/i128-isplit-forward-jump.clif b/cranelift/filetests/filetests/isa/x86/i128-isplit-forward-jump.clif index 493d2e6365..eda7b6dffd 100644 --- a/cranelift/filetests/filetests/isa/x86/i128-isplit-forward-jump.clif +++ b/cranelift/filetests/filetests/isa/x86/i128-isplit-forward-jump.clif @@ -1,5 +1,5 @@ test compile -target x86_64 +target x86_64 legacy function u0:0() -> i128 system_v { block0: diff --git a/cranelift/filetests/filetests/isa/x86/i128.clif b/cranelift/filetests/filetests/isa/x86/i128.clif index 028fb6e551..b171c0ccfd 100644 --- a/cranelift/filetests/filetests/isa/x86/i128.clif +++ b/cranelift/filetests/filetests/isa/x86/i128.clif @@ -1,5 +1,5 @@ test compile -target x86_64 +target x86_64 legacy function u0:0(i64, i64) -> i128 fast { block0(v0: i64, v1: i64): diff --git a/cranelift/filetests/filetests/isa/x86/icmp-i128.clif b/cranelift/filetests/filetests/isa/x86/icmp-i128.clif index 789fcc6ea3..a20e8d91cd 100644 --- a/cranelift/filetests/filetests/isa/x86/icmp-i128.clif +++ b/cranelift/filetests/filetests/isa/x86/icmp-i128.clif @@ -1,5 +1,5 @@ test run -target x86_64 haswell +target x86_64 legacy haswell function %test_icmp_eq_i128() -> b1 { block0: diff --git a/cranelift/filetests/filetests/isa/x86/imul-i128.clif b/cranelift/filetests/filetests/isa/x86/imul-i128.clif index 65d21463fd..d5590b2564 100644 --- a/cranelift/filetests/filetests/isa/x86/imul-i128.clif +++ b/cranelift/filetests/filetests/isa/x86/imul-i128.clif @@ -1,5 +1,5 @@ test run -target x86_64 haswell +target x86_64 legacy haswell function %test_imul_i128() -> b1 { block0: diff --git a/cranelift/filetests/filetests/isa/x86/ireduce-i16-to-i8.clif b/cranelift/filetests/filetests/isa/x86/ireduce-i16-to-i8.clif index 2a283af485..dd75cac4a1 100644 --- a/cranelift/filetests/filetests/isa/x86/ireduce-i16-to-i8.clif +++ 
b/cranelift/filetests/filetests/isa/x86/ireduce-i16-to-i8.clif @@ -1,5 +1,5 @@ test compile -target x86_64 +target x86_64 legacy function u0:0(i16) -> i8 fast { block0(v0: i16): diff --git a/cranelift/filetests/filetests/isa/x86/isplit-not-legalized-twice.clif b/cranelift/filetests/filetests/isa/x86/isplit-not-legalized-twice.clif index c3ace05158..9aedb61001 100644 --- a/cranelift/filetests/filetests/isa/x86/isplit-not-legalized-twice.clif +++ b/cranelift/filetests/filetests/isa/x86/isplit-not-legalized-twice.clif @@ -1,5 +1,5 @@ test compile -target x86_64 +target x86_64 legacy function u0:0(i64, i64) -> i128 system_v { block0(v0: i64, v1: i64): diff --git a/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif b/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif index dcf6c77e9a..948fa34d99 100644 --- a/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif +++ b/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif @@ -1,6 +1,6 @@ test compile set opt_level=speed_and_size -target x86_64 +target x86_64 legacy function u0:0(i8) -> i8 fast { block0(v0: i8): diff --git a/cranelift/filetests/filetests/isa/x86/jump_i128_param_unused.clif b/cranelift/filetests/filetests/isa/x86/jump_i128_param_unused.clif index 19f22c3906..a08356ca53 100644 --- a/cranelift/filetests/filetests/isa/x86/jump_i128_param_unused.clif +++ b/cranelift/filetests/filetests/isa/x86/jump_i128_param_unused.clif @@ -1,5 +1,5 @@ test compile -target x86_64 +target x86_64 legacy function u0:0(i128) system_v { block0(v0: i128): diff --git a/cranelift/filetests/filetests/isa/x86/legalize-bint-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-bint-i8.clif index dec3416a89..9d88db9d17 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-bint-i8.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-bint-i8.clif @@ -1,6 +1,6 @@ test compile -target x86_64 +target x86_64 legacy function u0:0() -> i8 fast { block0: diff --git a/cranelift/filetests/filetests/isa/x86/legalize-bnot.clif 
b/cranelift/filetests/filetests/isa/x86/legalize-bnot.clif index dbd1397e45..acdd21c9f0 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-bnot.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-bnot.clif @@ -1,6 +1,6 @@ test compile -target x86_64 +target x86_64 legacy function u0:51(i64, i64) system_v { ss0 = explicit_slot 0 diff --git a/cranelift/filetests/filetests/isa/x86/legalize-br-icmp.clif b/cranelift/filetests/filetests/isa/x86/legalize-br-icmp.clif index f2bada2642..f64108531c 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-br-icmp.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-br-icmp.clif @@ -1,6 +1,6 @@ test legalizer -target x86_64 +target x86_64 legacy function %br_icmp(i64) fast { block0(v0: i64): diff --git a/cranelift/filetests/filetests/isa/x86/legalize-br-table.clif b/cranelift/filetests/filetests/isa/x86/legalize-br-table.clif index b9ed036755..c931d6cacf 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-br-table.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-br-table.clif @@ -1,6 +1,6 @@ test compile set opt_level=speed_and_size -target x86_64 +target x86_64 legacy ; regex: V=v\d+ ; regex: BB=block\d+ diff --git a/cranelift/filetests/filetests/isa/x86/legalize-byte-ops-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-byte-ops-i8.clif index 2c8c8612d6..7c135d54ae 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-byte-ops-i8.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-byte-ops-i8.clif @@ -1,5 +1,5 @@ test compile -target x86_64 +target x86_64 legacy ; regex: V=v\d+ diff --git a/cranelift/filetests/filetests/isa/x86/legalize-call.clif b/cranelift/filetests/filetests/isa/x86/legalize-call.clif index c761a8d5aa..b21099281e 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-call.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-call.clif @@ -1,7 +1,7 @@ ; Test legalization of a non-colocated call in 64-bit non-PIC mode. 
test legalizer set opt_level=speed_and_size -target x86_64 haswell +target x86_64 legacy haswell function %call() { fn0 = %foo() diff --git a/cranelift/filetests/filetests/isa/x86/legalize-clz-ctz-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-clz-ctz-i8.clif index 8e63f1e0c6..af5e158b07 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-clz-ctz-i8.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-clz-ctz-i8.clif @@ -1,5 +1,5 @@ test compile -target x86_64 +target x86_64 legacy ; regex: V=v\d+ diff --git a/cranelift/filetests/filetests/isa/x86/legalize-custom.clif b/cranelift/filetests/filetests/isa/x86/legalize-custom.clif index 3ee674d66a..0c51e064dd 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-custom.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-custom.clif @@ -1,7 +1,7 @@ ; Test the custom legalizations. test legalizer -target i686 -target x86_64 +target i686 legacy +target x86_64 legacy ; regex: V=v\d+ ; regex: BB=block\d+ diff --git a/cranelift/filetests/filetests/isa/x86/legalize-div-traps.clif b/cranelift/filetests/filetests/isa/x86/legalize-div-traps.clif index 1be81ec186..9e579c1bcd 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-div-traps.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-div-traps.clif @@ -2,7 +2,7 @@ test legalizer ; See also legalize-div.clif. set avoid_div_traps=1 -target x86_64 +target x86_64 legacy ; regex: V=v\d+ ; regex: BB=block\d+ diff --git a/cranelift/filetests/filetests/isa/x86/legalize-div.clif b/cranelift/filetests/filetests/isa/x86/legalize-div.clif index b9f115b85b..b172a9aef3 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-div.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-div.clif @@ -2,7 +2,7 @@ test legalizer ; See also legalize-div-traps.clif. 
set avoid_div_traps=0 -target x86_64 +target x86_64 legacy ; regex: V=v\d+ ; regex: BB=block\d+ diff --git a/cranelift/filetests/filetests/isa/x86/legalize-f64const-x64.clif b/cranelift/filetests/filetests/isa/x86/legalize-f64const-x64.clif index 382c6ba80a..43f57f8372 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-f64const-x64.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-f64const-x64.clif @@ -1,6 +1,6 @@ ; Test the legalization of f64const. test legalizer -target x86_64 +target x86_64 legacy ; regex: V=v\d+ diff --git a/cranelift/filetests/filetests/isa/x86/legalize-fcvt_from_usint-i16.clif b/cranelift/filetests/filetests/isa/x86/legalize-fcvt_from_usint-i16.clif index c11e77f2c7..32a256c9e7 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-fcvt_from_usint-i16.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-fcvt_from_usint-i16.clif @@ -1,5 +1,5 @@ test compile -target x86_64 +target x86_64 legacy function u0:0(i16) -> f64 fast { block0(v0: i16): diff --git a/cranelift/filetests/filetests/isa/x86/legalize-heaps.clif b/cranelift/filetests/filetests/isa/x86/legalize-heaps.clif index d47a880320..f833f3b3ca 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-heaps.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-heaps.clif @@ -1,6 +1,6 @@ test legalizer set enable_heap_access_spectre_mitigation=false -target x86_64 +target x86_64 legacy ; Test legalization for various forms of heap addresses. ; regex: BB=block\d+ diff --git a/cranelift/filetests/filetests/isa/x86/legalize-i128.clif b/cranelift/filetests/filetests/isa/x86/legalize-i128.clif index 81a2d1ecdd..276de82d4e 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-i128.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-i128.clif @@ -1,6 +1,6 @@ ; Test the legalization of i128 instructions on x86_64. 
test legalizer -target x86_64 haswell +target x86_64 legacy haswell ; regex: V=v\d+ diff --git a/cranelift/filetests/filetests/isa/x86/legalize-i64.clif b/cranelift/filetests/filetests/isa/x86/legalize-i64.clif index 94fbc82015..7e2d381947 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-i64.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-i64.clif @@ -1,6 +1,6 @@ ; Test the legalization of i64 instructions on x86_32. test legalizer -target i686 haswell +target i686 legacy haswell ; regex: V=v\d+ diff --git a/cranelift/filetests/filetests/isa/x86/legalize-icmp-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-icmp-i8.clif index 2519d3b484..32f2b3d3e7 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-icmp-i8.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-icmp-i8.clif @@ -1,5 +1,5 @@ test compile -target x86_64 +target x86_64 legacy ; regex: V=v\d+ diff --git a/cranelift/filetests/filetests/isa/x86/legalize-iconst-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-iconst-i8.clif index 39908d1f1d..1e6a70434a 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-iconst-i8.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-iconst-i8.clif @@ -1,6 +1,6 @@ test compile -target x86_64 +target x86_64 legacy function u0:0(i64) system_v { ss0 = explicit_slot 0 diff --git a/cranelift/filetests/filetests/isa/x86/legalize-imul-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-imul-i8.clif index 6902636008..b1f5b12095 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-imul-i8.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-imul-i8.clif @@ -1,6 +1,6 @@ test compile -target x86_64 +target x86_64 legacy function u0:0(i64, i8, i8) system_v { diff --git a/cranelift/filetests/filetests/isa/x86/legalize-imul-imm-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-imul-imm-i8.clif index 82d3fa26ce..4f84d93d0b 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-imul-imm-i8.clif +++ 
b/cranelift/filetests/filetests/isa/x86/legalize-imul-imm-i8.clif @@ -1,6 +1,6 @@ test compile -target x86_64 +target x86_64 legacy function u0:0(i64, i8) system_v { ss0 = explicit_slot 1 diff --git a/cranelift/filetests/filetests/isa/x86/legalize-ineg-x86_64.clif b/cranelift/filetests/filetests/isa/x86/legalize-ineg-x86_64.clif index fac17d6ff1..a36a2d6ed0 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-ineg-x86_64.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-ineg-x86_64.clif @@ -1,6 +1,6 @@ ; Test the custom legalization of ineg.i64 on x86_64. test legalizer -target x86_64 +target x86_64 legacy function %ineg_legalized_i64() { block0: diff --git a/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i128.clif b/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i128.clif index 4dd275ccc9..527710d4fe 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i128.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i128.clif @@ -1,5 +1,5 @@ test compile -target x86_64 +target x86_64 legacy function u0:0(i64, i64) -> i64 { block0(v0: i64, v1: i64): diff --git a/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i64.clif b/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i64.clif index 44c211709a..3ad3f4c69f 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i64.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i64.clif @@ -1,5 +1,5 @@ test compile -target i686 +target i686 legacy function u0:0(i32, i32) -> i32 { block0(v0: i32, v1: i32): diff --git a/cranelift/filetests/filetests/isa/x86/legalize-isplit-backwards.clif b/cranelift/filetests/filetests/isa/x86/legalize-isplit-backwards.clif index 5a903350b5..0d042bf3ff 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-isplit-backwards.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-isplit-backwards.clif @@ -1,5 +1,5 @@ test compile -target x86_64 +target x86_64 legacy function u0:0(i128) -> i64, i64 fast 
{ ; check: block0(v4: i64 [%rdi], v5: i64 [%rsi], v8: i64 [%rbp]): diff --git a/cranelift/filetests/filetests/isa/x86/legalize-libcall.clif b/cranelift/filetests/filetests/isa/x86/legalize-libcall.clif index 8ddb0865f8..838a915bf0 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-libcall.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-libcall.clif @@ -2,7 +2,7 @@ test legalizer ; Pre-SSE 4.1, we need to use runtime library calls for floating point rounding operations. set is_pic -target x86_64 +target x86_64 legacy function %floor(f32) -> f32 { block0(v0: f32): diff --git a/cranelift/filetests/filetests/isa/x86/legalize-load-store-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-load-store-i8.clif index 2fcb086e72..4cbf3e088e 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-load-store-i8.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-load-store-i8.clif @@ -1,6 +1,6 @@ test compile -target x86_64 +target x86_64 legacy function u0:0(i64, i8, i8) system_v { ss0 = explicit_slot 0 diff --git a/cranelift/filetests/filetests/isa/x86/legalize-memory.clif b/cranelift/filetests/filetests/isa/x86/legalize-memory.clif index 2c99d285b2..5e7113b415 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-memory.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-memory.clif @@ -1,7 +1,7 @@ ; Test the legalization of memory objects. 
test legalizer set enable_heap_access_spectre_mitigation=false -target x86_64 +target x86_64 legacy ; regex: V=v\d+ ; regex: BB=block\d+ diff --git a/cranelift/filetests/filetests/isa/x86/legalize-mulhi.clif b/cranelift/filetests/filetests/isa/x86/legalize-mulhi.clif index 375a454c20..179ef824f3 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-mulhi.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-mulhi.clif @@ -1,5 +1,5 @@ test compile -target x86_64 baseline +target x86_64 legacy baseline ; umulhi/smulhi on 64 bit operands diff --git a/cranelift/filetests/filetests/isa/x86/legalize-popcnt-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-popcnt-i8.clif index c3f89c4807..fb9c4f49b8 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-popcnt-i8.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-popcnt-i8.clif @@ -1,5 +1,5 @@ test compile -target x86_64 +target x86_64 legacy function u0:0(i8) -> i8 fast { block0(v0: i8): diff --git a/cranelift/filetests/filetests/isa/x86/legalize-regmove-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-regmove-i8.clif index 6f080ca89b..f770ba5643 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-regmove-i8.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-regmove-i8.clif @@ -1,6 +1,6 @@ test compile -target x86_64 +target x86_64 legacy function u0:0(i64, i64, i64) system_v { ss0 = explicit_slot 0 diff --git a/cranelift/filetests/filetests/isa/x86/legalize-rotate.clif b/cranelift/filetests/filetests/isa/x86/legalize-rotate.clif index 78524d2969..e058602615 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-rotate.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-rotate.clif @@ -1,5 +1,5 @@ test compile -target x86_64 +target x86_64 legacy ; regex: V=v\d+ ; regex: R=%[a-z0-9]+ diff --git a/cranelift/filetests/filetests/isa/x86/legalize-shlr-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-shlr-i8.clif index ee6e3e6d11..9759a8b155 100644 --- 
a/cranelift/filetests/filetests/isa/x86/legalize-shlr-i8.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-shlr-i8.clif @@ -1,5 +1,5 @@ test compile -target x86_64 +target x86_64 legacy ; regex: V=v\d+ diff --git a/cranelift/filetests/filetests/isa/x86/legalize-tables.clif b/cranelift/filetests/filetests/isa/x86/legalize-tables.clif index 5f4632041d..10912afe76 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-tables.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-tables.clif @@ -1,5 +1,5 @@ test legalizer -target x86_64 +target x86_64 legacy ; Test legalization for various forms of table addresses. ; regex: BB=block\d+ diff --git a/cranelift/filetests/filetests/isa/x86/legalize-urem-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-urem-i8.clif index de193c2abb..7be308308c 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-urem-i8.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-urem-i8.clif @@ -1,5 +1,5 @@ test compile -target x86_64 +target x86_64 legacy ; regex: V=v\d+ diff --git a/cranelift/filetests/filetests/isa/x86/legalize-x86_32-shifts.clif b/cranelift/filetests/filetests/isa/x86/legalize-x86_32-shifts.clif index bbcbf7091d..ff5d11a4d7 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-x86_32-shifts.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-x86_32-shifts.clif @@ -1,6 +1,6 @@ test compile set enable_simd -target i686 haswell +target i686 legacy haswell function u0:1(i32) -> i64 system_v { block1(v0: i32): diff --git a/cranelift/filetests/filetests/isa/x86/load-store-narrow.clif b/cranelift/filetests/filetests/isa/x86/load-store-narrow.clif index 070b7459e2..4e0af65c9f 100644 --- a/cranelift/filetests/filetests/isa/x86/load-store-narrow.clif +++ b/cranelift/filetests/filetests/isa/x86/load-store-narrow.clif @@ -1,5 +1,5 @@ test compile -target i686 +target i686 legacy function u0:0(i64, i32) system_v { block0(v0: i64, v1: i32): diff --git 
a/cranelift/filetests/filetests/isa/x86/nop.clif b/cranelift/filetests/filetests/isa/x86/nop.clif index 08d4fdd7a0..cafa90eb4f 100644 --- a/cranelift/filetests/filetests/isa/x86/nop.clif +++ b/cranelift/filetests/filetests/isa/x86/nop.clif @@ -1,6 +1,6 @@ test compile -target x86_64 +target x86_64 legacy function %test(i32) -> i32 system_v { block0(v0: i32): diff --git a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants-32bit.clif b/cranelift/filetests/filetests/isa/x86/optimized-zero-constants-32bit.clif index 6ce39a5c38..b5a9658b67 100644 --- a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants-32bit.clif +++ b/cranelift/filetests/filetests/isa/x86/optimized-zero-constants-32bit.clif @@ -1,7 +1,7 @@ ; Check that floating-point and integer constants equal to zero are optimized correctly. test binemit set opt_level=speed_and_size -target i686 +target i686 legacy function %foo() -> f32 fast { block0: diff --git a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants.clif b/cranelift/filetests/filetests/isa/x86/optimized-zero-constants.clif index 4ff2865a21..8e469b8b7a 100644 --- a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants.clif +++ b/cranelift/filetests/filetests/isa/x86/optimized-zero-constants.clif @@ -1,7 +1,7 @@ ; Check that floating-point constants equal to zero are optimized correctly. 
test binemit set opt_level=speed_and_size -target x86_64 +target x86_64 legacy function %zero_const_32bit_no_rex() -> f32 fast { block0: diff --git a/cranelift/filetests/filetests/isa/x86/pinned-reg.clif b/cranelift/filetests/filetests/isa/x86/pinned-reg.clif index f4bbc2501b..b9bc230c33 100644 --- a/cranelift/filetests/filetests/isa/x86/pinned-reg.clif +++ b/cranelift/filetests/filetests/isa/x86/pinned-reg.clif @@ -4,7 +4,7 @@ set enable_pinned_reg=true set use_pinned_reg_as_heap_base=true set opt_level=speed_and_size -target x86_64 +target x86_64 legacy ; regex: V=v\d+ diff --git a/cranelift/filetests/filetests/isa/x86/probestack-adjusts-sp.clif b/cranelift/filetests/filetests/isa/x86/probestack-adjusts-sp.clif index 17be399a4e..4b4a05244c 100644 --- a/cranelift/filetests/filetests/isa/x86/probestack-adjusts-sp.clif +++ b/cranelift/filetests/filetests/isa/x86/probestack-adjusts-sp.clif @@ -1,7 +1,7 @@ test compile set use_colocated_libcalls=1 set probestack_func_adjusts_sp=1 -target x86_64 +target x86_64 legacy ; Like %big in probestack.clif, but with the probestack function adjusting ; the stack pointer itself. diff --git a/cranelift/filetests/filetests/isa/x86/probestack-disabled.clif b/cranelift/filetests/filetests/isa/x86/probestack-disabled.clif index 433c634cab..6b9b4f3342 100644 --- a/cranelift/filetests/filetests/isa/x86/probestack-disabled.clif +++ b/cranelift/filetests/filetests/isa/x86/probestack-disabled.clif @@ -1,7 +1,7 @@ test compile set use_colocated_libcalls=1 set enable_probestack=0 -target x86_64 +target x86_64 legacy ; Like %big in probestack.clif, but with probes disabled. 
diff --git a/cranelift/filetests/filetests/isa/x86/probestack-noncolocated.clif b/cranelift/filetests/filetests/isa/x86/probestack-noncolocated.clif index 9af61f0586..2837ddd0c9 100644 --- a/cranelift/filetests/filetests/isa/x86/probestack-noncolocated.clif +++ b/cranelift/filetests/filetests/isa/x86/probestack-noncolocated.clif @@ -1,5 +1,5 @@ test compile -target x86_64 +target x86_64 legacy ; Like %big in probestack.clif, but without a colocated libcall. diff --git a/cranelift/filetests/filetests/isa/x86/probestack-size.clif b/cranelift/filetests/filetests/isa/x86/probestack-size.clif index 8eb934b06c..efb1900170 100644 --- a/cranelift/filetests/filetests/isa/x86/probestack-size.clif +++ b/cranelift/filetests/filetests/isa/x86/probestack-size.clif @@ -1,7 +1,7 @@ test compile set use_colocated_libcalls=1 set probestack_size_log2=13 -target x86_64 +target x86_64 legacy ; Like %big in probestack.clif, but now the probestack size is bigger ; and it no longer needs a probe. diff --git a/cranelift/filetests/filetests/isa/x86/probestack.clif b/cranelift/filetests/filetests/isa/x86/probestack.clif index d9f29a8681..c434cf5f63 100644 --- a/cranelift/filetests/filetests/isa/x86/probestack.clif +++ b/cranelift/filetests/filetests/isa/x86/probestack.clif @@ -1,6 +1,6 @@ test compile set use_colocated_libcalls=1 -target x86_64 +target x86_64 legacy ; A function with a big stack frame. This should have a stack probe. diff --git a/cranelift/filetests/filetests/isa/x86/prologue-epilogue.clif b/cranelift/filetests/filetests/isa/x86/prologue-epilogue.clif index c28dd886ea..831928186b 100644 --- a/cranelift/filetests/filetests/isa/x86/prologue-epilogue.clif +++ b/cranelift/filetests/filetests/isa/x86/prologue-epilogue.clif @@ -2,7 +2,7 @@ test compile set opt_level=speed_and_size set is_pic set enable_probestack=false -target x86_64 haswell +target x86_64 legacy haswell ; An empty function. 
diff --git a/cranelift/filetests/filetests/isa/x86/relax_branch.clif b/cranelift/filetests/filetests/isa/x86/relax_branch.clif index fc4c733158..0e123f8a36 100644 --- a/cranelift/filetests/filetests/isa/x86/relax_branch.clif +++ b/cranelift/filetests/filetests/isa/x86/relax_branch.clif @@ -4,7 +4,7 @@ set avoid_div_traps set baldrdash_prologue_words=3 set emit_all_ones_funcaddrs set enable_probestack=false -target x86_64 haswell +target x86_64 legacy haswell ; This checks that a branch that is too far away is getting relaxed. In ; particular, the first block has to be non-empty but its encoding size must be diff --git a/cranelift/filetests/filetests/isa/x86/run-const.clif b/cranelift/filetests/filetests/isa/x86/run-const.clif index 6324627628..401908a48c 100644 --- a/cranelift/filetests/filetests/isa/x86/run-const.clif +++ b/cranelift/filetests/filetests/isa/x86/run-const.clif @@ -1,5 +1,5 @@ test run -target x86_64 +target x86_64 legacy function %test_compare_i32() -> b1 { block0: diff --git a/cranelift/filetests/filetests/isa/x86/run-i64.clif b/cranelift/filetests/filetests/isa/x86/run-i64.clif index ae4a618573..6b9510b352 100644 --- a/cranelift/filetests/filetests/isa/x86/run-i64.clif +++ b/cranelift/filetests/filetests/isa/x86/run-i64.clif @@ -1,6 +1,6 @@ ; Test i64 instructions on x86_32. 
test compile -target i686 haswell +target i686 legacy haswell function %iadd(i64, i64) -> i64 { block0(v1: i64, v2: i64): diff --git a/cranelift/filetests/filetests/isa/x86/saturating-float-cast.clif b/cranelift/filetests/filetests/isa/x86/saturating-float-cast.clif index 36b69ca8b7..a26e2d865c 100644 --- a/cranelift/filetests/filetests/isa/x86/saturating-float-cast.clif +++ b/cranelift/filetests/filetests/isa/x86/saturating-float-cast.clif @@ -1,5 +1,5 @@ test compile -target x86_64 +target x86_64 legacy function u0:0() -> f32 system_v { block0: diff --git a/cranelift/filetests/filetests/isa/x86/select-i8.clif b/cranelift/filetests/filetests/isa/x86/select-i8.clif index feec520860..44b7e32d12 100644 --- a/cranelift/filetests/filetests/isa/x86/select-i8.clif +++ b/cranelift/filetests/filetests/isa/x86/select-i8.clif @@ -1,5 +1,5 @@ test compile -target x86_64 +target x86_64 legacy function u0:0(b1, i8, i8) -> i8 { block0(v0: b1, v1: i8, v2: i8): diff --git a/cranelift/filetests/filetests/isa/x86/shrink-multiple-uses.clif b/cranelift/filetests/filetests/isa/x86/shrink-multiple-uses.clif index f896d8cc25..31b73da391 100644 --- a/cranelift/filetests/filetests/isa/x86/shrink-multiple-uses.clif +++ b/cranelift/filetests/filetests/isa/x86/shrink-multiple-uses.clif @@ -1,6 +1,6 @@ test shrink set opt_level=speed_and_size -target x86_64 +target x86_64 legacy function %test_multiple_uses(i32 [%rdi]) -> i32 { block0(v0: i32 [%rdi]): diff --git a/cranelift/filetests/filetests/isa/x86/shrink.clif b/cranelift/filetests/filetests/isa/x86/shrink.clif index 9b0832b2a7..bb787832c9 100644 --- a/cranelift/filetests/filetests/isa/x86/shrink.clif +++ b/cranelift/filetests/filetests/isa/x86/shrink.clif @@ -1,6 +1,6 @@ test binemit set opt_level=speed_and_size -target x86_64 +target x86_64 legacy ; Test that instruction shrinking eliminates REX prefixes when possible. 
diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif index cd942338dd..0a8fbe7f0c 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif @@ -1,6 +1,6 @@ test binemit set enable_simd -target x86_64 skylake +target x86_64 legacy skylake function %arithmetic_i8x16(i8x16, i8x16) { block0(v0: i8x16 [%xmm6], v1: i8x16 [%xmm2]): diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif index 976ea2d02b..74bc68ee67 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif @@ -1,6 +1,6 @@ test legalizer set enable_simd -target x86_64 skylake +target x86_64 legacy skylake function %ineg_i32x4() -> b1 { ; check: const0 = 0x00000001000000010000000100000001 diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif index 7acbe8e00b..78e0244c9f 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif @@ -1,6 +1,6 @@ test run set enable_simd -target x86_64 skylake +target x86_64 legacy skylake function %iadd_i32x4(i32x4, i32x4) -> i32x4 { block0(v0:i32x4, v1:i32x4): diff --git a/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-binemit.clif index d9729ae161..0daf064713 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-binemit.clif @@ -1,6 +1,6 @@ test binemit set enable_simd -target x86_64 skylake has_avx512dq=true +target x86_64 legacy skylake 
has_avx512dq=true function %imul_i64x2() { block0: diff --git a/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-legalize.clif index 0c7dafcf02..294902d45b 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-legalize.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-legalize.clif @@ -1,6 +1,6 @@ test legalizer set enable_simd -target x86_64 skylake has_avx512dq=true +target x86_64 legacy skylake has_avx512dq=true function %imul_i64x2(i64x2, i64x2) { block0(v0:i64x2, v1:i64x2): diff --git a/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-binemit.clif index 37abef0e61..6f235e6b3b 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-binemit.clif @@ -1,6 +1,6 @@ test binemit set enable_simd -target x86_64 has_avx512vl=true +target x86_64 legacy has_avx512vl=true function %fcvt_from_uint(i32x4) { block0(v0: i32x4 [%xmm2]): diff --git a/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-legalize.clif index 78dc1cf220..cdadd3254d 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-legalize.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-legalize.clif @@ -1,6 +1,6 @@ test legalizer set enable_simd -target x86_64 skylake has_avx512f=true +target x86_64 legacy skylake has_avx512f=true function %fcvt_from_uint(i32x4) -> f32x4 { block0(v0:i32x4): diff --git a/cranelift/filetests/filetests/isa/x86/simd-bitselect-to-vselect-run.clif b/cranelift/filetests/filetests/isa/x86/simd-bitselect-to-vselect-run.clif index 03cc645712..469dc7d29c 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-bitselect-to-vselect-run.clif +++ 
b/cranelift/filetests/filetests/isa/x86/simd-bitselect-to-vselect-run.clif @@ -1,7 +1,7 @@ test run set opt_level=speed_and_size set enable_simd -target x86_64 haswell +target x86_64 legacy haswell ;; Test if bitselect->vselect optimization works properly diff --git a/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif index 3d729de31f..3131a8aa0c 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif @@ -1,6 +1,6 @@ test binemit set enable_simd -target x86_64 skylake +target x86_64 legacy skylake function %ishl_i16x8(i16x8, i64x2) -> i16x8 { block0(v0: i16x8 [%xmm2], v1: i64x2 [%xmm1]): diff --git a/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif index ad459563ef..7674f83e01 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif @@ -1,6 +1,6 @@ test legalizer set enable_simd -target x86_64 skylake +target x86_64 legacy skylake function %ushr_i8x16() -> i8x16 { block0: diff --git a/cranelift/filetests/filetests/isa/x86/simd-bitwise-run.clif b/cranelift/filetests/filetests/isa/x86/simd-bitwise-run.clif index 0f6ba31ed8..0b9d07663c 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-bitwise-run.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-bitwise-run.clif @@ -1,6 +1,6 @@ test run set enable_simd -target x86_64 skylake +target x86_64 legacy skylake ; TODO: once available, replace all lane extraction with `icmp + all_ones` diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif index 0df2c73ccf..1d3db4a119 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif +++ 
b/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif @@ -1,6 +1,6 @@ test binemit set enable_simd -target x86_64 skylake +target x86_64 legacy skylake function %icmp_i8x16() { block0: diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif index 27dba2f87a..a6324a34cc 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif @@ -1,6 +1,6 @@ test legalizer set enable_simd -target x86_64 skylake +target x86_64 legacy skylake function %icmp_ne_32x4(i32x4, i32x4) -> b32x4 { ; check: const0 = 0xffffffffffffffffffffffffffffffff diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif index 1263512416..f3ffb2a355 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif @@ -1,6 +1,6 @@ test run set enable_simd -target x86_64 +target x86_64 legacy function %icmp_eq_i8x16() -> b8 { block0: diff --git a/cranelift/filetests/filetests/isa/x86/simd-construction-run.clif b/cranelift/filetests/filetests/isa/x86/simd-construction-run.clif index ef2aeea26d..1323cbaa82 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-construction-run.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-construction-run.clif @@ -1,6 +1,6 @@ test run set enable_simd -target x86_64 skylake +target x86_64 legacy skylake function %splat_i64x2() -> b1 { block0: diff --git a/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif index 72e3412279..f26b436931 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif @@ -1,6 +1,6 @@ test binemit set 
enable_simd -target x86_64 nehalem +target x86_64 legacy nehalem ; Ensure raw_bitcast emits no instructions. function %raw_bitcast_i16x8_to_b32x4() { diff --git a/cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif index 0115107810..6de14e181a 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif @@ -1,6 +1,6 @@ test legalizer set enable_simd -target x86_64 skylake +target x86_64 legacy skylake function %fcvt_from_uint(i32x4) -> f32x4 { block0(v0:i32x4): diff --git a/cranelift/filetests/filetests/isa/x86/simd-conversion-run.clif b/cranelift/filetests/filetests/isa/x86/simd-conversion-run.clif index 0ca5e2022b..9d4870ad6d 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-conversion-run.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-conversion-run.clif @@ -1,6 +1,6 @@ test run set enable_simd -target x86_64 +target x86_64 legacy function %fcvt_from_sint() -> b1 { block0: diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit-for-size.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit-for-size.clif index c36115dd47..6240a08557 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit-for-size.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit-for-size.clif @@ -1,7 +1,7 @@ test binemit set opt_level=speed_and_size set enable_simd -target x86_64 +target x86_64 legacy ;; These scalar_to_vector tests avoid the use of REX prefixes with the speed_and_size optimization flag. 
diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif index abee26fa4b..a8c14a6342 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif @@ -1,6 +1,6 @@ test binemit set enable_simd -target x86_64 haswell +target x86_64 legacy haswell ; for insertlane, floats are legalized differently than integers and booleans; integers and ; booleans use x86_pinsr which is manually placed in the IR so that it can be binemit-tested diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-compile.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-compile.clif index dd3736bbd6..91ff8eb9a0 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-compile.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-lane-access-compile.clif @@ -2,7 +2,7 @@ test compile set opt_level=speed_and_size set enable_probestack=false set enable_simd -target x86_64 +target x86_64 legacy ; Ensure that scalar_to_vector emits no instructions for floats (already exist in an XMM register) function %scalar_to_vector_f32() -> f32x4 baldrdash_system_v { diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif index 0f22ed3669..284ef35180 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif @@ -1,6 +1,6 @@ test legalizer set enable_simd -target x86_64 skylake +target x86_64 legacy skylake ;; shuffle diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-run.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-run.clif index 0d58472a36..3318635035 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-run.clif +++ 
b/cranelift/filetests/filetests/isa/x86/simd-lane-access-run.clif @@ -1,6 +1,6 @@ test run set enable_simd -target x86_64 +target x86_64 legacy function %shuffle_different_ssa_values() -> b1 { block0: diff --git a/cranelift/filetests/filetests/isa/x86/simd-logical-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-logical-binemit.clif index 2f7c4f5b22..af5ca0fe63 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-logical-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-logical-binemit.clif @@ -1,6 +1,6 @@ test binemit set enable_simd -target x86_64 skylake +target x86_64 legacy skylake function %bor_b16x8(b16x8, b16x8) -> b16x8 { block0(v0: b16x8 [%xmm2], v1: b16x8 [%xmm1]): diff --git a/cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif index b3ac8fa60f..5e5bb7ac43 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif @@ -1,6 +1,6 @@ test legalizer set enable_simd -target x86_64 skylake +target x86_64 legacy skylake function %bnot_b32x4(b32x4) -> b32x4 { ; check: const0 = 0xffffffffffffffffffffffffffffffff diff --git a/cranelift/filetests/filetests/isa/x86/simd-logical-rodata.clif b/cranelift/filetests/filetests/isa/x86/simd-logical-rodata.clif index 619d300bfe..6b6b91a915 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-logical-rodata.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-logical-rodata.clif @@ -1,6 +1,6 @@ test rodata set enable_simd -target x86_64 skylake +target x86_64 legacy skylake function %bnot_b32x4(b32x4) -> b32x4 { block0(v0: b32x4): diff --git a/cranelift/filetests/filetests/isa/x86/simd-logical-run.clif b/cranelift/filetests/filetests/isa/x86/simd-logical-run.clif index 35fc44bc6a..ce821f2238 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-logical-run.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-logical-run.clif @@ 
-1,6 +1,6 @@ test run set enable_simd -target x86_64 skylake +target x86_64 legacy skylake function %bnot() -> b32 { block0: diff --git a/cranelift/filetests/filetests/isa/x86/simd-memory-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-memory-binemit.clif index f9c7f1d485..4f8b050d01 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-memory-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-memory-binemit.clif @@ -1,6 +1,6 @@ test binemit set enable_simd -target x86_64 skylake +target x86_64 legacy skylake function %load_store_simple(i64) { block0(v0: i64 [%rax]): diff --git a/cranelift/filetests/filetests/isa/x86/simd-pextr-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-pextr-binemit.clif index d111004d21..4141a05b32 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-pextr-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-pextr-binemit.clif @@ -1,6 +1,6 @@ test binemit set enable_simd -target x86_64 haswell +target x86_64 legacy haswell function u0:0(i64 fp [%rbp]) -> i32 [%rax], i64 fp [%rbp] system_v { ss0 = explicit_slot 32, offset -48 diff --git a/cranelift/filetests/filetests/isa/x86/simd-vconst-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-binemit.clif index 787c993a81..23aee87655 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-vconst-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-vconst-binemit.clif @@ -1,7 +1,7 @@ test binemit set opt_level=speed_and_size set enable_simd -target x86_64 +target x86_64 legacy function %vconst_b8() { block0: diff --git a/cranelift/filetests/filetests/isa/x86/simd-vconst-compile.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-compile.clif index 55fbde0199..477984b344 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-vconst-compile.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-vconst-compile.clif @@ -1,7 +1,7 @@ test compile set enable_simd=true set enable_probestack=false -target x86_64 haswell +target x86_64 
legacy haswell ; use baldrdash calling convention here for simplicity (avoids prologue, epilogue) function %vconst_i32() -> i32x4 baldrdash_system_v { diff --git a/cranelift/filetests/filetests/isa/x86/simd-vconst-optimized-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-optimized-binemit.clif index 6bc4e70dff..07fa364752 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-vconst-optimized-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-vconst-optimized-binemit.clif @@ -1,6 +1,6 @@ test binemit set enable_simd -target x86_64 +target x86_64 legacy function %vconst_optimizations() { block0: diff --git a/cranelift/filetests/filetests/isa/x86/simd-vconst-optimized-run.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-optimized-run.clif index 1067b4fbb4..9e666431be 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-vconst-optimized-run.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-vconst-optimized-run.clif @@ -1,6 +1,6 @@ test run set enable_simd -target x86_64 +target x86_64 legacy function %vconst_zeroes() -> b1 { block0: diff --git a/cranelift/filetests/filetests/isa/x86/simd-vconst-rodata.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-rodata.clif index 62acba80ec..e7e63e65ea 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-vconst-rodata.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-vconst-rodata.clif @@ -1,6 +1,6 @@ test rodata set enable_simd=true -target x86_64 haswell +target x86_64 legacy haswell function %vconst_i32() -> i32x4 { block0: diff --git a/cranelift/filetests/filetests/isa/x86/simd-vconst-run.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-run.clif index 8871dde3e8..8d28c6b0de 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-vconst-run.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-vconst-run.clif @@ -1,6 +1,6 @@ test run set enable_simd -target x86_64 +target x86_64 legacy function %vconst_syntax() -> b1 { block0: diff --git 
a/cranelift/filetests/filetests/isa/x86/simd-vselect-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-vselect-binemit.clif index a575c58f64..275a5e4411 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-vselect-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-vselect-binemit.clif @@ -1,6 +1,6 @@ test binemit set enable_simd -target x86_64 haswell +target x86_64 legacy haswell function %vselect_i8x16(b8x16, i8x16, i8x16) { block0(v0: b8x16 [%xmm0], v1: i8x16 [%xmm3], v2: i8x16 [%xmm5]): diff --git a/cranelift/filetests/filetests/isa/x86/simd-vselect-legalize-to-bitselect.clif b/cranelift/filetests/filetests/isa/x86/simd-vselect-legalize-to-bitselect.clif index 723539631d..648b3f5584 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-vselect-legalize-to-bitselect.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-vselect-legalize-to-bitselect.clif @@ -1,6 +1,6 @@ test legalizer set enable_simd -target x86_64 +target x86_64 legacy ;; Test if vselect gets legalized if BLEND* instructions are not available diff --git a/cranelift/filetests/filetests/isa/x86/simd-vselect-run.clif b/cranelift/filetests/filetests/isa/x86/simd-vselect-run.clif index ac6feaa994..4c497eb97b 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-vselect-run.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-vselect-run.clif @@ -1,6 +1,6 @@ test run set enable_simd -target x86_64 haswell +target x86_64 legacy haswell function %vselect_i8x16() -> i8x16 { block0: diff --git a/cranelift/filetests/filetests/isa/x86/stack-addr32.clif b/cranelift/filetests/filetests/isa/x86/stack-addr32.clif index 4b8a153795..f06b3ec0eb 100644 --- a/cranelift/filetests/filetests/isa/x86/stack-addr32.clif +++ b/cranelift/filetests/filetests/isa/x86/stack-addr32.clif @@ -1,7 +1,7 @@ ; binary emission of stack address instructions on i686. 
test binemit set opt_level=none -target i686 haswell +target i686 legacy haswell ; The binary encodings can be verified with the command: ; diff --git a/cranelift/filetests/filetests/isa/x86/stack-addr64.clif b/cranelift/filetests/filetests/isa/x86/stack-addr64.clif index bcb441cd6b..5b8d5d7ab7 100644 --- a/cranelift/filetests/filetests/isa/x86/stack-addr64.clif +++ b/cranelift/filetests/filetests/isa/x86/stack-addr64.clif @@ -1,7 +1,7 @@ ; binary emission of stack address instructions on x86-64. test binemit set opt_level=none -target x86_64 haswell +target x86_64 legacy haswell ; The binary encodings can be verified with the command: ; diff --git a/cranelift/filetests/filetests/isa/x86/stack-load-store64.clif b/cranelift/filetests/filetests/isa/x86/stack-load-store64.clif index a74a1dfc32..508fae04d2 100644 --- a/cranelift/filetests/filetests/isa/x86/stack-load-store64.clif +++ b/cranelift/filetests/filetests/isa/x86/stack-load-store64.clif @@ -1,7 +1,7 @@ ; legalization of stack load and store instructions on x86-64. 
test legalizer set opt_level=none -target x86_64 haswell +target x86_64 legacy haswell function %stack_load_and_store() { ss0 = explicit_slot 8, offset 0 diff --git a/cranelift/filetests/filetests/isa/x86/stack-load-store8.clif b/cranelift/filetests/filetests/isa/x86/stack-load-store8.clif index 2c5bb1553b..0a9f973fac 100644 --- a/cranelift/filetests/filetests/isa/x86/stack-load-store8.clif +++ b/cranelift/filetests/filetests/isa/x86/stack-load-store8.clif @@ -1,5 +1,5 @@ test compile -target x86_64 +target x86_64 legacy function u0:0(i8) -> i8 { ss0 = explicit_slot 1 diff --git a/cranelift/filetests/filetests/isa/x86/struct-arg.clif b/cranelift/filetests/filetests/isa/x86/struct-arg.clif index 2043ac4979..8358e8633a 100644 --- a/cranelift/filetests/filetests/isa/x86/struct-arg.clif +++ b/cranelift/filetests/filetests/isa/x86/struct-arg.clif @@ -1,6 +1,6 @@ test compile set is_pic -target x86_64 +target x86_64 legacy function u0:0(i64 sarg(64)) -> i8 system_v { block0(v0: i64): diff --git a/cranelift/filetests/filetests/isa/x86/systemv_x64_unwind.clif b/cranelift/filetests/filetests/isa/x86/systemv_x64_unwind.clif index 9d4053aee1..c5144bfd97 100644 --- a/cranelift/filetests/filetests/isa/x86/systemv_x64_unwind.clif +++ b/cranelift/filetests/filetests/isa/x86/systemv_x64_unwind.clif @@ -1,7 +1,7 @@ test unwind set opt_level=speed_and_size set is_pic -target x86_64 haswell +target x86_64-linux legacy haswell ; check the unwind information with a function with no args function %no_args() system_v { diff --git a/cranelift/filetests/filetests/isa/x86/tls_elf.clif b/cranelift/filetests/filetests/isa/x86/tls_elf.clif index 3788dd7d27..2c957e0b9a 100644 --- a/cranelift/filetests/filetests/isa/x86/tls_elf.clif +++ b/cranelift/filetests/filetests/isa/x86/tls_elf.clif @@ -1,6 +1,6 @@ test regalloc set tls_model=elf_gd -target x86_64 +target x86_64 legacy function u0:0(i32) -> i32, i64 { gv0 = symbol colocated tls u1:0 diff --git 
a/cranelift/filetests/filetests/isa/x86/tls_enc.clif b/cranelift/filetests/filetests/isa/x86/tls_enc.clif index acdda733a5..d3481a15bf 100644 --- a/cranelift/filetests/filetests/isa/x86/tls_enc.clif +++ b/cranelift/filetests/filetests/isa/x86/tls_enc.clif @@ -1,5 +1,5 @@ test binemit -target x86_64 +target x86_64 legacy function u0:0() -> i64, i64 { gv0 = symbol colocated tls u1:0 diff --git a/cranelift/filetests/filetests/isa/x86/tls_macho.clif b/cranelift/filetests/filetests/isa/x86/tls_macho.clif index d2c637d2e8..3747ac9f05 100644 --- a/cranelift/filetests/filetests/isa/x86/tls_macho.clif +++ b/cranelift/filetests/filetests/isa/x86/tls_macho.clif @@ -1,6 +1,6 @@ test regalloc set tls_model=macho -target x86_64 +target x86_64 legacy function u0:0(i32) -> i32, i64 { gv0 = symbol colocated tls u1:0 diff --git a/cranelift/filetests/filetests/isa/x86/uextend-i8-to-i16.clif b/cranelift/filetests/filetests/isa/x86/uextend-i8-to-i16.clif index 7d778aa778..931b6e0aca 100644 --- a/cranelift/filetests/filetests/isa/x86/uextend-i8-to-i16.clif +++ b/cranelift/filetests/filetests/isa/x86/uextend-i8-to-i16.clif @@ -1,5 +1,5 @@ test compile -target x86_64 +target x86_64 legacy function u0:0(i8) -> i16 fast { block0(v0: i8): diff --git a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif index 27106d7d98..13cf504d13 100644 --- a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif +++ b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif @@ -1,7 +1,7 @@ test compile set opt_level=speed_and_size set is_pic -target x86_64 haswell +target x86_64 legacy haswell ; check if for one arg we use the right register function %one_arg(i64) windows_fastcall { @@ -252,4 +252,4 @@ block0(v0: i64): ; nextln: v58 = x86_pop.i64 ; nextln: v57 = x86_pop.i64 ; nextln: return v10, v57, v58, v59, v60, v61 -; nextln: } \ No newline at end of file +; nextln: } diff --git 
a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif index fcfe12b80b..547e131fbd 100644 --- a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif +++ b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif @@ -1,7 +1,7 @@ test unwind set opt_level=speed_and_size set is_pic -target x86_64 haswell +target x86_64-windows legacy haswell ; check the unwind information with a leaf function with no args function %no_args_leaf() windows_fastcall { diff --git a/cranelift/filetests/filetests/legalizer/bitrev-i128.clif b/cranelift/filetests/filetests/legalizer/bitrev-i128.clif index fad0f2aace..b58bf9bcb5 100644 --- a/cranelift/filetests/filetests/legalizer/bitrev-i128.clif +++ b/cranelift/filetests/filetests/legalizer/bitrev-i128.clif @@ -1,5 +1,5 @@ test legalizer -target x86_64 +target x86_64 legacy function %reverse_bits(i128) -> i128 { block0(v0: i128): diff --git a/cranelift/filetests/filetests/legalizer/bitrev.clif b/cranelift/filetests/filetests/legalizer/bitrev.clif index 5651d7a7f3..6c9ead0fe2 100644 --- a/cranelift/filetests/filetests/legalizer/bitrev.clif +++ b/cranelift/filetests/filetests/legalizer/bitrev.clif @@ -1,5 +1,5 @@ test legalizer -target x86_64 +target x86_64 legacy function %reverse_bits_8(i8) -> i8 { block0(v0: i8): diff --git a/cranelift/filetests/filetests/legalizer/br_table_cond.clif b/cranelift/filetests/filetests/legalizer/br_table_cond.clif index 9677e2c9f3..db464ae4d4 100644 --- a/cranelift/filetests/filetests/legalizer/br_table_cond.clif +++ b/cranelift/filetests/filetests/legalizer/br_table_cond.clif @@ -1,7 +1,7 @@ test legalizer set enable_probestack=false set enable_jump_tables=false -target x86_64 +target x86_64 legacy ; Test that when jump_tables_enables is false, all jump tables are eliminated. 
; regex: V=v\d+ diff --git a/cranelift/filetests/filetests/legalizer/empty_br_table.clif b/cranelift/filetests/filetests/legalizer/empty_br_table.clif index 606a07f605..d320155470 100644 --- a/cranelift/filetests/filetests/legalizer/empty_br_table.clif +++ b/cranelift/filetests/filetests/legalizer/empty_br_table.clif @@ -1,7 +1,7 @@ test legalizer set enable_probestack=false set enable_jump_tables=false -target x86_64 +target x86_64 legacy function u0:0(i64) { jt0 = jump_table [] diff --git a/cranelift/filetests/filetests/legalizer/icmp_imm_i128.clif b/cranelift/filetests/filetests/legalizer/icmp_imm_i128.clif index 56e54fa1c3..6d72cc6499 100644 --- a/cranelift/filetests/filetests/legalizer/icmp_imm_i128.clif +++ b/cranelift/filetests/filetests/legalizer/icmp_imm_i128.clif @@ -1,5 +1,5 @@ test legalizer -target x86_64 +target x86_64 legacy function %icmp_imm_i128(i128) -> i8 { block0(v0: i128): diff --git a/cranelift/filetests/filetests/legalizer/pass_by_ref.clif b/cranelift/filetests/filetests/legalizer/pass_by_ref.clif index 5cdfc92105..141330cf01 100644 --- a/cranelift/filetests/filetests/legalizer/pass_by_ref.clif +++ b/cranelift/filetests/filetests/legalizer/pass_by_ref.clif @@ -1,5 +1,5 @@ test legalizer -target x86_64 +target x86_64 legacy function %legalize_entry(i128) -> i64 windows_fastcall { block0(v0: i128): diff --git a/cranelift/filetests/filetests/legalizer/popcnt-i128.clif b/cranelift/filetests/filetests/legalizer/popcnt-i128.clif index 6d07f32631..8976ad0e25 100644 --- a/cranelift/filetests/filetests/legalizer/popcnt-i128.clif +++ b/cranelift/filetests/filetests/legalizer/popcnt-i128.clif @@ -1,5 +1,5 @@ test legalizer -target x86_64 haswell +target x86_64 legacy haswell function %foo() -> i128 { block0: diff --git a/cranelift/filetests/filetests/postopt/basic.clif b/cranelift/filetests/filetests/postopt/basic.clif index 4fb9e9664c..7b4c07b422 100644 --- a/cranelift/filetests/filetests/postopt/basic.clif +++ 
b/cranelift/filetests/filetests/postopt/basic.clif @@ -1,5 +1,5 @@ test postopt -target i686 +target i686 legacy ; Test that compare+branch sequences are folded effectively on x86. diff --git a/cranelift/filetests/filetests/postopt/complex_memory_ops.clif b/cranelift/filetests/filetests/postopt/complex_memory_ops.clif index 4ab28b06e4..acedb71087 100644 --- a/cranelift/filetests/filetests/postopt/complex_memory_ops.clif +++ b/cranelift/filetests/filetests/postopt/complex_memory_ops.clif @@ -1,5 +1,5 @@ test postopt -target x86_64 +target x86_64 legacy function %dual_loads(i64, i64) -> i64 { block0(v0: i64, v1: i64): diff --git a/cranelift/filetests/filetests/postopt/fold_offset_into_address.clif b/cranelift/filetests/filetests/postopt/fold_offset_into_address.clif index 1b58caed72..84ddf3b884 100644 --- a/cranelift/filetests/filetests/postopt/fold_offset_into_address.clif +++ b/cranelift/filetests/filetests/postopt/fold_offset_into_address.clif @@ -1,5 +1,5 @@ test postopt -target x86_64 +target x86_64 legacy ; Fold the immediate of an iadd_imm into an address offset. 
diff --git a/cranelift/filetests/filetests/regalloc/aliases.clif b/cranelift/filetests/filetests/regalloc/aliases.clif index 6114298873..e3dcfbad90 100644 --- a/cranelift/filetests/filetests/regalloc/aliases.clif +++ b/cranelift/filetests/filetests/regalloc/aliases.clif @@ -1,5 +1,5 @@ test regalloc -target x86_64 haswell +target x86_64 legacy haswell function %value_aliases(i32, f32, i64 vmctx) baldrdash_system_v { gv0 = vmctx diff --git a/cranelift/filetests/filetests/regalloc/coalescing-207.clif b/cranelift/filetests/filetests/regalloc/coalescing-207.clif index 39ddf0fa88..c549cbd3d2 100644 --- a/cranelift/filetests/filetests/regalloc/coalescing-207.clif +++ b/cranelift/filetests/filetests/regalloc/coalescing-207.clif @@ -1,5 +1,5 @@ test regalloc -target x86_64 haswell +target x86_64 legacy haswell ; Reported as https://github.com/bytecodealliance/cranelift/issues/207 ; diff --git a/cranelift/filetests/filetests/regalloc/coalescing-216.clif b/cranelift/filetests/filetests/regalloc/coalescing-216.clif index 020ced084b..4c9b27d6b0 100644 --- a/cranelift/filetests/filetests/regalloc/coalescing-216.clif +++ b/cranelift/filetests/filetests/regalloc/coalescing-216.clif @@ -1,5 +1,5 @@ test regalloc -target x86_64 haswell +target x86_64 legacy haswell ; Reported as https://github.com/bytecodealliance/cranelift/issues/216 from the Binaryen fuzzer. 
; diff --git a/cranelift/filetests/filetests/regalloc/coloring-227.clif b/cranelift/filetests/filetests/regalloc/coloring-227.clif index 3fabd84f76..d47a905637 100644 --- a/cranelift/filetests/filetests/regalloc/coloring-227.clif +++ b/cranelift/filetests/filetests/regalloc/coloring-227.clif @@ -1,5 +1,5 @@ test regalloc -target x86_64 haswell +target x86_64 legacy haswell function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8]) system_v { gv0 = vmctx diff --git a/cranelift/filetests/filetests/regalloc/fallthrough-return.clif b/cranelift/filetests/filetests/regalloc/fallthrough-return.clif index 58ec61f0d8..90650aa4f0 100644 --- a/cranelift/filetests/filetests/regalloc/fallthrough-return.clif +++ b/cranelift/filetests/filetests/regalloc/fallthrough-return.clif @@ -1,5 +1,5 @@ test regalloc -target x86_64 +target x86_64 legacy ; Test that fallthrough returns are visited by reload and coloring. diff --git a/cranelift/filetests/filetests/regalloc/ghost-param.clif b/cranelift/filetests/filetests/regalloc/ghost-param.clif index d51f4a7f72..1d569727dd 100644 --- a/cranelift/filetests/filetests/regalloc/ghost-param.clif +++ b/cranelift/filetests/filetests/regalloc/ghost-param.clif @@ -1,5 +1,5 @@ test regalloc -target x86_64 haswell +target x86_64 legacy haswell ; This test case would create a block parameter that was a ghost value. ; The coalescer would insert a copy of the ghost value, leading to verifier errors. diff --git a/cranelift/filetests/filetests/regalloc/global-constraints.clif b/cranelift/filetests/filetests/regalloc/global-constraints.clif index 8149b9bae6..1fe89ae823 100644 --- a/cranelift/filetests/filetests/regalloc/global-constraints.clif +++ b/cranelift/filetests/filetests/regalloc/global-constraints.clif @@ -1,5 +1,5 @@ test regalloc -target i686 +target i686 legacy ; This test covers the troubles when values with global live ranges are defined ; by instructions with constrained register classes. 
diff --git a/cranelift/filetests/filetests/regalloc/global-fixed.clif b/cranelift/filetests/filetests/regalloc/global-fixed.clif index 851f012492..6d31f7511a 100644 --- a/cranelift/filetests/filetests/regalloc/global-fixed.clif +++ b/cranelift/filetests/filetests/regalloc/global-fixed.clif @@ -1,5 +1,5 @@ test regalloc -target x86_64 haswell +target x86_64 legacy haswell function %foo() system_v { block4: diff --git a/cranelift/filetests/filetests/regalloc/gpr-deref-safe-335.clif b/cranelift/filetests/filetests/regalloc/gpr-deref-safe-335.clif index 04e9cc54fb..c4534b0f8b 100644 --- a/cranelift/filetests/filetests/regalloc/gpr-deref-safe-335.clif +++ b/cranelift/filetests/filetests/regalloc/gpr-deref-safe-335.clif @@ -1,5 +1,5 @@ test regalloc -target x86_64 +target x86_64 legacy function u0:587() fast { block0: diff --git a/cranelift/filetests/filetests/regalloc/iterate.clif b/cranelift/filetests/filetests/regalloc/iterate.clif index f3ed963d70..3272199bca 100644 --- a/cranelift/filetests/filetests/regalloc/iterate.clif +++ b/cranelift/filetests/filetests/regalloc/iterate.clif @@ -1,5 +1,5 @@ test regalloc -target x86_64 haswell +target x86_64 legacy haswell function u0:9(i64 [%rdi], f32 [%xmm0], f64 [%xmm1], i32 [%rsi], i32 [%rdx], i64 vmctx [%r14]) -> i64 [%rax] baldrdash_system_v { block0(v0: i64, v1: f32, v2: f64, v3: i32, v4: i32, v5: i64): diff --git a/cranelift/filetests/filetests/regalloc/multi-constraints.clif b/cranelift/filetests/filetests/regalloc/multi-constraints.clif index b01be532f8..0a6b160f09 100644 --- a/cranelift/filetests/filetests/regalloc/multi-constraints.clif +++ b/cranelift/filetests/filetests/regalloc/multi-constraints.clif @@ -1,5 +1,5 @@ test regalloc -target x86_64 haswell +target x86_64 legacy haswell ; Test combinations of constraints. 
; diff --git a/cranelift/filetests/filetests/regalloc/multiple-returns.clif b/cranelift/filetests/filetests/regalloc/multiple-returns.clif index 3481747a60..8825a4df72 100644 --- a/cranelift/filetests/filetests/regalloc/multiple-returns.clif +++ b/cranelift/filetests/filetests/regalloc/multiple-returns.clif @@ -1,5 +1,5 @@ test regalloc -target x86_64 +target x86_64 legacy ; Return the same value twice. This needs a copy so that each value can be ; allocated its own register. diff --git a/cranelift/filetests/filetests/regalloc/output-interference.clif b/cranelift/filetests/filetests/regalloc/output-interference.clif index 513c81f4e5..1ba797f6c8 100644 --- a/cranelift/filetests/filetests/regalloc/output-interference.clif +++ b/cranelift/filetests/filetests/regalloc/output-interference.clif @@ -1,5 +1,5 @@ test regalloc -target x86_64 haswell +target x86_64 legacy haswell function %test(i64) -> i64 system_v { block0(v0: i64): diff --git a/cranelift/filetests/filetests/regalloc/reload-208.clif b/cranelift/filetests/filetests/regalloc/reload-208.clif index 6a723f02f5..5e6a7e9864 100644 --- a/cranelift/filetests/filetests/regalloc/reload-208.clif +++ b/cranelift/filetests/filetests/regalloc/reload-208.clif @@ -1,5 +1,5 @@ test regalloc -target x86_64 haswell +target x86_64 legacy haswell ; regex: V=v\d+ ; regex: BB=block\d+ diff --git a/cranelift/filetests/filetests/regalloc/reload-779.clif b/cranelift/filetests/filetests/regalloc/reload-779.clif index ed6374c9fb..5dafe32b5c 100644 --- a/cranelift/filetests/filetests/regalloc/reload-779.clif +++ b/cranelift/filetests/filetests/regalloc/reload-779.clif @@ -1,5 +1,5 @@ test compile -target x86_64 +target x86_64 legacy ; Filed as https://github.com/bytecodealliance/cranelift/issues/779 ; diff --git a/cranelift/filetests/filetests/regalloc/reload.clif b/cranelift/filetests/filetests/regalloc/reload.clif index 88b20c1501..1ae755a988 100644 --- a/cranelift/filetests/filetests/regalloc/reload.clif +++ 
b/cranelift/filetests/filetests/regalloc/reload.clif @@ -1,5 +1,5 @@ test regalloc -target riscv32 enable_e +target riscv32 legacy enable_e ; regex: V=v\d+ diff --git a/cranelift/filetests/filetests/regalloc/schedule-moves.clif b/cranelift/filetests/filetests/regalloc/schedule-moves.clif index f46d8958f7..701a91a15a 100644 --- a/cranelift/filetests/filetests/regalloc/schedule-moves.clif +++ b/cranelift/filetests/filetests/regalloc/schedule-moves.clif @@ -1,5 +1,5 @@ test regalloc -target i686 haswell +target i686 legacy haswell function %pr165() system_v { block0: diff --git a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-2.clif b/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-2.clif index 9737d4e163..b280db086f 100644 --- a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-2.clif +++ b/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-2.clif @@ -1,7 +1,7 @@ test compile set opt_level=speed set enable_pinned_reg=true -target x86_64 haswell +target x86_64 legacy haswell function u0:0(i32, i32, i32, i64 vmctx) -> i64 uext system_v { block0(v0: i32, v1: i32, v2: i32, v3: i64): diff --git a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-3.clif b/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-3.clif index 8a9a040eb1..1c2d1b2bc0 100644 --- a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-3.clif +++ b/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-3.clif @@ -1,7 +1,7 @@ test compile set opt_level=speed set enable_pinned_reg=true -target x86_64 haswell +target x86_64 legacy haswell function u0:0(i32, i32, i32, i64 vmctx) -> i64 uext system_v { block0(v0: i32, v1: i32, v2: i32, v3: i64): diff --git a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var.clif b/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var.clif index 475bfa0f47..1aec10354f 100644 --- 
a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var.clif +++ b/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var.clif @@ -1,7 +1,7 @@ test compile set opt_level=speed set enable_pinned_reg=true -target x86_64 haswell +target x86_64 legacy haswell ;; Test for the issue #1123; https://github.com/bytecodealliance/cranelift/issues/1123 diff --git a/cranelift/filetests/filetests/regalloc/spill-noregs.clif b/cranelift/filetests/filetests/regalloc/spill-noregs.clif index 5acdd45b17..e3540f6a59 100644 --- a/cranelift/filetests/filetests/regalloc/spill-noregs.clif +++ b/cranelift/filetests/filetests/regalloc/spill-noregs.clif @@ -1,5 +1,5 @@ test regalloc -target x86_64 +target x86_64 legacy ; Test case found by the Binaryen fuzzer. ; diff --git a/cranelift/filetests/filetests/regalloc/spill.clif b/cranelift/filetests/filetests/regalloc/spill.clif index 23706cd2cf..2a3f2ad959 100644 --- a/cranelift/filetests/filetests/regalloc/spill.clif +++ b/cranelift/filetests/filetests/regalloc/spill.clif @@ -12,7 +12,7 @@ test regalloc ; regex: V=v\d+ ; regex: WS=\s+ -target riscv32 enable_e +target riscv32 legacy enable_e ; In straight-line code, the first value defined is spilled. ; That is in order: diff --git a/cranelift/filetests/filetests/regalloc/unreachable_code.clif b/cranelift/filetests/filetests/regalloc/unreachable_code.clif index 4c288a91dd..219a299880 100644 --- a/cranelift/filetests/filetests/regalloc/unreachable_code.clif +++ b/cranelift/filetests/filetests/regalloc/unreachable_code.clif @@ -2,7 +2,7 @@ test compile set enable_probestack=0 -target x86_64 haswell +target x86_64 legacy haswell ; This function contains unreachable blocks which trip up the register ; allocator if they don't get cleared out. 
diff --git a/cranelift/filetests/filetests/regalloc/x86-regres.clif b/cranelift/filetests/filetests/regalloc/x86-regres.clif index e239d0ad37..935b33c5b7 100644 --- a/cranelift/filetests/filetests/regalloc/x86-regres.clif +++ b/cranelift/filetests/filetests/regalloc/x86-regres.clif @@ -1,5 +1,5 @@ test regalloc -target i686 +target i686 legacy ; regex: V=v\d+ ; regex: BB=block\d+ diff --git a/cranelift/filetests/filetests/regress/allow-relaxation-shrink.clif b/cranelift/filetests/filetests/regress/allow-relaxation-shrink.clif index 995e7c5f64..5d4a37f5e6 100644 --- a/cranelift/filetests/filetests/regress/allow-relaxation-shrink.clif +++ b/cranelift/filetests/filetests/regress/allow-relaxation-shrink.clif @@ -1,5 +1,5 @@ test compile -target x86_64 +target x86_64 legacy ; This checks that code shrink is allowed while relaxing code, when code shrink ; has not run. diff --git a/cranelift/filetests/filetests/safepoint/basic.clif b/cranelift/filetests/filetests/safepoint/basic.clif index 7e0088b23b..47acf2ad72 100644 --- a/cranelift/filetests/filetests/safepoint/basic.clif +++ b/cranelift/filetests/filetests/safepoint/basic.clif @@ -1,6 +1,6 @@ test safepoint set enable_safepoints=true -target x86_64 +target x86_64 legacy function %test(i32, r64, r64) -> r64 { block0(v0: i32, v1:r64, v2:r64): diff --git a/cranelift/filetests/filetests/safepoint/call.clif b/cranelift/filetests/filetests/safepoint/call.clif index 53c9246323..ffcf41fb46 100644 --- a/cranelift/filetests/filetests/safepoint/call.clif +++ b/cranelift/filetests/filetests/safepoint/call.clif @@ -1,6 +1,6 @@ test safepoint set enable_safepoints=true -target x86_64 +target x86_64 legacy function %direct() -> r64 { fn0 = %none() diff --git a/cranelift/filetests/filetests/stack_maps/call.clif b/cranelift/filetests/filetests/stack_maps/call.clif index 158082be0e..6563ad450a 100644 --- a/cranelift/filetests/filetests/stack_maps/call.clif +++ b/cranelift/filetests/filetests/stack_maps/call.clif @@ -1,6 +1,6 @@ test 
stack_maps set enable_safepoints=true -target x86_64 +target x86_64 legacy function %icall_fast(r64) -> r64 fast { ; check: function %icall_fast diff --git a/cranelift/filetests/filetests/stack_maps/incoming_args.clif b/cranelift/filetests/filetests/stack_maps/incoming_args.clif index 8ae5712b6d..e8231c3aad 100644 --- a/cranelift/filetests/filetests/stack_maps/incoming_args.clif +++ b/cranelift/filetests/filetests/stack_maps/incoming_args.clif @@ -1,6 +1,6 @@ test stack_maps set enable_safepoints=true -target x86_64 +target x86_64 legacy ;; Incoming args get included in stack maps. diff --git a/cranelift/filetests/filetests/wasm/multi-val-b1.clif b/cranelift/filetests/filetests/wasm/multi-val-b1.clif index 7a4d4d02b0..f41f867918 100644 --- a/cranelift/filetests/filetests/wasm/multi-val-b1.clif +++ b/cranelift/filetests/filetests/wasm/multi-val-b1.clif @@ -1,5 +1,5 @@ test compile -target x86_64 haswell +target x86_64 legacy haswell ;; `b1` return values need to be legalized into bytes so that they can be stored ;; in memory. diff --git a/cranelift/filetests/filetests/wasm/multi-val-call-indirect.clif b/cranelift/filetests/filetests/wasm/multi-val-call-indirect.clif index 6f5afd4700..06d0814dfb 100644 --- a/cranelift/filetests/filetests/wasm/multi-val-call-indirect.clif +++ b/cranelift/filetests/filetests/wasm/multi-val-call-indirect.clif @@ -1,5 +1,5 @@ test legalizer -target x86_64 haswell +target x86_64 legacy haswell ;; Indirect calls with many returns. 
diff --git a/cranelift/filetests/filetests/wasm/multi-val-call-legalize-args.clif b/cranelift/filetests/filetests/wasm/multi-val-call-legalize-args.clif index b57090d851..aae733ddf4 100644 --- a/cranelift/filetests/filetests/wasm/multi-val-call-legalize-args.clif +++ b/cranelift/filetests/filetests/wasm/multi-val-call-legalize-args.clif @@ -1,5 +1,5 @@ test legalizer -target x86_64 haswell +target x86_64 legacy haswell ;; Test if arguments are legalized if function uses sret diff --git a/cranelift/filetests/filetests/wasm/multi-val-reuse-ret-ptr-stack-slot.clif b/cranelift/filetests/filetests/wasm/multi-val-reuse-ret-ptr-stack-slot.clif index d712bf21ce..c58102aedc 100644 --- a/cranelift/filetests/filetests/wasm/multi-val-reuse-ret-ptr-stack-slot.clif +++ b/cranelift/filetests/filetests/wasm/multi-val-reuse-ret-ptr-stack-slot.clif @@ -1,5 +1,5 @@ test legalizer -target x86_64 haswell +target x86_64 legacy haswell ;; Test that we don't reuse `sret` stack slots for multiple calls. We could do ;; this one day, but it would require some care to ensure that we don't have diff --git a/cranelift/filetests/filetests/wasm/multi-val-sret-slot-alignment.clif b/cranelift/filetests/filetests/wasm/multi-val-sret-slot-alignment.clif index 5004ebbe54..da9f25ed97 100644 --- a/cranelift/filetests/filetests/wasm/multi-val-sret-slot-alignment.clif +++ b/cranelift/filetests/filetests/wasm/multi-val-sret-slot-alignment.clif @@ -1,5 +1,5 @@ test legalizer -target x86_64 haswell +target x86_64 legacy haswell ;; Need to insert padding after the `i8`s so that the `i32` and `i64` are ;; aligned. 
diff --git a/cranelift/filetests/src/function_runner.rs b/cranelift/filetests/src/function_runner.rs index 446dc589ed..a41d5f2869 100644 --- a/cranelift/filetests/src/function_runner.rs +++ b/cranelift/filetests/src/function_runner.rs @@ -4,10 +4,10 @@ use cranelift_codegen::binemit::{NullRelocSink, NullStackMapSink, NullTrapSink}; use cranelift_codegen::data_value::DataValue; use cranelift_codegen::ir::immediates::{Ieee32, Ieee64}; use cranelift_codegen::ir::{condcodes::IntCC, Function, InstBuilder, Signature, Type}; -use cranelift_codegen::isa::TargetIsa; +use cranelift_codegen::isa::{BackendVariant, TargetIsa}; use cranelift_codegen::{ir, settings, CodegenError, Context}; use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext}; -use cranelift_native::builder as host_isa_builder; +use cranelift_native::builder_with_options; use log::trace; use memmap2::{Mmap, MmapMut}; use std::cmp::max; @@ -48,8 +48,9 @@ impl SingleFunctionCompiler { } /// Build a [SingleFunctionCompiler] using the host machine's ISA and the passed flags. - pub fn with_host_isa(flags: settings::Flags) -> Self { - let builder = host_isa_builder().expect("Unable to build a TargetIsa for the current host"); + pub fn with_host_isa(flags: settings::Flags, variant: BackendVariant) -> Self { + let builder = builder_with_options(variant, true) + .expect("Unable to build a TargetIsa for the current host"); let isa = builder.finish(flags); Self::new(isa) } @@ -58,7 +59,7 @@ impl SingleFunctionCompiler { /// ISA. pub fn with_default_host_isa() -> Self { let flags = settings::Flags::new(settings::builder()); - Self::with_host_isa(flags) + Self::with_host_isa(flags, BackendVariant::Any) } /// Compile the passed [Function] to a `CompiledFunction`. 
This function will: diff --git a/cranelift/filetests/src/runone.rs b/cranelift/filetests/src/runone.rs index c38315568c..5c1ff01ec9 100644 --- a/cranelift/filetests/src/runone.rs +++ b/cranelift/filetests/src/runone.rs @@ -21,34 +21,17 @@ use std::time; /// When a test must be skipped, returns an Option with a string containing an explanation why; /// otherwise, return None. fn skip_feature_mismatches(testfile: &TestFile) -> Option<&'static str> { - let mut has_experimental_x64 = false; let mut has_experimental_arm32 = false; for feature in &testfile.features { if let Feature::With(name) = feature { match *name { - "experimental_x64" => has_experimental_x64 = true, "experimental_arm32" => has_experimental_arm32 = true, _ => {} } } } - // On the experimental x64 backend, skip tests which are not marked with the feature and - // that want to run on the x86_64 target isa. - #[cfg(feature = "experimental_x64")] - if let IsaSpec::Some(ref isas) = testfile.isa_spec { - if isas.iter().any(|isa| isa.name() == "x64") && !has_experimental_x64 { - return Some("test requiring x86_64 not marked with experimental_x64"); - } - } - - // On other targets, ignore tests marked as experimental_x64 only. - #[cfg(not(feature = "experimental_x64"))] - if has_experimental_x64 { - return Some("missing support for experimental_x64"); - } - // Don't run tests if the experimental support for arm32 is disabled. 
#[cfg(not(feature = "experimental_arm32"))] if has_experimental_arm32 { diff --git a/cranelift/filetests/src/test_run.rs b/cranelift/filetests/src/test_run.rs index 85b0824a7d..b3ee672efe 100644 --- a/cranelift/filetests/src/test_run.rs +++ b/cranelift/filetests/src/test_run.rs @@ -46,8 +46,9 @@ impl SubTest for TestRun { ); return Ok(()); } + let variant = context.isa.unwrap().variant(); - let mut compiler = SingleFunctionCompiler::with_host_isa(context.flags.clone()); + let mut compiler = SingleFunctionCompiler::with_host_isa(context.flags.clone(), variant); for comment in context.details.comments.iter() { if let Some(command) = parse_run_command(comment.text, &func.signature)? { trace!("Parsed run command: {}", command); diff --git a/cranelift/filetests/src/test_unwind.rs b/cranelift/filetests/src/test_unwind.rs index 0c7124e8b2..3d22c4d3d9 100644 --- a/cranelift/filetests/src/test_unwind.rs +++ b/cranelift/filetests/src/test_unwind.rs @@ -72,7 +72,6 @@ impl SubTest for TestUnwind { } mod windowsx64 { - use byteorder::{ByteOrder, LittleEndian}; use std::fmt::Write; pub fn dump(text: &mut W, mem: &[u8]) { @@ -165,23 +164,24 @@ mod windowsx64 { let op_and_info = mem[1]; let op = UnwindOperation::from(op_and_info & 0xF); let info = (op_and_info & 0xF0) >> 4; + let unwind_le_bytes = |bytes| match (bytes, &mem[2..]) { + (2, &[b0, b1, ..]) => UnwindValue::U16(u16::from_le_bytes([b0, b1])), + (4, &[b0, b1, b2, b3, ..]) => { + UnwindValue::U32(u32::from_le_bytes([b0, b1, b2, b3])) + } + (_, _) => panic!("not enough bytes to unwind value"), + }; - let value = match op { - UnwindOperation::LargeStackAlloc => match info { - 0 => UnwindValue::U16(LittleEndian::read_u16(&mem[2..])), - 1 => UnwindValue::U32(LittleEndian::read_u32(&mem[2..])), - _ => panic!("unexpected stack alloc info value"), - }, - UnwindOperation::SaveNonvolatileRegister => { - UnwindValue::U16(LittleEndian::read_u16(&mem[2..])) - } - UnwindOperation::SaveNonvolatileRegisterFar => { - 
UnwindValue::U32(LittleEndian::read_u32(&mem[2..])) - } - UnwindOperation::SaveXmm128 => UnwindValue::U16(LittleEndian::read_u16(&mem[2..])), - UnwindOperation::SaveXmm128Far => { - UnwindValue::U32(LittleEndian::read_u32(&mem[2..])) + let value = match (&op, info) { + (UnwindOperation::LargeStackAlloc, 0) => unwind_le_bytes(2), + (UnwindOperation::LargeStackAlloc, 1) => unwind_le_bytes(4), + (UnwindOperation::LargeStackAlloc, _) => { + panic!("unexpected stack alloc info value") } + (UnwindOperation::SaveNonvolatileRegister, _) => unwind_le_bytes(2), + (UnwindOperation::SaveNonvolatileRegisterFar, _) => unwind_le_bytes(4), + (UnwindOperation::SaveXmm128, _) => unwind_le_bytes(2), + (UnwindOperation::SaveXmm128Far, _) => unwind_le_bytes(4), _ => UnwindValue::None, }; diff --git a/cranelift/frontend/Cargo.toml b/cranelift/frontend/Cargo.toml index 0b70573955..507feed4e1 100644 --- a/cranelift/frontend/Cargo.toml +++ b/cranelift/frontend/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["The Cranelift Project Developers"] name = "cranelift-frontend" -version = "0.72.0" +version = "0.73.0" description = "Cranelift IR builder helper" license = "Apache-2.0 WITH LLVM-exception" documentation = "https://docs.rs/cranelift-frontend" @@ -11,8 +11,8 @@ readme = "README.md" edition = "2018" [dependencies] -cranelift-codegen = { path = "../codegen", version = "0.72.0", default-features = false } -target-lexicon = "0.11" +cranelift-codegen = { path = "../codegen", version = "0.73.0", default-features = false } +target-lexicon = "0.12" log = { version = "0.4.6", default-features = false } hashbrown = { version = "0.9.1", optional = true } smallvec = { version = "1.6.1" } diff --git a/cranelift/frontend/src/frontend.rs b/cranelift/frontend/src/frontend.rs index 45937dc3f9..3b76a47450 100644 --- a/cranelift/frontend/src/frontend.rs +++ b/cranelift/frontend/src/frontend.rs @@ -640,6 +640,7 @@ impl<'a> FunctionBuilder<'a> { dest_align: u8, src_align: u8, non_overlapping: bool, + mut 
flags: MemFlags, ) { // Currently the result of guess work, not actual profiling. const THRESHOLD: u64 = 4; @@ -676,7 +677,6 @@ impl<'a> FunctionBuilder<'a> { return; } - let mut flags = MemFlags::new(); flags.set_aligned(); // Load all of the memory first. This is necessary in case `dest` overlaps. @@ -732,6 +732,7 @@ impl<'a> FunctionBuilder<'a> { ch: u8, size: u64, buffer_align: u8, + mut flags: MemFlags, ) { // Currently the result of guess work, not actual profiling. const THRESHOLD: u64 = 4; @@ -763,7 +764,6 @@ impl<'a> FunctionBuilder<'a> { let size = self.ins().iconst(config.pointer_type(), size as i64); self.call_memset(config, buffer, ch, size); } else { - let mut flags = MemFlags::new(); flags.set_aligned(); let ch = u64::from(ch); @@ -851,7 +851,9 @@ mod tests { use alloc::string::ToString; use cranelift_codegen::entity::EntityRef; use cranelift_codegen::ir::types::*; - use cranelift_codegen::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature}; + use cranelift_codegen::ir::{ + AbiParam, ExternalName, Function, InstBuilder, MemFlags, Signature, + }; use cranelift_codegen::isa::CallConv; use cranelift_codegen::settings; use cranelift_codegen::verifier::verify_function; @@ -1063,7 +1065,16 @@ block0: let src = builder.use_var(x); let dest = builder.use_var(y); let size = 8; - builder.emit_small_memory_copy(target.frontend_config(), dest, src, size, 8, 8, true); + builder.emit_small_memory_copy( + target.frontend_config(), + dest, + src, + size, + 8, + 8, + true, + MemFlags::new(), + ); builder.ins().return_(&[dest]); builder.seal_all_blocks(); @@ -1121,7 +1132,16 @@ block0: let src = builder.use_var(x); let dest = builder.use_var(y); let size = 8192; - builder.emit_small_memory_copy(target.frontend_config(), dest, src, size, 8, 8, true); + builder.emit_small_memory_copy( + target.frontend_config(), + dest, + src, + size, + 8, + 8, + true, + MemFlags::new(), + ); builder.ins().return_(&[dest]); builder.seal_all_blocks(); @@ -1179,7 +1199,7 @@ 
block0: let dest = builder.use_var(y); let size = 8; - builder.emit_small_memset(target.frontend_config(), dest, 1, size, 8); + builder.emit_small_memset(target.frontend_config(), dest, 1, size, 8, MemFlags::new()); builder.ins().return_(&[dest]); builder.seal_all_blocks(); @@ -1232,7 +1252,7 @@ block0: let dest = builder.use_var(y); let size = 8192; - builder.emit_small_memset(target.frontend_config(), dest, 1, size, 8); + builder.emit_small_memset(target.frontend_config(), dest, 1, size, 8, MemFlags::new()); builder.ins().return_(&[dest]); builder.seal_all_blocks(); diff --git a/cranelift/frontend/src/ssa.rs b/cranelift/frontend/src/ssa.rs index c86503d0d6..cdb8ced8e9 100644 --- a/cranelift/frontend/src/ssa.rs +++ b/cranelift/frontend/src/ssa.rs @@ -5,7 +5,7 @@ //! In: Jhala R., De Bosschere K. (eds) Compiler Construction. CC 2013. //! Lecture Notes in Computer Science, vol 7791. Springer, Berlin, Heidelberg //! -//! https://link.springer.com/content/pdf/10.1007/978-3-642-37051-9_6.pdf +//! use crate::Variable; use alloc::vec::Vec; diff --git a/cranelift/frontend/src/switch.rs b/cranelift/frontend/src/switch.rs index f4711e4591..4c165809a0 100644 --- a/cranelift/frontend/src/switch.rs +++ b/cranelift/frontend/src/switch.rs @@ -273,6 +273,7 @@ impl Switch { .icmp_imm(IntCC::UnsignedGreaterThan, discr, u32::max_value() as i64); bx.ins().brnz(bigger_than_u32, otherwise, &[]); bx.ins().jump(new_block, &[]); + bx.seal_block(new_block); bx.switch_to_block(new_block); // Cast to u32, as br_table is not implemented for integers bigger than 32bits. 
@@ -542,38 +543,47 @@ block4: #[test] fn switch_seal_generated_blocks() { - let keys = [0, 1, 2, 10, 11, 12, 20, 30, 40, 50]; + let cases = &[vec![0, 1, 2], vec![0, 1, 2, 10, 11, 12, 20, 30, 40, 50]]; - let mut func = Function::new(); - let mut builder_ctx = FunctionBuilderContext::new(); - let mut builder = FunctionBuilder::new(&mut func, &mut builder_ctx); - - let root_block = builder.create_block(); - let default_block = builder.create_block(); - let mut switch = Switch::new(); - - let case_blocks = keys - .iter() - .map(|key| { - let block = builder.create_block(); - switch.set_entry(*key, block); - block - }) - .collect::>(); - - builder.seal_block(root_block); - builder.switch_to_block(root_block); - - let val = builder.ins().iconst(types::I32, 1); - switch.emit(&mut builder, val, default_block); - - for &block in case_blocks.iter().chain(std::iter::once(&default_block)) { - builder.seal_block(block); - builder.switch_to_block(block); - builder.ins().return_(&[]); + for case in cases { + for typ in &[types::I8, types::I16, types::I32, types::I64, types::I128] { + eprintln!("Testing {:?} with keys: {:?}", typ, case); + do_case(case, *typ); + } } - builder.finalize(); // Will panic if some blocks are not sealed + fn do_case(keys: &[u128], typ: Type) { + let mut func = Function::new(); + let mut builder_ctx = FunctionBuilderContext::new(); + let mut builder = FunctionBuilder::new(&mut func, &mut builder_ctx); + + let root_block = builder.create_block(); + let default_block = builder.create_block(); + let mut switch = Switch::new(); + + let case_blocks = keys + .iter() + .map(|key| { + let block = builder.create_block(); + switch.set_entry(*key, block); + block + }) + .collect::>(); + + builder.seal_block(root_block); + builder.switch_to_block(root_block); + + let val = builder.ins().iconst(typ, 1); + switch.emit(&mut builder, val, default_block); + + for &block in case_blocks.iter().chain(std::iter::once(&default_block)) { + builder.seal_block(block); + 
builder.switch_to_block(block); + builder.ins().return_(&[]); + } + + builder.finalize(); // Will panic if some blocks are not sealed + } } #[test] diff --git a/cranelift/interpreter/Cargo.toml b/cranelift/interpreter/Cargo.toml index 0caaf9dbba..c539f70fe1 100644 --- a/cranelift/interpreter/Cargo.toml +++ b/cranelift/interpreter/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cranelift-interpreter" -version = "0.72.0" +version = "0.73.0" authors = ["The Cranelift Project Developers"] description = "Interpret Cranelift IR" repository = "https://github.com/bytecodealliance/wasmtime" @@ -11,15 +11,15 @@ readme = "README.md" edition = "2018" [dependencies] -cranelift-codegen = { path = "../codegen", version = "0.72.0", features = ["all-arch"] } -cranelift-entity = { path = "../entity", version = "0.72.0" } -cranelift-reader = { path = "../reader", version = "0.72.0" } +cranelift-codegen = { path = "../codegen", version = "0.73.0", features = ["all-arch"] } +cranelift-entity = { path = "../entity", version = "0.73.0" } log = { version = "0.4.8", default-features = false } smallvec = "1.6.1" thiserror = "1.0.15" [dev-dependencies] -cranelift-frontend = { path = "../frontend", version = "0.72.0" } +cranelift-frontend = { path = "../frontend", version = "0.73.0" } +cranelift-reader = { path = "../reader", version = "0.73.0" } [badges] maintenance = { status = "experimental" } diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index 93dc06515e..f8a79a9847 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -563,6 +563,7 @@ where Opcode::FcvtToSintSat => unimplemented!("FcvtToSintSat"), Opcode::FcvtFromUint => unimplemented!("FcvtFromUint"), Opcode::FcvtFromSint => unimplemented!("FcvtFromSint"), + Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"), Opcode::Isplit => unimplemented!("Isplit"), Opcode::Iconcat => unimplemented!("Iconcat"), Opcode::AtomicRmw => unimplemented!("AtomicRmw"), diff --git 
a/cranelift/jit/Cargo.toml b/cranelift/jit/Cargo.toml index c55655aed5..5917edbc9d 100644 --- a/cranelift/jit/Cargo.toml +++ b/cranelift/jit/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cranelift-jit" -version = "0.72.0" +version = "0.73.0" authors = ["The Cranelift Project Developers"] description = "A JIT library backed by Cranelift" repository = "https://github.com/bytecodealliance/wasmtime" @@ -10,15 +10,14 @@ readme = "README.md" edition = "2018" [dependencies] -cranelift-module = { path = "../module", version = "0.72.0" } -cranelift-native = { path = "../native", version = "0.72.0" } -cranelift-codegen = { path = "../codegen", version = "0.72.0", default-features = false, features = ["std"] } -cranelift-entity = { path = "../entity", version = "0.72.0" } +cranelift-module = { path = "../module", version = "0.73.0" } +cranelift-native = { path = "../native", version = "0.73.0" } +cranelift-codegen = { path = "../codegen", version = "0.73.0", default-features = false, features = ["std"] } +cranelift-entity = { path = "../entity", version = "0.73.0" } anyhow = "1.0" region = "2.2.0" libc = { version = "0.2.42" } -errno = "0.2.4" -target-lexicon = "0.11" +target-lexicon = "0.12" memmap2 = { version = "0.2.1", optional = true } log = { version = "0.4.6", default-features = false } @@ -30,9 +29,9 @@ selinux-fix = ['memmap2'] default = [] [dev-dependencies] -cranelift = { path = "../umbrella", version = "0.72.0" } -cranelift-frontend = { path = "../frontend", version = "0.72.0" } -cranelift-entity = { path = "../entity", version = "0.72.0" } +cranelift = { path = "../umbrella", version = "0.73.0" } +cranelift-frontend = { path = "../frontend", version = "0.73.0" } +cranelift-entity = { path = "../entity", version = "0.73.0" } [badges] maintenance = { status = "experimental" } diff --git a/cranelift/jit/src/backend.rs b/cranelift/jit/src/backend.rs index aa54ba1856..4389455d8d 100644 --- a/cranelift/jit/src/backend.rs +++ b/cranelift/jit/src/backend.rs @@ -167,7 
+167,7 @@ impl JITModule { /// corresponding module, it should only be used when none of the functions /// from that module are currently executing and none of the `fn` pointers /// are called afterwards. - pub unsafe fn free_memory(&mut self) { + pub unsafe fn free_memory(mut self) { self.memory.code.free_memory(); self.memory.readonly.free_memory(); self.memory.writable.free_memory(); @@ -180,6 +180,53 @@ impl JITModule { .or_else(|| lookup_with_dlsym(name)) } + fn new_func_plt_entry(&mut self, id: FuncId, val: *const u8) { + let got_entry = self + .memory + .writable + .allocate( + std::mem::size_of::<*const u8>(), + std::mem::align_of::<*const u8>().try_into().unwrap(), + ) + .unwrap() + .cast::<*const u8>(); + self.function_got_entries[id] = Some(NonNull::new(got_entry).unwrap()); + unsafe { + std::ptr::write(got_entry, val); + } + let plt_entry = self + .memory + .code + .allocate(std::mem::size_of::<[u8; 16]>(), EXECUTABLE_DATA_ALIGNMENT) + .unwrap() + .cast::<[u8; 16]>(); + self.record_function_for_perf( + plt_entry as *mut _, + std::mem::size_of::<[u8; 16]>(), + &format!("{}@plt", self.declarations.get_function_decl(id).name), + ); + self.function_plt_entries[id] = Some(NonNull::new(plt_entry).unwrap()); + unsafe { + Self::write_plt_entry_bytes(plt_entry, got_entry); + } + } + + fn new_data_got_entry(&mut self, id: DataId, val: *const u8) { + let got_entry = self + .memory + .writable + .allocate( + std::mem::size_of::<*const u8>(), + std::mem::align_of::<*const u8>().try_into().unwrap(), + ) + .unwrap() + .cast::<*const u8>(); + self.data_object_got_entries[id] = Some(NonNull::new(got_entry).unwrap()); + unsafe { + std::ptr::write(got_entry, val); + } + } + unsafe fn write_plt_entry_bytes(plt_ptr: *mut [u8; 16], got_ptr: *mut *const u8) { assert!( cfg!(target_arch = "x86_64"), @@ -238,6 +285,13 @@ impl JITModule { } } + /// Returns the given function's entry in the Global Offset Table. 
+ /// + /// Panics if there's no entry in the table for the given function. + pub fn read_got_entry(&self, func_id: FuncId) -> *const u8 { + unsafe { *self.function_got_entries[func_id].unwrap().as_ptr() } + } + fn get_got_address(&self, name: &ir::ExternalName) -> *const u8 { match *name { ir::ExternalName::User { .. } => { @@ -289,6 +343,9 @@ impl JITModule { } /// Returns the address of a finalized function. + /// + /// The pointer remains valid until either [`JITModule::free_memory`] is called or in the future + /// some way of deallocating this individual function is used. pub fn get_finalized_function(&self, func_id: FuncId) -> *const u8 { let info = &self.compiled_functions[func_id]; assert!( @@ -301,6 +358,9 @@ impl JITModule { } /// Returns the address and size of a finalized data object. + /// + /// The pointer remains valid until either [`JITModule::free_memory`] is called or in the future + /// some way of deallocating this individual data object is used. pub fn get_finalized_data(&self, data_id: DataId) -> (*const u8, usize) { let info = &self.compiled_data_objects[data_id]; assert!( @@ -488,40 +548,25 @@ impl Module for JITModule { linkage: Linkage, signature: &ir::Signature, ) -> ModuleResult { - let (id, _decl) = self + let (id, linkage) = self .declarations .declare_function(name, linkage, signature)?; if self.function_got_entries[id].is_none() && self.isa.flags().is_pic() { - let got_entry = self - .memory - .writable - .allocate( - std::mem::size_of::<*const u8>(), - std::mem::align_of::<*const u8>().try_into().unwrap(), - ) - .unwrap() - .cast::<*const u8>(); - self.function_got_entries[id] = Some(NonNull::new(got_entry).unwrap()); // FIXME populate got entries with a null pointer when defined - let val = self.lookup_symbol(name).unwrap_or(std::ptr::null()); - unsafe { - std::ptr::write(got_entry, val); - } - let plt_entry = self - .memory - .code - .allocate(std::mem::size_of::<[u8; 16]>(), EXECUTABLE_DATA_ALIGNMENT) - .unwrap() - .cast::<[u8; 
16]>(); - self.record_function_for_perf( - plt_entry as *mut _, - std::mem::size_of::<[u8; 16]>(), - &format!("{}@plt", name), - ); - self.function_plt_entries[id] = Some(NonNull::new(plt_entry).unwrap()); - unsafe { - Self::write_plt_entry_bytes(plt_entry, got_entry); - } + let val = if linkage == Linkage::Import { + self.lookup_symbol(name).unwrap_or(std::ptr::null()) + } else { + std::ptr::null() + }; + self.new_func_plt_entry(id, val); + } + Ok(id) + } + + fn declare_anonymous_function(&mut self, signature: &ir::Signature) -> ModuleResult { + let id = self.declarations.declare_anonymous_function(signature)?; + if self.isa.flags().is_pic() { + self.new_func_plt_entry(id, std::ptr::null()); } Ok(id) } @@ -534,25 +579,26 @@ impl Module for JITModule { tls: bool, ) -> ModuleResult { assert!(!tls, "JIT doesn't yet support TLS"); - let (id, _decl) = self + let (id, linkage) = self .declarations .declare_data(name, linkage, writable, tls)?; if self.data_object_got_entries[id].is_none() && self.isa.flags().is_pic() { - let got_entry = self - .memory - .writable - .allocate( - std::mem::size_of::<*const u8>(), - std::mem::align_of::<*const u8>().try_into().unwrap(), - ) - .unwrap() - .cast::<*const u8>(); - self.data_object_got_entries[id] = Some(NonNull::new(got_entry).unwrap()); // FIXME populate got entries with a null pointer when defined - let val = self.lookup_symbol(name).unwrap_or(std::ptr::null()); - unsafe { - std::ptr::write(got_entry, val); - } + let val = if linkage == Linkage::Import { + self.lookup_symbol(name).unwrap_or(std::ptr::null()) + } else { + std::ptr::null() + }; + self.new_data_got_entry(id, val); + } + Ok(id) + } + + fn declare_anonymous_data(&mut self, writable: bool, tls: bool) -> ModuleResult { + assert!(!tls, "JIT doesn't yet support TLS"); + let id = self.declarations.declare_anonymous_data(writable, tls)?; + if self.isa.flags().is_pic() { + self.new_data_got_entry(id, std::ptr::null()); } Ok(id) } diff --git 
a/cranelift/jit/src/compiled_blob.rs b/cranelift/jit/src/compiled_blob.rs index d44497ae9e..f00165dbab 100644 --- a/cranelift/jit/src/compiled_blob.rs +++ b/cranelift/jit/src/compiled_blob.rs @@ -72,6 +72,15 @@ impl CompiledBlob { write_unaligned(at as *mut i32, pcrel) }; } + Reloc::S390xPCRel32Dbl => { + let base = get_address(name); + let what = unsafe { base.offset(isize::try_from(addend).unwrap()) }; + let pcrel = i32::try_from(((what as isize) - (at as isize)) >> 1).unwrap(); + #[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] + unsafe { + write_unaligned(at as *mut i32, pcrel) + }; + } _ => unimplemented!(), } } diff --git a/cranelift/jit/src/memory.rs b/cranelift/jit/src/memory.rs index b85d231185..afd1a9e053 100644 --- a/cranelift/jit/src/memory.rs +++ b/cranelift/jit/src/memory.rs @@ -1,6 +1,3 @@ -#[cfg(not(feature = "selinux-fix"))] -use errno; - #[cfg(not(any(feature = "selinux-fix", windows)))] use libc; @@ -9,6 +6,7 @@ use memmap2::MmapMut; use region; use std::convert::TryFrom; +use std::io; use std::mem; use std::ptr; @@ -41,27 +39,22 @@ impl PtrLen { /// Create a new `PtrLen` pointing to at least `size` bytes of memory, /// suitably sized and aligned for memory protection. #[cfg(all(not(target_os = "windows"), feature = "selinux-fix"))] - fn with_size(size: usize) -> Result { + fn with_size(size: usize) -> io::Result { let page_size = region::page::size(); let alloc_size = round_up_to_page_size(size, page_size); - let map = MmapMut::map_anon(alloc_size); - - match map { - Ok(mut map) => { - // The order here is important; we assign the pointer first to get - // around compile time borrow errors. - Ok(Self { - ptr: map.as_mut_ptr(), - map: Some(map), - len: alloc_size, - }) - } - Err(e) => Err(e.to_string()), - } + MmapMut::map_anon(alloc_size).map(|mut mmap| { + // The order here is important; we assign the pointer first to get + // around compile time borrow errors. 
+ Ok(Self { + ptr: mmap.as_mut_ptr(), + map: Some(mmap), + len: alloc_size, + }) + }) } #[cfg(all(not(target_os = "windows"), not(feature = "selinux-fix")))] - fn with_size(size: usize) -> Result { + fn with_size(size: usize) -> io::Result { let mut ptr = ptr::null_mut(); let page_size = region::page::size(); let alloc_size = round_up_to_page_size(size, page_size); @@ -74,13 +67,13 @@ impl PtrLen { len: alloc_size, }) } else { - Err(errno::Errno(err).to_string()) + Err(io::Error::from_raw_os_error(err)) } } } #[cfg(target_os = "windows")] - fn with_size(size: usize) -> Result { + fn with_size(size: usize) -> io::Result { use winapi::um::memoryapi::VirtualAlloc; use winapi::um::winnt::{MEM_COMMIT, MEM_RESERVE, PAGE_READWRITE}; @@ -101,7 +94,7 @@ impl PtrLen { len: round_up_to_page_size(size, page_size), }) } else { - Err(errno::errno().to_string()) + Err(io::Error::last_os_error()) } } } @@ -149,8 +142,7 @@ impl Memory { self.position = 0; } - /// TODO: Use a proper error type. - pub(crate) fn allocate(&mut self, size: usize, align: u64) -> Result<*mut u8, String> { + pub(crate) fn allocate(&mut self, size: usize, align: u64) -> io::Result<*mut u8> { let align = usize::try_from(align).expect("alignment too big"); if self.position % align != 0 { self.position += align - self.position % align; diff --git a/cranelift/module/Cargo.toml b/cranelift/module/Cargo.toml index 0f64ebca4c..a3fe4a0901 100644 --- a/cranelift/module/Cargo.toml +++ b/cranelift/module/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cranelift-module" -version = "0.72.0" +version = "0.73.0" authors = ["The Cranelift Project Developers"] description = "Support for linking functions and data with Cranelift" repository = "https://github.com/bytecodealliance/wasmtime" @@ -11,11 +11,10 @@ readme = "README.md" edition = "2018" [dependencies] -cranelift-codegen = { path = "../codegen", version = "0.72.0", default-features = false } -cranelift-entity = { path = "../entity", version = "0.72.0" } 
+cranelift-codegen = { path = "../codegen", version = "0.73.0", default-features = false } +cranelift-entity = { path = "../entity", version = "0.73.0" } hashbrown = { version = "0.9.1", optional = true } log = { version = "0.4.6", default-features = false } -thiserror = "1.0.4" anyhow = "1.0" [features] diff --git a/cranelift/module/src/module.rs b/cranelift/module/src/module.rs index a1145abc04..191d468ed7 100644 --- a/cranelift/module/src/module.rs +++ b/cranelift/module/src/module.rs @@ -12,7 +12,6 @@ use cranelift_codegen::entity::{entity_impl, PrimaryMap}; use cranelift_codegen::{ir, isa, CodegenError, Context}; use std::borrow::ToOwned; use std::string::String; -use thiserror::Error; /// A function identifier for use in the `Module` interface. #[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] @@ -168,30 +167,85 @@ impl FunctionDeclaration { } /// Error messages for all `Module` methods -#[derive(Error, Debug)] +#[derive(Debug)] pub enum ModuleError { /// Indicates an identifier was used before it was declared - #[error("Undeclared identifier: {0}")] Undeclared(String), + /// Indicates an identifier was used as data/function first, but then used as the other - #[error("Incompatible declaration of identifier: {0}")] IncompatibleDeclaration(String), + /// Indicates a function identifier was declared with a /// different signature than declared previously - #[error("Function {0} signature {2:?} is incompatible with previous declaration {1:?}")] IncompatibleSignature(String, ir::Signature, ir::Signature), + /// Indicates an identifier was defined more than once - #[error("Duplicate definition of identifier: {0}")] DuplicateDefinition(String), + /// Indicates an identifier was defined, but was declared as an import - #[error("Invalid to define identifier declared as an import: {0}")] InvalidImportDefinition(String), + /// Wraps a `cranelift-codegen` error - #[error("Compilation error: {0}")] - Compilation(#[from] CodegenError), + 
Compilation(CodegenError), + /// Wraps a generic error from a backend - #[error("Backend error: {0}")] - Backend(#[source] anyhow::Error), + Backend(anyhow::Error), +} + +// This is manually implementing Error and Display instead of using thiserror to reduce the amount +// of dependencies used by Cranelift. +impl std::error::Error for ModuleError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + Self::Undeclared { .. } + | Self::IncompatibleDeclaration { .. } + | Self::IncompatibleSignature { .. } + | Self::DuplicateDefinition { .. } + | Self::InvalidImportDefinition { .. } => None, + Self::Compilation(source) => Some(source), + Self::Backend(source) => Some(&**source), + } + } +} + +impl std::fmt::Display for ModuleError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Self::Undeclared(name) => { + write!(f, "Undeclared identifier: {}", name) + } + Self::IncompatibleDeclaration(name) => { + write!(f, "Incompatible declaration of identifier: {}", name,) + } + Self::IncompatibleSignature(name, prev_sig, new_sig) => { + write!( + f, + "Function {} signature {:?} is incompatible with previous declaration {:?}", + name, new_sig, prev_sig, + ) + } + Self::DuplicateDefinition(name) => { + write!(f, "Duplicate definition of identifier: {}", name) + } + Self::InvalidImportDefinition(name) => { + write!( + f, + "Invalid to define identifier declared as an import: {}", + name, + ) + } + Self::Compilation(err) => { + write!(f, "Compilation error: {}", err) + } + Self::Backend(err) => write!(f, "Backend error: {}", err), + } + } +} + +impl std::convert::From for ModuleError { + fn from(source: CodegenError) -> Self { + Self::Compilation { 0: source } + } } /// A convenient alias for a `Result` that uses `ModuleError` as the error type. 
@@ -268,7 +322,7 @@ impl ModuleDeclarations { name: &str, linkage: Linkage, signature: &ir::Signature, - ) -> ModuleResult<(FuncId, &FunctionDeclaration)> { + ) -> ModuleResult<(FuncId, Linkage)> { // TODO: Can we avoid allocating names so often? use super::hash_map::Entry::*; match self.names.entry(name.to_owned()) { @@ -276,7 +330,7 @@ impl ModuleDeclarations { FuncOrDataId::Func(id) => { let existing = &mut self.functions[id]; existing.merge(linkage, signature)?; - Ok((id, existing)) + Ok((id, existing.linkage)) } FuncOrDataId::Data(..) => { Err(ModuleError::IncompatibleDeclaration(name.to_owned())) @@ -289,11 +343,25 @@ impl ModuleDeclarations { signature: signature.clone(), }); entry.insert(FuncOrDataId::Func(id)); - Ok((id, &self.functions[id])) + Ok((id, self.functions[id].linkage)) } } } + /// Declare an anonymous function in this module. + pub fn declare_anonymous_function( + &mut self, + signature: &ir::Signature, + ) -> ModuleResult { + let id = self.functions.push(FunctionDeclaration { + name: String::new(), + linkage: Linkage::Local, + signature: signature.clone(), + }); + self.functions[id].name = format!(".L{:?}", id); + Ok(id) + } + /// Declare a data object in this module. pub fn declare_data( &mut self, @@ -301,7 +369,7 @@ impl ModuleDeclarations { linkage: Linkage, writable: bool, tls: bool, - ) -> ModuleResult<(DataId, &DataDeclaration)> { + ) -> ModuleResult<(DataId, Linkage)> { // TODO: Can we avoid allocating names so often? use super::hash_map::Entry::*; match self.names.entry(name.to_owned()) { @@ -309,7 +377,7 @@ impl ModuleDeclarations { FuncOrDataId::Data(id) => { let existing = &mut self.data_objects[id]; existing.merge(linkage, writable, tls); - Ok((id, existing)) + Ok((id, existing.linkage)) } FuncOrDataId::Func(..) 
=> { @@ -324,10 +392,22 @@ impl ModuleDeclarations { tls, }); entry.insert(FuncOrDataId::Data(id)); - Ok((id, &self.data_objects[id])) + Ok((id, self.data_objects[id].linkage)) } } } + + /// Declare an anonymous data object in this module. + pub fn declare_anonymous_data(&mut self, writable: bool, tls: bool) -> ModuleResult { + let id = self.data_objects.push(DataDeclaration { + name: String::new(), + linkage: Linkage::Local, + writable, + tls, + }); + self.data_objects[id].name = format!(".L{:?}", id); + Ok(id) + } } /// Information about the compiled function. @@ -411,6 +491,9 @@ pub trait Module { signature: &ir::Signature, ) -> ModuleResult; + /// Declare an anonymous function in this module. + fn declare_anonymous_function(&mut self, signature: &ir::Signature) -> ModuleResult; + /// Declare a data object in this module. fn declare_data( &mut self, @@ -420,6 +503,9 @@ pub trait Module { tls: bool, ) -> ModuleResult; + /// Declare an anonymous data object in this module. + fn declare_anonymous_data(&mut self, writable: bool, tls: bool) -> ModuleResult; + /// Use this when you're building the IR of a function to reference a function. /// /// TODO: Coalesce redundant decls and signatures. 
@@ -532,6 +618,10 @@ impl Module for &mut M { (**self).declare_function(name, linkage, signature) } + fn declare_anonymous_function(&mut self, signature: &ir::Signature) -> ModuleResult { + (**self).declare_anonymous_function(signature) + } + fn declare_data( &mut self, name: &str, @@ -542,6 +632,10 @@ impl Module for &mut M { (**self).declare_data(name, linkage, writable, tls) } + fn declare_anonymous_data(&mut self, writable: bool, tls: bool) -> ModuleResult { + (**self).declare_anonymous_data(writable, tls) + } + fn declare_func_in_func(&self, func: FuncId, in_func: &mut ir::Function) -> ir::FuncRef { (**self).declare_func_in_func(func, in_func) } diff --git a/cranelift/native/Cargo.toml b/cranelift/native/Cargo.toml index c1dcf7153f..75ef5d64bb 100644 --- a/cranelift/native/Cargo.toml +++ b/cranelift/native/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cranelift-native" -version = "0.72.0" +version = "0.73.0" authors = ["The Cranelift Project Developers"] description = "Support for targeting the host with Cranelift" documentation = "https://docs.rs/cranelift-native" @@ -11,8 +11,8 @@ readme = "README.md" edition = "2018" [dependencies] -cranelift-codegen = { path = "../codegen", version = "0.72.0", default-features = false } -target-lexicon = "0.11" +cranelift-codegen = { path = "../codegen", version = "0.73.0", default-features = false } +target-lexicon = "0.12" [features] default = ["std"] diff --git a/cranelift/native/src/lib.rs b/cranelift/native/src/lib.rs index 3be04bc5f1..e425baeb8f 100644 --- a/cranelift/native/src/lib.rs +++ b/cranelift/native/src/lib.rs @@ -137,18 +137,20 @@ mod tests { if let Ok(isa_builder) = builder() { let flag_builder = settings::builder(); let isa = isa_builder.finish(settings::Flags::new(flag_builder)); - if cfg!(any(unix, target_os = "nebulet")) { + + if cfg!(all(target_os = "macos", target_arch = "aarch64")) { + assert_eq!(isa.default_call_conv(), CallConv::AppleAarch64); + } else if cfg!(any(unix, target_os = "nebulet")) { 
assert_eq!(isa.default_call_conv(), CallConv::SystemV); } else if cfg!(windows) { assert_eq!(isa.default_call_conv(), CallConv::WindowsFastcall); } + if cfg!(target_pointer_width = "64") { assert_eq!(isa.pointer_bits(), 64); - } - if cfg!(target_pointer_width = "32") { + } else if cfg!(target_pointer_width = "32") { assert_eq!(isa.pointer_bits(), 32); - } - if cfg!(target_pointer_width = "16") { + } else if cfg!(target_pointer_width = "16") { assert_eq!(isa.pointer_bits(), 16); } } diff --git a/cranelift/object/Cargo.toml b/cranelift/object/Cargo.toml index d26840ad75..10fdb34d01 100644 --- a/cranelift/object/Cargo.toml +++ b/cranelift/object/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cranelift-object" -version = "0.72.0" +version = "0.73.0" authors = ["The Cranelift Project Developers"] description = "Emit Cranelift output to native object files with `object`" repository = "https://github.com/bytecodealliance/wasmtime" @@ -10,16 +10,16 @@ readme = "README.md" edition = "2018" [dependencies] -cranelift-module = { path = "../module", version = "0.72.0" } -cranelift-codegen = { path = "../codegen", version = "0.72.0", default-features = false, features = ["std"] } -object = { version = "0.23.0", default-features = false, features = ["write"] } -target-lexicon = "0.11" +cranelift-module = { path = "../module", version = "0.73.0" } +cranelift-codegen = { path = "../codegen", version = "0.73.0", default-features = false, features = ["std"] } +object = { version = "0.24.0", default-features = false, features = ["write"] } +target-lexicon = "0.12" anyhow = "1.0" log = { version = "0.4.6", default-features = false } [dev-dependencies] -cranelift-frontend = { path = "../frontend", version = "0.72.0" } -cranelift-entity = { path = "../entity", version = "0.72.0" } +cranelift-frontend = { path = "../frontend", version = "0.73.0" } +cranelift-entity = { path = "../entity", version = "0.73.0" } [badges] maintenance = { status = "experimental" } diff --git 
a/cranelift/object/src/backend.rs b/cranelift/object/src/backend.rs index ad3aa331be..6cf54fc30a 100644 --- a/cranelift/object/src/backend.rs +++ b/cranelift/object/src/backend.rs @@ -123,6 +123,8 @@ pub struct ObjectModule { libcall_names: Box String + Send + Sync>, function_alignment: u64, per_function_section: bool, + anon_func_number: u64, + anon_data_number: u64, } impl ObjectModule { @@ -141,6 +143,8 @@ impl ObjectModule { libcall_names: builder.libcall_names, function_alignment: builder.function_alignment, per_function_section: builder.per_function_section, + anon_func_number: 0, + anon_data_number: 0, } } } @@ -174,11 +178,11 @@ impl Module for ObjectModule { ) -> ModuleResult { validate_symbol(name)?; - let (id, decl) = self + let (id, linkage) = self .declarations .declare_function(name, linkage, signature)?; - let (scope, weak) = translate_linkage(decl.linkage); + let (scope, weak) = translate_linkage(linkage); if let Some((function, _defined)) = self.functions[id] { let symbol = self.object.symbol_mut(function); @@ -201,6 +205,30 @@ impl Module for ObjectModule { Ok(id) } + fn declare_anonymous_function(&mut self, signature: &ir::Signature) -> ModuleResult { + // Symbols starting with .L are completely omitted from the symbol table after linking. + // Using hexadecimal instead of decimal for slightly smaller symbol names and often slightly + // faster linking. 
+ let name = format!(".Lfn{:x}", self.anon_func_number); + self.anon_func_number += 1; + + let id = self.declarations.declare_anonymous_function(signature)?; + + let symbol_id = self.object.add_symbol(Symbol { + name: name.as_bytes().to_vec(), + value: 0, + size: 0, + kind: SymbolKind::Text, + scope: SymbolScope::Compilation, + weak: false, + section: SymbolSection::Undefined, + flags: SymbolFlags::None, + }); + self.functions[id] = Some((symbol_id, false)); + + Ok(id) + } + fn declare_data( &mut self, name: &str, @@ -210,16 +238,18 @@ impl Module for ObjectModule { ) -> ModuleResult { validate_symbol(name)?; - let (id, decl) = self + let (id, linkage) = self .declarations .declare_data(name, linkage, writable, tls)?; - let kind = if decl.tls { + // Merging declarations with conflicting values for tls is not allowed, so it is safe to use + // the passed in tls value here. + let kind = if tls { SymbolKind::Tls } else { SymbolKind::Data }; - let (scope, weak) = translate_linkage(decl.linkage); + let (scope, weak) = translate_linkage(linkage); if let Some((data, _defined)) = self.data_objects[id] { let symbol = self.object.symbol_mut(data); @@ -243,6 +273,36 @@ impl Module for ObjectModule { Ok(id) } + fn declare_anonymous_data(&mut self, writable: bool, tls: bool) -> ModuleResult { + // Symbols starting with .L are completely omitted from the symbol table after linking. + // Using hexadecimal instead of decimal for slightly smaller symbol names and often slightly + // faster linking. 
+ let name = format!(".Ldata{:x}", self.anon_data_number); + self.anon_data_number += 1; + + let id = self.declarations.declare_anonymous_data(writable, tls)?; + + let kind = if tls { + SymbolKind::Tls + } else { + SymbolKind::Data + }; + + let symbol_id = self.object.add_symbol(Symbol { + name: name.as_bytes().to_vec(), + value: 0, + size: 0, + kind, + scope: SymbolScope::Compilation, + weak: false, + section: SymbolSection::Undefined, + flags: SymbolFlags::None, + }); + self.data_objects[id] = Some((symbol_id, false)); + + Ok(id) + } + fn define_function( &mut self, func_id: FuncId, @@ -579,8 +639,8 @@ fn translate_linkage(linkage: Linkage) -> (SymbolScope, bool) { (scope, weak) } -/// This is the output of `Module`'s -/// [`finish`](../cranelift_module/struct.Module.html#method.finish) function. +/// This is the output of `ObjectModule`'s +/// [`finish`](../struct.ObjectModule.html#method.finish) function. /// It contains the generated `Object` and other information produced during /// compilation. 
pub struct ObjectProduct { diff --git a/cranelift/peepmatic/Cargo.toml b/cranelift/peepmatic/Cargo.toml index fafa3c8789..6eb9e5eaa7 100644 --- a/cranelift/peepmatic/Cargo.toml +++ b/cranelift/peepmatic/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "peepmatic" -version = "0.72.0" +version = "0.73.0" authors = ["Nick Fitzgerald "] edition = "2018" license = "Apache-2.0 WITH LLVM-exception" @@ -10,13 +10,13 @@ description = "DSL and compiler for generating peephole optimizers" [dependencies] anyhow = "1.0.27" -peepmatic-automata = { version = "0.72.0", path = "crates/automata", features = ["dot"] } -peepmatic-macro = { version = "0.72.0", path = "crates/macro" } -peepmatic-runtime = { version = "0.72.0", path = "crates/runtime", features = ["construct"] } -peepmatic-traits = { version = "0.72.0", path = "crates/traits" } +peepmatic-automata = { version = "0.73.0", path = "crates/automata", features = ["dot"] } +peepmatic-macro = { version = "0.73.0", path = "crates/macro" } +peepmatic-runtime = { version = "0.73.0", path = "crates/runtime", features = ["construct"] } +peepmatic-traits = { version = "0.73.0", path = "crates/traits" } serde = { version = "1.0.105", features = ["derive"] } wast = "35.0.0" z3 = { version = "0.7.1", features = ["static-link-z3"] } [dev-dependencies] -peepmatic-test-operator = { version = "0.72.0", path = "crates/test-operator" } +peepmatic-test-operator = { version = "0.73.0", path = "crates/test-operator" } diff --git a/cranelift/peepmatic/crates/automata/Cargo.toml b/cranelift/peepmatic/crates/automata/Cargo.toml index 5359f68d57..ab6a1f3b44 100644 --- a/cranelift/peepmatic/crates/automata/Cargo.toml +++ b/cranelift/peepmatic/crates/automata/Cargo.toml @@ -1,13 +1,14 @@ [package] name = "peepmatic-automata" -version = "0.72.0" +version = "0.73.0" authors = ["Nick Fitzgerald "] edition = "2018" license = "Apache-2.0 WITH LLVM-exception" description = "Finite-state transducer automata" -[package.metadata.docs.rs] -all-features = true +# 
FIXME(rust-lang/cargo#9300): uncomment once that lands +# [package.metadata.docs.rs] +# all-features = true [dependencies] serde = { version = "1.0.106", optional = true } diff --git a/cranelift/peepmatic/crates/macro/Cargo.toml b/cranelift/peepmatic/crates/macro/Cargo.toml index 214cdb6145..4678e49904 100644 --- a/cranelift/peepmatic/crates/macro/Cargo.toml +++ b/cranelift/peepmatic/crates/macro/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "peepmatic-macro" -version = "0.72.0" +version = "0.73.0" authors = ["Nick Fitzgerald "] edition = "2018" license = "Apache-2.0 WITH LLVM-exception" diff --git a/cranelift/peepmatic/crates/runtime/Cargo.toml b/cranelift/peepmatic/crates/runtime/Cargo.toml index 98b8043c70..7fc8dfd36e 100644 --- a/cranelift/peepmatic/crates/runtime/Cargo.toml +++ b/cranelift/peepmatic/crates/runtime/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "peepmatic-runtime" -version = "0.72.0" +version = "0.73.0" authors = ["Nick Fitzgerald "] edition = "2018" license = "Apache-2.0 WITH LLVM-exception" @@ -12,14 +12,14 @@ description = "Runtime support for peepmatic peephole optimizers" bincode = "1.2.1" bumpalo = "3.2.0" log = "0.4.8" -peepmatic-automata = { version = "0.72.0", path = "../automata", features = ["serde"] } -peepmatic-traits = { version = "0.72.0", path = "../traits" } +peepmatic-automata = { version = "0.73.0", path = "../automata", features = ["serde"] } +peepmatic-traits = { version = "0.73.0", path = "../traits" } serde = { version = "1.0.105", features = ["derive"] } thiserror = "1.0.15" wast = { version = "35.0.0", optional = true } [dev-dependencies] -peepmatic-test-operator = { version = "0.72.0", path = "../test-operator" } +peepmatic-test-operator = { version = "0.73.0", path = "../test-operator" } serde_test = "1.0.114" [features] diff --git a/cranelift/peepmatic/crates/souper/Cargo.toml b/cranelift/peepmatic/crates/souper/Cargo.toml index f50e868bfe..013361c7c5 100644 --- a/cranelift/peepmatic/crates/souper/Cargo.toml +++ 
b/cranelift/peepmatic/crates/souper/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "peepmatic-souper" -version = "0.72.0" +version = "0.73.0" authors = ["Nick Fitzgerald "] edition = "2018" license = "Apache-2.0 WITH LLVM-exception" @@ -14,6 +14,6 @@ souper-ir = { version = "2.1.0", features = ["parse"] } log = "0.4.8" [dev-dependencies] -peepmatic = { path = "../..", version = "0.72.0" } -peepmatic-test-operator = { version = "0.72.0", path = "../test-operator" } +peepmatic = { path = "../..", version = "0.73.0" } +peepmatic-test-operator = { version = "0.73.0", path = "../test-operator" } wast = "35.0.0" diff --git a/cranelift/peepmatic/crates/test-operator/Cargo.toml b/cranelift/peepmatic/crates/test-operator/Cargo.toml index bd1cfe6279..a435a5d90f 100644 --- a/cranelift/peepmatic/crates/test-operator/Cargo.toml +++ b/cranelift/peepmatic/crates/test-operator/Cargo.toml @@ -2,13 +2,13 @@ name = "peepmatic-test-operator" description = "Operator for usage in peepmatic tests" license = "Apache-2.0 WITH LLVM-exception" -version = "0.72.0" +version = "0.73.0" authors = ["Nick Fitzgerald "] edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -peepmatic-traits = { version = "0.72.0", path = "../traits" } +peepmatic-traits = { version = "0.73.0", path = "../traits" } serde = { version = "1.0.105", features = ["derive"] } wast = "35.0.0" diff --git a/cranelift/peepmatic/crates/traits/Cargo.toml b/cranelift/peepmatic/crates/traits/Cargo.toml index 8d75528f1c..8c1ad24673 100644 --- a/cranelift/peepmatic/crates/traits/Cargo.toml +++ b/cranelift/peepmatic/crates/traits/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "peepmatic-traits" -version = "0.72.0" +version = "0.73.0" authors = ["Nick Fitzgerald "] edition = "2018" license = "Apache-2.0 WITH LLVM-exception" diff --git a/cranelift/preopt/Cargo.toml b/cranelift/preopt/Cargo.toml index a48410f483..9c5e2a4e3d 100644 --- a/cranelift/preopt/Cargo.toml +++ 
b/cranelift/preopt/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["The Cranelift Project Developers"] name = "cranelift-preopt" -version = "0.72.0" +version = "0.73.0" description = "Support for optimizations in Cranelift" license = "Apache-2.0 WITH LLVM-exception" documentation = "https://docs.rs/cranelift-preopt" @@ -12,8 +12,8 @@ keywords = ["optimize", "compile", "compiler", "jit"] edition = "2018" [dependencies] -cranelift-codegen = { path = "../codegen", version = "0.72.0", default-features = false } -cranelift-entity = { path = "../entity", version = "0.72.0" } +cranelift-codegen = { path = "../codegen", version = "0.73.0", default-features = false } +cranelift-entity = { path = "../entity", version = "0.73.0" } # This is commented out because it doesn't build on Rust 1.25.0, which # cranelift currently supports. # rustc_apfloat = { version = "0.1.2", default-features = false } diff --git a/cranelift/reader/Cargo.toml b/cranelift/reader/Cargo.toml index 8316b592d2..2d7e93fee0 100644 --- a/cranelift/reader/Cargo.toml +++ b/cranelift/reader/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["The Cranelift Project Developers"] name = "cranelift-reader" -version = "0.72.0" +version = "0.73.0" description = "Cranelift textual IR reader" license = "Apache-2.0 WITH LLVM-exception" documentation = "https://docs.rs/cranelift-reader" @@ -10,10 +10,9 @@ readme = "README.md" edition = "2018" [dependencies] -cranelift-codegen = { path = "../codegen", version = "0.72.0" } +cranelift-codegen = { path = "../codegen", version = "0.73.0" } smallvec = "1.6.1" -target-lexicon = "0.11" -thiserror = "1.0.15" +target-lexicon = "0.12" [badges] maintenance = { status = "experimental" } diff --git a/cranelift/reader/src/parser.rs b/cranelift/reader/src/parser.rs index 7146fa1a19..93618ffe88 100644 --- a/cranelift/reader/src/parser.rs +++ b/cranelift/reader/src/parser.rs @@ -21,7 +21,7 @@ use cranelift_codegen::ir::{ HeapStyle, JumpTable, JumpTableData, MemFlags, Opcode, SigRef, 
Signature, StackSlot, StackSlotData, StackSlotKind, Table, TableData, Type, Value, ValueLoc, }; -use cranelift_codegen::isa::{self, CallConv, Encoding, RegUnit, TargetIsa}; +use cranelift_codegen::isa::{self, BackendVariant, CallConv, Encoding, RegUnit, TargetIsa}; use cranelift_codegen::packed_option::ReservedValue; use cranelift_codegen::{settings, settings::Configurable, timing}; use smallvec::SmallVec; @@ -94,33 +94,6 @@ pub fn parse_test<'a>(text: &'a str, options: ParseOptions<'a>) -> ParseResult Parser<'a> { let loc = self.loc; // Grab the whole line so the lexer won't go looking for tokens on the // following lines. - let mut words = self.consume_line().trim().split_whitespace(); + let mut words = self.consume_line().trim().split_whitespace().peekable(); // Look for `target foo`. let target_name = match words.next() { Some(w) => w, @@ -1231,7 +1204,19 @@ impl<'a> Parser<'a> { Ok(triple) => triple, Err(err) => return err!(loc, err), }; - let mut isa_builder = match isa::lookup(triple) { + // Look for `machinst` or `legacy` option before instantiating IsaBuilder. 
+ let variant = match words.peek() { + Some(&"machinst") => { + words.next(); + BackendVariant::MachInst + } + Some(&"legacy") => { + words.next(); + BackendVariant::Legacy + } + _ => BackendVariant::Any, + }; + let mut isa_builder = match isa::lookup_variant(triple, variant) { Err(isa::LookupError::SupportDisabled) => { continue; } @@ -3714,10 +3699,7 @@ mod tests { IsaSpec::None(_) => panic!("Expected some ISA"), IsaSpec::Some(v) => { assert_eq!(v.len(), 1); - #[cfg(not(feature = "experimental_x64"))] - assert_eq!(v[0].name(), "x86"); - #[cfg(feature = "experimental_x64")] - assert_eq!(v[0].name(), "x64"); + assert!(v[0].name() == "x64" || v[0].name() == "x86"); } } } diff --git a/cranelift/serde/Cargo.toml b/cranelift/serde/Cargo.toml index 7bceca874a..0f62eff6b9 100644 --- a/cranelift/serde/Cargo.toml +++ b/cranelift/serde/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cranelift-serde" -version = "0.72.0" +version = "0.73.0" authors = ["The Cranelift Project Developers"] description = "Serializer/Deserializer for Cranelift IR" repository = "https://github.com/bytecodealliance/wasmtime" @@ -18,8 +18,8 @@ clap = "2.32.0" serde = "1.0.8" serde_derive = "1.0.75" serde_json = "1.0.26" -cranelift-codegen = { path = "../codegen", version = "0.72.0", features = ["enable-serde"] } -cranelift-reader = { path = "../reader", version = "0.72.0" } +cranelift-codegen = { path = "../codegen", version = "0.73.0", features = ["enable-serde"] } +cranelift-reader = { path = "../reader", version = "0.73.0" } [badges] maintenance = { status = "experimental" } diff --git a/cranelift/src/disasm.rs b/cranelift/src/disasm.rs index a98e867380..35a581d344 100644 --- a/cranelift/src/disasm.rs +++ b/cranelift/src/disasm.rs @@ -153,6 +153,11 @@ cfg_if! 
{ cs.set_skipdata(true).map_err(map_caperr)?; cs } + Architecture::S390x {..} => Capstone::new() + .sysz() + .mode(arch::sysz::ArchMode::Default) + .build() + .map_err(map_caperr)?, _ => anyhow::bail!("Unknown ISA"), }; diff --git a/cranelift/umbrella/Cargo.toml b/cranelift/umbrella/Cargo.toml index 29743fc0f8..8714b9a3fe 100644 --- a/cranelift/umbrella/Cargo.toml +++ b/cranelift/umbrella/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["The Cranelift Project Developers"] name = "cranelift" -version = "0.72.0" +version = "0.73.0" description = "Umbrella for commonly-used cranelift crates" license = "Apache-2.0 WITH LLVM-exception" documentation = "https://docs.rs/cranelift" @@ -12,8 +12,8 @@ keywords = ["compile", "compiler", "jit"] edition = "2018" [dependencies] -cranelift-codegen = { path = "../codegen", version = "0.72.0", default-features = false } -cranelift-frontend = { path = "../frontend", version = "0.72.0", default-features = false } +cranelift-codegen = { path = "../codegen", version = "0.73.0", default-features = false } +cranelift-frontend = { path = "../frontend", version = "0.73.0", default-features = false } [features] default = ["std"] diff --git a/cranelift/wasm/Cargo.toml b/cranelift/wasm/Cargo.toml index 25ed63339f..902c5c7c4a 100644 --- a/cranelift/wasm/Cargo.toml +++ b/cranelift/wasm/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cranelift-wasm" -version = "0.72.0" +version = "0.73.0" authors = ["The Cranelift Project Developers"] description = "Translator from WebAssembly to Cranelift IR" documentation = "https://docs.rs/cranelift-wasm" @@ -12,10 +12,10 @@ keywords = ["webassembly", "wasm"] edition = "2018" [dependencies] -wasmparser = { version = "0.76", default-features = false } -cranelift-codegen = { path = "../codegen", version = "0.72.0", default-features = false } -cranelift-entity = { path = "../entity", version = "0.72.0" } -cranelift-frontend = { path = "../frontend", version = "0.72.0", default-features = false } +wasmparser = { 
version = "0.77", default-features = false } +cranelift-codegen = { path = "../codegen", version = "0.73.0", default-features = false } +cranelift-entity = { path = "../entity", version = "0.73.0" } +cranelift-frontend = { path = "../frontend", version = "0.73.0", default-features = false } hashbrown = { version = "0.9.1", optional = true } itertools = "0.10.0" log = { version = "0.4.6", default-features = false } @@ -24,10 +24,10 @@ smallvec = "1.6.1" thiserror = "1.0.4" [dev-dependencies] -wat = "1.0.36" -target-lexicon = "0.11" +wat = "1.0.37" +target-lexicon = "0.12" # Enable the riscv feature for cranelift-codegen, as some tests require it -cranelift-codegen = { path = "../codegen", version = "0.72.0", default-features = false, features = ["riscv"] } +cranelift-codegen = { path = "../codegen", version = "0.73.0", default-features = false, features = ["riscv"] } [features] default = ["std"] diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs index 92b1ae1e58..d6403ad456 100644 --- a/cranelift/wasm/src/code_translator.rs +++ b/cranelift/wasm/src/code_translator.rs @@ -64,11 +64,11 @@ //! 32X4 and 64X2 types from CLIF and instead have a single V128 type. //! //! For further background see also: -//! https://github.com/bytecodealliance/wasmtime/issues/1147 +//! //! ("Too many raw_bitcasts in SIMD code") -//! https://github.com/bytecodealliance/cranelift/pull/1251 +//! //! ("Add X128 type to represent WebAssembly's V128 type") -//! https://github.com/bytecodealliance/cranelift/pull/1236 +//! //! 
("Relax verification to allow I8X16 to act as a default vector type") use super::{hash_map, HashMap}; @@ -1775,6 +1775,10 @@ pub fn translate_operator( let a = pop1_with_bitcast(state, I32X4, builder); state.push1(builder.ins().fcvt_from_uint(F32X4, a)) } + Operator::F64x2ConvertLowI32x4S => { + let a = pop1_with_bitcast(state, I32X4, builder); + state.push1(builder.ins().fcvt_low_from_sint(F64X2, a)); + } Operator::I32x4TruncSatF32x4S => { let a = pop1_with_bitcast(state, F32X4, builder); state.push1(builder.ins().fcvt_to_sint_sat(I32X4, a)) @@ -1851,12 +1855,10 @@ pub fn translate_operator( let arg = pop1_with_bitcast(state, type_of(op), builder); state.push1(builder.ins().nearest(arg)); } - Operator::I32x4DotI16x8S => { let (a, b) = pop2_with_bitcast(state, I16X8, builder); state.push1(builder.ins().widening_pairwise_dot_product_s(a, b)); } - Operator::I64x2ExtendLowI32x4S | Operator::I64x2ExtendHighI32x4S | Operator::I64x2ExtendLowI32x4U @@ -1880,7 +1882,6 @@ pub fn translate_operator( | Operator::I32x4ExtAddPairwiseI16x8U | Operator::F32x4DemoteF64x2Zero | Operator::F64x2PromoteLowF32x4 - | Operator::F64x2ConvertLowI32x4S | Operator::F64x2ConvertLowI32x4U | Operator::I32x4TruncSatF64x2SZero | Operator::I32x4TruncSatF64x2UZero diff --git a/cranelift/wasm/src/environ/dummy.rs b/cranelift/wasm/src/environ/dummy.rs index 2bcd004dbe..6ac82b73f0 100644 --- a/cranelift/wasm/src/environ/dummy.rs +++ b/cranelift/wasm/src/environ/dummy.rs @@ -15,12 +15,13 @@ use crate::translation_utils::{ DataIndex, DefinedFuncIndex, ElemIndex, FuncIndex, Global, GlobalIndex, Memory, MemoryIndex, Table, TableIndex, TypeIndex, }; +use crate::WasmType; use core::convert::TryFrom; use cranelift_codegen::cursor::FuncCursor; use cranelift_codegen::ir::immediates::{Offset32, Uimm64}; use cranelift_codegen::ir::types::*; use cranelift_codegen::ir::{self, InstBuilder}; -use cranelift_codegen::isa::TargetFrontendConfig; +use cranelift_codegen::isa::{CallConv, TargetFrontendConfig}; use 
cranelift_entity::{EntityRef, PrimaryMap, SecondaryMap}; use cranelift_frontend::FunctionBuilder; use std::boxed::Box; @@ -660,7 +661,25 @@ impl TargetEnvironment for DummyEnvironment { } impl<'data> ModuleEnvironment<'data> for DummyEnvironment { - fn declare_type_func(&mut self, _wasm: WasmFuncType, sig: ir::Signature) -> WasmResult<()> { + fn declare_type_func(&mut self, wasm: WasmFuncType) -> WasmResult<()> { + let mut sig = ir::Signature::new(CallConv::Fast); + let mut cvt = |ty: &WasmType| { + let reference_type = match self.pointer_type() { + ir::types::I32 => ir::types::R32, + ir::types::I64 => ir::types::R64, + _ => panic!("unsupported pointer type"), + }; + ir::AbiParam::new(match ty { + WasmType::I32 => ir::types::I32, + WasmType::I64 => ir::types::I64, + WasmType::F32 => ir::types::F32, + WasmType::F64 => ir::types::F64, + WasmType::V128 => ir::types::I8X16, + WasmType::FuncRef | WasmType::ExternRef | WasmType::ExnRef => reference_type, + }) + }; + sig.params.extend(wasm.params.iter().map(&mut cvt)); + sig.returns.extend(wasm.returns.iter().map(&mut cvt)); self.info.signatures.push(sig); Ok(()) } @@ -728,7 +747,7 @@ impl<'data> ModuleEnvironment<'data> for DummyEnvironment { &mut self, _table_index: TableIndex, _base: Option, - _offset: usize, + _offset: u32, _elements: Box<[FuncIndex]>, ) -> WasmResult<()> { // We do nothing @@ -773,7 +792,7 @@ impl<'data> ModuleEnvironment<'data> for DummyEnvironment { &mut self, _memory_index: MemoryIndex, _base: Option, - _offset: usize, + _offset: u32, _data: &'data [u8], ) -> WasmResult<()> { // We do nothing diff --git a/cranelift/wasm/src/environ/spec.rs b/cranelift/wasm/src/environ/spec.rs index 0bd3c48745..31c8d86a4e 100644 --- a/cranelift/wasm/src/environ/spec.rs +++ b/cranelift/wasm/src/environ/spec.rs @@ -702,11 +702,7 @@ pub trait ModuleEnvironment<'data>: TargetEnvironment { } /// Declares a function signature to the environment. 
- fn declare_type_func( - &mut self, - wasm_func_type: WasmFuncType, - sig: ir::Signature, - ) -> WasmResult<()>; + fn declare_type_func(&mut self, wasm_func_type: WasmFuncType) -> WasmResult<()>; /// Declares a module type signature to the environment. fn declare_type_module( @@ -941,7 +937,7 @@ pub trait ModuleEnvironment<'data>: TargetEnvironment { &mut self, table_index: TableIndex, base: Option, - offset: usize, + offset: u32, elements: Box<[FuncIndex]>, ) -> WasmResult<()>; @@ -952,6 +948,13 @@ pub trait ModuleEnvironment<'data>: TargetEnvironment { elements: Box<[FuncIndex]>, ) -> WasmResult<()>; + /// Indicates that a declarative element segment was seen in the wasm + /// module. + fn declare_elements(&mut self, elements: Box<[FuncIndex]>) -> WasmResult<()> { + drop(elements); + Ok(()) + } + /// Provides the number of passive data segments up front. /// /// By default this does nothing, but implementations may use this to @@ -988,7 +991,7 @@ pub trait ModuleEnvironment<'data>: TargetEnvironment { &mut self, memory_index: MemoryIndex, base: Option, - offset: usize, + offset: u32, data: &'data [u8], ) -> WasmResult<()>; diff --git a/cranelift/wasm/src/module_translator.rs b/cranelift/wasm/src/module_translator.rs index 819834f20e..0114a43c94 100644 --- a/cranelift/wasm/src/module_translator.rs +++ b/cranelift/wasm/src/module_translator.rs @@ -131,6 +131,7 @@ pub fn translate_module<'data>( name: "name", data, data_offset, + range: _, } => { let result = NameSectionReader::new(data, data_offset) .map_err(|e| e.into()) diff --git a/cranelift/wasm/src/sections_translator.rs b/cranelift/wasm/src/sections_translator.rs index a674658358..bd4dcd1136 100644 --- a/cranelift/wasm/src/sections_translator.rs +++ b/cranelift/wasm/src/sections_translator.rs @@ -18,7 +18,6 @@ use crate::wasm_unsupported; use core::convert::TryFrom; use core::convert::TryInto; use cranelift_codegen::ir::immediates::V128Imm; -use cranelift_codegen::ir::{self, AbiParam, Signature}; use 
cranelift_entity::packed_option::ReservedValue; use cranelift_entity::EntityRef; use std::boxed::Box; @@ -110,19 +109,7 @@ pub fn parse_type_section<'a>( for entry in types { match entry? { TypeDef::Func(wasm_func_ty) => { - let mut sig = - Signature::new(ModuleEnvironment::target_config(environ).default_call_conv); - sig.params.extend(wasm_func_ty.params.iter().map(|ty| { - let cret_arg: ir::Type = type_to_type(*ty, environ) - .expect("only numeric types are supported in function signatures"); - AbiParam::new(cret_arg) - })); - sig.returns.extend(wasm_func_ty.returns.iter().map(|ty| { - let cret_arg: ir::Type = type_to_type(*ty, environ) - .expect("only numeric types are supported in function signatures"); - AbiParam::new(cret_arg) - })); - environ.declare_type_func(wasm_func_ty.clone().try_into()?, sig)?; + environ.declare_type_func(wasm_func_ty.clone().try_into()?)?; module_translation_state .wasm_types .push((wasm_func_ty.params, wasm_func_ty.returns)); @@ -390,7 +377,7 @@ pub fn parse_element_section<'data>( } => { let mut init_expr_reader = init_expr.get_binary_reader(); let (base, offset) = match init_expr_reader.read_operator()? { - Operator::I32Const { value } => (None, value as u32 as usize), + Operator::I32Const { value } => (None, value as u32), Operator::GlobalGet { global_index } => { (Some(GlobalIndex::from_u32(global_index)), 0) } @@ -401,12 +388,6 @@ pub fn parse_element_section<'data>( )); } }; - // Check for offset + len overflow - if offset.checked_add(segments.len()).is_none() { - return Err(wasm_unsupported!( - "element segment offset and length overflows" - )); - } environ.declare_table_elements( TableIndex::from_u32(table_index), base, @@ -419,7 +400,7 @@ pub fn parse_element_section<'data>( environ.declare_passive_element(index, segments)?; } ElementKind::Declared => { - // Nothing to do here. 
+ environ.declare_elements(segments)?; } } } @@ -442,7 +423,7 @@ pub fn parse_data_section<'data>( } => { let mut init_expr_reader = init_expr.get_binary_reader(); let (base, offset) = match init_expr_reader.read_operator()? { - Operator::I32Const { value } => (None, value as u32 as usize), + Operator::I32Const { value } => (None, value as u32), Operator::GlobalGet { global_index } => { (Some(GlobalIndex::from_u32(global_index)), 0) } @@ -453,12 +434,6 @@ pub fn parse_data_section<'data>( )) } }; - // Check for offset + len overflow - if offset.checked_add(data.len()).is_none() { - return Err(wasm_unsupported!( - "data segment offset and length overflows" - )); - } environ.declare_data_initialization( MemoryIndex::from_u32(memory_index), base, diff --git a/cranelift/wasm/src/translation_utils.rs b/cranelift/wasm/src/translation_utils.rs index c8493d4b51..fffb7b375e 100644 --- a/cranelift/wasm/src/translation_utils.rs +++ b/cranelift/wasm/src/translation_utils.rs @@ -19,6 +19,7 @@ entity_impl!(FuncIndex); /// Index type of a defined function inside the WebAssembly module. 
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub struct DefinedFuncIndex(u32); entity_impl!(DefinedFuncIndex); diff --git a/crates/bench-api/src/lib.rs b/crates/bench-api/src/lib.rs index b2884d7738..6cf6104279 100644 --- a/crates/bench-api/src/lib.rs +++ b/crates/bench-api/src/lib.rs @@ -83,9 +83,8 @@ use std::env; use std::os::raw::{c_int, c_void}; use std::path::Path; use std::slice; -use wasi_cap_std_sync::WasiCtxBuilder; use wasmtime::{Config, Engine, Instance, Linker, Module, Store}; -use wasmtime_wasi::Wasi; +use wasmtime_wasi::sync::{Wasi, WasiCtxBuilder}; pub type ExitCode = c_int; pub const OK: ExitCode = 0; diff --git a/crates/c-api/include/wasmtime.h b/crates/c-api/include/wasmtime.h index 9d09860228..f163f923bb 100644 --- a/crates/c-api/include/wasmtime.h +++ b/crates/c-api/include/wasmtime.h @@ -283,14 +283,6 @@ WASMTIME_CONFIG_PROP(void, static_memory_guard_size, uint64_t) */ WASMTIME_CONFIG_PROP(void, dynamic_memory_guard_size, uint64_t) -/** - * \brief Configures the maximum number of instances that can be created. - * - * For more information see the Rust documentation at - * https://bytecodealliance.github.io/wasmtime/api/wasmtime/struct.Config.html#method.max_instances. - */ -WASMTIME_CONFIG_PROP(void, max_instances, size_t) - /** * \brief Enables Wasmtime's cache and loads configuration from the specified * path. @@ -721,7 +713,7 @@ WASM_API_EXTERN const wasm_name_t *wasmtime_frame_module_name(const wasm_frame_t * * Errors are returned if `args` have the wrong types, if the args/results * arrays have the wrong lengths, or if values come from the wrong store. * - * The are three possible return states from this function: + * There are three possible return states from this function: * * 1. The returned error is non-null. This means `results` * wasn't written to and `trap` will have `NULL` written to it. 
This state @@ -1000,9 +992,13 @@ WASM_API_EXTERN own wasmtime_error_t* wasmtime_module_serialize( /** * \brief Build a module from serialized data. - * * + * * This function does not take ownership of any of its arguments, but the * returned error and module are owned by the caller. + * + * This function is not safe to receive arbitrary user input. See the Rust + * documentation for more information on what inputs are safe to pass in here + * (e.g. only that of #wasmtime_module_serialize) */ WASM_API_EXTERN own wasmtime_error_t *wasmtime_module_deserialize( wasm_engine_t *engine, diff --git a/crates/c-api/src/config.rs b/crates/c-api/src/config.rs index f84bc320cd..3e6e313ba9 100644 --- a/crates/c-api/src/config.rs +++ b/crates/c-api/src/config.rs @@ -176,8 +176,3 @@ pub extern "C" fn wasmtime_config_static_memory_guard_size_set(c: &mut wasm_conf pub extern "C" fn wasmtime_config_dynamic_memory_guard_size_set(c: &mut wasm_config_t, size: u64) { c.config.dynamic_memory_guard_size(size); } - -#[no_mangle] -pub extern "C" fn wasmtime_config_max_instances_set(c: &mut wasm_config_t, limit: usize) { - c.config.max_instances(limit); -} diff --git a/crates/c-api/src/memory.rs b/crates/c-api/src/memory.rs index 55a001deed..54d3936849 100644 --- a/crates/c-api/src/memory.rs +++ b/crates/c-api/src/memory.rs @@ -31,13 +31,13 @@ impl wasm_memory_t { pub extern "C" fn wasm_memory_new( store: &wasm_store_t, mt: &wasm_memorytype_t, -) -> Box { - let memory = Memory::new(&store.store, mt.ty().ty.clone()); - Box::new(wasm_memory_t { +) -> Option> { + let memory = Memory::new(&store.store, mt.ty().ty.clone()).ok()?; + Some(Box::new(wasm_memory_t { ext: wasm_extern_t { which: memory.into(), }, - }) + })) } #[no_mangle] diff --git a/crates/c-api/src/module.rs b/crates/c-api/src/module.rs index c31f125b70..4b60c67e73 100644 --- a/crates/c-api/src/module.rs +++ b/crates/c-api/src/module.rs @@ -186,7 +186,7 @@ pub extern "C" fn wasmtime_module_deserialize( ret: &mut *mut wasm_module_t, ) 
-> Option> { handle_result( - Module::deserialize(&engine.engine, binary.as_slice()), + unsafe { Module::deserialize(&engine.engine, binary.as_slice()) }, |module| { let module = Box::new(wasm_module_t::new(module)); *ret = Box::into_raw(module); diff --git a/crates/c-api/src/wasi.rs b/crates/c-api/src/wasi.rs index 8fa15f0c20..302fcde8b7 100644 --- a/crates/c-api/src/wasi.rs +++ b/crates/c-api/src/wasi.rs @@ -1,7 +1,6 @@ //! The WASI embedding API definitions for Wasmtime. use crate::{wasm_extern_t, wasm_importtype_t, wasm_store_t, wasm_trap_t}; use anyhow::Result; -use cap_std::fs::Dir; use std::cell::RefCell; use std::collections::HashMap; use std::ffi::CStr; @@ -11,11 +10,13 @@ use std::path::{Path, PathBuf}; use std::rc::Rc; use std::slice; use std::str; -use wasi_cap_std_sync::WasiCtxBuilder; -use wasi_common::WasiCtx; use wasmtime::{Extern, Linker, Trap}; use wasmtime_wasi::{ - snapshots::preview_0::Wasi as WasiSnapshot0, snapshots::preview_1::Wasi as WasiPreview1, + sync::{ + snapshots::preview_0::Wasi as WasiSnapshot0, snapshots::preview_1::Wasi as WasiPreview1, + Dir, WasiCtxBuilder, + }, + WasiCtx, }; unsafe fn cstr_to_path<'a>(path: *const c_char) -> Option<&'a Path> { @@ -186,7 +187,7 @@ pub unsafe extern "C" fn wasi_config_preopen_dir( }; let dir = match cstr_to_path(path) { - Some(p) => match cap_std::fs::Dir::open_ambient_dir(p) { + Some(p) => match Dir::open_ambient_dir(p) { Ok(d) => d, Err(_) => return false, }, diff --git a/crates/cache/Cargo.toml b/crates/cache/Cargo.toml index 9baa480a51..29a628ea6c 100644 --- a/crates/cache/Cargo.toml +++ b/crates/cache/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wasmtime-cache" -version = "0.25.0" +version = "0.26.0" authors = ["The Wasmtime Project Developers"] description = "Support for automatic module caching with Wasmtime" license = "Apache-2.0 WITH LLVM-exception" diff --git a/crates/cache/src/lib.rs b/crates/cache/src/lib.rs index 9fe69c5baf..f29dd21056 100644 --- a/crates/cache/src/lib.rs +++ 
b/crates/cache/src/lib.rs @@ -48,17 +48,17 @@ impl<'config> ModuleCacheEntry<'config> { T: Hash, U: Serialize + for<'a> Deserialize<'a>, { + let inner = match &self.0 { + Some(inner) => inner, + None => return compute(state), + }; + let mut hasher = Sha256Hasher(Sha256::new()); state.hash(&mut hasher); let hash: [u8; 32] = hasher.0.finalize().into(); // standard encoding uses '/' which can't be used for filename let hash = base64::encode_config(&hash, base64::URL_SAFE_NO_PAD); - let inner = match &self.0 { - Some(inner) => inner, - None => return compute(state), - }; - if let Some(cached_val) = inner.get_data(&hash) { let mod_cache_path = inner.root_path.join(&hash); inner.cache_config.on_cache_get_async(&mod_cache_path); // call on success diff --git a/crates/cranelift/Cargo.toml b/crates/cranelift/Cargo.toml index 132ee49c44..56fa647628 100644 --- a/crates/cranelift/Cargo.toml +++ b/crates/cranelift/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wasmtime-cranelift" -version = "0.25.0" +version = "0.26.0" authors = ["The Wasmtime Project Developers"] description = "Integration between Cranelift and Wasmtime" license = "Apache-2.0 WITH LLVM-exception" @@ -12,9 +12,10 @@ readme = "README.md" edition = "2018" [dependencies] -wasmtime-environ = { path = "../environ", version = "0.25.0" } -cranelift-wasm = { path = "../../cranelift/wasm", version = "0.72.0" } -cranelift-codegen = { path = "../../cranelift/codegen", version = "0.72.0" } -cranelift-frontend = { path = "../../cranelift/frontend", version = "0.72.0" } -cranelift-entity = { path = "../../cranelift/entity", version = "0.72.0" } -wasmparser = "0.76.0" +wasmtime-environ = { path = "../environ", version = "0.26.0" } +cranelift-wasm = { path = "../../cranelift/wasm", version = "0.73.0" } +cranelift-codegen = { path = "../../cranelift/codegen", version = "0.73.0" } +cranelift-frontend = { path = "../../cranelift/frontend", version = "0.73.0" } +cranelift-entity = { path = "../../cranelift/entity", version = 
"0.73.0" } +wasmparser = "0.77.0" +target-lexicon = "0.12" diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs index a9481aca93..29d58dac20 100644 --- a/crates/cranelift/src/func_environ.rs +++ b/crates/cranelift/src/func_environ.rs @@ -4,20 +4,20 @@ use cranelift_codegen::ir::condcodes::*; use cranelift_codegen::ir::immediates::{Offset32, Uimm64}; use cranelift_codegen::ir::types::*; use cranelift_codegen::ir::{AbiParam, ArgumentPurpose, Function, InstBuilder, Signature}; -use cranelift_codegen::isa::{self, TargetFrontendConfig}; -use cranelift_entity::{EntityRef, PrimaryMap}; +use cranelift_codegen::isa::{self, TargetFrontendConfig, TargetIsa}; +use cranelift_entity::EntityRef; use cranelift_frontend::FunctionBuilder; use cranelift_frontend::Variable; use cranelift_wasm::{ - self, FuncIndex, FuncTranslationState, GlobalIndex, GlobalVariable, MemoryIndex, - SignatureIndex, TableIndex, TargetEnvironment, TypeIndex, WasmError, WasmResult, WasmType, + self, FuncIndex, FuncTranslationState, GlobalIndex, GlobalVariable, MemoryIndex, TableIndex, + TargetEnvironment, TypeIndex, WasmError, WasmResult, WasmType, }; use std::convert::TryFrom; use std::mem; use wasmparser::Operator; use wasmtime_environ::{ - BuiltinFunctionIndex, MemoryPlan, MemoryStyle, Module, TableStyle, Tunables, VMOffsets, - INTERRUPTED, WASM_PAGE_SIZE, + BuiltinFunctionIndex, MemoryPlan, MemoryStyle, Module, TableStyle, Tunables, TypeTables, + VMOffsets, INTERRUPTED, WASM_PAGE_SIZE, }; /// Compute an `ir::ExternalName` for a given wasm function index. @@ -109,14 +109,9 @@ wasmtime_environ::foreach_builtin_function!(declare_function_signatures); /// The `FuncEnvironment` implementation for use by the `ModuleEnvironment`. pub struct FuncEnvironment<'module_environment> { - /// Target-specified configuration. - target_config: TargetFrontendConfig, - - /// The module-level environment which this function-level environment belongs to. 
+ isa: &'module_environment (dyn TargetIsa + 'module_environment), module: &'module_environment Module, - - /// The native signatures for each type signature in this module - native_signatures: &'module_environment PrimaryMap, + types: &'module_environment TypeTables, /// The Cranelift global holding the vmctx address. vmctx: Option, @@ -146,27 +141,27 @@ pub struct FuncEnvironment<'module_environment> { impl<'module_environment> FuncEnvironment<'module_environment> { pub fn new( - target_config: TargetFrontendConfig, + isa: &'module_environment (dyn TargetIsa + 'module_environment), module: &'module_environment Module, - native_signatures: &'module_environment PrimaryMap, + types: &'module_environment TypeTables, tunables: &'module_environment Tunables, ) -> Self { let builtin_function_signatures = BuiltinFunctionSignatures::new( - target_config.pointer_type(), - match target_config.pointer_type() { + isa.pointer_type(), + match isa.pointer_type() { ir::types::I32 => ir::types::R32, ir::types::I64 => ir::types::R64, _ => panic!(), }, - target_config.default_call_conv, + crate::wasmtime_call_conv(isa), ); Self { - target_config, + isa, module, - native_signatures, + types, vmctx: None, builtin_function_signatures, - offsets: VMOffsets::new(target_config.pointer_bytes(), module), + offsets: VMOffsets::new(isa.pointer_bytes(), module), tunables, fuel_var: Variable::new(0), vminterrupts_ptr: Variable::new(0), @@ -178,7 +173,7 @@ impl<'module_environment> FuncEnvironment<'module_environment> { } fn pointer_type(&self) -> ir::Type { - self.target_config.pointer_type() + self.isa.pointer_type() } fn vmctx(&mut self, func: &mut Function) -> ir::GlobalValue { @@ -680,7 +675,7 @@ impl<'module_environment> FuncEnvironment<'module_environment> { impl<'module_environment> TargetEnvironment for FuncEnvironment<'module_environment> { fn target_config(&self) -> TargetFrontendConfig { - self.target_config + self.isa.frontend_config() } fn reference_type(&self, ty: WasmType) -> 
ir::Type { @@ -1339,7 +1334,8 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m index: TypeIndex, ) -> WasmResult { let index = self.module.types[index].unwrap_function(); - Ok(func.import_signature(self.native_signatures[index].clone())) + let sig = crate::indirect_signature(self.isa, self.types, index); + Ok(func.import_signature(sig)) } fn make_direct_func( @@ -1347,8 +1343,7 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m func: &mut ir::Function, index: FuncIndex, ) -> WasmResult { - let sig_index = self.module.functions[index]; - let sig = self.native_signatures[sig_index].clone(); + let sig = crate::func_signature(self.isa, self.module, self.types, index); let signature = func.import_signature(sig); let name = get_func_name(index); Ok(func.import_function(ir::ExtFuncData { diff --git a/crates/cranelift/src/lib.rs b/crates/cranelift/src/lib.rs index ee330ee6a7..896294ff39 100644 --- a/crates/cranelift/src/lib.rs +++ b/crates/cranelift/src/lib.rs @@ -90,16 +90,18 @@ use crate::func_environ::{get_func_name, FuncEnvironment}; use cranelift_codegen::ir::{self, ExternalName}; +use cranelift_codegen::isa::{CallConv, TargetIsa}; use cranelift_codegen::machinst::buffer::MachSrcLoc; use cranelift_codegen::print_errors::pretty_error; use cranelift_codegen::{binemit, isa, Context}; -use cranelift_wasm::{DefinedFuncIndex, FuncIndex, FuncTranslator}; +use cranelift_wasm::{DefinedFuncIndex, FuncIndex, FuncTranslator, SignatureIndex, WasmType}; use std::convert::TryFrom; use std::sync::Mutex; +use target_lexicon::CallingConvention; use wasmtime_environ::{ CompileError, CompiledFunction, Compiler, FunctionAddressMap, FunctionBodyData, - InstructionAddressMap, ModuleTranslation, Relocation, RelocationTarget, StackMapInformation, - TrapInformation, Tunables, TypeTables, + InstructionAddressMap, Module, ModuleTranslation, Relocation, RelocationTarget, + StackMapInformation, TrapInformation, Tunables, 
TypeTables, }; mod func_environ; @@ -354,18 +356,12 @@ impl Compiler for Cranelift { let func_index = module.func_index(func_index); let mut context = Context::new(); context.func.name = get_func_name(func_index); - let sig_index = module.functions[func_index]; - context.func.signature = types.native_signatures[sig_index].clone(); + context.func.signature = func_signature(isa, module, types, func_index); if tunables.generate_native_debuginfo { context.func.collect_debug_info(); } - let mut func_env = FuncEnvironment::new( - isa.frontend_config(), - module, - &types.native_signatures, - tunables, - ); + let mut func_env = FuncEnvironment::new(isa, module, types, tunables); // We use these as constant offsets below in // `stack_limit_from_arguments`, so assert their values here. This @@ -457,3 +453,83 @@ impl Compiler for Cranelift { }) } } + +pub fn blank_sig(isa: &dyn TargetIsa, call_conv: CallConv) -> ir::Signature { + let pointer_type = isa.pointer_type(); + let mut sig = ir::Signature::new(call_conv); + // Add the caller/callee `vmctx` parameters. 
+ sig.params.push(ir::AbiParam::special( + pointer_type, + ir::ArgumentPurpose::VMContext, + )); + sig.params.push(ir::AbiParam::new(pointer_type)); + return sig; +} + +pub fn wasmtime_call_conv(isa: &dyn TargetIsa) -> CallConv { + match isa.triple().default_calling_convention() { + Ok(CallingConvention::SystemV) | Ok(CallingConvention::AppleAarch64) | Err(()) => { + CallConv::WasmtimeSystemV + } + Ok(CallingConvention::WindowsFastcall) => CallConv::WasmtimeFastcall, + Ok(unimp) => unimplemented!("calling convention: {:?}", unimp), + } +} + +pub fn push_types( + isa: &dyn TargetIsa, + sig: &mut ir::Signature, + types: &TypeTables, + index: SignatureIndex, +) { + let wasm = &types.wasm_signatures[index]; + + let cvt = |ty: &WasmType| { + ir::AbiParam::new(match ty { + WasmType::I32 => ir::types::I32, + WasmType::I64 => ir::types::I64, + WasmType::F32 => ir::types::F32, + WasmType::F64 => ir::types::F64, + WasmType::V128 => ir::types::I8X16, + WasmType::FuncRef | WasmType::ExternRef => { + wasmtime_environ::reference_type(*ty, isa.pointer_type()) + } + WasmType::ExnRef => unimplemented!(), + }) + }; + sig.params.extend(wasm.params.iter().map(&cvt)); + sig.returns.extend(wasm.returns.iter().map(&cvt)); +} + +pub fn indirect_signature( + isa: &dyn TargetIsa, + types: &TypeTables, + index: SignatureIndex, +) -> ir::Signature { + let mut sig = blank_sig(isa, wasmtime_call_conv(isa)); + push_types(isa, &mut sig, types, index); + return sig; +} + +pub fn func_signature( + isa: &dyn TargetIsa, + module: &Module, + types: &TypeTables, + index: FuncIndex, +) -> ir::Signature { + let call_conv = match module.defined_func_index(index) { + // If this is a defined function in the module and it's never possibly + // exported, then we can optimize this function to use the fastest + // calling convention since it's purely an internal implementation + // detail of the module itself. + Some(idx) if !module.possibly_exported_funcs.contains(&idx) => CallConv::Fast, + + // ... 
otherwise if it's an imported function or if it's a possibly + // exported function then we use the default ABI wasmtime would + // otherwise select. + _ => wasmtime_call_conv(isa), + }; + let mut sig = blank_sig(isa, call_conv); + push_types(isa, &mut sig, types, module.functions[index]); + return sig; +} diff --git a/crates/debug/Cargo.toml b/crates/debug/Cargo.toml index 0e69efd8fb..eb20daecbb 100644 --- a/crates/debug/Cargo.toml +++ b/crates/debug/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wasmtime-debug" -version = "0.25.0" +version = "0.26.0" authors = ["The Wasmtime Project Developers"] description = "Debug utils for WebAsssembly code in Cranelift" license = "Apache-2.0 WITH LLVM-exception" @@ -12,11 +12,11 @@ readme = "README.md" edition = "2018" [dependencies] -gimli = "0.23.0" -wasmparser = "0.76" -object = { version = "0.23.0", default-features = false, features = ["read_core", "elf", "write"] } -wasmtime-environ = { path = "../environ", version = "0.25.0" } -target-lexicon = { version = "0.11.0", default-features = false } +gimli = "0.24.0" +wasmparser = "0.77" +object = { version = "0.24.0", default-features = false, features = ["read_core", "elf", "write"] } +wasmtime-environ = { path = "../environ", version = "0.26.0" } +target-lexicon = { version = "0.12.0", default-features = false } anyhow = "1.0" thiserror = "1.0.4" more-asserts = "0.2.1" diff --git a/crates/debug/src/lib.rs b/crates/debug/src/lib.rs index 597ab2e25c..80009dccf9 100644 --- a/crates/debug/src/lib.rs +++ b/crates/debug/src/lib.rs @@ -3,6 +3,7 @@ #![allow(clippy::cast_ptr_alignment)] use anyhow::{bail, ensure, Error}; +use object::endian::{BigEndian, Endian, Endianness, LittleEndian}; use object::{RelocationEncoding, RelocationKind}; use std::collections::HashMap; @@ -18,13 +19,20 @@ pub fn create_gdbjit_image( defined_funcs_offset: usize, funcs: &[*const u8], ) -> Result, Error> { - ensure_supported_elf_format(&mut bytes)?; + let e = ensure_supported_elf_format(&bytes)?; // patch 
relocs - relocate_dwarf_sections(&mut bytes, defined_funcs_offset, funcs)?; + relocate_dwarf_sections(&bytes, defined_funcs_offset, funcs)?; // elf is still missing details... - convert_object_elf_to_loadable_file(&mut bytes, code_region); + match e { + Endianness::Little => { + convert_object_elf_to_loadable_file::(&mut bytes, code_region) + } + Endianness::Big => { + convert_object_elf_to_loadable_file::(&mut bytes, code_region) + } + } // let mut file = ::std::fs::File::create(::std::path::Path::new("test.o")).expect("file"); // ::std::io::Write::write_all(&mut file, &bytes).expect("write"); @@ -33,7 +41,7 @@ pub fn create_gdbjit_image( } fn relocate_dwarf_sections( - bytes: &mut [u8], + bytes: &[u8], defined_funcs_offset: usize, funcs: &[*const u8], ) -> Result<(), Error> { @@ -83,20 +91,33 @@ fn relocate_dwarf_sections( Ok(()) } -fn ensure_supported_elf_format(bytes: &mut Vec) -> Result<(), Error> { +fn ensure_supported_elf_format(bytes: &[u8]) -> Result { use object::elf::*; - use object::endian::LittleEndian; + use object::read::elf::*; use std::mem::size_of; - let e = LittleEndian; - let header: &FileHeader64 = - unsafe { &*(bytes.as_mut_ptr() as *const FileHeader64<_>) }; - ensure!( - header.e_ident.class == ELFCLASS64 && header.e_ident.data == ELFDATA2LSB, - "bits and endianess in .ELF", - ); + let kind = match object::FileKind::parse(bytes) { + Ok(file) => file, + Err(err) => { + bail!("Failed to parse file: {}", err); + } + }; + let header = match kind { + object::FileKind::Elf64 => match object::elf::FileHeader64::::parse(bytes) { + Ok(header) => header, + Err(err) => { + bail!("Unsupported ELF file: {}", err); + } + }, + _ => { + bail!("only 64-bit ELF files currently supported") + } + }; + let e = header.endian().unwrap(); + match header.e_machine.get(e) { EM_X86_64 => (), + EM_S390 => (), machine => { bail!("Unsupported ELF target machine: {:x}", machine); } @@ -106,23 +127,25 @@ fn ensure_supported_elf_format(bytes: &mut Vec) -> Result<(), Error> { 
"program header table is empty" ); let e_shentsize = header.e_shentsize.get(e); - ensure!( - e_shentsize as usize == size_of::>(), - "size of sh" - ); - Ok(()) + let req_shentsize = match e { + Endianness::Little => size_of::>(), + Endianness::Big => size_of::>(), + }; + ensure!(e_shentsize as usize == req_shentsize, "size of sh"); + Ok(e) } -fn convert_object_elf_to_loadable_file(bytes: &mut Vec, code_region: (*const u8, usize)) { +fn convert_object_elf_to_loadable_file( + bytes: &mut Vec, + code_region: (*const u8, usize), +) { use object::elf::*; - use object::endian::LittleEndian; use std::ffi::CStr; use std::mem::size_of; use std::os::raw::c_char; - let e = LittleEndian; - let header: &FileHeader64 = - unsafe { &*(bytes.as_mut_ptr() as *const FileHeader64<_>) }; + let e = E::default(); + let header: &FileHeader64 = unsafe { &*(bytes.as_mut_ptr() as *const FileHeader64<_>) }; let e_shentsize = header.e_shentsize.get(e); let e_shoff = header.e_shoff.get(e); @@ -130,7 +153,7 @@ fn convert_object_elf_to_loadable_file(bytes: &mut Vec, code_region: (*const let mut shstrtab_off = 0; for i in 0..e_shnum { let off = e_shoff as isize + i as isize * e_shentsize as isize; - let section: &SectionHeader64 = + let section: &SectionHeader64 = unsafe { &*(bytes.as_ptr().offset(off) as *const SectionHeader64<_>) }; if section.sh_type.get(e) != SHT_STRTAB { continue; @@ -140,7 +163,7 @@ fn convert_object_elf_to_loadable_file(bytes: &mut Vec, code_region: (*const let mut segment: Option<_> = None; for i in 0..e_shnum { let off = e_shoff as isize + i as isize * e_shentsize as isize; - let section: &mut SectionHeader64 = + let section: &mut SectionHeader64 = unsafe { &mut *(bytes.as_mut_ptr().offset(off) as *mut SectionHeader64<_>) }; if section.sh_type.get(e) != SHT_PROGBITS { continue; @@ -171,12 +194,12 @@ fn convert_object_elf_to_loadable_file(bytes: &mut Vec, code_region: (*const // LLDB wants segment with virtual address set, placing them at the end of ELF. 
let ph_off = bytes.len(); - let e_phentsize = size_of::>(); + let e_phentsize = size_of::>(); let e_phnum = 1; bytes.resize(ph_off + e_phentsize * e_phnum, 0); if let Some((sh_offset, sh_size)) = segment { let (v_offset, size) = code_region; - let program: &mut ProgramHeader64 = + let program: &mut ProgramHeader64 = unsafe { &mut *(bytes.as_ptr().add(ph_off) as *mut ProgramHeader64<_>) }; program.p_type.set(e, PT_LOAD); program.p_offset.set(e, sh_offset); @@ -189,7 +212,7 @@ fn convert_object_elf_to_loadable_file(bytes: &mut Vec, code_region: (*const } // It is somewhat loadable ELF file at this moment. - let header: &mut FileHeader64 = + let header: &mut FileHeader64 = unsafe { &mut *(bytes.as_mut_ptr() as *mut FileHeader64<_>) }; header.e_type.set(e, ET_DYN); header.e_phoff.set(e, ph_off as u64); diff --git a/crates/debug/src/transform/expression.rs b/crates/debug/src/transform/expression.rs index 57f3cc7a21..484b9c6648 100644 --- a/crates/debug/src/transform/expression.rs +++ b/crates/debug/src/transform/expression.rs @@ -512,24 +512,28 @@ where } }; } + // Find all landing pads by scanning bytes, do not care about // false location at this moment. // Looks hacky but it is fast; does not need to be really exact. 
- for i in 0..buf.len() - 2 { - let op = buf[i]; - if op == gimli::constants::DW_OP_bra.0 || op == gimli::constants::DW_OP_skip.0 { - // TODO fix for big-endian - let offset = i16::from_le_bytes([buf[i + 1], buf[i + 2]]); - let origin = i + 3; - // Discarding out-of-bounds jumps (also some of falsely detected ops) - if (offset >= 0 && offset as usize + origin <= buf.len()) - || (offset < 0 && -offset as usize <= origin) - { - let target = buf.len() as isize - origin as isize - offset as isize; - jump_targets.insert(target as u64, JumpTargetMarker::new()); + if buf.len() > 2 { + for i in 0..buf.len() - 2 { + let op = buf[i]; + if op == gimli::constants::DW_OP_bra.0 || op == gimli::constants::DW_OP_skip.0 { + // TODO fix for big-endian + let offset = i16::from_le_bytes([buf[i + 1], buf[i + 2]]); + let origin = i + 3; + // Discarding out-of-bounds jumps (also some of falsely detected ops) + if (offset >= 0 && offset as usize + origin <= buf.len()) + || (offset < 0 && -offset as usize <= origin) + { + let target = buf.len() as isize - origin as isize - offset as isize; + jump_targets.insert(target as u64, JumpTargetMarker::new()); + } } } } + while !pc.is_empty() { let unread_bytes = pc.len().into_u64(); if let Some(marker) = jump_targets.get(&unread_bytes) { @@ -537,150 +541,145 @@ where parts.push(CompiledExpressionPart::LandingPad(marker.clone())); } - let next = buf[pc.offset_from(&expr.0).into_u64() as usize]; need_deref = true; - if next == 0xED { - // WebAssembly DWARF extension - pc.read_u8()?; - let ty = pc.read_uleb128()?; - // Supporting only wasm locals. - if ty != 0 { - // TODO support wasm globals? + + let pos = pc.offset_from(&expr.0).into_u64() as usize; + let op = Operation::parse(&mut pc, encoding)?; + match op { + Operation::FrameOffset { offset } => { + // Expand DW_OP_fbreg into frame location and DW_OP_plus_uconst. + if frame_base.is_some() { + // Add frame base expressions. 
+ flush_code_chunk!(); + parts.extend_from_slice(&frame_base.unwrap().parts); + } + if let Some(CompiledExpressionPart::Local { trailing, .. }) = parts.last_mut() { + // Reset local trailing flag. + *trailing = false; + } + // Append DW_OP_plus_uconst part. + let mut writer = ExpressionWriter::new(); + writer.write_op(gimli::constants::DW_OP_plus_uconst)?; + writer.write_uleb128(offset as u64)?; + code_chunk.extend(writer.into_vec()); + continue; + } + Operation::Drop { .. } + | Operation::Pick { .. } + | Operation::Swap { .. } + | Operation::Rot { .. } + | Operation::Nop { .. } + | Operation::UnsignedConstant { .. } + | Operation::SignedConstant { .. } + | Operation::ConstantIndex { .. } + | Operation::PlusConstant { .. } + | Operation::Abs { .. } + | Operation::And { .. } + | Operation::Or { .. } + | Operation::Xor { .. } + | Operation::Shl { .. } + | Operation::Plus { .. } + | Operation::Minus { .. } + | Operation::Div { .. } + | Operation::Mod { .. } + | Operation::Mul { .. } + | Operation::Neg { .. } + | Operation::Not { .. } + | Operation::Lt { .. } + | Operation::Gt { .. } + | Operation::Le { .. } + | Operation::Ge { .. } + | Operation::Eq { .. } + | Operation::Ne { .. } + | Operation::TypedLiteral { .. } + | Operation::Convert { .. } + | Operation::Reinterpret { .. } + | Operation::Piece { .. } => (), + Operation::Bra { target } | Operation::Skip { target } => { + flush_code_chunk!(); + let arc_to = (pc.len().into_u64() as isize - target as isize) as u64; + let marker = match jump_targets.get(&arc_to) { + Some(m) => m.clone(), + None => { + // Marker not found: probably out of bounds. + return Ok(None); + } + }; + push!(CompiledExpressionPart::Jump { + conditionally: match op { + Operation::Bra { .. } => true, + _ => false, + }, + target: marker, + }); + continue; + } + Operation::StackValue => { + need_deref = false; + + // Find extra stack_value, that follow wasm-local operators, + // and mark such locals with special flag. 
+ if let (Some(CompiledExpressionPart::Local { trailing, .. }), true) = + (parts.last_mut(), code_chunk.is_empty()) + { + *trailing = true; + continue; + } + } + Operation::Deref { .. } => { + flush_code_chunk!(); + push!(CompiledExpressionPart::Deref); + // Don't re-enter the loop here (i.e. continue), because the + // DW_OP_deref still needs to be kept. + } + Operation::WasmLocal { index } => { + flush_code_chunk!(); + let label = ValueLabel::from_u32(index as u32); + push!(CompiledExpressionPart::Local { + label, + trailing: false, + }); + continue; + } + Operation::Shr { .. } | Operation::Shra { .. } => { + // Insert value normalisation part. + // The semantic value is 32 bits (TODO: check unit) + // but the target architecture is 64-bits. So we'll + // clean out the upper 32 bits (in a sign-correct way) + // to avoid contamination of the result with randomness. + let mut writer = ExpressionWriter::new(); + writer.write_op(gimli::constants::DW_OP_plus_uconst)?; + writer.write_uleb128(32)?; // increase shift amount + writer.write_op(gimli::constants::DW_OP_swap)?; + writer.write_op(gimli::constants::DW_OP_const1u)?; + writer.write_u8(32)?; + writer.write_op(gimli::constants::DW_OP_shl)?; + writer.write_op(gimli::constants::DW_OP_swap)?; + code_chunk.extend(writer.into_vec()); + // Don't re-enter the loop here (i.e. continue), because the + // DW_OP_shr* still needs to be kept. + } + Operation::Address { .. } + | Operation::AddressIndex { .. } + | Operation::Call { .. } + | Operation::Register { .. } + | Operation::RegisterOffset { .. } + | Operation::CallFrameCFA + | Operation::PushObjectAddress + | Operation::TLS + | Operation::ImplicitValue { .. } + | Operation::ImplicitPointer { .. } + | Operation::EntryValue { .. } + | Operation::ParameterRef { .. 
} => { return Ok(None); } - let index = pc.read_sleb128()?; - flush_code_chunk!(); - let label = ValueLabel::from_u32(index as u32); - push!(CompiledExpressionPart::Local { - label, - trailing: false, - }); - } else { - let pos = pc.offset_from(&expr.0).into_u64() as usize; - let op = Operation::parse(&mut pc, encoding)?; - match op { - Operation::FrameOffset { offset } => { - // Expand DW_OP_fbreg into frame location and DW_OP_plus_uconst. - if frame_base.is_some() { - // Add frame base expressions. - flush_code_chunk!(); - parts.extend_from_slice(&frame_base.unwrap().parts); - } - if let Some(CompiledExpressionPart::Local { trailing, .. }) = parts.last_mut() { - // Reset local trailing flag. - *trailing = false; - } - // Append DW_OP_plus_uconst part. - let mut writer = ExpressionWriter::new(); - writer.write_op(gimli::constants::DW_OP_plus_uconst)?; - writer.write_uleb128(offset as u64)?; - code_chunk.extend(writer.into_vec()); - continue; - } - Operation::Drop { .. } - | Operation::Pick { .. } - | Operation::Swap { .. } - | Operation::Rot { .. } - | Operation::Nop { .. } - | Operation::UnsignedConstant { .. } - | Operation::SignedConstant { .. } - | Operation::ConstantIndex { .. } - | Operation::PlusConstant { .. } - | Operation::Abs { .. } - | Operation::And { .. } - | Operation::Or { .. } - | Operation::Xor { .. } - | Operation::Shl { .. } - | Operation::Plus { .. } - | Operation::Minus { .. } - | Operation::Div { .. } - | Operation::Mod { .. } - | Operation::Mul { .. } - | Operation::Neg { .. } - | Operation::Not { .. } - | Operation::Lt { .. } - | Operation::Gt { .. } - | Operation::Le { .. } - | Operation::Ge { .. } - | Operation::Eq { .. } - | Operation::Ne { .. } - | Operation::TypedLiteral { .. } - | Operation::Convert { .. } - | Operation::Reinterpret { .. } - | Operation::Piece { .. 
} => (), - Operation::Bra { target } | Operation::Skip { target } => { - flush_code_chunk!(); - let arc_to = (pc.len().into_u64() as isize - target as isize) as u64; - let marker = match jump_targets.get(&arc_to) { - Some(m) => m.clone(), - None => { - // Marker not found: probably out of bounds. - return Ok(None); - } - }; - push!(CompiledExpressionPart::Jump { - conditionally: match op { - Operation::Bra { .. } => true, - _ => false, - }, - target: marker, - }); - continue; - } - Operation::StackValue => { - need_deref = false; - - // Find extra stack_value, that follow wasm-local operators, - // and mark such locals with special flag. - if let (Some(CompiledExpressionPart::Local { trailing, .. }), true) = - (parts.last_mut(), code_chunk.is_empty()) - { - *trailing = true; - continue; - } - } - Operation::Deref { .. } => { - flush_code_chunk!(); - push!(CompiledExpressionPart::Deref); - // Don't re-enter the loop here (i.e. continue), because the - // DW_OP_deref still needs to be kept. - } - Operation::Shr { .. } | Operation::Shra { .. } => { - // Insert value normalisation part. - // The semantic value is 32 bits (TODO: check unit) - // but the target architecture is 64-bits. So we'll - // clean out the upper 32 bits (in a sign-correct way) - // to avoid contamination of the result with randomness. - let mut writer = ExpressionWriter::new(); - writer.write_op(gimli::constants::DW_OP_plus_uconst)?; - writer.write_uleb128(32)?; // increase shift amount - writer.write_op(gimli::constants::DW_OP_swap)?; - writer.write_op(gimli::constants::DW_OP_const1u)?; - writer.write_u8(32)?; - writer.write_op(gimli::constants::DW_OP_shl)?; - writer.write_op(gimli::constants::DW_OP_swap)?; - code_chunk.extend(writer.into_vec()); - // Don't re-enter the loop here (i.e. continue), because the - // DW_OP_shr* still needs to be kept. - } - Operation::Address { .. } - | Operation::AddressIndex { .. } - | Operation::Call { .. } - | Operation::Register { .. 
} - | Operation::RegisterOffset { .. } - | Operation::CallFrameCFA - | Operation::PushObjectAddress - | Operation::TLS - | Operation::ImplicitValue { .. } - | Operation::ImplicitPointer { .. } - | Operation::EntryValue { .. } - | Operation::ParameterRef { .. } => { - return Ok(None); - } + Operation::WasmGlobal { index: _ } | Operation::WasmStack { index: _ } => { + // TODO support those two + return Ok(None); } - let chunk = &buf[pos..pc.offset_from(&expr.0).into_u64() as usize]; - code_chunk.extend_from_slice(chunk); } + let chunk = &buf[pos..pc.offset_from(&expr.0).into_u64() as usize]; + code_chunk.extend_from_slice(chunk); } flush_code_chunk!(); diff --git a/crates/debug/src/transform/line_program.rs b/crates/debug/src/transform/line_program.rs index ba40a3e94b..7906fbcd40 100644 --- a/crates/debug/src/transform/line_program.rs +++ b/crates/debug/src/transform/line_program.rs @@ -189,10 +189,10 @@ where address: row.address(), op_index: row.op_index(), file_index: row.file_index(), - line: row.line().unwrap_or(0), + line: row.line().map(|nonzero| nonzero.get()).unwrap_or(0), column: match row.column() { gimli::ColumnType::LeftEdge => 0, - gimli::ColumnType::Column(val) => val, + gimli::ColumnType::Column(val) => val.get(), }, discriminator: row.discriminator(), is_stmt: row.is_stmt(), diff --git a/crates/debug/src/transform/unit.rs b/crates/debug/src/transform/unit.rs index 15f165418a..655d7738ee 100644 --- a/crates/debug/src/transform/unit.rs +++ b/crates/debug/src/transform/unit.rs @@ -10,6 +10,7 @@ use anyhow::{Context, Error}; use gimli::write; use gimli::{AttributeValue, DebuggingInformationEntry, Unit}; use std::collections::HashSet; +use wasmtime_environ::ir::Endianness; use wasmtime_environ::isa::TargetIsa; use wasmtime_environ::wasm::DefinedFuncIndex; use wasmtime_environ::{CompiledFunctions, ModuleMemoryOffset}; @@ -463,6 +464,19 @@ where isa, )?; + // Data in WebAssembly memory always uses little-endian byte order. 
+ // If the native architecture is big-endian, we need to mark all + // base types used to refer to WebAssembly memory as little-endian + // using the DW_AT_endianity attribute, so that the debugger will + // be able to correctly access them. + if entry.tag() == gimli::DW_TAG_base_type && isa.endianness() == Endianness::Big { + let current_scope = comp_unit.get_mut(die_id); + current_scope.set( + gimli::DW_AT_endianity, + write::AttributeValue::Endianity(gimli::DW_END_little), + ); + } + if entry.tag() == gimli::DW_TAG_subprogram && !current_scope_ranges.is_empty() { append_vmctx_info( comp_unit, diff --git a/crates/debug/src/write_debuginfo.rs b/crates/debug/src/write_debuginfo.rs index 56c7231db0..491267b495 100644 --- a/crates/debug/src/write_debuginfo.rs +++ b/crates/debug/src/write_debuginfo.rs @@ -2,6 +2,7 @@ pub use crate::transform::transform_dwarf; use gimli::write::{Address, Dwarf, EndianVec, FrameTable, Result, Sections, Writer}; use gimli::{RunTimeEndian, SectionId}; use wasmtime_environ::entity::EntityRef; +use wasmtime_environ::ir::Endianness; use wasmtime_environ::isa::{unwind::UnwindInfo, TargetIsa}; use wasmtime_environ::{CompiledFunctions, DebugInfoData, ModuleMemoryOffset}; @@ -26,10 +27,19 @@ pub struct DwarfSection { } fn emit_dwarf_sections( + isa: &dyn TargetIsa, mut dwarf: Dwarf, frames: Option, ) -> anyhow::Result> { - let mut sections = Sections::new(WriterRelocate::default()); + let endian = match isa.endianness() { + Endianness::Little => RunTimeEndian::Little, + Endianness::Big => RunTimeEndian::Big, + }; + let writer = WriterRelocate { + relocs: Vec::new(), + writer: EndianVec::new(endian), + }; + let mut sections = Sections::new(writer); dwarf.write(&mut sections)?; if let Some(frames) = frames { frames.write_debug_frame(&mut sections.debug_frame)?; @@ -54,15 +64,6 @@ pub struct WriterRelocate { writer: EndianVec, } -impl Default for WriterRelocate { - fn default() -> Self { - WriterRelocate { - relocs: Vec::new(), - writer: 
EndianVec::new(RunTimeEndian::Little), - } - } -} - impl Writer for WriterRelocate { type Endian = RunTimeEndian; @@ -156,6 +157,6 @@ pub fn emit_dwarf<'a>( ) -> anyhow::Result> { let dwarf = transform_dwarf(isa, debuginfo_data, funcs, memory_offset)?; let frame_table = create_frame_table(isa, funcs); - let sections = emit_dwarf_sections(dwarf, frame_table)?; + let sections = emit_dwarf_sections(isa, dwarf, frame_table)?; Ok(sections) } diff --git a/crates/environ/Cargo.toml b/crates/environ/Cargo.toml index 9d6826f557..6c90202bae 100644 --- a/crates/environ/Cargo.toml +++ b/crates/environ/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wasmtime-environ" -version = "0.25.0" +version = "0.26.0" authors = ["The Wasmtime Project Developers"] description = "Standalone environment support for WebAsssembly code in Cranelift" license = "Apache-2.0 WITH LLVM-exception" @@ -12,19 +12,17 @@ readme = "README.md" edition = "2018" [dependencies] -anyhow = "1.0" -region = "2.2.0" -cranelift-codegen = { path = "../../cranelift/codegen", version = "0.72.0", features = ["enable-serde"] } -cranelift-entity = { path = "../../cranelift/entity", version = "0.72.0", features = ["enable-serde"] } -cranelift-wasm = { path = "../../cranelift/wasm", version = "0.72.0", features = ["enable-serde"] } -wasmparser = "0.76" +cranelift-codegen = { path = "../../cranelift/codegen", version = "0.73.0", features = ["enable-serde"] } +cranelift-entity = { path = "../../cranelift/entity", version = "0.73.0", features = ["enable-serde"] } +cranelift-wasm = { path = "../../cranelift/wasm", version = "0.73.0", features = ["enable-serde"] } +wasmparser = "0.77" indexmap = { version = "1.0.2", features = ["serde-1"] } thiserror = "1.0.4" serde = { version = "1.0.94", features = ["derive"] } log = { version = "0.4.8", default-features = false } more-asserts = "0.2.1" cfg-if = "1.0" -gimli = "0.23" +gimli = "0.24" [badges] maintenance = { status = "actively-developed" } diff --git 
a/crates/environ/src/data_structures.rs b/crates/environ/src/data_structures.rs index 07f2aedaec..12b321d779 100644 --- a/crates/environ/src/data_structures.rs +++ b/crates/environ/src/data_structures.rs @@ -3,14 +3,16 @@ pub mod ir { pub use cranelift_codegen::binemit::{Reloc, StackMap}; pub use cranelift_codegen::ir::{ - types, AbiParam, ArgumentPurpose, JumpTableOffsets, LabelValueLoc, LibCall, Signature, - SourceLoc, StackSlots, TrapCode, Type, ValueLabel, ValueLoc, + types, AbiParam, ArgumentPurpose, Endianness, JumpTableOffsets, LabelValueLoc, LibCall, + Signature, SourceLoc, StackSlots, TrapCode, Type, ValueLabel, ValueLoc, }; pub use cranelift_codegen::{ValueLabelsRanges, ValueLocRange}; } pub mod settings { - pub use cranelift_codegen::settings::{builder, Builder, Configurable, Flags, SetError}; + pub use cranelift_codegen::settings::{ + builder, Builder, Configurable, Flags, OptLevel, SetError, Setting, SettingKind, Value, + }; } pub mod isa { diff --git a/crates/environ/src/lib.rs b/crates/environ/src/lib.rs index f21c48c2fb..4b496a197c 100644 --- a/crates/environ/src/lib.rs +++ b/crates/environ/src/lib.rs @@ -39,7 +39,7 @@ pub use crate::data_structures::*; pub use crate::module::*; pub use crate::module_environ::*; pub use crate::tunables::Tunables; -pub use crate::vmoffsets::{TargetSharedSignatureIndex, VMOffsets, INTERRUPTED}; +pub use crate::vmoffsets::*; /// WebAssembly page sizes are defined to be 64KiB. 
pub const WASM_PAGE_SIZE: u32 = 0x10000; diff --git a/crates/environ/src/module.rs b/crates/environ/src/module.rs index ee4ff050dc..38a4293825 100644 --- a/crates/environ/src/module.rs +++ b/crates/environ/src/module.rs @@ -2,12 +2,12 @@ use crate::tunables::Tunables; use crate::WASM_MAX_PAGES; -use cranelift_codegen::ir; use cranelift_entity::{EntityRef, PrimaryMap}; use cranelift_wasm::*; use indexmap::IndexMap; use serde::{Deserialize, Serialize}; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; +use std::convert::TryFrom; use std::sync::Arc; /// Implemenation styles for WebAssembly linear memory. @@ -87,7 +87,7 @@ pub struct MemoryInitializer { /// Optionally, a global variable giving a base index. pub base: Option, /// The offset to add to the base. - pub offset: usize, + pub offset: u32, /// The data to write into the linear memory. pub data: Box<[u8]>, } @@ -169,7 +169,15 @@ impl MemoryInitialization { // Perform a bounds check on the segment // As this segment is referencing a defined memory without a global base, the last byte // written to by the segment cannot exceed the memory's initial minimum size - if (initializer.offset + initializer.data.len()) + let offset = usize::try_from(initializer.offset).unwrap(); + let end = match offset.checked_add(initializer.data.len()) { + Some(end) => end, + None => { + out_of_bounds = true; + continue; + } + }; + if end > ((module.memory_plans[initializer.memory_index].memory.minimum as usize) * WASM_PAGE_SIZE) @@ -179,8 +187,8 @@ impl MemoryInitialization { } let pages = &mut map[index]; - let mut page_index = initializer.offset / WASM_PAGE_SIZE; - let mut page_offset = initializer.offset % WASM_PAGE_SIZE; + let mut page_index = offset / WASM_PAGE_SIZE; + let mut page_offset = offset % WASM_PAGE_SIZE; let mut data_offset = 0; let mut data_remaining = initializer.data.len(); @@ -269,7 +277,7 @@ pub struct TableInitializer { /// Optionally, a global variable giving a base index. 
pub base: Option, /// The offset to add to the base. - pub offset: usize, + pub offset: u32, /// The values to write into the table elements. pub elements: Box<[FuncIndex]>, } @@ -367,6 +375,10 @@ pub struct Module { /// The type of each nested wasm module this module contains. pub modules: PrimaryMap, + + /// The set of defined functions within this module which are located in + /// element segments. + pub possibly_exported_funcs: HashSet, } /// Initialization routines for creating an instance, encompassing imports, @@ -445,12 +457,14 @@ impl Module { } /// Convert a `DefinedFuncIndex` into a `FuncIndex`. + #[inline] pub fn func_index(&self, defined_func: DefinedFuncIndex) -> FuncIndex { FuncIndex::new(self.num_imported_funcs + defined_func.index()) } /// Convert a `FuncIndex` into a `DefinedFuncIndex`. Returns None if the /// index is an imported function. + #[inline] pub fn defined_func_index(&self, func: FuncIndex) -> Option { if func.index() < self.num_imported_funcs { None @@ -462,17 +476,20 @@ impl Module { } /// Test whether the given function index is for an imported function. + #[inline] pub fn is_imported_function(&self, index: FuncIndex) -> bool { index.index() < self.num_imported_funcs } /// Convert a `DefinedTableIndex` into a `TableIndex`. + #[inline] pub fn table_index(&self, defined_table: DefinedTableIndex) -> TableIndex { TableIndex::new(self.num_imported_tables + defined_table.index()) } /// Convert a `TableIndex` into a `DefinedTableIndex`. Returns None if the /// index is an imported table. + #[inline] pub fn defined_table_index(&self, table: TableIndex) -> Option { if table.index() < self.num_imported_tables { None @@ -484,17 +501,20 @@ impl Module { } /// Test whether the given table index is for an imported table. + #[inline] pub fn is_imported_table(&self, index: TableIndex) -> bool { index.index() < self.num_imported_tables } /// Convert a `DefinedMemoryIndex` into a `MemoryIndex`. 
+ #[inline] pub fn memory_index(&self, defined_memory: DefinedMemoryIndex) -> MemoryIndex { MemoryIndex::new(self.num_imported_memories + defined_memory.index()) } /// Convert a `MemoryIndex` into a `DefinedMemoryIndex`. Returns None if the /// index is an imported memory. + #[inline] pub fn defined_memory_index(&self, memory: MemoryIndex) -> Option { if memory.index() < self.num_imported_memories { None @@ -506,17 +526,20 @@ impl Module { } /// Test whether the given memory index is for an imported memory. + #[inline] pub fn is_imported_memory(&self, index: MemoryIndex) -> bool { index.index() < self.num_imported_memories } /// Convert a `DefinedGlobalIndex` into a `GlobalIndex`. + #[inline] pub fn global_index(&self, defined_global: DefinedGlobalIndex) -> GlobalIndex { GlobalIndex::new(self.num_imported_globals + defined_global.index()) } /// Convert a `GlobalIndex` into a `DefinedGlobalIndex`. Returns None if the /// index is an imported global. + #[inline] pub fn defined_global_index(&self, global: GlobalIndex) -> Option { if global.index() < self.num_imported_globals { None @@ -528,6 +551,7 @@ impl Module { } /// Test whether the given global index is for an imported global. 
+ #[inline] pub fn is_imported_global(&self, index: GlobalIndex) -> bool { index.index() < self.num_imported_globals } @@ -564,7 +588,6 @@ impl Module { #[allow(missing_docs)] pub struct TypeTables { pub wasm_signatures: PrimaryMap, - pub native_signatures: PrimaryMap, pub module_signatures: PrimaryMap, pub instance_signatures: PrimaryMap, } diff --git a/crates/environ/src/module_environ.rs b/crates/environ/src/module_environ.rs index 2f53bb4af8..17f92cdd7e 100644 --- a/crates/environ/src/module_environ.rs +++ b/crates/environ/src/module_environ.rs @@ -4,14 +4,14 @@ use crate::module::{ }; use crate::tunables::Tunables; use cranelift_codegen::ir; -use cranelift_codegen::ir::{AbiParam, ArgumentPurpose}; use cranelift_codegen::isa::TargetFrontendConfig; +use cranelift_codegen::packed_option::ReservedValue; use cranelift_entity::PrimaryMap; use cranelift_wasm::{ self, translate_module, Alias, DataIndex, DefinedFuncIndex, ElemIndex, EntityIndex, EntityType, - FuncIndex, Global, GlobalIndex, InstanceIndex, InstanceTypeIndex, Memory, MemoryIndex, - ModuleIndex, ModuleTypeIndex, SignatureIndex, Table, TableIndex, TargetEnvironment, TypeIndex, - WasmError, WasmFuncType, WasmResult, + FuncIndex, Global, GlobalIndex, GlobalInit, InstanceIndex, InstanceTypeIndex, Memory, + MemoryIndex, ModuleIndex, ModuleTypeIndex, SignatureIndex, Table, TableIndex, + TargetEnvironment, TypeIndex, WasmError, WasmFuncType, WasmResult, }; use std::collections::{hash_map::Entry, HashMap}; use std::convert::TryFrom; @@ -41,6 +41,8 @@ pub struct ModuleEnvironment<'data> { /// Intern'd types for this entire translation, shared by all modules. 
types: TypeTables, + interned_func_types: HashMap, + // Various bits and pieces of configuration features: WasmFeatures, target_config: TargetFrontendConfig, @@ -147,6 +149,7 @@ impl<'data> ModuleEnvironment<'data> { tunables: tunables.clone(), features: *features, first_module: true, + interned_func_types: Default::default(), } } @@ -202,19 +205,28 @@ impl<'data> ModuleEnvironment<'data> { let slice = gimli::EndianSlice::new(data, endian); match name { - ".debug_str" => dwarf.debug_str = gimli::DebugStr::new(data, endian), + // Dwarf fields. ".debug_abbrev" => dwarf.debug_abbrev = gimli::DebugAbbrev::new(data, endian), + ".debug_addr" => dwarf.debug_addr = gimli::DebugAddr::from(slice), + // TODO aranges? ".debug_info" => dwarf.debug_info = gimli::DebugInfo::new(data, endian), ".debug_line" => dwarf.debug_line = gimli::DebugLine::new(data, endian), - ".debug_addr" => dwarf.debug_addr = gimli::DebugAddr::from(slice), ".debug_line_str" => dwarf.debug_line_str = gimli::DebugLineStr::from(slice), - ".debug_str_sup" => dwarf.debug_str_sup = gimli::DebugStr::from(slice), - ".debug_ranges" => info.debug_ranges = gimli::DebugRanges::new(data, endian), - ".debug_rnglists" => info.debug_rnglists = gimli::DebugRngLists::new(data, endian), + ".debug_str" => dwarf.debug_str = gimli::DebugStr::new(data, endian), + ".debug_str_offsets" => dwarf.debug_str_offsets = gimli::DebugStrOffsets::from(slice), + ".debug_str_sup" => { + let mut dwarf_sup: Dwarf<'data> = Default::default(); + dwarf_sup.debug_str = gimli::DebugStr::from(slice); + dwarf.sup = Some(Arc::new(dwarf_sup)); + } + ".debug_types" => dwarf.debug_types = gimli::DebugTypes::from(slice), + + // Additional fields. 
".debug_loc" => info.debug_loc = gimli::DebugLoc::from(slice), ".debug_loclists" => info.debug_loclists = gimli::DebugLocLists::from(slice), - ".debug_str_offsets" => dwarf.debug_str_offsets = gimli::DebugStrOffsets::from(slice), - ".debug_types" => dwarf.debug_types = gimli::DebugTypes::from(slice), + ".debug_ranges" => info.debug_ranges = gimli::DebugRanges::new(data, endian), + ".debug_rnglists" => info.debug_rnglists = gimli::DebugRngLists::new(data, endian), + other => { log::warn!("unknown debug section `{}`", other); return; @@ -354,6 +366,15 @@ impl<'data> ModuleEnvironment<'data> { .module_signatures .push(ModuleSignature { imports, exports }) } + + fn flag_func_possibly_exported(&mut self, func: FuncIndex) { + if func.is_reserved_value() { + return; + } + if let Some(idx) = self.result.module.defined_func_index(func) { + self.result.module.possibly_exported_funcs.insert(idx); + } + } } impl<'data> TargetEnvironment for ModuleEnvironment<'data> { @@ -372,22 +393,21 @@ impl<'data> cranelift_wasm::ModuleEnvironment<'data> for ModuleEnvironment<'data fn reserve_types(&mut self, num: u32) -> WasmResult<()> { let num = usize::try_from(num).unwrap(); self.result.module.types.reserve(num); - self.types.native_signatures.reserve(num); self.types.wasm_signatures.reserve(num); Ok(()) } - fn declare_type_func(&mut self, wasm: WasmFuncType, sig: ir::Signature) -> WasmResult<()> { - let sig = translate_signature(sig, self.pointer_type()); - - // FIXME(#2469): Signatures should be deduplicated in these two tables - // since `SignatureIndex` is already a index space separate from the - // module's index space. Note that this may get more urgent with - // module-linking modules where types are more likely to get repeated - // (across modules). 
- let sig_index = self.types.native_signatures.push(sig); - let sig_index2 = self.types.wasm_signatures.push(wasm); - debug_assert_eq!(sig_index, sig_index2); + fn declare_type_func(&mut self, wasm: WasmFuncType) -> WasmResult<()> { + // Deduplicate wasm function signatures through `interned_func_types`, + // which also deduplicates across wasm modules with module linking. + let sig_index = match self.interned_func_types.get(&wasm) { + Some(idx) => *idx, + None => { + let sig_index = self.types.wasm_signatures.push(wasm.clone()); + self.interned_func_types.insert(wasm, sig_index); + sig_index + } + }; self.result .module .types @@ -635,6 +655,9 @@ impl<'data> cranelift_wasm::ModuleEnvironment<'data> for ModuleEnvironment<'data } fn declare_global(&mut self, global: Global) -> WasmResult<()> { + if let GlobalInit::RefFunc(index) = global.initializer { + self.flag_func_possibly_exported(index); + } self.result.module.globals.push(global); Ok(()) } @@ -648,6 +671,7 @@ impl<'data> cranelift_wasm::ModuleEnvironment<'data> for ModuleEnvironment<'data } fn declare_func_export(&mut self, func_index: FuncIndex, name: &str) -> WasmResult<()> { + self.flag_func_possibly_exported(func_index); self.declare_export(EntityIndex::Function(func_index), name) } @@ -672,6 +696,7 @@ impl<'data> cranelift_wasm::ModuleEnvironment<'data> for ModuleEnvironment<'data } fn declare_start_func(&mut self, func_index: FuncIndex) -> WasmResult<()> { + self.flag_func_possibly_exported(func_index); debug_assert!(self.result.module.start_func.is_none()); self.result.module.start_func = Some(func_index); Ok(()) @@ -689,9 +714,12 @@ impl<'data> cranelift_wasm::ModuleEnvironment<'data> for ModuleEnvironment<'data &mut self, table_index: TableIndex, base: Option, - offset: usize, + offset: u32, elements: Box<[FuncIndex]>, ) -> WasmResult<()> { + for element in elements.iter() { + self.flag_func_possibly_exported(*element); + } self.result .module .table_initializers @@ -709,6 +737,9 @@ impl<'data> 
cranelift_wasm::ModuleEnvironment<'data> for ModuleEnvironment<'data elem_index: ElemIndex, segments: Box<[FuncIndex]>, ) -> WasmResult<()> { + for element in segments.iter() { + self.flag_func_possibly_exported(*element); + } let index = self.result.module.passive_elements.len(); self.result.module.passive_elements.push(segments); let old = self @@ -724,6 +755,13 @@ impl<'data> cranelift_wasm::ModuleEnvironment<'data> for ModuleEnvironment<'data Ok(()) } + fn declare_elements(&mut self, segments: Box<[FuncIndex]>) -> WasmResult<()> { + for element in segments.iter() { + self.flag_func_possibly_exported(*element); + } + Ok(()) + } + fn reserve_function_bodies(&mut self, _count: u32, offset: u64) { self.result.debuginfo.wasm_file.code_section_offset = offset; } @@ -772,7 +810,7 @@ impl<'data> cranelift_wasm::ModuleEnvironment<'data> for ModuleEnvironment<'data &mut self, memory_index: MemoryIndex, base: Option, - offset: usize, + offset: u32, data: &'data [u8], ) -> WasmResult<()> { match &mut self.result.module.memory_initialization { @@ -1064,15 +1102,3 @@ and for re-adding support for interface types you can see this issue: Ok(()) } } - -/// Add environment-specific function parameters. -pub fn translate_signature(mut sig: ir::Signature, pointer_type: ir::Type) -> ir::Signature { - // Prepend the vmctx argument. - sig.params.insert( - 0, - AbiParam::special(pointer_type, ArgumentPurpose::VMContext), - ); - // Prepend the caller vmctx argument. - sig.params.insert(1, AbiParam::new(pointer_type)); - sig -} diff --git a/crates/environ/src/tunables.rs b/crates/environ/src/tunables.rs index 4e5aba9145..8a786ae88b 100644 --- a/crates/environ/src/tunables.rs +++ b/crates/environ/src/tunables.rs @@ -1,5 +1,7 @@ +use serde::{Deserialize, Serialize}; + /// Tunable parameters for WebAssembly compilation. 
-#[derive(Clone, Hash)] +#[derive(Clone, Hash, Serialize, Deserialize)] pub struct Tunables { /// For static heaps, the size in wasm pages of the heap protected by bounds checking. pub static_memory_bound: u32, diff --git a/crates/environ/src/vmoffsets.rs b/crates/environ/src/vmoffsets.rs index 042f0b9dcf..6fc68f5e28 100644 --- a/crates/environ/src/vmoffsets.rs +++ b/crates/environ/src/vmoffsets.rs @@ -6,7 +6,7 @@ // struct VMContext { // interrupts: *const VMInterrupts, // externref_activations_table: *mut VMExternRefActivationsTable, -// stack_map_registry: *mut StackMapRegistry, +// module_info_lookup: *const dyn ModuleInfoLookup, // signature_ids: [VMSharedSignatureIndex; module.num_signature_ids], // imported_functions: [VMFunctionImport; module.num_imported_functions], // imported_tables: [VMTableImport; module.num_imported_tables], @@ -44,6 +44,7 @@ fn cast_to_u32(sz: usize) -> u32 { } /// Align an offset used in this module to a specific byte-width by rounding up +#[inline] fn align(offset: u32, width: u32) -> u32 { (offset + (width - 1)) / width * width } @@ -72,12 +73,53 @@ pub struct VMOffsets { pub num_defined_memories: u32, /// The number of defined globals in the module. pub num_defined_globals: u32, + + // precalculated offsets of various member fields + interrupts: u32, + externref_activations_table: u32, + module_info_lookup: u32, + signature_ids: u32, + imported_functions: u32, + imported_tables: u32, + imported_memories: u32, + imported_globals: u32, + defined_tables: u32, + defined_memories: u32, + defined_globals: u32, + defined_anyfuncs: u32, + builtin_functions: u32, + size: u32, +} + +/// Used to construct a `VMOffsets` +#[derive(Debug, Clone, Copy)] +pub struct VMOffsetsFields { + /// The size in bytes of a pointer on the target. + pub pointer_size: u8, + /// The number of signature declarations in the module. + pub num_signature_ids: u32, + /// The number of imported functions in the module. 
+ pub num_imported_functions: u32, + /// The number of imported tables in the module. + pub num_imported_tables: u32, + /// The number of imported memories in the module. + pub num_imported_memories: u32, + /// The number of imported globals in the module. + pub num_imported_globals: u32, + /// The number of defined functions in the module. + pub num_defined_functions: u32, + /// The number of defined tables in the module. + pub num_defined_tables: u32, + /// The number of defined memories in the module. + pub num_defined_memories: u32, + /// The number of defined globals in the module. + pub num_defined_globals: u32, } impl VMOffsets { /// Return a new `VMOffsets` instance, for a given pointer size. pub fn new(pointer_size: u8, module: &Module) -> Self { - Self { + VMOffsets::from(VMOffsetsFields { pointer_size, num_signature_ids: cast_to_u32(module.types.len()), num_imported_functions: cast_to_u32(module.num_imported_funcs), @@ -88,7 +130,139 @@ impl VMOffsets { num_defined_tables: cast_to_u32(module.table_plans.len()), num_defined_memories: cast_to_u32(module.memory_plans.len()), num_defined_globals: cast_to_u32(module.globals.len()), - } + }) + } +} + +impl From for VMOffsets { + fn from(fields: VMOffsetsFields) -> VMOffsets { + let mut ret = Self { + pointer_size: fields.pointer_size, + num_signature_ids: fields.num_signature_ids, + num_imported_functions: fields.num_imported_functions, + num_imported_tables: fields.num_imported_tables, + num_imported_memories: fields.num_imported_memories, + num_imported_globals: fields.num_imported_globals, + num_defined_functions: fields.num_defined_functions, + num_defined_tables: fields.num_defined_tables, + num_defined_memories: fields.num_defined_memories, + num_defined_globals: fields.num_defined_globals, + interrupts: 0, + externref_activations_table: 0, + module_info_lookup: 0, + signature_ids: 0, + imported_functions: 0, + imported_tables: 0, + imported_memories: 0, + imported_globals: 0, + defined_tables: 0, + 
defined_memories: 0, + defined_globals: 0, + defined_anyfuncs: 0, + builtin_functions: 0, + size: 0, + }; + + ret.interrupts = 0; + ret.externref_activations_table = ret + .interrupts + .checked_add(u32::from(fields.pointer_size)) + .unwrap(); + ret.module_info_lookup = ret + .externref_activations_table + .checked_add(u32::from(fields.pointer_size)) + .unwrap(); + ret.signature_ids = ret + .module_info_lookup + .checked_add(u32::from(fields.pointer_size * 2)) + .unwrap(); + ret.imported_functions = ret + .signature_ids + .checked_add( + fields + .num_signature_ids + .checked_mul(u32::from(ret.size_of_vmshared_signature_index())) + .unwrap(), + ) + .unwrap(); + ret.imported_tables = ret + .imported_functions + .checked_add( + ret.num_imported_functions + .checked_mul(u32::from(ret.size_of_vmfunction_import())) + .unwrap(), + ) + .unwrap(); + ret.imported_memories = ret + .imported_tables + .checked_add( + ret.num_imported_tables + .checked_mul(u32::from(ret.size_of_vmtable_import())) + .unwrap(), + ) + .unwrap(); + ret.imported_globals = ret + .imported_memories + .checked_add( + ret.num_imported_memories + .checked_mul(u32::from(ret.size_of_vmmemory_import())) + .unwrap(), + ) + .unwrap(); + ret.defined_tables = ret + .imported_globals + .checked_add( + ret.num_imported_globals + .checked_mul(u32::from(ret.size_of_vmglobal_import())) + .unwrap(), + ) + .unwrap(); + ret.defined_memories = ret + .defined_tables + .checked_add( + ret.num_defined_tables + .checked_mul(u32::from(ret.size_of_vmtable_definition())) + .unwrap(), + ) + .unwrap(); + ret.defined_globals = align( + ret.defined_memories + .checked_add( + ret.num_defined_memories + .checked_mul(u32::from(ret.size_of_vmmemory_definition())) + .unwrap(), + ) + .unwrap(), + 16, + ); + ret.defined_anyfuncs = ret + .defined_globals + .checked_add( + ret.num_defined_globals + .checked_mul(u32::from(ret.size_of_vmglobal_definition())) + .unwrap(), + ) + .unwrap(); + ret.builtin_functions = ret + .defined_anyfuncs + 
.checked_add( + ret.num_imported_functions + .checked_add(ret.num_defined_functions) + .unwrap() + .checked_mul(u32::from(ret.size_of_vmcaller_checked_anyfunc())) + .unwrap(), + ) + .unwrap(); + ret.size = ret + .builtin_functions + .checked_add( + BuiltinFunctionIndex::builtin_functions_total_number() + .checked_mul(u32::from(ret.pointer_size)) + .unwrap(), + ) + .unwrap(); + + return ret; } } @@ -96,17 +270,20 @@ impl VMOffsets { impl VMOffsets { /// The offset of the `body` field. #[allow(clippy::erasing_op)] + #[inline] pub fn vmfunction_import_body(&self) -> u8 { 0 * self.pointer_size } /// The offset of the `vmctx` field. #[allow(clippy::identity_op)] + #[inline] pub fn vmfunction_import_vmctx(&self) -> u8 { 1 * self.pointer_size } /// Return the size of `VMFunctionImport`. + #[inline] pub fn size_of_vmfunction_import(&self) -> u8 { 2 * self.pointer_size } @@ -125,17 +302,20 @@ impl VMOffsets { impl VMOffsets { /// The offset of the `from` field. #[allow(clippy::erasing_op)] + #[inline] pub fn vmtable_import_from(&self) -> u8 { 0 * self.pointer_size } /// The offset of the `vmctx` field. #[allow(clippy::identity_op)] + #[inline] pub fn vmtable_import_vmctx(&self) -> u8 { 1 * self.pointer_size } /// Return the size of `VMTableImport`. + #[inline] pub fn size_of_vmtable_import(&self) -> u8 { 2 * self.pointer_size } @@ -145,6 +325,7 @@ impl VMOffsets { impl VMOffsets { /// The offset of the `base` field. #[allow(clippy::erasing_op)] + #[inline] pub fn vmtable_definition_base(&self) -> u8 { 0 * self.pointer_size } @@ -156,16 +337,19 @@ impl VMOffsets { } /// The size of the `current_elements` field. + #[inline] pub fn size_of_vmtable_definition_current_elements(&self) -> u8 { 4 } /// Return the size of `VMTableDefinition`. + #[inline] pub fn size_of_vmtable_definition(&self) -> u8 { 2 * self.pointer_size } /// The type of the `current_elements` field. 
+ #[inline] pub fn type_of_vmtable_definition_current_elements(&self) -> ir::Type { ir::Type::int(u16::from(self.size_of_vmtable_definition_current_elements()) * 8).unwrap() } @@ -175,17 +359,20 @@ impl VMOffsets { impl VMOffsets { /// The offset of the `from` field. #[allow(clippy::erasing_op)] + #[inline] pub fn vmmemory_import_from(&self) -> u8 { 0 * self.pointer_size } /// The offset of the `vmctx` field. #[allow(clippy::identity_op)] + #[inline] pub fn vmmemory_import_vmctx(&self) -> u8 { 1 * self.pointer_size } /// Return the size of `VMMemoryImport`. + #[inline] pub fn size_of_vmmemory_import(&self) -> u8 { 2 * self.pointer_size } @@ -195,27 +382,32 @@ impl VMOffsets { impl VMOffsets { /// The offset of the `base` field. #[allow(clippy::erasing_op)] + #[inline] pub fn vmmemory_definition_base(&self) -> u8 { 0 * self.pointer_size } /// The offset of the `current_length` field. #[allow(clippy::identity_op)] + #[inline] pub fn vmmemory_definition_current_length(&self) -> u8 { 1 * self.pointer_size } /// The size of the `current_length` field. + #[inline] pub fn size_of_vmmemory_definition_current_length(&self) -> u8 { 4 } /// Return the size of `VMMemoryDefinition`. + #[inline] pub fn size_of_vmmemory_definition(&self) -> u8 { 2 * self.pointer_size } /// The type of the `current_length` field. + #[inline] pub fn type_of_vmmemory_definition_current_length(&self) -> ir::Type { ir::Type::int(u16::from(self.size_of_vmmemory_definition_current_length()) * 8).unwrap() } @@ -225,12 +417,14 @@ impl VMOffsets { impl VMOffsets { /// The offset of the `from` field. #[allow(clippy::erasing_op)] + #[inline] pub fn vmglobal_import_from(&self) -> u8 { 0 * self.pointer_size } /// Return the size of `VMGlobalImport`. #[allow(clippy::identity_op)] + #[inline] pub fn size_of_vmglobal_import(&self) -> u8 { 1 * self.pointer_size } @@ -240,6 +434,7 @@ impl VMOffsets { impl VMOffsets { /// Return the size of `VMGlobalDefinition`; this is the size of the largest value type (i.e. 
a /// V128). + #[inline] pub fn size_of_vmglobal_definition(&self) -> u8 { 16 } @@ -248,6 +443,7 @@ impl VMOffsets { /// Offsets for `VMSharedSignatureIndex`. impl VMOffsets { /// Return the size of `VMSharedSignatureIndex`. + #[inline] pub fn size_of_vmshared_signature_index(&self) -> u8 { 4 } @@ -256,11 +452,13 @@ impl VMOffsets { /// Offsets for `VMInterrupts`. impl VMOffsets { /// Return the offset of the `stack_limit` field of `VMInterrupts` + #[inline] pub fn vminterrupts_stack_limit(&self) -> u8 { 0 } /// Return the offset of the `fuel_consumed` field of `VMInterrupts` + #[inline] pub fn vminterrupts_fuel_consumed(&self) -> u8 { self.pointer_size } @@ -270,22 +468,26 @@ impl VMOffsets { impl VMOffsets { /// The offset of the `func_ptr` field. #[allow(clippy::erasing_op)] + #[inline] pub fn vmcaller_checked_anyfunc_func_ptr(&self) -> u8 { 0 * self.pointer_size } /// The offset of the `type_index` field. #[allow(clippy::identity_op)] + #[inline] pub fn vmcaller_checked_anyfunc_type_index(&self) -> u8 { 1 * self.pointer_size } /// The offset of the `vmctx` field. + #[inline] pub fn vmcaller_checked_anyfunc_vmctx(&self) -> u8 { 2 * self.pointer_size } /// Return the size of `VMCallerCheckedAnyfunc`. + #[inline] pub fn size_of_vmcaller_checked_anyfunc(&self) -> u8 { 3 * self.pointer_size } @@ -294,350 +496,235 @@ impl VMOffsets { /// Offsets for `VMContext`. impl VMOffsets { /// Return the offset to the `VMInterrupts` structure + #[inline] pub fn vmctx_interrupts(&self) -> u32 { - 0 + self.interrupts } /// The offset of the `VMExternRefActivationsTable` member. + #[inline] pub fn vmctx_externref_activations_table(&self) -> u32 { - self.vmctx_interrupts() - .checked_add(u32::from(self.pointer_size)) - .unwrap() + self.externref_activations_table } - /// The offset of the `*mut StackMapRegistry` member. 
- pub fn vmctx_stack_map_registry(&self) -> u32 { - self.vmctx_externref_activations_table() - .checked_add(u32::from(self.pointer_size)) - .unwrap() + /// The offset of the `*const dyn ModuleInfoLookup` member. + #[inline] + pub fn vmctx_module_info_lookup(&self) -> u32 { + self.module_info_lookup } /// The offset of the `signature_ids` array. + #[inline] pub fn vmctx_signature_ids_begin(&self) -> u32 { - self.vmctx_stack_map_registry() - .checked_add(u32::from(self.pointer_size)) - .unwrap() + self.signature_ids } /// The offset of the `tables` array. #[allow(clippy::erasing_op)] + #[inline] pub fn vmctx_imported_functions_begin(&self) -> u32 { - self.vmctx_signature_ids_begin() - .checked_add( - self.num_signature_ids - .checked_mul(u32::from(self.size_of_vmshared_signature_index())) - .unwrap(), - ) - .unwrap() + self.imported_functions } /// The offset of the `tables` array. #[allow(clippy::identity_op)] + #[inline] pub fn vmctx_imported_tables_begin(&self) -> u32 { - self.vmctx_imported_functions_begin() - .checked_add( - self.num_imported_functions - .checked_mul(u32::from(self.size_of_vmfunction_import())) - .unwrap(), - ) - .unwrap() + self.imported_tables } /// The offset of the `memories` array. + #[inline] pub fn vmctx_imported_memories_begin(&self) -> u32 { - self.vmctx_imported_tables_begin() - .checked_add( - self.num_imported_tables - .checked_mul(u32::from(self.size_of_vmtable_import())) - .unwrap(), - ) - .unwrap() + self.imported_memories } /// The offset of the `globals` array. + #[inline] pub fn vmctx_imported_globals_begin(&self) -> u32 { - self.vmctx_imported_memories_begin() - .checked_add( - self.num_imported_memories - .checked_mul(u32::from(self.size_of_vmmemory_import())) - .unwrap(), - ) - .unwrap() + self.imported_globals } /// The offset of the `tables` array. 
+ #[inline] pub fn vmctx_tables_begin(&self) -> u32 { - self.vmctx_imported_globals_begin() - .checked_add( - self.num_imported_globals - .checked_mul(u32::from(self.size_of_vmglobal_import())) - .unwrap(), - ) - .unwrap() + self.defined_tables } /// The offset of the `memories` array. + #[inline] pub fn vmctx_memories_begin(&self) -> u32 { - self.vmctx_tables_begin() - .checked_add( - self.num_defined_tables - .checked_mul(u32::from(self.size_of_vmtable_definition())) - .unwrap(), - ) - .unwrap() + self.defined_memories } /// The offset of the `globals` array. + #[inline] pub fn vmctx_globals_begin(&self) -> u32 { - let offset = self - .vmctx_memories_begin() - .checked_add( - self.num_defined_memories - .checked_mul(u32::from(self.size_of_vmmemory_definition())) - .unwrap(), - ) - .unwrap(); - align(offset, 16) + self.defined_globals } /// The offset of the `anyfuncs` array. + #[inline] pub fn vmctx_anyfuncs_begin(&self) -> u32 { - self.vmctx_globals_begin() - .checked_add( - self.num_defined_globals - .checked_mul(u32::from(self.size_of_vmglobal_definition())) - .unwrap(), - ) - .unwrap() + self.defined_anyfuncs } /// The offset of the builtin functions array. + #[inline] pub fn vmctx_builtin_functions_begin(&self) -> u32 { - self.vmctx_anyfuncs_begin() - .checked_add( - self.num_imported_functions - .checked_add(self.num_defined_functions) - .unwrap() - .checked_mul(u32::from(self.size_of_vmcaller_checked_anyfunc())) - .unwrap(), - ) - .unwrap() + self.builtin_functions } /// Return the size of the `VMContext` allocation. + #[inline] pub fn size_of_vmctx(&self) -> u32 { - self.vmctx_builtin_functions_begin() - .checked_add( - BuiltinFunctionIndex::builtin_functions_total_number() - .checked_mul(u32::from(self.pointer_size)) - .unwrap(), - ) - .unwrap() + self.size } /// Return the offset to `VMSharedSignatureId` index `index`. 
+ #[inline] pub fn vmctx_vmshared_signature_id(&self, index: TypeIndex) -> u32 { assert_lt!(index.as_u32(), self.num_signature_ids); self.vmctx_signature_ids_begin() - .checked_add( - index - .as_u32() - .checked_mul(u32::from(self.size_of_vmshared_signature_index())) - .unwrap(), - ) - .unwrap() + + index.as_u32() * u32::from(self.size_of_vmshared_signature_index()) } /// Return the offset to `VMFunctionImport` index `index`. + #[inline] pub fn vmctx_vmfunction_import(&self, index: FuncIndex) -> u32 { assert_lt!(index.as_u32(), self.num_imported_functions); self.vmctx_imported_functions_begin() - .checked_add( - index - .as_u32() - .checked_mul(u32::from(self.size_of_vmfunction_import())) - .unwrap(), - ) - .unwrap() + + index.as_u32() * u32::from(self.size_of_vmfunction_import()) } /// Return the offset to `VMTableImport` index `index`. + #[inline] pub fn vmctx_vmtable_import(&self, index: TableIndex) -> u32 { assert_lt!(index.as_u32(), self.num_imported_tables); self.vmctx_imported_tables_begin() - .checked_add( - index - .as_u32() - .checked_mul(u32::from(self.size_of_vmtable_import())) - .unwrap(), - ) - .unwrap() + + index.as_u32() * u32::from(self.size_of_vmtable_import()) } /// Return the offset to `VMMemoryImport` index `index`. + #[inline] pub fn vmctx_vmmemory_import(&self, index: MemoryIndex) -> u32 { assert_lt!(index.as_u32(), self.num_imported_memories); self.vmctx_imported_memories_begin() - .checked_add( - index - .as_u32() - .checked_mul(u32::from(self.size_of_vmmemory_import())) - .unwrap(), - ) - .unwrap() + + index.as_u32() * u32::from(self.size_of_vmmemory_import()) } /// Return the offset to `VMGlobalImport` index `index`. 
+ #[inline] pub fn vmctx_vmglobal_import(&self, index: GlobalIndex) -> u32 { assert_lt!(index.as_u32(), self.num_imported_globals); self.vmctx_imported_globals_begin() - .checked_add( - index - .as_u32() - .checked_mul(u32::from(self.size_of_vmglobal_import())) - .unwrap(), - ) - .unwrap() + + index.as_u32() * u32::from(self.size_of_vmglobal_import()) } /// Return the offset to `VMTableDefinition` index `index`. + #[inline] pub fn vmctx_vmtable_definition(&self, index: DefinedTableIndex) -> u32 { assert_lt!(index.as_u32(), self.num_defined_tables); - self.vmctx_tables_begin() - .checked_add( - index - .as_u32() - .checked_mul(u32::from(self.size_of_vmtable_definition())) - .unwrap(), - ) - .unwrap() + self.vmctx_tables_begin() + index.as_u32() * u32::from(self.size_of_vmtable_definition()) } /// Return the offset to `VMMemoryDefinition` index `index`. + #[inline] pub fn vmctx_vmmemory_definition(&self, index: DefinedMemoryIndex) -> u32 { assert_lt!(index.as_u32(), self.num_defined_memories); - self.vmctx_memories_begin() - .checked_add( - index - .as_u32() - .checked_mul(u32::from(self.size_of_vmmemory_definition())) - .unwrap(), - ) - .unwrap() + self.vmctx_memories_begin() + index.as_u32() * u32::from(self.size_of_vmmemory_definition()) } /// Return the offset to the `VMGlobalDefinition` index `index`. + #[inline] pub fn vmctx_vmglobal_definition(&self, index: DefinedGlobalIndex) -> u32 { assert_lt!(index.as_u32(), self.num_defined_globals); - self.vmctx_globals_begin() - .checked_add( - index - .as_u32() - .checked_mul(u32::from(self.size_of_vmglobal_definition())) - .unwrap(), - ) - .unwrap() + self.vmctx_globals_begin() + index.as_u32() * u32::from(self.size_of_vmglobal_definition()) } /// Return the offset to the `VMCallerCheckedAnyfunc` for the given function /// index (either imported or defined). 
+ #[inline] pub fn vmctx_anyfunc(&self, index: FuncIndex) -> u32 { + assert_lt!( + index.as_u32(), + self.num_imported_functions + self.num_defined_functions + ); self.vmctx_anyfuncs_begin() - .checked_add( - index - .as_u32() - .checked_mul(u32::from(self.size_of_vmcaller_checked_anyfunc())) - .unwrap(), - ) - .unwrap() + + index.as_u32() * u32::from(self.size_of_vmcaller_checked_anyfunc()) } /// Return the offset to the `body` field in `*const VMFunctionBody` index `index`. + #[inline] pub fn vmctx_vmfunction_import_body(&self, index: FuncIndex) -> u32 { - self.vmctx_vmfunction_import(index) - .checked_add(u32::from(self.vmfunction_import_body())) - .unwrap() + self.vmctx_vmfunction_import(index) + u32::from(self.vmfunction_import_body()) } /// Return the offset to the `vmctx` field in `*const VMFunctionBody` index `index`. + #[inline] pub fn vmctx_vmfunction_import_vmctx(&self, index: FuncIndex) -> u32 { - self.vmctx_vmfunction_import(index) - .checked_add(u32::from(self.vmfunction_import_vmctx())) - .unwrap() + self.vmctx_vmfunction_import(index) + u32::from(self.vmfunction_import_vmctx()) } /// Return the offset to the `from` field in `VMTableImport` index `index`. + #[inline] pub fn vmctx_vmtable_import_from(&self, index: TableIndex) -> u32 { - self.vmctx_vmtable_import(index) - .checked_add(u32::from(self.vmtable_import_from())) - .unwrap() + self.vmctx_vmtable_import(index) + u32::from(self.vmtable_import_from()) } /// Return the offset to the `base` field in `VMTableDefinition` index `index`. + #[inline] pub fn vmctx_vmtable_definition_base(&self, index: DefinedTableIndex) -> u32 { - self.vmctx_vmtable_definition(index) - .checked_add(u32::from(self.vmtable_definition_base())) - .unwrap() + self.vmctx_vmtable_definition(index) + u32::from(self.vmtable_definition_base()) } /// Return the offset to the `current_elements` field in `VMTableDefinition` index `index`. 
+ #[inline] pub fn vmctx_vmtable_definition_current_elements(&self, index: DefinedTableIndex) -> u32 { - self.vmctx_vmtable_definition(index) - .checked_add(u32::from(self.vmtable_definition_current_elements())) - .unwrap() + self.vmctx_vmtable_definition(index) + u32::from(self.vmtable_definition_current_elements()) } /// Return the offset to the `from` field in `VMMemoryImport` index `index`. + #[inline] pub fn vmctx_vmmemory_import_from(&self, index: MemoryIndex) -> u32 { - self.vmctx_vmmemory_import(index) - .checked_add(u32::from(self.vmmemory_import_from())) - .unwrap() + self.vmctx_vmmemory_import(index) + u32::from(self.vmmemory_import_from()) } /// Return the offset to the `vmctx` field in `VMMemoryImport` index `index`. + #[inline] pub fn vmctx_vmmemory_import_vmctx(&self, index: MemoryIndex) -> u32 { - self.vmctx_vmmemory_import(index) - .checked_add(u32::from(self.vmmemory_import_vmctx())) - .unwrap() + self.vmctx_vmmemory_import(index) + u32::from(self.vmmemory_import_vmctx()) } /// Return the offset to the `base` field in `VMMemoryDefinition` index `index`. + #[inline] pub fn vmctx_vmmemory_definition_base(&self, index: DefinedMemoryIndex) -> u32 { - self.vmctx_vmmemory_definition(index) - .checked_add(u32::from(self.vmmemory_definition_base())) - .unwrap() + self.vmctx_vmmemory_definition(index) + u32::from(self.vmmemory_definition_base()) } /// Return the offset to the `current_length` field in `VMMemoryDefinition` index `index`. + #[inline] pub fn vmctx_vmmemory_definition_current_length(&self, index: DefinedMemoryIndex) -> u32 { - self.vmctx_vmmemory_definition(index) - .checked_add(u32::from(self.vmmemory_definition_current_length())) - .unwrap() + self.vmctx_vmmemory_definition(index) + u32::from(self.vmmemory_definition_current_length()) } /// Return the offset to the `from` field in `VMGlobalImport` index `index`. 
+ #[inline] pub fn vmctx_vmglobal_import_from(&self, index: GlobalIndex) -> u32 { - self.vmctx_vmglobal_import(index) - .checked_add(u32::from(self.vmglobal_import_from())) - .unwrap() + self.vmctx_vmglobal_import(index) + u32::from(self.vmglobal_import_from()) } /// Return the offset to builtin function in `VMBuiltinFunctionsArray` index `index`. + #[inline] pub fn vmctx_builtin_function(&self, index: BuiltinFunctionIndex) -> u32 { - self.vmctx_builtin_functions_begin() - .checked_add( - index - .index() - .checked_mul(u32::from(self.pointer_size)) - .unwrap(), - ) - .unwrap() + self.vmctx_builtin_functions_begin() + index.index() * u32::from(self.pointer_size) } } /// Offsets for `VMExternData`. impl VMOffsets { /// Return the offset for `VMExternData::ref_count`. + #[inline] pub fn vm_extern_data_ref_count() -> u32 { 0 } @@ -646,11 +733,13 @@ impl VMOffsets { /// Offsets for `VMExternRefActivationsTable`. impl VMOffsets { /// Return the offset for `VMExternRefActivationsTable::next`. + #[inline] pub fn vm_extern_ref_activation_table_next(&self) -> u32 { 0 } /// Return the offset for `VMExternRefActivationsTable::end`. + #[inline] pub fn vm_extern_ref_activation_table_end(&self) -> u32 { self.pointer_size.into() } diff --git a/crates/fiber/Cargo.toml b/crates/fiber/Cargo.toml index f13d2975fa..d5d8399de5 100644 --- a/crates/fiber/Cargo.toml +++ b/crates/fiber/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wasmtime-fiber" -version = "0.25.0" +version = "0.26.0" authors = ["The Wasmtime Project Developers"] description = "Fiber support for Wasmtime" license = "Apache-2.0 WITH LLVM-exception" diff --git a/crates/fiber/src/arch/s390x.S b/crates/fiber/src/arch/s390x.S new file mode 100644 index 0000000000..8d9548bbb0 --- /dev/null +++ b/crates/fiber/src/arch/s390x.S @@ -0,0 +1,112 @@ +// A WORD OF CAUTION +// +// This entire file basically needs to be kept in sync with itself. 
It's not +// really possible to modify just one bit of this file without understanding +// all the other bits. Documentation tries to reference various bits here and +// there but try to make sure to read over everything before tweaking things! +// +// Also at this time this file is heavily based off the x86_64 file, so you'll +// probably want to read that one as well. + +#include "header.h" + +// fn(top_of_stack(%x0): *mut u8) +HIDDEN(wasmtime_fiber_switch) +GLOBL(wasmtime_fiber_switch) +.p2align 2 +TYPE(wasmtime_fiber_switch) +FUNCTION(wasmtime_fiber_switch): + // Save all callee-saved registers on the stack since we're assuming + // they're clobbered as a result of the stack switch. + stmg %r6, %r15, 48(%r15) + aghi %r15, -64 + std %f8, 0(%r15) + std %f9, 8(%r15) + std %f10, 16(%r15) + std %f11, 24(%r15) + std %f12, 32(%r15) + std %f13, 40(%r15) + std %f14, 48(%r15) + std %f15, 56(%r15) + + // Load our previously saved stack pointer to resume to, and save off our + // current stack pointer on where to come back to eventually. + lg %r1, -16(%r2) + stg %r15, -16(%r2) + + // Switch to the new stack and restore all our callee-saved registers after + // the switch and return to our new stack. 
+ ld %f8, 0(%r1) + ld %f9, 8(%r1) + ld %f10, 16(%r1) + ld %f11, 24(%r1) + ld %f12, 32(%r1) + ld %f13, 40(%r1) + ld %f14, 48(%r1) + ld %f15, 56(%r1) + lmg %r6, %r15, 112(%r1) + br %r14 +SIZE(wasmtime_fiber_switch) + +// fn( +// top_of_stack(%x0): *mut u8, +// entry_point(%x1): extern fn(*mut u8, *mut u8), +// entry_arg0(%x2): *mut u8, +// ) +HIDDEN(wasmtime_fiber_init) +GLOBL(wasmtime_fiber_init) +.p2align 2 +TYPE(wasmtime_fiber_init) +FUNCTION(wasmtime_fiber_init): + larl %r1, FUNCTION(wasmtime_fiber_start) + stg %r1, -48(%r2) // wasmtime_fiber_start - restored into %r14 + stg %r2, -112(%r2) // top_of_stack - restored into %r6 + stg %r3, -104(%r2) // entry_point - restored into %r7 + stg %r4, -96(%r2) // entry_arg0 - restored into %r8 + aghi %r2, -160 // 160 bytes register save area + stg %r2, 120(%r2) // bottom of register save area - restored into %r15 + + // `wasmtime_fiber_switch` has a 64 byte stack. + aghi %r2, -64 + stg %r2, 208(%r2) + br %r14 +SIZE(wasmtime_fiber_init) + +.p2align 2 +TYPE(wasmtime_fiber_start) +FUNCTION(wasmtime_fiber_start): +.cfi_startproc simple + + // See the x86_64 file for more commentary on what these CFI directives are + // doing. Like over there note that the relative offsets to registers here + // match the frame layout in `wasmtime_fiber_switch`. + .cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \ + 7, /* the byte length of this expression */ \ + 0x7f, 0x90, 0x1, /* DW_OP_breg15 0x90 */ \ + 0x06, /* DW_OP_deref */ \ + 0x23, 0xe0, 0x1 /* DW_OP_plus_uconst 0xe0 */ + + .cfi_rel_offset 6, -112 + .cfi_rel_offset 7, -104 + .cfi_rel_offset 8, -96 + .cfi_rel_offset 9, -88 + .cfi_rel_offset 10, -80 + .cfi_rel_offset 11, -72 + .cfi_rel_offset 12, -64 + .cfi_rel_offset 13, -56 + .cfi_rel_offset 14, -48 + .cfi_rel_offset 15, -40 + + // Load our two arguments prepared by `wasmtime_fiber_init`. + lgr %r2, %r8 // entry_arg0 + lgr %r3, %r6 // top_of_stack + + // ... and then we call the function! 
Note that this is a function call so + // our frame stays on the stack to backtrace through. + basr %r14, %r7 // entry_point + // .. technically we shouldn't get here, so just trap. + .word 0x0000 + .cfi_endproc +SIZE(wasmtime_fiber_start) + +FOOTER diff --git a/crates/fiber/src/arch/x86_64.S b/crates/fiber/src/arch/x86_64.S index 89e20b1aee..2ae01fc290 100644 --- a/crates/fiber/src/arch/x86_64.S +++ b/crates/fiber/src/arch/x86_64.S @@ -68,7 +68,7 @@ FUNCTION(wasmtime_fiber_init): // And then we specify the stack pointer resumption should begin at. Our // `wasmtime_fiber_switch` function consumes 6 registers plus a return - // pointer, and the top 16 bytes aree resereved, so that's: + // pointer, and the top 16 bytes are reserved, so that's: // // (6 + 1) * 16 + 16 = 0x48 lea -0x48(%rdi), %rax diff --git a/crates/fiber/src/lib.rs b/crates/fiber/src/lib.rs index 9a8d057f35..6a21285206 100644 --- a/crates/fiber/src/lib.rs +++ b/crates/fiber/src/lib.rs @@ -14,7 +14,38 @@ mod unix; #[cfg(unix)] use unix as imp; +/// Represents an execution stack to use for a fiber. +#[derive(Debug)] +pub struct FiberStack(imp::FiberStack); + +impl FiberStack { + /// Creates a new fiber stack of the given size. + pub fn new(size: usize) -> io::Result { + Ok(Self(imp::FiberStack::new(size)?)) + } + + /// Creates a new fiber stack with the given pointer to the top of the stack. + /// + /// # Safety + /// + /// This is unsafe because there is no validation of the given stack pointer. + /// + /// The caller must properly allocate the stack space with a guard page and + /// make the pages accessible for correct behavior. + pub unsafe fn from_top_ptr(top: *mut u8) -> io::Result { + Ok(Self(imp::FiberStack::from_top_ptr(top)?)) + } + + /// Gets the top of the stack. + /// + /// Returns `None` if the platform does not support getting the top of the stack. 
+ pub fn top(&self) -> Option<*mut u8> { + self.0.top() + } +} + pub struct Fiber<'a, Resume, Yield, Return> { + stack: FiberStack, inner: imp::Fiber, done: Cell, _phantom: PhantomData<&'a (Resume, Yield, Return)>, @@ -34,39 +65,20 @@ enum RunResult { } impl<'a, Resume, Yield, Return> Fiber<'a, Resume, Yield, Return> { - /// Creates a new fiber which will execute `func` on a new native stack of - /// size `stack_size`. + /// Creates a new fiber which will execute `func` on the given stack. /// /// This function returns a `Fiber` which, when resumed, will execute `func` /// to completion. When desired the `func` can suspend itself via /// `Fiber::suspend`. pub fn new( - stack_size: usize, + stack: FiberStack, func: impl FnOnce(Resume, &Suspend) -> Return + 'a, - ) -> io::Result> { - Ok(Fiber { - inner: imp::Fiber::new(stack_size, func)?, - done: Cell::new(false), - _phantom: PhantomData, - }) - } + ) -> io::Result { + let inner = imp::Fiber::new(&stack.0, func)?; - /// Creates a new fiber with existing stack space that will execute `func`. - /// - /// This function returns a `Fiber` which, when resumed, will execute `func` - /// to completion. When desired the `func` can suspend itself via - /// `Fiber::suspend`. - /// - /// # Safety - /// - /// The caller must properly allocate the stack space with a guard page and - /// make the pages accessible for correct behavior. 
- pub unsafe fn new_with_stack( - top_of_stack: *mut u8, - func: impl FnOnce(Resume, &Suspend) -> Return + 'a, - ) -> io::Result> { - Ok(Fiber { - inner: imp::Fiber::new_with_stack(top_of_stack, func)?, + Ok(Self { + stack, + inner, done: Cell::new(false), _phantom: PhantomData, }) @@ -90,7 +102,7 @@ impl<'a, Resume, Yield, Return> Fiber<'a, Resume, Yield, Return> { pub fn resume(&self, val: Resume) -> Result { assert!(!self.done.replace(true), "cannot resume a finished fiber"); let result = Cell::new(RunResult::Resuming(val)); - self.inner.resume(&result); + self.inner.resume(&self.stack.0, &result); match result.into_inner() { RunResult::Resuming(_) | RunResult::Executing => unreachable!(), RunResult::Yield(y) => { @@ -106,6 +118,11 @@ impl<'a, Resume, Yield, Return> Fiber<'a, Resume, Yield, Return> { pub fn done(&self) -> bool { self.done.get() } + + /// Gets the stack associated with this fiber. + pub fn stack(&self) -> &FiberStack { + &self.stack + } } impl Suspend { @@ -148,18 +165,18 @@ impl Drop for Fiber<'_, A, B, C> { #[cfg(test)] mod tests { - use super::Fiber; + use super::{Fiber, FiberStack}; use std::cell::Cell; use std::panic::{self, AssertUnwindSafe}; use std::rc::Rc; #[test] fn small_stacks() { - Fiber::<(), (), ()>::new(0, |_, _| {}) + Fiber::<(), (), ()>::new(FiberStack::new(0).unwrap(), |_, _| {}) .unwrap() .resume(()) .unwrap(); - Fiber::<(), (), ()>::new(1, |_, _| {}) + Fiber::<(), (), ()>::new(FiberStack::new(1).unwrap(), |_, _| {}) .unwrap() .resume(()) .unwrap(); @@ -169,7 +186,7 @@ mod tests { fn smoke() { let hit = Rc::new(Cell::new(false)); let hit2 = hit.clone(); - let fiber = Fiber::<(), (), ()>::new(1024 * 1024, move |_, _| { + let fiber = Fiber::<(), (), ()>::new(FiberStack::new(1024 * 1024).unwrap(), move |_, _| { hit2.set(true); }) .unwrap(); @@ -182,7 +199,7 @@ mod tests { fn suspend_and_resume() { let hit = Rc::new(Cell::new(false)); let hit2 = hit.clone(); - let fiber = Fiber::<(), (), ()>::new(1024 * 1024, move |_, s| { + let 
fiber = Fiber::<(), (), ()>::new(FiberStack::new(1024 * 1024).unwrap(), move |_, s| { s.suspend(()); hit2.set(true); s.suspend(()); @@ -219,14 +236,15 @@ mod tests { } fn run_test() { - let fiber = Fiber::<(), (), ()>::new(1024 * 1024, move |(), s| { - assert_contains_host(); - s.suspend(()); - assert_contains_host(); - s.suspend(()); - assert_contains_host(); - }) - .unwrap(); + let fiber = + Fiber::<(), (), ()>::new(FiberStack::new(1024 * 1024).unwrap(), move |(), s| { + assert_contains_host(); + s.suspend(()); + assert_contains_host(); + s.suspend(()); + assert_contains_host(); + }) + .unwrap(); assert!(fiber.resume(()).is_err()); assert!(fiber.resume(()).is_err()); assert!(fiber.resume(()).is_ok()); @@ -239,11 +257,12 @@ mod tests { fn panics_propagated() { let a = Rc::new(Cell::new(false)); let b = SetOnDrop(a.clone()); - let fiber = Fiber::<(), (), ()>::new(1024 * 1024, move |(), _s| { - drop(&b); - panic!(); - }) - .unwrap(); + let fiber = + Fiber::<(), (), ()>::new(FiberStack::new(1024 * 1024).unwrap(), move |(), _s| { + drop(&b); + panic!(); + }) + .unwrap(); assert!(panic::catch_unwind(AssertUnwindSafe(|| fiber.resume(()))).is_err()); assert!(a.get()); @@ -258,7 +277,7 @@ mod tests { #[test] fn suspend_and_resume_values() { - let fiber = Fiber::new(1024 * 1024, move |first, s| { + let fiber = Fiber::new(FiberStack::new(1024 * 1024).unwrap(), move |first, s| { assert_eq!(first, 2.0); assert_eq!(s.suspend(4), 3.0); "hello".to_string() diff --git a/crates/fiber/src/unix.rs b/crates/fiber/src/unix.rs index 0cc57ca319..7061c6c8d2 100644 --- a/crates/fiber/src/unix.rs +++ b/crates/fiber/src/unix.rs @@ -34,17 +34,81 @@ use std::cell::Cell; use std::io; use std::ptr; -pub struct Fiber { +#[derive(Debug)] +pub struct FiberStack { // The top of the stack; for stacks allocated by the fiber implementation itself, - // the base address of the allocation will be `top_of_stack.sub(alloc_len.unwrap())` - top_of_stack: *mut u8, - alloc_len: Option, + // the base address 
of the allocation will be `top.sub(len.unwrap())` + top: *mut u8, + // The length of the stack; `None` when the stack was not created by this implementation. + len: Option, } -pub struct Suspend { - top_of_stack: *mut u8, +impl FiberStack { + pub fn new(size: usize) -> io::Result { + unsafe { + // Round up our stack size request to the nearest multiple of the + // page size. + let page_size = libc::sysconf(libc::_SC_PAGESIZE) as usize; + let size = if size == 0 { + page_size + } else { + (size + (page_size - 1)) & (!(page_size - 1)) + }; + + // Add in one page for a guard page and then ask for some memory. + let mmap_len = size + page_size; + let mmap = libc::mmap( + ptr::null_mut(), + mmap_len, + libc::PROT_NONE, + libc::MAP_ANON | libc::MAP_PRIVATE, + -1, + 0, + ); + if mmap == libc::MAP_FAILED { + return Err(io::Error::last_os_error()); + } + + if libc::mprotect( + mmap.cast::().add(page_size).cast(), + size, + libc::PROT_READ | libc::PROT_WRITE, + ) != 0 + { + return Err(io::Error::last_os_error()); + } + + Ok(Self { + top: mmap.cast::().add(mmap_len), + len: Some(mmap_len), + }) + } + } + + pub unsafe fn from_top_ptr(top: *mut u8) -> io::Result { + Ok(Self { top, len: None }) + } + + pub fn top(&self) -> Option<*mut u8> { + Some(self.top) + } } +impl Drop for FiberStack { + fn drop(&mut self) { + unsafe { + if let Some(len) = self.len { + let ret = libc::munmap(self.top.sub(len) as _, len); + debug_assert!(ret == 0); + } + } + } +} + +pub struct Fiber; + +pub struct Suspend(*mut u8); + extern "C" { fn wasmtime_fiber_init( top_of_stack: *mut u8, @@ -59,97 +123,35 @@ where F: FnOnce(A, &super::Suspend) -> C, { unsafe { - let inner = Suspend { top_of_stack }; + let inner = Suspend(top_of_stack); let initial = inner.take_resume::(); super::Suspend::::execute(inner, initial, Box::from_raw(arg0.cast::())) } } impl Fiber { - pub fn new(stack_size: usize, func: F) -> io::Result - where - F: FnOnce(A, &super::Suspend) -> C, - { - let fiber = 
Self::alloc_with_stack(stack_size)?; - fiber.init(func); - Ok(fiber) - } - - pub fn new_with_stack(top_of_stack: *mut u8, func: F) -> io::Result - where - F: FnOnce(A, &super::Suspend) -> C, - { - let fiber = Self { - top_of_stack, - alloc_len: None, - }; - - fiber.init(func); - - Ok(fiber) - } - - fn init(&self, func: F) + pub fn new(stack: &FiberStack, func: F) -> io::Result where F: FnOnce(A, &super::Suspend) -> C, { unsafe { let data = Box::into_raw(Box::new(func)).cast(); - wasmtime_fiber_init(self.top_of_stack, fiber_start::, data); + wasmtime_fiber_init(stack.top, fiber_start::, data); } + + Ok(Self) } - fn alloc_with_stack(stack_size: usize) -> io::Result { - unsafe { - // Round up our stack size request to the nearest multiple of the - // page size. - let page_size = libc::sysconf(libc::_SC_PAGESIZE) as usize; - let stack_size = if stack_size == 0 { - page_size - } else { - (stack_size + (page_size - 1)) & (!(page_size - 1)) - }; - - // Add in one page for a guard page and then ask for some memory. - let mmap_len = stack_size + page_size; - let mmap = libc::mmap( - ptr::null_mut(), - mmap_len, - libc::PROT_NONE, - libc::MAP_ANON | libc::MAP_PRIVATE, - -1, - 0, - ); - if mmap == libc::MAP_FAILED { - return Err(io::Error::last_os_error()); - } - let ret = Self { - top_of_stack: mmap.cast::().add(mmap_len), - alloc_len: Some(mmap_len), - }; - let res = libc::mprotect( - mmap.cast::().add(page_size).cast(), - stack_size, - libc::PROT_READ | libc::PROT_WRITE, - ); - if res != 0 { - Err(io::Error::last_os_error()) - } else { - Ok(ret) - } - } - } - - pub(crate) fn resume(&self, result: &Cell>) { + pub(crate) fn resume(&self, stack: &FiberStack, result: &Cell>) { unsafe { // Store where our result is going at the very tip-top of the // stack, otherwise known as our reserved slot for this information. 
// // In the diagram above this is updating address 0xAff8 - let addr = self.top_of_stack.cast::().offset(-1); + let addr = stack.top.cast::().offset(-1); addr.write(result as *const _ as usize); - wasmtime_fiber_switch(self.top_of_stack); + wasmtime_fiber_switch(stack.top); // null this out to help catch use-after-free addr.write(0); @@ -157,23 +159,12 @@ impl Fiber { } } -impl Drop for Fiber { - fn drop(&mut self) { - unsafe { - if let Some(alloc_len) = self.alloc_len { - let ret = libc::munmap(self.top_of_stack.sub(alloc_len) as _, alloc_len); - debug_assert!(ret == 0); - } - } - } -} - impl Suspend { pub(crate) fn switch(&self, result: RunResult) -> A { unsafe { // Calculate 0xAff8 and then write to it (*self.result_location::()).set(result); - wasmtime_fiber_switch(self.top_of_stack); + wasmtime_fiber_switch(self.0); self.take_resume::() } } @@ -186,8 +177,8 @@ impl Suspend { } unsafe fn result_location(&self) -> *const Cell> { - let ret = self.top_of_stack.cast::<*const u8>().offset(-1).read(); + let ret = self.0.cast::<*const u8>().offset(-1).read(); assert!(!ret.is_null()); - return ret.cast(); + ret.cast() } } diff --git a/crates/fiber/src/windows.rs b/crates/fiber/src/windows.rs index b2d657eb88..be35ae4e15 100644 --- a/crates/fiber/src/windows.rs +++ b/crates/fiber/src/windows.rs @@ -5,8 +5,26 @@ use std::ptr; use winapi::shared::minwindef::*; use winapi::shared::winerror::ERROR_NOT_SUPPORTED; use winapi::um::fibersapi::*; +use winapi::um::processthreadsapi::SetThreadStackGuarantee; use winapi::um::winbase::*; +#[derive(Debug)] +pub struct FiberStack(usize); + +impl FiberStack { + pub fn new(size: usize) -> io::Result { + Ok(Self(size)) + } + + pub unsafe fn from_top_ptr(_top: *mut u8) -> io::Result { + Err(io::Error::from_raw_os_error(ERROR_NOT_SUPPORTED as i32)) + } + + pub fn top(&self) -> Option<*mut u8> { + None + } +} + pub struct Fiber { fiber: LPVOID, state: Box, @@ -32,6 +50,13 @@ unsafe extern "system" fn fiber_start(data: LPVOID) where F: 
FnOnce(A, &super::Suspend) -> C, { + // Set the stack guarantee to be consistent with what Rust expects for threads + // This value is taken from: + // https://github.com/rust-lang/rust/blob/0d97f7a96877a96015d70ece41ad08bb7af12377/library/std/src/sys/windows/stack_overflow.rs + if SetThreadStackGuarantee(&mut 0x5000) == 0 { + panic!("failed to set fiber stack guarantee"); + } + let state = data.cast::(); let func = Box::from_raw((*state).initial_closure.get().cast::()); (*state).initial_closure.set(ptr::null_mut()); @@ -41,7 +66,7 @@ where } impl Fiber { - pub fn new(stack_size: usize, func: F) -> io::Result + pub fn new(stack: &FiberStack, func: F) -> io::Result where F: FnOnce(A, &super::Suspend) -> C, { @@ -51,30 +76,25 @@ impl Fiber { parent: Cell::new(ptr::null_mut()), result_location: Cell::new(ptr::null()), }); + let fiber = CreateFiberEx( 0, - stack_size, + stack.0, FIBER_FLAG_FLOAT_SWITCH, Some(fiber_start::), &*state as *const StartState as *mut _, ); + if fiber.is_null() { drop(Box::from_raw(state.initial_closure.get().cast::())); - Err(io::Error::last_os_error()) - } else { - Ok(Self { fiber, state }) + return Err(io::Error::last_os_error()); } + + Ok(Self { fiber, state }) } } - pub fn new_with_stack(_top_of_stack: *mut u8, _func: F) -> io::Result - where - F: FnOnce(A, &super::Suspend) -> C, - { - Err(io::Error::from_raw_os_error(ERROR_NOT_SUPPORTED as i32)) - } - - pub(crate) fn resume(&self, result: &Cell>) { + pub(crate) fn resume(&self, _stack: &FiberStack, result: &Cell>) { unsafe { let is_fiber = IsThreadAFiber() != 0; let parent_fiber = if is_fiber { diff --git a/crates/fuzzing/Cargo.toml b/crates/fuzzing/Cargo.toml index 66c524486b..c61404d952 100644 --- a/crates/fuzzing/Cargo.toml +++ b/crates/fuzzing/Cargo.toml @@ -13,16 +13,13 @@ arbitrary = { version = "1.0.0", features = ["derive"] } env_logger = "0.8.1" log = "0.4.8" rayon = "1.2.1" -wasmparser = "0.76" -wasmprinter = "0.2.24" +wasmparser = "0.77" +wasmprinter = "0.2.25" wasmtime = { 
path = "../wasmtime" } wasmtime-wast = { path = "../wast" } -wasm-encoder = "0.4" -wasm-smith = "0.4.0" +wasm-encoder = "0.4.1" +wasm-smith = "0.4.4" wasmi = "0.7.0" [dev-dependencies] -wat = "1.0.36" - -[features] -experimental_x64 = ["wasmtime/experimental_x64"] +wat = "1.0.37" diff --git a/crates/fuzzing/build.rs b/crates/fuzzing/build.rs index 73d3e9d131..428355beb7 100644 --- a/crates/fuzzing/build.rs +++ b/crates/fuzzing/build.rs @@ -13,11 +13,12 @@ fn main() { .unwrap() .join("../../tests/spec_testsuite"); let mut code = format!("static FILES: &[(&str, &str)] = &[\n"); - let entries = dir + let mut entries = dir .read_dir() .unwrap() .map(|p| p.unwrap().path().display().to_string()) .collect::>(); + entries.sort(); for path in entries { if !path.ends_with(".wast") { continue; diff --git a/crates/fuzzing/src/lib.rs b/crates/fuzzing/src/lib.rs index d131149ddb..7ef3382411 100644 --- a/crates/fuzzing/src/lib.rs +++ b/crates/fuzzing/src/lib.rs @@ -39,9 +39,6 @@ pub fn fuzz_default_config(strategy: wasmtime::Strategy) -> anyhow::Result std::fs::write(&wat, s).expect("failed to write wat file"), + // If wasmprinter failed remove a `*.wat` file, if any, to avoid + // confusing a preexisting one with this wasm which failed to get + // printed. + Err(_) => drop(std::fs::remove_file(&wat)), } } +fn create_store(engine: &Engine) -> Store { + Store::new_with_limits( + &engine, + StoreLimitsBuilder::new() + // The limits here are chosen based on the default "maximum type size" + // configured in wasm-smith, which is 1000. This means that instances + // are allowed to, for example, export up to 1000 memories. We bump that + // a little bit here to give us some slop. 
+ .instances(1100) + .tables(1100) + .memories(1100) + .build(), + ) +} + /// Methods of timing out execution of a WebAssembly module #[derive(Debug)] pub enum Timeout { @@ -91,7 +110,7 @@ pub fn instantiate_with_config( _ => false, }); let engine = Engine::new(&config).unwrap(); - let store = Store::new(&engine); + let store = create_store(&engine); let mut timeout_state = SignalOnDrop::default(); match timeout { @@ -118,19 +137,31 @@ pub fn instantiate_with_config( Err(_) if !known_valid => return, Err(e) => panic!("failed to compile module: {:?}", e), }; - let imports = dummy_imports(&store, module.imports()); + let linker = dummy_linker(&store, &module); - match Instance::new(&store, &module, &imports) { + match linker.instantiate(&module) { Ok(_) => {} - // Allow traps which can happen normally with `unreachable` or a timeout - Err(e) if e.downcast_ref::().is_some() => {} - // Allow resource exhaustion since this is something that our wasm-smith - // generator doesn't guarantee is forbidden. - Err(e) if e.to_string().contains("resource limit exceeded") => {} - // Also allow errors related to fuel consumption - Err(e) if e.to_string().contains("all fuel consumed") => {} - // Everything else should be a bug in the fuzzer - Err(e) => panic!("failed to instantiate {}", e), + Err(e) => { + let string = e.to_string(); + // Allow traps which can happen normally with `unreachable` or a + // timeout + if e.downcast_ref::().is_some() + // Allow resource exhaustion since this is something that + // our wasm-smith generator doesn't guarantee is forbidden. 
+ || string.contains("resource limit exceeded") + // Also allow errors related to fuel consumption + || string.contains("all fuel consumed") + // Currently we instantiate with a `Linker` which can't instantiate + // every single module under the sun due to using name-based resolution + // rather than positional-based resolution + || string.contains("incompatible import type") + { + return; + } + + // Everything else should be a bug in the fuzzer + panic!("failed to instantiate {:?}", e); + } } } @@ -179,9 +210,15 @@ pub fn differential_execution( let wasm = module.to_bytes(); log_wasm(&wasm); - for config in &configs { - let engine = Engine::new(config).unwrap(); - let store = Store::new(&engine); + for mut config in configs { + // Disable module linking since it isn't enabled by default for + // `wasm_smith::Module` but is enabled by default for our fuzz config. + // Since module linking is currently a breaking change this is required + // to accept modules that would otherwise be broken by module linking. + config.wasm_module_linking(false); + + let engine = Engine::new(&config).unwrap(); + let store = create_store(&engine); let module = Module::new(&engine, &wasm).unwrap(); @@ -190,13 +227,13 @@ pub fn differential_execution( // in and with what values. Like the results of exported functions, // calls to imports should also yield the same values for each // configuration, and we should assert that. - let imports = dummy_imports(&store, module.imports()); + let linker = dummy_linker(&store, &module); // Don't unwrap this: there can be instantiation-/link-time errors that // aren't caught during validation or compilation. For example, an imported // table might not have room for an element segment that we want to // initialize into it. 
- let instance = match Instance::new(&store, &module, &imports) { + let instance = match linker.instantiate(&module) { Ok(instance) => instance, Err(e) => { eprintln!( @@ -326,7 +363,7 @@ pub fn make_api_calls(api: crate::generators::api::ApiCalls) { ApiCall::StoreNew => { log::trace!("creating store"); assert!(store.is_none()); - store = Some(Store::new(engine.as_ref().unwrap())); + store = Some(create_store(engine.as_ref().unwrap())); } ApiCall::ModuleNew { id, wasm } => { @@ -354,13 +391,13 @@ pub fn make_api_calls(api: crate::generators::api::ApiCalls) { }; let store = store.as_ref().unwrap(); - let imports = dummy_imports(store, module.imports()); + let linker = dummy_linker(store, module); // Don't unwrap this: there can be instantiation-/link-time errors that // aren't caught during validation or compilation. For example, an imported // table might not have room for an element segment that we want to // initialize into it. - if let Ok(instance) = Instance::new(store, &module, &imports) { + if let Ok(instance) = linker.instantiate(&module) { instances.insert(id, instance); } } @@ -416,7 +453,8 @@ pub fn spectest(fuzz_config: crate::generators::Config, test: crate::generators: let mut config = fuzz_config.to_wasmtime(); config.wasm_reference_types(false); config.wasm_bulk_memory(false); - let store = Store::new(&Engine::new(&config).unwrap()); + config.wasm_module_linking(false); + let store = create_store(&Engine::new(&config).unwrap()); if fuzz_config.consume_fuel { store.add_fuel(u64::max_value()).unwrap(); } @@ -440,7 +478,7 @@ pub fn table_ops( let mut config = fuzz_config.to_wasmtime(); config.wasm_reference_types(true); let engine = Engine::new(&config).unwrap(); - let store = Store::new(&engine); + let store = create_store(&engine); if fuzz_config.consume_fuel { store.add_fuel(u64::max_value()).unwrap(); } @@ -555,7 +593,7 @@ pub fn differential_wasmi_execution(wasm: &[u8], config: &crate::generators::Con let mut wasmtime_config = config.to_wasmtime(); 
wasmtime_config.cranelift_nan_canonicalization(true); let wasmtime_engine = Engine::new(&wasmtime_config).unwrap(); - let wasmtime_store = Store::new(&wasmtime_engine); + let wasmtime_store = create_store(&wasmtime_engine); if config.consume_fuel { wasmtime_store.add_fuel(u64::max_value()).unwrap(); } diff --git a/crates/fuzzing/src/oracles/dummy.rs b/crates/fuzzing/src/oracles/dummy.rs index b28574f873..99b5be736b 100644 --- a/crates/fuzzing/src/oracles/dummy.rs +++ b/crates/fuzzing/src/oracles/dummy.rs @@ -4,22 +4,45 @@ use std::fmt::Write; use wasmtime::*; /// Create a set of dummy functions/globals/etc for the given imports. -pub fn dummy_imports<'module>( - store: &Store, - import_tys: impl Iterator>, -) -> Vec { - import_tys - .map(|imp| match imp.ty() { - ExternType::Func(func_ty) => Extern::Func(dummy_func(&store, func_ty)), - ExternType::Global(global_ty) => Extern::Global(dummy_global(&store, global_ty)), - ExternType::Table(table_ty) => Extern::Table(dummy_table(&store, table_ty)), - ExternType::Memory(mem_ty) => Extern::Memory(dummy_memory(&store, mem_ty)), - ExternType::Instance(instance_ty) => { - Extern::Instance(dummy_instance(&store, instance_ty)) +pub fn dummy_linker<'module>(store: &Store, module: &Module) -> Linker { + let mut linker = Linker::new(store); + linker.allow_shadowing(true); + for import in module.imports() { + match import.name() { + Some(name) => { + linker + .define(import.module(), name, dummy_extern(store, import.ty())) + .unwrap(); } - ExternType::Module(module_ty) => Extern::Module(dummy_module(&store, module_ty)), - }) - .collect() + None => match import.ty() { + ExternType::Instance(ty) => { + for ty in ty.exports() { + linker + .define(import.module(), ty.name(), dummy_extern(store, ty.ty())) + .unwrap(); + } + } + other => { + linker + .define_name(import.module(), dummy_extern(store, other)) + .unwrap(); + } + }, + } + } + linker +} + +/// Construct a dummy `Extern` from its type signature +pub fn dummy_extern(store: 
&Store, ty: ExternType) -> Extern { + match ty { + ExternType::Func(func_ty) => Extern::Func(dummy_func(store, func_ty)), + ExternType::Global(global_ty) => Extern::Global(dummy_global(store, global_ty)), + ExternType::Table(table_ty) => Extern::Table(dummy_table(store, table_ty)), + ExternType::Memory(mem_ty) => Extern::Memory(dummy_memory(store, mem_ty)), + ExternType::Instance(instance_ty) => Extern::Instance(dummy_instance(store, instance_ty)), + ExternType::Module(module_ty) => Extern::Module(dummy_module(store, module_ty)), + } } /// Construct a dummy function for the given function type @@ -64,7 +87,7 @@ pub fn dummy_table(store: &Store, ty: TableType) -> Table { /// Construct a dummy memory for the given memory type. pub fn dummy_memory(store: &Store, ty: MemoryType) -> Memory { - Memory::new(store, ty) + Memory::new(store, ty).unwrap() } /// Construct a dummy instance for the given instance type. diff --git a/crates/jit/Cargo.toml b/crates/jit/Cargo.toml index 4ddb6508cd..1e4caea321 100644 --- a/crates/jit/Cargo.toml +++ b/crates/jit/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wasmtime-jit" -version = "0.25.0" +version = "0.26.0" authors = ["The Wasmtime Project Developers"] description = "JIT-style execution for WebAsssembly code in Cranelift" documentation = "https://docs.rs/wasmtime-jit" @@ -12,31 +12,31 @@ readme = "README.md" edition = "2018" [dependencies] -cranelift-codegen = { path = "../../cranelift/codegen", version = "0.72.0", features = ["enable-serde"] } -cranelift-entity = { path = "../../cranelift/entity", version = "0.72.0", features = ["enable-serde"] } -cranelift-wasm = { path = "../../cranelift/wasm", version = "0.72.0", features = ["enable-serde"] } -cranelift-native = { path = "../../cranelift/native", version = "0.72.0" } -cranelift-frontend = { path = "../../cranelift/frontend", version = "0.72.0" } -wasmtime-environ = { path = "../environ", version = "0.25.0" } -wasmtime-runtime = { path = "../runtime", version = "0.25.0" } 
-wasmtime-cranelift = { path = "../cranelift", version = "0.25.0" } -wasmtime-lightbeam = { path = "../lightbeam/wasmtime", version = "0.25.0", optional = true } -wasmtime-debug = { path = "../debug", version = "0.25.0" } -wasmtime-profiling = { path = "../profiling", version = "0.25.0" } -wasmtime-obj = { path = "../obj", version = "0.25.0" } +cranelift-codegen = { path = "../../cranelift/codegen", version = "0.73.0", features = ["enable-serde"] } +cranelift-entity = { path = "../../cranelift/entity", version = "0.73.0", features = ["enable-serde"] } +cranelift-wasm = { path = "../../cranelift/wasm", version = "0.73.0", features = ["enable-serde"] } +cranelift-native = { path = "../../cranelift/native", version = "0.73.0" } +cranelift-frontend = { path = "../../cranelift/frontend", version = "0.73.0" } +wasmtime-environ = { path = "../environ", version = "0.26.0" } +wasmtime-runtime = { path = "../runtime", version = "0.26.0" } +wasmtime-cranelift = { path = "../cranelift", version = "0.26.0" } +wasmtime-lightbeam = { path = "../lightbeam/wasmtime", version = "0.26.0", optional = true } +wasmtime-debug = { path = "../debug", version = "0.26.0" } +wasmtime-profiling = { path = "../profiling", version = "0.26.0" } +wasmtime-obj = { path = "../obj", version = "0.26.0" } rayon = { version = "1.0", optional = true } region = "2.2.0" thiserror = "1.0.4" -target-lexicon = { version = "0.11.0", default-features = false } -wasmparser = "0.76" +target-lexicon = { version = "0.12.0", default-features = false } +wasmparser = "0.77" more-asserts = "0.2.1" anyhow = "1.0" cfg-if = "1.0" log = "0.4" -gimli = { version = "0.23.0", default-features = false, features = ["write"] } -object = { version = "0.23.0", default-features = false, features = ["write"] } +gimli = { version = "0.24.0", default-features = false, features = ["write"] } +object = { version = "0.24.0", default-features = false, features = ["write"] } serde = { version = "1.0.94", features = ["derive"] } -addr2line 
= { version = "0.14", default-features = false } +addr2line = { version = "0.15", default-features = false } [target.'cfg(target_os = "windows")'.dependencies] winapi = { version = "0.3.8", features = ["winnt", "impl-default"] } @@ -46,10 +46,10 @@ lightbeam = ["wasmtime-lightbeam"] jitdump = ["wasmtime-profiling/jitdump"] vtune = ["wasmtime-profiling/vtune"] parallel-compilation = ["rayon"] +all-arch = ["cranelift-codegen/all-arch"] -# Try the experimental, work-in-progress new x86_64 backend. This is not stable -# as of June 2020. -experimental_x64 = ["cranelift-codegen/x64"] +# Use the old x86 backend. +old-x86-backend = ["cranelift-codegen/old-x86-backend"] [badges] maintenance = { status = "actively-developed" } diff --git a/crates/jit/src/code_memory.rs b/crates/jit/src/code_memory.rs index 49a2d0ecd4..3362ed8b57 100644 --- a/crates/jit/src/code_memory.rs +++ b/crates/jit/src/code_memory.rs @@ -61,6 +61,15 @@ impl<'a> CodeMemoryObjectAllocation<'a> { pub fn code_range(self) -> &'a mut [u8] { self.buf } + + pub fn funcs_len(&self) -> usize { + self.funcs.len() + } + + pub fn trampolines_len(&self) -> usize { + self.trampolines.len() + } + pub fn funcs(&'a self) -> impl Iterator + 'a { let buf = self.buf as *const _ as *mut [u8]; self.funcs.iter().map(move |(i, (start, len))| { @@ -69,6 +78,7 @@ impl<'a> CodeMemoryObjectAllocation<'a> { }) }) } + pub fn trampolines( &'a self, ) -> impl Iterator + 'a { @@ -312,7 +322,7 @@ impl CodeMemory { } } - // Register all unwind entiries for functions and trampolines. + // Register all unwind entries for functions and trampolines. // TODO will `u32` type for start/len be enough for large code base. 
for i in unwind_info { match i { diff --git a/crates/jit/src/compiler.rs b/crates/jit/src/compiler.rs index fe94c27c02..6d2fc7caf2 100644 --- a/crates/jit/src/compiler.rs +++ b/crates/jit/src/compiler.rs @@ -5,6 +5,7 @@ use crate::object::{build_object, ObjectUnwindInfo}; use object::write::Object; #[cfg(feature = "parallel-compilation")] use rayon::prelude::*; +use serde::{Deserialize, Serialize}; use std::hash::{Hash, Hasher}; use std::mem; use wasmparser::WasmFeatures; @@ -18,7 +19,7 @@ use wasmtime_environ::{ }; /// Select which kind of compilation to use. -#[derive(Copy, Clone, Debug, Hash)] +#[derive(Copy, Clone, Debug, Hash, Serialize, Deserialize, Eq, PartialEq)] pub enum CompilationStrategy { /// Let Wasmtime pick the strategy. Auto, @@ -108,6 +109,11 @@ impl Compiler { self.isa.as_ref() } + /// Return the compiler's strategy. + pub fn strategy(&self) -> CompilationStrategy { + self.strategy + } + /// Return the target's frontend configuration settings. pub fn frontend_config(&self) -> TargetFrontendConfig { self.isa.frontend_config() diff --git a/crates/jit/src/instantiate.rs b/crates/jit/src/instantiate.rs index df6a17fa24..dd3f3577c7 100644 --- a/crates/jit/src/instantiate.rs +++ b/crates/jit/src/instantiate.rs @@ -83,6 +83,7 @@ struct DebugInfo { code_section_offset: u64, debug_abbrev: Range, debug_addr: Range, + debug_aranges: Range, debug_info: Range, debug_line: Range, debug_line_str: Range, @@ -176,11 +177,13 @@ struct FinishedFunctions(PrimaryMap); unsafe impl Send for FinishedFunctions {} unsafe impl Sync for FinishedFunctions {} +/// Information about a function, such as trap information, address map, +/// and stack maps. 
#[derive(Serialize, Deserialize, Clone)] -struct FunctionInfo { - traps: Vec, - address_map: FunctionAddressMap, - stack_maps: Vec, +pub struct FunctionInfo { + pub traps: Vec, + pub address_map: FunctionAddressMap, + pub stack_maps: Vec, } /// This is intended to mirror the type tables in `wasmtime_environ`, except that @@ -195,17 +198,25 @@ pub struct TypeTables { /// Container for data needed for an Instance function to exist. pub struct ModuleCode { + range: (usize, usize), code_memory: CodeMemory, #[allow(dead_code)] dbg_jit_registration: Option, } +impl ModuleCode { + /// Gets the [begin, end) range of the module's code. + pub fn range(&self) -> (usize, usize) { + self.range + } +} + /// A compiled wasm module, ready to be instantiated. pub struct CompiledModule { artifacts: CompilationArtifacts, code: Arc, finished_functions: FinishedFunctions, - trampolines: PrimaryMap, + trampolines: Vec<(SignatureIndex, VMTrampoline)>, } impl CompiledModule { @@ -259,10 +270,13 @@ impl CompiledModule { }; let finished_functions = FinishedFunctions(finished_functions); + let start = code_range.0 as usize; + let end = start + code_range.1; Ok(Arc::new(Self { artifacts, code: Arc::new(ModuleCode { + range: (start, end), code_memory, dbg_jit_registration, }), @@ -287,12 +301,13 @@ impl CompiledModule { } /// Returns the map of all finished JIT functions compiled for this module + #[inline] pub fn finished_functions(&self) -> &PrimaryMap { &self.finished_functions.0 } /// Returns the per-signature trampolines for this module. - pub fn trampolines(&self) -> &PrimaryMap { + pub fn trampolines(&self) -> &[(SignatureIndex, VMTrampoline)] { &self.trampolines } @@ -312,25 +327,52 @@ impl CompiledModule { ) } - /// Iterates over all functions in this module, returning information about - /// how to decode traps which happen in the function. 
- pub fn trap_information( - &self, - ) -> impl Iterator< - Item = ( - DefinedFuncIndex, - *mut [VMFunctionBody], - &[TrapInformation], - &FunctionAddressMap, - ), - > { - self.finished_functions() - .iter() - .zip(self.artifacts.funcs.values()) - .map(|((i, alloc), func)| (i, *alloc, func.traps.as_slice(), &func.address_map)) + /// Lookups a defined function by a program counter value. + /// + /// Returns the defined function index, the start address, and the end address (exclusive). + pub fn func_by_pc(&self, pc: usize) -> Option<(DefinedFuncIndex, usize, usize)> { + let functions = self.finished_functions(); + + let index = match functions.binary_search_values_by_key(&pc, |body| unsafe { + debug_assert!(!(**body).is_empty()); + // Return the inclusive "end" of the function + (**body).as_ptr() as usize + (**body).len() - 1 + }) { + Ok(k) => { + // Exact match, pc is at the end of this function + k + } + Err(k) => { + // Not an exact match, k is where `pc` would be "inserted" + // Since we key based on the end, function `k` might contain `pc`, + // so we'll validate on the range check below + k + } + }; + + let body = functions.get(index)?; + let (start, end) = unsafe { + let ptr = (**body).as_ptr(); + let len = (**body).len(); + (ptr as usize, ptr as usize + len) + }; + + if pc < start || end < pc { + return None; + } + + Some((index, start, end)) } - /// Returns all ranges convered by JIT code. + /// Gets the function information for a given function index. + pub fn func_info(&self, index: DefinedFuncIndex) -> &FunctionInfo { + self.artifacts + .funcs + .get(index) + .expect("defined function should be present") + } + + /// Returns all ranges covered by JIT code. 
pub fn jit_code_ranges<'a>(&'a self) -> impl Iterator + 'a { self.code.code_memory.published_ranges() } @@ -359,6 +401,7 @@ impl CompiledModule { let cx = addr2line::Context::from_sections( EndianSlice::new(&data[info.debug_abbrev.clone()], endian).into(), EndianSlice::new(&data[info.debug_addr.clone()], endian).into(), + EndianSlice::new(&data[info.debug_aranges.clone()], endian).into(), EndianSlice::new(&data[info.debug_info.clone()], endian).into(), EndianSlice::new(&data[info.debug_line.clone()], endian).into(), EndianSlice::new(&data[info.debug_line_str.clone()], endian).into(), @@ -438,13 +481,13 @@ fn build_code_memory( isa: &dyn TargetIsa, obj: &[u8], module: &Module, - unwind_info: &Box<[ObjectUnwindInfo]>, + unwind_info: &[ObjectUnwindInfo], ) -> Result< ( CodeMemory, (*const u8, usize), PrimaryMap, - PrimaryMap, + Vec<(SignatureIndex, VMTrampoline)>, ), String, > { @@ -454,21 +497,32 @@ fn build_code_memory( let allocation = code_memory.allocate_for_object(&obj, unwind_info)?; - // Second, create a PrimaryMap from result vector of pointers. 
- let mut finished_functions = PrimaryMap::new(); + // Populate the finished functions from the allocation + let mut finished_functions = PrimaryMap::with_capacity(allocation.funcs_len()); for (i, fat_ptr) in allocation.funcs() { + let start = fat_ptr.as_ptr() as usize; let fat_ptr: *mut [VMFunctionBody] = fat_ptr; + // Assert that the function bodies are pushed in sort order + // This property is relied upon to search for functions by PC values + assert!( + start + > finished_functions + .last() + .map(|f: &*mut [VMFunctionBody]| unsafe { (**f).as_ptr() as usize }) + .unwrap_or(0) + ); assert_eq!( Some(finished_functions.push(fat_ptr)), module.defined_func_index(i) ); } - let mut trampolines = PrimaryMap::new(); + // Populate the trampolines from the allocation + let mut trampolines = Vec::with_capacity(allocation.trampolines_len()); for (i, fat_ptr) in allocation.trampolines() { - let fat_ptr = + let fnptr = unsafe { std::mem::transmute::<*const VMFunctionBody, VMTrampoline>(fat_ptr.as_ptr()) }; - assert_eq!(trampolines.push(fat_ptr), i); + trampolines.push((i, fnptr)); } let code_range = allocation.code_range(); @@ -494,6 +548,7 @@ impl From> for DebugInfo { }; let debug_abbrev = push(raw.dwarf.debug_abbrev.reader().slice()); let debug_addr = push(raw.dwarf.debug_addr.reader().slice()); + let debug_aranges = push(raw.dwarf.debug_aranges.reader().slice()); let debug_info = push(raw.dwarf.debug_info.reader().slice()); let debug_line = push(raw.dwarf.debug_line.reader().slice()); let debug_line_str = push(raw.dwarf.debug_line_str.reader().slice()); @@ -505,6 +560,7 @@ impl From> for DebugInfo { data: data.into(), debug_abbrev, debug_addr, + debug_aranges, debug_info, debug_line, debug_line_str, diff --git a/crates/jit/src/lib.rs b/crates/jit/src/lib.rs index 17e6294250..12329ea266 100644 --- a/crates/jit/src/lib.rs +++ b/crates/jit/src/lib.rs @@ -50,6 +50,7 @@ pub use crate::instantiate::{ CompilationArtifacts, CompiledModule, ModuleCode, SetupError, 
SymbolizeContext, TypeTables, }; pub use crate::link::link_module; +pub use wasmtime_cranelift::{blank_sig, wasmtime_call_conv}; /// Version number of this crate. pub const VERSION: &str = env!("CARGO_PKG_VERSION"); diff --git a/crates/jit/src/link.rs b/crates/jit/src/link.rs index 9c58a138a5..fffd9ecca8 100644 --- a/crates/jit/src/link.rs +++ b/crates/jit/src/link.rs @@ -111,6 +111,19 @@ fn apply_reloc( ); write_unaligned(reloc_address as *mut u32, reloc_delta_u64 as u32); }, + #[cfg(target_pointer_width = "64")] + (RelocationKind::Relative, RelocationEncoding::S390xDbl, 32) => unsafe { + let reloc_address = body.add(offset as usize) as usize; + let reloc_addend = r.addend() as isize; + let reloc_delta_u64 = (target_func_address as u64) + .wrapping_sub(reloc_address as u64) + .wrapping_add(reloc_addend as u64); + assert!( + (reloc_delta_u64 as isize) >> 1 <= i32::max_value() as isize, + "relocation too large to fit in i32" + ); + write_unaligned(reloc_address as *mut u32, (reloc_delta_u64 >> 1) as u32); + }, (RelocationKind::Elf(elf::R_AARCH64_CALL26), RelocationEncoding::Generic, 32) => unsafe { let reloc_address = body.add(offset as usize) as usize; let reloc_addend = r.addend() as isize; diff --git a/crates/jit/src/native.rs b/crates/jit/src/native.rs index afcf83d3cc..706cbc04b4 100644 --- a/crates/jit/src/native.rs +++ b/crates/jit/src/native.rs @@ -11,9 +11,4 @@ pub fn builder_without_flags() -> cranelift_codegen::isa::Builder { .expect("host machine is not a supported target") } -pub fn call_conv() -> cranelift_codegen::isa::CallConv { - use target_lexicon::HOST; - cranelift_codegen::isa::CallConv::triple_default(&HOST) -} - pub use cranelift_codegen::isa::lookup; diff --git a/crates/jit/src/object.rs b/crates/jit/src/object.rs index 24b431e597..e73fa8cf81 100644 --- a/crates/jit/src/object.rs +++ b/crates/jit/src/object.rs @@ -4,8 +4,8 @@ use super::trampoline::build_trampoline; use cranelift_frontend::FunctionBuilderContext; use object::write::Object; use 
serde::{Deserialize, Serialize}; +use std::collections::BTreeSet; use wasmtime_debug::DwarfSection; -use wasmtime_environ::entity::PrimaryMap; use wasmtime_environ::isa::{unwind::UnwindInfo, TargetIsa}; use wasmtime_environ::wasm::{FuncIndex, SignatureIndex}; use wasmtime_environ::{CompiledFunctions, ModuleTranslation, TypeTables}; @@ -39,22 +39,26 @@ pub(crate) fn build_object( .map(|info| ObjectUnwindInfo::Func(translation.module.func_index(index), info.clone())) })); - let mut trampolines = PrimaryMap::with_capacity(types.native_signatures.len()); + // Build trampolines for every signature that can be used by this module. + let signatures = translation + .module + .functions + .iter() + .filter_map(|(i, sig)| match translation.module.defined_func_index(i) { + Some(i) if !translation.module.possibly_exported_funcs.contains(&i) => None, + _ => Some(*sig), + }) + .collect::>(); + let mut trampolines = Vec::with_capacity(signatures.len()); let mut cx = FunctionBuilderContext::new(); - // Build trampolines for every signature. - // - // TODO: for the module linking proposal this builds too many native - // signatures. This builds trampolines for all signatures for all modules - // for each module. That's a lot of trampolines! We should instead figure - // out a way to share trampolines amongst all modules when compiling - // module-linking modules. - for (i, native_sig) in types.native_signatures.iter() { - let func = build_trampoline(isa, &mut cx, native_sig, std::mem::size_of::())?; + for i in signatures { + let native_sig = wasmtime_cranelift::indirect_signature(isa, &types, i); + let func = build_trampoline(isa, &mut cx, &native_sig, std::mem::size_of::())?; // Preserve trampoline function unwind info. 
if let Some(info) = &func.unwind_info { unwind_info.push(ObjectUnwindInfo::Trampoline(i, info.clone())) } - trampolines.push(func); + trampolines.push((i, func)); } let target = ObjectBuilderTarget::new(isa.triple().architecture)?; diff --git a/crates/jit/src/trampoline.rs b/crates/jit/src/trampoline.rs index bb470dbcb3..b8262fd93f 100644 --- a/crates/jit/src/trampoline.rs +++ b/crates/jit/src/trampoline.rs @@ -9,7 +9,7 @@ use wasmtime_runtime::{InstantiationError, VMFunctionBody, VMTrampoline}; pub mod ir { pub(super) use cranelift_codegen::ir::{ - AbiParam, ArgumentPurpose, ConstantOffset, JumpTable, Signature, SourceLoc, + AbiParam, ConstantOffset, JumpTable, Signature, SourceLoc, }; pub use cranelift_codegen::ir::{ ExternalName, Function, InstBuilder, MemFlags, StackSlotData, StackSlotKind, @@ -52,16 +52,8 @@ pub(crate) fn build_trampoline( value_size: usize, ) -> Result { let pointer_type = isa.pointer_type(); - let mut wrapper_sig = ir::Signature::new(isa.frontend_config().default_call_conv); - - // Add the callee `vmctx` parameter. - wrapper_sig.params.push(ir::AbiParam::special( - pointer_type, - ir::ArgumentPurpose::VMContext, - )); - - // Add the caller `vmctx` parameter. - wrapper_sig.params.push(ir::AbiParam::new(pointer_type)); + let mut wrapper_sig = + wasmtime_cranelift::blank_sig(isa, wasmtime_cranelift::wasmtime_call_conv(isa)); // Add the `callee_address` parameter. 
wrapper_sig.params.push(ir::AbiParam::new(pointer_type)); diff --git a/crates/jit/src/unwind/systemv.rs b/crates/jit/src/unwind/systemv.rs index b449f5d176..79f54ee386 100644 --- a/crates/jit/src/unwind/systemv.rs +++ b/crates/jit/src/unwind/systemv.rs @@ -90,7 +90,10 @@ impl UnwindRegistry { let mut eh_frame = EhFrame(EndianVec::new(RunTimeEndian::default())); table.write_eh_frame(&mut eh_frame).unwrap(); - if cfg!(any(all(target_os = "linux", target_env = "gnu"), target_os = "freebsd")) { + if cfg!(any( + all(target_os = "linux", target_env = "gnu"), + target_os = "freebsd" + )) { // libgcc expects a terminating "empty" length, so write a 0 length at the end of the table. eh_frame.0.write_u32(0).unwrap(); } @@ -101,7 +104,10 @@ impl UnwindRegistry { } unsafe fn register_frames(&mut self) { - if cfg!(any(all(target_os = "linux", target_env = "gnu"), target_os = "freebsd")) { + if cfg!(any( + all(target_os = "linux", target_env = "gnu"), + target_os = "freebsd" + )) { // On gnu (libgcc), `__register_frame` will walk the FDEs until an entry of length 0 let ptr = self.frame_table.as_ptr(); __register_frame(ptr); diff --git a/crates/lightbeam/Cargo.toml b/crates/lightbeam/Cargo.toml index a09695b032..6f30be3d49 100644 --- a/crates/lightbeam/Cargo.toml +++ b/crates/lightbeam/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lightbeam" -version = "0.25.0" +version = "0.26.0" authors = ["The Lightbeam Project Developers"] description = "An optimising one-pass streaming compiler for WebAssembly" license = "Apache-2.0 WITH LLVM-exception" @@ -13,22 +13,22 @@ edition = "2018" [dependencies] arrayvec = "0.5" capstone = "0.7.0" -cranelift-codegen = { path = "../../cranelift/codegen", version = "0.72.0" } +cranelift-codegen = { path = "../../cranelift/codegen", version = "0.73.0" } derive_more = "0.99" dynasm = "1.0.0" dynasmrt = "1.0.0" -iter-enum = "0.2" +iter-enum = "1" itertools = "0.10.0" memoffset = "0.6.0" more-asserts = "0.2.1" smallvec = "1.6.1" thiserror = "1.0.9" typemap 
= "0.3" -wasmparser = "0.76" +wasmparser = "0.77" [dev-dependencies] lazy_static = "1.2" -wat = "1.0.36" +wat = "1.0.37" quickcheck = "1.0.0" anyhow = "1.0" diff --git a/crates/lightbeam/wasmtime/Cargo.toml b/crates/lightbeam/wasmtime/Cargo.toml index 7ddea6957c..a7870ca49c 100644 --- a/crates/lightbeam/wasmtime/Cargo.toml +++ b/crates/lightbeam/wasmtime/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wasmtime-lightbeam" -version = "0.25.0" +version = "0.26.0" authors = ["The Wasmtime Project Developers"] description = "Integration between Lightbeam and Wasmtime" license = "Apache-2.0 WITH LLVM-exception" @@ -12,7 +12,7 @@ readme = "README.md" edition = "2018" [dependencies] -lightbeam = { path = "..", version = "0.25.0" } -wasmparser = "0.76" -cranelift-codegen = { path = "../../../cranelift/codegen", version = "0.72.0" } -wasmtime-environ = { path = "../../environ", version = "0.25.0" } +lightbeam = { path = "..", version = "0.26.0" } +wasmparser = "0.77" +cranelift-codegen = { path = "../../../cranelift/codegen", version = "0.73.0" } +wasmtime-environ = { path = "../../environ", version = "0.26.0" } diff --git a/crates/lightbeam/wasmtime/src/lib.rs b/crates/lightbeam/wasmtime/src/lib.rs index a798eee4d2..acb69a11aa 100644 --- a/crates/lightbeam/wasmtime/src/lib.rs +++ b/crates/lightbeam/wasmtime/src/lib.rs @@ -9,12 +9,12 @@ use cranelift_codegen::isa; use lightbeam::{CodeGenSession, NullOffsetSink, Sinks}; use wasmtime_environ::wasm::{ DefinedFuncIndex, DefinedGlobalIndex, DefinedMemoryIndex, DefinedTableIndex, FuncIndex, - GlobalIndex, MemoryIndex, SignatureIndex, TableIndex, TypeIndex, + GlobalIndex, MemoryIndex, TableIndex, TypeIndex, }; use wasmtime_environ::{ - entity::PrimaryMap, BuiltinFunctionIndex, CompileError, CompiledFunction, Compiler, - FunctionBodyData, Module, ModuleTranslation, Relocation, RelocationTarget, TrapInformation, - Tunables, TypeTables, VMOffsets, + BuiltinFunctionIndex, CompileError, CompiledFunction, Compiler, FunctionBodyData, Module, 
+ ModuleTranslation, Relocation, RelocationTarget, TrapInformation, Tunables, TypeTables, + VMOffsets, }; /// A compiler that compiles a WebAssembly module with Lightbeam, directly translating the Wasm file. @@ -28,14 +28,14 @@ impl Compiler for Lightbeam { function_body: FunctionBodyData<'_>, isa: &dyn isa::TargetIsa, tunables: &Tunables, - types: &TypeTables, + _types: &TypeTables, ) -> Result { if tunables.generate_native_debuginfo { return Err(CompileError::DebugInfoNotSupported); } let func_index = translation.module.func_index(i); - let env = FuncEnvironment::new(isa.frontend_config().pointer_bytes(), translation, types); + let env = FuncEnvironment::new(isa.frontend_config().pointer_bytes(), translation); let mut codegen_session: CodeGenSession<_> = CodeGenSession::new( translation.function_body_inputs.len() as u32, &env, @@ -174,22 +174,15 @@ struct FuncEnvironment<'module_environment> { /// The module-level environment which this function-level environment belongs to. module: &'module_environment Module, - native_signatures: &'module_environment PrimaryMap, - /// Offsets to struct fields accessed by JIT code. 
offsets: VMOffsets, } impl<'module_environment> FuncEnvironment<'module_environment> { - fn new( - pointer_bytes: u8, - translation: &'module_environment ModuleTranslation<'_>, - types: &'module_environment TypeTables, - ) -> Self { + fn new(pointer_bytes: u8, translation: &'module_environment ModuleTranslation<'_>) -> Self { Self { module: &translation.module, offsets: VMOffsets::new(pointer_bytes, &translation.module), - native_signatures: &types.native_signatures, } } } @@ -227,8 +220,8 @@ impl lightbeam::ModuleContext for FuncEnvironment<'_> { self.module.functions[FuncIndex::from_u32(func_idx)].as_u32() } - fn signature(&self, index: u32) -> &Self::Signature { - &self.native_signatures[SignatureIndex::from_u32(index)] + fn signature(&self, _index: u32) -> &Self::Signature { + panic!("not implemented") } fn defined_table_index(&self, table_index: u32) -> Option { diff --git a/crates/misc/run-examples/src/main.rs b/crates/misc/run-examples/src/main.rs index 12746b2a38..362f9e1774 100644 --- a/crates/misc/run-examples/src/main.rs +++ b/crates/misc/run-examples/src/main.rs @@ -45,10 +45,13 @@ fn main() -> anyhow::Result<()> { .arg(target))?; } println!("======== Rust example `{}` ============", example); - run(Command::new("cargo") - .arg("run") - .arg("--example") - .arg(&example))?; + let mut cargo_cmd = Command::new("cargo"); + cargo_cmd.arg("run").arg("--example").arg(&example); + + if example.contains("tokio") { + cargo_cmd.arg("--features").arg("wasmtime-wasi/tokio"); + } + run(&mut cargo_cmd)?; println!("======== C/C++ example `{}` ============", example); for extension in ["c", "cc"].iter() { diff --git a/crates/misc/rust/Cargo.toml b/crates/misc/rust/Cargo.toml index 70e7377e7c..e99d01ec0a 100644 --- a/crates/misc/rust/Cargo.toml +++ b/crates/misc/rust/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wasmtime-rust" -version = "0.25.0" +version = "0.26.0" authors = ["Alex Crichton "] description = "Rust extension for Wasmtime" license = "Apache-2.0 WITH 
LLVM-exception" @@ -15,9 +15,9 @@ test = false doctest = false [dependencies] -wasmtime-rust-macro = { path = "./macro", version = "0.25.0" } -wasmtime-wasi = { path = "../../wasi", version = "0.25.0" } -wasmtime = { path = "../../wasmtime", version = "0.25.0" } +wasmtime-rust-macro = { path = "./macro", version = "0.26.0" } +wasmtime-wasi = { path = "../../wasi", version = "0.26.0" } +wasmtime = { path = "../../wasmtime", version = "0.26.0" } anyhow = "1.0.19" [badges] diff --git a/crates/misc/rust/macro/Cargo.toml b/crates/misc/rust/macro/Cargo.toml index e17730139d..de7f489f75 100644 --- a/crates/misc/rust/macro/Cargo.toml +++ b/crates/misc/rust/macro/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wasmtime-rust-macro" -version = "0.25.0" +version = "0.26.0" authors = ["Alex Crichton "] description = "Macro support crate for wasmtime-rust" license = "Apache-2.0 WITH LLVM-exception" diff --git a/crates/obj/Cargo.toml b/crates/obj/Cargo.toml index 6748b1d8ec..6dd132af17 100644 --- a/crates/obj/Cargo.toml +++ b/crates/obj/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wasmtime-obj" -version = "0.25.0" +version = "0.26.0" authors = ["The Wasmtime Project Developers"] description = "Native object file output for WebAsssembly code in Wasmtime" license = "Apache-2.0 WITH LLVM-exception" @@ -12,11 +12,11 @@ edition = "2018" [dependencies] anyhow = "1.0" -wasmtime-environ = { path = "../environ", version = "0.25.0" } -object = { version = "0.23.0", default-features = false, features = ["write"] } +wasmtime-environ = { path = "../environ", version = "0.26.0" } +object = { version = "0.24.0", default-features = false, features = ["write"] } more-asserts = "0.2.1" -target-lexicon = { version = "0.11.0", default-features = false } -wasmtime-debug = { path = "../debug", version = "0.25.0" } +target-lexicon = { version = "0.12.0", default-features = false } +wasmtime-debug = { path = "../debug", version = "0.26.0" } [badges] maintenance = { status = "experimental" } diff --git 
a/crates/obj/src/builder.rs b/crates/obj/src/builder.rs index 046ae5f784..f606d72078 100644 --- a/crates/obj/src/builder.rs +++ b/crates/obj/src/builder.rs @@ -80,6 +80,7 @@ fn to_object_relocations<'a>( RelocationEncoding::Generic, 32, ), + Reloc::S390xPCRel32Dbl => (RelocationKind::Relative, RelocationEncoding::S390xDbl, 32), other => unimplemented!("Unimplemented relocation {:?}", other), }; Some(ObjectRelocation { @@ -102,6 +103,7 @@ fn to_object_architecture( X86_64 => Architecture::X86_64, Arm(_) => Architecture::Arm, Aarch64(_) => Architecture::Aarch64, + S390x => Architecture::S390x, architecture => { anyhow::bail!("target architecture {:?} is unsupported", architecture,); } @@ -257,7 +259,7 @@ pub struct ObjectBuilder<'a> { module: &'a Module, code_alignment: u64, compilation: &'a CompiledFunctions, - trampolines: PrimaryMap, + trampolines: Vec<(SignatureIndex, CompiledFunction)>, dwarf_sections: Vec, } @@ -271,7 +273,7 @@ impl<'a> ObjectBuilder<'a> { target, module, code_alignment: 1, - trampolines: PrimaryMap::new(), + trampolines: Vec::new(), dwarf_sections: vec![], compilation, } @@ -284,7 +286,7 @@ impl<'a> ObjectBuilder<'a> { pub fn set_trampolines( &mut self, - trampolines: PrimaryMap, + trampolines: Vec<(SignatureIndex, CompiledFunction)>, ) -> &mut Self { self.trampolines = trampolines; self @@ -359,7 +361,7 @@ impl<'a> ObjectBuilder<'a> { } let mut trampolines = Vec::new(); for (i, func) in self.trampolines.iter() { - let name = utils::trampoline_symbol_name(i).as_bytes().to_vec(); + let name = utils::trampoline_symbol_name(*i).as_bytes().to_vec(); trampolines.push(append_func(name, func)); } @@ -399,7 +401,7 @@ impl<'a> ObjectBuilder<'a> { } } - for (func, symbol) in self.trampolines.values().zip(trampolines) { + for ((_, func), symbol) in self.trampolines.iter().zip(trampolines) { let (_, off) = obj.symbol_section_and_offset(symbol).unwrap(); for r in to_object_relocations( func.relocations.iter(), diff --git a/crates/profiling/Cargo.toml 
b/crates/profiling/Cargo.toml index 712a6e4470..afa7231e76 100644 --- a/crates/profiling/Cargo.toml +++ b/crates/profiling/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wasmtime-profiling" -version = "0.25.0" +version = "0.26.0" authors = ["The Wasmtime Project Developers"] description = "Runtime library support for Wasmtime" license = "Apache-2.0 WITH LLVM-exception" @@ -13,18 +13,18 @@ edition = "2018" [dependencies] anyhow = "1.0" cfg-if = "1.0" -gimli = { version = "0.23.0", optional = true } +gimli = { version = "0.24.0", optional = true } lazy_static = "1.4" libc = { version = "0.2.60", default-features = false } scroll = { version = "0.10.1", features = ["derive"], optional = true } serde = { version = "1.0.99", features = ["derive"] } -target-lexicon = "0.11.0" -wasmtime-environ = { path = "../environ", version = "0.25.0" } -wasmtime-runtime = { path = "../runtime", version = "0.25.0" } +target-lexicon = "0.12.0" +wasmtime-environ = { path = "../environ", version = "0.26.0" } +wasmtime-runtime = { path = "../runtime", version = "0.26.0" } ittapi-rs = { version = "0.1.5", optional = true } [dependencies.object] -version = "0.23.0" +version = "0.24.0" optional = true default-features = false features = ['read_core', 'elf', 'std'] diff --git a/crates/profiling/src/jitdump_linux.rs b/crates/profiling/src/jitdump_linux.rs index 0f3a4bb00d..d5d26be92b 100644 --- a/crates/profiling/src/jitdump_linux.rs +++ b/crates/profiling/src/jitdump_linux.rs @@ -241,6 +241,7 @@ impl State { Architecture::X86_32(_) => elf::EM_386 as u32, Architecture::Arm(_) => elf::EM_ARM as u32, Architecture::Aarch64(_) => elf::EM_AARCH64 as u32, + Architecture::S390x => elf::EM_S390 as u32, _ => unimplemented!("unrecognized architecture"), } } @@ -370,7 +371,7 @@ impl State { pid: u32, tid: u32, ) -> Result<()> { - let file = object::File::parse(&dbg_image).unwrap(); + let file = object::File::parse(dbg_image).unwrap(); let endian = if file.is_little_endian() { gimli::RunTimeEndian::Little } 
else { @@ -385,8 +386,7 @@ impl State { } }; - let load_section_sup = |_| Ok(borrow::Cow::Borrowed(&[][..])); - let dwarf_cow = gimli::Dwarf::load(&load_section, &load_section_sup)?; + let dwarf_cow = gimli::Dwarf::load(&load_section)?; let borrow_section: &dyn for<'a> Fn( &'a borrow::Cow<[u8]>, ) @@ -598,9 +598,9 @@ impl State { header: RecordHeader { id: RecordId::JitCodeDebugInfo as u32, record_size: 0, - timestamp: timestamp, + timestamp, }, - address: address, + address, count: 0, }; @@ -616,9 +616,9 @@ impl State { ) .unwrap(); let filename = myfile.to_string_lossy()?; - let line = row.line().unwrap_or(0); + let line = row.line().map(|nonzero| nonzero.get()).unwrap_or(0); let column = match row.column() { - gimli::ColumnType::Column(column) => column, + gimli::ColumnType::Column(column) => column.get(), gimli::ColumnType::LeftEdge => 0, }; diff --git a/crates/runtime/Cargo.toml b/crates/runtime/Cargo.toml index 715cbafa0f..66bd893f9c 100644 --- a/crates/runtime/Cargo.toml +++ b/crates/runtime/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wasmtime-runtime" -version = "0.25.0" +version = "0.26.0" authors = ["The Wasmtime Project Developers"] description = "Runtime library support for Wasmtime" documentation = "https://docs.rs/wasmtime-runtime" @@ -12,7 +12,8 @@ readme = "README.md" edition = "2018" [dependencies] -wasmtime-environ = { path = "../environ", version = "0.25.0" } +wasmtime-environ = { path = "../environ", version = "0.26.0" } +wasmtime-fiber = { path = "../fiber", version = "0.26.0", optional = true } region = "2.1.0" libc = { version = "0.2.82", default-features = false } log = "0.4.8" @@ -23,7 +24,6 @@ more-asserts = "0.2.1" cfg-if = "1.0" backtrace = "0.3.55" lazy_static = "1.3.0" -psm = "0.1.11" rand = "0.8.3" anyhow = "1.0.38" @@ -45,5 +45,7 @@ maintenance = { status = "actively-developed" } [features] default = [] +async = ["wasmtime-fiber"] + # Enables support for userfaultfd in the pooling allocator when building on Linux uffd = 
["userfaultfd"] diff --git a/crates/runtime/src/externref.rs b/crates/runtime/src/externref.rs index 7fd964b8e3..4d7b8f9c47 100644 --- a/crates/runtime/src/externref.rs +++ b/crates/runtime/src/externref.rs @@ -99,18 +99,16 @@ //! Examination of Deferred Reference Counting and Cycle Detection* by Quinane: //! -use std::alloc::Layout; use std::any::Any; use std::cell::{Cell, RefCell, UnsafeCell}; use std::cmp::Ordering; -use std::collections::BTreeMap; use std::collections::HashSet; use std::hash::{Hash, Hasher}; use std::mem; use std::ops::Deref; use std::ptr::{self, NonNull}; -use std::rc::Rc; -use wasmtime_environ::{ir::StackMap, StackMapInformation}; +use std::{alloc::Layout, sync::Arc}; +use wasmtime_environ::ir::StackMap; /// An external reference to some opaque data. /// @@ -522,13 +520,13 @@ pub struct VMExternRefActivationsTable { /// than create a new hash set every GC. precise_stack_roots: RefCell>, - /// A pointer to a `u8` on the youngest host stack frame before we called + /// A pointer to the youngest host stack frame before we called /// into Wasm for the first time. When walking the stack in garbage /// collection, if we don't find this frame, then we failed to walk every /// Wasm stack frame, which means we failed to find all on-stack, /// inside-a-Wasm-frame roots, and doing a GC could lead to freeing one of /// those missed roots, and use after free. 
- stack_canary: Cell>>, + stack_canary: Cell>, } impl VMExternRefActivationsTable { @@ -596,10 +594,10 @@ impl VMExternRefActivationsTable { pub unsafe fn insert_with_gc( &self, externref: VMExternRef, - stack_maps_registry: &StackMapRegistry, + module_info_lookup: &dyn ModuleInfoLookup, ) { if let Err(externref) = self.try_insert(externref) { - self.gc_and_insert_slow(externref, stack_maps_registry); + self.gc_and_insert_slow(externref, module_info_lookup); } } @@ -607,9 +605,9 @@ impl VMExternRefActivationsTable { unsafe fn gc_and_insert_slow( &self, externref: VMExternRef, - stack_maps_registry: &StackMapRegistry, + module_info_lookup: &dyn ModuleInfoLookup, ) { - gc(stack_maps_registry, self); + gc(module_info_lookup, self); // Might as well insert right into the hash set, rather than the bump // chunk, since we are already on a slow path and we get de-duplication @@ -717,260 +715,54 @@ impl VMExternRefActivationsTable { } } - /// Set the stack canary around a call into Wasm. + /// Fetches the current value of this table's stack canary. /// - /// The return value should not be dropped until after the Wasm call has - /// returned. + /// This should only be used in conjunction with setting the stack canary + /// below if the return value is `None` typically. This is called from RAII + /// guards in `wasmtime::func::invoke_wasm_and_catch_traps`. /// - /// While this method is always safe to call (or not call), it is unsafe to - /// call the `wasmtime_runtime::gc` function unless this method is called at - /// the proper times and its return value properly outlives its Wasm call. - /// - /// For `gc` to be safe, this is only *strictly required* to surround the - /// oldest host-->Wasm stack frame transition on this thread, but repeatedly - /// calling it is idempotent and cheap, so it is recommended to call this - /// for every host-->Wasm call. 
- /// - /// # Example - /// - /// ```no_run - /// use wasmtime_runtime::*; - /// - /// # let get_table_from_somewhere = || unimplemented!(); - /// let table: &VMExternRefActivationsTable = get_table_from_somewhere(); - /// - /// // Set the canary before a Wasm call. The canary should always be a - /// // local on the stack. - /// let canary = 0; - /// let auto_reset_canary = table.set_stack_canary(&canary); - /// - /// // Do the call into Wasm. - /// # let call_into_wasm = || unimplemented!(); - /// call_into_wasm(); - /// - /// // Only drop the value returned by `set_stack_canary` after the Wasm - /// // call has returned. - /// drop(auto_reset_canary); - /// ``` - pub fn set_stack_canary<'a>(&'a self, canary: &u8) -> impl Drop + 'a { - let should_reset = if self.stack_canary.get().is_none() { - let canary = canary as *const u8 as *mut u8; - self.stack_canary.set(Some(unsafe { - debug_assert!(!canary.is_null()); - NonNull::new_unchecked(canary) - })); - true - } else { - false - }; + /// For more information on canaries see the gc functions below. + #[inline] + pub fn stack_canary(&self) -> Option { + self.stack_canary.get() + } - return AutoResetCanary { - table: self, - should_reset, - }; - - struct AutoResetCanary<'a> { - table: &'a VMExternRefActivationsTable, - should_reset: bool, - } - - impl Drop for AutoResetCanary<'_> { - fn drop(&mut self) { - if self.should_reset { - debug_assert!(self.table.stack_canary.get().is_some()); - self.table.stack_canary.set(None); - } - } - } + /// Sets the current value of the stack canary. + /// + /// This is called from RAII guards in + /// `wasmtime::func::invoke_wasm_and_catch_traps`. This is used to update + /// the stack canary to a concrete value and then reset it back to `None` + /// when wasm is finished. + /// + /// For more information on canaries see the gc functions below. 
+ #[inline] + pub fn set_stack_canary(&self, canary: Option) { + self.stack_canary.set(canary); } } -/// A registry of stack maps for currently active Wasm modules. -#[derive(Default)] -pub struct StackMapRegistry { - inner: RefCell, +/// Used by the runtime to lookup information about a module given a +/// program counter value. +pub trait ModuleInfoLookup: 'static { + /// Lookup the module information from a program counter value. + fn lookup(&self, pc: usize) -> Option>; } -#[derive(Default)] -struct StackMapRegistryInner { - /// A map from the highest pc in a module, to its stack maps. - /// - /// For details, see the comment above `GlobalFrameInfo::ranges`. - ranges: BTreeMap, +/// Used by the runtime to query module information. +pub trait ModuleInfo { + /// Lookup the stack map at a program counter value. + fn lookup_stack_map(&self, pc: usize) -> Option<&StackMap>; } -#[derive(Debug)] -struct ModuleStackMaps { - /// The range of PCs that this module covers. Different modules must always - /// have distinct ranges. - range: std::ops::Range, +pub(crate) struct EmptyModuleInfoLookup; - /// A map from a PC in this module (that is a GC safepoint) to its - /// associated stack map. If `None` then it means that the PC is the start - /// of a range which has no stack map. - pc_to_stack_map: Vec<(usize, Option>)>, -} - -impl StackMapRegistry { - /// Register the stack maps for a given module. - /// - /// The stack maps should be given as an iterator over a function's PC range - /// in memory (that is, where the JIT actually allocated and emitted the - /// function's code at), and the stack maps and code offsets within that - /// range for each of its GC safepoints. 
- pub fn register_stack_maps<'a>( - &self, - stack_maps: impl IntoIterator, &'a [StackMapInformation])>, - ) { - let mut min = usize::max_value(); - let mut max = 0; - let mut pc_to_stack_map = vec![]; - let mut last_is_none_marker = true; - - for (range, infos) in stack_maps { - let len = range.end - range.start; - - min = std::cmp::min(min, range.start); - max = std::cmp::max(max, range.end); - - // Add a marker between functions indicating that this function's pc - // starts with no stack map so when our binary search later on finds - // a pc between the start of the function and the function's first - // stack map it doesn't think the previous stack map is our stack - // map. - // - // We skip this if the previous entry pushed was also a `None` - // marker, in which case the starting pc already has no stack map. - // This is also skipped if the first `code_offset` is zero since - // what we'll push applies for the first pc anyway. - if !last_is_none_marker && (infos.is_empty() || infos[0].code_offset > 0) { - pc_to_stack_map.push((range.start, None)); - last_is_none_marker = true; - } - - for info in infos { - assert!((info.code_offset as usize) < len); - pc_to_stack_map.push(( - range.start + (info.code_offset as usize), - Some(Rc::new(info.stack_map.clone())), - )); - last_is_none_marker = false; - } - } - - if pc_to_stack_map.is_empty() { - // Nothing to register. - return; - } - - let module_stack_maps = ModuleStackMaps { - range: min..max, - pc_to_stack_map, - }; - - let mut inner = self.inner.borrow_mut(); - - // Check if we've already registered this module. - if let Some(existing_module) = inner.ranges.get(&max) { - assert_eq!(existing_module.range, module_stack_maps.range); - debug_assert_eq!( - existing_module.pc_to_stack_map, - module_stack_maps.pc_to_stack_map, - ); - return; - } - - // Assert that this chunk of ranges doesn't collide with any other known - // chunks. 
- if let Some((_, prev)) = inner.ranges.range(max..).next() { - assert!(prev.range.start > max); - } - if let Some((prev_end, _)) = inner.ranges.range(..=min).next_back() { - assert!(*prev_end < min); - } - - let old = inner.ranges.insert(max, module_stack_maps); - assert!(old.is_none()); - } - - /// Lookup the stack map for the given PC, if any. - pub fn lookup_stack_map(&self, pc: usize) -> Option> { - let inner = self.inner.borrow(); - let stack_maps = inner.module_stack_maps(pc)?; - - // Do a binary search to find the stack map for the given PC. - // - // Because GC safepoints are technically only associated with a single - // PC, we should ideally only care about `Ok(index)` values returned - // from the binary search. However, safepoints are inserted right before - // calls, and there are two things that can disturb the PC/offset - // associated with the safepoint versus the PC we actually use to query - // for the stack map: - // - // 1. The `backtrace` crate gives us the PC in a frame that will be - // *returned to*, and where execution will continue from, rather than - // the PC of the call we are currently at. So we would need to - // disassemble one instruction backwards to query the actual PC for - // the stack map. - // - // TODO: One thing we *could* do to make this a little less error - // prone, would be to assert/check that the nearest GC safepoint - // found is within `max_encoded_size(any kind of call instruction)` - // our queried PC for the target architecture. - // - // 2. Cranelift's stack maps only handle the stack, not - // registers. However, some references that are arguments to a call - // may need to be in registers. In these cases, what Cranelift will - // do is: - // - // a. spill all the live references, - // b. insert a GC safepoint for those references, - // c. reload the references into registers, and finally - // d. make the call. 
- // - // Step (c) adds drift between the GC safepoint and the location of - // the call, which is where we actually walk the stack frame and - // collect its live references. - // - // Luckily, the spill stack slots for the live references are still - // up to date, so we can still find all the on-stack roots. - // Furthermore, we do not have a moving GC, so we don't need to worry - // whether the following code will reuse the references in registers - // (which would not have been updated to point to the moved objects) - // or reload from the stack slots (which would have been updated to - // point to the moved objects). - let index = match stack_maps - .pc_to_stack_map - .binary_search_by_key(&pc, |(pc, _stack_map)| *pc) - { - // Exact hit. - Ok(i) => i, - - // `Err(0)` means that the associated stack map would have been the - // first element in the array if this pc had an associated stack - // map, but this pc does not have an associated stack map. This can - // only happen inside a Wasm frame if there are no live refs at this - // pc. - Err(0) => return None, - - Err(n) => n - 1, - }; - - let stack_map = stack_maps.pc_to_stack_map[index].1.as_ref()?.clone(); - Some(stack_map) +impl ModuleInfoLookup for EmptyModuleInfoLookup { + fn lookup(&self, _pc: usize) -> Option> { + None } } -impl StackMapRegistryInner { - fn module_stack_maps(&self, pc: usize) -> Option<&ModuleStackMaps> { - let (end, stack_maps) = self.ranges.range(pc..).next()?; - if pc < stack_maps.range.start || *end < pc { - None - } else { - Some(stack_maps) - } - } -} +pub(crate) const EMPTY_MODULE_LOOKUP: EmptyModuleInfoLookup = EmptyModuleInfoLookup; #[derive(Debug, Default)] struct DebugOnly { @@ -1017,7 +809,7 @@ impl std::ops::DerefMut for DebugOnly { /// Additionally, you must have registered the stack maps for every Wasm module /// that has frames on the stack with the given `stack_maps_registry`. 
pub unsafe fn gc( - stack_maps_registry: &StackMapRegistry, + module_info_lookup: &dyn ModuleInfoLookup, externref_activations_table: &VMExternRefActivationsTable, ) { // We borrow the precise stack roots `RefCell` for the whole duration of @@ -1055,8 +847,7 @@ pub unsafe fn gc( if cfg!(debug_assertions) { // Assert that there aren't any Wasm frames on the stack. backtrace::trace(|frame| { - let stack_map = stack_maps_registry.lookup_stack_map(frame.ip() as usize); - assert!(stack_map.is_none()); + assert!(module_info_lookup.lookup(frame.ip() as usize).is_none()); true }); } @@ -1064,7 +855,7 @@ pub unsafe fn gc( log::debug!("end GC"); return; } - Some(canary) => canary.as_ptr() as usize, + Some(canary) => canary, }; // There is a stack canary, so there must be Wasm frames on the stack. The @@ -1100,28 +891,30 @@ pub unsafe fn gc( let pc = frame.ip() as usize; let sp = frame.sp() as usize; - if let Some(stack_map) = stack_maps_registry.lookup_stack_map(pc) { - debug_assert!(sp != 0, "we should always get a valid SP for Wasm frames"); + if let Some(module_info) = module_info_lookup.lookup(pc) { + if let Some(stack_map) = module_info.lookup_stack_map(pc) { + debug_assert!(sp != 0, "we should always get a valid SP for Wasm frames"); - for i in 0..(stack_map.mapped_words() as usize) { - if stack_map.get_bit(i) { - // Stack maps have one bit per word in the frame, and the - // zero^th bit is the *lowest* addressed word in the frame, - // i.e. the closest to the SP. So to get the `i`^th word in - // this frame, we add `i * sizeof(word)` to the SP. - let ptr_to_ref = sp + i * mem::size_of::(); + for i in 0..(stack_map.mapped_words() as usize) { + if stack_map.get_bit(i) { + // Stack maps have one bit per word in the frame, and the + // zero^th bit is the *lowest* addressed word in the frame, + // i.e. the closest to the SP. So to get the `i`^th word in + // this frame, we add `i * sizeof(word)` to the SP. 
+ let ptr_to_ref = sp + i * mem::size_of::(); - let r = std::ptr::read(ptr_to_ref as *const *mut VMExternData); - debug_assert!( - r.is_null() || activations_table_set.contains(&r), - "every on-stack externref inside a Wasm frame should \ - have an entry in the VMExternRefActivationsTable" - ); - if let Some(r) = NonNull::new(r) { - VMExternRefActivationsTable::insert_precise_stack_root( - &mut precise_stack_roots, - r, + let r = std::ptr::read(ptr_to_ref as *const *mut VMExternData); + debug_assert!( + r.is_null() || activations_table_set.contains(&r), + "every on-stack externref inside a Wasm frame should \ + have an entry in the VMExternRefActivationsTable" ); + if let Some(r) = NonNull::new(r) { + VMExternRefActivationsTable::insert_precise_stack_root( + &mut precise_stack_roots, + r, + ); + } } } } @@ -1208,7 +1001,7 @@ mod tests { let actual_offset = (next_ptr as usize) - (table_ptr as usize); - let offsets = wasmtime_environ::VMOffsets { + let offsets = wasmtime_environ::VMOffsets::from(wasmtime_environ::VMOffsetsFields { pointer_size: 8, num_signature_ids: 0, num_imported_functions: 0, @@ -1219,7 +1012,7 @@ mod tests { num_defined_tables: 0, num_defined_memories: 0, num_defined_globals: 0, - }; + }); assert_eq!( offsets.vm_extern_ref_activation_table_next() as usize, actual_offset @@ -1235,7 +1028,7 @@ mod tests { let actual_offset = (end_ptr as usize) - (table_ptr as usize); - let offsets = wasmtime_environ::VMOffsets { + let offsets = wasmtime_environ::VMOffsets::from(wasmtime_environ::VMOffsetsFields { pointer_size: 8, num_signature_ids: 0, num_imported_functions: 0, @@ -1246,7 +1039,7 @@ mod tests { num_defined_tables: 0, num_defined_memories: 0, num_defined_globals: 0, - }; + }); assert_eq!( offsets.vm_extern_ref_activation_table_end() as usize, actual_offset diff --git a/crates/runtime/src/instance.rs b/crates/runtime/src/instance.rs index 9273870367..f2143681fa 100644 --- a/crates/runtime/src/instance.rs +++ b/crates/runtime/src/instance.rs @@ -3,7 
+3,7 @@ //! `InstanceHandle` is a reference-counting handle for an `Instance`. use crate::export::Export; -use crate::externref::{StackMapRegistry, VMExternRefActivationsTable}; +use crate::externref::{ModuleInfoLookup, VMExternRefActivationsTable}; use crate::memory::{Memory, RuntimeMemoryCreator}; use crate::table::{Table, TableElement}; use crate::traphandlers::Trap; @@ -37,6 +37,52 @@ mod allocator; pub use allocator::*; +/// Used by hosts to limit resource consumption of instances. +/// +/// An instance can be created with a resource limiter so that hosts can take into account +/// non-WebAssembly resource usage to determine if a linear memory or table should grow. +pub trait ResourceLimiter { + /// Notifies the resource limiter that an instance's linear memory has been requested to grow. + /// + /// * `current` is the current size of the linear memory in WebAssembly page units. + /// * `desired` is the desired size of the linear memory in WebAssembly page units. + /// * `maximum` is either the linear memory's maximum or a maximum from an instance allocator, + /// also in WebAssembly page units. A value of `None` indicates that the linear memory is + /// unbounded. + /// + /// This function should return `true` to indicate that the growing operation is permitted or + /// `false` if not permitted. Returning `true` when a maximum has been exceeded will have no + /// effect as the linear memory will not grow. + fn memory_growing(&self, current: u32, desired: u32, maximum: Option) -> bool; + + /// Notifies the resource limiter that an instance's table has been requested to grow. + /// + /// * `current` is the current number of elements in the table. + /// * `desired` is the desired number of elements in the table. + /// * `maximum` is either the table's maximum or a maximum from an instance allocator. + /// A value of `None` indicates that the table is unbounded. 
+ /// + /// This function should return `true` to indicate that the growing operation is permitted or + /// `false` if not permitted. Returning `true` when a maximum has been exceeded will have no + /// effect as the table will not grow. + fn table_growing(&self, current: u32, desired: u32, maximum: Option) -> bool; + + /// The maximum number of instances that can be created for a `Store`. + /// + /// Module instantiation will fail if this limit is exceeded. + fn instances(&self) -> usize; + + /// The maximum number of tables that can be created for a `Store`. + /// + /// Module instantiation will fail if this limit is exceeded. + fn tables(&self) -> usize; + + /// The maximum number of tables that can be created for a `Store`. + /// + /// Module instantiation will fail if this limit is exceeded. + fn memories(&self) -> usize; +} + /// Runtime representation of an instance value, which erases all `Instance` /// information since instances are just a collection of values. pub type RuntimeInstance = Rc>; @@ -249,9 +295,9 @@ impl Instance { unsafe { self.vmctx_plus_offset(self.offsets.vmctx_externref_activations_table()) } } - /// Return a pointer to the `StackMapRegistry`. - pub fn stack_map_registry(&self) -> *mut *mut StackMapRegistry { - unsafe { self.vmctx_plus_offset(self.offsets.vmctx_stack_map_registry()) } + /// Return a pointer to the `ModuleInfoLookup`. + pub fn module_info_lookup(&self) -> *mut *const dyn ModuleInfoLookup { + unsafe { self.vmctx_plus_offset(self.offsets.vmctx_module_info_lookup()) } } /// Return a reference to the vmctx used by compiled wasm code. @@ -378,11 +424,12 @@ impl Instance { /// Returns `None` if memory can't be grown by the specified amount /// of pages. 
pub(crate) fn memory_grow(&self, memory_index: DefinedMemoryIndex, delta: u32) -> Option { - let result = self + let memory = self .memories .get(memory_index) - .unwrap_or_else(|| panic!("no memory for index {}", memory_index.index())) - .grow(delta); + .unwrap_or_else(|| panic!("no memory for index {}", memory_index.index())); + + let result = unsafe { memory.grow(delta) }; // Keep current the VMContext pointers used by compiled wasm code. self.set_memory(memory_index, self.memories[memory_index].vmmemory()); @@ -460,19 +507,18 @@ impl Instance { delta: u32, init_value: TableElement, ) -> Option { - unsafe { - let orig_size = self - .tables - .get(table_index) - .unwrap_or_else(|| panic!("no table for index {}", table_index.index())) - .grow(delta, init_value)?; + let table = self + .tables + .get(table_index) + .unwrap_or_else(|| panic!("no table for index {}", table_index.index())); - // Keep the `VMContext` pointers used by compiled Wasm code up to - // date. - self.set_table(table_index, self.tables[table_index].vmtable()); + let result = unsafe { table.grow(delta, init_value) }; - Some(orig_size) - } + // Keep the `VMContext` pointers used by compiled Wasm code up to + // date. 
+ self.set_table(table_index, self.tables[table_index].vmtable()); + + result } pub(crate) fn defined_table_fill( @@ -527,11 +573,11 @@ impl Instance { return None; } - Some(unsafe { &*self.anyfunc_ptr(index) }) + unsafe { Some(&*self.vmctx_plus_offset(self.offsets.vmctx_anyfunc(index))) } } - unsafe fn anyfunc_ptr(&self, index: FuncIndex) -> *mut VMCallerCheckedAnyfunc { - self.vmctx_plus_offset(self.offsets.vmctx_anyfunc(index)) + unsafe fn anyfunc_base(&self) -> *mut VMCallerCheckedAnyfunc { + self.vmctx_plus_offset(self.offsets.vmctx_anyfuncs_begin()) } fn find_passive_segment<'a, I, D, T>( @@ -565,38 +611,56 @@ impl Instance { src: u32, len: u32, ) -> Result<(), Trap> { - // https://webassembly.github.io/bulk-memory-operations/core/exec/instructions.html#exec-table-init - - let table = self.get_table(table_index); - let elements = Self::find_passive_segment( elem_index, &self.module.passive_elements_map, &self.module.passive_elements, &self.dropped_elements, ); + self.table_init_segment(table_index, elements, dst, src, len) + } - if src - .checked_add(len) - .map_or(true, |n| n as usize > elements.len()) - || dst.checked_add(len).map_or(true, |m| m > table.size()) + pub(crate) fn table_init_segment( + &self, + table_index: TableIndex, + elements: &[FuncIndex], + dst: u32, + src: u32, + len: u32, + ) -> Result<(), Trap> { + // https://webassembly.github.io/bulk-memory-operations/core/exec/instructions.html#exec-table-init + + let table = self.get_table(table_index); + + let elements = match elements + .get(usize::try_from(src).unwrap()..) 
+ .and_then(|s| s.get(..usize::try_from(len).unwrap())) { - return Err(Trap::wasm(ir::TrapCode::TableOutOfBounds)); + Some(elements) => elements, + None => return Err(Trap::wasm(ir::TrapCode::TableOutOfBounds)), + }; + + match table.element_type() { + TableElementType::Func => unsafe { + let base = self.anyfunc_base(); + table.init_funcs( + dst, + elements.iter().map(|idx| { + if *idx == FuncIndex::reserved_value() { + ptr::null_mut() + } else { + debug_assert!(idx.as_u32() < self.offsets.num_defined_functions); + base.add(usize::try_from(idx.as_u32()).unwrap()) + } + }), + )?; + }, + + TableElementType::Val(_) => { + debug_assert!(elements.iter().all(|e| *e == FuncIndex::reserved_value())); + table.fill(dst, TableElement::ExternRef(None), len)?; + } } - - // TODO(#983): investigate replacing this get/set loop with a `memcpy`. - for (dst, src) in (dst..dst + len).zip(src..src + len) { - let elem = self - .get_caller_checked_anyfunc(elements[src as usize]) - .map_or(ptr::null_mut(), |f: &VMCallerCheckedAnyfunc| { - f as *const VMCallerCheckedAnyfunc as *mut _ - }); - - table - .set(dst, TableElement::FuncRef(elem)) - .expect("should never panic because we already did the bounds check above"); - } - Ok(()) } @@ -727,16 +791,26 @@ impl Instance { src: u32, len: u32, ) -> Result<(), Trap> { - // https://webassembly.github.io/bulk-memory-operations/core/exec/instructions.html#exec-memory-init - - let memory = self.get_memory(memory_index); - let data = Self::find_passive_segment( data_index, &self.module.passive_data_map, &self.module.passive_data, &self.dropped_data, ); + self.memory_init_segment(memory_index, &data, dst, src, len) + } + + pub(crate) fn memory_init_segment( + &self, + memory_index: MemoryIndex, + data: &[u8], + dst: u32, + src: u32, + len: u32, + ) -> Result<(), Trap> { + // https://webassembly.github.io/bulk-memory-operations/core/exec/instructions.html#exec-memory-init + + let memory = self.get_memory(memory_index); if src .checked_add(len) @@ 
-818,10 +892,6 @@ pub struct InstanceHandle { } impl InstanceHandle { - pub(crate) unsafe fn new(instance: *mut Instance) -> Self { - Self { instance } - } - /// Create a new `InstanceHandle` pointing at the instance /// pointed to by the given `VMContext` pointer. /// @@ -970,6 +1040,7 @@ impl InstanceHandle { /// of the internals, there's no lifetime tracking around its validity. /// You'll need to ensure that the returned handles all go out of scope at /// the same time. + #[inline] pub unsafe fn clone(&self) -> InstanceHandle { InstanceHandle { instance: self.instance, diff --git a/crates/runtime/src/instance/allocator.rs b/crates/runtime/src/instance/allocator.rs index 84e96d7aad..c91f89a74f 100644 --- a/crates/runtime/src/instance/allocator.rs +++ b/crates/runtime/src/instance/allocator.rs @@ -1,8 +1,8 @@ -use crate::externref::{StackMapRegistry, VMExternRefActivationsTable}; +use crate::externref::{ModuleInfoLookup, VMExternRefActivationsTable, EMPTY_MODULE_LOOKUP}; use crate::imports::Imports; -use crate::instance::{Instance, InstanceHandle, RuntimeMemoryCreator}; +use crate::instance::{Instance, InstanceHandle, ResourceLimiter, RuntimeMemoryCreator}; use crate::memory::{DefaultMemoryCreator, Memory}; -use crate::table::{Table, TableElement}; +use crate::table::Table; use crate::traphandlers::Trap; use crate::vmcontext::{ VMBuiltinFunctionsArray, VMCallerCheckedAnyfunc, VMContext, VMFunctionBody, VMFunctionImport, @@ -15,13 +15,13 @@ use std::any::Any; use std::cell::RefCell; use std::convert::TryFrom; use std::ptr::{self, NonNull}; +use std::rc::Rc; use std::slice; use std::sync::Arc; use thiserror::Error; -use wasmtime_environ::entity::{packed_option::ReservedValue, EntityRef, EntitySet, PrimaryMap}; +use wasmtime_environ::entity::{EntityRef, EntitySet, PrimaryMap}; use wasmtime_environ::wasm::{ - DefinedFuncIndex, DefinedMemoryIndex, DefinedTableIndex, FuncIndex, GlobalInit, SignatureIndex, - TableElementType, WasmType, + DefinedFuncIndex, 
DefinedMemoryIndex, DefinedTableIndex, GlobalInit, SignatureIndex, WasmType, }; use wasmtime_environ::{ ir, MemoryInitialization, MemoryInitializer, Module, ModuleType, TableInitializer, VMOffsets, @@ -45,8 +45,8 @@ pub struct InstanceAllocationRequest<'a> { /// The imports to use for the instantiation. pub imports: Imports<'a>, - /// A callback for looking up shared signature indexes. - pub lookup_shared_signature: &'a dyn Fn(SignatureIndex) -> VMSharedSignatureIndex, + /// Translation from `SignatureIndex` to `VMSharedSignatureIndex` + pub shared_signatures: SharedSignatures<'a>, /// The host state to associate with the instance. pub host_state: Box, @@ -57,8 +57,11 @@ pub struct InstanceAllocationRequest<'a> { /// The pointer to the reference activations table to use for the instance. pub externref_activations_table: *mut VMExternRefActivationsTable, - /// The pointer to the stack map registry to use for the instance. - pub stack_map_registry: *mut StackMapRegistry, + /// The pointer to the module info lookup to use for the instance. + pub module_info_lookup: Option<*const dyn ModuleInfoLookup>, + + /// The resource limiter to use for the instance. + pub limiter: Option<&'a Rc>, } /// An link error while instantiating a module. @@ -87,13 +90,14 @@ pub enum InstantiationError { } /// An error while creating a fiber stack. +#[cfg(feature = "async")] #[derive(Error, Debug)] pub enum FiberStackError { /// Insufficient resources available for the request. #[error("Insufficient resources: {0}")] Resource(anyhow::Error), - /// An error for when the allocator doesn't support custom fiber stacks. - #[error("Custom fiber stacks are not supported by the allocator")] + /// An error for when the allocator doesn't support fiber stacks. + #[error("fiber stacks are not supported by the allocator")] NotSupported, /// A limit on how many fibers are supported has been reached. 
#[error("Limit of {0} concurrent fibers has been reached")] @@ -152,26 +156,62 @@ pub unsafe trait InstanceAllocator: Send + Sync { unsafe fn deallocate(&self, handle: &InstanceHandle); /// Allocates a fiber stack for calling async functions on. - /// - /// Returns the top of the fiber stack if successfully allocated. - fn allocate_fiber_stack(&self) -> Result<*mut u8, FiberStackError>; + #[cfg(feature = "async")] + fn allocate_fiber_stack(&self) -> Result; - /// Deallocates a fiber stack that was previously allocated. + /// Deallocates a fiber stack that was previously allocated with `allocate_fiber_stack`. /// /// # Safety /// - /// This function is unsafe because there are no guarantees that the given stack - /// is no longer in use. - /// - /// Additionally, passing a stack pointer that was not returned from `allocate_fiber_stack` - /// will lead to undefined behavior. - unsafe fn deallocate_fiber_stack(&self, stack: *mut u8); + /// The provided stack is required to have been allocated with `allocate_fiber_stack`. 
+ #[cfg(feature = "async")] + unsafe fn deallocate_fiber_stack(&self, stack: &wasmtime_fiber::FiberStack); +} + +pub enum SharedSignatures<'a> { + /// Used for instantiating user-defined modules + Table(&'a PrimaryMap), + /// Used for instance creation that has only a single function + Always(VMSharedSignatureIndex), + /// Used for instance creation that has no functions + None, +} + +impl SharedSignatures<'_> { + fn lookup(&self, index: SignatureIndex) -> VMSharedSignatureIndex { + match self { + SharedSignatures::Table(table) => table[index], + SharedSignatures::Always(index) => *index, + SharedSignatures::None => unreachable!(), + } + } +} + +impl<'a> From for SharedSignatures<'a> { + fn from(val: VMSharedSignatureIndex) -> SharedSignatures<'a> { + SharedSignatures::Always(val) + } +} + +impl<'a> From> for SharedSignatures<'a> { + fn from(val: Option) -> SharedSignatures<'a> { + match val { + Some(idx) => SharedSignatures::Always(idx), + None => SharedSignatures::None, + } + } +} + +impl<'a> From<&'a PrimaryMap> for SharedSignatures<'a> { + fn from(val: &'a PrimaryMap) -> SharedSignatures<'a> { + SharedSignatures::Table(val) + } } fn get_table_init_start( init: &TableInitializer, instance: &Instance, -) -> Result { +) -> Result { match init.base { Some(base) => { let val = unsafe { @@ -182,7 +222,7 @@ fn get_table_init_start( } }; - init.offset.checked_add(val as usize).ok_or_else(|| { + init.offset.checked_add(val).ok_or_else(|| { InstantiationError::Link(LinkError( "element segment global base overflows".to_owned(), )) @@ -196,6 +236,7 @@ fn check_table_init_bounds(instance: &Instance) -> Result<(), InstantiationError for init in &instance.module.table_initializers { let table = instance.get_table(init.table_index); let start = get_table_init_start(init, instance)?; + let start = usize::try_from(start).unwrap(); let end = start.checked_add(init.elements.len()); match end { @@ -215,34 +256,15 @@ fn check_table_init_bounds(instance: &Instance) -> Result<(), 
InstantiationError fn initialize_tables(instance: &Instance) -> Result<(), InstantiationError> { for init in &instance.module.table_initializers { - let table = instance.get_table(init.table_index); - let start = get_table_init_start(init, instance)?; - let end = start.checked_add(init.elements.len()); - - match end { - Some(end) if end <= table.size() as usize => { - for (i, func_idx) in init.elements.iter().enumerate() { - let item = match table.element_type() { - TableElementType::Func => instance - .get_caller_checked_anyfunc(*func_idx) - .map_or(ptr::null_mut(), |f: &VMCallerCheckedAnyfunc| { - f as *const VMCallerCheckedAnyfunc as *mut VMCallerCheckedAnyfunc - }) - .into(), - TableElementType::Val(_) => { - assert!(*func_idx == FuncIndex::reserved_value()); - TableElement::ExternRef(None) - } - }; - table.set(u32::try_from(start + i).unwrap(), item).unwrap(); - } - } - _ => { - return Err(InstantiationError::Trap(Trap::wasm( - ir::TrapCode::TableOutOfBounds, - ))) - } - } + instance + .table_init_segment( + init.table_index, + &init.elements, + get_table_init_start(init, instance)?, + 0, + init.elements.len() as u32, + ) + .map_err(InstantiationError::Trap)?; } Ok(()) @@ -251,7 +273,7 @@ fn initialize_tables(instance: &Instance) -> Result<(), InstantiationError> { fn get_memory_init_start( init: &MemoryInitializer, instance: &Instance, -) -> Result { +) -> Result { match init.base { Some(base) => { let val = unsafe { @@ -262,7 +284,7 @@ fn get_memory_init_start( } }; - init.offset.checked_add(val as usize).ok_or_else(|| { + init.offset.checked_add(val).ok_or_else(|| { InstantiationError::Link(LinkError("data segment global base overflows".to_owned())) }) } @@ -270,24 +292,6 @@ fn get_memory_init_start( } } -unsafe fn get_memory_slice<'instance>( - init: &MemoryInitializer, - instance: &'instance Instance, -) -> &'instance mut [u8] { - let memory = if let Some(defined_memory_index) = - instance.module.defined_memory_index(init.memory_index) - { - 
instance.memory(defined_memory_index) - } else { - let import = instance.imported_memory(init.memory_index); - let foreign_instance = (&mut *(import).vmctx).instance(); - let foreign_memory = &mut *(import).from; - let foreign_index = foreign_instance.memory_index(foreign_memory); - foreign_instance.memory(foreign_index) - }; - &mut *ptr::slice_from_raw_parts_mut(memory.base, memory.current_length) -} - fn check_memory_init_bounds( instance: &Instance, initializers: &[MemoryInitializer], @@ -295,6 +299,7 @@ fn check_memory_init_bounds( for init in initializers { let memory = instance.get_memory(init.memory_index); let start = get_memory_init_start(init, instance)?; + let start = usize::try_from(start).unwrap(); let end = start.checked_add(init.data.len()); match end { @@ -317,21 +322,15 @@ fn initialize_memories( initializers: &[MemoryInitializer], ) -> Result<(), InstantiationError> { for init in initializers { - let memory = instance.get_memory(init.memory_index); - let start = get_memory_init_start(init, instance)?; - let end = start.checked_add(init.data.len()); - - match end { - Some(end) if end <= memory.current_length => { - let mem_slice = unsafe { get_memory_slice(init, instance) }; - mem_slice[start..end].copy_from_slice(&init.data); - } - _ => { - return Err(InstantiationError::Trap(Trap::wasm( - ir::TrapCode::HeapOutOfBounds, - ))) - } - } + instance + .memory_init_segment( + init.memory_index, + &init.data, + get_memory_init_start(init, instance)?, + 0, + init.data.len() as u32, + ) + .map_err(InstantiationError::Trap)?; } Ok(()) @@ -382,7 +381,9 @@ fn initialize_instance( for (page_index, page) in pages.iter().enumerate() { if let Some(data) = page { debug_assert_eq!(data.len(), WASM_PAGE_SIZE as usize); - slice[page_index * WASM_PAGE_SIZE as usize..].copy_from_slice(data); + let start = page_index * WASM_PAGE_SIZE as usize; + let end = start + WASM_PAGE_SIZE as usize; + slice[start..end].copy_from_slice(data); } } } @@ -408,13 +409,13 @@ unsafe fn 
initialize_vmcontext(instance: &Instance, req: InstanceAllocationReque *instance.interrupts() = req.interrupts; *instance.externref_activations_table() = req.externref_activations_table; - *instance.stack_map_registry() = req.stack_map_registry; + *instance.module_info_lookup() = req.module_info_lookup.unwrap_or(&EMPTY_MODULE_LOOKUP); // Initialize shared signatures let mut ptr = instance.signature_ids_ptr(); for sig in module.types.values() { *ptr = match sig { - ModuleType::Function(sig) => (req.lookup_shared_signature)(*sig), + ModuleType::Function(sig) => req.shared_signatures.lookup(*sig), _ => VMSharedSignatureIndex::new(u32::max_value()), }; ptr = ptr.add(1); @@ -453,8 +454,9 @@ unsafe fn initialize_vmcontext(instance: &Instance, req: InstanceAllocationReque ); // Initialize the functions + let mut base = instance.anyfunc_base(); for (index, sig) in instance.module.functions.iter() { - let type_index = (req.lookup_shared_signature)(*sig); + let type_index = req.shared_signatures.lookup(*sig); let (func_ptr, vmctx) = if let Some(def_index) = instance.module.defined_func_index(index) { ( @@ -467,13 +469,14 @@ unsafe fn initialize_vmcontext(instance: &Instance, req: InstanceAllocationReque }; ptr::write( - instance.anyfunc_ptr(index), + base, VMCallerCheckedAnyfunc { func_ptr, type_index, vmctx, }, ); + base = base.add(1); } // Initialize the defined tables @@ -539,27 +542,35 @@ unsafe fn initialize_vmcontext_globals(instance: &Instance) { #[derive(Clone)] pub struct OnDemandInstanceAllocator { mem_creator: Option>, + stack_size: usize, } impl OnDemandInstanceAllocator { /// Creates a new on-demand instance allocator. 
- pub fn new(mem_creator: Option>) -> Self { - Self { mem_creator } + pub fn new(mem_creator: Option>, stack_size: usize) -> Self { + Self { + mem_creator, + stack_size, + } } - fn create_tables(module: &Module) -> PrimaryMap { + fn create_tables( + module: &Module, + limiter: Option<&Rc>, + ) -> Result, InstantiationError> { let num_imports = module.num_imported_tables; let mut tables: PrimaryMap = PrimaryMap::with_capacity(module.table_plans.len() - num_imports); for table in &module.table_plans.values().as_slice()[num_imports..] { - tables.push(Table::new_dynamic(table)); + tables.push(Table::new_dynamic(table, limiter).map_err(InstantiationError::Resource)?); } - tables + Ok(tables) } fn create_memories( &self, module: &Module, + limiter: Option<&Rc>, ) -> Result, InstantiationError> { let creator = self .mem_creator @@ -569,20 +580,31 @@ impl OnDemandInstanceAllocator { let mut memories: PrimaryMap = PrimaryMap::with_capacity(module.memory_plans.len() - num_imports); for plan in &module.memory_plans.values().as_slice()[num_imports..] 
{ - memories - .push(Memory::new_dynamic(plan, creator).map_err(InstantiationError::Resource)?); + memories.push( + Memory::new_dynamic(plan, creator, limiter) + .map_err(InstantiationError::Resource)?, + ); } Ok(memories) } } +impl Default for OnDemandInstanceAllocator { + fn default() -> Self { + Self { + mem_creator: None, + stack_size: 0, + } + } +} + unsafe impl InstanceAllocator for OnDemandInstanceAllocator { unsafe fn allocate( &self, mut req: InstanceAllocationRequest, ) -> Result { - let memories = self.create_memories(&req.module)?; - let tables = Self::create_tables(&req.module); + let memories = self.create_memories(&req.module, req.limiter)?; + let tables = Self::create_tables(&req.module, req.limiter)?; let host_state = std::mem::replace(&mut req.host_state, Box::new(())); @@ -605,7 +627,9 @@ unsafe impl InstanceAllocator for OnDemandInstanceAllocator { alloc::handle_alloc_error(layout); } ptr::write(instance_ptr, instance); - InstanceHandle::new(instance_ptr) + InstanceHandle { + instance: instance_ptr, + } }; initialize_vmcontext(handle.instance(), req); @@ -627,13 +651,18 @@ unsafe impl InstanceAllocator for OnDemandInstanceAllocator { alloc::dealloc(handle.instance.cast(), layout); } - fn allocate_fiber_stack(&self) -> Result<*mut u8, FiberStackError> { - // The on-demand allocator does not support allocating fiber stacks - Err(FiberStackError::NotSupported) + #[cfg(feature = "async")] + fn allocate_fiber_stack(&self) -> Result { + if self.stack_size == 0 { + return Err(FiberStackError::NotSupported); + } + + wasmtime_fiber::FiberStack::new(self.stack_size) + .map_err(|e| FiberStackError::Resource(e.into())) } - unsafe fn deallocate_fiber_stack(&self, _stack: *mut u8) { - // This should never be called as `allocate_fiber_stack` never returns success - unreachable!() + #[cfg(feature = "async")] + unsafe fn deallocate_fiber_stack(&self, _stack: &wasmtime_fiber::FiberStack) { + // The on-demand allocator has no further bookkeeping for fiber stacks } 
} diff --git a/crates/runtime/src/instance/allocator/pooling.rs b/crates/runtime/src/instance/allocator/pooling.rs index 833538c604..7a17b2b143 100644 --- a/crates/runtime/src/instance/allocator/pooling.rs +++ b/crates/runtime/src/instance/allocator/pooling.rs @@ -8,8 +8,8 @@ //! when modules can be constrained based on configurable limits. use super::{ - initialize_instance, initialize_vmcontext, FiberStackError, InstanceAllocationRequest, - InstanceAllocator, InstanceHandle, InstantiationError, + initialize_instance, initialize_vmcontext, InstanceAllocationRequest, InstanceAllocator, + InstanceHandle, InstantiationError, ResourceLimiter, }; use crate::{instance::Instance, Memory, Mmap, Table, VMContext}; use anyhow::{anyhow, bail, Context, Result}; @@ -18,10 +18,11 @@ use std::cell::RefCell; use std::cmp::min; use std::convert::TryFrom; use std::mem; +use std::rc::Rc; use std::sync::{Arc, Mutex}; use wasmtime_environ::{ entity::{EntitySet, PrimaryMap}, - MemoryStyle, Module, Tunables, VMOffsets, WASM_PAGE_SIZE, + MemoryStyle, Module, Tunables, VMOffsets, VMOffsetsFields, WASM_PAGE_SIZE, }; cfg_if::cfg_if! { @@ -41,10 +42,13 @@ cfg_if::cfg_if! 
{ } } -use imp::{ - commit_memory_pages, commit_stack_pages, commit_table_pages, decommit_memory_pages, - decommit_stack_pages, decommit_table_pages, -}; +use imp::{commit_memory_pages, commit_table_pages, decommit_memory_pages, decommit_table_pages}; + +#[cfg(all(feature = "async", unix))] +use imp::{commit_stack_pages, decommit_stack_pages}; + +#[cfg(feature = "async")] +use super::FiberStackError; fn round_up_to_pow2(n: usize, to: usize) -> usize { debug_assert!(to > 0); @@ -299,7 +303,7 @@ impl InstancePool { let page_size = region::page::size(); // Calculate the maximum size of an Instance structure given the limits - let offsets = VMOffsets { + let offsets = VMOffsets::from(VMOffsetsFields { pointer_size: std::mem::size_of::<*const u8>() as u8, num_signature_ids: module_limits.types, num_imported_functions: module_limits.imported_functions, @@ -310,7 +314,7 @@ impl InstancePool { num_defined_tables: module_limits.tables, num_defined_memories: module_limits.memories, num_defined_globals: module_limits.globals, - }; + }); let instance_size = round_up_to_pow2( mem::size_of::() @@ -373,10 +377,45 @@ impl InstancePool { } } + unsafe fn setup_instance( + &self, + index: usize, + mut req: InstanceAllocationRequest, + ) -> Result { + let instance = self.instance(index); + + instance.module = req.module.clone(); + instance.offsets = VMOffsets::new( + std::mem::size_of::<*const u8>() as u8, + instance.module.as_ref(), + ); + instance.host_state = std::mem::replace(&mut req.host_state, Box::new(())); + + Self::set_instance_memories( + instance, + self.memories.get(index), + self.memories.max_wasm_pages, + req.limiter, + )?; + + Self::set_instance_tables( + instance, + self.tables.get(index), + self.tables.max_elements, + req.limiter, + )?; + + initialize_vmcontext(instance, req); + + Ok(InstanceHandle { + instance: instance as _, + }) + } + fn allocate( &self, strategy: PoolingAllocationStrategy, - mut req: InstanceAllocationRequest, + req: InstanceAllocationRequest, ) 
-> Result { let index = { let mut free_list = self.free_list.lock().unwrap(); @@ -387,28 +426,15 @@ impl InstancePool { free_list.swap_remove(free_index) }; - let host_state = std::mem::replace(&mut req.host_state, Box::new(())); - unsafe { - let instance = self.instance(index); - - instance.module = req.module.clone(); - instance.offsets = VMOffsets::new( - std::mem::size_of::<*const u8>() as u8, - instance.module.as_ref(), - ); - instance.host_state = host_state; - - Self::set_instance_memories( - instance, - self.memories.get(index), - self.memories.max_wasm_pages, - )?; - Self::set_instance_tables(instance, self.tables.get(index), self.tables.max_elements)?; - - initialize_vmcontext(instance, req); - - Ok(InstanceHandle::new(instance as _)) + self.setup_instance(index, req).or_else(|e| { + // Deallocate the allocated instance on error + let instance = self.instance(index); + self.deallocate(&InstanceHandle { + instance: instance as _, + }); + Err(e) + }) } } @@ -470,6 +496,7 @@ impl InstancePool { instance: &mut Instance, mut memories: impl Iterator, max_pages: u32, + limiter: Option<&Rc>, ) -> Result<(), InstantiationError> { let module = instance.module.as_ref(); @@ -484,6 +511,7 @@ impl InstancePool { memories.next().unwrap(), max_pages, commit_memory_pages, + limiter, ) .map_err(InstantiationError::Resource)?, ); @@ -500,6 +528,7 @@ impl InstancePool { instance: &mut Instance, mut tables: impl Iterator, max_elements: u32, + limiter: Option<&Rc>, ) -> Result<(), InstantiationError> { let module = instance.module.as_ref(); @@ -511,9 +540,10 @@ impl InstancePool { commit_table_pages(base, max_elements as usize * mem::size_of::<*mut u8>()) .map_err(InstantiationError::Resource)?; - instance - .tables - .push(Table::new_static(plan, base as _, max_elements)); + instance.tables.push( + Table::new_static(plan, base as _, max_elements, limiter) + .map_err(InstantiationError::Resource)?, + ); } let mut dropped_elements = instance.dropped_elements.borrow_mut(); @@ 
-705,6 +735,7 @@ impl TablePool { /// /// The top of the stack (starting stack pointer) is returned when a stack is allocated /// from the pool. +#[cfg(all(feature = "async", unix))] #[derive(Debug)] struct StackPool { mapping: Mmap, @@ -714,13 +745,13 @@ struct StackPool { free_list: Mutex>, } +#[cfg(all(feature = "async", unix))] impl StackPool { fn new(instance_limits: &InstanceLimits, stack_size: usize) -> Result { let page_size = region::page::size(); - // On Windows, don't allocate any fiber stacks as native fibers are always used // Add a page to the stack size for the guard page when using fiber stacks - let stack_size = if cfg!(windows) || stack_size == 0 { + let stack_size = if stack_size == 0 { 0 } else { round_up_to_pow2(stack_size, page_size) @@ -758,8 +789,10 @@ impl StackPool { }) } - fn allocate(&self, strategy: PoolingAllocationStrategy) -> Result<*mut u8, FiberStackError> { - // Stacks are not supported if nothing was allocated + fn allocate( + &self, + strategy: PoolingAllocationStrategy, + ) -> Result { if self.stack_size == 0 { return Err(FiberStackError::NotSupported); } @@ -787,32 +820,36 @@ impl StackPool { commit_stack_pages(bottom_of_stack, size_without_guard) .map_err(FiberStackError::Resource)?; - // The top of the stack should be returned - Ok(bottom_of_stack.add(size_without_guard)) + wasmtime_fiber::FiberStack::from_top_ptr(bottom_of_stack.add(size_without_guard)) + .map_err(|e| FiberStackError::Resource(e.into())) } } - fn deallocate(&self, top_of_stack: *mut u8) { - debug_assert!(!top_of_stack.is_null()); + fn deallocate(&self, stack: &wasmtime_fiber::FiberStack) { + let top = stack + .top() + .expect("fiber stack not allocated from the pool") as usize; - unsafe { - // Remove the guard page from the size - let stack_size = self.stack_size - self.page_size; - let bottom_of_stack = top_of_stack.sub(stack_size); + let base = self.mapping.as_ptr() as usize; + let len = self.mapping.len(); + assert!( + top > base && top <= (base + len), 
+ "fiber stack top pointer not in range" + ); - let base = self.mapping.as_ptr() as usize; - let start_of_stack = (bottom_of_stack as usize) - self.page_size; + // Remove the guard page from the size + let stack_size = self.stack_size - self.page_size; + let bottom_of_stack = top - stack_size; + let start_of_stack = bottom_of_stack - self.page_size; + debug_assert!(start_of_stack >= base && start_of_stack < (base + len)); + debug_assert!((start_of_stack - base) % self.stack_size == 0); - debug_assert!(start_of_stack >= base && start_of_stack < (base + self.mapping.len())); - debug_assert!((start_of_stack - base) % self.stack_size == 0); + let index = (start_of_stack - base) / self.stack_size; + debug_assert!(index < self.max_instances); - let index = (start_of_stack - base) / self.stack_size; - debug_assert!(index < self.max_instances); + decommit_stack_pages(bottom_of_stack as _, stack_size).unwrap(); - decommit_stack_pages(bottom_of_stack, stack_size).unwrap(); - - self.free_list.lock().unwrap().push(index); - } + self.free_list.lock().unwrap().push(index); } } @@ -828,7 +865,9 @@ pub struct PoolingInstanceAllocator { instance_limits: InstanceLimits, // This is manually drop so that the pools unmap their memory before the page fault handler drops. 
instances: mem::ManuallyDrop, + #[cfg(all(feature = "async", unix))] stacks: StackPool, + stack_size: usize, #[cfg(all(feature = "uffd", target_os = "linux"))] _fault_handler: imp::PageFaultHandler, } @@ -857,7 +896,6 @@ impl PoolingInstanceAllocator { min(instance_limits.memory_reservation_size, 0x200000000); let instances = InstancePool::new(&module_limits, &instance_limits)?; - let stacks = StackPool::new(&instance_limits, stack_size)?; #[cfg(all(feature = "uffd", target_os = "linux"))] let _fault_handler = imp::PageFaultHandler::new(&instances)?; @@ -867,7 +905,9 @@ impl PoolingInstanceAllocator { module_limits, instance_limits, instances: mem::ManuallyDrop::new(instances), - stacks, + #[cfg(all(feature = "async", unix))] + stacks: StackPool::new(&instance_limits, stack_size)?, + stack_size, #[cfg(all(feature = "uffd", target_os = "linux"))] _fault_handler, }) @@ -956,13 +996,31 @@ unsafe impl InstanceAllocator for PoolingInstanceAllocator { self.instances.deallocate(handle); } - fn allocate_fiber_stack(&self) -> Result<*mut u8, FiberStackError> { + #[cfg(all(feature = "async", unix))] + fn allocate_fiber_stack(&self) -> Result { self.stacks.allocate(self.strategy) } - unsafe fn deallocate_fiber_stack(&self, stack: *mut u8) { + #[cfg(all(feature = "async", unix))] + unsafe fn deallocate_fiber_stack(&self, stack: &wasmtime_fiber::FiberStack) { self.stacks.deallocate(stack); } + + #[cfg(all(feature = "async", windows))] + fn allocate_fiber_stack(&self) -> Result { + if self.stack_size == 0 { + return Err(FiberStackError::NotSupported); + } + + // On windows, we don't use a stack pool as we use the native fiber implementation + wasmtime_fiber::FiberStack::new(self.stack_size) + .map_err(|e| FiberStackError::Resource(e.into())) + } + + #[cfg(all(feature = "async", windows))] + unsafe fn deallocate_fiber_stack(&self, _stack: &wasmtime_fiber::FiberStack) { + // A no-op as we don't own the fiber stack on Windows + } } #[cfg(test)] @@ -1310,7 +1368,9 @@ mod test { 
assert_eq!(instances.offsets.num_defined_tables, 1); assert_eq!(instances.offsets.num_defined_memories, 1); assert_eq!(instances.offsets.num_defined_globals, 0); - assert_eq!(instances.instance_size, 4096); + // As of April 2021, the instance struct's size is largely below the size of a single page, + // so it's safe to assume it's been rounded to the size of a single memory page here. + assert_eq!(instances.instance_size, region::page::size()); assert_eq!(instances.max_instances, 3); assert_eq!(&*instances.free_list.lock().unwrap(), &[0, 1, 2]); @@ -1333,11 +1393,12 @@ mod test { memories: &[], globals: &[], }, - lookup_shared_signature: &|_| VMSharedSignatureIndex::default(), + shared_signatures: VMSharedSignatureIndex::default().into(), host_state: Box::new(()), interrupts: std::ptr::null(), externref_activations_table: std::ptr::null_mut(), - stack_map_registry: std::ptr::null_mut(), + module_info_lookup: None, + limiter: None, }, ) .expect("allocation should succeed"), @@ -1357,11 +1418,12 @@ mod test { memories: &[], globals: &[], }, - lookup_shared_signature: &|_| VMSharedSignatureIndex::default(), + shared_signatures: VMSharedSignatureIndex::default().into(), host_state: Box::new(()), interrupts: std::ptr::null(), externref_activations_table: std::ptr::null_mut(), - stack_map_registry: std::ptr::null_mut(), + module_info_lookup: None, + limiter: None, }, ) { Err(InstantiationError::Limit(3)) => {} @@ -1446,10 +1508,12 @@ mod test { }, )?; - assert_eq!(pool.table_size, 4096); + let host_page_size = region::page::size(); + + assert_eq!(pool.table_size, host_page_size); assert_eq!(pool.max_tables, 4); assert_eq!(pool.max_instances, 7); - assert_eq!(pool.page_size, 4096); + assert_eq!(pool.page_size, host_page_size); assert_eq!(pool.max_elements, 100); let base = pool.mapping.as_ptr() as usize; @@ -1470,7 +1534,7 @@ mod test { Ok(()) } - #[cfg(all(unix, target_pointer_width = "64"))] + #[cfg(all(unix, target_pointer_width = "64", feature = "async"))] #[test] fn 
test_stack_pool() -> Result<()> { let pool = StackPool::new( @@ -1481,9 +1545,10 @@ mod test { 1, )?; - assert_eq!(pool.stack_size, 8192); + let native_page_size = region::page::size(); + assert_eq!(pool.stack_size, 2 * native_page_size); assert_eq!(pool.max_instances, 10); - assert_eq!(pool.page_size, 4096); + assert_eq!(pool.page_size, native_page_size); assert_eq!( &*pool.free_list.lock().unwrap(), @@ -1497,7 +1562,10 @@ mod test { let stack = pool .allocate(PoolingAllocationStrategy::NextAvailable) .expect("allocation should succeed"); - assert_eq!(((stack as usize - base) / pool.stack_size) - 1, i); + assert_eq!( + ((stack.top().unwrap() as usize - base) / pool.stack_size) - 1, + i + ); stacks.push(stack); } @@ -1512,7 +1580,7 @@ mod test { }; for stack in stacks { - pool.deallocate(stack); + pool.deallocate(&stack); } assert_eq!( @@ -1584,7 +1652,7 @@ mod test { } #[cfg_attr(target_arch = "aarch64", ignore)] // https://github.com/bytecodealliance/wasmtime/pull/2518#issuecomment-747280133 - #[cfg(all(unix, target_pointer_width = "64"))] + #[cfg(all(unix, target_pointer_width = "64", feature = "async"))] #[test] fn test_stack_zeroed() -> Result<()> { let allocator = PoolingInstanceAllocator::new( @@ -1611,13 +1679,13 @@ mod test { for _ in 0..10 { let stack = allocator.allocate_fiber_stack()?; - // The stack pointer is at the top, so decerement it first - let addr = stack.sub(1); + // The stack pointer is at the top, so decrement it first + let addr = stack.top().unwrap().sub(1); assert_eq!(*addr, 0); *addr = 1; - allocator.deallocate_fiber_stack(stack); + allocator.deallocate_fiber_stack(&stack); } } diff --git a/crates/runtime/src/instance/allocator/pooling/linux.rs b/crates/runtime/src/instance/allocator/pooling/linux.rs index 324200efe4..db8e6ff9bd 100644 --- a/crates/runtime/src/instance/allocator/pooling/linux.rs +++ b/crates/runtime/src/instance/allocator/pooling/linux.rs @@ -48,11 +48,13 @@ pub fn decommit_table_pages(addr: *mut u8, len: usize) -> 
Result<()> { decommit(addr, len, false) } +#[cfg(feature = "async")] pub fn commit_stack_pages(_addr: *mut u8, _len: usize) -> Result<()> { // A no-op as stack pages remain READ|WRITE Ok(()) } +#[cfg(feature = "async")] pub fn decommit_stack_pages(addr: *mut u8, len: usize) -> Result<()> { decommit(addr, len, false) } diff --git a/crates/runtime/src/instance/allocator/pooling/uffd.rs b/crates/runtime/src/instance/allocator/pooling/uffd.rs index ebe5effbc3..43ba9a654a 100644 --- a/crates/runtime/src/instance/allocator/pooling/uffd.rs +++ b/crates/runtime/src/instance/allocator/pooling/uffd.rs @@ -79,11 +79,13 @@ pub fn decommit_table_pages(addr: *mut u8, len: usize) -> Result<()> { decommit(addr, len) } +#[cfg(feature = "async")] pub fn commit_stack_pages(_addr: *mut u8, _len: usize) -> Result<()> { // A no-op as stack pages remain READ|WRITE Ok(()) } +#[cfg(feature = "async")] pub fn decommit_stack_pages(addr: *mut u8, len: usize) -> Result<()> { decommit(addr, len) } @@ -517,11 +519,12 @@ mod test { memories: &[], globals: &[], }, - lookup_shared_signature: &|_| VMSharedSignatureIndex::default(), + shared_signatures: VMSharedSignatureIndex::default().into(), host_state: Box::new(()), interrupts: ptr::null(), externref_activations_table: ptr::null_mut(), - stack_map_registry: ptr::null_mut(), + module_info_lookup: None, + limiter: None, }, ) .expect("instance should allocate"), diff --git a/crates/runtime/src/instance/allocator/pooling/unix.rs b/crates/runtime/src/instance/allocator/pooling/unix.rs index d172f411eb..6a18b6b9a0 100644 --- a/crates/runtime/src/instance/allocator/pooling/unix.rs +++ b/crates/runtime/src/instance/allocator/pooling/unix.rs @@ -58,11 +58,13 @@ pub fn decommit_table_pages(addr: *mut u8, len: usize) -> Result<()> { decommit(addr, len, false) } +#[cfg(feature = "async")] pub fn commit_stack_pages(_addr: *mut u8, _len: usize) -> Result<()> { // A no-op as stack pages remain READ|WRITE Ok(()) } +#[cfg(feature = "async")] pub fn 
decommit_stack_pages(addr: *mut u8, len: usize) -> Result<()> { decommit(addr, len, false) } diff --git a/crates/runtime/src/instance/allocator/pooling/windows.rs b/crates/runtime/src/instance/allocator/pooling/windows.rs index 286cd459fe..c12db0fc63 100644 --- a/crates/runtime/src/instance/allocator/pooling/windows.rs +++ b/crates/runtime/src/instance/allocator/pooling/windows.rs @@ -45,11 +45,3 @@ pub fn commit_table_pages(addr: *mut u8, len: usize) -> Result<()> { pub fn decommit_table_pages(addr: *mut u8, len: usize) -> Result<()> { decommit(addr, len) } - -pub fn commit_stack_pages(addr: *mut u8, len: usize) -> Result<()> { - commit(addr, len) -} - -pub fn decommit_stack_pages(addr: *mut u8, len: usize) -> Result<()> { - decommit(addr, len) -} diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index 1e6cfc8cd2..1c3ce53a3d 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -38,9 +38,9 @@ pub use crate::export::*; pub use crate::externref::*; pub use crate::imports::Imports; pub use crate::instance::{ - FiberStackError, InstanceAllocationRequest, InstanceAllocator, InstanceHandle, InstanceLimits, + InstanceAllocationRequest, InstanceAllocator, InstanceHandle, InstanceLimits, InstantiationError, LinkError, ModuleLimits, OnDemandInstanceAllocator, - PoolingAllocationStrategy, PoolingInstanceAllocator, RuntimeInstance, + PoolingAllocationStrategy, PoolingInstanceAllocator, ResourceLimiter, RuntimeInstance, }; pub use crate::jit_int::GdbJitImageRegistration; pub use crate::memory::{Memory, RuntimeLinearMemory, RuntimeMemoryCreator}; diff --git a/crates/runtime/src/libcalls.rs b/crates/runtime/src/libcalls.rs index 026738a8a1..9c5800fd8e 100644 --- a/crates/runtime/src/libcalls.rs +++ b/crates/runtime/src/libcalls.rs @@ -449,8 +449,8 @@ pub unsafe extern "C" fn wasmtime_activations_table_insert_with_gc( let externref = VMExternRef::clone_from_raw(externref); let instance = (&mut *vmctx).instance(); let activations_table = 
&**instance.externref_activations_table(); - let registry = &**instance.stack_map_registry(); - activations_table.insert_with_gc(externref, registry); + let module_info_lookup = &**instance.module_info_lookup(); + activations_table.insert_with_gc(externref, module_info_lookup); } /// Perform a Wasm `global.get` for `externref` globals. @@ -466,8 +466,8 @@ pub unsafe extern "C" fn wasmtime_externref_global_get( Some(externref) => { let raw = externref.as_raw(); let activations_table = &**instance.externref_activations_table(); - let registry = &**instance.stack_map_registry(); - activations_table.insert_with_gc(externref, registry); + let module_info_lookup = &**instance.module_info_lookup(); + activations_table.insert_with_gc(externref, module_info_lookup); raw } } diff --git a/crates/runtime/src/memory.rs b/crates/runtime/src/memory.rs index 024d901246..973016224d 100644 --- a/crates/runtime/src/memory.rs +++ b/crates/runtime/src/memory.rs @@ -4,12 +4,14 @@ use crate::mmap::Mmap; use crate::vmcontext::VMMemoryDefinition; -use anyhow::Result; +use crate::ResourceLimiter; +use anyhow::{bail, Result}; use more_asserts::{assert_ge, assert_le}; use std::cell::{Cell, RefCell}; use std::cmp::min; use std::convert::TryFrom; use std::ptr; +use std::rc::Rc; use wasmtime_environ::{MemoryPlan, MemoryStyle, WASM_MAX_PAGES, WASM_PAGE_SIZE}; /// A memory allocator @@ -33,6 +35,10 @@ pub trait RuntimeLinearMemory { /// Returns the number of allocated wasm pages. fn size(&self) -> u32; + /// Returns the maximum number of pages the memory can grow to. + /// Returns `None` if the memory is unbounded. + fn maximum(&self) -> Option; + /// Grow memory by the specified amount of wasm pages. /// /// Returns `None` if memory can't be grown by the specified amount @@ -105,6 +111,12 @@ impl RuntimeLinearMemory for MmapMemory { self.mmap.borrow().size } + /// Returns the maximum number of pages the memory can grow to. + /// Returns `None` if the memory is unbounded. 
+ fn maximum(&self) -> Option { + self.maximum + } + /// Grow memory by the specified amount of wasm pages. /// /// Returns `None` if memory can't be grown by the specified amount @@ -189,12 +201,23 @@ enum MemoryStorage { } /// Represents an instantiation of a WebAssembly memory. -pub struct Memory(MemoryStorage); +pub struct Memory { + storage: MemoryStorage, + limiter: Option>, +} impl Memory { /// Create a new dynamic (movable) memory instance for the specified plan. - pub fn new_dynamic(plan: &MemoryPlan, creator: &dyn RuntimeMemoryCreator) -> Result { - Ok(Self(MemoryStorage::Dynamic(creator.new_memory(plan)?))) + pub fn new_dynamic( + plan: &MemoryPlan, + creator: &dyn RuntimeMemoryCreator, + limiter: Option<&Rc>, + ) -> Result { + Self::new( + plan, + MemoryStorage::Dynamic(creator.new_memory(plan)?), + limiter, + ) } /// Create a new static (immovable) memory instance for the specified plan. @@ -203,32 +226,78 @@ impl Memory { base: *mut u8, maximum: u32, make_accessible: fn(*mut u8, usize) -> Result<()>, + limiter: Option<&Rc>, ) -> Result { - if plan.memory.minimum > 0 { - make_accessible(base, plan.memory.minimum as usize * WASM_PAGE_SIZE as usize)?; - } - - Ok(Self(MemoryStorage::Static { + let storage = MemoryStorage::Static { base, size: Cell::new(plan.memory.minimum), maximum: min(plan.memory.maximum.unwrap_or(maximum), maximum), make_accessible, #[cfg(all(feature = "uffd", target_os = "linux"))] guard_page_faults: RefCell::new(Vec::new()), - })) + }; + + Self::new(plan, storage, limiter) + } + + fn new( + plan: &MemoryPlan, + storage: MemoryStorage, + limiter: Option<&Rc>, + ) -> Result { + if let Some(limiter) = limiter { + if !limiter.memory_growing(0, plan.memory.minimum, plan.memory.maximum) { + bail!( + "memory minimum size of {} pages exceeds memory limits", + plan.memory.minimum + ); + } + } + + if let MemoryStorage::Static { + base, + make_accessible, + .. 
+ } = &storage + { + if plan.memory.minimum > 0 { + make_accessible( + *base, + plan.memory.minimum as usize * WASM_PAGE_SIZE as usize, + )?; + } + } + + Ok(Self { + storage, + limiter: limiter.cloned(), + }) } /// Returns the number of allocated wasm pages. pub fn size(&self) -> u32 { - match &self.0 { + match &self.storage { MemoryStorage::Static { size, .. } => size.get(), MemoryStorage::Dynamic(mem) => mem.size(), } } + /// Returns the maximum number of pages the memory can grow to at runtime. + /// + /// Returns `None` if the memory is unbounded. + /// + /// The runtime maximum may not be equal to the maximum from the linear memory's + /// Wasm type when it is being constrained by an instance allocator. + pub fn maximum(&self) -> Option { + match &self.storage { + MemoryStorage::Static { maximum, .. } => Some(*maximum), + MemoryStorage::Dynamic(mem) => mem.maximum(), + } + } + /// Returns whether or not the underlying storage of the memory is "static". pub(crate) fn is_static(&self) -> bool { - if let MemoryStorage::Static { .. } = &self.0 { + if let MemoryStorage::Static { .. } = &self.storage { true } else { false @@ -239,8 +308,30 @@ impl Memory { /// /// Returns `None` if memory can't be grown by the specified amount /// of wasm pages. - pub fn grow(&self, delta: u32) -> Option { - match &self.0 { + /// + /// # Safety + /// + /// Resizing the memory can reallocate the memory buffer for dynamic memories. + /// An instance's `VMContext` may have pointers to the memory's base and will + /// need to be fixed up after growing the memory. + /// + /// Generally, prefer using `InstanceHandle::memory_grow`, which encapsulates + /// this unsafety. 
+ pub unsafe fn grow(&self, delta: u32) -> Option { + let old_size = self.size(); + if delta == 0 { + return Some(old_size); + } + + let new_size = old_size.checked_add(delta)?; + + if let Some(limiter) = &self.limiter { + if !limiter.memory_growing(old_size, new_size, self.maximum()) { + return None; + } + } + + match &self.storage { MemoryStorage::Static { base, size, @@ -252,13 +343,6 @@ impl Memory { #[cfg(all(feature = "uffd", target_os = "linux"))] self.reset_guard_pages().ok()?; - let old_size = size.get(); - if delta == 0 { - return Some(old_size); - } - - let new_size = old_size.checked_add(delta)?; - if new_size > *maximum || new_size >= WASM_MAX_PAGES { return None; } @@ -266,7 +350,7 @@ impl Memory { let start = usize::try_from(old_size).unwrap() * WASM_PAGE_SIZE as usize; let len = usize::try_from(delta).unwrap() * WASM_PAGE_SIZE as usize; - make_accessible(unsafe { base.add(start) }, len).ok()?; + make_accessible(base.add(start), len).ok()?; size.set(new_size); @@ -278,7 +362,7 @@ impl Memory { /// Return a `VMMemoryDefinition` for exposing the memory to compiled wasm code. pub fn vmmemory(&self) -> VMMemoryDefinition { - match &self.0 { + match &self.storage { MemoryStorage::Static { base, size, .. } => VMMemoryDefinition { base: *base, current_length: size.get() as usize * WASM_PAGE_SIZE as usize, @@ -299,7 +383,7 @@ impl Memory { size: usize, reset: fn(*mut u8, usize) -> Result<()>, ) { - match &self.0 { + match &self.storage { MemoryStorage::Static { guard_page_faults, .. } => { @@ -320,7 +404,7 @@ impl Memory { /// This function will panic if called on a dynamic memory. #[cfg(all(feature = "uffd", target_os = "linux"))] pub(crate) fn reset_guard_pages(&self) -> Result<()> { - match &self.0 { + match &self.storage { MemoryStorage::Static { guard_page_faults, .. 
} => { @@ -345,13 +429,16 @@ impl Default for Memory { unreachable!() } - Self(MemoryStorage::Static { - base: ptr::null_mut(), - size: Cell::new(0), - maximum: 0, - make_accessible, - #[cfg(all(feature = "uffd", target_os = "linux"))] - guard_page_faults: RefCell::new(Vec::new()), - }) + Self { + storage: MemoryStorage::Static { + base: ptr::null_mut(), + size: Cell::new(0), + maximum: 0, + make_accessible, + #[cfg(all(feature = "uffd", target_os = "linux"))] + guard_page_faults: RefCell::new(Vec::new()), + }, + limiter: None, + } } } diff --git a/crates/runtime/src/table.rs b/crates/runtime/src/table.rs index 8c857add45..9e26721fb4 100644 --- a/crates/runtime/src/table.rs +++ b/crates/runtime/src/table.rs @@ -3,19 +3,21 @@ //! `Table` is to WebAssembly tables what `LinearMemory` is to WebAssembly linear memories. use crate::vmcontext::{VMCallerCheckedAnyfunc, VMTableDefinition}; -use crate::{Trap, VMExternRef}; +use crate::{ResourceLimiter, Trap, VMExternRef}; +use anyhow::{bail, Result}; use std::cell::{Cell, RefCell}; use std::cmp::min; -use std::convert::TryInto; +use std::convert::{TryFrom, TryInto}; use std::ops::Range; use std::ptr; +use std::rc::Rc; use wasmtime_environ::wasm::TableElementType; use wasmtime_environ::{ir, TablePlan}; /// An element going into or coming out of a table. /// /// Table elements are stored as pointers and are default-initialized with `ptr::null_mut`. -#[derive(Clone, Debug)] +#[derive(Clone)] pub enum TableElement { /// A `funcref`. FuncRef(*mut VMCallerCheckedAnyfunc), @@ -69,7 +71,7 @@ impl TableElement { unsafe fn into_raw(self) -> *mut u8 { match self { Self::FuncRef(e) => e as _, - Self::ExternRef(e) => e.map(|e| e.into_raw()).unwrap_or(ptr::null_mut()), + Self::ExternRef(e) => e.map_or(ptr::null_mut(), |e| e.into_raw()), } } } @@ -92,7 +94,6 @@ impl From for TableElement { } } -#[derive(Debug)] enum TableStorage { Static { data: *mut *mut u8, @@ -108,38 +109,74 @@ enum TableStorage { } /// Represents an instance's table. 
-#[derive(Debug)] -pub struct Table(TableStorage); +pub struct Table { + storage: TableStorage, + limiter: Option>, +} impl Table { /// Create a new dynamic (movable) table instance for the specified table plan. - pub fn new_dynamic(plan: &TablePlan) -> Self { + pub fn new_dynamic( + plan: &TablePlan, + limiter: Option<&Rc>, + ) -> Result { let elements = RefCell::new(vec![ptr::null_mut(); plan.table.minimum as usize]); let ty = plan.table.ty.clone(); let maximum = plan.table.maximum; - Self(TableStorage::Dynamic { + + let storage = TableStorage::Dynamic { elements, ty, maximum, - }) + }; + + Self::new(plan, storage, limiter) } /// Create a new static (immovable) table instance for the specified table plan. - pub fn new_static(plan: &TablePlan, data: *mut *mut u8, maximum: u32) -> Self { + pub fn new_static( + plan: &TablePlan, + data: *mut *mut u8, + maximum: u32, + limiter: Option<&Rc>, + ) -> Result { let size = Cell::new(plan.table.minimum); let ty = plan.table.ty.clone(); let maximum = min(plan.table.maximum.unwrap_or(maximum), maximum); - Self(TableStorage::Static { + + let storage = TableStorage::Static { data, size, ty, maximum, + }; + + Self::new(plan, storage, limiter) + } + + fn new( + plan: &TablePlan, + storage: TableStorage, + limiter: Option<&Rc>, + ) -> Result { + if let Some(limiter) = limiter { + if !limiter.table_growing(0, plan.table.minimum, plan.table.maximum) { + bail!( + "table minimum size of {} elements exceeds table limits", + plan.table.minimum + ); + } + } + + Ok(Self { + storage, + limiter: limiter.cloned(), }) } /// Returns the type of the elements in this table. pub fn element_type(&self) -> TableElementType { - match &self.0 { + match &self.storage { TableStorage::Static { ty, .. } => *ty, TableStorage::Dynamic { ty, .. } => *ty, } @@ -147,7 +184,7 @@ impl Table { /// Returns whether or not the underlying storage of the table is "static". pub(crate) fn is_static(&self) -> bool { - if let TableStorage::Static { .. 
} = &self.0 { + if let TableStorage::Static { .. } = &self.storage { true } else { false @@ -156,20 +193,51 @@ impl Table { /// Returns the number of allocated elements. pub fn size(&self) -> u32 { - match &self.0 { + match &self.storage { TableStorage::Static { size, .. } => size.get(), TableStorage::Dynamic { elements, .. } => elements.borrow().len().try_into().unwrap(), } } - /// Returns the maximum number of elements. + /// Returns the maximum number of elements at runtime. + /// + /// Returns `None` if the table is unbounded. + /// + /// The runtime maximum may not be equal to the maximum from the table's Wasm type + /// when it is being constrained by an instance allocator. pub fn maximum(&self) -> Option { - match &self.0 { + match &self.storage { TableStorage::Static { maximum, .. } => Some(*maximum), TableStorage::Dynamic { maximum, .. } => maximum.clone(), } } + /// Fill `table[dst..]` with values from `items` + /// + /// Returns a trap error on out-of-bounds accesses. + pub fn init_funcs( + &self, + dst: u32, + items: impl ExactSizeIterator, + ) -> Result<(), Trap> { + assert!(self.element_type() == TableElementType::Func); + + self.with_elements_mut(|elements| { + let elements = match elements + .get_mut(usize::try_from(dst).unwrap()..) + .and_then(|s| s.get_mut(..items.len())) + { + Some(elements) => elements, + None => return Err(Trap::wasm(ir::TrapCode::TableOutOfBounds)), + }; + + for (item, slot) in items.zip(elements) { + *slot = item as *mut u8; + } + Ok(()) + }) + } + /// Fill `table[dst..dst + len]` with `val`. /// /// Returns a trap error on out-of-bounds accesses. @@ -218,8 +286,14 @@ impl Table { /// this unsafety. 
pub unsafe fn grow(&self, delta: u32, init_value: TableElement) -> Option { let old_size = self.size(); - let new_size = old_size.checked_add(delta)?; + + if let Some(limiter) = &self.limiter { + if !limiter.table_growing(old_size, new_size, self.maximum()) { + return None; + } + } + if let Some(max) = self.maximum() { if new_size > max { return None; @@ -229,7 +303,7 @@ impl Table { debug_assert!(self.type_matches(&init_value)); // First resize the storage and then fill with the init value - match &self.0 { + match &self.storage { TableStorage::Static { size, .. } => { size.set(new_size); } @@ -319,7 +393,7 @@ impl Table { /// Return a `VMTableDefinition` for exposing the table to compiled wasm code. pub fn vmtable(&self) -> VMTableDefinition { - match &self.0 { + match &self.storage { TableStorage::Static { data, size, .. } => VMTableDefinition { base: *data as _, current_elements: size.get(), @@ -346,7 +420,7 @@ impl Table { where F: FnOnce(&[*mut u8]) -> R, { - match &self.0 { + match &self.storage { TableStorage::Static { data, size, .. } => unsafe { f(std::slice::from_raw_parts(*data, size.get() as usize)) }, @@ -361,7 +435,7 @@ impl Table { where F: FnOnce(&mut [*mut u8]) -> R, { - match &self.0 { + match &self.storage { TableStorage::Static { data, size, .. } => unsafe { f(std::slice::from_raw_parts_mut(*data, size.get() as usize)) }, @@ -463,11 +537,14 @@ impl Drop for Table { // The default table representation is an empty funcref table that cannot grow. 
impl Default for Table { fn default() -> Self { - Self(TableStorage::Static { - data: std::ptr::null_mut(), - size: Cell::new(0), - ty: TableElementType::Func, - maximum: 0, - }) + Self { + storage: TableStorage::Static { + data: std::ptr::null_mut(), + size: Cell::new(0), + ty: TableElementType::Func, + maximum: 0, + }, + limiter: None, + } } } diff --git a/crates/runtime/src/traphandlers.rs b/crates/runtime/src/traphandlers.rs index d9fed4c17e..75f7a7d6d4 100644 --- a/crates/runtime/src/traphandlers.rs +++ b/crates/runtime/src/traphandlers.rs @@ -4,10 +4,11 @@ use crate::VMInterrupts; use backtrace::Backtrace; use std::any::Any; -use std::cell::Cell; +use std::cell::{Cell, UnsafeCell}; use std::error::Error; +use std::mem::MaybeUninit; use std::ptr; -use std::sync::atomic::{AtomicUsize, Ordering::SeqCst}; +use std::sync::atomic::Ordering::SeqCst; use std::sync::Once; use wasmtime_environ::ir; @@ -37,19 +38,32 @@ cfg_if::cfg_if! { pub use sys::SignalHandler; -/// This function performs the low-overhead platform-specific initialization -/// that we want to do eagerly to ensure a more-deterministic global process -/// state. +/// Globally-set callback to determine whether a program counter is actually a +/// wasm trap. /// -/// This is especially relevant for signal handlers since handler ordering -/// depends on installation order: the wasm signal handler must run *before* -/// the other crash handlers and since POSIX signal handlers work LIFO, this -/// function needs to be called at the end of the startup process, after other -/// handlers have been installed. This function can thus be called multiple -/// times, having no effect after the first call. -pub fn init_traps() { +/// This is initialized during `init_traps` below. The definition lives within +/// `wasmtime` currently. +static mut IS_WASM_PC: fn(usize) -> bool = |_| false; + +/// This function is required to be called before any WebAssembly is entered. 
+/// This will configure global state such as signal handlers to prepare the +/// process to receive wasm traps. +/// +/// This function must not only be called globally once before entering +/// WebAssembly but it must also be called once-per-thread that enters +/// WebAssembly. Currently in wasmtime's integration this function is called on +/// creation of a `Store`. +/// +/// The `is_wasm_pc` argument is used when a trap happens to determine if a +/// program counter is the pc of an actual wasm trap or not. This is then used +/// to disambiguate faults that happen due to wasm and faults that happen due to +/// bugs in Rust or elsewhere. +pub fn init_traps(is_wasm_pc: fn(usize) -> bool) { static INIT: Once = Once::new(); - INIT.call_once(|| unsafe { sys::platform_init() }); + INIT.call_once(|| unsafe { + IS_WASM_PC = is_wasm_pc; + sys::platform_init(); + }); } /// Raises a user-defined trap immediately. @@ -155,8 +169,6 @@ pub unsafe fn catch_traps(trap_info: &impl TrapInfo, mut closure: F) -> Resul where F: FnMut(), { - sys::lazy_per_thread_init()?; - return CallThreadState::new(trap_info).with(|cx| { RegisterSetjmp( cx.jmp_buf.as_ptr(), @@ -191,7 +203,7 @@ pub fn out_of_gas() { /// Temporary state stored on the stack which is registered in the `tls` module /// below for calls into wasm. pub struct CallThreadState<'a> { - unwind: Cell, + unwind: UnsafeCell>, jmp_buf: Cell<*const u8>, handling_trap: Cell, trap_info: &'a (dyn TrapInfo + 'a), @@ -208,19 +220,11 @@ pub unsafe trait TrapInfo { /// Converts this object into an `Any` to dynamically check its type. fn as_any(&self) -> &dyn Any; - /// Returns whether the given program counter lies within wasm code, - /// indicating whether we should handle a trap or not. - fn is_wasm_trap(&self, pc: usize) -> bool; - /// Uses `call` to call a custom signal handler, if one is specified. /// /// Returns `true` if `call` returns true, otherwise returns `false`. 
fn custom_signal_handler(&self, call: &dyn Fn(&SignalHandler) -> bool) -> bool; - /// Returns the maximum size, in bytes, the wasm native stack is allowed to - /// grow to. - fn max_wasm_stack(&self) -> usize; - /// Callback invoked whenever WebAssembly has entirely consumed the fuel /// that it was allotted. /// @@ -232,7 +236,6 @@ pub unsafe trait TrapInfo { } enum UnwindReason { - None, Panic(Box), UserTrap(Box), LibTrap(Trap), @@ -240,9 +243,10 @@ enum UnwindReason { } impl<'a> CallThreadState<'a> { + #[inline] fn new(trap_info: &'a (dyn TrapInfo + 'a)) -> CallThreadState<'a> { CallThreadState { - unwind: Cell::new(UnwindReason::None), + unwind: UnsafeCell::new(MaybeUninit::uninit()), jmp_buf: Cell::new(ptr::null()), handling_trap: Cell::new(false), trap_info, @@ -251,20 +255,14 @@ impl<'a> CallThreadState<'a> { } fn with(self, closure: impl FnOnce(&CallThreadState) -> i32) -> Result<(), Trap> { - let _reset = self.update_stack_limit()?; - let ret = tls::set(&self, || closure(&self)); - match self.unwind.replace(UnwindReason::None) { - UnwindReason::None => { - debug_assert_eq!(ret, 1); - Ok(()) - } - UnwindReason::UserTrap(data) => { - debug_assert_eq!(ret, 0); - Err(Trap::User(data)) - } + let ret = tls::set(&self, || closure(&self))?; + if ret != 0 { + return Ok(()); + } + match unsafe { (*self.unwind.get()).as_ptr().read() } { + UnwindReason::UserTrap(data) => Err(Trap::User(data)), UnwindReason::LibTrap(trap) => Err(trap), UnwindReason::JitTrap { backtrace, pc } => { - debug_assert_eq!(ret, 0); let interrupts = self.trap_info.interrupts(); let maybe_interrupted = interrupts.stack_limit.load(SeqCst) == wasmtime_environ::INTERRUPTED; @@ -274,106 +272,13 @@ impl<'a> CallThreadState<'a> { maybe_interrupted, }) } - UnwindReason::Panic(panic) => { - debug_assert_eq!(ret, 0); - std::panic::resume_unwind(panic) - } + UnwindReason::Panic(panic) => std::panic::resume_unwind(panic), } } - /// Checks and/or initializes the wasm native call stack limit. 
- /// - /// This function will inspect the current state of the stack and calling - /// context to determine which of three buckets we're in: - /// - /// 1. We are the first wasm call on the stack. This means that we need to - /// set up a stack limit where beyond which if the native wasm stack - /// pointer goes beyond forces a trap. For now we simply reserve an - /// arbitrary chunk of bytes (1 MB from roughly the current native stack - /// pointer). This logic will likely get tweaked over time. - /// - /// 2. We aren't the first wasm call on the stack. In this scenario the wasm - /// stack limit is already configured. This case of wasm -> host -> wasm - /// we assume that the native stack consumed by the host is accounted for - /// in the initial stack limit calculation. That means that in this - /// scenario we do nothing. - /// - /// 3. We were previously interrupted. In this case we consume the interrupt - /// here and return a trap, clearing the interrupt and allowing the next - /// wasm call to proceed. - /// - /// The return value here is a trap for case 3, a noop destructor in case 2, - /// and a meaningful destructor in case 1 - /// - /// For more information about interrupts and stack limits see - /// `crates/environ/src/cranelift.rs`. - /// - /// Note that this function must be called with `self` on the stack, not the - /// heap/etc. - fn update_stack_limit(&self) -> Result { - // Determine the stack pointer where, after which, any wasm code will - // immediately trap. This is checked on the entry to all wasm functions. - // - // Note that this isn't 100% precise. We are requested to give wasm - // `max_wasm_stack` bytes, but what we're actually doing is giving wasm - // probably a little less than `max_wasm_stack` because we're - // calculating the limit relative to this function's approximate stack - // pointer. Wasm will be executed on a frame beneath this one (or next - // to it). 
In any case it's expected to be at most a few hundred bytes - // of slop one way or another. When wasm is typically given a MB or so - // (a million bytes) the slop shouldn't matter too much. - let wasm_stack_limit = psm::stack_pointer() as usize - self.trap_info.max_wasm_stack(); - - let interrupts = self.trap_info.interrupts(); - let reset_stack_limit = match interrupts.stack_limit.compare_exchange( - usize::max_value(), - wasm_stack_limit, - SeqCst, - SeqCst, - ) { - Ok(_) => { - // We're the first wasm on the stack so we've now reserved the - // `max_wasm_stack` bytes of native stack space for wasm. - // Nothing left to do here now except reset back when we're - // done. - true - } - Err(n) if n == wasmtime_environ::INTERRUPTED => { - // This means that an interrupt happened before we actually - // called this function, which means that we're now - // considered interrupted. Be sure to consume this interrupt - // as part of this process too. - interrupts.stack_limit.store(usize::max_value(), SeqCst); - return Err(Trap::Wasm { - trap_code: ir::TrapCode::Interrupt, - backtrace: Backtrace::new_unresolved(), - }); - } - Err(_) => { - // The stack limit was previously set by a previous wasm - // call on the stack. We leave the original stack limit for - // wasm in place in that case, and don't reset the stack - // limit when we're done. - false - } - }; - - struct Reset<'a>(bool, &'a AtomicUsize); - - impl Drop for Reset<'_> { - fn drop(&mut self) { - if self.0 { - self.1.store(usize::max_value(), SeqCst); - } - } - } - - Ok(Reset(reset_stack_limit, &interrupts.stack_limit)) - } - fn unwind_with(&self, reason: UnwindReason) -> ! { - self.unwind.replace(reason); unsafe { + (*self.unwind.get()).as_mut_ptr().write(reason); Unwind(self.jmp_buf.get()); } } @@ -393,6 +298,7 @@ impl<'a> CallThreadState<'a> { /// instance, and the trap handler should quickly return. 
/// * a different pointer - a jmp_buf buffer to longjmp to, meaning that /// the wasm trap was succesfully handled. + #[cfg_attr(target_os = "macos", allow(dead_code))] // macOS is more raw and doesn't use this fn jmp_buf_if_trap( &self, pc: *const u8, @@ -421,7 +327,7 @@ impl<'a> CallThreadState<'a> { } // If this fault wasn't in wasm code, then it's not our problem - if !self.trap_info.is_wasm_trap(pc as usize) { + if unsafe { !IS_WASM_PC(pc as usize) } { return ptr::null(); } @@ -432,16 +338,21 @@ impl<'a> CallThreadState<'a> { fn capture_backtrace(&self, pc: *const u8) { let backtrace = Backtrace::new_unresolved(); - self.unwind.replace(UnwindReason::JitTrap { - backtrace, - pc: pc as usize, - }); + unsafe { + (*self.unwind.get()) + .as_mut_ptr() + .write(UnwindReason::JitTrap { + backtrace, + pc: pc as usize, + }); + } } } struct ResetCell<'a, T: Copy>(&'a Cell, T); impl Drop for ResetCell<'_, T> { + #[inline] fn drop(&mut self) { self.0.set(self.1); } @@ -454,6 +365,7 @@ impl Drop for ResetCell<'_, T> { // the caller to the trap site. mod tls { use super::CallThreadState; + use crate::Trap; use std::mem; use std::ptr; @@ -472,25 +384,38 @@ mod tls { // these TLS values when the runtime may have crossed threads. mod raw { use super::CallThreadState; + use crate::Trap; use std::cell::Cell; use std::ptr; pub type Ptr = *const CallThreadState<'static>; - thread_local!(static PTR: Cell = Cell::new(ptr::null())); + // The first entry here is the `Ptr` which is what's used as part of the + // public interface of this module. The second entry is a boolean which + // allows the runtime to perform per-thread initialization if necessary + // for handling traps (e.g. setting up ports on macOS and sigaltstack on + // Unix). 
+ thread_local!(static PTR: Cell<(Ptr, bool)> = Cell::new((ptr::null(), false))); #[inline(never)] // see module docs for why this is here - pub fn replace(val: Ptr) -> Ptr { - // Mark the current thread as handling interrupts for this specific - // CallThreadState: may clobber the previous entry. - super::super::sys::register_tls(val); - - PTR.with(|p| p.replace(val)) + pub fn replace(val: Ptr) -> Result { + PTR.with(|p| { + // When a new value is configured that means that we may be + // entering WebAssembly so check to see if this thread has + // performed per-thread initialization for traps. + let (prev, mut initialized) = p.get(); + if !initialized { + super::super::sys::lazy_per_thread_init()?; + initialized = true; + } + p.set((val, initialized)); + Ok(prev) + }) } #[inline(never)] // see module docs for why this is here pub fn get() -> Ptr { - PTR.with(|p| p.get()) + PTR.with(|p| p.get().0) } } @@ -504,7 +429,7 @@ mod tls { /// /// This is not a safe operation since it's intended to only be used /// with stack switching found with fibers and async wasmtime. - pub unsafe fn take() -> TlsRestore { + pub unsafe fn take() -> Result { // Our tls pointer must be set at this time, and it must not be // null. We need to restore the previous pointer since we're // removing ourselves from the call-stack, and in the process we @@ -513,8 +438,8 @@ mod tls { let raw = raw::get(); assert!(!raw.is_null()); let prev = (*raw).prev.replace(ptr::null()); - raw::replace(prev); - TlsRestore(raw) + raw::replace(prev)?; + Ok(TlsRestore(raw)) } /// Restores a previous tls state back into this thread's TLS. @@ -522,17 +447,12 @@ mod tls { /// This is unsafe because it's intended to only be used within the /// context of stack switching within wasmtime. pub unsafe fn replace(self) -> Result<(), super::Trap> { - // When replacing to the previous value of TLS, we might have - // crossed a thread: make sure the trap-handling lazy initializer - // runs. 
- super::sys::lazy_per_thread_init()?; - // We need to configure our previous TLS pointer to whatever is in // TLS at this time, and then we set the current state to ourselves. let prev = raw::get(); assert!((*self.0).prev.get().is_null()); (*self.0).prev.set(prev); - raw::replace(self.0); + raw::replace(self.0)?; Ok(()) } } @@ -540,12 +460,14 @@ mod tls { /// Configures thread local state such that for the duration of the /// execution of `closure` any call to `with` will yield `ptr`, unless this /// is recursively called again. - pub fn set(state: &CallThreadState<'_>, closure: impl FnOnce() -> R) -> R { + pub fn set(state: &CallThreadState<'_>, closure: impl FnOnce() -> R) -> Result { struct Reset<'a, 'b>(&'a CallThreadState<'b>); impl Drop for Reset<'_, '_> { + #[inline] fn drop(&mut self) { - raw::replace(self.0.prev.replace(ptr::null())); + raw::replace(self.0.prev.replace(ptr::null())) + .expect("tls should be previously initialized"); } } @@ -555,10 +477,10 @@ mod tls { let ptr = unsafe { mem::transmute::<*const CallThreadState<'_>, *const CallThreadState<'static>>(state) }; - let prev = raw::replace(ptr); + let prev = raw::replace(ptr)?; state.prev.set(prev); let _reset = Reset(state); - closure() + Ok(closure()) } /// Returns the last pointer configured with `set` above. Panics if `set` diff --git a/crates/runtime/src/traphandlers/macos.rs b/crates/runtime/src/traphandlers/macos.rs index a290225042..f48ae034a1 100644 --- a/crates/runtime/src/traphandlers/macos.rs +++ b/crates/runtime/src/traphandlers/macos.rs @@ -13,19 +13,19 @@ //! here may not be super well documented. This file is 100% lifted from //! SpiderMonkey and then adapted for Wasmtime's purposes. Credit for almost //! all of this file goes to SpiderMonkey for figuring out all the fiddly bits. -//! See also https://searchfox.org/mozilla-central/source/js/src/wasm/WasmSignalHandlers.cpp for -//! the original code. +//! See also +//! +//! for the original code. //! //! 
The high-level overview is that when using mach ports a thread is blocked //! when it generates an exception and then a message can be read from the //! port. This means that, unlike signals, threads can't fix their own traps. //! Instead a helper thread is spun up to service exception messages. This is //! also in conflict with Wasmtime's exception handling currently which is to -//! use a thread-local to figure out whether a pc is a wasm pc or not on a -//! trap. To work around this we have a global map from mach thread numbers to -//! the state for that thread, updated on entry/exit from wasm. This is likely -//! slower than signals which do less updating on wasm entry/exit, but hopefully -//! by the time this is a problem we can figure out a better solution. +//! use a thread-local to store information about how to unwind. Additionally +//! this requires that the check of whether a pc is a wasm trap or not is a +//! global check rather than a per-thread check. This necessitates the existence +//! of `GlobalModuleRegistry` in the `wasmtime` crate. //! //! Otherwise this file heavily uses the `mach` Rust crate for type and //! function declarations. Many bits and pieces are copied or translated from @@ -33,7 +33,7 @@ #![allow(non_snake_case)] -use crate::traphandlers::{tls, CallThreadState, Trap, Unwind}; +use crate::traphandlers::{tls, Trap, Unwind}; use mach::exception_types::*; use mach::kern_return::*; use mach::mach_init::*; @@ -42,14 +42,11 @@ use mach::message::*; use mach::port::*; use mach::thread_act::*; use mach::traps::*; -use std::cell::Cell; -use std::collections::HashMap; use std::mem; -use std::ptr; -use std::sync::Mutex; use std::thread; -/// Other `mach` declarations awaiting https://github.com/fitzgen/mach/pull/64 to be merged. +/// Other `mach` declarations awaiting +/// to be merged. mod mach_addons { #![allow(non_camel_case_types)] #![allow(non_upper_case_globals)] @@ -154,20 +151,10 @@ pub enum Void {} /// Wasmtime on macOS. 
pub type SignalHandler<'a> = dyn Fn(Void) -> bool + 'a; -/// Process-global map for mapping thread names to their state to figure out -/// whether a thread's trap is related to wasm or not. This is extremely -/// unsafe and caution must be used when accessing. Be sure to read -/// documentation below on this. -static mut MAP: *mut Mutex>> = - ptr::null_mut(); - /// Process-global port that we use to route thread-level exceptions to. static mut WASMTIME_PORT: mach_port_name_t = MACH_PORT_NULL; pub unsafe fn platform_init() { - // Initialize the process global map - MAP = Box::into_raw(Default::default()); - // Allocate our WASMTIME_PORT and make sure that it can be sent to so we // can receive exceptions. let me = mach_task_self(); @@ -289,7 +276,7 @@ unsafe fn handle_exception(request: &mut ExceptionRequest) -> bool { let get_pc = |state: &ThreadState| state.__rip as *const u8; - let resume = |state: &mut ThreadState, pc: usize, jmp_buf: usize| { + let resume = |state: &mut ThreadState, pc: usize| { // The x86_64 ABI requires a 16-byte stack alignment for // functions, so typically we'll be 16-byte aligned. In this // case we simulate a `call` instruction by decrementing the @@ -315,7 +302,6 @@ unsafe fn handle_exception(request: &mut ExceptionRequest) -> bool { } state.__rip = unwind as u64; state.__rdi = pc as u64; - state.__rsi = jmp_buf as u64; }; let mut thread_state = ThreadState::new(); } else if #[cfg(target_arch = "aarch64")] { @@ -325,18 +311,17 @@ unsafe fn handle_exception(request: &mut ExceptionRequest) -> bool { let get_pc = |state: &ThreadState| state.__pc as *const u8; - let resume = |state: &mut ThreadState, pc: usize, jmp_buf: usize| { + let resume = |state: &mut ThreadState, pc: usize| { // Clobber LR with the faulting PC, so unwinding resumes at the // faulting instruction. The previous value of LR has been saved // by the callee (in Cranelift generated code), so no need to // stash it. 
state.__lr = pc as u64; - // Fill in the 2 arguments to unwind here, and set PC to it, so + // Fill in the argument to unwind here, and set PC to it, so // it looks like a call to unwind. - state.__pc = unwind as u64; state.__x[0] = pc as u64; - state.__x[1] = jmp_buf as u64; + state.__pc = unwind as u64; }; let mut thread_state = mem::zeroed::(); } else { @@ -372,19 +357,7 @@ unsafe fn handle_exception(request: &mut ExceptionRequest) -> bool { // pointer value and if `MAP` changes happen after we read our entry that's // ok since they won't invalidate our entry. let pc = get_pc(&thread_state); - let state = (*MAP) - .lock() - .unwrap_or_else(|e| e.into_inner()) - .get(&origin_thread) - .copied(); - let jmp_buf = match state { - Some(state) => (*state).jmp_buf_if_trap(pc, |_| false), - None => ptr::null(), - }; - if jmp_buf.is_null() { - return false; - } - if jmp_buf as usize == 1 { + if !super::IS_WASM_PC(pc as usize) { return false; } @@ -392,7 +365,7 @@ unsafe fn handle_exception(request: &mut ExceptionRequest) -> bool { // force the thread itself to trap. The thread's register state is // configured to resume in the `unwind` function below, we update the // thread's register state, and then we're off to the races. - resume(&mut thread_state, pc as usize, jmp_buf as usize); + resume(&mut thread_state, pc as usize); let kret = thread_set_state( origin_thread, thread_state_flavor, @@ -409,13 +382,13 @@ unsafe fn handle_exception(request: &mut ExceptionRequest) -> bool { /// a native backtrace once we've switched back to the thread itself. After /// the backtrace is captured we can do the usual `longjmp` back to the source /// of the wasm code. -unsafe extern "C" fn unwind(wasm_pc: *const u8, jmp_buf: *const u8) -> ! { - tls::with(|state| { - if let Some(state) = state { - state.capture_backtrace(wasm_pc); - } +unsafe extern "C" fn unwind(wasm_pc: *const u8) -> ! 
{ + let jmp_buf = tls::with(|state| { + let state = state.unwrap(); + state.capture_backtrace(wasm_pc); + state.jmp_buf.get() }); - + debug_assert!(!jmp_buf.is_null()); Unwind(jmp_buf); } @@ -451,46 +424,16 @@ impl Drop for ClosePort { /// task-level port which is where we'd expected things like breakpad/crashpad /// exception handlers to get registered. pub fn lazy_per_thread_init() -> Result<(), Trap> { - thread_local! { - static PORTS_SET: Cell = Cell::new(false); + unsafe { + assert!(WASMTIME_PORT != MACH_PORT_NULL); + let kret = thread_set_exception_ports( + MY_PORT.with(|p| p.0), + EXC_MASK_BAD_ACCESS | EXC_MASK_BAD_INSTRUCTION, + WASMTIME_PORT, + EXCEPTION_DEFAULT | MACH_EXCEPTION_CODES, + mach_addons::THREAD_STATE_NONE, + ); + assert_eq!(kret, KERN_SUCCESS, "failed to set thread exception port"); } - - PORTS_SET.with(|ports| { - if ports.replace(true) { - return; - } - - unsafe { - assert!(WASMTIME_PORT != MACH_PORT_NULL); - let kret = thread_set_exception_ports( - MY_PORT.with(|p| p.0), - EXC_MASK_BAD_ACCESS | EXC_MASK_BAD_INSTRUCTION, - WASMTIME_PORT, - EXCEPTION_DEFAULT | MACH_EXCEPTION_CODES, - mach_addons::THREAD_STATE_NONE, - ); - assert_eq!(kret, KERN_SUCCESS, "failed to set thread exception port"); - } - }); Ok(()) } - -/// This hook is invoked whenever TLS state for the current thread is updated -/// to the `ptr` specified. -/// -/// The purpose for hooking this on macOS is we register in a process-global map -/// that our mach thread's state is `ptr` at this time. This allows the -/// exception handling thread to lookup in this map later if our thread -/// generates an exception. -/// -/// Note that in general this is quite unsafe since we're moving non-Send state -/// (`ptr`) which is also only valid for a short portion of the program (it -/// lives on the stack) into a global portion of the program. This needs to be -/// kept tightly in sync with `handle_exception` above where it's accessed in a -/// very limited fashion. 
-pub fn register_tls(ptr: *const CallThreadState<'static>) { - unsafe { - let me = MY_PORT.with(|p| p.0); - (*MAP).lock().unwrap().insert(me, ptr); - } -} diff --git a/crates/runtime/src/traphandlers/unix.rs b/crates/runtime/src/traphandlers/unix.rs index a97976f9ca..b4046f0944 100644 --- a/crates/runtime/src/traphandlers/unix.rs +++ b/crates/runtime/src/traphandlers/unix.rs @@ -1,4 +1,4 @@ -use crate::traphandlers::{tls, CallThreadState, Trap, Unwind}; +use crate::traphandlers::{tls, Trap, Unwind}; use std::cell::RefCell; use std::convert::TryInto; use std::io; @@ -47,8 +47,8 @@ pub unsafe fn platform_init() { // Handle `unreachable` instructions which execute `ud2` right now register(&mut PREV_SIGILL, libc::SIGILL); - // x86 uses SIGFPE to report division by zero - if cfg!(target_arch = "x86") || cfg!(target_arch = "x86_64") { + // x86 and s390x use SIGFPE to report division by zero + if cfg!(target_arch = "x86") || cfg!(target_arch = "x86_64") || cfg!(target_arch = "s390x") { register(&mut PREV_SIGFPE, libc::SIGFPE); } @@ -85,9 +85,8 @@ unsafe extern "C" fn trap_handler( // Otherwise flag ourselves as handling a trap, do the trap // handling, and reset our trap handling flag. Then we figure // out what to do based on the result of the trap handling. - let pc = get_pc(context); - let jmp_buf = - info.jmp_buf_if_trap(pc, |handler| handler(signum, siginfo, context)); + let pc = get_pc(context, signum); + let jmp_buf = info.jmp_buf_if_trap(pc, |handler| handler(signum, siginfo, context)); // Figure out what to do based on the result of this handling of // the trap. Note that our sentinel value of 1 means that the @@ -128,7 +127,7 @@ unsafe extern "C" fn trap_handler( } } -unsafe fn get_pc(cx: *mut libc::c_void) -> *const u8 { +unsafe fn get_pc(cx: *mut libc::c_void, _signum: libc::c_int) -> *const u8 { cfg_if::cfg_if! 
{ if #[cfg(all(target_os = "linux", target_arch = "x86_64"))] { let cx = &*(cx as *const libc::ucontext_t); @@ -139,6 +138,23 @@ unsafe fn get_pc(cx: *mut libc::c_void) -> *const u8 { } else if #[cfg(all(any(target_os = "linux", target_os = "android"), target_arch = "aarch64"))] { let cx = &*(cx as *const libc::ucontext_t); cx.uc_mcontext.pc as *const u8 + } else if #[cfg(all(target_os = "linux", target_arch = "s390x"))] { + // On s390x, SIGILL and SIGFPE are delivered with the PSW address + // pointing *after* the faulting instruction, while SIGSEGV and + // SIGBUS are delivered with the PSW address pointing *to* the + // faulting instruction. To handle this, the code generator registers + // any trap that results in one of "late" signals on the last byte + // of the instruction, and any trap that results in one of the "early" + // signals on the first byte of the instruction (as usual). This + // means we simply need to decrement the reported PSW address by + // one in the case of a "late" signal here to ensure we always + // correctly find the associated trap handler. + let trap_offset = match _signum { + libc::SIGILL | libc::SIGFPE => 1, + _ => 0, + }; + let cx = &*(cx as *const libc::ucontext_t); + (cx.uc_mcontext.psw.addr - trap_offset) as *const u8 } else if #[cfg(all(target_os = "freebsd", target_arch = "x86_64"))] { let cx = &*(cx as *const libc::ucontext_t); cx.uc_mcontext.mc_rip as *const u8 @@ -155,41 +171,35 @@ unsafe fn get_pc(cx: *mut libc::c_void) -> *const u8 { /// and registering our own alternate stack that is large enough and has a guard /// page. pub fn lazy_per_thread_init() -> Result<(), Trap> { + // This thread local is purely used to register a `Stack` to get deallocated + // when the thread exists. Otherwise this function is only ever called at + // most once per-thread. thread_local! { - /// Thread-local state is lazy-initialized on the first time it's used, - /// and dropped when the thread exits. 
- static TLS: RefCell = RefCell::new(Tls::None); + static STACK: RefCell> = RefCell::new(None); } /// The size of the sigaltstack (not including the guard, which will be /// added). Make this large enough to run our signal handlers. const MIN_STACK_SIZE: usize = 16 * 4096; - enum Tls { - None, - Allocated { - mmap_ptr: *mut libc::c_void, - mmap_size: usize, - }, - BigEnough, + struct Stack { + mmap_ptr: *mut libc::c_void, + mmap_size: usize, } - return TLS.with(|slot| unsafe { - let mut slot = slot.borrow_mut(); - match *slot { - Tls::None => {} - // already checked - _ => return Ok(()), - } + return STACK.with(|s| { + *s.borrow_mut() = unsafe { allocate_sigaltstack()? }; + Ok(()) + }); + unsafe fn allocate_sigaltstack() -> Result, Trap> { // Check to see if the existing sigaltstack, if it exists, is big // enough. If so we don't need to allocate our own. let mut old_stack = mem::zeroed(); let r = libc::sigaltstack(ptr::null(), &mut old_stack); assert_eq!(r, 0, "learning about sigaltstack failed"); if old_stack.ss_flags & libc::SS_DISABLE == 0 && old_stack.ss_size >= MIN_STACK_SIZE { - *slot = Tls::BigEnough; - return Ok(()); + return Ok(None); } // ... but failing that we need to allocate our own, so do all that @@ -227,31 +237,19 @@ pub fn lazy_per_thread_init() -> Result<(), Trap> { let r = libc::sigaltstack(&new_stack, ptr::null_mut()); assert_eq!(r, 0, "registering new sigaltstack failed"); - *slot = Tls::Allocated { + Ok(Some(Stack { mmap_ptr: ptr, mmap_size: alloc_size, - }; - Ok(()) - }); + })) + } - impl Drop for Tls { + impl Drop for Stack { fn drop(&mut self) { - let (ptr, size) = match self { - Tls::Allocated { - mmap_ptr, - mmap_size, - } => (*mmap_ptr, *mmap_size), - _ => return, - }; unsafe { // Deallocate the stack memory. 
- let r = libc::munmap(ptr, size); + let r = libc::munmap(self.mmap_ptr, self.mmap_size); debug_assert_eq!(r, 0, "munmap failed during thread shutdown"); } } } } - -pub fn register_tls(_: *const CallThreadState<'static>) { - // Unused on unix -} diff --git a/crates/runtime/src/traphandlers/windows.rs b/crates/runtime/src/traphandlers/windows.rs index 9512c4b52c..3cfe3d00d9 100644 --- a/crates/runtime/src/traphandlers/windows.rs +++ b/crates/runtime/src/traphandlers/windows.rs @@ -1,4 +1,4 @@ -use crate::traphandlers::{tls, CallThreadState, Trap, Unwind}; +use crate::traphandlers::{tls, Trap, Unwind}; use std::io; use winapi::um::errhandlingapi::*; use winapi::um::minwinbase::*; @@ -77,7 +77,3 @@ pub fn lazy_per_thread_init() -> Result<(), Trap> { // Unused on Windows Ok(()) } - -pub fn register_tls(_: *const CallThreadState<'static>) { - // Unused on Windows -} diff --git a/crates/runtime/src/vmcontext.rs b/crates/runtime/src/vmcontext.rs index f4dffeee2c..fcb17466db 100644 --- a/crates/runtime/src/vmcontext.rs +++ b/crates/runtime/src/vmcontext.rs @@ -490,17 +490,20 @@ mod test_vmshared_signature_index { impl VMSharedSignatureIndex { /// Create a new `VMSharedSignatureIndex`. + #[inline] pub fn new(value: u32) -> Self { Self(value) } /// Returns the underlying bits of the index. 
+ #[inline] pub fn bits(&self) -> u32 { self.0 } } impl Default for VMSharedSignatureIndex { + #[inline] fn default() -> Self { Self::new(u32::MAX) } diff --git a/crates/test-programs/Cargo.toml b/crates/test-programs/Cargo.toml index 660abc0a47..7c4d9c3ad4 100644 --- a/crates/test-programs/Cargo.toml +++ b/crates/test-programs/Cargo.toml @@ -11,17 +11,18 @@ license = "Apache-2.0 WITH LLVM-exception" cfg-if = "1.0" [dev-dependencies] -wasi-common = { path = "../wasi-common", version = "0.25.0" } -wasi-cap-std-sync = { path = "../wasi-common/cap-std-sync", version = "0.25.0" } -wasmtime = { path = "../wasmtime", version = "0.25.0" } -wasmtime-wasi = { path = "../wasi", version = "0.25.0" } -target-lexicon = "0.11.0" +wasi-common = { path = "../wasi-common", version = "0.26.0" } +wasi-cap-std-sync = { path = "../wasi-common/cap-std-sync", version = "0.26.0" } +wasmtime = { path = "../wasmtime", version = "0.26.0" } +wasmtime-wasi = { path = "../wasi", version = "0.26.0", features = ["tokio"] } +target-lexicon = "0.12.0" pretty_env_logger = "0.4.0" tempfile = "3.1.0" os_pipe = "0.9" anyhow = "1.0.19" -wat = "1.0.36" +wat = "1.0.37" cap-std = "0.13" +tokio = { version = "1.5.0", features = ["rt-multi-thread"] } [features] test_programs = [] diff --git a/crates/test-programs/build.rs b/crates/test-programs/build.rs index abef0738f0..6894f66a81 100644 --- a/crates/test-programs/build.rs +++ b/crates/test-programs/build.rs @@ -40,7 +40,9 @@ mod wasi_tests { File::create(out_dir.join("wasi_tests.rs")).expect("error generating test source file"); build_tests("wasi-tests", &out_dir).expect("building tests"); test_directory(&mut out, "wasi-cap-std-sync", "cap_std_sync", &out_dir) - .expect("generating tests"); + .expect("generating wasi-cap-std-sync tests"); + test_directory(&mut out, "wasi-tokio", "tokio", &out_dir) + .expect("generating wasi-tokio tests"); } fn build_tests(testsuite: &str, out_dir: &Path) -> io::Result<()> { @@ -173,6 +175,7 @@ mod wasi_tests { match 
testsuite { "wasi-cap-std-sync" => cap_std_sync_ignore(name), "wasi-virtfs" => virtfs_ignore(name), + "wasi-tokio" => tokio_ignore(name), _ => panic!("unknown test suite: {}", testsuite), } } @@ -200,6 +203,10 @@ mod wasi_tests { .contains(&name) } + /// Tokio should support the same things as cap_std_sync + fn tokio_ignore(name: &str) -> bool { + cap_std_sync_ignore(name) + } /// Virtfs barely works at all and is not suitable for any purpose fn virtfs_ignore(name: &str) -> bool { [ @@ -260,7 +267,7 @@ mod wasi_tests { /// Mark tests which require inheriting parent process stdio fn inherit_stdio(testsuite: &str, name: &str) -> bool { match testsuite { - "wasi-cap-std-sync" => match name { + "wasi-cap-std-sync" | "wasi-tokio" => match name { "poll_oneoff_stdio" => true, _ => false, }, diff --git a/crates/test-programs/tests/wasm_tests/runtime/cap_std_sync.rs b/crates/test-programs/tests/wasm_tests/runtime/cap_std_sync.rs index 1610880115..32aa3cbedb 100644 --- a/crates/test-programs/tests/wasm_tests/runtime/cap_std_sync.rs +++ b/crates/test-programs/tests/wasm_tests/runtime/cap_std_sync.rs @@ -1,10 +1,26 @@ use anyhow::Context; use std::path::Path; -use wasi_cap_std_sync::WasiCtxBuilder; use wasi_common::pipe::WritePipe; use wasmtime::{Linker, Module, Store}; +use wasmtime_wasi::sync::{Wasi, WasiCtxBuilder}; pub fn instantiate(data: &[u8], bin_name: &str, workspace: Option<&Path>) -> anyhow::Result<()> { + run(data, bin_name, workspace, false) +} +pub fn instantiate_inherit_stdio( + data: &[u8], + bin_name: &str, + workspace: Option<&Path>, +) -> anyhow::Result<()> { + run(data, bin_name, workspace, true) +} + +fn run( + data: &[u8], + bin_name: &str, + workspace: Option<&Path>, + inherit_stdio: bool, +) -> anyhow::Result<()> { let stdout = WritePipe::new_in_memory(); let stderr = WritePipe::new_in_memory(); @@ -15,41 +31,29 @@ pub fn instantiate(data: &[u8], bin_name: &str, workspace: Option<&Path>) -> any // Additionally register any preopened directories if we 
have them. let mut builder = WasiCtxBuilder::new(); - builder = builder - .arg(bin_name)? - .arg(".")? - .stdout(Box::new(stdout.clone())) - .stderr(Box::new(stderr.clone())); + if inherit_stdio { + builder = builder.inherit_stdio(); + } else { + builder = builder + .stdout(Box::new(stdout.clone())) + .stderr(Box::new(stderr.clone())); + } + + builder = builder.arg(bin_name)?.arg(".")?; if let Some(workspace) = workspace { println!("preopen: {:?}", workspace); let preopen_dir = unsafe { cap_std::fs::Dir::open_ambient_dir(workspace) }?; builder = builder.preopened_dir(preopen_dir, ".")?; } - - #[cfg(windows)] - { - builder = builder - .env("ERRNO_MODE_WINDOWS", "1")? - .env("NO_DANGLING_FILESYSTEM", "1")? - .env("NO_FD_ALLOCATE", "1")? - .env("NO_RENAME_DIR_TO_EMPTY_DIR", "1")? - } - #[cfg(all(unix, not(target_os = "macos")))] - { - builder = builder.env("ERRNO_MODE_UNIX", "1")?; - } - #[cfg(target_os = "macos")] - { - builder = builder - .env("ERRNO_MODE_MACOS", "1")? - .env("NO_FD_ALLOCATE", "1")?; + for (var, val) in super::test_suite_environment() { + builder = builder.env(var, val)?; } // cap-std-sync does not yet support the sync family of fdflags builder = builder.env("NO_FDFLAGS_SYNC_SUPPORT", "1")?; - let wasi = wasmtime_wasi::Wasi::new(&store, builder.build()?); + let wasi = Wasi::new(&store, builder.build()?); let mut linker = Linker::new(&store); @@ -82,41 +86,3 @@ pub fn instantiate(data: &[u8], bin_name: &str, workspace: Option<&Path>) -> any } } } - -pub fn instantiate_inherit_stdio( - data: &[u8], - bin_name: &str, - workspace: Option<&Path>, -) -> anyhow::Result<()> { - let r = { - let store = Store::default(); - - // Create our wasi context. - // Additionally register any preopened directories if we have them. 
- let mut builder = WasiCtxBuilder::new(); - - builder = builder.arg(bin_name)?.arg(".")?.inherit_stdio(); - - if let Some(workspace) = workspace { - println!("preopen: {:?}", workspace); - let preopen_dir = unsafe { cap_std::fs::Dir::open_ambient_dir(workspace) }?; - builder = builder.preopened_dir(preopen_dir, ".")?; - } - - let snapshot1 = wasmtime_wasi::Wasi::new(&store, builder.build()?); - - let mut linker = Linker::new(&store); - - snapshot1.add_to_linker(&mut linker)?; - - let module = Module::new(store.engine(), &data).context("failed to create wasm module")?; - let instance = linker.instantiate(&module)?; - let start = instance.get_typed_func::<(), ()>("_start")?; - start.call(()).map_err(anyhow::Error::from) - }; - - match r { - Ok(()) => Ok(()), - Err(trap) => Err(trap.context(format!("error while testing Wasm module '{}'", bin_name,))), - } -} diff --git a/crates/test-programs/tests/wasm_tests/runtime/mod.rs b/crates/test-programs/tests/wasm_tests/runtime/mod.rs index 035b1e83ea..9a4fe8202a 100644 --- a/crates/test-programs/tests/wasm_tests/runtime/mod.rs +++ b/crates/test-programs/tests/wasm_tests/runtime/mod.rs @@ -1 +1,34 @@ pub mod cap_std_sync; +pub mod tokio; + +// Configure the test suite environment. +// Test programs use these environment variables to determine what behavior +// is expected: different errnos are expected on windows, mac, and other unixes, +// and other filesystem operations are supported or not. +pub fn test_suite_environment() -> &'static [(&'static str, &'static str)] { + #[cfg(windows)] + { + &[ + ("ERRNO_MODE_WINDOWS", "1"), + // Windows does not support dangling links or symlinks in the filesystem. + ("NO_DANGLING_FILESYSTEM", "1"), + // Windows does not support fd_allocate. + ("NO_FD_ALLOCATE", "1"), + // Windows does not support renaming a directory to an empty directory - + // empty directory must be deleted. 
+ ("NO_RENAME_DIR_TO_EMPTY_DIR", "1"), + ] + } + #[cfg(all(unix, not(target_os = "macos")))] + { + &[("ERRNO_MODE_UNIX", "1")] + } + #[cfg(target_os = "macos")] + { + &[ + ("ERRNO_MODE_MACOS", "1"), + // MacOS does not support fd_allocate + ("NO_FD_ALLOCATE", "1"), + ] + } +} diff --git a/crates/test-programs/tests/wasm_tests/runtime/tokio.rs b/crates/test-programs/tests/wasm_tests/runtime/tokio.rs new file mode 100644 index 0000000000..28577015bc --- /dev/null +++ b/crates/test-programs/tests/wasm_tests/runtime/tokio.rs @@ -0,0 +1,96 @@ +use anyhow::Context; +use std::path::Path; +use wasi_common::pipe::WritePipe; +use wasmtime::{Config, Engine, Linker, Module, Store}; +use wasmtime_wasi::tokio::{Wasi, WasiCtxBuilder}; + +pub fn instantiate(data: &[u8], bin_name: &str, workspace: Option<&Path>) -> anyhow::Result<()> { + run(data, bin_name, workspace, false) +} +pub fn instantiate_inherit_stdio( + data: &[u8], + bin_name: &str, + workspace: Option<&Path>, +) -> anyhow::Result<()> { + run(data, bin_name, workspace, true) +} + +fn run( + data: &[u8], + bin_name: &str, + workspace: Option<&Path>, + inherit_stdio: bool, +) -> anyhow::Result<()> { + let stdout = WritePipe::new_in_memory(); + let stdout_ = stdout.clone(); + let stderr = WritePipe::new_in_memory(); + let stderr_ = stderr.clone(); + + let r = tokio::runtime::Runtime::new() + .expect("create runtime") + .block_on(async move { + let mut config = Config::new(); + config.async_support(true); + Wasi::add_to_config(&mut config); + let engine = Engine::new(&config)?; + let store = Store::new(&engine); + + // Create our wasi context. 
+ let mut builder = WasiCtxBuilder::new(); + + if inherit_stdio { + builder = builder.inherit_stdio(); + } else { + builder = builder + .stdout(Box::new(stdout_.clone())) + .stderr(Box::new(stderr_.clone())); + } + + builder = builder.arg(bin_name)?.arg(".")?; + + if let Some(workspace) = workspace { + println!("preopen: {:?}", workspace); + let preopen_dir = unsafe { cap_std::fs::Dir::open_ambient_dir(workspace) }?; + builder = builder.preopened_dir(preopen_dir, ".")?; + } + + for (var, val) in super::test_suite_environment() { + builder = builder.env(var, val)?; + } + + // tokio does not yet support the sync family of fdflags, because cap-std-sync + // does not. + builder = builder.env("NO_FDFLAGS_SYNC_SUPPORT", "1")?; + + Wasi::set_context(&store, builder.build()?) + .map_err(|_| anyhow::anyhow!("wasi set_context failed"))?; + + let module = + Module::new(store.engine(), &data).context("failed to create wasm module")?; + let linker = Linker::new(&store); + let instance = linker.instantiate_async(&module).await?; + let start = instance.get_typed_func::<(), ()>("_start")?; + start.call_async(()).await.map_err(anyhow::Error::from) + }); + + match r { + Ok(()) => Ok(()), + Err(trap) => { + let stdout = stdout + .try_into_inner() + .expect("sole ref to stdout") + .into_inner(); + if !stdout.is_empty() { + println!("guest stdout:\n{}\n===", String::from_utf8_lossy(&stdout)); + } + let stderr = stderr + .try_into_inner() + .expect("sole ref to stderr") + .into_inner(); + if !stderr.is_empty() { + println!("guest stderr:\n{}\n===", String::from_utf8_lossy(&stderr)); + } + Err(trap.context(format!("error while testing Wasm module '{}'", bin_name,))) + } + } +} diff --git a/crates/test-programs/wasi-tests/src/bin/poll_oneoff.rs b/crates/test-programs/wasi-tests/src/bin/poll_oneoff_files.rs similarity index 57% rename from crates/test-programs/wasi-tests/src/bin/poll_oneoff.rs rename to crates/test-programs/wasi-tests/src/bin/poll_oneoff_files.rs index 
a680cf6309..d457a80544 100644 --- a/crates/test-programs/wasi-tests/src/bin/poll_oneoff.rs +++ b/crates/test-programs/wasi-tests/src/bin/poll_oneoff_files.rs @@ -26,9 +26,10 @@ unsafe fn test_empty_poll() { } unsafe fn test_timeout() { + let timeout = 5_000_000u64; // 5 milliseconds let clock = wasi::SubscriptionClock { id: wasi::CLOCKID_MONOTONIC, - timeout: 5_000_000u64, // 5 milliseconds + timeout, precision: 0, flags: 0, }; @@ -39,7 +40,9 @@ unsafe fn test_timeout() { u: wasi::SubscriptionUU { clock }, }, }]; + let before = wasi::clock_time_get(wasi::CLOCKID_MONOTONIC, 0).unwrap(); let out = poll_oneoff_impl(&r#in).unwrap(); + let after = wasi::clock_time_get(wasi::CLOCKID_MONOTONIC, 0).unwrap(); assert_eq!(out.len(), 1, "should return 1 event"); let event = &out[0]; assert_errno!(event.error, wasi::ERRNO_SUCCESS); @@ -52,19 +55,60 @@ unsafe fn test_timeout() { event.userdata, CLOCK_ID, "the event.userdata should contain clock_id specified by the user" ); + assert!( + after - before >= timeout, + "poll_oneoff should sleep for the specified interval" + ); } -unsafe fn test_fd_readwrite(fd: wasi::Fd, error_code: wasi::Errno) { - let fd_readwrite = wasi::SubscriptionFdReadwrite { - file_descriptor: fd, +// Like test_timeout, but uses `CLOCKID_REALTIME`, as WASI libc's sleep +// functions do. 
+unsafe fn test_sleep() { + let timeout = 5_000_000u64; // 5 milliseconds + let clock = wasi::SubscriptionClock { + id: wasi::CLOCKID_REALTIME, + timeout, + precision: 0, + flags: 0, }; + let r#in = [wasi::Subscription { + userdata: CLOCK_ID, + u: wasi::SubscriptionU { + tag: wasi::EVENTTYPE_CLOCK, + u: wasi::SubscriptionUU { clock }, + }, + }]; + let before = wasi::clock_time_get(wasi::CLOCKID_MONOTONIC, 0).unwrap(); + let out = poll_oneoff_impl(&r#in).unwrap(); + let after = wasi::clock_time_get(wasi::CLOCKID_MONOTONIC, 0).unwrap(); + assert_eq!(out.len(), 1, "should return 1 event"); + let event = &out[0]; + assert_errno!(event.error, wasi::ERRNO_SUCCESS); + assert_eq!( + event.r#type, + wasi::EVENTTYPE_CLOCK, + "the event.type should equal clock" + ); + assert_eq!( + event.userdata, CLOCK_ID, + "the event.userdata should contain clock_id specified by the user" + ); + assert!( + after - before >= timeout, + "poll_oneoff should sleep for the specified interval" + ); +} + +unsafe fn test_fd_readwrite(readable_fd: wasi::Fd, writable_fd: wasi::Fd, error_code: wasi::Errno) { let r#in = [ wasi::Subscription { userdata: 1, u: wasi::SubscriptionU { tag: wasi::EVENTTYPE_FD_READ, u: wasi::SubscriptionUU { - fd_read: fd_readwrite, + fd_read: wasi::SubscriptionFdReadwrite { + file_descriptor: readable_fd, + }, }, }, }, @@ -73,13 +117,15 @@ unsafe fn test_fd_readwrite(fd: wasi::Fd, error_code: wasi::Errno) { u: wasi::SubscriptionU { tag: wasi::EVENTTYPE_FD_WRITE, u: wasi::SubscriptionUU { - fd_write: fd_readwrite, + fd_write: wasi::SubscriptionFdReadwrite { + file_descriptor: writable_fd, + }, }, }, }, ]; let out = poll_oneoff_impl(&r#in).unwrap(); - assert_eq!(out.len(), 2, "should return 2 events"); + assert_eq!(out.len(), 2, "should return 2 events, got: {:?}", out); assert_eq!( out[0].userdata, 1, "the event.userdata should contain fd userdata specified by the user" @@ -104,26 +150,64 @@ unsafe fn test_fd_readwrite(fd: wasi::Fd, error_code: wasi::Errno) { unsafe fn 
test_fd_readwrite_valid_fd(dir_fd: wasi::Fd) { // Create a file in the scratch directory. - let file_fd = wasi::path_open( + let nonempty_file = wasi::path_open( dir_fd, 0, - "file", + "readable_file", wasi::OFLAGS_CREAT, - wasi::RIGHTS_FD_READ | wasi::RIGHTS_FD_WRITE | wasi::RIGHTS_POLL_FD_READWRITE, + wasi::RIGHTS_FD_WRITE, 0, 0, ) - .expect("opening a file"); + .expect("create writable file"); + // Write to file + let contents = &[1u8]; + let ciovec = wasi::Ciovec { + buf: contents.as_ptr() as *const _, + buf_len: contents.len(), + }; + wasi::fd_write(nonempty_file, &[ciovec]).expect("write"); + wasi::fd_close(nonempty_file).expect("close"); + + // Now open the file for reading + let readable_fd = wasi::path_open( + dir_fd, + 0, + "readable_file", + 0, + wasi::RIGHTS_FD_READ | wasi::RIGHTS_POLL_FD_READWRITE, + 0, + 0, + ) + .expect("opening a readable file"); + assert_gt!( - file_fd, + readable_fd, + libc::STDERR_FILENO as wasi::Fd, + "file descriptor range check", + ); + // Create a file in the scratch directory. 
+ let writable_fd = wasi::path_open( + dir_fd, + 0, + "writable_file", + wasi::OFLAGS_CREAT, + wasi::RIGHTS_FD_WRITE | wasi::RIGHTS_POLL_FD_READWRITE, + 0, + 0, + ) + .expect("opening a writable file"); + assert_gt!( + writable_fd, libc::STDERR_FILENO as wasi::Fd, "file descriptor range check", ); - test_fd_readwrite(file_fd, wasi::ERRNO_SUCCESS); + test_fd_readwrite(readable_fd, writable_fd, wasi::ERRNO_SUCCESS); - wasi::fd_close(file_fd).expect("closing a file"); - wasi::path_unlink_file(dir_fd, "file").expect("removing a file"); + wasi::fd_close(readable_fd).expect("closing readable_file"); + wasi::path_unlink_file(dir_fd, "readable_file").expect("removing readable_file"); + wasi::path_unlink_file(dir_fd, "writable_file").expect("removing writable_file"); } unsafe fn test_fd_readwrite_invalid_fd() { @@ -156,6 +240,7 @@ unsafe fn test_fd_readwrite_invalid_fd() { unsafe fn test_poll_oneoff(dir_fd: wasi::Fd) { test_timeout(); + test_sleep(); test_empty_poll(); test_fd_readwrite_valid_fd(dir_fd); test_fd_readwrite_invalid_fd(); diff --git a/crates/test-programs/wasi-tests/src/bin/poll_oneoff_stdio.rs b/crates/test-programs/wasi-tests/src/bin/poll_oneoff_stdio.rs index 19f60d076c..d9b107a3c3 100644 --- a/crates/test-programs/wasi-tests/src/bin/poll_oneoff_stdio.rs +++ b/crates/test-programs/wasi-tests/src/bin/poll_oneoff_stdio.rs @@ -1,6 +1,8 @@ +use std::collections::HashMap; use std::mem::MaybeUninit; use wasi_tests::{assert_errno, STDERR_FD, STDIN_FD, STDOUT_FD}; +const TIMEOUT: u64 = 50_000_000u64; // 50 milliseconds, required to satisfy slow execution in CI const CLOCK_ID: wasi::Userdata = 0x0123_45678; const STDIN_ID: wasi::Userdata = 0x8765_43210; @@ -17,7 +19,7 @@ unsafe fn poll_oneoff_impl(r#in: &[wasi::Subscription]) -> Result= 1, "should return at least 1 event"); + assert!(out.len() >= 1, "stdin read should return at least 1 event"); for event in out { if event.r#type == wasi::EVENTTYPE_CLOCK { assert_errno!(event.error, wasi::ERRNO_SUCCESS); @@ -66,55 
+68,61 @@ unsafe fn test_stdin_read() { } } +fn writable_subs(h: &HashMap) -> Vec { + println!("writable subs: {:?}", h); + h.iter() + .map(|(ud, fd)| wasi::Subscription { + userdata: *ud, + u: wasi::SubscriptionU { + tag: wasi::EVENTTYPE_FD_WRITE, + u: wasi::SubscriptionUU { + fd_write: wasi::SubscriptionFdReadwrite { + file_descriptor: *fd, + }, + }, + }, + }) + .collect() +} + unsafe fn test_stdout_stderr_write() { - let stdout_readwrite = wasi::SubscriptionFdReadwrite { - file_descriptor: STDOUT_FD, - }; - let stderr_readwrite = wasi::SubscriptionFdReadwrite { - file_descriptor: STDERR_FD, - }; - let r#in = [ - wasi::Subscription { - userdata: 1, - u: wasi::SubscriptionU { - tag: wasi::EVENTTYPE_FD_WRITE, - u: wasi::SubscriptionUU { - fd_write: stdout_readwrite, + let mut writable: HashMap = + vec![(1, STDOUT_FD), (2, STDERR_FD)].into_iter().collect(); + + let clock = wasi::Subscription { + userdata: CLOCK_ID, + u: wasi::SubscriptionU { + tag: wasi::EVENTTYPE_CLOCK, + u: wasi::SubscriptionUU { + clock: wasi::SubscriptionClock { + id: wasi::CLOCKID_MONOTONIC, + timeout: TIMEOUT, + precision: 0, + flags: 0, }, }, }, - wasi::Subscription { - userdata: 2, - u: wasi::SubscriptionU { - tag: wasi::EVENTTYPE_FD_WRITE, - u: wasi::SubscriptionUU { - fd_write: stderr_readwrite, - }, - }, - }, - ]; - let out = poll_oneoff_impl(&r#in).unwrap(); - assert_eq!(out.len(), 2, "should return 2 events"); - assert_eq!( - out[0].userdata, 1, - "the event.userdata should contain fd userdata specified by the user" - ); - assert_errno!(out[0].error, wasi::ERRNO_SUCCESS); - assert_eq!( - out[0].r#type, - wasi::EVENTTYPE_FD_WRITE, - "the event.type should equal FD_WRITE" - ); - assert_eq!( - out[1].userdata, 2, - "the event.userdata should contain fd userdata specified by the user" - ); - assert_errno!(out[1].error, wasi::ERRNO_SUCCESS); - assert_eq!( - out[1].r#type, - wasi::EVENTTYPE_FD_WRITE, - "the event.type should equal FD_WRITE" - ); + }; + while !writable.is_empty() { + let mut 
subs = writable_subs(&writable); + subs.push(clock.clone()); + let out = poll_oneoff_impl(&subs).unwrap(); + for event in out { + match event.userdata { + CLOCK_ID => { + panic!("timed out with the following pending subs: {:?}", writable) + } + ud => { + if let Some(_) = writable.remove(&ud) { + assert_eq!(event.r#type, wasi::EVENTTYPE_FD_WRITE); + assert_errno!(event.error, wasi::ERRNO_SUCCESS); + } else { + panic!("Unknown userdata {}, pending sub: {:?}", ud, writable) + } + } + } + } + } } unsafe fn test_poll_oneoff() { diff --git a/crates/wasi-common/Cargo.toml b/crates/wasi-common/Cargo.toml index 58646c549d..e61eb77b41 100644 --- a/crates/wasi-common/Cargo.toml +++ b/crates/wasi-common/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wasi-common" -version = "0.25.0" +version = "0.26.0" authors = ["The Wasmtime Project Developers"] description = "WASI implementation in Rust" license = "Apache-2.0 WITH LLVM-exception" @@ -20,7 +20,7 @@ links = "wasi-common-19" [dependencies] anyhow = "1.0" thiserror = "1.0" -wiggle = { path = "../wiggle", default-features = false, version = "0.25.0" } +wiggle = { path = "../wiggle", default-features = false, version = "0.26.0" } tracing = "0.1.19" cap-std = "0.13" cap-rand = "0.13" diff --git a/crates/wasi-common/cap-std-sync/Cargo.toml b/crates/wasi-common/cap-std-sync/Cargo.toml index 74ec6cbd3d..9c9d7032fa 100644 --- a/crates/wasi-common/cap-std-sync/Cargo.toml +++ b/crates/wasi-common/cap-std-sync/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wasi-cap-std-sync" -version = "0.25.0" +version = "0.26.0" authors = ["The Wasmtime Project Developers"] description = "WASI implementation in Rust" license = "Apache-2.0 WITH LLVM-exception" @@ -12,15 +12,16 @@ edition = "2018" include = ["src/**/*", "LICENSE" ] [dependencies] -wasi-common = { path = "../", version = "0.25.0" } +wasi-common = { path = "../", version = "0.26.0" } +async-trait = "0.1" anyhow = "1.0" -cap-std = "0.13.7" -cap-fs-ext = "0.13.7" -cap-time-ext = "0.13.7" 
-cap-rand = "0.13.2" +cap-std = "0.13.10" +cap-fs-ext = "0.13.10" +cap-time-ext = "0.13.10" +cap-rand = "0.13.10" fs-set-times = "0.3.1" -unsafe-io = "0.6.2" -system-interface = { version = "0.6.3", features = ["cap_std_impls"] } +unsafe-io = "0.6.5" +system-interface = { version = "0.6.4", features = ["cap_std_impls"] } tracing = "0.1.19" bitflags = "1.2" diff --git a/crates/wasi-common/cap-std-sync/src/dir.rs b/crates/wasi-common/cap-std-sync/src/dir.rs index 2552787900..0b846ccf15 100644 --- a/crates/wasi-common/cap-std-sync/src/dir.rs +++ b/crates/wasi-common/cap-std-sync/src/dir.rs @@ -15,13 +15,8 @@ impl Dir { pub fn from_cap_std(dir: cap_std::fs::Dir) -> Self { Dir(dir) } -} -impl WasiDir for Dir { - fn as_any(&self) -> &dyn Any { - self - } - fn open_file( + pub fn open_file_( &self, symlink_follow: bool, path: &str, @@ -29,7 +24,7 @@ impl WasiDir for Dir { read: bool, write: bool, fdflags: FdFlags, - ) -> Result, Error> { + ) -> Result { use cap_fs_ext::{FollowSymlinks, OpenOptionsFollowExt}; let mut opts = cap_std::fs::OpenOptions::new(); @@ -81,26 +76,67 @@ impl WasiDir for Dir { if fdflags.contains(wasi_common::file::FdFlags::NONBLOCK) { f.set_fd_flags(system_interface::fs::FdFlags::NONBLOCK)?; } - Ok(Box::new(File::from_cap_std(f))) + Ok(File::from_cap_std(f)) } - fn open_dir(&self, symlink_follow: bool, path: &str) -> Result, Error> { + pub fn open_dir_(&self, symlink_follow: bool, path: &str) -> Result { let d = if symlink_follow { self.0.open_dir(Path::new(path))? } else { self.0.open_dir_nofollow(Path::new(path))? 
}; - Ok(Box::new(Dir::from_cap_std(d))) + Ok(Dir::from_cap_std(d)) } - fn create_dir(&self, path: &str) -> Result<(), Error> { + pub fn rename_(&self, src_path: &str, dest_dir: &Self, dest_path: &str) -> Result<(), Error> { + self.0 + .rename(Path::new(src_path), &dest_dir.0, Path::new(dest_path))?; + Ok(()) + } + pub fn hard_link_( + &self, + src_path: &str, + target_dir: &Self, + target_path: &str, + ) -> Result<(), Error> { + let src_path = Path::new(src_path); + let target_path = Path::new(target_path); + self.0.hard_link(src_path, &target_dir.0, target_path)?; + Ok(()) + } +} + +#[async_trait::async_trait(?Send)] +impl WasiDir for Dir { + fn as_any(&self) -> &dyn Any { + self + } + async fn open_file( + &self, + symlink_follow: bool, + path: &str, + oflags: OFlags, + read: bool, + write: bool, + fdflags: FdFlags, + ) -> Result, Error> { + let f = self.open_file_(symlink_follow, path, oflags, read, write, fdflags)?; + Ok(Box::new(f)) + } + + async fn open_dir(&self, symlink_follow: bool, path: &str) -> Result, Error> { + let d = self.open_dir_(symlink_follow, path)?; + Ok(Box::new(d)) + } + + async fn create_dir(&self, path: &str) -> Result<(), Error> { self.0.create_dir(Path::new(path))?; Ok(()) } - fn readdir( + async fn readdir( &self, cursor: ReaddirCursor, - ) -> Result>>, Error> { + ) -> Result> + Send>, Error> { // cap_std's read_dir does not include . and .., we should prepend these. // Why does the Ok contain a tuple? We can't construct a cap_std::fs::DirEntry, and we don't // have enough info to make a ReaddirEntity yet. 
@@ -116,9 +152,9 @@ impl WasiDir for Dir { }, ] .into_iter() - .chain( + .chain({ // Now process the `DirEntry`s: - self.0.entries()?.map(|entry| { + let entries = self.0.entries()?.map(|entry| { let entry = entry?; let meta = entry.full_metadata()?; let inode = meta.ino(); @@ -128,8 +164,27 @@ impl WasiDir for Dir { .into_string() .map_err(|_| Error::illegal_byte_sequence().context("filename"))?; Ok((filetype, inode, name)) - }), - ) + }); + + // On Windows, filter out files like `C:\DumpStack.log.tmp` which we + // can't get a full metadata for. + #[cfg(windows)] + let entries = entries.filter(|entry: &Result<_, wasi_common::Error>| { + use winapi::shared::winerror::{ERROR_ACCESS_DENIED, ERROR_SHARING_VIOLATION}; + if let Err(err) = entry { + if let Some(err) = err.downcast_ref::() { + if err.raw_os_error() == Some(ERROR_SHARING_VIOLATION as i32) + || err.raw_os_error() == Some(ERROR_ACCESS_DENIED as i32) + { + return false; + } + } + } + true + }); + + entries + }) // Enumeration of the iterator makes it possible to define the ReaddirCursor .enumerate() .map(|(ix, r)| match r { @@ -146,24 +201,24 @@ impl WasiDir for Dir { Ok(Box::new(rd)) } - fn symlink(&self, src_path: &str, dest_path: &str) -> Result<(), Error> { + async fn symlink(&self, src_path: &str, dest_path: &str) -> Result<(), Error> { self.0.symlink(src_path, dest_path)?; Ok(()) } - fn remove_dir(&self, path: &str) -> Result<(), Error> { + async fn remove_dir(&self, path: &str) -> Result<(), Error> { self.0.remove_dir(Path::new(path))?; Ok(()) } - fn unlink_file(&self, path: &str) -> Result<(), Error> { + async fn unlink_file(&self, path: &str) -> Result<(), Error> { self.0.remove_file_or_symlink(Path::new(path))?; Ok(()) } - fn read_link(&self, path: &str) -> Result { + async fn read_link(&self, path: &str) -> Result { let link = self.0.read_link(Path::new(path))?; Ok(link) } - fn get_filestat(&self) -> Result { + async fn get_filestat(&self) -> Result { let meta = self.0.dir_metadata()?; Ok(Filestat 
{ device_id: meta.dev(), @@ -176,7 +231,11 @@ impl WasiDir for Dir { ctim: meta.created().map(|t| Some(t.into_std())).unwrap_or(None), }) } - fn get_path_filestat(&self, path: &str, follow_symlinks: bool) -> Result { + async fn get_path_filestat( + &self, + path: &str, + follow_symlinks: bool, + ) -> Result { let meta = if follow_symlinks { self.0.metadata(Path::new(path))? } else { @@ -193,16 +252,19 @@ impl WasiDir for Dir { ctim: meta.created().map(|t| Some(t.into_std())).unwrap_or(None), }) } - fn rename(&self, src_path: &str, dest_dir: &dyn WasiDir, dest_path: &str) -> Result<(), Error> { + async fn rename( + &self, + src_path: &str, + dest_dir: &dyn WasiDir, + dest_path: &str, + ) -> Result<(), Error> { let dest_dir = dest_dir .as_any() .downcast_ref::() .ok_or(Error::badf().context("failed downcast to cap-std Dir"))?; - self.0 - .rename(Path::new(src_path), &dest_dir.0, Path::new(dest_path))?; - Ok(()) + self.rename_(src_path, dest_dir, dest_path) } - fn hard_link( + async fn hard_link( &self, src_path: &str, target_dir: &dyn WasiDir, @@ -212,12 +274,9 @@ impl WasiDir for Dir { .as_any() .downcast_ref::() .ok_or(Error::badf().context("failed downcast to cap-std Dir"))?; - let src_path = Path::new(src_path); - let target_path = Path::new(target_path); - self.0.hard_link(src_path, &target_dir.0, target_path)?; - Ok(()) + self.hard_link_(src_path, target_dir, target_path) } - fn set_times( + async fn set_times( &self, path: &str, atime: Option, @@ -261,7 +320,7 @@ mod test { let preopen_dir = unsafe { cap_std::fs::Dir::open_ambient_dir(tempdir.path()) } .expect("open ambient temporary dir"); let preopen_dir = Dir::from_cap_std(preopen_dir); - wasi_common::WasiDir::open_dir(&preopen_dir, false, ".") + run(wasi_common::WasiDir::open_dir(&preopen_dir, false, ".")) .expect("open the same directory via WasiDir abstraction"); } @@ -275,9 +334,8 @@ mod test { fn readdir_into_map(dir: &dyn WasiDir) -> HashMap { let mut out = HashMap::new(); - for readdir_result in dir 
- .readdir(ReaddirCursor::from(0)) - .expect("readdir succeeds") + for readdir_result in + run(dir.readdir(ReaddirCursor::from(0))).expect("readdir succeeds") { let entity = readdir_result.expect("readdir entry is valid"); out.insert(entity.name.clone(), entity); @@ -303,16 +361,15 @@ mod test { assert!(entities.get(".").is_some()); assert!(entities.get("..").is_some()); - preopen_dir - .open_file( - false, - "file1", - OFlags::CREATE, - true, - false, - FdFlags::empty(), - ) - .expect("create file1"); + run(preopen_dir.open_file( + false, + "file1", + OFlags::CREATE, + true, + false, + FdFlags::empty(), + )) + .expect("create file1"); let entities = readdir_into_map(&preopen_dir); assert_eq!(entities.len(), 3, "should be ., .., file1 {:?}", entities); @@ -329,4 +386,41 @@ mod test { FileType::RegularFile ); } + + fn run(future: F) -> F::Output { + use std::pin::Pin; + use std::task::{Context, Poll, RawWaker, RawWakerVTable, Waker}; + + let mut f = Pin::from(Box::new(future)); + let waker = dummy_waker(); + let mut cx = Context::from_waker(&waker); + match f.as_mut().poll(&mut cx) { + Poll::Ready(val) => return val, + Poll::Pending => { + panic!("Cannot wait on pending future: must enable wiggle \"async\" future and execute on an async Store") + } + } + + fn dummy_waker() -> Waker { + return unsafe { Waker::from_raw(clone(5 as *const _)) }; + + unsafe fn clone(ptr: *const ()) -> RawWaker { + assert_eq!(ptr as usize, 5); + const VTABLE: RawWakerVTable = RawWakerVTable::new(clone, wake, wake_by_ref, drop); + RawWaker::new(ptr, &VTABLE) + } + + unsafe fn wake(ptr: *const ()) { + assert_eq!(ptr as usize, 5); + } + + unsafe fn wake_by_ref(ptr: *const ()) { + assert_eq!(ptr as usize, 5); + } + + unsafe fn drop(ptr: *const ()) { + assert_eq!(ptr as usize, 5); + } + } + } } diff --git a/crates/wasi-common/cap-std-sync/src/file.rs b/crates/wasi-common/cap-std-sync/src/file.rs index f89a090ea1..0105d61583 100644 --- a/crates/wasi-common/cap-std-sync/src/file.rs +++ 
b/crates/wasi-common/cap-std-sync/src/file.rs @@ -20,27 +20,28 @@ impl File { } } +#[async_trait::async_trait(?Send)] impl WasiFile for File { fn as_any(&self) -> &dyn Any { self } - fn datasync(&self) -> Result<(), Error> { + async fn datasync(&self) -> Result<(), Error> { self.0.sync_data()?; Ok(()) } - fn sync(&self) -> Result<(), Error> { + async fn sync(&self) -> Result<(), Error> { self.0.sync_all()?; Ok(()) } - fn get_filetype(&self) -> Result { + async fn get_filetype(&self) -> Result { let meta = self.0.metadata()?; Ok(filetype_from(&meta.file_type())) } - fn get_fdflags(&self) -> Result { + async fn get_fdflags(&self) -> Result { let fdflags = self.0.get_fd_flags()?; Ok(from_sysif_fdflags(fdflags)) } - fn set_fdflags(&mut self, fdflags: FdFlags) -> Result<(), Error> { + async fn set_fdflags(&mut self, fdflags: FdFlags) -> Result<(), Error> { if fdflags.intersects( wasi_common::file::FdFlags::DSYNC | wasi_common::file::FdFlags::SYNC @@ -50,7 +51,7 @@ impl WasiFile for File { } Ok(self.0.set_fd_flags(to_sysif_fdflags(fdflags))?) 
} - fn get_filestat(&self) -> Result { + async fn get_filestat(&self) -> Result { let meta = self.0.metadata()?; Ok(Filestat { device_id: meta.dev(), @@ -63,19 +64,19 @@ impl WasiFile for File { ctim: meta.created().map(|t| Some(t.into_std())).unwrap_or(None), }) } - fn set_filestat_size(&self, size: u64) -> Result<(), Error> { + async fn set_filestat_size(&self, size: u64) -> Result<(), Error> { self.0.set_len(size)?; Ok(()) } - fn advise(&self, offset: u64, len: u64, advice: Advice) -> Result<(), Error> { + async fn advise(&self, offset: u64, len: u64, advice: Advice) -> Result<(), Error> { self.0.advise(offset, len, convert_advice(advice))?; Ok(()) } - fn allocate(&self, offset: u64, len: u64) -> Result<(), Error> { + async fn allocate(&self, offset: u64, len: u64) -> Result<(), Error> { self.0.allocate(offset, len)?; Ok(()) } - fn set_times( + async fn set_times( &self, atime: Option, mtime: Option, @@ -84,32 +85,46 @@ impl WasiFile for File { .set_times(convert_systimespec(atime), convert_systimespec(mtime))?; Ok(()) } - fn read_vectored(&self, bufs: &mut [io::IoSliceMut]) -> Result { + async fn read_vectored<'a>(&self, bufs: &mut [io::IoSliceMut<'a>]) -> Result { let n = self.0.read_vectored(bufs)?; Ok(n.try_into()?) } - fn read_vectored_at(&self, bufs: &mut [io::IoSliceMut], offset: u64) -> Result { + async fn read_vectored_at<'a>( + &self, + bufs: &mut [io::IoSliceMut<'a>], + offset: u64, + ) -> Result { let n = self.0.read_vectored_at(bufs, offset)?; Ok(n.try_into()?) } - fn write_vectored(&self, bufs: &[io::IoSlice]) -> Result { + async fn write_vectored<'a>(&self, bufs: &[io::IoSlice<'a>]) -> Result { let n = self.0.write_vectored(bufs)?; Ok(n.try_into()?) } - fn write_vectored_at(&self, bufs: &[io::IoSlice], offset: u64) -> Result { + async fn write_vectored_at<'a>( + &self, + bufs: &[io::IoSlice<'a>], + offset: u64, + ) -> Result { let n = self.0.write_vectored_at(bufs, offset)?; Ok(n.try_into()?) 
} - fn seek(&self, pos: std::io::SeekFrom) -> Result { + async fn seek(&self, pos: std::io::SeekFrom) -> Result { Ok(self.0.seek(pos)?) } - fn peek(&self, buf: &mut [u8]) -> Result { + async fn peek(&self, buf: &mut [u8]) -> Result { let n = self.0.peek(buf)?; Ok(n.try_into()?) } - fn num_ready_bytes(&self) -> Result { + async fn num_ready_bytes(&self) -> Result { Ok(self.0.num_ready_bytes()?) } + async fn readable(&mut self) -> Result<(), Error> { + Err(Error::badf()) + } + async fn writable(&mut self) -> Result<(), Error> { + Err(Error::badf()) + } } pub fn filetype_from(ft: &cap_std::fs::FileType) -> FileType { diff --git a/crates/wasi-common/cap-std-sync/src/lib.rs b/crates/wasi-common/cap-std-sync/src/lib.rs index fe38951296..ac8ee602e5 100644 --- a/crates/wasi-common/cap-std-sync/src/lib.rs +++ b/crates/wasi-common/cap-std-sync/src/lib.rs @@ -14,6 +14,10 @@ //! `cap_std::fs::Dir`, and provides convenience methods for inheriting the //! parent process's stdio, args, and env. //! +//! For the convenience of consumers, `cap_std::fs::Dir` is re-exported from +//! this crate. This saves consumers tracking an additional dep on the exact +//! version of cap_std used by this crate, if they want to avoid it. +//! //! The only place we expect to run into long-term compatibility issues //! between `wasi-cap-std-sync` and the other impl crates that will come later //! is in the `Sched` abstraction. 
Once we can build an async scheduler based @@ -33,6 +37,7 @@ pub mod file; pub mod sched; pub mod stdio; +pub use cap_std::fs::Dir; pub use clocks::clocks_ctx; pub use sched::sched_ctx; @@ -110,13 +115,9 @@ impl WasiCtxBuilder { pub fn inherit_stdio(self) -> Self { self.inherit_stdin().inherit_stdout().inherit_stderr() } - pub fn preopened_dir( - self, - dir: cap_std::fs::Dir, - path: impl AsRef, - ) -> Result { + pub fn preopened_dir(self, dir: Dir, guest_path: impl AsRef) -> Result { let dir = Box::new(crate::dir::Dir::from_cap_std(dir)); - Ok(WasiCtxBuilder(self.0.preopened_dir(dir, path)?)) + Ok(WasiCtxBuilder(self.0.preopened_dir(dir, guest_path)?)) } pub fn build(self) -> Result { self.0.build() diff --git a/crates/wasi-common/cap-std-sync/src/sched.rs b/crates/wasi-common/cap-std-sync/src/sched.rs index 43af68a952..cbda527109 100644 --- a/crates/wasi-common/cap-std-sync/src/sched.rs +++ b/crates/wasi-common/cap-std-sync/src/sched.rs @@ -1,15 +1,40 @@ #[cfg(unix)] -mod unix; +pub mod unix; #[cfg(unix)] -pub use unix::*; +pub use unix::poll_oneoff; #[cfg(windows)] -mod windows; +pub mod windows; #[cfg(windows)] -pub use windows::*; +pub use windows::poll_oneoff; -use wasi_common::sched::WasiSched; +use std::thread; +use std::time::Duration; +use wasi_common::{ + sched::{Poll, WasiSched}, + Error, +}; +pub struct SyncSched {} +impl SyncSched { + pub fn new() -> Self { + Self {} + } +} +#[async_trait::async_trait(?Send)] +impl WasiSched for SyncSched { + async fn poll_oneoff<'a>(&self, poll: &mut Poll<'a>) -> Result<(), Error> { + poll_oneoff(poll).await + } + async fn sched_yield(&self) -> Result<(), Error> { + thread::yield_now(); + Ok(()) + } + async fn sleep(&self, duration: Duration) -> Result<(), Error> { + std::thread::sleep(duration); + Ok(()) + } +} pub fn sched_ctx() -> Box { Box::new(SyncSched::new()) } diff --git a/crates/wasi-common/cap-std-sync/src/sched/unix.rs b/crates/wasi-common/cap-std-sync/src/sched/unix.rs index 15bbb29138..7b232a4ed9 100644 
--- a/crates/wasi-common/cap-std-sync/src/sched/unix.rs +++ b/crates/wasi-common/cap-std-sync/src/sched/unix.rs @@ -1,113 +1,97 @@ use cap_std::time::Duration; use std::convert::TryInto; -use std::ops::Deref; use std::os::unix::io::{AsRawFd, RawFd}; use wasi_common::{ file::WasiFile, sched::{ subscription::{RwEventFlags, Subscription}, - Poll, WasiSched, + Poll, }, Error, ErrorExt, }; use poll::{PollFd, PollFlags}; -pub struct SyncSched; - -impl SyncSched { - pub fn new() -> Self { - SyncSched +pub async fn poll_oneoff<'a>(poll: &mut Poll<'a>) -> Result<(), Error> { + if poll.is_empty() { + return Ok(()); } -} + let mut pollfds = Vec::new(); + for s in poll.rw_subscriptions() { + match s { + Subscription::Read(f) => { + let raw_fd = wasi_file_raw_fd(f.file).ok_or( + Error::invalid_argument().context("read subscription fd downcast failed"), + )?; + pollfds.push(unsafe { PollFd::new(raw_fd, PollFlags::POLLIN) }); + } -impl WasiSched for SyncSched { - fn poll_oneoff<'a>(&self, poll: &'a Poll<'a>) -> Result<(), Error> { - if poll.is_empty() { - return Ok(()); + Subscription::Write(f) => { + let raw_fd = wasi_file_raw_fd(f.file).ok_or( + Error::invalid_argument().context("write subscription fd downcast failed"), + )?; + pollfds.push(unsafe { PollFd::new(raw_fd, PollFlags::POLLOUT) }); + } + Subscription::MonotonicClock { .. 
} => unreachable!(), } - let mut pollfds = Vec::new(); - let timeout = poll.earliest_clock_deadline(); - for s in poll.rw_subscriptions() { - match s { - Subscription::Read(f) => { - let raw_fd = wasi_file_raw_fd(f.file.deref()).ok_or( - Error::invalid_argument().context("read subscription fd downcast failed"), - )?; - pollfds.push(unsafe { PollFd::new(raw_fd, PollFlags::POLLIN) }); - } + } - Subscription::Write(f) => { - let raw_fd = wasi_file_raw_fd(f.file.deref()).ok_or( - Error::invalid_argument().context("write subscription fd downcast failed"), - )?; - pollfds.push(unsafe { PollFd::new(raw_fd, PollFlags::POLLOUT) }); - } - Subscription::MonotonicClock { .. } => unreachable!(), - } - } - - let ready = loop { - let poll_timeout = if let Some(t) = timeout { - let duration = t.duration_until().unwrap_or(Duration::from_secs(0)); - (duration.as_millis() + 1) // XXX try always rounding up? - .try_into() - .map_err(|_| Error::overflow().context("poll timeout"))? - } else { - libc::c_int::max_value() - }; - tracing::debug!( - poll_timeout = tracing::field::debug(poll_timeout), - poll_fds = tracing::field::debug(&pollfds), - "poll" - ); - match poll::poll(&mut pollfds, poll_timeout) { - Ok(ready) => break ready, - Err(_) => { - let last_err = std::io::Error::last_os_error(); - if last_err.raw_os_error().unwrap() == libc::EINTR { - continue; - } else { - return Err(last_err.into()); - } - } - } - }; - if ready > 0 { - for (rwsub, pollfd) in poll.rw_subscriptions().zip(pollfds.into_iter()) { - if let Some(revents) = pollfd.revents() { - let (nbytes, rwsub) = match rwsub { - Subscription::Read(sub) => { - let ready = sub.file.num_ready_bytes()?; - (std::cmp::max(ready, 1), sub) - } - Subscription::Write(sub) => (0, sub), - _ => unreachable!(), - }; - if revents.contains(PollFlags::POLLNVAL) { - rwsub.error(Error::badf()); - } else if revents.contains(PollFlags::POLLERR) { - rwsub.error(Error::io()); - } else if revents.contains(PollFlags::POLLHUP) { - 
rwsub.complete(nbytes, RwEventFlags::HANGUP); - } else { - rwsub.complete(nbytes, RwEventFlags::empty()); - }; - } - } + let ready = loop { + let poll_timeout = if let Some(t) = poll.earliest_clock_deadline() { + let duration = t.duration_until().unwrap_or(Duration::from_secs(0)); + (duration.as_millis() + 1) // XXX try always rounding up? + .try_into() + .map_err(|_| Error::overflow().context("poll timeout"))? } else { - timeout - .expect("timed out") - .result() - .expect("timer deadline is past") - .unwrap() + libc::c_int::max_value() + }; + tracing::debug!( + poll_timeout = tracing::field::debug(poll_timeout), + poll_fds = tracing::field::debug(&pollfds), + "poll" + ); + match poll::poll(&mut pollfds, poll_timeout) { + Ok(ready) => break ready, + Err(_) => { + let last_err = std::io::Error::last_os_error(); + if last_err.raw_os_error().unwrap() == libc::EINTR { + continue; + } else { + return Err(last_err.into()); + } + } } - Ok(()) - } - fn sched_yield(&self) -> Result<(), Error> { - std::thread::yield_now(); - Ok(()) + }; + if ready > 0 { + for (rwsub, pollfd) in poll.rw_subscriptions().zip(pollfds.into_iter()) { + if let Some(revents) = pollfd.revents() { + let (nbytes, rwsub) = match rwsub { + Subscription::Read(sub) => { + let ready = sub.file.num_ready_bytes().await?; + (std::cmp::max(ready, 1), sub) + } + Subscription::Write(sub) => (0, sub), + _ => unreachable!(), + }; + if revents.contains(PollFlags::POLLNVAL) { + rwsub.error(Error::badf()); + } else if revents.contains(PollFlags::POLLERR) { + rwsub.error(Error::io()); + } else if revents.contains(PollFlags::POLLHUP) { + rwsub.complete(nbytes, RwEventFlags::HANGUP); + } else { + rwsub.complete(nbytes, RwEventFlags::empty()); + }; + } + } + } else { + poll.earliest_clock_deadline() + .expect("timed out") + .result() + .expect("timer deadline is past") + .unwrap() } + Ok(()) } fn wasi_file_raw_fd(f: &dyn WasiFile) -> Option { diff --git a/crates/wasi-common/cap-std-sync/src/sched/windows.rs 
b/crates/wasi-common/cap-std-sync/src/sched/windows.rs index 03eb41514c..41f3f1dda3 100644 --- a/crates/wasi-common/cap-std-sync/src/sched/windows.rs +++ b/crates/wasi-common/cap-std-sync/src/sched/windows.rs @@ -1,3 +1,13 @@ +// The windows scheduler is unmaintained and due for a rewrite. +// +// Rather than use a polling mechanism for file read/write readiness, +// it checks readiness just once, before sleeping for any timer subscriptions. +// Checking stdin readiness uses a worker thread which, once started, lives for the +// lifetime of the process. +// +// We suspect there are bugs in this scheduler, however, we have not +// taken the time to improve it. See bug #2880. + use anyhow::Context; use std::ops::Deref; use std::os::windows::io::{AsRawHandle, RawHandle}; @@ -9,128 +19,127 @@ use wasi_common::{ file::WasiFile, sched::{ subscription::{RwEventFlags, Subscription}, - Poll, WasiSched, + Poll, }, Error, ErrorExt, }; -pub struct SyncSched {} -impl SyncSched { - pub fn new() -> Self { - Self {} - } +pub async fn poll_oneoff<'a>(poll: &mut Poll<'a>) -> Result<(), Error> { + poll_oneoff_(poll, wasi_file_is_stdin, wasi_file_raw_handle).await } -impl WasiSched for SyncSched { - fn poll_oneoff<'a>(&self, poll: &'a Poll<'a>) -> Result<(), Error> { - if poll.is_empty() { - return Ok(()); +// For reuse by wasi-tokio, which has a different WasiFile -> RawHandle translator. 
+pub async fn poll_oneoff_<'a>( + poll: &mut Poll<'a>, + file_is_stdin: impl Fn(&dyn WasiFile) -> bool, + file_to_handle: impl Fn(&dyn WasiFile) -> Option, +) -> Result<(), Error> { + if poll.is_empty() { + return Ok(()); + } + + let mut ready = false; + let waitmode = if let Some(t) = poll.earliest_clock_deadline() { + if let Some(duration) = t.duration_until() { + WaitMode::Timeout(duration) + } else { + WaitMode::Immediate } + } else { + if ready { + WaitMode::Immediate + } else { + WaitMode::Infinite + } + }; - let mut ready = false; - let timeout = poll.earliest_clock_deadline(); - - let mut stdin_read_subs = Vec::new(); - let mut immediate_subs = Vec::new(); - for s in poll.rw_subscriptions() { - match s { - Subscription::Read(r) if r.file.as_any().is::() => { + let mut stdin_read_subs = Vec::new(); + let mut immediate_reads = Vec::new(); + let mut immediate_writes = Vec::new(); + for s in poll.rw_subscriptions() { + match s { + Subscription::Read(r) => { + if file_is_stdin(r.file.deref()) { stdin_read_subs.push(r); + } else if file_to_handle(r.file.deref()).is_some() { + immediate_reads.push(r); + } else { + return Err( + Error::invalid_argument().context("read subscription fd downcast failed") + ); } - Subscription::Read(rw) | Subscription::Write(rw) => { - if wasi_file_raw_handle(rw.file.deref()).is_some() { - immediate_subs.push(s); - } else { - return Err(Error::invalid_argument() - .context("read/write subscription fd downcast failed")); - } - } - Subscription::MonotonicClock { .. } => unreachable!(), } + Subscription::Write(w) => { + if file_to_handle(w.file.deref()).is_some() { + immediate_writes.push(w); + } else { + return Err( + Error::invalid_argument().context("write subscription fd downcast failed") + ); + } + } + Subscription::MonotonicClock { .. 
} => unreachable!(), } + } - if !stdin_read_subs.is_empty() { - let waitmode = if let Some(t) = timeout { - if let Some(duration) = t.duration_until() { - WaitMode::Timeout(duration) - } else { - WaitMode::Immediate - } - } else { - if ready { - WaitMode::Immediate - } else { - WaitMode::Infinite - } - }; - let state = STDIN_POLL - .lock() - .map_err(|_| Error::trap("failed to take lock of STDIN_POLL"))? - .poll(waitmode)?; - for readsub in stdin_read_subs.into_iter() { - match state { - PollState::Ready => { - readsub.complete(1, RwEventFlags::empty()); - ready = true; - } - PollState::NotReady | PollState::TimedOut => {} - PollState::Error(ref e) => { - // Unfortunately, we need to deliver the Error to each of the - // subscriptions, but there is no Clone on std::io::Error. So, we convert it to the - // kind, and then back to std::io::Error, and finally to anyhow::Error. - // When its time to turn this into an errno elsewhere, the error kind will - // be inspected. - let ekind = e.kind(); - let ioerror = std::io::Error::from(ekind); - readsub.error(ioerror.into()); - ready = true; - } - } - } - } - for sub in immediate_subs { - match sub { - Subscription::Read(r) => { - // XXX This doesnt strictly preserve the behavior in the earlier - // implementation, which would always do complete(0) for reads from - // stdout/err. - match r.file.num_ready_bytes() { - Ok(ready_bytes) => { - r.complete(ready_bytes, RwEventFlags::empty()); - ready = true; - } - Err(e) => { - r.error(e); - ready = true; - } - } - } - Subscription::Write(w) => { - // Everything is always ready for writing, apparently? - w.complete(0, RwEventFlags::empty()); + if !stdin_read_subs.is_empty() { + let state = STDIN_POLL + .lock() + .map_err(|_| Error::trap("failed to take lock of STDIN_POLL"))? + .poll(waitmode)?; + for readsub in stdin_read_subs.into_iter() { + match state { + PollState::Ready => { + readsub.complete(1, RwEventFlags::empty()); ready = true; } - Subscription::MonotonicClock { .. 
} => unreachable!(), - } - } - - if !ready { - if let Some(t) = timeout { - if let Some(duration) = t.duration_until() { - thread::sleep(duration); + PollState::NotReady | PollState::TimedOut => {} + PollState::Error(ref e) => { + // Unfortunately, we need to deliver the Error to each of the + // subscriptions, but there is no Clone on std::io::Error. So, we convert it to the + // kind, and then back to std::io::Error, and finally to anyhow::Error. + // When its time to turn this into an errno elsewhere, the error kind will + // be inspected. + let ekind = e.kind(); + let ioerror = std::io::Error::from(ekind); + readsub.error(ioerror.into()); + ready = true; } } } + } + for r in immediate_reads { + match r.file.num_ready_bytes().await { + Ok(ready_bytes) => { + r.complete(ready_bytes, RwEventFlags::empty()); + ready = true; + } + Err(e) => { + r.error(e); + ready = true; + } + } + } + for w in immediate_writes { + // Everything is always ready for writing, apparently? + w.complete(0, RwEventFlags::empty()); + ready = true; + } - Ok(()) - } - fn sched_yield(&self) -> Result<(), Error> { - thread::yield_now(); - Ok(()) + if !ready { + if let WaitMode::Timeout(duration) = waitmode { + thread::sleep(duration); + } } + + Ok(()) } -fn wasi_file_raw_handle(f: &dyn WasiFile) -> Option { +pub fn wasi_file_is_stdin(f: &dyn WasiFile) -> bool { + f.as_any().is::() +} + +pub fn wasi_file_raw_handle(f: &dyn WasiFile) -> Option { let a = f.as_any(); if a.is::() { Some( @@ -168,6 +177,7 @@ enum PollState { Error(std::io::Error), } +#[derive(Copy, Clone)] enum WaitMode { Timeout(Duration), Infinite, diff --git a/crates/wasi-common/cap-std-sync/src/stdio.rs b/crates/wasi-common/cap-std-sync/src/stdio.rs index 92a76d47ff..c6afa8f9b4 100644 --- a/crates/wasi-common/cap-std-sync/src/stdio.rs +++ b/crates/wasi-common/cap-std-sync/src/stdio.rs @@ -22,31 +22,32 @@ pub fn stdin() -> Stdin { Stdin(std::io::stdin()) } +#[async_trait::async_trait(?Send)] impl WasiFile for Stdin { fn 
as_any(&self) -> &dyn Any { self } - fn datasync(&self) -> Result<(), Error> { + async fn datasync(&self) -> Result<(), Error> { Ok(()) } - fn sync(&self) -> Result<(), Error> { + async fn sync(&self) -> Result<(), Error> { Ok(()) } - fn get_filetype(&self) -> Result { + async fn get_filetype(&self) -> Result { Ok(FileType::Unknown) } - fn get_fdflags(&self) -> Result { + async fn get_fdflags(&self) -> Result { Ok(FdFlags::empty()) } - fn set_fdflags(&mut self, _fdflags: FdFlags) -> Result<(), Error> { + async fn set_fdflags(&mut self, _fdflags: FdFlags) -> Result<(), Error> { Err(Error::badf()) } - fn get_filestat(&self) -> Result { + async fn get_filestat(&self) -> Result { let meta = self.0.as_file_view().metadata()?; Ok(Filestat { device_id: 0, inode: 0, - filetype: self.get_filetype()?, + filetype: self.get_filetype().await?, nlink: 0, size: meta.len(), atim: meta.accessed().ok(), @@ -54,35 +55,43 @@ impl WasiFile for Stdin { ctim: meta.created().ok(), }) } - fn set_filestat_size(&self, _size: u64) -> Result<(), Error> { + async fn set_filestat_size(&self, _size: u64) -> Result<(), Error> { Err(Error::badf()) } - fn advise(&self, _offset: u64, _len: u64, _advice: Advice) -> Result<(), Error> { + async fn advise(&self, _offset: u64, _len: u64, _advice: Advice) -> Result<(), Error> { Err(Error::badf()) } - fn allocate(&self, _offset: u64, _len: u64) -> Result<(), Error> { + async fn allocate(&self, _offset: u64, _len: u64) -> Result<(), Error> { Err(Error::badf()) } - fn read_vectored(&self, bufs: &mut [io::IoSliceMut]) -> Result { + async fn read_vectored<'a>(&self, bufs: &mut [io::IoSliceMut<'a>]) -> Result { let n = self.0.as_file_view().read_vectored(bufs)?; Ok(n.try_into().map_err(|_| Error::range())?) 
} - fn read_vectored_at(&self, _bufs: &mut [io::IoSliceMut], _offset: u64) -> Result { + async fn read_vectored_at<'a>( + &self, + _bufs: &mut [io::IoSliceMut<'a>], + _offset: u64, + ) -> Result { Err(Error::seek_pipe()) } - fn write_vectored(&self, _bufs: &[io::IoSlice]) -> Result { + async fn write_vectored<'a>(&self, _bufs: &[io::IoSlice<'a>]) -> Result { Err(Error::badf()) } - fn write_vectored_at(&self, _bufs: &[io::IoSlice], _offset: u64) -> Result { + async fn write_vectored_at<'a>( + &self, + _bufs: &[io::IoSlice<'a>], + _offset: u64, + ) -> Result { Err(Error::badf()) } - fn seek(&self, _pos: std::io::SeekFrom) -> Result { + async fn seek(&self, _pos: std::io::SeekFrom) -> Result { Err(Error::seek_pipe()) } - fn peek(&self, _buf: &mut [u8]) -> Result { + async fn peek(&self, _buf: &mut [u8]) -> Result { Err(Error::seek_pipe()) } - fn set_times( + async fn set_times( &self, atime: Option, mtime: Option, @@ -91,9 +100,15 @@ impl WasiFile for Stdin { .set_times(convert_systimespec(atime), convert_systimespec(mtime))?; Ok(()) } - fn num_ready_bytes(&self) -> Result { + async fn num_ready_bytes(&self) -> Result { Ok(self.0.num_ready_bytes()?) } + async fn readable(&mut self) -> Result<(), Error> { + Err(Error::badf()) + } + async fn writable(&mut self) -> Result<(), Error> { + Err(Error::badf()) + } } #[cfg(windows)] impl AsRawHandle for Stdin { @@ -110,31 +125,32 @@ impl AsRawFd for Stdin { macro_rules! 
wasi_file_write_impl { ($ty:ty) => { + #[async_trait::async_trait(?Send)] impl WasiFile for $ty { fn as_any(&self) -> &dyn Any { self } - fn datasync(&self) -> Result<(), Error> { + async fn datasync(&self) -> Result<(), Error> { Ok(()) } - fn sync(&self) -> Result<(), Error> { + async fn sync(&self) -> Result<(), Error> { Ok(()) } - fn get_filetype(&self) -> Result { + async fn get_filetype(&self) -> Result { Ok(FileType::Unknown) } - fn get_fdflags(&self) -> Result { + async fn get_fdflags(&self) -> Result { Ok(FdFlags::APPEND) } - fn set_fdflags(&mut self, _fdflags: FdFlags) -> Result<(), Error> { + async fn set_fdflags(&mut self, _fdflags: FdFlags) -> Result<(), Error> { Err(Error::badf()) } - fn get_filestat(&self) -> Result { + async fn get_filestat(&self) -> Result { let meta = self.0.as_file_view().metadata()?; Ok(Filestat { device_id: 0, inode: 0, - filetype: self.get_filetype()?, + filetype: self.get_filetype().await?, nlink: 0, size: meta.len(), atim: meta.accessed().ok(), @@ -142,39 +158,46 @@ macro_rules! 
wasi_file_write_impl { ctim: meta.created().ok(), }) } - fn set_filestat_size(&self, _size: u64) -> Result<(), Error> { + async fn set_filestat_size(&self, _size: u64) -> Result<(), Error> { Err(Error::badf()) } - fn advise(&self, _offset: u64, _len: u64, _advice: Advice) -> Result<(), Error> { + async fn advise(&self, _offset: u64, _len: u64, _advice: Advice) -> Result<(), Error> { Err(Error::badf()) } - fn allocate(&self, _offset: u64, _len: u64) -> Result<(), Error> { + async fn allocate(&self, _offset: u64, _len: u64) -> Result<(), Error> { Err(Error::badf()) } - fn read_vectored(&self, _bufs: &mut [io::IoSliceMut]) -> Result { - Err(Error::badf()) - } - fn read_vectored_at( + async fn read_vectored<'a>( &self, - _bufs: &mut [io::IoSliceMut], + _bufs: &mut [io::IoSliceMut<'a>], + ) -> Result { + Err(Error::badf()) + } + async fn read_vectored_at<'a>( + &self, + _bufs: &mut [io::IoSliceMut<'a>], _offset: u64, ) -> Result { Err(Error::badf()) } - fn write_vectored(&self, bufs: &[io::IoSlice]) -> Result { + async fn write_vectored<'a>(&self, bufs: &[io::IoSlice<'a>]) -> Result { let n = self.0.as_file_view().write_vectored(bufs)?; Ok(n.try_into().map_err(|c| Error::range().context(c))?) } - fn write_vectored_at(&self, _bufs: &[io::IoSlice], _offset: u64) -> Result { + async fn write_vectored_at<'a>( + &self, + _bufs: &[io::IoSlice<'a>], + _offset: u64, + ) -> Result { Err(Error::seek_pipe()) } - fn seek(&self, _pos: std::io::SeekFrom) -> Result { + async fn seek(&self, _pos: std::io::SeekFrom) -> Result { Err(Error::seek_pipe()) } - fn peek(&self, _buf: &mut [u8]) -> Result { + async fn peek(&self, _buf: &mut [u8]) -> Result { Err(Error::badf()) } - fn set_times( + async fn set_times( &self, atime: Option, mtime: Option, @@ -183,9 +206,15 @@ macro_rules! 
wasi_file_write_impl { .set_times(convert_systimespec(atime), convert_systimespec(mtime))?; Ok(()) } - fn num_ready_bytes(&self) -> Result { + async fn num_ready_bytes(&self) -> Result { Ok(0) } + async fn readable(&mut self) -> Result<(), Error> { + Err(Error::badf()) + } + async fn writable(&mut self) -> Result<(), Error> { + Err(Error::badf()) + } } #[cfg(windows)] impl AsRawHandle for $ty { diff --git a/crates/wasi-common/src/clocks.rs b/crates/wasi-common/src/clocks.rs index babf0acff4..679759caf9 100644 --- a/crates/wasi-common/src/clocks.rs +++ b/crates/wasi-common/src/clocks.rs @@ -5,12 +5,12 @@ pub enum SystemTimeSpec { Absolute(SystemTime), } -pub trait WasiSystemClock { +pub trait WasiSystemClock: Send + Sync { fn resolution(&self) -> Duration; fn now(&self, precision: Duration) -> SystemTime; } -pub trait WasiMonotonicClock { +pub trait WasiMonotonicClock: Send + Sync { fn resolution(&self) -> Duration; fn now(&self, precision: Duration) -> Instant; } diff --git a/crates/wasi-common/src/dir.rs b/crates/wasi-common/src/dir.rs index 1c7e3b2c56..9c49c6c8aa 100644 --- a/crates/wasi-common/src/dir.rs +++ b/crates/wasi-common/src/dir.rs @@ -6,9 +6,10 @@ use std::cell::Ref; use std::ops::Deref; use std::path::PathBuf; -pub trait WasiDir { +#[wiggle::async_trait] +pub trait WasiDir: Send + Sync { fn as_any(&self) -> &dyn Any; - fn open_file( + async fn open_file( &self, symlink_follow: bool, path: &str, @@ -17,26 +18,33 @@ pub trait WasiDir { write: bool, fdflags: FdFlags, ) -> Result, Error>; - fn open_dir(&self, symlink_follow: bool, path: &str) -> Result, Error>; - fn create_dir(&self, path: &str) -> Result<(), Error>; - fn readdir( + async fn open_dir(&self, symlink_follow: bool, path: &str) -> Result, Error>; + async fn create_dir(&self, path: &str) -> Result<(), Error>; + // XXX the iterator here needs to be asyncified as well! 
+ async fn readdir( &self, cursor: ReaddirCursor, - ) -> Result>>, Error>; - fn symlink(&self, old_path: &str, new_path: &str) -> Result<(), Error>; - fn remove_dir(&self, path: &str) -> Result<(), Error>; - fn unlink_file(&self, path: &str) -> Result<(), Error>; - fn read_link(&self, path: &str) -> Result; - fn get_filestat(&self) -> Result; - fn get_path_filestat(&self, path: &str, follow_symlinks: bool) -> Result; - fn rename(&self, path: &str, dest_dir: &dyn WasiDir, dest_path: &str) -> Result<(), Error>; - fn hard_link( + ) -> Result> + Send>, Error>; + async fn symlink(&self, old_path: &str, new_path: &str) -> Result<(), Error>; + async fn remove_dir(&self, path: &str) -> Result<(), Error>; + async fn unlink_file(&self, path: &str) -> Result<(), Error>; + async fn read_link(&self, path: &str) -> Result; + async fn get_filestat(&self) -> Result; + async fn get_path_filestat(&self, path: &str, follow_symlinks: bool) + -> Result; + async fn rename( + &self, + path: &str, + dest_dir: &dyn WasiDir, + dest_path: &str, + ) -> Result<(), Error>; + async fn hard_link( &self, path: &str, target_dir: &dyn WasiDir, target_path: &str, ) -> Result<(), Error>; - fn set_times( + async fn set_times( &self, path: &str, atime: Option, diff --git a/crates/wasi-common/src/error.rs b/crates/wasi-common/src/error.rs index cf132b59ee..20277554fc 100644 --- a/crates/wasi-common/src/error.rs +++ b/crates/wasi-common/src/error.rs @@ -23,7 +23,7 @@ //! The real value of using `anyhow::Error` here is being able to use //! `anyhow::Result::context` to aid in debugging of errors. -pub use anyhow::Error; +pub use anyhow::{Context, Error}; /// Internal error type for the `wasi-common` crate. 
/// Contains variants of the WASI `$errno` type are added according to what is actually used internally by diff --git a/crates/wasi-common/src/file.rs b/crates/wasi-common/src/file.rs index 65fba7cd4a..c718b7ad25 100644 --- a/crates/wasi-common/src/file.rs +++ b/crates/wasi-common/src/file.rs @@ -4,30 +4,41 @@ use std::any::Any; use std::cell::{Ref, RefMut}; use std::ops::{Deref, DerefMut}; -pub trait WasiFile { +#[wiggle::async_trait] +pub trait WasiFile: Send { fn as_any(&self) -> &dyn Any; - fn datasync(&self) -> Result<(), Error>; // write op - fn sync(&self) -> Result<(), Error>; // file op - fn get_filetype(&self) -> Result; // file op - fn get_fdflags(&self) -> Result; // file op - fn set_fdflags(&mut self, flags: FdFlags) -> Result<(), Error>; // file op - fn get_filestat(&self) -> Result; // split out get_length as a read & write op, rest is a file op - fn set_filestat_size(&self, _size: u64) -> Result<(), Error>; // write op - fn advise(&self, offset: u64, len: u64, advice: Advice) -> Result<(), Error>; // file op - fn allocate(&self, offset: u64, len: u64) -> Result<(), Error>; // write op - fn set_times( + async fn datasync(&self) -> Result<(), Error>; // write op + async fn sync(&self) -> Result<(), Error>; // file op + async fn get_filetype(&self) -> Result; // file op + async fn get_fdflags(&self) -> Result; // file op + async fn set_fdflags(&mut self, flags: FdFlags) -> Result<(), Error>; // file op + async fn get_filestat(&self) -> Result; // split out get_length as a read & write op, rest is a file op + async fn set_filestat_size(&self, _size: u64) -> Result<(), Error>; // write op + async fn advise(&self, offset: u64, len: u64, advice: Advice) -> Result<(), Error>; // file op + async fn allocate(&self, offset: u64, len: u64) -> Result<(), Error>; // write op + async fn set_times( &self, atime: Option, mtime: Option, ) -> Result<(), Error>; - fn read_vectored(&self, bufs: &mut [std::io::IoSliceMut]) -> Result; // read op - fn 
read_vectored_at(&self, bufs: &mut [std::io::IoSliceMut], offset: u64) - -> Result; // file op - fn write_vectored(&self, bufs: &[std::io::IoSlice]) -> Result; // write op - fn write_vectored_at(&self, bufs: &[std::io::IoSlice], offset: u64) -> Result; // file op - fn seek(&self, pos: std::io::SeekFrom) -> Result; // file op that generates a new stream from a file will supercede this - fn peek(&self, buf: &mut [u8]) -> Result; // read op - fn num_ready_bytes(&self) -> Result; // read op + async fn read_vectored<'a>(&self, bufs: &mut [std::io::IoSliceMut<'a>]) -> Result; // read op + async fn read_vectored_at<'a>( + &self, + bufs: &mut [std::io::IoSliceMut<'a>], + offset: u64, + ) -> Result; // file op + async fn write_vectored<'a>(&self, bufs: &[std::io::IoSlice<'a>]) -> Result; // write op + async fn write_vectored_at<'a>( + &self, + bufs: &[std::io::IoSlice<'a>], + offset: u64, + ) -> Result; // file op + async fn seek(&self, pos: std::io::SeekFrom) -> Result; // file op that generates a new stream from a file will supercede this + async fn peek(&self, buf: &mut [u8]) -> Result; // read op + async fn num_ready_bytes(&self) -> Result; // read op + + async fn readable(&mut self) -> Result<(), Error>; + async fn writable(&mut self) -> Result<(), Error>; } #[derive(Debug, Copy, Clone, PartialEq, Eq)] @@ -111,11 +122,11 @@ impl FileEntry { Ok(()) } - pub fn get_fdstat(&self) -> Result { + pub async fn get_fdstat(&self) -> Result { Ok(FdStat { - filetype: self.file.get_filetype()?, + filetype: self.file.get_filetype().await?, caps: self.caps, - flags: self.file.get_fdflags()?, + flags: self.file.get_fdflags().await?, }) } } diff --git a/crates/wasi-common/src/lib.rs b/crates/wasi-common/src/lib.rs index 4575b423b6..63910d4a60 100644 --- a/crates/wasi-common/src/lib.rs +++ b/crates/wasi-common/src/lib.rs @@ -66,7 +66,7 @@ pub use cap_rand::RngCore; pub use clocks::{SystemTimeSpec, WasiClocks, WasiMonotonicClock, WasiSystemClock}; pub use ctx::{WasiCtx, WasiCtxBuilder}; 
pub use dir::WasiDir; -pub use error::{Error, ErrorExt, ErrorKind}; +pub use error::{Context, Error, ErrorExt, ErrorKind}; pub use file::WasiFile; pub use sched::{Poll, WasiSched}; pub use string_array::StringArrayError; diff --git a/crates/wasi-common/src/pipe.rs b/crates/wasi-common/src/pipe.rs index 8bbc8d73b7..2f39f5cb9c 100644 --- a/crates/wasi-common/src/pipe.rs +++ b/crates/wasi-common/src/pipe.rs @@ -105,30 +105,31 @@ impl From<&str> for ReadPipe> { } } -impl WasiFile for ReadPipe { +#[wiggle::async_trait] +impl WasiFile for ReadPipe { fn as_any(&self) -> &dyn Any { self } - fn datasync(&self) -> Result<(), Error> { + async fn datasync(&self) -> Result<(), Error> { Ok(()) // trivial: no implementation needed } - fn sync(&self) -> Result<(), Error> { + async fn sync(&self) -> Result<(), Error> { Ok(()) // trivial } - fn get_filetype(&self) -> Result { + async fn get_filetype(&self) -> Result { Ok(FileType::Pipe) } - fn get_fdflags(&self) -> Result { + async fn get_fdflags(&self) -> Result { Ok(FdFlags::empty()) } - fn set_fdflags(&mut self, _fdflags: FdFlags) -> Result<(), Error> { + async fn set_fdflags(&mut self, _fdflags: FdFlags) -> Result<(), Error> { Err(Error::badf()) } - fn get_filestat(&self) -> Result { + async fn get_filestat(&self) -> Result { Ok(Filestat { device_id: 0, inode: 0, - filetype: self.get_filetype()?, + filetype: self.get_filetype().await?, nlink: 0, size: 0, // XXX no way to get a size out of a Read :( atim: None, @@ -136,44 +137,58 @@ impl WasiFile for ReadPipe { ctim: None, }) } - fn set_filestat_size(&self, _size: u64) -> Result<(), Error> { + async fn set_filestat_size(&self, _size: u64) -> Result<(), Error> { Err(Error::badf()) } - fn advise(&self, offset: u64, len: u64, advice: Advice) -> Result<(), Error> { + async fn advise(&self, offset: u64, len: u64, advice: Advice) -> Result<(), Error> { Err(Error::badf()) } - fn allocate(&self, offset: u64, len: u64) -> Result<(), Error> { + async fn allocate(&self, offset: u64, len: 
u64) -> Result<(), Error> { Err(Error::badf()) } - fn read_vectored(&self, bufs: &mut [io::IoSliceMut]) -> Result { + async fn read_vectored<'a>(&self, bufs: &mut [io::IoSliceMut<'a>]) -> Result { let n = self.borrow().read_vectored(bufs)?; Ok(n.try_into()?) } - fn read_vectored_at(&self, bufs: &mut [io::IoSliceMut], offset: u64) -> Result { + async fn read_vectored_at<'a>( + &self, + bufs: &mut [io::IoSliceMut<'a>], + offset: u64, + ) -> Result { Err(Error::badf()) } - fn write_vectored(&self, bufs: &[io::IoSlice]) -> Result { + async fn write_vectored<'a>(&self, bufs: &[io::IoSlice<'a>]) -> Result { Err(Error::badf()) } - fn write_vectored_at(&self, bufs: &[io::IoSlice], offset: u64) -> Result { + async fn write_vectored_at<'a>( + &self, + bufs: &[io::IoSlice<'a>], + offset: u64, + ) -> Result { Err(Error::badf()) } - fn seek(&self, pos: std::io::SeekFrom) -> Result { + async fn seek(&self, pos: std::io::SeekFrom) -> Result { Err(Error::badf()) } - fn peek(&self, buf: &mut [u8]) -> Result { + async fn peek(&self, buf: &mut [u8]) -> Result { Err(Error::badf()) } - fn set_times( + async fn set_times( &self, atime: Option, mtime: Option, ) -> Result<(), Error> { Err(Error::badf()) } - fn num_ready_bytes(&self) -> Result { + async fn num_ready_bytes(&self) -> Result { Ok(0) } + async fn readable(&mut self) -> Result<(), Error> { + Err(Error::badf()) + } + async fn writable(&mut self) -> Result<(), Error> { + Err(Error::badf()) + } } /// A virtual pipe write end. 
@@ -249,30 +264,31 @@ impl WritePipe>> { } } -impl WasiFile for WritePipe { +#[wiggle::async_trait] +impl WasiFile for WritePipe { fn as_any(&self) -> &dyn Any { self } - fn datasync(&self) -> Result<(), Error> { + async fn datasync(&self) -> Result<(), Error> { Ok(()) } - fn sync(&self) -> Result<(), Error> { + async fn sync(&self) -> Result<(), Error> { Ok(()) } - fn get_filetype(&self) -> Result { + async fn get_filetype(&self) -> Result { Ok(FileType::Pipe) } - fn get_fdflags(&self) -> Result { + async fn get_fdflags(&self) -> Result { Ok(FdFlags::APPEND) } - fn set_fdflags(&mut self, _fdflags: FdFlags) -> Result<(), Error> { + async fn set_fdflags(&mut self, _fdflags: FdFlags) -> Result<(), Error> { Err(Error::badf()) } - fn get_filestat(&self) -> Result { + async fn get_filestat(&self) -> Result { Ok(Filestat { device_id: 0, inode: 0, - filetype: self.get_filetype()?, + filetype: self.get_filetype().await?, nlink: 0, size: 0, // XXX no way to get a size out of a Write :( atim: None, @@ -280,42 +296,56 @@ impl WasiFile for WritePipe { ctim: None, }) } - fn set_filestat_size(&self, _size: u64) -> Result<(), Error> { + async fn set_filestat_size(&self, _size: u64) -> Result<(), Error> { Err(Error::badf()) } - fn advise(&self, offset: u64, len: u64, advice: Advice) -> Result<(), Error> { + async fn advise(&self, offset: u64, len: u64, advice: Advice) -> Result<(), Error> { Err(Error::badf()) } - fn allocate(&self, offset: u64, len: u64) -> Result<(), Error> { + async fn allocate(&self, offset: u64, len: u64) -> Result<(), Error> { Err(Error::badf()) } - fn read_vectored(&self, bufs: &mut [io::IoSliceMut]) -> Result { + async fn read_vectored<'a>(&self, bufs: &mut [io::IoSliceMut<'a>]) -> Result { Err(Error::badf()) } - fn read_vectored_at(&self, bufs: &mut [io::IoSliceMut], offset: u64) -> Result { + async fn read_vectored_at<'a>( + &self, + bufs: &mut [io::IoSliceMut<'a>], + offset: u64, + ) -> Result { Err(Error::badf()) } - fn write_vectored(&self, bufs: 
&[io::IoSlice]) -> Result { + async fn write_vectored<'a>(&self, bufs: &[io::IoSlice<'a>]) -> Result { let n = self.borrow().write_vectored(bufs)?; Ok(n.try_into()?) } - fn write_vectored_at(&self, bufs: &[io::IoSlice], offset: u64) -> Result { + async fn write_vectored_at<'a>( + &self, + bufs: &[io::IoSlice<'a>], + offset: u64, + ) -> Result { Err(Error::badf()) } - fn seek(&self, pos: std::io::SeekFrom) -> Result { + async fn seek(&self, pos: std::io::SeekFrom) -> Result { Err(Error::badf()) } - fn peek(&self, buf: &mut [u8]) -> Result { + async fn peek(&self, buf: &mut [u8]) -> Result { Err(Error::badf()) } - fn set_times( + async fn set_times( &self, atime: Option, mtime: Option, ) -> Result<(), Error> { Err(Error::badf()) } - fn num_ready_bytes(&self) -> Result { + async fn num_ready_bytes(&self) -> Result { Ok(0) } + async fn readable(&mut self) -> Result<(), Error> { + Err(Error::badf()) + } + async fn writable(&mut self) -> Result<(), Error> { + Err(Error::badf()) + } } diff --git a/crates/wasi-common/src/sched.rs b/crates/wasi-common/src/sched.rs index db2a81f9db..fecac2c5ca 100644 --- a/crates/wasi-common/src/sched.rs +++ b/crates/wasi-common/src/sched.rs @@ -1,17 +1,22 @@ use crate::clocks::WasiMonotonicClock; use crate::file::WasiFile; use crate::Error; -use cap_std::time::{Duration, Instant}; -use std::cell::Ref; +use cap_std::time::Instant; pub mod subscription; +pub use cap_std::time::Duration; -use subscription::{MonotonicClockSubscription, RwSubscription, Subscription, SubscriptionResult}; +pub use subscription::{ + MonotonicClockSubscription, RwEventFlags, RwSubscription, Subscription, SubscriptionResult, +}; +#[wiggle::async_trait] pub trait WasiSched { - fn poll_oneoff(&self, poll: &Poll) -> Result<(), Error>; - fn sched_yield(&self) -> Result<(), Error>; + async fn poll_oneoff<'a>(&self, poll: &mut Poll<'a>) -> Result<(), Error>; + async fn sched_yield(&self) -> Result<(), Error>; + async fn sleep(&self, duration: Duration) -> Result<(), 
Error>; } +#[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct Userdata(u64); impl From for Userdata { fn from(u: u64) -> Userdata { @@ -25,6 +30,8 @@ impl From for u64 { } } +pub type PollResults = Vec<(SubscriptionResult, Userdata)>; + pub struct Poll<'a> { subs: Vec<(Subscription<'a>, Userdata)>, } @@ -49,11 +56,11 @@ impl<'a> Poll<'a> { ud, )); } - pub fn subscribe_read(&mut self, file: Ref<'a, dyn WasiFile>, ud: Userdata) { + pub fn subscribe_read(&mut self, file: &'a mut dyn WasiFile, ud: Userdata) { self.subs .push((Subscription::Read(RwSubscription::new(file)), ud)); } - pub fn subscribe_write(&mut self, file: Ref<'a, dyn WasiFile>, ud: Userdata) { + pub fn subscribe_write(&mut self, file: &'a mut dyn WasiFile, ud: Userdata) { self.subs .push((Subscription::Write(RwSubscription::new(file)), ud)); } @@ -66,20 +73,17 @@ impl<'a> Poll<'a> { pub fn is_empty(&self) -> bool { self.subs.is_empty() } - pub fn earliest_clock_deadline(&'a self) -> Option<&MonotonicClockSubscription<'a>> { - let mut subs = self - .subs + pub fn earliest_clock_deadline(&self) -> Option<&MonotonicClockSubscription<'a>> { + self.subs .iter() .filter_map(|(s, _ud)| match s { Subscription::MonotonicClock(t) => Some(t), _ => None, }) - .collect::>>(); - subs.sort_by(|a, b| a.deadline.cmp(&b.deadline)); - subs.into_iter().next() // First element is earliest + .min_by(|a, b| a.deadline.cmp(&b.deadline)) } - pub fn rw_subscriptions(&'a self) -> impl Iterator> { - self.subs.iter().filter_map(|(s, _ud)| match s { + pub fn rw_subscriptions<'b>(&'b mut self) -> impl Iterator> { + self.subs.iter_mut().filter_map(|(s, _ud)| match s { Subscription::Read { .. } | Subscription::Write { .. 
} => Some(s), _ => None, }) diff --git a/crates/wasi-common/src/sched/subscription.rs b/crates/wasi-common/src/sched/subscription.rs index 799cfc665f..cd861f6df0 100644 --- a/crates/wasi-common/src/sched/subscription.rs +++ b/crates/wasi-common/src/sched/subscription.rs @@ -3,7 +3,7 @@ use crate::file::WasiFile; use crate::Error; use bitflags::bitflags; use cap_std::time::{Duration, Instant}; -use std::cell::{Cell, Ref}; +use std::cell::Cell; bitflags! { pub struct RwEventFlags: u32 { @@ -12,12 +12,12 @@ bitflags! { } pub struct RwSubscription<'a> { - pub file: Ref<'a, dyn WasiFile>, + pub file: &'a mut dyn WasiFile, status: Cell>>, } impl<'a> RwSubscription<'a> { - pub fn new(file: Ref<'a, dyn WasiFile>) -> Self { + pub fn new(file: &'a mut dyn WasiFile) -> Self { Self { file, status: Cell::new(None), @@ -29,8 +29,8 @@ impl<'a> RwSubscription<'a> { pub fn error(&self, error: Error) { self.status.set(Some(Err(error))) } - pub fn result(self) -> Option> { - self.status.into_inner() + pub fn result(&self) -> Option> { + self.status.take() } } @@ -62,6 +62,7 @@ pub enum Subscription<'a> { MonotonicClock(MonotonicClockSubscription<'a>), } +#[derive(Debug)] pub enum SubscriptionResult { Read(Result<(u64, RwEventFlags), Error>), Write(Result<(u64, RwEventFlags), Error>), diff --git a/crates/wasi-common/src/snapshots/preview_0.rs b/crates/wasi-common/src/snapshots/preview_0.rs index 65f4823a3e..6eff280a47 100644 --- a/crates/wasi-common/src/snapshots/preview_0.rs +++ b/crates/wasi-common/src/snapshots/preview_0.rs @@ -1,12 +1,14 @@ -use crate::file::{FileCaps, FileEntryExt, TableFileExt}; +use crate::file::{FileCaps, FileEntryExt, FileEntryMutExt, TableFileExt, WasiFile}; use crate::sched::{ subscription::{RwEventFlags, SubscriptionResult}, - Poll, + Poll, Userdata, }; use crate::snapshots::preview_1::types as snapshot1_types; use crate::snapshots::preview_1::wasi_snapshot_preview1::WasiSnapshotPreview1 as Snapshot1; use crate::{Error, ErrorExt, WasiCtx}; use 
cap_std::time::Duration; +use std::cell::RefMut; +use std::collections::HashSet; use std::convert::{TryFrom, TryInto}; use std::io::{IoSlice, IoSliceMut}; use std::ops::Deref; @@ -16,6 +18,7 @@ use wiggle::GuestPtr; wiggle::from_witx!({ witx: ["$WASI_ROOT/phases/old/snapshot_0/witx/wasi_unstable.witx"], errors: { errno => Error }, + async: *, }); impl wiggle::GuestErrorType for types::Errno { @@ -24,14 +27,6 @@ impl wiggle::GuestErrorType for types::Errno { } } -impl types::GuestErrorConversion for WasiCtx { - fn into_errno(&self, e: wiggle::GuestError) -> types::Errno { - debug!("Guest error: {:?}", e); - let snapshot1_errno: snapshot1_types::Errno = e.into(); - snapshot1_errno.into() - } -} - impl types::UserErrorConversion for WasiCtx { fn errno_from_error(&self, e: Error) -> Result { debug!("Error: {:?}", e); @@ -340,79 +335,80 @@ convert_flags_bidirectional!( // This implementation, wherever possible, delegates directly to the Snapshot1 implementation, // performing the no-op type conversions along the way. 
-impl<'a> wasi_unstable::WasiUnstable for WasiCtx { - fn args_get<'b>( +#[wiggle::async_trait] +impl wasi_unstable::WasiUnstable for WasiCtx { + async fn args_get<'a>( &self, - argv: &GuestPtr<'b, GuestPtr<'b, u8>>, - argv_buf: &GuestPtr<'b, u8>, + argv: &GuestPtr<'a, GuestPtr<'a, u8>>, + argv_buf: &GuestPtr<'a, u8>, ) -> Result<(), Error> { - Snapshot1::args_get(self, argv, argv_buf) + Snapshot1::args_get(self, argv, argv_buf).await } - fn args_sizes_get(&self) -> Result<(types::Size, types::Size), Error> { - Snapshot1::args_sizes_get(self) + async fn args_sizes_get(&self) -> Result<(types::Size, types::Size), Error> { + Snapshot1::args_sizes_get(self).await } - fn environ_get<'b>( + async fn environ_get<'a>( &self, - environ: &GuestPtr<'b, GuestPtr<'b, u8>>, - environ_buf: &GuestPtr<'b, u8>, + environ: &GuestPtr<'a, GuestPtr<'a, u8>>, + environ_buf: &GuestPtr<'a, u8>, ) -> Result<(), Error> { - Snapshot1::environ_get(self, environ, environ_buf) + Snapshot1::environ_get(self, environ, environ_buf).await } - fn environ_sizes_get(&self) -> Result<(types::Size, types::Size), Error> { - Snapshot1::environ_sizes_get(self) + async fn environ_sizes_get(&self) -> Result<(types::Size, types::Size), Error> { + Snapshot1::environ_sizes_get(self).await } - fn clock_res_get(&self, id: types::Clockid) -> Result { - Snapshot1::clock_res_get(self, id.into()) + async fn clock_res_get(&self, id: types::Clockid) -> Result { + Snapshot1::clock_res_get(self, id.into()).await } - fn clock_time_get( + async fn clock_time_get( &self, id: types::Clockid, precision: types::Timestamp, ) -> Result { - Snapshot1::clock_time_get(self, id.into(), precision) + Snapshot1::clock_time_get(self, id.into(), precision).await } - fn fd_advise( + async fn fd_advise( &self, fd: types::Fd, offset: types::Filesize, len: types::Filesize, advice: types::Advice, ) -> Result<(), Error> { - Snapshot1::fd_advise(self, fd.into(), offset, len, advice.into()) + Snapshot1::fd_advise(self, fd.into(), offset, len, 
advice.into()).await } - fn fd_allocate( + async fn fd_allocate( &self, fd: types::Fd, offset: types::Filesize, len: types::Filesize, ) -> Result<(), Error> { - Snapshot1::fd_allocate(self, fd.into(), offset, len) + Snapshot1::fd_allocate(self, fd.into(), offset, len).await } - fn fd_close(&self, fd: types::Fd) -> Result<(), Error> { - Snapshot1::fd_close(self, fd.into()) + async fn fd_close(&self, fd: types::Fd) -> Result<(), Error> { + Snapshot1::fd_close(self, fd.into()).await } - fn fd_datasync(&self, fd: types::Fd) -> Result<(), Error> { - Snapshot1::fd_datasync(self, fd.into()) + async fn fd_datasync(&self, fd: types::Fd) -> Result<(), Error> { + Snapshot1::fd_datasync(self, fd.into()).await } - fn fd_fdstat_get(&self, fd: types::Fd) -> Result { - Ok(Snapshot1::fd_fdstat_get(self, fd.into())?.into()) + async fn fd_fdstat_get(&self, fd: types::Fd) -> Result { + Ok(Snapshot1::fd_fdstat_get(self, fd.into()).await?.into()) } - fn fd_fdstat_set_flags(&self, fd: types::Fd, flags: types::Fdflags) -> Result<(), Error> { - Snapshot1::fd_fdstat_set_flags(self, fd.into(), flags.into()) + async fn fd_fdstat_set_flags(&self, fd: types::Fd, flags: types::Fdflags) -> Result<(), Error> { + Snapshot1::fd_fdstat_set_flags(self, fd.into(), flags.into()).await } - fn fd_fdstat_set_rights( + async fn fd_fdstat_set_rights( &self, fd: types::Fd, fs_rights_base: types::Rights, @@ -424,24 +420,29 @@ impl<'a> wasi_unstable::WasiUnstable for WasiCtx { fs_rights_base.into(), fs_rights_inheriting.into(), ) + .await } - fn fd_filestat_get(&self, fd: types::Fd) -> Result { - Ok(Snapshot1::fd_filestat_get(self, fd.into())?.into()) + async fn fd_filestat_get(&self, fd: types::Fd) -> Result { + Ok(Snapshot1::fd_filestat_get(self, fd.into()).await?.into()) } - fn fd_filestat_set_size(&self, fd: types::Fd, size: types::Filesize) -> Result<(), Error> { - Snapshot1::fd_filestat_set_size(self, fd.into(), size) + async fn fd_filestat_set_size( + &self, + fd: types::Fd, + size: types::Filesize, + ) 
-> Result<(), Error> { + Snapshot1::fd_filestat_set_size(self, fd.into(), size).await } - fn fd_filestat_set_times( + async fn fd_filestat_set_times( &self, fd: types::Fd, atim: types::Timestamp, mtim: types::Timestamp, fst_flags: types::Fstflags, ) -> Result<(), Error> { - Snapshot1::fd_filestat_set_times(self, fd.into(), atim, mtim, fst_flags.into()) + Snapshot1::fd_filestat_set_times(self, fd.into(), atim, mtim, fst_flags.into()).await } // NOTE on fd_read, fd_pread, fd_write, fd_pwrite implementations: @@ -452,7 +453,11 @@ impl<'a> wasi_unstable::WasiUnstable for WasiCtx { // The bodies of these functions is mostly about converting the GuestPtr and types::-based // representation to a std::io::IoSlice(Mut) representation. - fn fd_read(&self, fd: types::Fd, iovs: &types::IovecArray<'_>) -> Result { + async fn fd_read<'a>( + &self, + fd: types::Fd, + iovs: &types::IovecArray<'a>, + ) -> Result { let table = self.table(); let f = table.get_file(u32::from(fd))?.get_cap(FileCaps::READ)?; @@ -470,14 +475,14 @@ impl<'a> wasi_unstable::WasiUnstable for WasiCtx { .map(|s| IoSliceMut::new(&mut *s)) .collect(); - let bytes_read = f.read_vectored(&mut ioslices)?; + let bytes_read = f.read_vectored(&mut ioslices).await?; Ok(types::Size::try_from(bytes_read)?) } - fn fd_pread( + async fn fd_pread<'a>( &self, fd: types::Fd, - iovs: &types::IovecArray<'_>, + iovs: &types::IovecArray<'a>, offset: types::Filesize, ) -> Result { let table = self.table(); @@ -499,14 +504,14 @@ impl<'a> wasi_unstable::WasiUnstable for WasiCtx { .map(|s| IoSliceMut::new(&mut *s)) .collect(); - let bytes_read = f.read_vectored_at(&mut ioslices, offset)?; + let bytes_read = f.read_vectored_at(&mut ioslices, offset).await?; Ok(types::Size::try_from(bytes_read)?) 
} - fn fd_write( + async fn fd_write<'a>( &self, fd: types::Fd, - ciovs: &types::CiovecArray<'_>, + ciovs: &types::CiovecArray<'a>, ) -> Result { let table = self.table(); let f = table.get_file(u32::from(fd))?.get_cap(FileCaps::WRITE)?; @@ -524,15 +529,15 @@ impl<'a> wasi_unstable::WasiUnstable for WasiCtx { .iter() .map(|s| IoSlice::new(s.deref())) .collect(); - let bytes_written = f.write_vectored(&ioslices)?; + let bytes_written = f.write_vectored(&ioslices).await?; Ok(types::Size::try_from(bytes_written)?) } - fn fd_pwrite( + async fn fd_pwrite<'a>( &self, fd: types::Fd, - ciovs: &types::CiovecArray<'_>, + ciovs: &types::CiovecArray<'a>, offset: types::Filesize, ) -> Result { let table = self.table(); @@ -553,77 +558,81 @@ impl<'a> wasi_unstable::WasiUnstable for WasiCtx { .iter() .map(|s| IoSlice::new(s.deref())) .collect(); - let bytes_written = f.write_vectored_at(&ioslices, offset)?; + let bytes_written = f.write_vectored_at(&ioslices, offset).await?; Ok(types::Size::try_from(bytes_written)?) 
} - fn fd_prestat_get(&self, fd: types::Fd) -> Result { - Ok(Snapshot1::fd_prestat_get(self, fd.into())?.into()) + async fn fd_prestat_get(&self, fd: types::Fd) -> Result { + Ok(Snapshot1::fd_prestat_get(self, fd.into()).await?.into()) } - fn fd_prestat_dir_name( + async fn fd_prestat_dir_name<'a>( &self, fd: types::Fd, - path: &GuestPtr, + path: &GuestPtr<'a, u8>, path_max_len: types::Size, ) -> Result<(), Error> { - Snapshot1::fd_prestat_dir_name(self, fd.into(), path, path_max_len) + Snapshot1::fd_prestat_dir_name(self, fd.into(), path, path_max_len).await } - fn fd_renumber(&self, from: types::Fd, to: types::Fd) -> Result<(), Error> { - Snapshot1::fd_renumber(self, from.into(), to.into()) + async fn fd_renumber(&self, from: types::Fd, to: types::Fd) -> Result<(), Error> { + Snapshot1::fd_renumber(self, from.into(), to.into()).await } - fn fd_seek( + async fn fd_seek( &self, fd: types::Fd, offset: types::Filedelta, whence: types::Whence, ) -> Result { - Snapshot1::fd_seek(self, fd.into(), offset, whence.into()) + Snapshot1::fd_seek(self, fd.into(), offset, whence.into()).await } - fn fd_sync(&self, fd: types::Fd) -> Result<(), Error> { - Snapshot1::fd_sync(self, fd.into()) + async fn fd_sync(&self, fd: types::Fd) -> Result<(), Error> { + Snapshot1::fd_sync(self, fd.into()).await } - fn fd_tell(&self, fd: types::Fd) -> Result { - Snapshot1::fd_tell(self, fd.into()) + async fn fd_tell(&self, fd: types::Fd) -> Result { + Snapshot1::fd_tell(self, fd.into()).await } - fn fd_readdir( + async fn fd_readdir<'a>( &self, fd: types::Fd, - buf: &GuestPtr, + buf: &GuestPtr<'a, u8>, buf_len: types::Size, cookie: types::Dircookie, ) -> Result { - Snapshot1::fd_readdir(self, fd.into(), buf, buf_len, cookie) + Snapshot1::fd_readdir(self, fd.into(), buf, buf_len, cookie).await } - fn path_create_directory( + async fn path_create_directory<'a>( &self, dirfd: types::Fd, - path: &GuestPtr<'_, str>, + path: &GuestPtr<'a, str>, ) -> Result<(), Error> { - 
Snapshot1::path_create_directory(self, dirfd.into(), path) + Snapshot1::path_create_directory(self, dirfd.into(), path).await } - fn path_filestat_get( + async fn path_filestat_get<'a>( &self, dirfd: types::Fd, flags: types::Lookupflags, - path: &GuestPtr<'_, str>, + path: &GuestPtr<'a, str>, ) -> Result { - Ok(Snapshot1::path_filestat_get(self, dirfd.into(), flags.into(), path)?.into()) + Ok( + Snapshot1::path_filestat_get(self, dirfd.into(), flags.into(), path) + .await? + .into(), + ) } - fn path_filestat_set_times( + async fn path_filestat_set_times<'a>( &self, dirfd: types::Fd, flags: types::Lookupflags, - path: &GuestPtr<'_, str>, + path: &GuestPtr<'a, str>, atim: types::Timestamp, mtim: types::Timestamp, fst_flags: types::Fstflags, @@ -637,15 +646,16 @@ impl<'a> wasi_unstable::WasiUnstable for WasiCtx { mtim, fst_flags.into(), ) + .await } - fn path_link( + async fn path_link<'a>( &self, src_fd: types::Fd, src_flags: types::Lookupflags, - src_path: &GuestPtr<'_, str>, + src_path: &GuestPtr<'a, str>, target_fd: types::Fd, - target_path: &GuestPtr<'_, str>, + target_path: &GuestPtr<'a, str>, ) -> Result<(), Error> { Snapshot1::path_link( self, @@ -655,13 +665,14 @@ impl<'a> wasi_unstable::WasiUnstable for WasiCtx { target_fd.into(), target_path, ) + .await } - fn path_open( + async fn path_open<'a>( &self, dirfd: types::Fd, dirflags: types::Lookupflags, - path: &GuestPtr<'_, str>, + path: &GuestPtr<'a, str>, oflags: types::Oflags, fs_rights_base: types::Rights, fs_rights_inheriting: types::Rights, @@ -676,49 +687,54 @@ impl<'a> wasi_unstable::WasiUnstable for WasiCtx { fs_rights_base.into(), fs_rights_inheriting.into(), fdflags.into(), - )? + ) + .await? 
.into()) } - fn path_readlink( + async fn path_readlink<'a>( &self, dirfd: types::Fd, - path: &GuestPtr<'_, str>, - buf: &GuestPtr, + path: &GuestPtr<'a, str>, + buf: &GuestPtr<'a, u8>, buf_len: types::Size, ) -> Result { - Snapshot1::path_readlink(self, dirfd.into(), path, buf, buf_len) + Snapshot1::path_readlink(self, dirfd.into(), path, buf, buf_len).await } - fn path_remove_directory( + async fn path_remove_directory<'a>( &self, dirfd: types::Fd, - path: &GuestPtr<'_, str>, + path: &GuestPtr<'a, str>, ) -> Result<(), Error> { - Snapshot1::path_remove_directory(self, dirfd.into(), path) + Snapshot1::path_remove_directory(self, dirfd.into(), path).await } - fn path_rename( + async fn path_rename<'a>( &self, src_fd: types::Fd, - src_path: &GuestPtr<'_, str>, + src_path: &GuestPtr<'a, str>, dest_fd: types::Fd, - dest_path: &GuestPtr<'_, str>, + dest_path: &GuestPtr<'a, str>, ) -> Result<(), Error> { - Snapshot1::path_rename(self, src_fd.into(), src_path, dest_fd.into(), dest_path) + Snapshot1::path_rename(self, src_fd.into(), src_path, dest_fd.into(), dest_path).await } - fn path_symlink( + async fn path_symlink<'a>( &self, - src_path: &GuestPtr<'_, str>, + src_path: &GuestPtr<'a, str>, dirfd: types::Fd, - dest_path: &GuestPtr<'_, str>, + dest_path: &GuestPtr<'a, str>, ) -> Result<(), Error> { - Snapshot1::path_symlink(self, src_path, dirfd.into(), dest_path) + Snapshot1::path_symlink(self, src_path, dirfd.into(), dest_path).await } - fn path_unlink_file(&self, dirfd: types::Fd, path: &GuestPtr<'_, str>) -> Result<(), Error> { - Snapshot1::path_unlink_file(self, dirfd.into(), path) + async fn path_unlink_file<'a>( + &self, + dirfd: types::Fd, + path: &GuestPtr<'a, str>, + ) -> Result<(), Error> { + Snapshot1::path_unlink_file(self, dirfd.into(), path).await } // NOTE on poll_oneoff implementation: @@ -728,17 +744,46 @@ impl<'a> wasi_unstable::WasiUnstable for WasiCtx { // The implementations are identical, but the `types::` in scope locally is different. 
// The bodies of these functions is mostly about converting the GuestPtr and types::-based // representation to use the Poll abstraction. - fn poll_oneoff( + async fn poll_oneoff<'a>( &self, - subs: &GuestPtr, - events: &GuestPtr, + subs: &GuestPtr<'a, types::Subscription>, + events: &GuestPtr<'a, types::Event>, nsubscriptions: types::Size, ) -> Result { if nsubscriptions == 0 { return Err(Error::invalid_argument().context("nsubscriptions must be nonzero")); } + // Special-case a `poll_oneoff` which is just sleeping on a single + // relative timer event, such as what WASI libc uses to implement sleep + // functions. This supports all clock IDs, because POSIX says that + // `clock_settime` doesn't effect relative sleeps. + if nsubscriptions == 1 { + let sub = subs.read()?; + if let types::SubscriptionU::Clock(clocksub) = sub.u { + if !clocksub + .flags + .contains(types::Subclockflags::SUBSCRIPTION_CLOCK_ABSTIME) + { + self.sched + .sleep(Duration::from_nanos(clocksub.timeout)) + .await?; + events.write(types::Event { + userdata: sub.userdata, + error: types::Errno::Success, + type_: types::Eventtype::Clock, + fd_readwrite: fd_readwrite_empty(), + })?; + return Ok(1); + } + } + } + let table = self.table(); + let mut sub_fds: HashSet = HashSet::new(); + // We need these refmuts to outlive Poll, which will hold the &mut dyn WasiFile inside + let mut read_refs: Vec<(RefMut<'_, dyn WasiFile>, Userdata)> = Vec::new(); + let mut write_refs: Vec<(RefMut<'_, dyn WasiFile>, Userdata)> = Vec::new(); let mut poll = Poll::new(); let subs = subs.as_array(nsubscriptions); @@ -777,22 +822,34 @@ impl<'a> wasi_unstable::WasiUnstable for WasiCtx { }, types::SubscriptionU::FdRead(readsub) => { let fd = readsub.file_descriptor; - let file = table - .get_file(u32::from(fd))? 
+ if sub_fds.contains(&fd) { + return Err(Error::invalid_argument() + .context("Fd can be subscribed to at most once per poll")); + } else { + sub_fds.insert(fd); + } + let file_ref = table + .get_file_mut(u32::from(fd))? .get_cap(FileCaps::POLL_READWRITE)?; - poll.subscribe_read(file, sub.userdata.into()); + read_refs.push((file_ref, sub.userdata.into())); } types::SubscriptionU::FdWrite(writesub) => { let fd = writesub.file_descriptor; - let file = table - .get_file(u32::from(fd))? + if sub_fds.contains(&fd) { + return Err(Error::invalid_argument() + .context("Fd can be subscribed to at most once per poll")); + } else { + sub_fds.insert(fd); + } + let file_ref = table + .get_file_mut(u32::from(fd))? .get_cap(FileCaps::POLL_READWRITE)?; - poll.subscribe_write(file, sub.userdata.into()); + write_refs.push((file_ref, sub.userdata.into())); } } } - self.sched.poll_oneoff(&poll)?; + self.sched.poll_oneoff(&mut poll).await?; let results = poll.results(); let num_results = results.len(); @@ -867,41 +924,45 @@ impl<'a> wasi_unstable::WasiUnstable for WasiCtx { Ok(num_results.try_into().expect("results fit into memory")) } - fn proc_exit(&self, status: types::Exitcode) -> wiggle::Trap { - Snapshot1::proc_exit(self, status) + async fn proc_exit(&self, status: types::Exitcode) -> wiggle::Trap { + Snapshot1::proc_exit(self, status).await } - fn proc_raise(&self, _sig: types::Signal) -> Result<(), Error> { + async fn proc_raise(&self, _sig: types::Signal) -> Result<(), Error> { Err(Error::trap("proc_raise unsupported")) } - fn sched_yield(&self) -> Result<(), Error> { - Snapshot1::sched_yield(self) + async fn sched_yield(&self) -> Result<(), Error> { + Snapshot1::sched_yield(self).await } - fn random_get(&self, buf: &GuestPtr, buf_len: types::Size) -> Result<(), Error> { - Snapshot1::random_get(self, buf, buf_len) + async fn random_get<'a>( + &self, + buf: &GuestPtr<'a, u8>, + buf_len: types::Size, + ) -> Result<(), Error> { + Snapshot1::random_get(self, buf, buf_len).await } 
- fn sock_recv( + async fn sock_recv<'a>( &self, _fd: types::Fd, - _ri_data: &types::IovecArray<'_>, + _ri_data: &types::IovecArray<'a>, _ri_flags: types::Riflags, ) -> Result<(types::Size, types::Roflags), Error> { Err(Error::trap("sock_recv unsupported")) } - fn sock_send( + async fn sock_send<'a>( &self, _fd: types::Fd, - _si_data: &types::CiovecArray<'_>, + _si_data: &types::CiovecArray<'a>, _si_flags: types::Siflags, ) -> Result { Err(Error::trap("sock_send unsupported")) } - fn sock_shutdown(&self, _fd: types::Fd, _how: types::Sdflags) -> Result<(), Error> { + async fn sock_shutdown(&self, _fd: types::Fd, _how: types::Sdflags) -> Result<(), Error> { Err(Error::trap("sock_shutdown unsupported")) } } diff --git a/crates/wasi-common/src/snapshots/preview_1.rs b/crates/wasi-common/src/snapshots/preview_1.rs index d5ec0a586a..553066628b 100644 --- a/crates/wasi-common/src/snapshots/preview_1.rs +++ b/crates/wasi-common/src/snapshots/preview_1.rs @@ -2,17 +2,18 @@ use crate::{ dir::{DirCaps, DirEntry, DirEntryExt, DirFdStat, ReaddirCursor, ReaddirEntity, TableDirExt}, file::{ Advice, FdFlags, FdStat, FileCaps, FileEntry, FileEntryExt, FileEntryMutExt, FileType, - Filestat, OFlags, TableFileExt, + Filestat, OFlags, TableFileExt, WasiFile, }, sched::{ subscription::{RwEventFlags, SubscriptionResult}, - Poll, + Poll, Userdata, }, Error, ErrorExt, ErrorKind, SystemTimeSpec, WasiCtx, }; use anyhow::Context; use cap_std::time::{Duration, SystemClock}; use std::cell::{Ref, RefMut}; +use std::collections::HashSet; use std::convert::{TryFrom, TryInto}; use std::io::{IoSlice, IoSliceMut}; use std::ops::{Deref, DerefMut}; @@ -22,6 +23,10 @@ use wiggle::GuestPtr; wiggle::from_witx!({ witx: ["$WASI_ROOT/phases/snapshot/witx/wasi_snapshot_preview1.witx"], errors: { errno => Error }, + // Note: not every function actually needs to be async, however, nearly all of them do, and + // keeping that set the same in this macro and the wasmtime_wiggle / lucet_wiggle macros is + // 
tedious, and there is no cost to having a sync function be async in this case. + async: * }); impl wiggle::GuestErrorType for types::Errno { @@ -30,13 +35,6 @@ impl wiggle::GuestErrorType for types::Errno { } } -impl types::GuestErrorConversion for WasiCtx { - fn into_errno(&self, e: wiggle::GuestError) -> types::Errno { - debug!("Guest error: {:?}", e); - e.into() - } -} - impl types::UserErrorConversion for WasiCtx { fn errno_from_error(&self, e: Error) -> Result { debug!("Error: {:?}", e); @@ -103,7 +101,6 @@ impl From for types::Errno { InvalidUtf8 { .. } => Self::Ilseq, TryFromIntError { .. } => Self::Overflow, InFunc { err, .. } => types::Errno::from(*err), - InDataField { err, .. } => types::Errno::from(*err), SliceLengthsDiffer { .. } => Self::Fault, BorrowCheckerOutOfHandles { .. } => Self::Fault, } @@ -196,8 +193,9 @@ impl TryFrom for types::Errno { } } -impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { - fn args_get<'b>( +#[wiggle::async_trait] +impl wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { + async fn args_get<'b>( &self, argv: &GuestPtr<'b, GuestPtr<'b, u8>>, argv_buf: &GuestPtr<'b, u8>, @@ -205,11 +203,11 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { self.args.write_to_guest(argv_buf, argv) } - fn args_sizes_get(&self) -> Result<(types::Size, types::Size), Error> { + async fn args_sizes_get(&self) -> Result<(types::Size, types::Size), Error> { Ok((self.args.number_elements(), self.args.cumulative_size())) } - fn environ_get<'b>( + async fn environ_get<'b>( &self, environ: &GuestPtr<'b, GuestPtr<'b, u8>>, environ_buf: &GuestPtr<'b, u8>, @@ -217,11 +215,11 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { self.env.write_to_guest(environ_buf, environ) } - fn environ_sizes_get(&self) -> Result<(types::Size, types::Size), Error> { + async fn environ_sizes_get(&self) -> Result<(types::Size, types::Size), Error> { Ok((self.env.number_elements(), self.env.cumulative_size())) } - 
fn clock_res_get(&self, id: types::Clockid) -> Result { + async fn clock_res_get(&self, id: types::Clockid) -> Result { let resolution = match id { types::Clockid::Realtime => Ok(self.clocks.system.resolution()), types::Clockid::Monotonic => Ok(self.clocks.monotonic.resolution()), @@ -232,7 +230,7 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { Ok(resolution.as_nanos().try_into()?) } - fn clock_time_get( + async fn clock_time_get( &self, id: types::Clockid, precision: types::Timestamp, @@ -257,7 +255,7 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { } } - fn fd_advise( + async fn fd_advise( &self, fd: types::Fd, offset: types::Filesize, @@ -267,11 +265,12 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { self.table() .get_file(u32::from(fd))? .get_cap(FileCaps::ADVISE)? - .advise(offset, len, advice.into())?; + .advise(offset, len, advice.into()) + .await?; Ok(()) } - fn fd_allocate( + async fn fd_allocate( &self, fd: types::Fd, offset: types::Filesize, @@ -280,11 +279,12 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { self.table() .get_file(u32::from(fd))? .get_cap(FileCaps::ALLOCATE)? - .allocate(offset, len)?; + .allocate(offset, len) + .await?; Ok(()) } - fn fd_close(&self, fd: types::Fd) -> Result<(), Error> { + async fn fd_close(&self, fd: types::Fd) -> Result<(), Error> { let mut table = self.table(); let fd = u32::from(fd); @@ -310,20 +310,21 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { Ok(()) } - fn fd_datasync(&self, fd: types::Fd) -> Result<(), Error> { + async fn fd_datasync(&self, fd: types::Fd) -> Result<(), Error> { self.table() .get_file(u32::from(fd))? .get_cap(FileCaps::DATASYNC)? 
- .datasync()?; + .datasync() + .await?; Ok(()) } - fn fd_fdstat_get(&self, fd: types::Fd) -> Result { + async fn fd_fdstat_get(&self, fd: types::Fd) -> Result { let table = self.table(); let fd = u32::from(fd); if table.is::(fd) { let file_entry: Ref = table.get(fd)?; - let fdstat = file_entry.get_fdstat()?; + let fdstat = file_entry.get_fdstat().await?; Ok(types::Fdstat::from(&fdstat)) } else if table.is::(fd) { let dir_entry: Ref = table.get(fd)?; @@ -334,14 +335,15 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { } } - fn fd_fdstat_set_flags(&self, fd: types::Fd, flags: types::Fdflags) -> Result<(), Error> { + async fn fd_fdstat_set_flags(&self, fd: types::Fd, flags: types::Fdflags) -> Result<(), Error> { self.table() .get_file_mut(u32::from(fd))? .get_cap(FileCaps::FDSTAT_SET_FLAGS)? .set_fdflags(FdFlags::from(flags)) + .await } - fn fd_fdstat_set_rights( + async fn fd_fdstat_set_rights( &self, fd: types::Fd, fs_rights_base: types::Rights, @@ -363,35 +365,42 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { } } - fn fd_filestat_get(&self, fd: types::Fd) -> Result { + async fn fd_filestat_get(&self, fd: types::Fd) -> Result { let table = self.table(); let fd = u32::from(fd); if table.is::(fd) { let filestat = table .get_file(fd)? .get_cap(FileCaps::FILESTAT_GET)? - .get_filestat()?; + .get_filestat() + .await?; Ok(filestat.into()) } else if table.is::(fd) { let filestat = table .get_dir(fd)? .get_cap(DirCaps::FILESTAT_GET)? - .get_filestat()?; + .get_filestat() + .await?; Ok(filestat.into()) } else { Err(Error::badf()) } } - fn fd_filestat_set_size(&self, fd: types::Fd, size: types::Filesize) -> Result<(), Error> { + async fn fd_filestat_set_size( + &self, + fd: types::Fd, + size: types::Filesize, + ) -> Result<(), Error> { self.table() .get_file(u32::from(fd))? .get_cap(FileCaps::FILESTAT_SET_SIZE)? 
- .set_filestat_size(size)?; + .set_filestat_size(size) + .await?; Ok(()) } - fn fd_filestat_set_times( + async fn fd_filestat_set_times( &self, fd: types::Fd, atim: types::Timestamp, @@ -415,18 +424,24 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { .expect("checked that entry is file") .get_cap(FileCaps::FILESTAT_SET_TIMES)? .set_times(atim, mtim) + .await } else if table.is::(fd) { table .get_dir(fd) .expect("checked that entry is dir") .get_cap(DirCaps::FILESTAT_SET_TIMES)? .set_times(".", atim, mtim, false) + .await } else { Err(Error::badf()) } } - fn fd_read(&self, fd: types::Fd, iovs: &types::IovecArray<'_>) -> Result { + async fn fd_read<'a>( + &self, + fd: types::Fd, + iovs: &types::IovecArray<'a>, + ) -> Result { let table = self.table(); let f = table.get_file(u32::from(fd))?.get_cap(FileCaps::READ)?; @@ -444,14 +459,14 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { .map(|s| IoSliceMut::new(&mut *s)) .collect(); - let bytes_read = f.read_vectored(&mut ioslices)?; + let bytes_read = f.read_vectored(&mut ioslices).await?; Ok(types::Size::try_from(bytes_read)?) } - fn fd_pread( + async fn fd_pread<'a>( &self, fd: types::Fd, - iovs: &types::IovecArray<'_>, + iovs: &types::IovecArray<'a>, offset: types::Filesize, ) -> Result { let table = self.table(); @@ -473,14 +488,14 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { .map(|s| IoSliceMut::new(&mut *s)) .collect(); - let bytes_read = f.read_vectored_at(&mut ioslices, offset)?; + let bytes_read = f.read_vectored_at(&mut ioslices, offset).await?; Ok(types::Size::try_from(bytes_read)?) 
} - fn fd_write( + async fn fd_write<'a>( &self, fd: types::Fd, - ciovs: &types::CiovecArray<'_>, + ciovs: &types::CiovecArray<'a>, ) -> Result { let table = self.table(); let f = table.get_file(u32::from(fd))?.get_cap(FileCaps::WRITE)?; @@ -498,15 +513,15 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { .iter() .map(|s| IoSlice::new(s.deref())) .collect(); - let bytes_written = f.write_vectored(&ioslices)?; + let bytes_written = f.write_vectored(&ioslices).await?; Ok(types::Size::try_from(bytes_written)?) } - fn fd_pwrite( + async fn fd_pwrite<'a>( &self, fd: types::Fd, - ciovs: &types::CiovecArray<'_>, + ciovs: &types::CiovecArray<'a>, offset: types::Filesize, ) -> Result { let table = self.table(); @@ -527,12 +542,12 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { .iter() .map(|s| IoSlice::new(s.deref())) .collect(); - let bytes_written = f.write_vectored_at(&ioslices, offset)?; + let bytes_written = f.write_vectored_at(&ioslices, offset).await?; Ok(types::Size::try_from(bytes_written)?) 
} - fn fd_prestat_get(&self, fd: types::Fd) -> Result { + async fn fd_prestat_get(&self, fd: types::Fd) -> Result { let table = self.table(); let dir_entry: Ref = table.get(u32::from(fd)).map_err(|_| Error::badf())?; if let Some(ref preopen) = dir_entry.preopen_path() { @@ -544,10 +559,10 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { } } - fn fd_prestat_dir_name( + async fn fd_prestat_dir_name<'a>( &self, fd: types::Fd, - path: &GuestPtr, + path: &GuestPtr<'a, u8>, path_max_len: types::Size, ) -> Result<(), Error> { let table = self.table(); @@ -568,7 +583,7 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { Err(Error::not_supported()) } } - fn fd_renumber(&self, from: types::Fd, to: types::Fd) -> Result<(), Error> { + async fn fd_renumber(&self, from: types::Fd, to: types::Fd) -> Result<(), Error> { let mut table = self.table(); let from = u32::from(from); let to = u32::from(to); @@ -585,7 +600,7 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { Ok(()) } - fn fd_seek( + async fn fd_seek( &self, fd: types::Fd, offset: types::Filedelta, @@ -608,32 +623,35 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { .table() .get_file(u32::from(fd))? .get_cap(required_caps)? - .seek(whence)?; + .seek(whence) + .await?; Ok(newoffset) } - fn fd_sync(&self, fd: types::Fd) -> Result<(), Error> { + async fn fd_sync(&self, fd: types::Fd) -> Result<(), Error> { self.table() .get_file(u32::from(fd))? .get_cap(FileCaps::SYNC)? - .sync()?; + .sync() + .await?; Ok(()) } - fn fd_tell(&self, fd: types::Fd) -> Result { + async fn fd_tell(&self, fd: types::Fd) -> Result { // XXX should this be stream_position? let offset = self .table() .get_file(u32::from(fd))? .get_cap(FileCaps::TELL)? 
- .seek(std::io::SeekFrom::Current(0))?; + .seek(std::io::SeekFrom::Current(0)) + .await?; Ok(offset) } - fn fd_readdir( + async fn fd_readdir<'a>( &self, fd: types::Fd, - buf: &GuestPtr, + buf: &GuestPtr<'a, u8>, buf_len: types::Size, cookie: types::Dircookie, ) -> Result { @@ -643,7 +661,8 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { .table() .get_dir(u32::from(fd))? .get_cap(DirCaps::READDIR)? - .readdir(ReaddirCursor::from(cookie))? + .readdir(ReaddirCursor::from(cookie)) + .await? { let entity = entity?; let dirent_raw = dirent_bytes(types::Dirent::try_from(&entity)?); @@ -683,22 +702,23 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { Ok(bufused) } - fn path_create_directory( + async fn path_create_directory<'a>( &self, dirfd: types::Fd, - path: &GuestPtr<'_, str>, + path: &GuestPtr<'a, str>, ) -> Result<(), Error> { self.table() .get_dir(u32::from(dirfd))? .get_cap(DirCaps::CREATE_DIRECTORY)? .create_dir(path.as_str()?.deref()) + .await } - fn path_filestat_get( + async fn path_filestat_get<'a>( &self, dirfd: types::Fd, flags: types::Lookupflags, - path: &GuestPtr<'_, str>, + path: &GuestPtr<'a, str>, ) -> Result { let filestat = self .table() @@ -707,15 +727,16 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { .get_path_filestat( path.as_str()?.deref(), flags.contains(types::Lookupflags::SYMLINK_FOLLOW), - )?; + ) + .await?; Ok(types::Filestat::from(filestat)) } - fn path_filestat_set_times( + async fn path_filestat_set_times<'a>( &self, dirfd: types::Fd, flags: types::Lookupflags, - path: &GuestPtr<'_, str>, + path: &GuestPtr<'a, str>, atim: types::Timestamp, mtim: types::Timestamp, fst_flags: types::Fstflags, @@ -736,15 +757,16 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { mtim, flags.contains(types::Lookupflags::SYMLINK_FOLLOW), ) + .await } - fn path_link( + async fn path_link<'a>( &self, src_fd: types::Fd, src_flags: types::Lookupflags, - src_path: 
&GuestPtr<'_, str>, + src_path: &GuestPtr<'a, str>, target_fd: types::Fd, - target_path: &GuestPtr<'_, str>, + target_path: &GuestPtr<'a, str>, ) -> Result<(), Error> { let table = self.table(); let src_dir = table @@ -759,18 +781,20 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { .context("symlink following on path_link is not supported")); } - src_dir.hard_link( - src_path.as_str()?.deref(), - target_dir.deref(), - target_path.as_str()?.deref(), - ) + src_dir + .hard_link( + src_path.as_str()?.deref(), + target_dir.deref(), + target_path.as_str()?.deref(), + ) + .await } - fn path_open( + async fn path_open<'a>( &self, dirfd: types::Fd, dirflags: types::Lookupflags, - path: &GuestPtr<'_, str>, + path: &GuestPtr<'a, str>, oflags: types::Oflags, fs_rights_base: types::Rights, fs_rights_inheriting: types::Rights, @@ -798,7 +822,7 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { let dir_caps = dir_entry.child_dir_caps(DirCaps::from(&fs_rights_base)); let file_caps = dir_entry.child_file_caps(FileCaps::from(&fs_rights_inheriting)); let dir = dir_entry.get_cap(DirCaps::OPEN)?; - let child_dir = dir.open_dir(symlink_follow, path.deref())?; + let child_dir = dir.open_dir(symlink_follow, path.deref()).await?; drop(dir); let fd = table.push(Box::new(DirEntry::new( dir_caps, file_caps, None, child_dir, @@ -816,25 +840,28 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { let write = file_caps.contains(FileCaps::WRITE) || file_caps.contains(FileCaps::ALLOCATE) || file_caps.contains(FileCaps::FILESTAT_SET_SIZE); - let file = dir.open_file(symlink_follow, path.deref(), oflags, read, write, fdflags)?; + let file = dir + .open_file(symlink_follow, path.deref(), oflags, read, write, fdflags) + .await?; drop(dir); let fd = table.push(Box::new(FileEntry::new(file_caps, file)))?; Ok(types::Fd::from(fd)) } } - fn path_readlink( + async fn path_readlink<'a>( &self, dirfd: types::Fd, - path: &GuestPtr<'_, str>, - buf: 
&GuestPtr, + path: &GuestPtr<'a, str>, + buf: &GuestPtr<'a, u8>, buf_len: types::Size, ) -> Result { let link = self .table() .get_dir(u32::from(dirfd))? .get_cap(DirCaps::READLINK)? - .read_link(path.as_str()?.deref())? + .read_link(path.as_str()?.deref()) + .await? .into_os_string() .into_string() .map_err(|_| Error::illegal_byte_sequence().context("link contents"))?; @@ -848,23 +875,24 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { Ok(link_len as types::Size) } - fn path_remove_directory( + async fn path_remove_directory<'a>( &self, dirfd: types::Fd, - path: &GuestPtr<'_, str>, + path: &GuestPtr<'a, str>, ) -> Result<(), Error> { self.table() .get_dir(u32::from(dirfd))? .get_cap(DirCaps::REMOVE_DIRECTORY)? .remove_dir(path.as_str()?.deref()) + .await } - fn path_rename( + async fn path_rename<'a>( &self, src_fd: types::Fd, - src_path: &GuestPtr<'_, str>, + src_path: &GuestPtr<'a, str>, dest_fd: types::Fd, - dest_path: &GuestPtr<'_, str>, + dest_path: &GuestPtr<'a, str>, ) -> Result<(), Error> { let table = self.table(); let src_dir = table @@ -873,43 +901,80 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { let dest_dir = table .get_dir(u32::from(dest_fd))? .get_cap(DirCaps::RENAME_TARGET)?; - src_dir.rename( - src_path.as_str()?.deref(), - dest_dir.deref(), - dest_path.as_str()?.deref(), - ) + src_dir + .rename( + src_path.as_str()?.deref(), + dest_dir.deref(), + dest_path.as_str()?.deref(), + ) + .await } - fn path_symlink( + async fn path_symlink<'a>( &self, - src_path: &GuestPtr<'_, str>, + src_path: &GuestPtr<'a, str>, dirfd: types::Fd, - dest_path: &GuestPtr<'_, str>, + dest_path: &GuestPtr<'a, str>, ) -> Result<(), Error> { self.table() .get_dir(u32::from(dirfd))? .get_cap(DirCaps::SYMLINK)? 
.symlink(src_path.as_str()?.deref(), dest_path.as_str()?.deref()) + .await } - fn path_unlink_file(&self, dirfd: types::Fd, path: &GuestPtr<'_, str>) -> Result<(), Error> { + async fn path_unlink_file<'a>( + &self, + dirfd: types::Fd, + path: &GuestPtr<'a, str>, + ) -> Result<(), Error> { self.table() .get_dir(u32::from(dirfd))? .get_cap(DirCaps::UNLINK_FILE)? .unlink_file(path.as_str()?.deref()) + .await } - fn poll_oneoff( + async fn poll_oneoff<'a>( &self, - subs: &GuestPtr, - events: &GuestPtr, + subs: &GuestPtr<'a, types::Subscription>, + events: &GuestPtr<'a, types::Event>, nsubscriptions: types::Size, ) -> Result { if nsubscriptions == 0 { return Err(Error::invalid_argument().context("nsubscriptions must be nonzero")); } + // Special-case a `poll_oneoff` which is just sleeping on a single + // relative timer event, such as what WASI libc uses to implement sleep + // functions. This supports all clock IDs, because POSIX says that + // `clock_settime` doesn't effect relative sleeps. 
+ if nsubscriptions == 1 { + let sub = subs.read()?; + if let types::SubscriptionU::Clock(clocksub) = sub.u { + if !clocksub + .flags + .contains(types::Subclockflags::SUBSCRIPTION_CLOCK_ABSTIME) + { + self.sched + .sleep(Duration::from_nanos(clocksub.timeout)) + .await?; + events.write(types::Event { + userdata: sub.userdata, + error: types::Errno::Success, + type_: types::Eventtype::Clock, + fd_readwrite: fd_readwrite_empty(), + })?; + return Ok(1); + } + } + } + let table = self.table(); + let mut sub_fds: HashSet = HashSet::new(); + // We need these refmuts to outlive Poll, which will hold the &mut dyn WasiFile inside + let mut read_refs: Vec<(RefMut<'_, dyn WasiFile>, Userdata)> = Vec::new(); + let mut write_refs: Vec<(RefMut<'_, dyn WasiFile>, Userdata)> = Vec::new(); let mut poll = Poll::new(); let subs = subs.as_array(nsubscriptions); @@ -948,22 +1013,41 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { }, types::SubscriptionU::FdRead(readsub) => { let fd = readsub.file_descriptor; - let file = table - .get_file(u32::from(fd))? + if sub_fds.contains(&fd) { + return Err(Error::invalid_argument() + .context("Fd can be subscribed to at most once per poll")); + } else { + sub_fds.insert(fd); + } + let file_ref = table + .get_file_mut(u32::from(fd))? .get_cap(FileCaps::POLL_READWRITE)?; - poll.subscribe_read(file, sub.userdata.into()); + read_refs.push((file_ref, sub.userdata.into())); } types::SubscriptionU::FdWrite(writesub) => { let fd = writesub.file_descriptor; - let file = table - .get_file(u32::from(fd))? + if sub_fds.contains(&fd) { + return Err(Error::invalid_argument() + .context("Fd can be subscribed to at most once per poll")); + } else { + sub_fds.insert(fd); + } + let file_ref = table + .get_file_mut(u32::from(fd))? 
.get_cap(FileCaps::POLL_READWRITE)?; - poll.subscribe_write(file, sub.userdata.into()); + write_refs.push((file_ref, sub.userdata.into())); } } } - self.sched.poll_oneoff(&poll)?; + for (f, ud) in read_refs.iter_mut() { + poll.subscribe_read(f.deref_mut(), *ud); + } + for (f, ud) in write_refs.iter_mut() { + poll.subscribe_write(f.deref_mut(), *ud); + } + + self.sched.poll_oneoff(&mut poll).await?; let results = poll.results(); let num_results = results.len(); @@ -1038,7 +1122,7 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { Ok(num_results.try_into().expect("results fit into memory")) } - fn proc_exit(&self, status: types::Exitcode) -> wiggle::Trap { + async fn proc_exit(&self, status: types::Exitcode) -> wiggle::Trap { // Check that the status is within WASI's range. if status < 126 { wiggle::Trap::I32Exit(status as i32) @@ -1047,39 +1131,43 @@ impl<'a> wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { } } - fn proc_raise(&self, _sig: types::Signal) -> Result<(), Error> { + async fn proc_raise(&self, _sig: types::Signal) -> Result<(), Error> { Err(Error::trap("proc_raise unsupported")) } - fn sched_yield(&self) -> Result<(), Error> { - self.sched.sched_yield() + async fn sched_yield(&self) -> Result<(), Error> { + self.sched.sched_yield().await } - fn random_get(&self, buf: &GuestPtr, buf_len: types::Size) -> Result<(), Error> { + async fn random_get<'a>( + &self, + buf: &GuestPtr<'a, u8>, + buf_len: types::Size, + ) -> Result<(), Error> { let mut buf = buf.as_array(buf_len).as_slice_mut()?; self.random.borrow_mut().try_fill_bytes(buf.deref_mut())?; Ok(()) } - fn sock_recv( + async fn sock_recv<'a>( &self, _fd: types::Fd, - _ri_data: &types::IovecArray<'_>, + _ri_data: &types::IovecArray<'a>, _ri_flags: types::Riflags, ) -> Result<(types::Size, types::Roflags), Error> { Err(Error::trap("sock_recv unsupported")) } - fn sock_send( + async fn sock_send<'a>( &self, _fd: types::Fd, - _si_data: &types::CiovecArray<'_>, + _si_data: 
&types::CiovecArray<'a>, _si_flags: types::Siflags, ) -> Result { Err(Error::trap("sock_send unsupported")) } - fn sock_shutdown(&self, _fd: types::Fd, _how: types::Sdflags) -> Result<(), Error> { + async fn sock_shutdown(&self, _fd: types::Fd, _how: types::Sdflags) -> Result<(), Error> { Err(Error::trap("sock_shutdown unsupported")) } } diff --git a/crates/wasi-common/tokio/Cargo.toml b/crates/wasi-common/tokio/Cargo.toml new file mode 100644 index 0000000000..adef1364ee --- /dev/null +++ b/crates/wasi-common/tokio/Cargo.toml @@ -0,0 +1,42 @@ +[package] +name = "wasi-tokio" +version = "0.26.0" +authors = ["The Wasmtime Project Developers"] +description = "WASI implementation in Rust" +license = "Apache-2.0 WITH LLVM-exception" +categories = ["wasm"] +keywords = ["webassembly", "wasm"] +repository = "https://github.com/bytecodealliance/wasmtime" +readme = "README.md" +edition = "2018" +include = ["src/**/*", "LICENSE" ] + +[dependencies] +wasi-common = { path = "../", version = "0.26.0" } +wasi-cap-std-sync = { path = "../cap-std-sync", version = "0.26.0" } +wiggle = { path = "../../wiggle", version = "0.26.0" } +tokio = { version = "1.5.0", features = [ "rt", "fs", "time", "io-util", "net", "io-std", "rt-multi-thread"] } +cap-std = "0.13.7" +cap-fs-ext = "0.13.7" +cap-time-ext = "0.13.7" +fs-set-times = "0.3.1" +unsafe-io = "0.6.5" +system-interface = { version = "0.6.4", features = ["cap_std_impls"] } +tracing = "0.1.19" +bitflags = "1.2" +anyhow = "1" + +[target.'cfg(unix)'.dependencies] +libc = "0.2" +posish = "0.6.1" + + +[target.'cfg(windows)'.dependencies] +winapi = "0.3" +lazy_static = "1.4" + +[dev-dependencies] +tempfile = "3.1.0" +tokio = { version = "1.5.0", features = [ "macros" ] } +anyhow = "1" +cap-tempfile = "0.13.7" diff --git a/crates/wasi-common/tokio/src/dir.rs b/crates/wasi-common/tokio/src/dir.rs new file mode 100644 index 0000000000..a70b44e182 --- /dev/null +++ b/crates/wasi-common/tokio/src/dir.rs @@ -0,0 +1,209 @@ +use 
crate::{block_on_dummy_executor, file::File}; +use std::any::Any; +use std::path::PathBuf; +use wasi_common::{ + dir::{ReaddirCursor, ReaddirEntity, WasiDir}, + file::{FdFlags, Filestat, OFlags, WasiFile}, + Error, ErrorExt, +}; + +pub struct Dir(wasi_cap_std_sync::dir::Dir); + +impl Dir { + pub fn from_cap_std(dir: cap_std::fs::Dir) -> Self { + Dir(wasi_cap_std_sync::dir::Dir::from_cap_std(dir)) + } +} + +#[wiggle::async_trait] +impl WasiDir for Dir { + fn as_any(&self) -> &dyn Any { + self + } + async fn open_file( + &self, + symlink_follow: bool, + path: &str, + oflags: OFlags, + read: bool, + write: bool, + fdflags: FdFlags, + ) -> Result, Error> { + let f = block_on_dummy_executor(move || async move { + self.0 + .open_file_(symlink_follow, path, oflags, read, write, fdflags) + })?; + Ok(Box::new(File::from_inner(f))) + } + + async fn open_dir(&self, symlink_follow: bool, path: &str) -> Result, Error> { + let d = + block_on_dummy_executor(move || async move { self.0.open_dir_(symlink_follow, path) })?; + Ok(Box::new(Dir(d))) + } + + async fn create_dir(&self, path: &str) -> Result<(), Error> { + block_on_dummy_executor(|| self.0.create_dir(path)) + } + async fn readdir( + &self, + cursor: ReaddirCursor, + ) -> Result> + Send>, Error> { + struct I(Box> + Send>); + impl Iterator for I { + type Item = Result; + fn next(&mut self) -> Option { + tokio::task::block_in_place(move || self.0.next()) + } + } + + let inner = block_on_dummy_executor(move || self.0.readdir(cursor))?; + Ok(Box::new(I(inner))) + } + + async fn symlink(&self, src_path: &str, dest_path: &str) -> Result<(), Error> { + block_on_dummy_executor(move || self.0.symlink(src_path, dest_path)) + } + async fn remove_dir(&self, path: &str) -> Result<(), Error> { + block_on_dummy_executor(move || self.0.remove_dir(path)) + } + + async fn unlink_file(&self, path: &str) -> Result<(), Error> { + block_on_dummy_executor(move || self.0.unlink_file(path)) + } + async fn read_link(&self, path: &str) -> Result { + 
block_on_dummy_executor(move || self.0.read_link(path)) + } + async fn get_filestat(&self) -> Result { + block_on_dummy_executor(|| self.0.get_filestat()) + } + async fn get_path_filestat( + &self, + path: &str, + follow_symlinks: bool, + ) -> Result { + block_on_dummy_executor(move || self.0.get_path_filestat(path, follow_symlinks)) + } + async fn rename( + &self, + src_path: &str, + dest_dir: &dyn WasiDir, + dest_path: &str, + ) -> Result<(), Error> { + let dest_dir = dest_dir + .as_any() + .downcast_ref::() + .ok_or(Error::badf().context("failed downcast to tokio Dir"))?; + block_on_dummy_executor( + move || async move { self.0.rename_(src_path, &dest_dir.0, dest_path) }, + ) + } + async fn hard_link( + &self, + src_path: &str, + target_dir: &dyn WasiDir, + target_path: &str, + ) -> Result<(), Error> { + let target_dir = target_dir + .as_any() + .downcast_ref::() + .ok_or(Error::badf().context("failed downcast to tokio Dir"))?; + block_on_dummy_executor(move || async move { + self.0.hard_link_(src_path, &target_dir.0, target_path) + }) + } + async fn set_times( + &self, + path: &str, + atime: Option, + mtime: Option, + follow_symlinks: bool, + ) -> Result<(), Error> { + block_on_dummy_executor(move || self.0.set_times(path, atime, mtime, follow_symlinks)) + } +} + +#[cfg(test)] +mod test { + use super::Dir; + #[tokio::test(flavor = "multi_thread")] + async fn scratch_dir() { + let tempdir = tempfile::Builder::new() + .prefix("cap-std-sync") + .tempdir() + .expect("create temporary dir"); + let preopen_dir = unsafe { cap_std::fs::Dir::open_ambient_dir(tempdir.path()) } + .expect("open ambient temporary dir"); + let preopen_dir = Dir::from_cap_std(preopen_dir); + wasi_common::WasiDir::open_dir(&preopen_dir, false, ".") + .await + .expect("open the same directory via WasiDir abstraction"); + } + + // Readdir does not work on windows, so we won't test it there. 
+ #[cfg(not(windows))] + #[tokio::test(flavor = "multi_thread")] + async fn readdir() { + use std::collections::HashMap; + use wasi_common::dir::{ReaddirCursor, ReaddirEntity, WasiDir}; + use wasi_common::file::{FdFlags, FileType, OFlags}; + + async fn readdir_into_map(dir: &dyn WasiDir) -> HashMap { + let mut out = HashMap::new(); + for readdir_result in dir + .readdir(ReaddirCursor::from(0)) + .await + .expect("readdir succeeds") + { + let entity = readdir_result.expect("readdir entry is valid"); + out.insert(entity.name.clone(), entity); + } + out + } + + let tempdir = tempfile::Builder::new() + .prefix("cap-std-sync") + .tempdir() + .expect("create temporary dir"); + let preopen_dir = unsafe { cap_std::fs::Dir::open_ambient_dir(tempdir.path()) } + .expect("open ambient temporary dir"); + let preopen_dir = Dir::from_cap_std(preopen_dir); + + let entities = readdir_into_map(&preopen_dir).await; + assert_eq!( + entities.len(), + 2, + "should just be . and .. in empty dir: {:?}", + entities + ); + assert!(entities.get(".").is_some()); + assert!(entities.get("..").is_some()); + + preopen_dir + .open_file( + false, + "file1", + OFlags::CREATE, + true, + false, + FdFlags::empty(), + ) + .await + .expect("create file1"); + + let entities = readdir_into_map(&preopen_dir).await; + assert_eq!(entities.len(), 3, "should be ., .., file1 {:?}", entities); + assert_eq!( + entities.get(".").expect(". entry").filetype, + FileType::Directory + ); + assert_eq!( + entities.get("..").expect(".. 
entry").filetype, + FileType::Directory + ); + assert_eq!( + entities.get("file1").expect("file1 entry").filetype, + FileType::RegularFile + ); + } +} diff --git a/crates/wasi-common/tokio/src/file.rs b/crates/wasi-common/tokio/src/file.rs new file mode 100644 index 0000000000..5907aa7f53 --- /dev/null +++ b/crates/wasi-common/tokio/src/file.rs @@ -0,0 +1,186 @@ +use crate::block_on_dummy_executor; +use std::any::Any; +use std::io; +#[cfg(windows)] +use std::os::windows::io::{AsRawHandle, RawHandle}; +use wasi_common::{ + file::{Advice, FdFlags, FileType, Filestat, WasiFile}, + Error, +}; + +pub struct File(wasi_cap_std_sync::file::File); + +impl File { + pub(crate) fn from_inner(file: wasi_cap_std_sync::file::File) -> Self { + File(file) + } + pub fn from_cap_std(file: cap_std::fs::File) -> Self { + Self::from_inner(wasi_cap_std_sync::file::File::from_cap_std(file)) + } +} + +pub struct Stdin(wasi_cap_std_sync::stdio::Stdin); + +pub fn stdin() -> Stdin { + Stdin(wasi_cap_std_sync::stdio::stdin()) +} + +pub struct Stdout(wasi_cap_std_sync::stdio::Stdout); + +pub fn stdout() -> Stdout { + Stdout(wasi_cap_std_sync::stdio::stdout()) +} + +pub struct Stderr(wasi_cap_std_sync::stdio::Stderr); + +pub fn stderr() -> Stderr { + Stderr(wasi_cap_std_sync::stdio::stderr()) +} + +macro_rules! 
wasi_file_impl { + ($ty:ty) => { + #[wiggle::async_trait] + impl WasiFile for $ty { + fn as_any(&self) -> &dyn Any { + self + } + async fn datasync(&self) -> Result<(), Error> { + block_on_dummy_executor(|| self.0.datasync()) + } + async fn sync(&self) -> Result<(), Error> { + block_on_dummy_executor(|| self.0.sync()) + } + async fn get_filetype(&self) -> Result { + block_on_dummy_executor(|| self.0.get_filetype()) + } + async fn get_fdflags(&self) -> Result { + block_on_dummy_executor(|| self.0.get_fdflags()) + } + async fn set_fdflags(&mut self, fdflags: FdFlags) -> Result<(), Error> { + block_on_dummy_executor(|| self.0.set_fdflags(fdflags)) + } + async fn get_filestat(&self) -> Result { + block_on_dummy_executor(|| self.0.get_filestat()) + } + async fn set_filestat_size(&self, size: u64) -> Result<(), Error> { + block_on_dummy_executor(move || self.0.set_filestat_size(size)) + } + async fn advise(&self, offset: u64, len: u64, advice: Advice) -> Result<(), Error> { + block_on_dummy_executor(move || self.0.advise(offset, len, advice)) + } + async fn allocate(&self, offset: u64, len: u64) -> Result<(), Error> { + block_on_dummy_executor(move || self.0.allocate(offset, len)) + } + async fn read_vectored<'a>( + &self, + bufs: &mut [io::IoSliceMut<'a>], + ) -> Result { + block_on_dummy_executor(move || self.0.read_vectored(bufs)) + } + async fn read_vectored_at<'a>( + &self, + bufs: &mut [io::IoSliceMut<'a>], + offset: u64, + ) -> Result { + block_on_dummy_executor(move || self.0.read_vectored_at(bufs, offset)) + } + async fn write_vectored<'a>(&self, bufs: &[io::IoSlice<'a>]) -> Result { + block_on_dummy_executor(move || self.0.write_vectored(bufs)) + } + async fn write_vectored_at<'a>( + &self, + bufs: &[io::IoSlice<'a>], + offset: u64, + ) -> Result { + block_on_dummy_executor(move || self.0.write_vectored_at(bufs, offset)) + } + async fn seek(&self, pos: std::io::SeekFrom) -> Result { + block_on_dummy_executor(move || self.0.seek(pos)) + } + async fn peek(&self, 
buf: &mut [u8]) -> Result { + block_on_dummy_executor(move || self.0.peek(buf)) + } + async fn set_times( + &self, + atime: Option, + mtime: Option, + ) -> Result<(), Error> { + block_on_dummy_executor(move || self.0.set_times(atime, mtime)) + } + async fn num_ready_bytes(&self) -> Result { + block_on_dummy_executor(|| self.0.num_ready_bytes()) + } + + #[cfg(not(windows))] + async fn readable(&mut self) -> Result<(), Error> { + // The Inner impls OwnsRaw, which asserts exclusive use of the handle by the owned object. + // AsyncFd needs to wrap an owned `impl std::os::unix::io::AsRawFd`. Rather than introduce + // mutability to let it own the `Inner`, we are depending on the `&mut self` bound on this + // async method to ensure this is the only Future which can access the RawFd during the + // lifetime of the AsyncFd. + use tokio::io::{unix::AsyncFd, Interest}; + use unsafe_io::os::posish::AsRawFd; + let rawfd = self.0.as_raw_fd(); + match AsyncFd::with_interest(rawfd, Interest::READABLE) { + Ok(asyncfd) => { + let _ = asyncfd.readable().await?; + Ok(()) + } + Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => { + // if e is EPERM, this file isnt supported by epoll because it is immediately + // available for reading: + Ok(()) + } + Err(e) => Err(e.into()), + } + } + #[cfg(windows)] + async fn readable(&mut self) -> Result<(), Error> { + // Windows uses a rawfd based scheduler :( + use wasi_common::ErrorExt; + Err(Error::badf()) + } + + #[cfg(not(windows))] + async fn writable(&mut self) -> Result<(), Error> { + // The Inner impls OwnsRaw, which asserts exclusive use of the handle by the owned object. + // AsyncFd needs to wrap an owned `impl std::os::unix::io::AsRawFd`. Rather than introduce + // mutability to let it own the `Inner`, we are depending on the `&mut self` bound on this + // async method to ensure this is the only Future which can access the RawFd during the + // lifetime of the AsyncFd. 
+ use tokio::io::{unix::AsyncFd, Interest}; + use unsafe_io::os::posish::AsRawFd; + let rawfd = self.0.as_raw_fd(); + match AsyncFd::with_interest(rawfd, Interest::WRITABLE) { + Ok(asyncfd) => { + let _ = asyncfd.writable().await?; + Ok(()) + } + Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => { + // if e is EPERM, this file isnt supported by epoll because it is immediately + // available for writing: + Ok(()) + } + Err(e) => Err(e.into()), + } + } + #[cfg(windows)] + async fn writable(&mut self) -> Result<(), Error> { + // Windows uses a rawfd based scheduler :( + use wasi_common::ErrorExt; + Err(Error::badf()) + } + } + #[cfg(windows)] + impl AsRawHandle for $ty { + fn as_raw_handle(&self) -> RawHandle { + self.0.as_raw_handle() + } + } + }; +} + +wasi_file_impl!(File); +wasi_file_impl!(Stdin); +wasi_file_impl!(Stdout); +wasi_file_impl!(Stderr); diff --git a/crates/wasi-common/tokio/src/lib.rs b/crates/wasi-common/tokio/src/lib.rs new file mode 100644 index 0000000000..e7dc7f42e6 --- /dev/null +++ b/crates/wasi-common/tokio/src/lib.rs @@ -0,0 +1,114 @@ +mod dir; +mod file; +pub mod sched; +pub mod stdio; + +use std::cell::RefCell; +use std::future::Future; +use std::path::Path; +use std::rc::Rc; +pub use wasi_cap_std_sync::{clocks_ctx, random_ctx}; +use wasi_common::{Error, Table, WasiCtx}; + +pub use dir::Dir; +pub use file::File; + +use crate::sched::sched_ctx; + +pub struct WasiCtxBuilder(wasi_common::WasiCtxBuilder); + +impl WasiCtxBuilder { + pub fn new() -> Self { + WasiCtxBuilder(WasiCtx::builder( + random_ctx(), + clocks_ctx(), + sched_ctx(), + Rc::new(RefCell::new(Table::new())), + )) + } + pub fn env(self, var: &str, value: &str) -> Result { + let s = self.0.env(var, value)?; + Ok(WasiCtxBuilder(s)) + } + pub fn envs(self, env: &[(String, String)]) -> Result { + let mut s = self; + for (k, v) in env { + s = s.env(k, v)?; + } + Ok(s) + } + pub fn inherit_env(self) -> Result { + let mut s = self.0; + for (key, value) in std::env::vars() { + 
s = s.env(&key, &value)?; + } + Ok(WasiCtxBuilder(s)) + } + pub fn arg(self, arg: &str) -> Result { + let s = self.0.arg(arg)?; + Ok(WasiCtxBuilder(s)) + } + pub fn args(self, arg: &[String]) -> Result { + let mut s = self; + for a in arg { + s = s.arg(&a)?; + } + Ok(s) + } + pub fn inherit_args(self) -> Result { + let mut s = self.0; + for arg in std::env::args() { + s = s.arg(&arg)?; + } + Ok(WasiCtxBuilder(s)) + } + pub fn stdin(self, f: Box) -> Self { + WasiCtxBuilder(self.0.stdin(f)) + } + pub fn stdout(self, f: Box) -> Self { + WasiCtxBuilder(self.0.stdout(f)) + } + pub fn stderr(self, f: Box) -> Self { + WasiCtxBuilder(self.0.stderr(f)) + } + pub fn inherit_stdin(self) -> Self { + self.stdin(Box::new(crate::stdio::stdin())) + } + pub fn inherit_stdout(self) -> Self { + self.stdout(Box::new(crate::stdio::stdout())) + } + pub fn inherit_stderr(self) -> Self { + self.stderr(Box::new(crate::stdio::stderr())) + } + pub fn inherit_stdio(self) -> Self { + self.inherit_stdin().inherit_stdout().inherit_stderr() + } + pub fn preopened_dir( + self, + dir: cap_std::fs::Dir, + guest_path: impl AsRef, + ) -> Result { + let dir = Box::new(Dir::from_cap_std(dir)); + Ok(WasiCtxBuilder(self.0.preopened_dir(dir, guest_path)?)) + } + pub fn build(self) -> Result { + self.0.build() + } +} + +// Much of this crate is implemented in terms of `async` methods from the +// wasi-cap-std-sync crate. These methods may be async in signature, however, +// they are synchronous in implementation (always Poll::Ready on first poll) +// and perform blocking syscalls. +// +// This function takes this blocking code and executes it using a dummy executor +// to assert its immediate readiness. We tell tokio this is a blocking operation +// with the block_in_place function. 
+pub(crate) fn block_on_dummy_executor<'a, F, Fut, T>(f: F) -> Result +where + F: FnOnce() -> Fut + Send + 'a, + Fut: Future>, + T: Send + 'static, +{ + tokio::task::block_in_place(move || wiggle::run_in_dummy_executor(f())) +} diff --git a/crates/wasi-common/tokio/src/sched.rs b/crates/wasi-common/tokio/src/sched.rs new file mode 100644 index 0000000000..1b97166641 --- /dev/null +++ b/crates/wasi-common/tokio/src/sched.rs @@ -0,0 +1,35 @@ +#[cfg(unix)] +mod unix; +#[cfg(unix)] +pub use unix::poll_oneoff; + +#[cfg(windows)] +mod windows; +#[cfg(windows)] +pub use windows::poll_oneoff; + +use wasi_common::{ + sched::{Duration, Poll, WasiSched}, + Error, +}; + +pub fn sched_ctx() -> Box { + struct AsyncSched; + + #[wiggle::async_trait] + impl WasiSched for AsyncSched { + async fn poll_oneoff<'a>(&self, poll: &mut Poll<'a>) -> Result<(), Error> { + poll_oneoff(poll).await + } + async fn sched_yield(&self) -> Result<(), Error> { + tokio::task::yield_now().await; + Ok(()) + } + async fn sleep(&self, duration: Duration) -> Result<(), Error> { + tokio::time::sleep(duration).await; + Ok(()) + } + } + + Box::new(AsyncSched) +} diff --git a/crates/wasi-common/tokio/src/sched/unix.rs b/crates/wasi-common/tokio/src/sched/unix.rs new file mode 100644 index 0000000000..35e0234f77 --- /dev/null +++ b/crates/wasi-common/tokio/src/sched/unix.rs @@ -0,0 +1,91 @@ +use std::future::Future; +use std::pin::Pin; +use std::task::{Context, Poll as FPoll}; +use wasi_common::{ + sched::{ + subscription::{RwEventFlags, Subscription}, + Poll, + }, + Context as _, Error, +}; + +struct FirstReady<'a, T>(Vec + 'a>>>); + +impl<'a, T> FirstReady<'a, T> { + fn new() -> Self { + FirstReady(Vec::new()) + } + fn push(&mut self, f: impl Future + 'a) { + self.0.push(Box::pin(f)); + } +} + +impl<'a, T> Future for FirstReady<'a, T> { + type Output = T; + fn poll(mut self: Pin<&mut Self>, cx: &mut Context) -> FPoll { + let mut result = FPoll::Pending; + for f in self.as_mut().0.iter_mut() { + match 
f.as_mut().poll(cx) { + FPoll::Ready(r) => match result { + // First ready gets to set the result. But, continue the loop so all futures + // which are ready simultaneously (often on first poll) get to report their + // readiness. + FPoll::Pending => { + result = FPoll::Ready(r); + } + _ => {} + }, + _ => continue, + } + } + return result; + } +} + +pub async fn poll_oneoff<'a>(poll: &mut Poll<'a>) -> Result<(), Error> { + if poll.is_empty() { + return Ok(()); + } + + let duration = poll + .earliest_clock_deadline() + .map(|sub| sub.duration_until()); + + let mut futures = FirstReady::new(); + for s in poll.rw_subscriptions() { + match s { + Subscription::Read(f) => { + futures.push(async move { + f.file.readable().await.context("readable future")?; + f.complete( + f.file + .num_ready_bytes() + .await + .context("read num_ready_bytes")?, + RwEventFlags::empty(), + ); + Ok::<(), Error>(()) + }); + } + + Subscription::Write(f) => { + futures.push(async move { + f.file.writable().await.context("writable future")?; + f.complete(0, RwEventFlags::empty()); + Ok(()) + }); + } + Subscription::MonotonicClock { .. 
} => unreachable!(), + } + } + if let Some(Some(remaining_duration)) = duration { + match tokio::time::timeout(remaining_duration, futures).await { + Ok(r) => r?, + Err(_deadline_elapsed) => {} + } + } else { + futures.await?; + } + + Ok(()) +} diff --git a/crates/wasi-common/tokio/src/sched/windows.rs b/crates/wasi-common/tokio/src/sched/windows.rs new file mode 100644 index 0000000000..90b33aeb47 --- /dev/null +++ b/crates/wasi-common/tokio/src/sched/windows.rs @@ -0,0 +1,47 @@ +use crate::block_on_dummy_executor; +use std::os::windows::io::{AsRawHandle, RawHandle}; +use wasi_cap_std_sync::sched::windows::poll_oneoff_; +use wasi_common::{file::WasiFile, sched::Poll, Error}; + +pub async fn poll_oneoff<'a>(poll: &mut Poll<'a>) -> Result<(), Error> { + // Tokio doesn't provide us the AsyncFd primitive on Windows, so instead + // we use the blocking poll_oneoff implementation from the wasi-cap-std-crate. + // We provide a function specific to this crate's WasiFile types for downcasting + // to a RawHandle. 
+ block_on_dummy_executor(move || poll_oneoff_(poll, wasi_file_is_stdin, wasi_file_raw_handle)) +} + +pub fn wasi_file_is_stdin(f: &dyn WasiFile) -> bool { + f.as_any().is::() +} + +fn wasi_file_raw_handle(f: &dyn WasiFile) -> Option { + let a = f.as_any(); + if a.is::() { + Some( + a.downcast_ref::() + .unwrap() + .as_raw_handle(), + ) + } else if a.is::() { + Some( + a.downcast_ref::() + .unwrap() + .as_raw_handle(), + ) + } else if a.is::() { + Some( + a.downcast_ref::() + .unwrap() + .as_raw_handle(), + ) + } else if a.is::() { + Some( + a.downcast_ref::() + .unwrap() + .as_raw_handle(), + ) + } else { + None + } +} diff --git a/crates/wasi-common/tokio/src/stdio.rs b/crates/wasi-common/tokio/src/stdio.rs new file mode 100644 index 0000000000..dd23c0555a --- /dev/null +++ b/crates/wasi-common/tokio/src/stdio.rs @@ -0,0 +1 @@ +pub use crate::file::{stderr, stdin, stdout, Stderr, Stdin, Stdout}; diff --git a/crates/wasi-common/tokio/tests/poll_oneoff.rs b/crates/wasi-common/tokio/tests/poll_oneoff.rs new file mode 100644 index 0000000000..0e40a510d4 --- /dev/null +++ b/crates/wasi-common/tokio/tests/poll_oneoff.rs @@ -0,0 +1,160 @@ +use anyhow::{Context, Error}; +use cap_std::time::Duration; +use std::collections::HashMap; +use wasi_common::{ + file::{FdFlags, OFlags}, + sched::{Poll, RwEventFlags, SubscriptionResult, Userdata}, + WasiDir, WasiFile, +}; +use wasi_tokio::{clocks_ctx, sched::poll_oneoff, Dir}; + +const TIMEOUT: Duration = Duration::from_millis(50); // Required for slow execution in CI + +#[tokio::test(flavor = "multi_thread")] +async fn empty_file_readable() -> Result<(), Error> { + let clocks = clocks_ctx(); + + let workspace = unsafe { cap_tempfile::tempdir().expect("create tempdir") }; + workspace.create_dir("d").context("create dir")?; + let d = workspace.open_dir("d").context("open dir")?; + let d = Dir::from_cap_std(d); + + let f = d + .open_file(false, "f", OFlags::CREATE, false, true, FdFlags::empty()) + .await + .context("create writable 
file f")?; + let to_write: Vec = vec![0]; + f.write_vectored(&vec![std::io::IoSlice::new(&to_write)]) + .await + .context("write to f")?; + drop(f); + + let mut f = d + .open_file(false, "f", OFlags::empty(), true, false, FdFlags::empty()) + .await + .context("open f as readable")?; + + let mut poll = Poll::new(); + poll.subscribe_read(&mut *f, Userdata::from(123)); + // Timeout bounds time in poll_oneoff + poll.subscribe_monotonic_clock( + &*clocks.monotonic, + clocks + .monotonic + .now(clocks.monotonic.resolution()) + .checked_add(TIMEOUT) + .unwrap(), + clocks.monotonic.resolution(), + Userdata::from(0), + ); + poll_oneoff(&mut poll).await?; + + let events = poll.results(); + + match events.get(0).expect("at least one event") { + (SubscriptionResult::Read(Ok((1, flags))), ud) => { + assert_eq!(*flags, RwEventFlags::empty()); + assert_eq!(*ud, Userdata::from(123)); + } + _ => panic!("expected (Read(Ok(1, empty), 123), got: {:?}", events[0]), + } + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn empty_file_writable() -> Result<(), Error> { + let clocks = clocks_ctx(); + + let workspace = unsafe { cap_tempfile::tempdir().expect("create tempdir") }; + workspace.create_dir("d").context("create dir")?; + let d = workspace.open_dir("d").context("open dir")?; + let d = Dir::from_cap_std(d); + + let mut writable_f = d + .open_file(false, "f", OFlags::CREATE, true, true, FdFlags::empty()) + .await + .context("create writable file")?; + + let mut poll = Poll::new(); + poll.subscribe_write(&mut *writable_f, Userdata::from(123)); + // Timeout bounds time in poll_oneoff + poll.subscribe_monotonic_clock( + &*clocks.monotonic, + clocks + .monotonic + .now(clocks.monotonic.resolution()) + .checked_add(TIMEOUT) + .unwrap(), + clocks.monotonic.resolution(), + Userdata::from(0), + ); + poll_oneoff(&mut poll).await?; + + let events = poll.results(); + + match events.get(0).expect("at least one event") { + (SubscriptionResult::Write(Ok((0, flags))), ud) => { + 
assert_eq!(*flags, RwEventFlags::empty()); + assert_eq!(*ud, Userdata::from(123)); + } + _ => panic!(""), + } + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn stdio_readable() -> Result<(), Error> { + let clocks = clocks_ctx(); + + let deadline = clocks + .monotonic + .now(clocks.monotonic.resolution()) + .checked_add(TIMEOUT) + .unwrap(); + + let mut waiting_on: HashMap> = vec![ + ( + 1, + Box::new(wasi_tokio::stdio::stdout()) as Box, + ), + (2, Box::new(wasi_tokio::stdio::stderr())), + ] + .into_iter() + .collect(); + + while !waiting_on.is_empty() { + let mut poll = Poll::new(); + + for (ix, file) in waiting_on.iter_mut() { + poll.subscribe_write(&mut **file, Userdata::from(*ix)); + } + // Timeout bounds time in poll_oneoff + poll.subscribe_monotonic_clock( + &*clocks.monotonic, + deadline, + clocks.monotonic.resolution(), + Userdata::from(999), + ); + poll_oneoff(&mut poll).await?; + let events = poll.results(); + + for e in events { + match e { + (SubscriptionResult::Write(Ok(_)), ud) => { + let _ = waiting_on.remove(&u64::from(ud)); + } + (SubscriptionResult::Write(Err(_)), ud) => { + panic!("error on ix {}", u64::from(ud)) + } + (SubscriptionResult::Read { .. }, _) => unreachable!(), + (SubscriptionResult::MonotonicClock { .. 
}, _) => { + panic!("timed out before stdin and stdout ready for reading") + } + } + } + } + + Ok(()) +} diff --git a/crates/wasi-crypto/Cargo.toml b/crates/wasi-crypto/Cargo.toml index 2b1c9ed7d8..71354ce575 100644 --- a/crates/wasi-crypto/Cargo.toml +++ b/crates/wasi-crypto/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wasmtime-wasi-crypto" -version = "0.25.0" +version = "0.26.0" authors = ["The Wasmtime Project Developers"] description = "Wasmtime implementation of the wasi-crypto API" documentation = "https://docs.rs/wasmtime-wasi-crypto" @@ -14,9 +14,9 @@ edition = "2018" [dependencies] anyhow = "1.0" wasi-crypto = { path = "spec/implementations/hostcalls/rust", version = "0.1.4" } -wasmtime = { path = "../wasmtime", version = "0.25.0", default-features = false } -wasmtime-wiggle = { path = "../wiggle/wasmtime", version = "0.25.0" } -wiggle = { path = "../wiggle", version = "0.25.0" } +wasmtime = { path = "../wasmtime", version = "0.26.0", default-features = false } +wasmtime-wiggle = { path = "../wiggle/wasmtime", version = "0.26.0" } +wiggle = { path = "../wiggle", version = "0.26.0" } [badges] maintenance = { status = "experimental" } diff --git a/crates/wasi-crypto/src/wiggle_interfaces/error.rs b/crates/wasi-crypto/src/wiggle_interfaces/error.rs index e3651f6db9..e54a104b03 100644 --- a/crates/wasi-crypto/src/wiggle_interfaces/error.rs +++ b/crates/wasi-crypto/src/wiggle_interfaces/error.rs @@ -1,4 +1,4 @@ -use super::{guest_types, WasiCryptoCtx}; +use super::guest_types; use std::num::TryFromIntError; use wasi_crypto::CryptoError; @@ -52,13 +52,6 @@ impl<'a> wiggle::GuestErrorType for guest_types::CryptoErrno { } } -impl guest_types::GuestErrorConversion for WasiCryptoCtx { - fn into_crypto_errno(&self, e: wiggle::GuestError) -> guest_types::CryptoErrno { - eprintln!("GuestError (witx) {:?}", e); - guest_types::CryptoErrno::GuestError - } -} - impl From for guest_types::CryptoErrno { fn from(e: wiggle::GuestError) -> Self { eprintln!("GuestError (impl) 
{:?}", e); diff --git a/crates/wasi-nn/Cargo.toml b/crates/wasi-nn/Cargo.toml index 64b577acca..a5de00ed3a 100644 --- a/crates/wasi-nn/Cargo.toml +++ b/crates/wasi-nn/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wasmtime-wasi-nn" -version = "0.25.0" +version = "0.26.0" authors = ["The Wasmtime Project Developers"] description = "Wasmtime implementation of the wasi-nn API" documentation = "https://docs.rs/wasmtime-wasi-nn" @@ -15,14 +15,14 @@ edition = "2018" # These dependencies are necessary for the witx-generation macros to work: anyhow = "1.0" log = { version = "0.4", default-features = false } -wasmtime = { path = "../wasmtime", version = "0.25.0", default-features = false } -wasmtime-runtime = { path = "../runtime", version = "0.25.0" } -wasmtime-wiggle = { path = "../wiggle/wasmtime", version = "0.25.0" } -wasmtime-wasi = { path = "../wasi", version = "0.25.0" } -wiggle = { path = "../wiggle", version = "0.25.0" } +wasmtime = { path = "../wasmtime", version = "0.26.0", default-features = false } +wasmtime-runtime = { path = "../runtime", version = "0.26.0" } +wasmtime-wiggle = { path = "../wiggle/wasmtime", version = "0.26.0" } +wasmtime-wasi = { path = "../wasi", version = "0.26.0" } +wiggle = { path = "../wiggle", version = "0.26.0" } # These dependencies are necessary for the wasi-nn implementation: -openvino = "0.1.5" +openvino = { version = "0.3.1", features = ["runtime-linking"] } thiserror = "1.0" [build-dependencies] diff --git a/crates/wasi-nn/build.rs b/crates/wasi-nn/build.rs index 189b9513a6..535d0e0f80 100644 --- a/crates/wasi-nn/build.rs +++ b/crates/wasi-nn/build.rs @@ -1,11 +1,9 @@ //! This build script: //! - has the configuration necessary for the wiggle and witx macros. - -use std::path::PathBuf; - fn main() { // This is necessary for Wiggle/Witx macros. 
- let wasi_root = PathBuf::from("./spec").canonicalize().unwrap(); + let cwd = std::env::current_dir().unwrap(); + let wasi_root = cwd.join("spec"); println!("cargo:rustc-env=WASI_ROOT={}", wasi_root.display()); // Also automatically rebuild if the Witx files change diff --git a/crates/wasi-nn/examples/classification-example/Cargo.lock b/crates/wasi-nn/examples/classification-example/Cargo.lock index f98a1e19a8..0a24148738 100644 --- a/crates/wasi-nn/examples/classification-example/Cargo.lock +++ b/crates/wasi-nn/examples/classification-example/Cargo.lock @@ -3,11 +3,12 @@ [[package]] name = "wasi-nn" version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c909acded993dc129e02f64a7646eb7b53079f522a814024a88772f41558996" [[package]] name = "wasi-nn-example" version = "0.19.0" dependencies = [ - "wasi-nn 0.1.0", + "wasi-nn", ] - diff --git a/crates/wasi-nn/examples/classification-example/Cargo.toml b/crates/wasi-nn/examples/classification-example/Cargo.toml index 52bf2dac32..f74c4eab84 100644 --- a/crates/wasi-nn/examples/classification-example/Cargo.toml +++ b/crates/wasi-nn/examples/classification-example/Cargo.toml @@ -7,7 +7,7 @@ edition = "2018" publish = false [dependencies] -wasi-nn = { path = "../wasi-nn-rust-bindings", version = "0.1.0" } +wasi-nn = "0.1.0" # This crate is built with the wasm32-wasi target, so it's separate # from the main Wasmtime build, so use this directive to exclude it diff --git a/crates/wasi-nn/examples/classification-example/src/main.rs b/crates/wasi-nn/examples/classification-example/src/main.rs index 3465de5cae..6b415fb96b 100644 --- a/crates/wasi-nn/examples/classification-example/src/main.rs +++ b/crates/wasi-nn/examples/classification-example/src/main.rs @@ -3,10 +3,10 @@ use std::fs; use wasi_nn; pub fn main() { - let xml = fs::read_to_string("fixture/alexnet.xml").unwrap(); + let xml = fs::read_to_string("fixture/model.xml").unwrap(); println!("Read graph XML, first 50 characters: {}", 
&xml[..50]); - let weights = fs::read("fixture/alexnet.bin").unwrap(); + let weights = fs::read("fixture/model.bin").unwrap(); println!("Read graph weights, size in bytes: {}", weights.len()); let graph = unsafe { @@ -24,10 +24,10 @@ pub fn main() { // Load a tensor that precisely matches the graph input tensor (see // `fixture/frozen_inference_graph.xml`). - let tensor_data = fs::read("fixture/tensor-1x3x227x227-f32.bgr").unwrap(); + let tensor_data = fs::read("fixture/tensor.bgr").unwrap(); println!("Read input tensor, size in bytes: {}", tensor_data.len()); let tensor = wasi_nn::Tensor { - dimensions: &[1, 3, 227, 227], + dimensions: &[1, 3, 224, 224], r#type: wasi_nn::TENSOR_TYPE_F32, data: &tensor_data, }; @@ -42,14 +42,15 @@ pub fn main() { println!("Executed graph inference"); // Retrieve the output. - let mut output_buffer = vec![0f32; 1000]; + let mut output_buffer = vec![0f32; 1001]; unsafe { wasi_nn::get_output( context, 0, &mut output_buffer[..] as *mut [f32] as *mut u8, (output_buffer.len() * 4).try_into().unwrap(), - ); + ) + .unwrap(); } println!( "Found results, sorted top 5: {:?}", @@ -59,10 +60,13 @@ pub fn main() { // Sort the buffer of probabilities. The graph places the match probability for each class at the // index for that class (e.g. the probability of class 42 is placed at buffer[42]). Here we convert -// to a wrapping InferenceResult and sort the results. +// to a wrapping InferenceResult and sort the results. It is unclear why the MobileNet output +// indices are "off by one" but the `.skip(1)` below seems necessary to get results that make sense +// (e.g. 
763 = "revolver" vs 762 = "restaurant") fn sort_results(buffer: &[f32]) -> Vec { let mut results: Vec = buffer .iter() + .skip(1) .enumerate() .map(|(c, p)| InferenceResult(c, *p)) .collect(); diff --git a/crates/wasi-nn/examples/wasi-nn-rust-bindings/.gitignore b/crates/wasi-nn/examples/wasi-nn-rust-bindings/.gitignore deleted file mode 100644 index ea8c4bf7f3..0000000000 --- a/crates/wasi-nn/examples/wasi-nn-rust-bindings/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/target diff --git a/crates/wasi-nn/examples/wasi-nn-rust-bindings/Cargo.lock b/crates/wasi-nn/examples/wasi-nn-rust-bindings/Cargo.lock deleted file mode 100644 index b8e596de2f..0000000000 --- a/crates/wasi-nn/examples/wasi-nn-rust-bindings/Cargo.lock +++ /dev/null @@ -1,5 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -[[package]] -name = "wasi-nn" -version = "0.1.0" diff --git a/crates/wasi-nn/examples/wasi-nn-rust-bindings/Cargo.toml b/crates/wasi-nn/examples/wasi-nn-rust-bindings/Cargo.toml deleted file mode 100644 index c26f080711..0000000000 --- a/crates/wasi-nn/examples/wasi-nn-rust-bindings/Cargo.toml +++ /dev/null @@ -1,14 +0,0 @@ -[package] -name = "wasi-nn" -version = "0.1.0" -authors = ["The Wasmtime Project Developers"] -readme = "README.md" -edition = "2018" -publish = false - -[dependencies] - -# This crate is only used when building the example, so it's separate -# from the main Wasmtime build, so use this directive to exclude it -# from the parent directory's workspace. -[workspace] diff --git a/crates/wasi-nn/examples/wasi-nn-rust-bindings/LICENSE b/crates/wasi-nn/examples/wasi-nn-rust-bindings/LICENSE deleted file mode 100644 index 16fe87b06e..0000000000 --- a/crates/wasi-nn/examples/wasi-nn-rust-bindings/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. 
- - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - -2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - -4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the 
following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - -5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - -Copyright [yyyy] [name of copyright owner] - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. diff --git a/crates/wasi-nn/examples/wasi-nn-rust-bindings/README.md b/crates/wasi-nn/examples/wasi-nn-rust-bindings/README.md deleted file mode 100644 index b11ef16773..0000000000 --- a/crates/wasi-nn/examples/wasi-nn-rust-bindings/README.md +++ /dev/null @@ -1,65 +0,0 @@ -wasi-nn Rust Bindings -===================== - -This crate contains API bindings for [wasi-nn] system calls in Rust. It is similar in purpose to the [wasi bindings] but -this crate provides access to the optional neural network functionality from WebAssembly. - -[wasi-nn]: https://github.com/WebAssembly/wasi-nn -[wasi bindings]: https://github.com/bytecodealliance/wasi - -> __NOTE__: These bindings are experimental (use at your own risk) and subject to upstream changes in the wasi-nn -> specification. 
- -> __NOTE__: In the future this crate may be (should be) moved to its own repository, like the [wasi bindings]. - -### Use - -Depend on this crate in your `Cargo.toml`: - -```toml -[dependencies] -wasi-nn = "0.1.0" -``` - -Use the wasi-nn APIs in your application: - -```rust -use wasi_nn; - -unsafe { - wasi_nn::load( - &[&xml.into_bytes(), &weights], - wasi_nn::GRAPH_ENCODING_OPENVINO, - wasi_nn::EXECUTION_TARGET_CPU, - ) - .unwrap() -} -``` - -Compile the application to WebAssembly: - -```shell script -cargo build --target=wasm32-wasi -``` - -Run the generated Wasm in a runtime supporting wasi-nn. Currently Wasmtime has experimental support using the Wasmtime -APIs; see [main.rs](../main.rs) for an example of how this is accomplished. - -### Generation - -This crate contains code ([`src/generated.rs`](src/generated.rs)) generated by -[`witx-bindgen`](https://github.com/bytecodealliance/wasi/tree/main/crates/witx-bindgen). -To regenerate this code, run `witx-bindgen` against the [`wasi-nn` WITX file](https://github.com/WebAssembly/wasi-nn/blob/master/phases/ephemeral/witx/wasi_ephemeral_nn.witx): - -```shell script -.../crates/witx-bindgen$ cargo run .../wasi-nn/phases/ephemeral/witx/wasi_ephemeral_nn.witx -``` - -### License - -This project is licensed under the Apache 2.0 license. See [LICENSE](LICENSE) for more details. - -### Contribution - -Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in this project by you, -as defined in the Apache-2.0 license, shall be licensed as above, without any additional terms or conditions. 
diff --git a/crates/wasi-nn/examples/wasi-nn-rust-bindings/src/error.rs b/crates/wasi-nn/examples/wasi-nn-rust-bindings/src/error.rs deleted file mode 100644 index 7ae1a50edb..0000000000 --- a/crates/wasi-nn/examples/wasi-nn-rust-bindings/src/error.rs +++ /dev/null @@ -1,76 +0,0 @@ -use super::NnErrno; -use core::fmt; -use core::num::NonZeroU16; - -/// A raw error returned by wasi-nn APIs, internally containing a 16-bit error -/// code. -#[derive(Copy, Clone, PartialEq, Eq, Ord, PartialOrd)] -pub struct Error { - code: NonZeroU16, -} - -impl Error { - /// Constructs a new error from a raw error code, returning `None` if the - /// error code is zero (which means success). - pub fn from_raw_error(error: NnErrno) -> Option { - Some(Error { - code: NonZeroU16::new(error)?, - }) - } - - /// Returns the raw error code that this error represents. - pub fn raw_error(&self) -> u16 { - self.code.get() - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{} (error {})", strerror(self.code.get()), self.code)?; - Ok(()) - } -} - -impl fmt::Debug for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("Error") - .field("code", &self.code) - .field("message", &strerror(self.code.get())) - .finish() - } -} - -/// This should be generated automatically by witx-bindgen but is not yet for enums other than -/// `Errno` (this API uses `NnErrno` to avoid naming conflicts). TODO: https://github.com/bytecodealliance/wasi/issues/52. 
-fn strerror(code: u16) -> &'static str { - match code { - super::NN_ERRNO_SUCCESS => "No error occurred.", - super::NN_ERRNO_INVALID_ARGUMENT => "Caller module passed an invalid argument.", - super::NN_ERRNO_MISSING_MEMORY => "Caller module is missing a memory export.", - super::NN_ERRNO_BUSY => "Device or resource busy.", - _ => "Unknown error.", - } -} - -#[cfg(feature = "std")] -extern crate std; -#[cfg(feature = "std")] -impl std::error::Error for Error {} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn error_from_success_code() { - assert_eq!(None, Error::from_raw_error(0)); - } - - #[test] - fn error_from_invalid_argument_code() { - assert_eq!( - "Caller module passed an invalid argument. (error 1)", - Error::from_raw_error(1).unwrap().to_string() - ); - } -} diff --git a/crates/wasi-nn/examples/wasi-nn-rust-bindings/src/generated.rs b/crates/wasi-nn/examples/wasi-nn-rust-bindings/src/generated.rs deleted file mode 100644 index d03fd1ed0c..0000000000 --- a/crates/wasi-nn/examples/wasi-nn-rust-bindings/src/generated.rs +++ /dev/null @@ -1,199 +0,0 @@ -// This file is automatically generated, DO NOT EDIT -// -// To regenerate this file run the `crates/witx-bindgen` command - -use core::mem::MaybeUninit; - -pub use crate::error::Error; -pub type Result = core::result::Result; -pub type BufferSize = u32; -pub type NnErrno = u16; -/// No error occurred. -pub const NN_ERRNO_SUCCESS: NnErrno = 0; -/// Caller module passed an invalid argument. -pub const NN_ERRNO_INVALID_ARGUMENT: NnErrno = 1; -/// Caller module is missing a memory export. -pub const NN_ERRNO_MISSING_MEMORY: NnErrno = 2; -/// Device or resource busy. 
-pub const NN_ERRNO_BUSY: NnErrno = 3; -pub type TensorDimensions<'a> = &'a [u32]; -pub type TensorType = u8; -pub const TENSOR_TYPE_F16: TensorType = 0; -pub const TENSOR_TYPE_F32: TensorType = 1; -pub const TENSOR_TYPE_U8: TensorType = 2; -pub const TENSOR_TYPE_I32: TensorType = 3; -pub type TensorData<'a> = &'a [u8]; -#[repr(C)] -#[derive(Copy, Clone, Debug)] -pub struct Tensor<'a> { - /// Describe the size of the tensor (e.g. 2x2x2x2 -> [2, 2, 2, 2]). To represent a tensor containing a single value, - /// use `[1]` for the tensor dimensions. - pub dimensions: TensorDimensions<'a>, - pub r#type: TensorType, - /// Contains the tensor data. - pub data: TensorData<'a>, -} -pub type GraphBuilder<'a> = &'a [u8]; -pub type GraphBuilderArray<'a> = &'a [GraphBuilder<'a>]; -pub type Graph = u32; -pub type GraphEncoding = u8; -/// TODO document buffer order -pub const GRAPH_ENCODING_OPENVINO: GraphEncoding = 0; -pub type ExecutionTarget = u8; -pub const EXECUTION_TARGET_CPU: ExecutionTarget = 0; -pub const EXECUTION_TARGET_GPU: ExecutionTarget = 1; -pub const EXECUTION_TARGET_TPU: ExecutionTarget = 2; -pub type GraphExecutionContext = u32; -/// Load an opaque sequence of bytes to use for inference. -/// -/// This allows runtime implementations to support multiple graph encoding formats. For unsupported graph encodings, -/// return `errno::inval`. -/// -/// ## Parameters -/// -/// * `builder` - The bytes necessary to build the graph. -/// * `encoding` - The encoding of the graph. -/// * `target` - Where to execute the graph. 
-pub unsafe fn load( - builder: GraphBuilderArray, - encoding: GraphEncoding, - target: ExecutionTarget, -) -> Result { - let mut graph = MaybeUninit::uninit(); - let rc = wasi_ephemeral_nn::load( - builder.as_ptr(), - builder.len(), - encoding, - target, - graph.as_mut_ptr(), - ); - if let Some(err) = Error::from_raw_error(rc) { - Err(err) - } else { - Ok(graph.assume_init()) - } -} - -/// TODO Functions like `describe_graph_inputs` and `describe_graph_outputs` (returning -/// an array of `$tensor_description`s) might be useful for introspecting the graph but are not yet included here. -/// Create an execution instance of a loaded graph. -/// TODO this may need to accept flags that might affect the compilation or execution of the graph. -pub unsafe fn init_execution_context(graph: Graph) -> Result { - let mut context = MaybeUninit::uninit(); - let rc = wasi_ephemeral_nn::init_execution_context(graph, context.as_mut_ptr()); - if let Some(err) = Error::from_raw_error(rc) { - Err(err) - } else { - Ok(context.assume_init()) - } -} - -/// Define the inputs to use for inference. -/// -/// This should return an $nn_errno (TODO define) if the input tensor does not match the expected dimensions and type. -/// -/// ## Parameters -/// -/// * `index` - The index of the input to change. -/// * `tensor` - The tensor to set as the input. -pub unsafe fn set_input(context: GraphExecutionContext, index: u32, tensor: Tensor) -> Result<()> { - let rc = wasi_ephemeral_nn::set_input(context, index, &tensor as *const _ as *mut _); - if let Some(err) = Error::from_raw_error(rc) { - Err(err) - } else { - Ok(()) - } -} - -/// Extract the outputs after inference. -/// -/// This should return an $nn_errno (TODO define) if the inference has not yet run. -/// -/// ## Parameters -/// -/// * `index` - The index of the output to retrieve. -/// * `out_buffer` - An out parameter to which to copy the tensor data. 
The caller is responsible for allocating enough memory for -/// the tensor data or an error will be returned. Currently there is no dynamic way to extract the additional -/// tensor metadata (i.e. dimension, element type) but this should be added at some point. -/// -/// ## Return -/// -/// * `bytes_written` - The number of bytes of tensor data written to the `$out_buffer`. -pub unsafe fn get_output( - context: GraphExecutionContext, - index: u32, - out_buffer: *mut u8, - out_buffer_max_size: BufferSize, -) -> Result { - let mut bytes_written = MaybeUninit::uninit(); - let rc = wasi_ephemeral_nn::get_output( - context, - index, - out_buffer, - out_buffer_max_size, - bytes_written.as_mut_ptr(), - ); - if let Some(err) = Error::from_raw_error(rc) { - Err(err) - } else { - Ok(bytes_written.assume_init()) - } -} - -/// Compute the inference on the given inputs (see `set_input`). -/// -/// This should return an $nn_errno (TODO define) if the inputs are not all defined. -pub unsafe fn compute(context: GraphExecutionContext) -> Result<()> { - let rc = wasi_ephemeral_nn::compute(context); - if let Some(err) = Error::from_raw_error(rc) { - Err(err) - } else { - Ok(()) - } -} - -pub mod wasi_ephemeral_nn { - use super::*; - #[link(wasm_import_module = "wasi_ephemeral_nn")] - extern "C" { - /// Load an opaque sequence of bytes to use for inference. - /// - /// This allows runtime implementations to support multiple graph encoding formats. For unsupported graph encodings, - /// return `errno::inval`. - pub fn load( - builder_ptr: *const GraphBuilder, - builder_len: usize, - encoding: GraphEncoding, - target: ExecutionTarget, - graph: *mut Graph, - ) -> NnErrno; - /// TODO Functions like `describe_graph_inputs` and `describe_graph_outputs` (returning - /// an array of `$tensor_description`s) might be useful for introspecting the graph but are not yet included here. - /// Create an execution instance of a loaded graph. 
- /// TODO this may need to accept flags that might affect the compilation or execution of the graph. - pub fn init_execution_context(graph: Graph, context: *mut GraphExecutionContext) - -> NnErrno; - /// Define the inputs to use for inference. - /// - /// This should return an $nn_errno (TODO define) if the input tensor does not match the expected dimensions and type. - pub fn set_input( - context: GraphExecutionContext, - index: u32, - tensor: *mut Tensor, - ) -> NnErrno; - /// Extract the outputs after inference. - /// - /// This should return an $nn_errno (TODO define) if the inference has not yet run. - pub fn get_output( - context: GraphExecutionContext, - index: u32, - out_buffer: *mut u8, - out_buffer_max_size: BufferSize, - bytes_written: *mut BufferSize, - ) -> NnErrno; - /// Compute the inference on the given inputs (see `set_input`). - /// - /// This should return an $nn_errno (TODO define) if the inputs are not all defined. - pub fn compute(context: GraphExecutionContext) -> NnErrno; - } -} diff --git a/crates/wasi-nn/examples/wasi-nn-rust-bindings/src/lib.rs b/crates/wasi-nn/examples/wasi-nn-rust-bindings/src/lib.rs deleted file mode 100644 index 86fdcd0091..0000000000 --- a/crates/wasi-nn/examples/wasi-nn-rust-bindings/src/lib.rs +++ /dev/null @@ -1,3 +0,0 @@ -mod error; -mod generated; -pub use generated::*; diff --git a/crates/wasi-nn/src/ctx.rs b/crates/wasi-nn/src/ctx.rs index f180f113fb..dbe93c7224 100644 --- a/crates/wasi-nn/src/ctx.rs +++ b/crates/wasi-nn/src/ctx.rs @@ -2,7 +2,7 @@ //! wasi-nn API. 
use crate::r#impl::UsageError; use crate::witx::types::{Graph, GraphExecutionContext}; -use openvino::InferenceError; +use openvino::{InferenceError, SetupError}; use std::cell::RefCell; use std::collections::HashMap; use std::hash::Hash; @@ -14,8 +14,10 @@ use wiggle::GuestError; pub enum WasiNnError { #[error("guest error")] GuestError(#[from] GuestError), - #[error("openvino error")] - OpenvinoError(#[from] InferenceError), + #[error("openvino inference error")] + OpenvinoInferenceError(#[from] InferenceError), + #[error("openvino setup error")] + OpenvinoSetupError(#[from] SetupError), #[error("usage error")] UsageError(#[from] UsageError), } @@ -74,7 +76,7 @@ impl ExecutionContext { /// Capture the state necessary for calling into `openvino`. pub struct Ctx { - pub(crate) core: openvino::Core, + pub(crate) core: Option, pub(crate) graphs: Table, pub(crate) executions: Table, } @@ -83,7 +85,7 @@ impl Ctx { /// Make a new `WasiNnCtx` with the default settings. pub fn new() -> WasiNnResult { Ok(Self { - core: openvino::Core::new(None)?, + core: Option::default(), graphs: Table::default(), executions: Table::default(), }) diff --git a/crates/wasi-nn/src/impl.rs b/crates/wasi-nn/src/impl.rs index e18caafdc2..47fd323fdb 100644 --- a/crates/wasi-nn/src/impl.rs +++ b/crates/wasi-nn/src/impl.rs @@ -12,6 +12,8 @@ use wiggle::GuestPtr; #[derive(Debug, Error)] pub enum UsageError { + #[error("Invalid context; has the load function been called?")] + InvalidContext, #[error("Only OpenVINO's IR is currently supported, passed encoding: {0:?}")] InvalidEncoding(GraphEncoding), #[error("OpenVINO expects only two buffers (i.e. 
[ir, weights]), passed: {0}")] @@ -34,9 +36,21 @@ impl<'a> WasiEphemeralNn for WasiNnCtx { if encoding != GraphEncoding::Openvino { return Err(UsageError::InvalidEncoding(encoding).into()); } + if builders.len() != 2 { return Err(UsageError::InvalidNumberOfBuilders(builders.len()).into()); } + + // Construct the context if none is present; this is done lazily (i.e. upon actually loading + // a model) because it may fail to find and load the OpenVINO libraries. The laziness limits + // the extent of the error only to wasi-nn users, not all WASI users. + if self.ctx.borrow().core.is_none() { + self.ctx + .borrow_mut() + .core + .replace(openvino::Core::new(None)?); + } + let builders = builders.as_ptr(); let xml = builders.read()?.as_slice()?; let weights = builders.add(1)?.read()?.as_slice()?; @@ -44,11 +58,15 @@ impl<'a> WasiEphemeralNn for WasiNnCtx { .ctx .borrow_mut() .core + .as_mut() + .ok_or(UsageError::InvalidContext)? .read_network_from_buffer(&xml, &weights)?; let executable_graph = self .ctx .borrow_mut() .core + .as_mut() + .ok_or(UsageError::InvalidContext)? .load_network(&graph, map_execution_target_to_string(target))?; let id = self .ctx @@ -94,7 +112,7 @@ impl<'a> WasiEphemeralNn for WasiNnCtx { .dimensions .as_slice()? .iter() - .map(|d| *d as u64) + .map(|d| *d as usize) .collect::>(); let precision = match tensor.type_ { TensorType::F16 => Precision::FP16, diff --git a/crates/wasi-nn/src/witx.rs b/crates/wasi-nn/src/witx.rs index 72120276b7..8244f99644 100644 --- a/crates/wasi-nn/src/witx.rs +++ b/crates/wasi-nn/src/witx.rs @@ -10,21 +10,12 @@ wiggle::from_witx!({ use types::NnErrno; -/// Wiggle generates code that performs some input validation on the arguments passed in by users of -/// wasi-nn. Here we convert the validation error into one (or more, eventually) of the error -/// variants defined in the witx. 
-impl types::GuestErrorConversion for WasiNnCtx { - fn into_nn_errno(&self, e: wiggle::GuestError) -> NnErrno { - eprintln!("Guest error: {:?}", e); - NnErrno::InvalidArgument - } -} - impl<'a> types::UserErrorConversion for WasiNnCtx { fn nn_errno_from_wasi_nn_error(&self, e: WasiNnError) -> Result { eprintln!("Host error: {:?}", e); match e { - WasiNnError::OpenvinoError(_) => unimplemented!(), + WasiNnError::OpenvinoSetupError(_) => unimplemented!(), + WasiNnError::OpenvinoInferenceError(_) => unimplemented!(), WasiNnError::GuestError(_) => unimplemented!(), WasiNnError::UsageError(_) => unimplemented!(), } diff --git a/crates/wasi/Cargo.toml b/crates/wasi/Cargo.toml index 5937ec55a2..3fa9dad245 100644 --- a/crates/wasi/Cargo.toml +++ b/crates/wasi/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wasmtime-wasi" -version = "0.25.0" +version = "0.26.0" authors = ["The Wasmtime Project Developers"] description = "WASI implementation in Rust" license = "Apache-2.0 WITH LLVM-exception" @@ -13,8 +13,15 @@ include = ["src/**/*", "LICENSE", "build.rs"] build = "build.rs" [dependencies] -wasi-common = { path = "../wasi-common", version = "0.25.0" } -wiggle = { path = "../wiggle", default-features = false, version = "0.25.0" } -wasmtime-wiggle = { path = "../wiggle/wasmtime", default-features = false, version = "0.25.0" } -wasmtime = { path = "../wasmtime", default-features = false, version = "0.25.0" } +wasi-common = { path = "../wasi-common", version = "0.26.0" } +wasi-cap-std-sync = { path = "../wasi-common/cap-std-sync", version = "0.26.0", optional = true } +wasi-tokio = { path = "../wasi-common/tokio", version = "0.26.0", optional = true } +wiggle = { path = "../wiggle", default-features = false, version = "0.26.0" } +wasmtime-wiggle = { path = "../wiggle/wasmtime", default-features = false, version = "0.26.0" } +wasmtime = { path = "../wasmtime", default-features = false, version = "0.26.0" } anyhow = "1.0" + +[features] +default = ["sync"] +sync = 
["wasi-cap-std-sync"] +tokio = ["wasi-tokio", "wasmtime/async", "wasmtime-wiggle/async"] diff --git a/crates/wasi/src/lib.rs b/crates/wasi/src/lib.rs index 1c3ad448ad..3edaa34505 100644 --- a/crates/wasi/src/lib.rs +++ b/crates/wasi/src/lib.rs @@ -7,10 +7,43 @@ //! Individual snapshots are available through //! `wasmtime_wasi::snapshots::preview_{0, 1}::Wasi::new(&Store, Rc>)`. +pub use wasi_common::{Error, WasiCtx, WasiCtxBuilder, WasiDir, WasiFile}; + +/// Re-export the commonly used wasi-cap-std-sync crate here. This saves +/// consumers of this library from having to keep additional dependencies +/// in sync. +#[cfg(feature = "sync")] +pub mod sync { + pub use wasi_cap_std_sync::*; + super::define_wasi!(block_on); +} + +/// Sync mode is the "default" of this crate, so we also export it at the top +/// level. +#[cfg(feature = "sync")] +pub use sync::*; + +/// Re-export the wasi-tokio crate here. This saves consumers of this library from having +/// to keep additional dependencies in sync. +#[cfg(feature = "tokio")] +pub mod tokio { + pub use wasi_tokio::*; + super::define_wasi!(async); +} + +// The only difference between these definitions for sync vs async is whether +// the wasmtime::Funcs generated are async (& therefore need an async Store and an executor to run) +// or whether they have an internal "dummy executor" that expects the implementation of all +// the async funcs to poll to Ready immediately. +#[doc(hidden)] +#[macro_export] +macro_rules! define_wasi { + ($async_mode: tt) => { + use std::cell::RefCell; use std::rc::Rc; -pub use wasi_common::{Error, WasiCtx, WasiCtxBuilder, WasiDir, WasiFile}; use wasmtime::{Config, Linker, Store}; +use wasi_common::WasiCtx; /// An instantiated instance of all available wasi exports. Presently includes /// both the "preview1" snapshot and the "unstable" (preview0) snapshot. @@ -71,6 +104,7 @@ necessary. 
Additionally [`Wasi::get_export`] can be used to do name-based resolution.", }, }, + $async_mode: * }); } pub mod preview_0 { @@ -98,6 +132,9 @@ necessary. Additionally [`Wasi::get_export`] can be used to do name-based resolution.", }, }, + $async_mode: * }); } } +} +} diff --git a/crates/wasmtime/Cargo.toml b/crates/wasmtime/Cargo.toml index c99f37001e..a7e5a914cb 100644 --- a/crates/wasmtime/Cargo.toml +++ b/crates/wasmtime/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wasmtime" -version = "0.25.0" +version = "0.26.0" authors = ["The Wasmtime Project Developers"] description = "High-level API to expose the Wasmtime runtime" documentation = "https://docs.rs/wasmtime" @@ -9,18 +9,19 @@ repository = "https://github.com/bytecodealliance/wasmtime" readme = "README.md" edition = "2018" -[package.metadata.docs.rs] -rustdoc-args = ["--cfg", "nightlydoc"] +# FIXME(rust-lang/cargo#9300): uncomment once that lands +# [package.metadata.docs.rs] +# rustdoc-args = ["--cfg", "nightlydoc"] [dependencies] -wasmtime-runtime = { path = "../runtime", version = "0.25.0" } -wasmtime-environ = { path = "../environ", version = "0.25.0" } -wasmtime-jit = { path = "../jit", version = "0.25.0" } -wasmtime-cache = { path = "../cache", version = "0.25.0", optional = true } -wasmtime-profiling = { path = "../profiling", version = "0.25.0" } -wasmtime-fiber = { path = "../fiber", version = "0.25.0", optional = true } -target-lexicon = { version = "0.11.0", default-features = false } -wasmparser = "0.76" +wasmtime-runtime = { path = "../runtime", version = "0.26.0" } +wasmtime-environ = { path = "../environ", version = "0.26.0" } +wasmtime-jit = { path = "../jit", version = "0.26.0" } +wasmtime-cache = { path = "../cache", version = "0.26.0", optional = true } +wasmtime-profiling = { path = "../profiling", version = "0.26.0" } +wasmtime-fiber = { path = "../fiber", version = "0.26.0", optional = true } +target-lexicon = { version = "0.12.0", default-features = false } +wasmparser = "0.77" anyhow 
= "1.0.19" region = "2.2.0" libc = "0.2" @@ -35,6 +36,8 @@ serde = { version = "1.0.94", features = ["derive"] } bincode = "1.2.1" indexmap = "1.6" paste = "1.0.3" +psm = "0.1.11" +lazy_static = "1.4" [target.'cfg(target_os = "windows")'.dependencies] winapi = "0.3.7" @@ -67,12 +70,15 @@ parallel-compilation = ["wasmtime-jit/parallel-compilation"] # Enables support for automatic cache configuration to be enabled in `Config`. cache = ["wasmtime-cache"] -# Enables support for new x64 backend. -experimental_x64 = ["wasmtime-jit/experimental_x64"] +# Use the old x86 backend. +old-x86-backend = ["wasmtime-jit/old-x86-backend"] # Enables support for "async stores" as well as defining host functions as # `async fn` and calling functions asynchronously. -async = ["wasmtime-fiber"] +async = ["wasmtime-fiber", "wasmtime-runtime/async"] # Enables userfaultfd support in the runtime's pooling allocator when building on Linux uffd = ["wasmtime-runtime/uffd"] + +# Enables support for all architectures in JIT and the `wasmtime compile` CLI command. +all-arch = ["wasmtime-jit/all-arch"] diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index da85f832b8..f133fa787b 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -2,6 +2,7 @@ use crate::memory::MemoryCreator; use crate::trampoline::MemoryCreatorProxy; use crate::{func::HostFunc, Caller, FuncType, IntoFunc, Trap, Val, WasmRet, WasmTy}; use anyhow::{bail, Result}; +use serde::{Deserialize, Serialize}; use std::cmp; use std::collections::HashMap; use std::convert::TryFrom; @@ -318,6 +319,10 @@ impl HostFuncMap { fn async_required(&self) -> bool { self.funcs.values().any(|f| f.1) } + + fn iter(&self) -> impl Iterator { + self.funcs.values().map(|v| &*v.0) + } } macro_rules! 
generate_wrap_async_host_func { @@ -378,9 +383,6 @@ pub struct Config { pub(crate) max_wasm_stack: usize, pub(crate) features: WasmFeatures, pub(crate) wasm_backtrace_details_env_used: bool, - pub(crate) max_instances: usize, - pub(crate) max_tables: usize, - pub(crate) max_memories: usize, #[cfg(feature = "async")] pub(crate) async_stack_size: usize, host_funcs: HostFuncMap, @@ -399,16 +401,6 @@ impl Config { .enable("avoid_div_traps") .expect("should be valid flag"); - // Invert cranelift's default-on verification to instead default off. - flags - .set("enable_verifier", "false") - .expect("should be valid flag"); - - // Turn on cranelift speed optimizations by default - flags - .set("opt_level", "speed") - .expect("should be valid flag"); - // We don't use probestack as a stack limit mechanism flags .set("enable_probestack", "false") @@ -426,24 +418,42 @@ impl Config { allocation_strategy: InstanceAllocationStrategy::OnDemand, max_wasm_stack: 1 << 20, wasm_backtrace_details_env_used: false, - features: WasmFeatures { - reference_types: true, - bulk_memory: true, - multi_value: true, - ..WasmFeatures::default() - }, - max_instances: 10_000, - max_tables: 10_000, - max_memories: 10_000, + features: WasmFeatures::default(), #[cfg(feature = "async")] async_stack_size: 2 << 20, host_funcs: HostFuncMap::new(), async_support: false, }; + ret.cranelift_debug_verifier(false); + ret.cranelift_opt_level(OptLevel::Speed); + ret.wasm_reference_types(true); + ret.wasm_multi_value(true); + ret.wasm_bulk_memory(true); ret.wasm_backtrace_details(WasmBacktraceDetails::Environment); ret } + /// Sets the target triple for the [`Config`]. + /// + /// By default, the host target triple is used for the [`Config`]. + /// + /// This method can be used to change the target triple. + /// + /// Cranelift flags will not be inferred for the given target and any + /// existing target-specific Cranelift flags will be cleared. 
+ /// + /// # Errors + /// + /// This method will error if the given target triple is not supported. + pub fn target(&mut self, target: &str) -> Result<&mut Self> { + use std::str::FromStr; + self.isa_flags = native::lookup( + target_lexicon::Triple::from_str(target).map_err(|e| anyhow::anyhow!(e))?, + )?; + + Ok(self) + } + /// Whether or not to enable support for asynchronous functions in Wasmtime. /// /// When enabled, the config can optionally define host functions with `async`. @@ -884,18 +894,31 @@ impl Config { self } - /// Clears native CPU flags inferred from the host. + /// Allows setting a Cranelift boolean flag or preset. This allows + /// fine-tuning of Cranelift settings. /// - /// By default Wasmtime will tune generated code for the host that Wasmtime - /// itself is running on. If you're compiling on one host, however, and - /// shipping artifacts to another host then this behavior may not be - /// desired. This function will clear all inferred native CPU features. + /// Since Cranelift flags may be unstable, this method should not be considered to be stable + /// either; other `Config` functions should be preferred for stability. /// - /// To enable CPU features afterwards it's recommended to use the - /// [`Config::cranelift_other_flag`] method. - pub fn cranelift_clear_cpu_flags(&mut self) -> &mut Self { - self.isa_flags = native::builder_without_flags(); - self + /// # Safety + /// + /// This is marked as unsafe, because setting the wrong flag might break invariants, + /// resulting in execution hazards. + /// + /// # Errors + /// + /// This method can fail if the flag's name does not exist. + pub unsafe fn cranelift_flag_enable(&mut self, flag: &str) -> Result<&mut Self> { + if let Err(err) = self.flags.enable(flag) { + match err { + SetError::BadName(_) => { + // Try the target-specific flags. + self.isa_flags.enable(flag)?; + } + _ => bail!(err), + } + } + Ok(self) } /// Allows settings another Cranelift flag defined by a flag name and value. 
This allows @@ -911,7 +934,7 @@ impl Config { /// /// This method can fail if the flag's name does not exist, or the value is not appropriate for /// the flag type. - pub unsafe fn cranelift_other_flag(&mut self, name: &str, value: &str) -> Result<&mut Self> { + pub unsafe fn cranelift_flag_set(&mut self, name: &str, value: &str) -> Result<&mut Self> { if let Err(err) = self.flags.set(name, value) { match err { SetError::BadName(_) => { @@ -1167,39 +1190,6 @@ impl Config { self } - /// Configures the maximum number of instances which can be created within - /// this `Store`. - /// - /// Instantiation will fail with an error if this limit is exceeded. - /// - /// This value defaults to 10,000. - pub fn max_instances(&mut self, instances: usize) -> &mut Self { - self.max_instances = instances; - self - } - - /// Configures the maximum number of tables which can be created within - /// this `Store`. - /// - /// Instantiation will fail with an error if this limit is exceeded. - /// - /// This value defaults to 10,000. - pub fn max_tables(&mut self, tables: usize) -> &mut Self { - self.max_tables = tables; - self - } - - /// Configures the maximum number of memories which can be created within - /// this `Store`. - /// - /// Instantiation will fail with an error if this limit is exceeded. - /// - /// This value defaults to 10,000. - pub fn max_memories(&mut self, memories: usize) -> &mut Self { - self.max_memories = memories; - self - } - /// Defines a host function for the [`Config`] for the given callback. /// /// Use [`Store::get_host_func`](crate::Store::get_host_func) to get a [`Func`](crate::Func) representing the function. 
@@ -1293,6 +1283,10 @@ impl Config { for_each_function_signature!(generate_wrap_async_host_func); + pub(crate) fn host_funcs(&self) -> impl Iterator { + self.host_funcs.iter() + } + pub(crate) fn get_host_func(&self, module: &str, name: &str) -> Option<&HostFunc> { self.host_funcs.get(module, name) } @@ -1329,28 +1323,27 @@ impl Config { } pub(crate) fn build_allocator(&self) -> Result> { + #[cfg(feature = "async")] + let stack_size = self.async_stack_size; + + #[cfg(not(feature = "async"))] + let stack_size = 0; + match self.allocation_strategy { InstanceAllocationStrategy::OnDemand => Ok(Box::new(OnDemandInstanceAllocator::new( self.mem_creator.clone(), + stack_size, ))), InstanceAllocationStrategy::Pooling { strategy, module_limits, instance_limits, - } => { - #[cfg(feature = "async")] - let stack_size = self.async_stack_size; - - #[cfg(not(feature = "async"))] - let stack_size = 0; - - Ok(Box::new(PoolingInstanceAllocator::new( - strategy.into(), - module_limits.into(), - instance_limits.into(), - stack_size, - )?)) - } + } => Ok(Box::new(PoolingInstanceAllocator::new( + strategy.into(), + module_limits.into(), + instance_limits.into(), + stack_size, + )?)), } } } @@ -1420,7 +1413,7 @@ pub enum Strategy { /// Possible optimization levels for the Cranelift codegen backend. #[non_exhaustive] -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)] pub enum OptLevel { /// No optimizations performed, minimizes compilation time by disabling most /// optimizations. 
diff --git a/crates/wasmtime/src/engine.rs b/crates/wasmtime/src/engine.rs index 245c02a300..8876eb042d 100644 --- a/crates/wasmtime/src/engine.rs +++ b/crates/wasmtime/src/engine.rs @@ -1,10 +1,41 @@ +use crate::signatures::{SignatureCollection, SignatureRegistry}; use crate::Config; use anyhow::Result; +use std::collections::HashMap; use std::sync::Arc; #[cfg(feature = "cache")] use wasmtime_cache::CacheConfig; use wasmtime_jit::Compiler; -use wasmtime_runtime::{debug_builtins, InstanceAllocator}; +use wasmtime_runtime::{debug_builtins, InstanceAllocator, InstanceHandle, VMCallerCheckedAnyfunc}; + +/// This is used as a Send+Sync wrapper around two data structures relating to +/// host functions defined on `Config`: +/// +/// * `anyfuncs` - this stores a mapping between the host function instance and +/// a `VMCallerCheckedAnyfunc` that can be used as the function's value in Wasmtime's ABI. +/// The address of the anyfunc needs to be stable, thus the boxed value. +/// +/// * `signatures` - this stores the collection of shared signatures registered for every +/// usable host functions with this engine. +struct EngineHostFuncs { + anyfuncs: HashMap>, + signatures: SignatureCollection, +} + +impl EngineHostFuncs { + fn new(registry: &SignatureRegistry) -> Self { + Self { + anyfuncs: HashMap::new(), + signatures: SignatureCollection::new(registry), + } + } +} + +// This is safe for send and sync as it is read-only once the +// engine is constructed and the host functions live with the config, +// which the engine keeps a strong reference to. +unsafe impl Send for EngineHostFuncs {} +unsafe impl Sync for EngineHostFuncs {} /// An `Engine` which is a global context for compilation and management of wasm /// modules. 
@@ -37,6 +68,8 @@ struct EngineInner { config: Config, compiler: Compiler, allocator: Box, + signatures: SignatureRegistry, + host_funcs: EngineHostFuncs, } impl Engine { @@ -46,16 +79,35 @@ impl Engine { debug_builtins::ensure_exported(); config.validate()?; let allocator = config.build_allocator()?; + let registry = SignatureRegistry::new(); + let mut host_funcs = EngineHostFuncs::new(®istry); + + // Register all the host function signatures with the collection + for func in config.host_funcs() { + let sig = host_funcs + .signatures + .register(func.ty.as_wasm_func_type(), func.trampoline); + + // Cloning the instance handle is safe as host functions outlive the engine + host_funcs.anyfuncs.insert( + unsafe { func.instance.clone() }, + Box::new(func.anyfunc(sig)), + ); + } + Ok(Engine { inner: Arc::new(EngineInner { config: config.clone(), compiler: config.build_compiler(allocator.as_ref()), allocator, + signatures: registry, + host_funcs, }), }) } /// Returns the configuration settings that this engine is using. + #[inline] pub fn config(&self) -> &Config { &self.inner.config } @@ -77,6 +129,63 @@ impl Engine { pub fn same(a: &Engine, b: &Engine) -> bool { Arc::ptr_eq(&a.inner, &b.inner) } + + pub(crate) fn signatures(&self) -> &SignatureRegistry { + &self.inner.signatures + } + + pub(crate) fn host_func_signatures(&self) -> &SignatureCollection { + &self.inner.host_funcs.signatures + } + + pub(crate) fn host_func_anyfunc( + &self, + instance: &InstanceHandle, + ) -> Option<&VMCallerCheckedAnyfunc> { + self.inner + .host_funcs + .anyfuncs + .get(instance) + .map(AsRef::as_ref) + } + + /// Ahead-of-time (AOT) compiles a WebAssembly module. + /// + /// The `bytes` provided must be in one of two formats: + /// + /// * A [binary-encoded][binary] WebAssembly module. This is always supported. + /// * A [text-encoded][text] instance of the WebAssembly text format. + /// This is only supported when the `wat` feature of this crate is enabled. 
+ /// If this is supplied then the text format will be parsed before validation. + /// Note that the `wat` feature is enabled by default. + /// + /// This method may be used to compile a module for use with a different target + /// host. The output of this method may be used with + /// [`Module::deserialize`](crate::Module::deserialize) on hosts compatible + /// with the [`Config`] associated with this [`Engine`]. + /// + /// The output of this method is safe to send to another host machine for later + /// execution. As the output is already a compiled module, translation and code + /// generation will be skipped and this will improve the performance of constructing + /// a [`Module`](crate::Module) from the output of this method. + /// + /// [binary]: https://webassembly.github.io/spec/core/binary/index.html + /// [text]: https://webassembly.github.io/spec/core/text/index.html + pub fn precompile_module(&self, bytes: &[u8]) -> Result> { + const USE_PAGED_MEM_INIT: bool = cfg!(all(feature = "uffd", target_os = "linux")); + + #[cfg(feature = "wat")] + let bytes = wat::parse_bytes(&bytes)?; + + let (_, artifacts, types) = wasmtime_jit::CompilationArtifacts::build( + &self.inner.compiler, + &bytes, + USE_PAGED_MEM_INIT, + )?; + + crate::module::SerializedModule::from_artifacts(&self.inner.compiler, &artifacts, &types) + .to_bytes() + } } impl Default for Engine { diff --git a/crates/wasmtime/src/frame_info.rs b/crates/wasmtime/src/frame_info.rs deleted file mode 100644 index fbea477d28..0000000000 --- a/crates/wasmtime/src/frame_info.rs +++ /dev/null @@ -1,383 +0,0 @@ -use std::cmp; -use std::collections::BTreeMap; -use std::sync::Arc; -use wasmtime_environ::entity::EntityRef; -use wasmtime_environ::ir; -use wasmtime_environ::wasm::FuncIndex; -use wasmtime_environ::{FunctionAddressMap, Module, TrapInformation}; -use wasmtime_jit::{CompiledModule, SymbolizeContext}; - -#[derive(Default)] -pub struct StoreFrameInfo { - /// An internal map that keeps track of backtrace 
frame information for - /// each module. - /// - /// This map is morally a map of ranges to a map of information for that - /// module. Each module is expected to reside in a disjoint section of - /// contiguous memory. No modules can overlap. - /// - /// The key of this map is the highest address in the module and the value - /// is the module's information, which also contains the start address. - ranges: BTreeMap, -} - -struct ModuleFrameInfo { - start: usize, - functions: BTreeMap, - module: Arc, - symbolize: Option, - has_unparsed_debuginfo: bool, -} - -struct FunctionInfo { - start: usize, - index: FuncIndex, - traps: Vec, - instr_map: FunctionAddressMap, -} - -impl StoreFrameInfo { - /// Fetches frame information about a program counter in a backtrace. - /// - /// Returns an object if this `pc` is known to some previously registered - /// module, or returns `None` if no information can be found. The boolean - /// returned indicates whether the original module has unparsed debug - /// information due to the compiler's configuration. - pub fn lookup_frame_info(&self, pc: usize) -> Option<(FrameInfo, bool)> { - let (module, func) = self.func(pc)?; - - // Use our relative position from the start of the function to find the - // machine instruction that corresponds to `pc`, which then allows us to - // map that to a wasm original source location. - let rel_pos = (pc - func.start) as u32; - let pos = match func - .instr_map - .instructions - .binary_search_by_key(&rel_pos, |map| map.code_offset) - { - // Exact hit! - Ok(pos) => Some(pos), - - // This *would* be at the first slot in the array, so no - // instructions cover `pc`. - Err(0) => None, - - // This would be at the `nth` slot, so we're at the `n-1`th slot. - Err(n) => Some(n - 1), - }; - - // In debug mode for now assert that we found a mapping for `pc` within - // the function, because otherwise something is buggy along the way and - // not accounting for all the instructions. 
This isn't super critical - // though so we can omit this check in release mode. - debug_assert!(pos.is_some(), "failed to find instruction for {:x}", pc); - - let instr = match pos { - Some(pos) => func.instr_map.instructions[pos].srcloc, - None => func.instr_map.start_srcloc, - }; - - // Use our wasm-relative pc to symbolize this frame. If there's a - // symbolication context (dwarf debug info) available then we can try to - // look this up there. - // - // Note that dwarf pcs are code-section-relative, hence the subtraction - // from the location of `instr`. Also note that all errors are ignored - // here for now since technically wasm modules can always have any - // custom section contents. - let mut symbols = Vec::new(); - if let Some(s) = &module.symbolize { - let to_lookup = (instr.bits() as u64) - s.code_section_offset(); - if let Ok(mut frames) = s.addr2line().find_frames(to_lookup) { - while let Ok(Some(frame)) = frames.next() { - symbols.push(FrameSymbol { - name: frame - .function - .as_ref() - .and_then(|l| l.raw_name().ok()) - .map(|s| s.to_string()), - file: frame - .location - .as_ref() - .and_then(|l| l.file) - .map(|s| s.to_string()), - line: frame.location.as_ref().and_then(|l| l.line), - column: frame.location.as_ref().and_then(|l| l.column), - }); - } - } - } - - Some(( - FrameInfo { - module_name: module.module.name.clone(), - func_index: func.index.index() as u32, - func_name: module.module.func_names.get(&func.index).cloned(), - instr, - func_start: func.instr_map.start_srcloc, - symbols, - }, - module.has_unparsed_debuginfo, - )) - } - - /// Returns whether the `pc` specified is contaained within some module's - /// function. - pub fn contains_pc(&self, pc: usize) -> bool { - self.func(pc).is_some() - } - - /// Fetches trap information about a program counter in a backtrace. 
- pub fn lookup_trap_info(&self, pc: usize) -> Option<&TrapInformation> { - let (_module, func) = self.func(pc)?; - let idx = func - .traps - .binary_search_by_key(&((pc - func.start) as u32), |info| info.code_offset) - .ok()?; - Some(&func.traps[idx]) - } - - fn func(&self, pc: usize) -> Option<(&ModuleFrameInfo, &FunctionInfo)> { - let (end, info) = self.ranges.range(pc..).next()?; - if pc < info.start || *end < pc { - return None; - } - let (end, func) = info.functions.range(pc..).next()?; - if pc < func.start || *end < pc { - return None; - } - Some((info, func)) - } - - /// Registers a new compiled module's frame information. - /// - /// This function will register the `names` information for all of the - /// compiled functions within `module`. If the `module` has no functions - /// then `None` will be returned. Otherwise the returned object, when - /// dropped, will be used to unregister all name information from this map. - pub fn register(&mut self, module: &CompiledModule) { - let mut min = usize::max_value(); - let mut max = 0; - let mut functions = BTreeMap::new(); - for (i, allocated, traps, address_map) in module.trap_information() { - let (start, end) = unsafe { - let ptr = (*allocated).as_ptr(); - let len = (*allocated).len(); - // First and last byte of the function text. - (ptr as usize, ptr as usize + len - 1) - }; - // Skip empty functions. - if end < start { - continue; - } - min = cmp::min(min, start); - max = cmp::max(max, end); - let func = FunctionInfo { - start, - index: module.module().func_index(i), - traps: traps.to_vec(), - instr_map: address_map.clone(), - }; - assert!(functions.insert(end, func).is_none()); - } - if functions.len() == 0 { - return; - } - - // First up assert that our chunk of jit functions doesn't collide with - // any other known chunks of jit functions... 
- if let Some((_, prev)) = self.ranges.range(max..).next() { - assert!(prev.start > max); - } - if let Some((prev_end, _)) = self.ranges.range(..=min).next_back() { - assert!(*prev_end < min); - } - - // ... then insert our range and assert nothing was there previously - let prev = self.ranges.insert( - max, - ModuleFrameInfo { - start: min, - functions, - module: module.module().clone(), - symbolize: module.symbolize_context().ok().and_then(|c| c), - has_unparsed_debuginfo: module.has_unparsed_debuginfo(), - }, - ); - assert!(prev.is_none()); - } -} - -/// Description of a frame in a backtrace for a [`Trap`]. -/// -/// Whenever a WebAssembly trap occurs an instance of [`Trap`] is created. Each -/// [`Trap`] has a backtrace of the WebAssembly frames that led to the trap, and -/// each frame is described by this structure. -/// -/// [`Trap`]: crate::Trap -#[derive(Debug)] -pub struct FrameInfo { - module_name: Option, - func_index: u32, - func_name: Option, - func_start: ir::SourceLoc, - instr: ir::SourceLoc, - symbols: Vec, -} - -/// Debug information for a symbol that is attached to a [`FrameInfo`]. -/// -/// When DWARF debug information is present in a wasm file then this structure -/// can be found on a [`FrameInfo`] and can be used to learn about filenames, -/// line numbers, etc, which are the origin of a function in a stack trace. -#[derive(Debug)] -pub struct FrameSymbol { - name: Option, - file: Option, - line: Option, - column: Option, -} - -impl FrameInfo { - /// Returns the WebAssembly function index for this frame. - /// - /// This function index is the index in the function index space of the - /// WebAssembly module that this frame comes from. - pub fn func_index(&self) -> u32 { - self.func_index - } - - /// Returns the identifer of the module that this frame is for. - /// - /// Module identifiers are present in the `name` section of a WebAssembly - /// binary, but this may not return the exact item in the `name` section. 
- /// Module names can be overwritten at construction time or perhaps inferred - /// from file names. The primary purpose of this function is to assist in - /// debugging and therefore may be tweaked over time. - /// - /// This function returns `None` when no name can be found or inferred. - pub fn module_name(&self) -> Option<&str> { - self.module_name.as_deref() - } - - /// Returns a descriptive name of the function for this frame, if one is - /// available. - /// - /// The name of this function may come from the `name` section of the - /// WebAssembly binary, or wasmtime may try to infer a better name for it if - /// not available, for example the name of the export if it's exported. - /// - /// This return value is primarily used for debugging and human-readable - /// purposes for things like traps. Note that the exact return value may be - /// tweaked over time here and isn't guaranteed to be something in - /// particular about a wasm module due to its primary purpose of assisting - /// in debugging. - /// - /// This function returns `None` when no name could be inferred. - pub fn func_name(&self) -> Option<&str> { - self.func_name.as_deref() - } - - /// Returns the offset within the original wasm module this frame's program - /// counter was at. - /// - /// The offset here is the offset from the beginning of the original wasm - /// module to the instruction that this frame points to. - pub fn module_offset(&self) -> usize { - self.instr.bits() as usize - } - - /// Returns the offset from the original wasm module's function to this - /// frame's program counter. - /// - /// The offset here is the offset from the beginning of the defining - /// function of this frame (within the wasm module) to the instruction this - /// frame points to. - pub fn func_offset(&self) -> usize { - (self.instr.bits() - self.func_start.bits()) as usize - } - - /// Returns the debug symbols found, if any, for this function frame. 
- /// - /// When a wasm program is compiled with DWARF debug information then this - /// function may be populated to return symbols which contain extra debug - /// information about a frame including the filename and line number. If no - /// debug information was found or if it was malformed then this will return - /// an empty array. - pub fn symbols(&self) -> &[FrameSymbol] { - &self.symbols - } -} - -impl FrameSymbol { - /// Returns the function name associated with this symbol. - /// - /// Note that this may not be present with malformed debug information, or - /// the debug information may not include it. Also note that the symbol is - /// frequently mangled, so you might need to run some form of demangling - /// over it. - pub fn name(&self) -> Option<&str> { - self.name.as_deref() - } - - /// Returns the source code filename this symbol was defined in. - /// - /// Note that this may not be present with malformed debug information, or - /// the debug information may not include it. - pub fn file(&self) -> Option<&str> { - self.file.as_deref() - } - - /// Returns the 1-indexed source code line number this symbol was defined - /// on. - /// - /// Note that this may not be present with malformed debug information, or - /// the debug information may not include it. - pub fn line(&self) -> Option { - self.line - } - - /// Returns the 1-indexed source code column number this symbol was defined - /// on. - /// - /// Note that this may not be present with malformed debug information, or - /// the debug information may not include it. 
- pub fn column(&self) -> Option { - self.column - } -} - -#[test] -fn test_frame_info() -> Result<(), anyhow::Error> { - use crate::*; - let store = Store::default(); - let module = Module::new( - store.engine(), - r#" - (module - (func (export "add") (param $x i32) (param $y i32) (result i32) (i32.add (local.get $x) (local.get $y))) - (func (export "sub") (param $x i32) (param $y i32) (result i32) (i32.sub (local.get $x) (local.get $y))) - (func (export "mul") (param $x i32) (param $y i32) (result i32) (i32.mul (local.get $x) (local.get $y))) - (func (export "div_s") (param $x i32) (param $y i32) (result i32) (i32.div_s (local.get $x) (local.get $y))) - (func (export "div_u") (param $x i32) (param $y i32) (result i32) (i32.div_u (local.get $x) (local.get $y))) - (func (export "rem_s") (param $x i32) (param $y i32) (result i32) (i32.rem_s (local.get $x) (local.get $y))) - (func (export "rem_u") (param $x i32) (param $y i32) (result i32) (i32.rem_u (local.get $x) (local.get $y))) - ) - "#, - )?; - // Create an instance to ensure the frame information is registered. 
- Instance::new(&store, &module, &[])?; - let info = store.frame_info().borrow(); - for (i, alloc) in module.compiled_module().finished_functions() { - let (start, end) = unsafe { - let ptr = (**alloc).as_ptr(); - let len = (**alloc).len(); - (ptr as usize, ptr as usize + len) - }; - for pc in start..end { - let (frame, _) = info.lookup_frame_info(pc).unwrap(); - assert!(frame.func_index() == i.as_u32()); - } - } - Ok(()) -} diff --git a/crates/wasmtime/src/func.rs b/crates/wasmtime/src/func.rs index f99548fee5..0319ee0002 100644 --- a/crates/wasmtime/src/func.rs +++ b/crates/wasmtime/src/func.rs @@ -1,8 +1,7 @@ -use crate::{sig_registry::SignatureRegistry, trampoline::StoreInstanceHandle}; +use crate::trampoline::StoreInstanceHandle; use crate::{Config, Extern, FuncType, Store, Trap, Val, ValType}; use anyhow::{bail, Context as _, Result}; use smallvec::{smallvec, SmallVec}; -use std::any::Any; use std::cmp::max; use std::fmt; use std::future::Future; @@ -10,6 +9,7 @@ use std::mem; use std::panic::{self, AssertUnwindSafe}; use std::pin::Pin; use std::ptr::{self, NonNull}; +use std::sync::atomic::Ordering::Relaxed; use wasmtime_environ::wasm::{EntityIndex, FuncIndex}; use wasmtime_runtime::{ raise_user_trap, ExportFunction, InstanceAllocator, InstanceHandle, OnDemandInstanceAllocator, @@ -22,9 +22,9 @@ use wasmtime_runtime::{ /// This differs from `Func` in that it is not associated with a `Store`. /// Host functions are associated with a `Config`. 
pub(crate) struct HostFunc { - ty: FuncType, - instance: InstanceHandle, - trampoline: VMTrampoline, + pub ty: FuncType, + pub instance: InstanceHandle, + pub trampoline: VMTrampoline, } impl HostFunc { @@ -42,7 +42,7 @@ impl HostFunc { let func = Box::new(move |caller_vmctx, values_vec: *mut u128| { // Lookup the last registered store as host functions have no associated store let store = wasmtime_runtime::with_last_info(|last| { - last.and_then(Any::downcast_ref::) + last.and_then(|s| s.downcast_ref::()) .cloned() .expect("Host function called without thread state") }); @@ -73,6 +73,23 @@ impl HostFunc { } } + /// Gets a caller-checked anyfunc for this host function given a shared signature index. + /// + /// The shared signature index must have been registered for the signature of + /// this host function. + pub fn anyfunc(&self, sig: VMSharedSignatureIndex) -> VMCallerCheckedAnyfunc { + let mut anyfunc = match self + .instance + .lookup_by_declaration(&EntityIndex::Function(FuncIndex::from_u32(0))) + { + wasmtime_runtime::Export::Function(f) => unsafe { f.anyfunc.as_ref() }.clone(), + _ => unreachable!(), + }; + + anyfunc.type_index = sig; + anyfunc + } + /// Converts a `HostFunc` to a `Func`. 
/// /// # Safety @@ -88,11 +105,11 @@ impl HostFunc { }; let export = ExportFunction { - anyfunc: std::ptr::NonNull::new_unchecked(store.get_host_anyfunc( - &self.instance, - &self.ty, - self.trampoline, - )), + anyfunc: store + .engine() + .host_func_anyfunc(&self.instance) + .unwrap() + .into(), }; Func { @@ -106,7 +123,7 @@ impl HostFunc { impl Drop for HostFunc { fn drop(&mut self) { // Host functions are always allocated with the default (on-demand) allocator - unsafe { OnDemandInstanceAllocator::new(None).deallocate(&self.instance) } + unsafe { OnDemandInstanceAllocator::default().deallocate(&self.instance) } } } @@ -400,7 +417,7 @@ impl Func { let func = Box::new(move |caller_vmctx, values_vec: *mut u128| { // Lookup the last registered store as host functions have no associated store let store = wasmtime_runtime::with_last_info(|last| { - last.and_then(Any::downcast_ref::) + last.and_then(|s| s.downcast_ref::()) .cloned() .expect("function called without thread state") }); @@ -408,13 +425,9 @@ impl Func { Func::invoke(&store, &ty_clone, caller_vmctx, values_vec, &func) }); - let (instance, trampoline) = crate::trampoline::create_function( - &ty, - func, - store.engine().config(), - Some(&mut store.signatures().borrow_mut()), - ) - .expect("failed to create function"); + let (instance, trampoline) = + crate::trampoline::create_function(&ty, func, store.engine().config(), Some(store)) + .expect("failed to create function"); let idx = EntityIndex::Function(FuncIndex::from_u32(0)); let (instance, export) = match instance.lookup_by_declaration(&idx) { @@ -561,15 +574,15 @@ impl Func { /// Any of the Rust types can be returned from the closure as well, in /// addition to some extra types /// - /// | Rust Return Type | WebAssembly Return Type | Meaning | - /// |-------------------|-------------------------|-------------------| - /// | `()` | nothing | no return value | - /// | `Result` | `T` | function may trap | + /// | Rust Return Type | WebAssembly Return Type 
| Meaning | + /// |-------------------|-------------------------|-----------------------| + /// | `()` | nothing | no return value | + /// | `T` | `T` | a single return value | + /// | `(T1, T2, ...)` | `T1 T2 ...` | multiple returns | /// - /// At this time multi-value returns are not supported, and supporting this - /// is the subject of [#1178]. - /// - /// [#1178]: https://github.com/bytecodealliance/wasmtime/issues/1178 + /// Note that all return types can also be wrapped in `Result<_, Trap>` to + /// indicate that the host function can generate a trap as well as possibly + /// returning a value. /// /// Finally you can also optionally take [`Caller`] as the first argument of /// your closure. If inserted then you're able to inspect the caller's @@ -734,7 +747,7 @@ impl Func { /// # } /// ``` pub fn wrap(store: &Store, func: impl IntoFunc) -> Func { - let (_, instance, trampoline) = func.into_func(Some(&mut store.signatures().borrow_mut())); + let (_, instance, trampoline) = func.into_func(Some(store)); let (instance, export) = unsafe { let idx = EntityIndex::Function(FuncIndex::from_u32(0)); @@ -759,35 +772,26 @@ impl Func { /// Returns the underlying wasm type that this `Func` has. pub fn ty(&self) -> FuncType { - // Signatures should always be registered in the store's registry of + // Signatures should always be registered in the engine's registry of // shared signatures, so we should be able to unwrap safely here. - let signatures = self.instance.store.signatures().borrow(); - let (wft, _) = signatures - .lookup_shared(self.sig_index()) - .expect("signature should be registered"); - - // This is only called with `Export::Function`, and since it's coming - // from wasmtime_runtime itself we should support all the types coming - // out of it, so assert such here. 
- FuncType::from_wasm_func_type(&wft) + FuncType::from_wasm_func_type( + self.instance + .store + .engine() + .signatures() + .lookup_type(self.sig_index()) + .expect("signature should be registered"), + ) } /// Returns the number of parameters that this function takes. pub fn param_arity(&self) -> usize { - let signatures = self.instance.store.signatures().borrow(); - let (sig, _) = signatures - .lookup_shared(self.sig_index()) - .expect("signature should be registered"); - sig.params.len() + self.ty().params().len() } /// Returns the number of results this function produces. pub fn result_arity(&self) -> usize { - let signatures = self.instance.store.signatures().borrow(); - let (sig, _) = signatures - .lookup_shared(self.sig_index()) - .expect("signature should be registered"); - sig.returns.len() + self.ty().results().len() } /// Invokes this function with the `params` given, returning the results and @@ -804,7 +808,7 @@ impl Func { /// initiates a panic. pub fn call(&self, params: &[Val]) -> Result> { assert!( - !self.store().async_support(), + !cfg!(feature = "async") || !self.store().async_support(), "must use `call_async` when async support is enabled on the config", ); self._call(params) @@ -907,25 +911,17 @@ impl Func { } pub(crate) unsafe fn from_wasmtime_function(export: &ExportFunction, store: &Store) -> Self { - // Each function signature in a module should have a trampoline stored - // on that module as well, so unwrap the result here since otherwise - // it's a bug in wasmtime. let anyfunc = export.anyfunc.as_ref(); - let trampoline = store - .signatures() - .borrow() - .lookup_shared(anyfunc.type_index) - .expect("failed to retrieve trampoline from module") - .1; Func { instance: store.existing_vmctx(anyfunc.vmctx), export: export.clone(), - trampoline, + trampoline: store.lookup_trampoline(&*anyfunc), } } /// Get a reference to this function's store. 
+ #[inline] pub fn store(&self) -> &Store { &self.instance.store } @@ -1093,7 +1089,9 @@ impl Func { /// and similarly if a function has multiple results you can bind that too /// /// ``` + /// # #[cfg(not(feature = "old-x86-backend"))] /// # use wasmtime::*; + /// # #[cfg(not(feature = "old-x86-backend"))] /// # fn foo(add_with_overflow: &Func) -> anyhow::Result<()> { /// let typed = add_with_overflow.typed::<(u32, u32), (u32, i32)>()?; /// let (result, overflow) = typed.call((u32::max_value(), 2))?; @@ -1148,20 +1146,109 @@ impl fmt::Debug for Func { } } +#[inline] pub(crate) fn invoke_wasm_and_catch_traps( store: &Store, closure: impl FnMut(), ) -> Result<(), Trap> { unsafe { - let canary = 0; - let _auto_reset_canary = store - .externref_activations_table() - .set_stack_canary(&canary); + let _reset = if store.externref_activations_table().stack_canary().is_some() { + None + } else { + Some(enter_wasm_init(store)?) + }; wasmtime_runtime::catch_traps(store, closure).map_err(|e| Trap::from_runtime(store, e)) } } +/// This function is called to register state within `Store` whenever +/// WebAssembly is entered for the first time within the `Store`. This isn't +/// called when wasm is called recursively within the `Store`. +/// +/// This function sets up various limits such as: +/// +/// * The stack limit. This is what ensures that we limit the stack space +/// allocated by WebAssembly code and it's relative to the initial stack +/// pointer that called into wasm. +/// +/// * Stack canaries for externref gc tracing. Currently the implementation +/// relies on walking frames but the stack walker isn't always 100% reliable, +/// so a canary is used to ensure that if the canary is seen then it's +/// guaranteed all wasm frames have been walked. +/// +/// This function may fail if the the stack limit can't be set because an +/// interrupt already happened. Otherwise it returns a value that resets the +/// various limits on `Drop`. 
+#[inline] +fn enter_wasm_init<'a>(store: &'a Store) -> Result { + let stack_pointer = psm::stack_pointer() as usize; + + // Determine the stack pointer where, after which, any wasm code will + // immediately trap. This is checked on the entry to all wasm functions. + // + // Note that this isn't 100% precise. We are requested to give wasm + // `max_wasm_stack` bytes, but what we're actually doing is giving wasm + // probably a little less than `max_wasm_stack` because we're + // calculating the limit relative to this function's approximate stack + // pointer. Wasm will be executed on a frame beneath this one (or next + // to it). In any case it's expected to be at most a few hundred bytes + // of slop one way or another. When wasm is typically given a MB or so + // (a million bytes) the slop shouldn't matter too much. + // + // After we've got the stack limit then we store it into the `stack_limit` + // variable. Note that the store is an atomic swap to ensure that we can + // consume any previously-sent interrupt requests. If we found that wasm was + // previously interrupted then we immediately return a trap (after resetting + // the stack limit). Otherwise we're good to keep on going. + // + // Note the usage of `Relaxed` memory orderings here. This is specifically + // an optimization in the `Drop` below where a `Relaxed` store is speedier + // than a `SeqCst` store. The rationale for `Relaxed` here is that the + // atomic orderings here aren't actually protecting any memory, we're just + // trying to be atomic with respect to this one location in memory (for when + // `InterruptHandle` sends us a signal). Due to the lack of needing to + // synchronize with any other memory it's hoped that the choice of `Relaxed` + // here should be correct for our use case. 
+ let wasm_stack_limit = stack_pointer - store.engine().config().max_wasm_stack; + let interrupts = store.interrupts(); + match interrupts.stack_limit.swap(wasm_stack_limit, Relaxed) { + wasmtime_environ::INTERRUPTED => { + // This means that an interrupt happened before we actually + // called this function, which means that we're now + // considered interrupted. + interrupts.stack_limit.store(usize::max_value(), Relaxed); + return Err(Trap::new_wasm( + Some(store), + None, + wasmtime_environ::ir::TrapCode::Interrupt, + backtrace::Backtrace::new_unresolved(), + )); + } + n => debug_assert_eq!(usize::max_value(), n), + } + store + .externref_activations_table() + .set_stack_canary(Some(stack_pointer)); + + return Ok(Reset(store)); + + struct Reset<'a>(&'a Store); + + impl Drop for Reset<'_> { + #[inline] + fn drop(&mut self) { + self.0.externref_activations_table().set_stack_canary(None); + + // see docs above for why this uses `Relaxed` + self.0 + .interrupts() + .stack_limit + .store(usize::max_value(), Relaxed); + } + } +} + /// A trait implemented for types which can be returned from closures passed to /// [`Func::wrap`] and friends. /// @@ -1174,6 +1261,8 @@ pub unsafe trait WasmRet { // Same as `WasmTy::Abi`. #[doc(hidden)] type Abi: Copy; + #[doc(hidden)] + type Retptr: Copy; // Same as `WasmTy::compatible_with_store`. #[doc(hidden)] @@ -1186,11 +1275,13 @@ pub unsafe trait WasmRet { // `invoke_wasm_and_catch_traps` is on the stack, and therefore this method // is unsafe. #[doc(hidden)] - unsafe fn into_abi_for_ret(self, store: &Store) -> Self::Abi; + unsafe fn into_abi_for_ret(self, store: &Store, ptr: Self::Retptr) -> Result; - // Same as `WasmTy::push`. 
#[doc(hidden)] - fn valtype() -> Option; + fn func_type(params: impl Iterator) -> FuncType; + + #[doc(hidden)] + unsafe fn wrap_trampoline(ptr: *mut u128, f: impl FnOnce(Self::Retptr) -> Self::Abi); // Utilities used to convert an instance of this type to a `Result` // explicitly, used when wrapping async functions which always bottom-out @@ -1203,84 +1294,28 @@ pub unsafe trait WasmRet { fn fallible_from_trap(trap: Trap) -> Self::Fallible; } -unsafe impl WasmRet for () { - type Abi = (); - type Fallible = Result<(), Trap>; - - #[inline] - fn compatible_with_store(&self, _store: &Store) -> bool { - true - } - - #[inline] - unsafe fn into_abi_for_ret(self, _store: &Store) {} - - #[inline] - fn valtype() -> Option { - None - } - - #[inline] - fn into_fallible(self) -> Result<(), Trap> { - Ok(()) - } - - #[inline] - fn fallible_from_trap(trap: Trap) -> Result<(), Trap> { - Err(trap) - } -} - -unsafe impl WasmRet for Result<(), Trap> { - type Abi = (); - type Fallible = Self; - - #[inline] - fn compatible_with_store(&self, _store: &Store) -> bool { - true - } - - #[inline] - unsafe fn into_abi_for_ret(self, _store: &Store) { - match self { - Ok(()) => {} - Err(trap) => raise_user_trap(trap.into()), - } - } - - #[inline] - fn valtype() -> Option { - None - } - - #[inline] - fn into_fallible(self) -> Result<(), Trap> { - self - } - - #[inline] - fn fallible_from_trap(trap: Trap) -> Result<(), Trap> { - Err(trap) - } -} - unsafe impl WasmRet for T where T: WasmTy, { type Abi = ::Abi; + type Retptr = (); type Fallible = Result; fn compatible_with_store(&self, store: &Store) -> bool { ::compatible_with_store(self, store) } - unsafe fn into_abi_for_ret(self, store: &Store) -> Self::Abi { - ::into_abi(self, store) + unsafe fn into_abi_for_ret(self, store: &Store, _retptr: ()) -> Result { + Ok(::into_abi(self, store)) } - fn valtype() -> Option { - Some(::valtype()) + fn func_type(params: impl Iterator) -> FuncType { + FuncType::new(params, Some(::valtype())) + } + + unsafe fn 
wrap_trampoline(ptr: *mut u128, f: impl FnOnce(Self::Retptr) -> Self::Abi) { + *ptr.cast::() = f(()); } fn into_fallible(self) -> Result { @@ -1294,31 +1329,33 @@ where unsafe impl WasmRet for Result where - T: WasmTy, + T: WasmRet, { - type Abi = ::Abi; + type Abi = ::Abi; + type Retptr = ::Retptr; type Fallible = Self; fn compatible_with_store(&self, store: &Store) -> bool { match self { - Ok(x) => ::compatible_with_store(x, store), + Ok(x) => ::compatible_with_store(x, store), Err(_) => true, } } - unsafe fn into_abi_for_ret(self, store: &Store) -> Self::Abi { - match self { - Ok(val) => return ::into_abi(val, store), - Err(trap) => handle_trap(trap), - } - - unsafe fn handle_trap(trap: Trap) -> ! { - raise_user_trap(trap.into()) - } + unsafe fn into_abi_for_ret( + self, + store: &Store, + retptr: Self::Retptr, + ) -> Result { + self.and_then(|val| val.into_abi_for_ret(store, retptr)) } - fn valtype() -> Option { - Some(::valtype()) + fn func_type(params: impl Iterator) -> FuncType { + T::func_type(params) + } + + unsafe fn wrap_trampoline(ptr: *mut u128, f: impl FnOnce(Self::Retptr) -> Self::Abi) { + T::wrap_trampoline(ptr, f) } fn into_fallible(self) -> Result { @@ -1330,6 +1367,169 @@ where } } +macro_rules! 
impl_wasm_host_results { + ($n:tt $($t:ident)*) => ( + #[allow(non_snake_case)] + unsafe impl<$($t),*> WasmRet for ($($t,)*) + where + $($t: WasmTy,)* + ($($t::Abi,)*): HostAbi, + { + type Abi = <($($t::Abi,)*) as HostAbi>::Abi; + type Retptr = <($($t::Abi,)*) as HostAbi>::Retptr; + type Fallible = Result; + + #[inline] + fn compatible_with_store(&self, _store: &Store) -> bool { + let ($($t,)*) = self; + $( $t.compatible_with_store(_store) && )* true + } + + #[inline] + unsafe fn into_abi_for_ret(self, _store: &Store, ptr: Self::Retptr) -> Result { + let ($($t,)*) = self; + let abi = ($($t.into_abi(_store),)*); + Ok(<($($t::Abi,)*) as HostAbi>::into_abi(abi, ptr)) + } + + fn func_type(params: impl Iterator) -> FuncType { + FuncType::new( + params, + std::array::IntoIter::new([$($t::valtype(),)*]), + ) + } + + #[allow(unused_assignments)] + unsafe fn wrap_trampoline(mut _ptr: *mut u128, f: impl FnOnce(Self::Retptr) -> Self::Abi) { + let ($($t,)*) = <($($t::Abi,)*) as HostAbi>::call(f); + $( + *_ptr.cast() = $t; + _ptr = _ptr.add(1); + )* + } + + #[inline] + fn into_fallible(self) -> Result { + Ok(self) + } + + #[inline] + fn fallible_from_trap(trap: Trap) -> Result { + Err(trap) + } + } + ) +} + +for_each_function_signature!(impl_wasm_host_results); + +// Internal trait representing how to communicate tuples of return values across +// an ABI boundary. This internally corresponds to the "wasmtime" ABI inside of +// cranelift itself. Notably the first element of each tuple is returned via the +// typical system ABI (e.g. systemv or fastcall depending on platform) and all +// other values are returned packed via the stack. +// +// This trait helps to encapsulate all the details of that. +#[doc(hidden)] +pub trait HostAbi { + // A value returned from native functions which return `Self` + type Abi: Copy; + // A return pointer, added to the end of the argument list, for native + // functions that return `Self`. 
Note that a 0-sized type here should get + // elided at the ABI level. + type Retptr: Copy; + + // Converts a value of `self` into its components. Stores necessary values + // into `ptr` and then returns whatever needs to be returned from the + // function. + unsafe fn into_abi(self, ptr: Self::Retptr) -> Self::Abi; + + // Calls `f` with a suitably sized return area and requires `f` to return + // the raw abi value of the first element of our tuple. This will then + // unpack the `Retptr` and assemble it with `Self::Abi` to return an + // instance of the whole tuple. + unsafe fn call(f: impl FnOnce(Self::Retptr) -> Self::Abi) -> Self; +} + +macro_rules! impl_host_abi { + // Base case, everything is `()` + (0) => { + impl HostAbi for () { + type Abi = (); + type Retptr = (); + + unsafe fn into_abi(self, _ptr: Self::Retptr) -> Self::Abi {} + + unsafe fn call(f: impl FnOnce(Self::Retptr) -> Self::Abi) -> Self { + f(()) + } + } + }; + + // In the 1-case the retptr is not present, so it's a 0-sized value. + (1 $a:ident) => { + impl<$a: Copy> HostAbi for ($a,) { + type Abi = $a; + type Retptr = (); + + unsafe fn into_abi(self, _ptr: Self::Retptr) -> Self::Abi { + self.0 + } + + unsafe fn call(f: impl FnOnce(Self::Retptr) -> Self::Abi) -> Self { + (f(()),) + } + } + }; + + // This is where the more interesting case happens. The first element of the + // tuple is returned via `Abi` and all other elements are returned via + // `Retptr`. We create a `TupleRetNN` structure to represent all of the + // return values here. + // + // Also note that this isn't implemented for the old backend right now due + // to the original author not really being sure how to implement this in the + // old backend. 
+ ($n:tt $t:ident $($u:ident)*) => {paste::paste!{ + #[doc(hidden)] + #[allow(non_snake_case)] + #[repr(C)] + #[cfg(not(feature = "old-x86-backend"))] + pub struct []<$($u,)*> { + $($u: $u,)* + } + + #[cfg(not(feature = "old-x86-backend"))] + #[allow(non_snake_case, unused_assignments)] + impl<$t: Copy, $($u: Copy,)*> HostAbi for ($t, $($u,)*) { + type Abi = $t; + type Retptr = *mut []<$($u,)*>; + + unsafe fn into_abi(self, ptr: Self::Retptr) -> Self::Abi { + let ($t, $($u,)*) = self; + // Store the tail of our tuple into the return pointer... + $((*ptr).$u = $u;)* + // ... and return the head raw. + $t + } + + unsafe fn call(f: impl FnOnce(Self::Retptr) -> Self::Abi) -> Self { + // Create space to store all the return values and then invoke + // the function. + let mut space = std::mem::MaybeUninit::uninit(); + let t = f(space.as_mut_ptr()); + let space = space.assume_init(); + + // Use the return value as the head of the tuple and unpack our + // return area to get the rest of the tuple. + (t, $(space.$u,)*) + } + } + }}; +} + +for_each_function_signature!(impl_host_abi); + /// Internal trait implemented for all arguments that can be passed to /// [`Func::wrap`] and [`Config::wrap_host_func`](crate::Config::wrap_host_func). /// @@ -1337,10 +1537,7 @@ where /// as an implementation detail of this crate. pub trait IntoFunc { #[doc(hidden)] - fn into_func( - self, - registry: Option<&mut SignatureRegistry>, - ) -> (FuncType, InstanceHandle, VMTrampoline); + fn into_func(self, store: Option<&Store>) -> (FuncType, InstanceHandle, VMTrampoline); } /// A structure representing the *caller's* context when creating a function @@ -1414,6 +1611,7 @@ impl Caller<'_> { } /// Get a reference to the caller's store. + #[inline] pub fn store(&self) -> &Store { self.store } @@ -1452,12 +1650,12 @@ macro_rules! 
impl_into_func { $($args: WasmTy,)* R: WasmRet, { - fn into_func(self, registry: Option<&mut SignatureRegistry>) -> (FuncType, InstanceHandle, VMTrampoline) { + fn into_func(self, store: Option<&Store>) -> (FuncType, InstanceHandle, VMTrampoline) { let f = move |_: Caller<'_>, $($args:$args),*| { self($($args),*) }; - f.into_func(registry) + f.into_func(store) } } @@ -1468,7 +1666,7 @@ macro_rules! impl_into_func { $($args: WasmTy,)* R: WasmRet, { - fn into_func(self, registry: Option<&mut SignatureRegistry>) -> (FuncType, InstanceHandle, VMTrampoline) { + fn into_func(self, store: Option<&Store>) -> (FuncType, InstanceHandle, VMTrampoline) { /// This shim is called by Wasm code, constructs a `Caller`, /// calls the wrapped host function, and returns the translated /// result back to Wasm. @@ -1480,56 +1678,80 @@ macro_rules! impl_into_func { vmctx: *mut VMContext, caller_vmctx: *mut VMContext, $( $args: $args::Abi, )* + retptr: R::Retptr, ) -> R::Abi where F: Fn(Caller<'_>, $( $args ),*) -> R + 'static, $( $args: WasmTy, )* R: WasmRet, { - let state = (*vmctx).host_state(); - // Double-check ourselves in debug mode, but we control - // the `Any` here so an unsafe downcast should also - // work. - debug_assert!(state.is::()); - let func = &*(state as *const _ as *const F); + enum CallResult { + Ok(T), + Trap(Trap), + Panic(Box), + } - let store = wasmtime_runtime::with_last_info(|last| { - last.and_then(Any::downcast_ref::) - .cloned() - .expect("function called without thread state") - }); + // Note that this `result` is intentionally scoped into a + // separate block. Handling traps and panics will involve + // longjmp-ing from this function which means we won't run + // destructors. As a result anything requiring a destructor + // should be part of this block, and the long-jmp-ing + // happens after the block in handling `CallResult`. 
+ let result = { + let state = (*vmctx).host_state(); + // Double-check ourselves in debug mode, but we control + // the `Any` here so an unsafe downcast should also + // work. + debug_assert!(state.is::()); + let func = &*(state as *const _ as *const F); - let ret = { - panic::catch_unwind(AssertUnwindSafe(|| { - func( - Caller { store: &store, caller_vmctx }, - $( $args::from_abi($args, &store), )* - ) - })) + let store = wasmtime_runtime::with_last_info(|last| { + last.and_then(|s| s.downcast_ref::()) + .cloned() + .expect("function called without thread state") + }); + + let ret = { + panic::catch_unwind(AssertUnwindSafe(|| { + func( + Caller { store: &store, caller_vmctx }, + $( $args::from_abi($args, &store), )* + ) + })) + }; + + // Note that we need to be careful when dealing with traps + // here. Traps are implemented with longjmp/setjmp meaning + // that it's not unwinding and consequently no Rust + // destructors are run. We need to be careful to ensure that + // nothing on the stack needs a destructor when we exit + // abnormally from this `match`, e.g. on `Err`, on + // cross-store-issues, or if `Ok(Err)` is raised. + match ret { + Err(panic) => CallResult::Panic(panic), + Ok(ret) => { + // Because the wrapped function is not `unsafe`, we + // can't assume it returned a value that is + // compatible with this store. + if !ret.compatible_with_store(&store) { + // Explicitly drop all locals with destructors prior to raising the trap + drop(store); + drop(ret); + raise_cross_store_trap(); + } + + match ret.into_abi_for_ret(&store, retptr) { + Ok(val) => CallResult::Ok(val), + Err(trap) => CallResult::Trap(trap), + } + } + } }; - // Note that we need to be careful when dealing with traps - // here. Traps are implemented with longjmp/setjmp meaning - // that it's not unwinding and consequently no Rust - // destructors are run. 
We need to be careful to ensure that - // nothing on the stack needs a destructor when we exit - // abnormally from this `match`, e.g. on `Err`, on - // cross-store-issues, or if `Ok(Err)` is raised. - match ret { - Err(panic) => wasmtime_runtime::resume_panic(panic), - Ok(ret) => { - // Because the wrapped function is not `unsafe`, we - // can't assume it returned a value that is - // compatible with this store. - if !ret.compatible_with_store(&store) { - // Explicitly drop all locals with destructors prior to raising the trap - drop(store); - drop(ret); - raise_cross_store_trap(); - } - - ret.into_abi_for_ret(&store) - } + match result { + CallResult::Ok(val) => val, + CallResult::Trap(trap) => raise_user_trap(trap.into()), + CallResult::Panic(panic) => wasmtime_runtime::resume_panic(panic), } } @@ -1556,6 +1778,7 @@ macro_rules! impl_into_func { *mut VMContext, *mut VMContext, $( $args::Abi, )* + R::Retptr, ) -> R::Abi, >(ptr); @@ -1564,23 +1787,22 @@ macro_rules! impl_into_func { let $args = *args.add(_n).cast::<$args::Abi>(); _n += 1; )* - let ret = ptr(callee_vmctx, caller_vmctx, $( $args ),*); - *args.cast::() = ret; + R::wrap_trampoline(args, |retptr| { + ptr(callee_vmctx, caller_vmctx, $( $args, )* retptr) + }); } - let ty = FuncType::new( + let ty = R::func_type( None::.into_iter() $(.chain(Some($args::valtype())))* - , - R::valtype(), ); let trampoline = host_trampoline::<$($args,)* R>; - // If not given a registry, use a default signature index that is guaranteed to trap - // if the function is called indirectly without first being associated with a store (a bug condition). - let shared_signature_id = registry - .map(|r| r.register(ty.as_wasm_func_type(), trampoline)) + // If not given a store, use a default signature index that is guaranteed to trap. + // If the function is called indirectly without first being associated with a store (a bug condition). 
+ let shared_signature_id = store + .map(|s| s.signatures().borrow_mut().register(ty.as_wasm_func_type(), trampoline)) .unwrap_or(VMSharedSignatureIndex::default()); let instance = unsafe { diff --git a/crates/wasmtime/src/func/typed.rs b/crates/wasmtime/src/func/typed.rs index 1558c91982..107810f84b 100644 --- a/crates/wasmtime/src/func/typed.rs +++ b/crates/wasmtime/src/func/typed.rs @@ -1,10 +1,10 @@ -use super::invoke_wasm_and_catch_traps; +use super::{invoke_wasm_and_catch_traps, HostAbi}; use crate::{ExternRef, Func, Store, Trap, ValType}; use anyhow::{bail, Result}; use std::marker; use std::mem::{self, MaybeUninit}; use std::ptr; -use wasmtime_runtime::{VMContext, VMFunctionBody, VMTrampoline}; +use wasmtime_runtime::{VMContext, VMFunctionBody}; /// A statically typed WebAssembly function. /// @@ -53,7 +53,7 @@ where /// connected to an asynchronous store. pub fn call(&self, params: Params) -> Result { assert!( - !self.func.store().async_support(), + !cfg!(feature = "async") || !self.func.store().async_support(), "must use `call_async` with async stores" ); unsafe { self._call(params) } @@ -93,8 +93,6 @@ where )); } - let anyfunc = self.func.export.anyfunc.as_ref(); - let trampoline = self.func.trampoline; let params = MaybeUninit::new(params); let mut ret = MaybeUninit::uninit(); let mut called = false; @@ -102,9 +100,9 @@ where let result = invoke_wasm_and_catch_traps(&self.func.instance.store, || { called = true; let params = ptr::read(params.as_ptr()); + let anyfunc = self.func.export.anyfunc.as_ref(); let result = params.invoke::( &self.func.instance.store, - trampoline, anyfunc.func_ptr.as_ptr(), anyfunc.vmctx, ptr::null_mut(), @@ -209,7 +207,7 @@ unsafe impl WasmTy for Option { unsafe { store .externref_activations_table() - .insert_with_gc(x.inner, store.stack_map_registry()); + .insert_with_gc(x.inner, store.module_info_lookup()); } abi } else { @@ -275,7 +273,6 @@ pub unsafe trait WasmParams { unsafe fn invoke( self, store: &Store, - trampoline: 
VMTrampoline, func: *const VMFunctionBody, vmctx1: *mut VMContext, vmctx2: *mut VMContext, @@ -297,12 +294,11 @@ where unsafe fn invoke( self, store: &Store, - trampoline: VMTrampoline, func: *const VMFunctionBody, vmctx1: *mut VMContext, vmctx2: *mut VMContext, ) -> R { - <(T,)>::invoke((self,), store, trampoline, func, vmctx1, vmctx2) + <(T,)>::invoke((self,), store, func, vmctx1, vmctx2) } } @@ -334,66 +330,30 @@ macro_rules! impl_wasm_params { unsafe fn invoke( self, store: &Store, - trampoline: VMTrampoline, func: *const VMFunctionBody, vmctx1: *mut VMContext, vmctx2: *mut VMContext, ) -> R { - // Some signatures can go directly into JIT code which uses the - // default platform ABI, but basically only those without - // multiple return values. With multiple return values we can't - // natively in Rust call such a function because there's no way - // to model it (yet). + let fnptr = mem::transmute::< + *const VMFunctionBody, + unsafe extern "C" fn( + *mut VMContext, + *mut VMContext, + $($t::Abi,)* + R::Retptr, + ) -> R::Abi, + >(func); + let ($($t,)*) = self; + // Use the `call` function to acquire a `retptr` which we'll + // forward to the native function. Once we have it we also + // convert all our arguments to abi arguments to go to the raw + // function. // - // To work around that we use the trampoline which passes - // arguments/values via the stack which allows us to match the - // expected ABI. Note that this branch, using the trampoline, - // is slower as a result and has an extra indirect function - // call as well. In the future if this is a problem we should - // consider updating JIT code to use an ABI we can call from - // Rust itself. - if R::uses_trampoline() { - R::with_space(|space1| { - // Figure out whether the parameters or the results - // require more space, and use the bigger one as where - // to store arguments and load return values from. 
- let mut space2 = [0; $n]; - let space = if space1.len() < space2.len() { - space2.as_mut_ptr() - } else { - space1.as_mut_ptr() - }; - - // ... store the ABI for all values into our storage - // area... - let ($($t,)*) = self; - let mut _n = 0; - $( - *space.add(_n).cast::<$t::Abi>() = $t.into_abi(store); - _n += 1; - )* - - // ... make the indirect call through the trampoline - // which will read from `space` and also write all the - // results to `space`... - trampoline(vmctx1, vmctx2, func, space); - - // ... and then we can decode all the return values - // from `space`. - R::from_storage(space, store) - }) - } else { - let fnptr = mem::transmute::< - *const VMFunctionBody, - unsafe extern "C" fn( - *mut VMContext, - *mut VMContext, - $($t::Abi,)* - ) -> R::Abi, - >(func); - let ($($t,)*) = self; - R::from_abi(fnptr(vmctx1, vmctx2, $($t.into_abi(store),)*), store) - } + // Upon returning `R::call` will convert all the returns back + // into `R`. + R::call(store, |retptr| { + fnptr(vmctx1, vmctx2, $($t.into_abi(store),)* retptr) + }) } } }; @@ -409,80 +369,45 @@ for_each_function_signature!(impl_wasm_params); /// `TypedFunc` is not currently supported. pub unsafe trait WasmResults: WasmParams { #[doc(hidden)] - type Abi; + type Abi: Copy; #[doc(hidden)] - unsafe fn from_abi(abi: Self::Abi, store: &Store) -> Self; + type Retptr: Copy; #[doc(hidden)] - fn uses_trampoline() -> bool; - // Provides a stack-allocated array with enough space to store all these - // result values. - // - // It'd be nice if we didn't have to have this API and could do something - // with const-generics (or something like that), but I couldn't figure it - // out. If a future Rust explorer is able to get something like `const LEN: - // usize` working that'd be great! 
- #[doc(hidden)] - fn with_space(_: impl FnOnce(&mut [u128]) -> R) -> R; - #[doc(hidden)] - unsafe fn from_storage(ptr: *const u128, store: &Store) -> Self; + unsafe fn call(store: &Store, f: impl FnOnce(Self::Retptr) -> Self::Abi) -> Self; } -unsafe impl WasmResults for T { +// Forwards from a bare type `T` to the 1-tuple type `(T,)` +unsafe impl WasmResults for T +where + (T::Abi,): HostAbi, +{ type Abi = <(T,) as WasmResults>::Abi; - unsafe fn from_abi(abi: Self::Abi, store: &Store) -> Self { - <(T,) as WasmResults>::from_abi(abi, store).0 - } - fn uses_trampoline() -> bool { - <(T,) as WasmResults>::uses_trampoline() - } - fn with_space(f: impl FnOnce(&mut [u128]) -> R) -> R { - <(T,) as WasmResults>::with_space(f) - } - unsafe fn from_storage(ptr: *const u128, store: &Store) -> Self { - <(T,) as WasmResults>::from_storage(ptr, store).0 + type Retptr = <(T,) as WasmResults>::Retptr; + + unsafe fn call(store: &Store, f: impl FnOnce(Self::Retptr) -> Self::Abi) -> Self { + <(T,) as WasmResults>::call(store, f).0 } } -#[doc(hidden)] -pub enum Void {} - macro_rules! 
impl_wasm_results { ($n:tt $($t:ident)*) => { #[allow(non_snake_case, unused_variables)] - unsafe impl<$($t: WasmTy,)*> WasmResults for ($($t,)*) { - type Abi = impl_wasm_results!(@abi $n $($t)*); - unsafe fn from_abi(abi: Self::Abi, store: &Store) -> Self { - impl_wasm_results!(@from_abi abi store $n $($t)*) - } - fn uses_trampoline() -> bool { - $n > 1 - } - fn with_space(f: impl FnOnce(&mut [u128]) -> R) -> R { - f(&mut [0; $n]) - } - unsafe fn from_storage(ptr: *const u128, store: &Store) -> Self { - let mut _n = 0; - $( - let $t = $t::from_abi(*ptr.add(_n).cast::<$t::Abi>(), store); - _n += 1; - )* - ($($t,)*) + unsafe impl<$($t: WasmTy,)*> WasmResults for ($($t,)*) + where ($($t::Abi,)*): HostAbi + { + type Abi = <($($t::Abi,)*) as HostAbi>::Abi; + type Retptr = <($($t::Abi,)*) as HostAbi>::Retptr; + + unsafe fn call(store: &Store, f: impl FnOnce(Self::Retptr) -> Self::Abi) -> Self { + // Delegate via the host abi to figure out what the actual ABI + // for dealing with this tuple type is, and then we can re-tuple + // everything and create actual values via `from_abi` after the + // call is complete. + let ($($t,)*) = <($($t::Abi,)*) as HostAbi>::call(f); + ($($t::from_abi($t, store),)*) } } }; - - // 0/1 return values we can use natively, everything else isn't expressible - // and won't be used so define the abi type to Void. 
- (@abi 0) => (()); - (@abi 1 $t:ident) => ($t::Abi); - (@abi $($t:tt)*) => (Void); - - (@from_abi $abi:ident $store:ident 0) => (()); - (@from_abi $abi:ident $store:ident 1 $t:ident) => (($t::from_abi($abi, $store),)); - (@from_abi $abi:ident $store:ident $($t:tt)*) => ({ - debug_assert!(false); - match $abi {} - }); } for_each_function_signature!(impl_wasm_results); diff --git a/crates/wasmtime/src/instance.rs b/crates/wasmtime/src/instance.rs index 7c89b919e8..7a30f850aa 100644 --- a/crates/wasmtime/src/instance.rs +++ b/crates/wasmtime/src/instance.rs @@ -13,9 +13,9 @@ use wasmtime_environ::wasm::{ }; use wasmtime_environ::Initializer; use wasmtime_runtime::{ - Imports, InstanceAllocationRequest, InstantiationError, RuntimeInstance, StackMapRegistry, - VMContext, VMExternRefActivationsTable, VMFunctionBody, VMFunctionImport, VMGlobalImport, - VMMemoryImport, VMTableImport, + Imports, InstanceAllocationRequest, InstantiationError, RuntimeInstance, VMContext, + VMExternRefActivationsTable, VMFunctionBody, VMFunctionImport, VMGlobalImport, VMMemoryImport, + VMTableImport, }; /// An instantiated WebAssembly module. @@ -362,6 +362,7 @@ impl<'a> Instantiator<'a> { let expected_ty = self.cur.module.compiled_module().module().type_of(*index); matching::MatchCx { + signatures: self.cur.module.signatures(), types: self.cur.module.types(), store: self.store, } @@ -505,10 +506,9 @@ impl<'a> Instantiator<'a> { fn instantiate_raw(&self) -> Result { let compiled_module = self.cur.module.compiled_module(); - // Register the module just before instantiation to ensure we have a - // trampoline registered for every signature and to preserve the module's - // compiled JIT code within the `Store`. - self.store.register_module(&self.cur.module); + // Register the module just before instantiation to ensure we keep the module + // properly referenced while in use by the store. 
+ self.store.modules().borrow_mut().register(&self.cur.module); unsafe { let engine = self.store.engine(); @@ -518,16 +518,14 @@ impl<'a> Instantiator<'a> { module: compiled_module.module().clone(), finished_functions: compiled_module.finished_functions(), imports: self.cur.build(), - lookup_shared_signature: &self - .store - .lookup_shared_signature(self.cur.module.types()), + shared_signatures: self.cur.module.signatures().as_module_map().into(), host_state: Box::new(()), interrupts: self.store.interrupts(), externref_activations_table: self.store.externref_activations_table() as *const VMExternRefActivationsTable as *mut _, - stack_map_registry: self.store.stack_map_registry() as *const StackMapRegistry - as *mut _, + module_info_lookup: Some(self.store.module_info_lookup()), + limiter: self.store.limiter().as_ref(), })?; // After we've created the `InstanceHandle` we still need to run diff --git a/crates/wasmtime/src/lib.rs b/crates/wasmtime/src/lib.rs index 066d034cbc..332f63b4d2 100644 --- a/crates/wasmtime/src/lib.rs +++ b/crates/wasmtime/src/lib.rs @@ -175,6 +175,10 @@ //! lock contention is hampering multithreading throughput. This feature is only //! supported on Linux and requires a Linux kernel version 4.11 or higher. //! +//! * `all-arch` - Not enabled by default. This feature compiles in support for +//! all architectures for both the JIT compiler and the `wasmtime compile` CLI +//! command. +//! //! ## Examples //! //! 
In addition to the examples below be sure to check out the [online embedding @@ -278,13 +282,13 @@ mod func; mod config; mod engine; mod externals; -mod frame_info; mod instance; +mod limits; mod linker; mod memory; mod module; mod r#ref; -mod sig_registry; +mod signatures; mod store; mod trampoline; mod trap; @@ -294,12 +298,12 @@ mod values; pub use crate::config::*; pub use crate::engine::*; pub use crate::externals::*; -pub use crate::frame_info::{FrameInfo, FrameSymbol}; pub use crate::func::*; pub use crate::instance::Instance; +pub use crate::limits::*; pub use crate::linker::*; pub use crate::memory::*; -pub use crate::module::Module; +pub use crate::module::{FrameInfo, FrameSymbol, Module}; pub use crate::r#ref::ExternRef; pub use crate::store::*; pub use crate::trap::*; diff --git a/crates/wasmtime/src/limits.rs b/crates/wasmtime/src/limits.rs new file mode 100644 index 0000000000..fc65aa2e90 --- /dev/null +++ b/crates/wasmtime/src/limits.rs @@ -0,0 +1,208 @@ +pub(crate) const DEFAULT_INSTANCE_LIMIT: usize = 10000; +pub(crate) const DEFAULT_TABLE_LIMIT: usize = 10000; +pub(crate) const DEFAULT_MEMORY_LIMIT: usize = 10000; + +/// Used by hosts to limit resource consumption of instances at runtime. +/// +/// [`Store::new_with_limits`](crate::Store::new_with_limits) can be used +/// with a resource limiter to take into account non-WebAssembly resource +/// usage to determine if a linear memory or table should be grown. +pub trait ResourceLimiter { + /// Notifies the resource limiter that an instance's linear memory has been requested to grow. + /// + /// * `current` is the current size of the linear memory in WebAssembly page units. + /// * `desired` is the desired size of the linear memory in WebAssembly page units. + /// * `maximum` is either the linear memory's maximum or a maximum from an instance allocator, + /// also in WebAssembly page units. A value of `None` indicates that the linear memory is + /// unbounded. 
+ /// + /// This function should return `true` to indicate that the growing operation is permitted or + /// `false` if not permitted. + /// + /// Note that this function will be called even when the desired count exceeds the given maximum. + /// + /// Returning `true` when a maximum has been exceeded will have no effect as the linear memory + /// will not be grown. + fn memory_growing(&self, current: u32, desired: u32, maximum: Option) -> bool; + + /// Notifies the resource limiter that an instance's table has been requested to grow. + /// + /// * `current` is the current number of elements in the table. + /// * `desired` is the desired number of elements in the table. + /// * `maximum` is either the table's maximum or a maximum from an instance allocator, + /// A value of `None` indicates that the table is unbounded. + /// + /// This function should return `true` to indicate that the growing operation is permitted or + /// `false` if not permitted. + /// + /// Note that this function will be called even when the desired count exceeds the given maximum. + /// + /// Returning `true` when a maximum has been exceeded will have no effect as the table will + /// not be grown. + fn table_growing(&self, current: u32, desired: u32, maximum: Option) -> bool; + + /// The maximum number of instances that can be created for a [`Store`](crate::Store). + /// + /// Module instantiation will fail if this limit is exceeded. + /// + /// This value defaults to 10,000. + fn instances(&self) -> usize { + DEFAULT_INSTANCE_LIMIT + } + + /// The maximum number of tables that can be created for a [`Store`](crate::Store). + /// + /// Module instantiation will fail if this limit is exceeded. + /// + /// This value defaults to 10,000. + fn tables(&self) -> usize { + DEFAULT_TABLE_LIMIT + } + + /// The maximum number of linear memories that can be created for a [`Store`](crate::Store). + /// + /// Instantiation will fail with an error if this limit is exceeded. 
+ /// + /// This value defaults to 10,000. + fn memories(&self) -> usize { + DEFAULT_MEMORY_LIMIT + } +} + +pub(crate) struct ResourceLimiterProxy(pub T); + +impl wasmtime_runtime::ResourceLimiter for ResourceLimiterProxy { + fn memory_growing(&self, current: u32, desired: u32, maximum: Option) -> bool { + self.0.memory_growing(current, desired, maximum) + } + + fn table_growing(&self, current: u32, desired: u32, maximum: Option) -> bool { + self.0.table_growing(current, desired, maximum) + } + + fn instances(&self) -> usize { + self.0.instances() + } + + fn tables(&self) -> usize { + self.0.tables() + } + + fn memories(&self) -> usize { + self.0.memories() + } +} + +/// Used to build [`StoreLimits`]. +pub struct StoreLimitsBuilder(StoreLimits); + +impl StoreLimitsBuilder { + /// Creates a new [`StoreLimitsBuilder`]. + pub fn new() -> Self { + Self(StoreLimits::default()) + } + + /// The maximum number of WebAssembly pages a linear memory can grow to. + /// + /// Growing a linear memory beyond this limit will fail. + /// + /// By default, linear memory pages will not be limited. + pub fn memory_pages(mut self, limit: u32) -> Self { + self.0.memory_pages = Some(limit); + self + } + + /// The maximum number of elements in a table. + /// + /// Growing a table beyond this limit will fail. + /// + /// By default, table elements will not be limited. + pub fn table_elements(mut self, limit: u32) -> Self { + self.0.table_elements = Some(limit); + self + } + + /// The maximum number of instances that can be created for a [`Store`](crate::Store). + /// + /// Module instantiation will fail if this limit is exceeded. + /// + /// This value defaults to 10,000. + pub fn instances(mut self, limit: usize) -> Self { + self.0.instances = limit; + self + } + + /// The maximum number of tables that can be created for a [`Store`](crate::Store). + /// + /// Module instantiation will fail if this limit is exceeded. + /// + /// This value defaults to 10,000. 
+ pub fn tables(mut self, tables: usize) -> Self { + self.0.tables = tables; + self + } + + /// The maximum number of linear memories that can be created for a [`Store`](crate::Store). + /// + /// Instantiation will fail with an error if this limit is exceeded. + /// + /// This value defaults to 10,000. + pub fn memories(mut self, memories: usize) -> Self { + self.0.memories = memories; + self + } + + /// Consumes this builder and returns the [`StoreLimits`]. + pub fn build(self) -> StoreLimits { + self.0 + } +} + +/// Provides limits for a [`Store`](crate::Store). +pub struct StoreLimits { + memory_pages: Option, + table_elements: Option, + instances: usize, + tables: usize, + memories: usize, +} + +impl Default for StoreLimits { + fn default() -> Self { + Self { + memory_pages: None, + table_elements: None, + instances: DEFAULT_INSTANCE_LIMIT, + tables: DEFAULT_TABLE_LIMIT, + memories: DEFAULT_MEMORY_LIMIT, + } + } +} + +impl ResourceLimiter for StoreLimits { + fn memory_growing(&self, _current: u32, desired: u32, _maximum: Option) -> bool { + match self.memory_pages { + Some(limit) if desired > limit => false, + _ => true, + } + } + + fn table_growing(&self, _current: u32, desired: u32, _maximum: Option) -> bool { + match self.table_elements { + Some(limit) if desired > limit => false, + _ => true, + } + } + + fn instances(&self) -> usize { + self.instances + } + + fn tables(&self) -> usize { + self.tables + } + + fn memories(&self) -> usize { + self.memories + } +} diff --git a/crates/wasmtime/src/linker.rs b/crates/wasmtime/src/linker.rs index 9418cf8581..590ff155d3 100644 --- a/crates/wasmtime/src/linker.rs +++ b/crates/wasmtime/src/linker.rs @@ -1,7 +1,6 @@ use crate::instance::InstanceBuilder; use crate::{ - Extern, ExternType, Func, FuncType, GlobalType, ImportType, Instance, IntoFunc, Module, Store, - Trap, + Extern, ExternType, Func, FuncType, ImportType, Instance, IntoFunc, Module, Store, Trap, }; use anyhow::{anyhow, bail, Context, Error, Result}; use 
log::warn; @@ -29,38 +28,22 @@ use std::rc::Rc; /// module and then has its own name. This basically follows the wasm standard /// for modularization. /// -/// Names in a `Linker` can be defined twice, but only for different signatures -/// of items. This means that every item defined in a `Linker` has a unique -/// name/type pair. For example you can define two functions with the module -/// name `foo` and item name `bar`, so long as they have different function -/// signatures. Currently duplicate memories and tables are not allowed, only -/// one-per-name is allowed. -/// -/// Note that allowing duplicates by shadowing the previous definition can be -/// controlled with the [`Linker::allow_shadowing`] method as well. +/// Names in a `Linker` cannot be defined twice, but allowing duplicates by +/// shadowing the previous definition can be controlled with the +/// [`Linker::allow_shadowing`] method. pub struct Linker { store: Store, string2idx: HashMap, usize>, strings: Vec>, map: HashMap, allow_shadowing: bool, + allow_unknown_exports: bool, } #[derive(Hash, PartialEq, Eq)] struct ImportKey { name: usize, module: usize, - kind: ImportKind, -} - -#[derive(Hash, PartialEq, Eq, Debug)] -enum ImportKind { - Func(FuncType), - Global(GlobalType), - Memory, - Table, - Module, - Instance, } impl Linker { @@ -87,6 +70,7 @@ impl Linker { string2idx: HashMap::new(), strings: Vec::new(), allow_shadowing: false, + allow_unknown_exports: false, } } @@ -120,6 +104,32 @@ impl Linker { self } + /// Configures whether this [`Linker`] will allow unknown exports from + /// command modules. + /// + /// By default a [`Linker`] will error when unknown exports are encountered + /// in a command module while using [`Linker::module`]. + /// + /// This method can be used to allow unknown exports from command modules. 
+ /// + /// # Examples + /// + /// ``` + /// # use wasmtime::*; + /// # fn main() -> anyhow::Result<()> { + /// # let store = Store::default(); + /// # let module = Module::new(store.engine(), "(module)")?; + /// let mut linker = Linker::new(&store); + /// linker.allow_unknown_exports(true); + /// linker.module("mod", &module)?; + /// # Ok(()) + /// # } + /// ``` + pub fn allow_unknown_exports(&mut self, allow: bool) -> &mut Linker { + self.allow_unknown_exports = allow; + self + } + /// Defines a new item in this [`Linker`]. /// /// This method will add a new definition, by name, to this instance of @@ -164,10 +174,20 @@ impl Linker { name: &str, item: impl Into, ) -> Result<&mut Self> { - self._define(module, name, item.into()) + self._define(module, Some(name), item.into()) } - fn _define(&mut self, module: &str, name: &str, item: Extern) -> Result<&mut Self> { + /// Same as [`Linker::define`], except only the name of the import is + /// provided, not a module name as well. + /// + /// This is only relevant when working with the module linking proposal + /// where one-level names are allowed (in addition to two-level names). + /// Otherwise this method need not be used. + pub fn define_name(&mut self, name: &str, item: impl Into) -> Result<&mut Self> { + self._define(name, None, item.into()) + } + + fn _define(&mut self, module: &str, name: Option<&str>, item: Extern) -> Result<&mut Self> { if !item.comes_from_same_store(&self.store) { bail!("all linker items must be from the same store"); } @@ -217,7 +237,7 @@ impl Linker { name: &str, func: impl IntoFunc, ) -> Result<&mut Self> { - self._define(module, name, Func::wrap(&self.store, func).into()) + self._define(module, Some(name), Func::wrap(&self.store, func).into()) } /// Convenience wrapper to define an entire [`Instance`] in this linker. 
@@ -270,7 +290,7 @@ impl Linker { bail!("all linker items must be from the same store"); } for export in instance.exports() { - self.insert(module_name, export.name(), export.into_extern())?; + self.insert(module_name, Some(export.name()), export.into_extern())?; } Ok(self) } @@ -450,7 +470,7 @@ impl Linker { Ok(()) }, ); - self.insert(module_name, export.name(), func.into())?; + self.insert(module_name, Some(export.name()), func.into())?; } else if export.name() == "memory" && export.ty().memory().is_some() { // Allow an exported "memory" memory for now. } else if export.name() == "__indirect_function_table" && export.ty().table().is_some() @@ -477,7 +497,7 @@ impl Linker { // Allow an exported "__rtti_base" memory for compatibility with // AssemblyScript. warn!("command module exporting '__rtti_base' is deprecated; pass `--runtime half` to the AssemblyScript compiler"); - } else { + } else if !self.allow_unknown_exports { bail!("command export '{}' is not a function", export.name()); } } @@ -506,15 +526,16 @@ impl Linker { Ok(()) } - fn insert(&mut self, module: &str, name: &str, item: Extern) -> Result<()> { - let key = self.import_key(module, name, item.ty()); + fn insert(&mut self, module: &str, name: Option<&str>, item: Extern) -> Result<()> { + let key = self.import_key(module, name); + let desc = || match name { + Some(name) => format!("{}::{}", module, name), + None => module.to_string(), + }; match self.map.entry(key) { - Entry::Occupied(o) if !self.allow_shadowing => bail!( - "import of `{}::{}` with kind {:?} defined twice", - module, - name, - o.key().kind, - ), + Entry::Occupied(_) if !self.allow_shadowing => { + bail!("import of `{}` defined twice", desc(),) + } Entry::Occupied(mut o) => { o.insert(item); } @@ -522,13 +543,10 @@ impl Linker { // If shadowing is not allowed, check for an existing host function if !self.allow_shadowing { if let Extern::Func(_) = &item { - if self.store.get_host_func(module, name).is_some() { - bail!( - "import of 
`{}::{}` with kind {:?} defined twice", - module, - name, - v.key().kind, - ) + if let Some(name) = name { + if self.store.get_host_func(module, name).is_some() { + bail!("import of `{}` defined twice", desc(),) + } } } } @@ -538,22 +556,12 @@ impl Linker { Ok(()) } - fn import_key(&mut self, module: &str, name: &str, ty: ExternType) -> ImportKey { + fn import_key(&mut self, module: &str, name: Option<&str>) -> ImportKey { ImportKey { module: self.intern_str(module), - name: self.intern_str(name), - kind: self.import_kind(ty), - } - } - - fn import_kind(&self, ty: ExternType) -> ImportKind { - match ty { - ExternType::Func(f) => ImportKind::Func(f), - ExternType::Global(f) => ImportKind::Global(f), - ExternType::Memory(_) => ImportKind::Memory, - ExternType::Table(_) => ImportKind::Table, - ExternType::Module(_) => ImportKind::Module, - ExternType::Instance(_) => ImportKind::Instance, + name: name + .map(|name| self.intern_str(name)) + .unwrap_or(usize::max_value()), } } @@ -633,33 +641,11 @@ impl Linker { } fn link_error(&self, import: &ImportType) -> Error { - let mut options = Vec::new(); - for i in self.map.keys() { - if &*self.strings[i.module] != import.module() - || self.strings.get(i.name).map(|s| &**s) != import.name() - { - continue; - } - options.push(format!(" * {:?}\n", i.kind)); - } let desc = match import.name() { Some(name) => format!("{}::{}", import.module(), name), None => import.module().to_string(), }; - if options.is_empty() { - return anyhow!("unknown import: `{}` has not been defined", desc); - } - - options.sort(); - - anyhow!( - "incompatible import type for `{}` specified\n\ - desired signature was: {:?}\n\ - signatures available:\n\n{}", - desc, - import.ty(), - options.concat(), - ) + anyhow!("unknown import: `{}` has not been defined", desc) } /// Returns the [`Store`] that this linker is connected to. 
@@ -813,7 +799,6 @@ impl Linker { Some(name) => *self.string2idx.get(name)?, None => usize::max_value(), }, - kind: self.import_kind(import.ty()), }; self.map.get(&key).cloned() } diff --git a/crates/wasmtime/src/memory.rs b/crates/wasmtime/src/memory.rs index 39b6349274..57aee031ff 100644 --- a/crates/wasmtime/src/memory.rs +++ b/crates/wasmtime/src/memory.rs @@ -262,7 +262,7 @@ impl Memory { /// let store = Store::new(&engine); /// /// let memory_ty = MemoryType::new(Limits::new(1, None)); - /// let memory = Memory::new(&store, memory_ty); + /// let memory = Memory::new(&store, memory_ty)?; /// /// let module = Module::new(&engine, "(module (memory (import \"\" \"\") 1))")?; /// let instance = Instance::new(&store, &module, &[memory.into()])?; @@ -270,13 +270,12 @@ impl Memory { /// # Ok(()) /// # } /// ``` - pub fn new(store: &Store, ty: MemoryType) -> Memory { - let (instance, wasmtime_export) = - generate_memory_export(store, &ty).expect("generated memory"); - Memory { + pub fn new(store: &Store, ty: MemoryType) -> Result { + let (instance, wasmtime_export) = generate_memory_export(store, &ty)?; + Ok(Memory { instance, wasmtime_export, - } + }) } /// Returns the underlying type of this memory. @@ -454,7 +453,7 @@ impl Memory { .memory_index(unsafe { &*self.wasmtime_export.definition }); self.instance .memory_grow(index, delta) - .ok_or_else(|| anyhow!("failed to grow memory")) + .ok_or_else(|| anyhow!("failed to grow memory by `{}`", delta)) } pub(crate) unsafe fn from_wasmtime_memory( @@ -500,6 +499,10 @@ pub unsafe trait LinearMemory { /// Returns the number of allocated wasm pages. fn size(&self) -> u32; + /// Returns the maximum number of pages the memory can grow to. + /// Returns `None` if the memory is unbounded. + fn maximum(&self) -> Option; + /// Grow memory by the specified amount of wasm pages. 
/// /// Returns `None` if memory can't be grown by the specified amount @@ -568,7 +571,7 @@ mod tests { .dynamic_memory_guard_size(0); let store = Store::new(&Engine::new(&cfg).unwrap()); let ty = MemoryType::new(Limits::new(1, None)); - let mem = Memory::new(&store, ty); + let mem = Memory::new(&store, ty).unwrap(); assert_eq!(mem.wasmtime_export.memory.offset_guard_size, 0); match mem.wasmtime_export.memory.style { wasmtime_environ::MemoryStyle::Dynamic => {} diff --git a/crates/wasmtime/src/module.rs b/crates/wasmtime/src/module.rs index b068416ee7..4964189d7b 100644 --- a/crates/wasmtime/src/module.rs +++ b/crates/wasmtime/src/module.rs @@ -1,10 +1,10 @@ -use crate::types::{ExportType, ExternType, ImportType}; +use crate::{ + signatures::SignatureCollection, + types::{ExportType, ExternType, ImportType}, +}; use crate::{Engine, ModuleType}; use anyhow::{bail, Context, Result}; -use bincode::Options; -use serde::{Deserialize, Serialize}; -use std::collections::HashMap; -use std::hash::Hash; +use std::fs; use std::path::Path; use std::sync::Arc; use wasmparser::Validator; @@ -14,6 +14,12 @@ use wasmtime_environ::entity::PrimaryMap; use wasmtime_environ::wasm::ModuleIndex; use wasmtime_jit::{CompilationArtifacts, CompiledModule, TypeTables}; +mod registry; +mod serialization; + +pub use registry::{FrameInfo, FrameSymbol, GlobalModuleRegistry, ModuleRegistry}; +pub use serialization::SerializedModule; + /// A compiled WebAssembly module, ready to be instantiated. /// /// A `Module` is a compiled in-memory representation of an input WebAssembly @@ -30,7 +36,7 @@ use wasmtime_jit::{CompilationArtifacts, CompiledModule, TypeTables}; /// compiling the original wasm module only once with a single [`Module`] /// instance. /// -/// The `Module` is threadsafe and safe to share accross threads. +/// The `Module` is thread-safe and safe to share across threads. 
/// /// ## Modules and `Clone` /// @@ -101,77 +107,29 @@ struct ModuleInner { /// Type information of this module and all `artifact_upvars` compiled /// modules. types: Arc, -} - -/// A small helper struct which defines modules are serialized. -#[derive(serde::Serialize, serde::Deserialize)] -struct ModuleSerialized<'a> { - /// All compiled artifacts neeeded by this module, where the last entry in - /// this list is the artifacts for the module itself. - artifacts: Vec>, - /// Closed-over module values that are also needed for this module. - modules: Vec>, - /// The index into the list of type tables that are used for this module's - /// type tables. - type_tables: usize, -} - -// This is like `std::borrow::Cow` but it doesn't have a `Clone` bound on `T` -enum MyCow<'a, T> { - Borrowed(&'a T), - Owned(T), -} - -impl<'a, T> MyCow<'a, T> { - fn unwrap_owned(self) -> T { - match self { - MyCow::Owned(val) => val, - MyCow::Borrowed(_) => unreachable!(), - } - } -} - -impl<'a, T: Serialize> Serialize for MyCow<'a, T> { - fn serialize(&self, dst: S) -> Result - where - S: serde::ser::Serializer, - { - match self { - MyCow::Borrowed(val) => val.serialize(dst), - MyCow::Owned(val) => val.serialize(dst), - } - } -} - -impl<'a, 'b, T: Deserialize<'a>> Deserialize<'a> for MyCow<'b, T> { - fn deserialize(src: D) -> Result - where - D: serde::de::Deserializer<'a>, - { - Ok(MyCow::Owned(T::deserialize(src)?)) - } + /// Registered shared signature for the module. + signatures: Arc, } impl Module { /// Creates a new WebAssembly `Module` from the given in-memory `bytes`. /// - /// The `bytes` provided must be in one of two formats: + /// The `bytes` provided must be in one of the following formats: /// - /// * It can be a [binary-encoded][binary] WebAssembly module. This - /// is always supported. - /// * It may also be a [text-encoded][text] instance of the WebAssembly - /// text format. This is only supported when the `wat` feature of this - /// crate is enabled. 
If this is supplied then the text format will be - /// parsed before validation. Note that the `wat` feature is enabled by - /// default. + /// * A [binary-encoded][binary] WebAssembly module. This is always supported. + /// * A [text-encoded][text] instance of the WebAssembly text format. + /// This is only supported when the `wat` feature of this crate is enabled. + /// If this is supplied then the text format will be parsed before validation. + /// Note that the `wat` feature is enabled by default. /// /// The data for the wasm module must be loaded in-memory if it's present /// elsewhere, for example on disk. This requires that the entire binary is /// loaded into memory all at once, this API does not support streaming /// compilation of a module. /// - /// The WebAssembly binary will be decoded and validated. It will also be - /// compiled according to the configuration of the provided `engine`. + /// If the module has not been already been compiled, the WebAssembly binary will + /// be decoded and validated. It will also be compiled according to the + /// configuration of the provided `engine`. /// /// # Errors /// @@ -184,7 +142,7 @@ impl Module { /// * Implementation-specific limits were exceeded with a valid binary (for /// example too many locals) /// * The wasm binary may use features that are not enabled in the - /// configuration of `enging` + /// configuration of `engine` /// * If the `wat` feature is enabled and the input is text, then it may be /// rejected if it fails to parse. 
/// @@ -220,9 +178,10 @@ impl Module { /// # } /// ``` pub fn new(engine: &Engine, bytes: impl AsRef<[u8]>) -> Result { + let bytes = bytes.as_ref(); #[cfg(feature = "wat")] - let bytes = wat::parse_bytes(bytes.as_ref())?; - Module::from_binary(engine, bytes.as_ref()) + let bytes = wat::parse_bytes(bytes)?; + Self::from_binary(engine, &bytes) } /// Creates a new WebAssembly `Module` from the given in-memory `binary` @@ -230,7 +189,7 @@ impl Module { /// /// See [`Module::new`] for other details. pub fn new_with_name(engine: &Engine, bytes: impl AsRef<[u8]>, name: &str) -> Result { - let mut module = Module::new(engine, bytes.as_ref())?; + let mut module = Self::new(engine, bytes.as_ref())?; Arc::get_mut(&mut Arc::get_mut(&mut module.inner).unwrap().module) .unwrap() .module_mut() @@ -268,11 +227,23 @@ impl Module { /// # } /// ``` pub fn from_file(engine: &Engine, file: impl AsRef) -> Result { - #[cfg(feature = "wat")] - let wasm = wat::parse_file(file)?; - #[cfg(not(feature = "wat"))] - let wasm = std::fs::read(file)?; - Module::new(engine, &wasm) + match Self::new( + engine, + &fs::read(&file).with_context(|| "failed to read input file")?, + ) { + Ok(m) => Ok(m), + Err(e) => { + cfg_if::cfg_if! { + if #[cfg(feature = "wat")] { + let mut e = e.downcast::()?; + e.set_path(file); + bail!(e) + } else { + Err(e) + } + } + } + } } /// Creates a new WebAssembly `Module` from the given in-memory `binary` @@ -280,8 +251,8 @@ impl Module { /// /// This is similar to [`Module::new`] except that it requires that the /// `binary` input is a WebAssembly binary, the text format is not supported - /// by this function. It's generally recommended to use [`Module::new`], - /// but if it's required to not support the text format this function can be + /// by this function. It's generally recommended to use [`Module::new`], but + /// if it's required to not support the text format this function can be /// used instead. 
/// /// # Examples @@ -307,6 +278,19 @@ impl Module { /// # } /// ``` pub fn from_binary(engine: &Engine, binary: &[u8]) -> Result { + // Check to see that the config's target matches the host + let target = engine.config().isa_flags.triple(); + if *target != target_lexicon::Triple::host() { + bail!( + "target '{}' specified in the configuration does not match the host", + target + ); + } + + // FIXME: we may want to validate that the ISA flags in the config match those that + // would be inferred for the host, otherwise the JIT might produce unrunnable code + // for the features the host's CPU actually has. + const USE_PAGED_MEM_INIT: bool = cfg!(all(feature = "uffd", target_os = "linux")); cfg_if::cfg_if! { @@ -324,25 +308,138 @@ impl Module { } }; - let mut modules = CompiledModule::from_artifacts_list( + let modules = CompiledModule::from_artifacts_list( artifacts, engine.compiler().isa(), &*engine.config().profiler, )?; + + Self::from_parts(engine, modules, main_module, Arc::new(types), &[]) + } + + /// Deserializes an in-memory compiled module previously created with + /// [`Module::serialize`] or [`Engine::precompile_module`]. + /// + /// This function will deserialize the binary blobs emitted by + /// [`Module::serialize`] and [`Engine::precompile_module`] back into an + /// in-memory [`Module`] that's ready to be instantiated. + /// + /// # Unsafety + /// + /// This function is marked as `unsafe` because if fed invalid input or used + /// improperly this could lead to memory safety vulnerabilities. This method + /// should not, for example, be exposed to arbitrary user input. + /// + /// The structure of the binary blob read here is only lightly validated + /// internally in `wasmtime`. This is intended to be an efficient + /// "rehydration" for a [`Module`] which has very few runtime checks beyond + /// deserialization. 
Arbitrary input could, for example, replace valid + /// compiled code with any other valid compiled code, meaning that this can + /// trivially be used to execute arbitrary code otherwise. + /// + /// For these reasons this function is `unsafe`. This function is only + /// designed to receive the previous input from [`Module::serialize`] and + /// [`Engine::precompile_module`]. If the exact output of those functions + /// (unmodified) is passed to this function then calls to this function can + /// be considered safe. It is the caller's responsibility to provide the + /// guarantee that only previously-serialized bytes are being passed in + /// here. + /// + /// Note that this function is designed to be safe receiving output from + /// *any* compiled version of `wasmtime` itself. This means that it is safe + /// to feed output from older versions of Wasmtime into this function, in + /// addition to newer versions of wasmtime (from the future!). These inputs + /// will deterministically and safely produce an `Err`. This function only + /// successfully accepts inputs from the same version of `wasmtime`, but the + /// safety guarantee only applies to externally-defined blobs of bytes, not + /// those defined by any version of wasmtime. (this means that if you cache + /// blobs across versions of wasmtime you can be safely guaranteed that + /// future versions of wasmtime will reject old cache entries). 
+ pub unsafe fn deserialize(engine: &Engine, bytes: impl AsRef<[u8]>) -> Result { + let module = SerializedModule::from_bytes(bytes.as_ref())?; + module.into_module(engine) + } + + fn from_parts( + engine: &Engine, + mut modules: Vec>, + main_module: usize, + types: Arc, + module_upvars: &[serialization::SerializedModuleUpvar], + ) -> Result { + // Validate the module can be used with the current allocator + engine.allocator().validate(modules[main_module].module())?; + + let signatures = Arc::new(SignatureCollection::new_for_module( + engine.signatures(), + &types.wasm_signatures, + modules.iter().flat_map(|m| m.trampolines().iter().cloned()), + )); + let module = modules.remove(main_module); - // Validate the module can be used with the current allocator - engine.allocator().validate(module.module())?; + let module_upvars = module_upvars + .iter() + .map(|m| { + mk( + engine, + &modules, + &types, + m.index, + &m.artifact_upvars, + &m.module_upvars, + &signatures, + ) + }) + .collect::>>()?; - Ok(Module { + return Ok(Self { inner: Arc::new(ModuleInner { engine: engine.clone(), + types, module, - types: Arc::new(types), artifact_upvars: modules, - module_upvars: Vec::new(), + module_upvars, + signatures, }), - }) + }); + + fn mk( + engine: &Engine, + artifacts: &[Arc], + types: &Arc, + module_index: usize, + artifact_upvars: &[usize], + module_upvars: &[serialization::SerializedModuleUpvar], + signatures: &Arc, + ) -> Result { + Ok(Module { + inner: Arc::new(ModuleInner { + engine: engine.clone(), + types: types.clone(), + module: artifacts[module_index].clone(), + artifact_upvars: artifact_upvars + .iter() + .map(|i| artifacts[*i].clone()) + .collect(), + module_upvars: module_upvars + .into_iter() + .map(|m| { + mk( + engine, + artifacts, + types, + m.index, + &m.artifact_upvars, + &m.module_upvars, + signatures, + ) + }) + .collect::>>()?, + signatures: signatures.clone(), + }), + }) + } } /// Validates `binary` input data as a WebAssembly binary given the @@ 
-388,98 +485,12 @@ impl Module { sig } - /// Serialize compilation artifacts to the buffer. See also `deseriaize`. - pub fn serialize(&self) -> Result> { - let mut pushed = HashMap::new(); - let mut tables = Vec::new(); - let module = self.serialized_module(&mut pushed, &mut tables); - let artifacts = (compiler_fingerprint(self.engine()), tables, module); - let buffer = bincode_options().serialize(&artifacts)?; - Ok(buffer) - } - - fn serialized_module<'a>( - &'a self, - type_tables_pushed: &mut HashMap, - type_tables: &mut Vec<&'a TypeTables>, - ) -> ModuleSerialized<'a> { - // Deduplicate `Arc` using our two parameters to ensure we - // serialize type tables as little as possible. - let ptr = Arc::as_ptr(self.types()); - let type_tables_idx = *type_tables_pushed.entry(ptr as usize).or_insert_with(|| { - type_tables.push(self.types()); - type_tables.len() - 1 - }); - ModuleSerialized { - artifacts: self - .inner - .artifact_upvars - .iter() - .map(|i| MyCow::Borrowed(i.compilation_artifacts())) - .chain(Some(MyCow::Borrowed( - self.compiled_module().compilation_artifacts(), - ))) - .collect(), - modules: self - .inner - .module_upvars - .iter() - .map(|i| i.serialized_module(type_tables_pushed, type_tables)) - .collect(), - type_tables: type_tables_idx, - } - } - - /// Deserializes and creates a module from the compilation artifacts. - /// The `serialize` saves the compilation artifacts along with the host - /// fingerprint, which consists of target, compiler flags, and wasmtime - /// package version. + /// Serialize the module to a vector of bytes. /// - /// The method will fail if fingerprints of current host and serialized - /// one are different. The method does not verify the serialized artifacts - /// for modifications or curruptions. All responsibily of signing and its - /// verification falls on the embedder. 
- pub fn deserialize(engine: &Engine, serialized: &[u8]) -> Result { - let (fingerprint, types, serialized) = bincode_options() - .deserialize::<(u64, Vec, _)>(serialized) - .context("Deserialize compilation artifacts")?; - - if fingerprint != compiler_fingerprint(engine) { - bail!("Incompatible compilation artifact"); - } - - let types = types.into_iter().map(Arc::new).collect::>(); - return mk(engine, &types, serialized); - - fn mk( - engine: &Engine, - types: &Vec>, - module: ModuleSerialized<'_>, - ) -> Result { - let mut artifacts = CompiledModule::from_artifacts_list( - module - .artifacts - .into_iter() - .map(|i| i.unwrap_owned()) - .collect(), - engine.compiler().isa(), - &*engine.config().profiler, - )?; - let inner = ModuleInner { - engine: engine.clone(), - types: types[module.type_tables].clone(), - module: artifacts.pop().unwrap(), - artifact_upvars: artifacts, - module_upvars: module - .modules - .into_iter() - .map(|m| mk(engine, types, m)) - .collect::>>()?, - }; - Ok(Module { - inner: Arc::new(inner), - }) - } + /// Use `Module::new` or `Module::from_binary` to create the module + /// from the bytes. + pub fn serialize(&self) -> Result> { + SerializedModule::new(self).to_bytes() } /// Creates a submodule `Module` value from the specified parameters. @@ -493,7 +504,7 @@ impl Module { /// the upvars array in the submodule to be created, and each element of /// this array is an index into this module's upvar array. /// * `module_upvars` - similar to `artifact_upvars` this is a mapping of - /// how to create the e`module_upvars` of the submodule being created. + /// how to create the `module_upvars` of the submodule being created. 
/// Each entry in this array is either an index into this module's own /// module upvars array or it's an index into `modules`, the list of /// modules so far for the instance where this submodule is being @@ -513,8 +524,8 @@ impl Module { ) -> Module { Module { inner: Arc::new(ModuleInner { - types: self.types().clone(), - engine: self.engine().clone(), + types: self.inner.types.clone(), + engine: self.inner.engine.clone(), module: self.inner.artifact_upvars[artifact_index].clone(), artifact_upvars: artifact_upvars .iter() @@ -529,11 +540,12 @@ impl Module { wasmtime_environ::ModuleUpvar::Local(i) => modules[i].clone(), }) .collect(), + signatures: self.inner.signatures.clone(), }), } } - pub(crate) fn compiled_module(&self) -> &CompiledModule { + pub(crate) fn compiled_module(&self) -> &Arc { &self.inner.module } @@ -545,6 +557,10 @@ impl Module { &self.inner.types } + pub(crate) fn signatures(&self) -> &Arc { + &self.inner.signatures + } + /// Looks up the module upvar value at the `index` specified. /// /// Note that this panics if `index` is out of bounds since this should @@ -764,24 +780,6 @@ impl Module { } } -fn bincode_options() -> impl Options { - // Use a variable-length integer encoding instead of fixed length. The - // module shown on #2318 gets compressed from ~160MB to ~110MB simply using - // this, presumably because there's a lot of 8-byte integers which generally - // have small values. Local testing shows that the deserialization - // performance, while higher, is in the few-percent range. For huge size - // savings this seems worthwhile to lose a small percentage of - // deserialization performance. 
- bincode::DefaultOptions::new().with_varint_encoding() -} - -fn compiler_fingerprint(engine: &Engine) -> u64 { - use std::hash::Hasher; - let mut hasher = std::collections::hash_map::DefaultHasher::new(); - engine.compiler().hash(&mut hasher); - hasher.finish() -} - fn _assert_send_sync() { fn _assert() {} _assert::(); diff --git a/crates/wasmtime/src/module/registry.rs b/crates/wasmtime/src/module/registry.rs new file mode 100644 index 0000000000..6687119106 --- /dev/null +++ b/crates/wasmtime/src/module/registry.rs @@ -0,0 +1,555 @@ +//! Implements a registry of modules for a store. + +use crate::{signatures::SignatureCollection, Module}; +use std::{ + collections::BTreeMap, + sync::{Arc, Mutex}, +}; +use wasmtime_environ::{ + entity::EntityRef, + ir::{self, StackMap}, + wasm::DefinedFuncIndex, + FunctionAddressMap, TrapInformation, +}; +use wasmtime_jit::CompiledModule; +use wasmtime_runtime::{ModuleInfo, VMCallerCheckedAnyfunc, VMTrampoline}; + +lazy_static::lazy_static! { + static ref GLOBAL_MODULES: Mutex = Default::default(); +} + +fn func_by_pc(module: &CompiledModule, pc: usize) -> Option<(DefinedFuncIndex, u32)> { + let (index, start, _) = module.func_by_pc(pc)?; + Some((index, (pc - start) as u32)) +} + +/// Used for registering modules with a store. +/// +/// The map is from the ending (exclusive) address for the module code to +/// the registered module. +/// +/// The `BTreeMap` is used to quickly locate a module based on a program counter value. +#[derive(Default)] +pub struct ModuleRegistry(BTreeMap>); + +impl ModuleRegistry { + /// Fetches frame information about a program counter in a backtrace. + /// + /// Returns an object if this `pc` is known to some previously registered + /// module, or returns `None` if no information can be found. The boolean + /// returned indicates whether the original module has unparsed debug + /// information due to the compiler's configuration. 
+ pub fn lookup_frame_info(&self, pc: usize) -> Option<(FrameInfo, bool)> { + let module = self.module(pc)?; + module + .lookup_frame_info(pc) + .map(|info| (info, module.has_unparsed_debuginfo())) + } + + /// Fetches trap information about a program counter in a backtrace. + pub fn lookup_trap_info(&self, pc: usize) -> Option<&TrapInformation> { + self.module(pc)?.lookup_trap_info(pc) + } + + /// Fetches information about a registered module given a program counter value. + pub fn lookup_module(&self, pc: usize) -> Option> { + self.module(pc) + .map(|m| -> Arc { m.clone() }) + } + + fn module(&self, pc: usize) -> Option<&Arc> { + let (end, info) = self.0.range(pc..).next()?; + if pc < info.start || *end < pc { + return None; + } + + Some(info) + } + + /// Registers a new module with the registry. + pub fn register(&mut self, module: &Module) { + let compiled_module = module.compiled_module(); + let (start, end) = compiled_module.code().range(); + + // Ignore modules with no code or finished functions + if start == end || compiled_module.finished_functions().is_empty() { + return; + } + + // The module code range is exclusive for end, so make it inclusive as it + // may be a valid PC value + let end = end - 1; + + // Ensure the module isn't already present in the registry + // This is expected when a module is instantiated multiple times in the same store + if let Some(m) = self.0.get(&end) { + assert_eq!(m.start, start); + return; + } + + // Assert that this module's code doesn't collide with any other registered modules + if let Some((_, prev)) = self.0.range(end..).next() { + assert!(prev.start > end); + } + + if let Some((prev_end, _)) = self.0.range(..=start).next_back() { + assert!(*prev_end < start); + } + + let prev = self.0.insert( + end, + Arc::new(RegisteredModule { + start, + module: compiled_module.clone(), + signatures: module.signatures().clone(), + }), + ); + assert!(prev.is_none()); + + GLOBAL_MODULES.lock().unwrap().register(start, end, module); + 
} + + /// Looks up a trampoline from an anyfunc. + pub fn lookup_trampoline(&self, anyfunc: &VMCallerCheckedAnyfunc) -> Option { + let module = self.module(anyfunc.func_ptr.as_ptr() as usize)?; + module.signatures.trampoline(anyfunc.type_index) + } +} + +impl Drop for ModuleRegistry { + fn drop(&mut self) { + let mut info = GLOBAL_MODULES.lock().unwrap(); + for end in self.0.keys() { + info.unregister(*end); + } + } +} + +struct RegisteredModule { + start: usize, + module: Arc, + signatures: Arc, +} + +impl RegisteredModule { + /// Determines if the related module has unparsed debug information. + pub fn has_unparsed_debuginfo(&self) -> bool { + self.module.has_unparsed_debuginfo() + } + + /// Fetches frame information about a program counter in a backtrace. + /// + /// Returns an object if this `pc` is known to this module, or returns `None` + /// if no information can be found. + pub fn lookup_frame_info(&self, pc: usize) -> Option { + let (index, offset) = func_by_pc(&self.module, pc)?; + let info = self.module.func_info(index); + let pos = Self::instr_pos(offset, &info.address_map); + + // In debug mode for now assert that we found a mapping for `pc` within + // the function, because otherwise something is buggy along the way and + // not accounting for all the instructions. This isn't super critical + // though so we can omit this check in release mode. + debug_assert!(pos.is_some(), "failed to find instruction for {:x}", pc); + + let instr = match pos { + Some(pos) => info.address_map.instructions[pos].srcloc, + None => info.address_map.start_srcloc, + }; + + // Use our wasm-relative pc to symbolize this frame. If there's a + // symbolication context (dwarf debug info) available then we can try to + // look this up there. + // + // Note that dwarf pcs are code-section-relative, hence the subtraction + // from the location of `instr`. 
Also note that all errors are ignored + // here for now since technically wasm modules can always have any + // custom section contents. + let mut symbols = Vec::new(); + + if let Some(s) = &self.module.symbolize_context().ok().and_then(|c| c) { + let to_lookup = (instr.bits() as u64) - s.code_section_offset(); + if let Ok(mut frames) = s.addr2line().find_frames(to_lookup) { + while let Ok(Some(frame)) = frames.next() { + symbols.push(FrameSymbol { + name: frame + .function + .as_ref() + .and_then(|l| l.raw_name().ok()) + .map(|s| s.to_string()), + file: frame + .location + .as_ref() + .and_then(|l| l.file) + .map(|s| s.to_string()), + line: frame.location.as_ref().and_then(|l| l.line), + column: frame.location.as_ref().and_then(|l| l.column), + }); + } + } + } + + let module = self.module.module(); + let index = module.func_index(index); + + Some(FrameInfo { + module_name: module.name.clone(), + func_index: index.index() as u32, + func_name: module.func_names.get(&index).cloned(), + instr, + func_start: info.address_map.start_srcloc, + symbols, + }) + } + + /// Fetches trap information about a program counter in a backtrace. + pub fn lookup_trap_info(&self, pc: usize) -> Option<&TrapInformation> { + let (index, offset) = func_by_pc(&self.module, pc)?; + let info = self.module.func_info(index); + let idx = info + .traps + .binary_search_by_key(&offset, |info| info.code_offset) + .ok()?; + Some(&info.traps[idx]) + } + + fn instr_pos(offset: u32, addr_map: &FunctionAddressMap) -> Option { + // Use our relative position from the start of the function to find the + // machine instruction that corresponds to `pc`, which then allows us to + // map that to a wasm original source location. + match addr_map + .instructions + .binary_search_by_key(&offset, |map| map.code_offset) + { + // Exact hit! + Ok(pos) => Some(pos), + + // This *would* be at the first slot in the array, so no + // instructions cover `pc`. 
+ Err(0) => None, + + // This would be at the `nth` slot, so we're at the `n-1`th slot. + Err(n) => Some(n - 1), + } + } +} + +impl ModuleInfo for RegisteredModule { + fn lookup_stack_map(&self, pc: usize) -> Option<&StackMap> { + let (index, offset) = func_by_pc(&self.module, pc)?; + let info = self.module.func_info(index); + + // Do a binary search to find the stack map for the given offset. + // + // Because GC safepoints are technically only associated with a single + // PC, we should ideally only care about `Ok(index)` values returned + // from the binary search. However, safepoints are inserted right before + // calls, and there are two things that can disturb the PC/offset + // associated with the safepoint versus the PC we actually use to query + // for the stack map: + // + // 1. The `backtrace` crate gives us the PC in a frame that will be + // *returned to*, and where execution will continue from, rather than + // the PC of the call we are currently at. So we would need to + // disassemble one instruction backwards to query the actual PC for + // the stack map. + // + // TODO: One thing we *could* do to make this a little less error + // prone, would be to assert/check that the nearest GC safepoint + // found is within `max_encoded_size(any kind of call instruction)` + // our queried PC for the target architecture. + // + // 2. Cranelift's stack maps only handle the stack, not + // registers. However, some references that are arguments to a call + // may need to be in registers. In these cases, what Cranelift will + // do is: + // + // a. spill all the live references, + // b. insert a GC safepoint for those references, + // c. reload the references into registers, and finally + // d. make the call. + // + // Step (c) adds drift between the GC safepoint and the location of + // the call, which is where we actually walk the stack frame and + // collect its live references. 
+ // + // Luckily, the spill stack slots for the live references are still + // up to date, so we can still find all the on-stack roots. + // Furthermore, we do not have a moving GC, so we don't need to worry + // whether the following code will reuse the references in registers + // (which would not have been updated to point to the moved objects) + // or reload from the stack slots (which would have been updated to + // point to the moved objects). + + let index = match info + .stack_maps + .binary_search_by_key(&offset, |i| i.code_offset) + { + // Exact hit. + Ok(i) => i, + + // `Err(0)` means that the associated stack map would have been the + // first element in the array if this pc had an associated stack + // map, but this pc does not have an associated stack map. This can + // only happen inside a Wasm frame if there are no live refs at this + // pc. + Err(0) => return None, + + Err(i) => i - 1, + }; + + Some(&info.stack_maps[index].stack_map) + } +} + +// Counterpart to `RegisteredModule`, but stored in the global registry. +struct GlobalRegisteredModule { + start: usize, + module: Arc, + /// Note that modules can be instantiated in many stores, so the purpose of + /// this field is to keep track of how many stores have registered a + /// module. Information is only removed from the global registry when this + /// reference count reaches 0. + references: usize, +} + +/// This is the global module registry that stores information for all modules +/// that are currently in use by any `Store`. +/// +/// The purpose of this map is to be called from signal handlers to determine +/// whether a program counter is a wasm trap or not. Specifically macOS has +/// no contextual information about the thread available, hence the necessity +/// for global state rather than using thread local state. +/// +/// This is similar to `ModuleRegistry` except that it has less information and +/// supports removal. 
Any time anything is registered with a `ModuleRegistry` +/// it is also automatically registered with the singleton global module +/// registry. When a `ModuleRegistry` is destroyed then all of its entries +/// are removed from the global module registry. +#[derive(Default)] +pub struct GlobalModuleRegistry(BTreeMap); + +impl GlobalModuleRegistry { + /// Returns whether the `pc`, according to globally registered information, + /// is a wasm trap or not. + pub(crate) fn is_wasm_pc(pc: usize) -> bool { + let modules = GLOBAL_MODULES.lock().unwrap(); + + match modules.0.range(pc..).next() { + Some((end, entry)) => { + if pc < entry.start || *end < pc { + return false; + } + + match func_by_pc(&entry.module, pc) { + Some((index, offset)) => { + let info = entry.module.func_info(index); + RegisteredModule::instr_pos(offset, &info.address_map).is_some() + } + None => false, + } + } + None => false, + } + } + + /// Registers a new region of code, described by `(start, end)` and with + /// the given function information, with the global information. + fn register(&mut self, start: usize, end: usize, module: &Module) { + let info = self.0.entry(end).or_insert_with(|| GlobalRegisteredModule { + start, + module: module.compiled_module().clone(), + references: 0, + }); + + // Note that ideally we'd debug_assert that the information previously + // stored, if any, matches the `functions` we were given, but for now we + // just do some simple checks to hope it's the same. + assert_eq!(info.start, start); + info.references += 1; + } + + /// Unregisters a region of code (keyed by the `end` address) from the + /// global information. + fn unregister(&mut self, end: usize) { + let info = self.0.get_mut(&end).unwrap(); + info.references -= 1; + if info.references == 0 { + self.0.remove(&end); + } + } +} + +/// Description of a frame in a backtrace for a [`Trap`]. +/// +/// Whenever a WebAssembly trap occurs an instance of [`Trap`] is created. 
Each +/// [`Trap`] has a backtrace of the WebAssembly frames that led to the trap, and +/// each frame is described by this structure. +/// +/// [`Trap`]: crate::Trap +#[derive(Debug)] +pub struct FrameInfo { + module_name: Option, + func_index: u32, + func_name: Option, + func_start: ir::SourceLoc, + instr: ir::SourceLoc, + symbols: Vec, +} + +impl FrameInfo { + /// Returns the WebAssembly function index for this frame. + /// + /// This function index is the index in the function index space of the + /// WebAssembly module that this frame comes from. + pub fn func_index(&self) -> u32 { + self.func_index + } + + /// Returns the identifier of the module that this frame is for. + /// + /// Module identifiers are present in the `name` section of a WebAssembly + /// binary, but this may not return the exact item in the `name` section. + /// Module names can be overwritten at construction time or perhaps inferred + /// from file names. The primary purpose of this function is to assist in + /// debugging and therefore may be tweaked over time. + /// + /// This function returns `None` when no name can be found or inferred. + pub fn module_name(&self) -> Option<&str> { + self.module_name.as_deref() + } + + /// Returns a descriptive name of the function for this frame, if one is + /// available. + /// + /// The name of this function may come from the `name` section of the + /// WebAssembly binary, or wasmtime may try to infer a better name for it if + /// not available, for example the name of the export if it's exported. + /// + /// This return value is primarily used for debugging and human-readable + /// purposes for things like traps. Note that the exact return value may be + /// tweaked over time here and isn't guaranteed to be something in + /// particular about a wasm module due to its primary purpose of assisting + /// in debugging. + /// + /// This function returns `None` when no name could be inferred. 
+ pub fn func_name(&self) -> Option<&str> { + self.func_name.as_deref() + } + + /// Returns the offset within the original wasm module this frame's program + /// counter was at. + /// + /// The offset here is the offset from the beginning of the original wasm + /// module to the instruction that this frame points to. + pub fn module_offset(&self) -> usize { + self.instr.bits() as usize + } + + /// Returns the offset from the original wasm module's function to this + /// frame's program counter. + /// + /// The offset here is the offset from the beginning of the defining + /// function of this frame (within the wasm module) to the instruction this + /// frame points to. + pub fn func_offset(&self) -> usize { + (self.instr.bits() - self.func_start.bits()) as usize + } + + /// Returns the debug symbols found, if any, for this function frame. + /// + /// When a wasm program is compiled with DWARF debug information then this + /// function may be populated to return symbols which contain extra debug + /// information about a frame including the filename and line number. If no + /// debug information was found or if it was malformed then this will return + /// an empty array. + pub fn symbols(&self) -> &[FrameSymbol] { + &self.symbols + } +} + +/// Debug information for a symbol that is attached to a [`FrameInfo`]. +/// +/// When DWARF debug information is present in a wasm file then this structure +/// can be found on a [`FrameInfo`] and can be used to learn about filenames, +/// line numbers, etc, which are the origin of a function in a stack trace. +#[derive(Debug)] +pub struct FrameSymbol { + name: Option, + file: Option, + line: Option, + column: Option, +} + +impl FrameSymbol { + /// Returns the function name associated with this symbol. + /// + /// Note that this may not be present with malformed debug information, or + /// the debug information may not include it. 
Also note that the symbol is + /// frequently mangled, so you might need to run some form of demangling + /// over it. + pub fn name(&self) -> Option<&str> { + self.name.as_deref() + } + + /// Returns the source code filename this symbol was defined in. + /// + /// Note that this may not be present with malformed debug information, or + /// the debug information may not include it. + pub fn file(&self) -> Option<&str> { + self.file.as_deref() + } + + /// Returns the 1-indexed source code line number this symbol was defined + /// on. + /// + /// Note that this may not be present with malformed debug information, or + /// the debug information may not include it. + pub fn line(&self) -> Option { + self.line + } + + /// Returns the 1-indexed source code column number this symbol was defined + /// on. + /// + /// Note that this may not be present with malformed debug information, or + /// the debug information may not include it. + pub fn column(&self) -> Option { + self.column + } +} + +#[test] +fn test_frame_info() -> Result<(), anyhow::Error> { + use crate::*; + let store = Store::default(); + let module = Module::new( + store.engine(), + r#" + (module + (func (export "add") (param $x i32) (param $y i32) (result i32) (i32.add (local.get $x) (local.get $y))) + (func (export "sub") (param $x i32) (param $y i32) (result i32) (i32.sub (local.get $x) (local.get $y))) + (func (export "mul") (param $x i32) (param $y i32) (result i32) (i32.mul (local.get $x) (local.get $y))) + (func (export "div_s") (param $x i32) (param $y i32) (result i32) (i32.div_s (local.get $x) (local.get $y))) + (func (export "div_u") (param $x i32) (param $y i32) (result i32) (i32.div_u (local.get $x) (local.get $y))) + (func (export "rem_s") (param $x i32) (param $y i32) (result i32) (i32.rem_s (local.get $x) (local.get $y))) + (func (export "rem_u") (param $x i32) (param $y i32) (result i32) (i32.rem_u (local.get $x) (local.get $y))) + ) + "#, + )?; + // Create an instance to ensure the frame 
information is registered. + Instance::new(&store, &module, &[])?; + let modules = store.modules().borrow(); + for (i, alloc) in module.compiled_module().finished_functions() { + let (start, end) = unsafe { + let ptr = (**alloc).as_ptr(); + let len = (**alloc).len(); + (ptr as usize, ptr as usize + len) + }; + for pc in start..end { + let (frame, _) = modules.lookup_frame_info(pc).unwrap(); + assert!(frame.func_index() == i.as_u32()); + } + } + Ok(()) +} diff --git a/crates/wasmtime/src/module/serialization.rs b/crates/wasmtime/src/module/serialization.rs new file mode 100644 index 0000000000..e566d01ed5 --- /dev/null +++ b/crates/wasmtime/src/module/serialization.rs @@ -0,0 +1,798 @@ +//! Implements module serialization. + +use crate::{Engine, Module, OptLevel}; +use anyhow::{anyhow, bail, Context, Result}; +use bincode::Options; +use serde::{Deserialize, Serialize}; +use std::borrow::Cow; +use std::fmt; +use std::str::FromStr; +use std::sync::Arc; +use std::{collections::HashMap, fmt::Display}; +use wasmtime_environ::{isa::TargetIsa, settings, Tunables}; +use wasmtime_jit::{ + CompilationArtifacts, CompilationStrategy, CompiledModule, Compiler, TypeTables, +}; + +const HEADER: &[u8] = b"\0wasmtime-aot"; + +fn bincode_options() -> impl Options { + // Use a variable-length integer encoding instead of fixed length. The + // module shown on #2318 gets compressed from ~160MB to ~110MB simply using + // this, presumably because there's a lot of 8-byte integers which generally + // have small values. Local testing shows that the deserialization + // performance, while higher, is in the few-percent range. For huge size + // savings this seems worthwhile to lose a small percentage of + // deserialization performance. 
+ bincode::DefaultOptions::new().with_varint_encoding() +} + +// This exists because `wasmparser::WasmFeatures` isn't serializable +#[derive(Debug, Copy, Clone, Serialize, Deserialize)] +struct WasmFeatures { + pub reference_types: bool, + pub multi_value: bool, + pub bulk_memory: bool, + pub module_linking: bool, + pub simd: bool, + pub threads: bool, + pub tail_call: bool, + pub deterministic_only: bool, + pub multi_memory: bool, + pub exceptions: bool, + pub memory64: bool, +} + +impl From<&wasmparser::WasmFeatures> for WasmFeatures { + fn from(other: &wasmparser::WasmFeatures) -> Self { + let wasmparser::WasmFeatures { + reference_types, + multi_value, + bulk_memory, + module_linking, + simd, + threads, + tail_call, + deterministic_only, + multi_memory, + exceptions, + memory64, + } = other; + + Self { + reference_types: *reference_types, + multi_value: *multi_value, + bulk_memory: *bulk_memory, + module_linking: *module_linking, + simd: *simd, + threads: *threads, + tail_call: *tail_call, + deterministic_only: *deterministic_only, + multi_memory: *multi_memory, + exceptions: *exceptions, + memory64: *memory64, + } + } +} + +// This is like `std::borrow::Cow` but it doesn't have a `Clone` bound on `T` +enum MyCow<'a, T> { + Borrowed(&'a T), + Owned(T), +} + +impl<'a, T> MyCow<'a, T> { + fn unwrap_owned(self) -> T { + match self { + MyCow::Owned(val) => val, + MyCow::Borrowed(_) => unreachable!(), + } + } +} + +impl<'a, T: Serialize> Serialize for MyCow<'a, T> { + fn serialize(&self, dst: S) -> Result + where + S: serde::ser::Serializer, + { + match self { + MyCow::Borrowed(val) => val.serialize(dst), + MyCow::Owned(val) => val.serialize(dst), + } + } +} + +impl<'a, 'b, T: Deserialize<'a>> Deserialize<'a> for MyCow<'b, T> { + fn deserialize(src: D) -> Result + where + D: serde::de::Deserializer<'a>, + { + Ok(MyCow::Owned(T::deserialize(src)?)) + } +} + +impl From for OptLevel { + fn from(level: settings::OptLevel) -> Self { + match level { + 
settings::OptLevel::Speed => OptLevel::Speed, + settings::OptLevel::SpeedAndSize => OptLevel::SpeedAndSize, + settings::OptLevel::None => OptLevel::None, + } + } +} + +/// A small helper struct for serialized module upvars. +#[derive(Serialize, Deserialize)] +pub struct SerializedModuleUpvar { + /// The module's index into the compilation artifact. + pub index: usize, + /// Indexes into the list of all compilation artifacts for this module. + pub artifact_upvars: Vec, + /// Closed-over module values that are also needed for this module. + pub module_upvars: Vec, +} + +impl SerializedModuleUpvar { + pub fn new(module: &Module, artifacts: &[Arc]) -> Self { + // TODO: improve upon the linear searches in the artifact list + let index = artifacts + .iter() + .position(|a| Arc::as_ptr(a) == Arc::as_ptr(&module.inner.module)) + .expect("module should be in artifacts list"); + + SerializedModuleUpvar { + index, + artifact_upvars: module + .inner + .artifact_upvars + .iter() + .map(|m| { + artifacts + .iter() + .position(|a| Arc::as_ptr(a) == Arc::as_ptr(m)) + .expect("artifact should be in artifacts list") + }) + .collect(), + module_upvars: module + .inner + .module_upvars + .iter() + .map(|m| SerializedModuleUpvar::new(m, artifacts)) + .collect(), + } + } +} + +#[derive(Serialize, Deserialize, Eq, PartialEq)] +enum FlagValue { + Enum(Cow<'static, str>), + Num(u8), + Bool(bool), +} + +impl From for FlagValue { + fn from(v: settings::Value) -> Self { + match v.kind() { + settings::SettingKind::Enum => Self::Enum(v.as_enum().unwrap().into()), + settings::SettingKind::Num => Self::Num(v.as_num().unwrap()), + settings::SettingKind::Bool => Self::Bool(v.as_bool().unwrap()), + settings::SettingKind::Preset => unreachable!(), + } + } +} + +impl Display for FlagValue { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Enum(v) => v.fmt(f), + Self::Num(v) => v.fmt(f), + Self::Bool(v) => v.fmt(f), + } + } +} + +#[derive(Serialize, Deserialize)] +pub 
struct SerializedModule<'a> { + target: String, + shared_flags: HashMap, + isa_flags: HashMap, + strategy: CompilationStrategy, + tunables: Tunables, + features: WasmFeatures, + artifacts: Vec>, + module_upvars: Vec, + types: MyCow<'a, TypeTables>, +} + +impl<'a> SerializedModule<'a> { + pub fn new(module: &'a Module) -> Self { + let compiler = module.engine().compiler(); + let artifacts = module + .inner + .artifact_upvars + .iter() + .map(|m| MyCow::Borrowed(m.compilation_artifacts())) + .chain(Some(MyCow::Borrowed( + module.inner.module.compilation_artifacts(), + ))) + .collect::>(); + let module_upvars = module + .inner + .module_upvars + .iter() + .map(|m| SerializedModuleUpvar::new(m, &module.inner.artifact_upvars)) + .collect::>(); + + Self::with_data( + compiler, + artifacts, + module_upvars, + MyCow::Borrowed(module.types()), + ) + } + + pub fn from_artifacts( + compiler: &Compiler, + artifacts: &'a Vec, + types: &'a TypeTables, + ) -> Self { + Self::with_data( + compiler, + artifacts.iter().map(MyCow::Borrowed).collect(), + Vec::new(), + MyCow::Borrowed(types), + ) + } + + fn with_data( + compiler: &Compiler, + artifacts: Vec>, + module_upvars: Vec, + types: MyCow<'a, TypeTables>, + ) -> Self { + let isa = compiler.isa(); + + Self { + target: isa.triple().to_string(), + shared_flags: isa + .flags() + .iter() + .map(|v| (v.name.to_owned(), v.into())) + .collect(), + isa_flags: isa + .isa_flags() + .into_iter() + .map(|v| (v.name.to_owned(), v.into())) + .collect(), + strategy: compiler.strategy(), + tunables: compiler.tunables().clone(), + features: compiler.features().into(), + artifacts, + module_upvars, + types, + } + } + + pub fn into_module(mut self, engine: &Engine) -> Result { + let compiler = engine.compiler(); + let isa = compiler.isa(); + + self.check_triple(isa)?; + self.check_shared_flags(isa)?; + self.check_isa_flags(isa)?; + self.check_strategy(compiler)?; + self.check_tunables(compiler)?; + self.check_features(compiler)?; + + let modules = 
CompiledModule::from_artifacts_list( + self.artifacts + .into_iter() + .map(|i| i.unwrap_owned()) + .collect(), + engine.compiler().isa(), + &*engine.config().profiler, + )?; + + assert!(!modules.is_empty()); + + let main_module = modules.len() - 1; + + Module::from_parts( + engine, + modules, + main_module, + Arc::new(self.types.unwrap_owned()), + &self.module_upvars, + ) + } + + pub fn to_bytes(&self) -> Result> { + use std::io::Write; + + let mut bytes = Vec::new(); + + bytes.write_all(HEADER)?; + + // Preface the data with a version so we can do a version check independent + // of the serialized data. + let version = env!("CARGO_PKG_VERSION"); + assert!( + version.len() < 256, + "package version must be less than 256 bytes" + ); + bytes.write(&[version.len() as u8])?; + + bytes.write_all(version.as_bytes())?; + + bincode_options().serialize_into(&mut bytes, self)?; + + Ok(bytes) + } + + pub fn from_bytes(bytes: &[u8]) -> Result { + if !bytes.starts_with(HEADER) { + bail!("bytes are not a compatible serialized wasmtime module"); + } + + let bytes = &bytes[HEADER.len()..]; + + if bytes.is_empty() { + bail!("serialized data data is empty"); + } + + let version_len = bytes[0] as usize; + if bytes.len() < version_len + 1 { + bail!("serialized data is malformed"); + } + + let version = std::str::from_utf8(&bytes[1..1 + version_len])?; + if version != env!("CARGO_PKG_VERSION") { + bail!( + "Module was compiled with incompatible Wasmtime version '{}'", + version + ); + } + + Ok(bincode_options() + .deserialize::>(&bytes[1 + version_len..]) + .context("deserialize compilation artifacts")?) 
+ } + + fn check_triple(&self, isa: &dyn TargetIsa) -> Result<()> { + let triple = target_lexicon::Triple::from_str(&self.target).map_err(|e| anyhow!(e))?; + + if triple.architecture != isa.triple().architecture { + bail!( + "Module was compiled for architecture '{}'", + triple.architecture + ); + } + + if triple.operating_system != isa.triple().operating_system { + bail!( + "Module was compiled for operating system '{}'", + triple.operating_system + ); + } + + Ok(()) + } + + fn check_shared_flags(&mut self, isa: &dyn TargetIsa) -> Result<()> { + let mut shared_flags = std::mem::take(&mut self.shared_flags); + for value in isa.flags().iter() { + let name = value.name; + match shared_flags.remove(name) { + Some(v) => { + let host: FlagValue = value.into(); + if v != host { + bail!("Module was compiled with a different '{}' setting: expected '{}' but host has '{}'", name, v, host); + } + } + None => bail!("Module was compiled without setting '{}'", name), + } + } + + for (name, _) in shared_flags { + bail!( + "Module was compiled with setting '{}' but it is not present for the host", + name + ); + } + + Ok(()) + } + + fn check_isa_flags(&mut self, isa: &dyn TargetIsa) -> Result<()> { + let mut isa_flags = std::mem::take(&mut self.isa_flags); + for value in isa.isa_flags().into_iter() { + let name = value.name; + let host: FlagValue = value.into(); + match isa_flags.remove(name) { + Some(v) => match (&v, &host) { + (FlagValue::Bool(v), FlagValue::Bool(host)) => { + // ISA flags represent CPU features; for boolean values, only + // treat it as an error if the module was compiled with the setting enabled + // but the host does not have it enabled. 
+ if *v && !*host { + bail!("Module was compiled with setting '{}' enabled but the host does not support it", name); + } + } + _ => { + if v != host { + bail!("Module was compiled with a different '{}' setting: expected '{}' but host has '{}'", name, v, host); + } + } + }, + None => bail!("Module was compiled without setting '{}'", name), + } + } + + for (name, _) in isa_flags { + bail!( + "Module was compiled with setting '{}' but it is not present for the host", + name + ); + } + + Ok(()) + } + + fn check_strategy(&self, compiler: &Compiler) -> Result<()> { + #[allow(unreachable_patterns)] + let matches = match (self.strategy, compiler.strategy()) { + (CompilationStrategy::Auto, CompilationStrategy::Auto) + | (CompilationStrategy::Auto, CompilationStrategy::Cranelift) + | (CompilationStrategy::Cranelift, CompilationStrategy::Auto) + | (CompilationStrategy::Cranelift, CompilationStrategy::Cranelift) => true, + #[cfg(feature = "lightbeam")] + (CompilationStrategy::Lightbeam, CompilationStrategy::Lightbeam) => true, + _ => false, + }; + + if !matches { + bail!("Module was compiled with strategy '{:?}'", self.strategy); + } + + Ok(()) + } + + fn check_int(found: T, expected: T, feature: &str) -> Result<()> { + if found == expected { + return Ok(()); + } + + bail!( + "Module was compiled with a {} of '{}' but '{}' is expected for the host", + feature, + found, + expected + ); + } + + fn check_bool(found: bool, expected: bool, feature: &str) -> Result<()> { + if found == expected { + return Ok(()); + } + + bail!( + "Module was compiled {} {} but it {} enabled for the host", + if found { "with" } else { "without" }, + feature, + if expected { "is" } else { "is not" } + ); + } + + fn check_tunables(&self, compiler: &Compiler) -> Result<()> { + let Tunables { + static_memory_bound, + static_memory_offset_guard_size, + dynamic_memory_offset_guard_size, + generate_native_debuginfo, + parse_wasm_debuginfo, + interruptable, + consume_fuel, + static_memory_bound_is_maximum, + 
} = self.tunables; + + let other = compiler.tunables(); + + Self::check_int( + static_memory_bound, + other.static_memory_bound, + "static memory bound", + )?; + Self::check_int( + static_memory_offset_guard_size, + other.static_memory_offset_guard_size, + "static memory guard size", + )?; + Self::check_int( + dynamic_memory_offset_guard_size, + other.dynamic_memory_offset_guard_size, + "dynamic memory guard size", + )?; + Self::check_bool( + generate_native_debuginfo, + other.generate_native_debuginfo, + "debug information support", + )?; + Self::check_bool( + parse_wasm_debuginfo, + other.parse_wasm_debuginfo, + "WebAssembly backtrace support", + )?; + Self::check_bool(interruptable, other.interruptable, "interruption support")?; + Self::check_bool(consume_fuel, other.consume_fuel, "fuel support")?; + Self::check_bool( + static_memory_bound_is_maximum, + other.static_memory_bound_is_maximum, + "pooling allocation support", + )?; + + Ok(()) + } + + fn check_features(&self, compiler: &Compiler) -> Result<()> { + let WasmFeatures { + reference_types, + multi_value, + bulk_memory, + module_linking, + simd, + threads, + tail_call, + deterministic_only, + multi_memory, + exceptions, + memory64, + } = self.features; + + let other = compiler.features(); + Self::check_bool( + reference_types, + other.reference_types, + "WebAssembly reference types support", + )?; + Self::check_bool( + multi_value, + other.multi_value, + "WebAssembly multi-value support", + )?; + Self::check_bool( + bulk_memory, + other.bulk_memory, + "WebAssembly bulk memory support", + )?; + Self::check_bool( + module_linking, + other.module_linking, + "WebAssembly module linking support", + )?; + Self::check_bool(simd, other.simd, "WebAssembly SIMD support")?; + Self::check_bool(threads, other.threads, "WebAssembly threads support")?; + Self::check_bool(tail_call, other.tail_call, "WebAssembly tail-call support")?; + Self::check_bool( + deterministic_only, + other.deterministic_only, + "WebAssembly 
deterministic-only support", + )?; + Self::check_bool( + multi_memory, + other.multi_memory, + "WebAssembly multi-memory support", + )?; + Self::check_bool( + exceptions, + other.exceptions, + "WebAssembly exceptions support", + )?; + Self::check_bool( + memory64, + other.memory64, + "WebAssembly 64-bit memory support", + )?; + + Ok(()) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::Config; + + #[test] + fn test_architecture_mismatch() -> Result<()> { + let engine = Engine::default(); + let module = Module::new(&engine, "(module)")?; + + let mut serialized = SerializedModule::new(&module); + serialized.target = "unknown-generic-linux".to_string(); + + match serialized.into_module(&engine) { + Ok(_) => unreachable!(), + Err(e) => assert_eq!( + e.to_string(), + "Module was compiled for architecture 'unknown'", + ), + } + + Ok(()) + } + + #[test] + fn test_os_mismatch() -> Result<()> { + let engine = Engine::default(); + let module = Module::new(&engine, "(module)")?; + + let mut serialized = SerializedModule::new(&module); + serialized.target = format!( + "{}-generic-unknown", + target_lexicon::Triple::host().architecture + ); + + match serialized.into_module(&engine) { + Ok(_) => unreachable!(), + Err(e) => assert_eq!( + e.to_string(), + "Module was compiled for operating system 'unknown'", + ), + } + + Ok(()) + } + + #[test] + fn test_cranelift_flags_mismatch() -> Result<()> { + let engine = Engine::default(); + let module = Module::new(&engine, "(module)")?; + + let mut serialized = SerializedModule::new(&module); + serialized.shared_flags.insert( + "opt_level".to_string(), + FlagValue::Enum(Cow::Borrowed("none")), + ); + + match serialized.into_module(&engine) { + Ok(_) => unreachable!(), + Err(e) => assert_eq!( + e.to_string(), + "Module was compiled with a different 'opt_level' setting: expected 'none' but host has 'speed'" + ), + } + + Ok(()) + } + + #[cfg(target_arch = "x86_64")] + #[test] + fn test_isa_flags_mismatch() -> Result<()> { + let 
engine = Engine::default(); + let module = Module::new(&engine, "(module)")?; + + let mut serialized = SerializedModule::new(&module); + + serialized + .isa_flags + .insert("not_a_flag".to_string(), FlagValue::Bool(true)); + + match serialized.into_module(&engine) { + Ok(_) => unreachable!(), + Err(e) => assert_eq!( + e.to_string(), + "Module was compiled with setting 'not_a_flag' but it is not present for the host", + ), + } + + Ok(()) + } + + #[cfg(feature = "lightbeam")] + #[test] + fn test_compilation_strategy_mismatch() -> Result<()> { + let engine = Engine::default(); + let module = Module::new(&engine, "(module)")?; + + let mut serialized = SerializedModule::new(&module); + serialized.strategy = CompilationStrategy::Lightbeam; + + match serialized.into_module(&engine) { + Ok(_) => unreachable!(), + Err(e) => assert_eq!( + e.to_string(), + "Module was compiled with strategy 'Cranelift'", + ), + } + + Ok(()) + } + + #[test] + fn test_tunables_int_mismatch() -> Result<()> { + let engine = Engine::default(); + let module = Module::new(&engine, "(module)")?; + + let mut serialized = SerializedModule::new(&module); + serialized.tunables.static_memory_offset_guard_size = 0; + + match serialized.into_module(&engine) { + Ok(_) => unreachable!(), + Err(e) => assert_eq!(e.to_string(), "Module was compiled with a static memory guard size of '0' but '2147483648' is expected for the host"), + } + + Ok(()) + } + + #[test] + fn test_tunables_bool_mismatch() -> Result<()> { + let mut config = Config::new(); + config.interruptable(true); + + let engine = Engine::new(&config)?; + let module = Module::new(&engine, "(module)")?; + + let mut serialized = SerializedModule::new(&module); + serialized.tunables.interruptable = false; + + match serialized.into_module(&engine) { + Ok(_) => unreachable!(), + Err(e) => assert_eq!( + e.to_string(), + "Module was compiled without interruption support but it is enabled for the host" + ), + } + + let mut config = Config::new(); + 
config.interruptable(false); + + let engine = Engine::new(&config)?; + let module = Module::new(&engine, "(module)")?; + + let mut serialized = SerializedModule::new(&module); + serialized.tunables.interruptable = true; + + match serialized.into_module(&engine) { + Ok(_) => unreachable!(), + Err(e) => assert_eq!( + e.to_string(), + "Module was compiled with interruption support but it is not enabled for the host" + ), + } + + Ok(()) + } + + #[test] + fn test_feature_mismatch() -> Result<()> { + let mut config = Config::new(); + config.wasm_simd(true); + + let engine = Engine::new(&config)?; + let module = Module::new(&engine, "(module)")?; + + let mut serialized = SerializedModule::new(&module); + serialized.features.simd = false; + + match serialized.into_module(&engine) { + Ok(_) => unreachable!(), + Err(e) => assert_eq!(e.to_string(), "Module was compiled without WebAssembly SIMD support but it is enabled for the host"), + } + + let mut config = Config::new(); + config.wasm_simd(false); + + let engine = Engine::new(&config)?; + let module = Module::new(&engine, "(module)")?; + + let mut serialized = SerializedModule::new(&module); + serialized.features.simd = true; + + match serialized.into_module(&engine) { + Ok(_) => unreachable!(), + Err(e) => assert_eq!(e.to_string(), "Module was compiled with WebAssembly SIMD support but it is not enabled for the host"), + } + + Ok(()) + } +} diff --git a/crates/wasmtime/src/sig_registry.rs b/crates/wasmtime/src/sig_registry.rs deleted file mode 100644 index c9583972c9..0000000000 --- a/crates/wasmtime/src/sig_registry.rs +++ /dev/null @@ -1,82 +0,0 @@ -//! Implement a registry of function signatures, for fast indirect call -//! signature checking. 
- -use std::collections::{hash_map, HashMap}; -use std::convert::TryFrom; -use wasmtime_environ::wasm::WasmFuncType; -use wasmtime_runtime::{VMSharedSignatureIndex, VMTrampoline}; - -/// WebAssembly requires that the caller and callee signatures in an indirect -/// call must match. To implement this efficiently, keep a registry of all -/// signatures, shared by all instances, so that call sites can just do an -/// index comparison. -#[derive(Debug, Default)] -pub struct SignatureRegistry { - // Map from a wasm actual function type to the index that it is assigned, - // shared amongst all wasm modules. - wasm2index: HashMap, - - // Map of all known wasm function signatures in this registry. This is - // keyed by `VMSharedSignatureIndex` above. - index_map: Vec, -} - -#[derive(Debug)] -struct Entry { - // The WebAssembly type signature, using wasm types. - wasm: WasmFuncType, - // The native trampoline used to invoke this type signature from `Func`. - // Note that the code memory for this trampoline is not owned by this - // type, but instead it's expected to be owned by the store that this - // registry lives within. - trampoline: VMTrampoline, -} - -impl SignatureRegistry { - /// Register a signature and return its unique index. - pub fn register( - &mut self, - wasm: &WasmFuncType, - trampoline: VMTrampoline, - ) -> VMSharedSignatureIndex { - let len = self.wasm2index.len(); - - match self.wasm2index.entry(wasm.clone()) { - hash_map::Entry::Occupied(entry) => *entry.get(), - hash_map::Entry::Vacant(entry) => { - // Keep `signature_hash` len under 2**32 -- VMSharedSignatureIndex::new(std::u32::MAX) - // is reserved for VMSharedSignatureIndex::default(). 
- assert!( - len < std::u32::MAX as usize, - "Invariant check: signature_hash.len() < std::u32::MAX" - ); - debug_assert_eq!(len, self.index_map.len()); - let index = VMSharedSignatureIndex::new(u32::try_from(len).unwrap()); - self.index_map.push(Entry { - wasm: wasm.clone(), - trampoline, - }); - entry.insert(index); - index - } - } - } - - /// Looks up a shared index from the wasm signature itself. - pub fn lookup(&self, wasm: &WasmFuncType) -> Option { - self.wasm2index.get(wasm).cloned() - } - - /// Looks up information known about a shared signature index. - /// - /// Note that for this operation to be semantically correct the `idx` must - /// have previously come from a call to `register` of this same object. - pub fn lookup_shared( - &self, - idx: VMSharedSignatureIndex, - ) -> Option<(&WasmFuncType, VMTrampoline)> { - self.index_map - .get(idx.bits() as usize) - .map(|e| (&e.wasm, e.trampoline)) - } -} diff --git a/crates/wasmtime/src/signatures.rs b/crates/wasmtime/src/signatures.rs new file mode 100644 index 0000000000..2faf3fa059 --- /dev/null +++ b/crates/wasmtime/src/signatures.rs @@ -0,0 +1,262 @@ +//! Implement a registry of function signatures, for fast indirect call +//! signature checking. + +use std::{ + collections::{hash_map::Entry, HashMap}, + sync::RwLock, +}; +use std::{convert::TryFrom, sync::Arc}; +use wasmtime_environ::entity::PrimaryMap; +use wasmtime_environ::wasm::{SignatureIndex, WasmFuncType}; +use wasmtime_runtime::{VMSharedSignatureIndex, VMTrampoline}; + +/// Represents a collection of shared signatures. +/// +/// This is used to register shared signatures with a shared signature registry. +/// +/// The collection will unregister any contained signatures with the registry +/// when dropped. +#[derive(Debug)] +pub struct SignatureCollection { + registry: Arc>, + signatures: PrimaryMap, + trampolines: HashMap, +} + +impl SignatureCollection { + /// Creates a new, empty signature collection given a signature registry. 
+ pub fn new(registry: &SignatureRegistry) -> Self { + Self { + registry: registry.0.clone(), + signatures: PrimaryMap::new(), + trampolines: HashMap::new(), + } + } + + /// Creates a signature collection for a module given the module's signatures + /// and trampolines. + pub fn new_for_module( + registry: &SignatureRegistry, + signatures: &PrimaryMap, + trampolines: impl Iterator, + ) -> Self { + let (signatures, trampolines) = registry + .0 + .write() + .unwrap() + .register_for_module(signatures, trampolines); + + Self { + registry: registry.0.clone(), + signatures, + trampolines, + } + } + + /// Treats the signature collection as a map from a module signature index to + /// registered shared signature indexes. + /// + /// This is used for looking up module shared signature indexes during module + /// instantiation. + pub fn as_module_map(&self) -> &PrimaryMap { + &self.signatures + } + + /// Gets the shared signature index given a module signature index. + pub fn shared_signature(&self, index: SignatureIndex) -> Option { + self.signatures.get(index).copied() + } + + /// Gets a trampoline for a registered signature. + pub fn trampoline(&self, index: VMSharedSignatureIndex) -> Option { + self.trampolines + .get(&index) + .map(|(_, trampoline)| *trampoline) + } + + /// Registers a single function with the collection. + /// + /// Returns the shared signature index for the function. 
+ pub fn register( + &mut self, + ty: &WasmFuncType, + trampoline: VMTrampoline, + ) -> VMSharedSignatureIndex { + let index = self.registry.write().unwrap().register(ty); + + let entry = match self.trampolines.entry(index) { + Entry::Occupied(e) => e.into_mut(), + Entry::Vacant(e) => e.insert((0, trampoline)), + }; + + // Increment the ref count + entry.0 += 1; + + index + } +} + +impl Drop for SignatureCollection { + fn drop(&mut self) { + if !self.signatures.is_empty() || !self.trampolines.is_empty() { + self.registry.write().unwrap().unregister_signatures(self); + } + } +} + +#[derive(Debug)] +struct RegistryEntry { + references: usize, + ty: WasmFuncType, +} + +#[derive(Debug, Default)] +struct SignatureRegistryInner { + map: HashMap, + entries: Vec>, + free: Vec, +} + +impl SignatureRegistryInner { + fn register_for_module( + &mut self, + signatures: &PrimaryMap, + trampolines: impl Iterator, + ) -> ( + PrimaryMap, + HashMap, + ) { + let mut sigs = PrimaryMap::default(); + let mut map = HashMap::default(); + + for (_, ty) in signatures.iter() { + sigs.push(self.register(ty)); + } + + for (index, trampoline) in trampolines { + map.insert(sigs[index], (1, trampoline)); + } + + (sigs, map) + } + + fn register(&mut self, ty: &WasmFuncType) -> VMSharedSignatureIndex { + let len = self.map.len(); + + let index = match self.map.entry(ty.clone()) { + Entry::Occupied(e) => *e.get(), + Entry::Vacant(e) => { + let (index, entry) = match self.free.pop() { + Some(index) => (index, &mut self.entries[index.bits() as usize]), + None => { + // Keep `index_map` len under 2**32 -- VMSharedSignatureIndex::new(std::u32::MAX) + // is reserved for VMSharedSignatureIndex::default(). 
+ assert!( + len < std::u32::MAX as usize, + "Invariant check: index_map.len() < std::u32::MAX" + ); + debug_assert_eq!(len, self.entries.len()); + + let index = VMSharedSignatureIndex::new(u32::try_from(len).unwrap()); + self.entries.push(None); + + (index, self.entries.last_mut().unwrap()) + } + }; + + // The entry should be missing for one just allocated or + // taken from the free list + assert!(entry.is_none()); + + *entry = Some(RegistryEntry { + references: 0, + ty: ty.clone(), + }); + + *e.insert(index) + } + }; + + self.entries[index.bits() as usize] + .as_mut() + .unwrap() + .references += 1; + + index + } + + fn unregister_signatures(&mut self, collection: &SignatureCollection) { + // If the collection has a populated signatures map, use it to deregister + // This is always 1:1 from entry to registration + if !collection.signatures.is_empty() { + for (_, index) in collection.signatures.iter() { + self.unregister_entry(*index, 1); + } + } else { + // Otherwise, use the trampolines map, which has reference counts related + // to the stored index + for (index, (count, _)) in collection.trampolines.iter() { + self.unregister_entry(*index, *count); + } + } + } + + fn unregister_entry(&mut self, index: VMSharedSignatureIndex, count: usize) { + let removed = { + let entry = self.entries[index.bits() as usize].as_mut().unwrap(); + + debug_assert!(entry.references >= count); + entry.references -= count; + + if entry.references == 0 { + self.map.remove(&entry.ty); + self.free.push(index); + true + } else { + false + } + }; + + if removed { + self.entries[index.bits() as usize] = None; + } + } +} + +// `SignatureRegistryInner` implements `Drop` in debug builds to assert that +// all signatures have been unregistered for the registry. 
+#[cfg(debug_assertions)] +impl Drop for SignatureRegistryInner { + fn drop(&mut self) { + assert!( + self.map.is_empty() && self.free.len() == self.entries.len(), + "signature registry not empty" + ); + } +} + +/// Implements a shared signature registry. +/// +/// WebAssembly requires that the caller and callee signatures in an indirect +/// call must match. To implement this efficiently, keep a registry of all +/// signatures, shared by all instances, so that call sites can just do an +/// index comparison. +#[derive(Debug)] +pub struct SignatureRegistry(Arc>); + +impl SignatureRegistry { + /// Creates a new shared signature registry. + pub fn new() -> Self { + Self(Arc::new(RwLock::new(SignatureRegistryInner::default()))) + } + + /// Looks up a function type from a shared signature index. + pub fn lookup_type(&self, index: VMSharedSignatureIndex) -> Option { + self.0 + .read() + .unwrap() + .entries + .get(index.bits() as usize) + .and_then(|e| e.as_ref().map(|e| &e.ty).cloned()) + } +} diff --git a/crates/wasmtime/src/store.rs b/crates/wasmtime/src/store.rs index e90b394d26..c3737b3bff 100644 --- a/crates/wasmtime/src/store.rs +++ b/crates/wasmtime/src/store.rs @@ -1,11 +1,12 @@ -use crate::frame_info::StoreFrameInfo; -use crate::sig_registry::SignatureRegistry; -use crate::trampoline::StoreInstanceHandle; -use crate::{Engine, Func, FuncType, Module, Trap}; +use crate::{ + module::ModuleRegistry, signatures::SignatureCollection, trampoline::StoreInstanceHandle, + Engine, Func, Module, ResourceLimiter, ResourceLimiterProxy, Trap, DEFAULT_INSTANCE_LIMIT, + DEFAULT_MEMORY_LIMIT, DEFAULT_TABLE_LIMIT, +}; use anyhow::{bail, Result}; use std::any::{Any, TypeId}; use std::cell::{Cell, RefCell}; -use std::collections::{hash_map::Entry, HashMap, HashSet}; +use std::collections::{hash_map::Entry, HashMap}; use std::convert::TryFrom; use std::fmt; use std::future::Future; @@ -15,12 +16,10 @@ use std::ptr; use std::rc::Rc; use std::sync::Arc; use std::task::{Context, 
Poll}; -use wasmtime_environ::wasm; -use wasmtime_jit::{CompiledModule, ModuleCode, TypeTables}; use wasmtime_runtime::{ - Export, InstanceAllocator, InstanceHandle, OnDemandInstanceAllocator, SignalHandler, - StackMapRegistry, TrapInfo, VMCallerCheckedAnyfunc, VMContext, VMExternRef, - VMExternRefActivationsTable, VMInterrupts, VMSharedSignatureIndex, VMTrampoline, + InstanceAllocator, InstanceHandle, ModuleInfo, OnDemandInstanceAllocator, SignalHandler, + TrapInfo, VMCallerCheckedAnyfunc, VMContext, VMExternRef, VMExternRefActivationsTable, + VMInterrupts, VMTrampoline, }; /// Used to associate instances with the store. @@ -71,20 +70,13 @@ pub struct Store { pub(crate) struct StoreInner { engine: Engine, - /// The map of all host functions registered with this store's signature registry - host_funcs: RefCell>>, interrupts: Arc, - signatures: RefCell, instances: RefCell>, signal_handler: RefCell>>>, externref_activations_table: VMExternRefActivationsTable, - stack_map_registry: StackMapRegistry, - /// Information about JIT code which allows us to test if a program counter - /// is in JIT code, lookup trap information, etc. - frame_info: RefCell, - /// Set of all compiled modules that we're holding a strong reference to - /// the module's code for. This includes JIT functions, trampolines, etc. - modules: RefCell>, + modules: RefCell, + // The signatures and trampolines for `Func` objects + signatures: RefCell, // Numbers of resources instantiated in this store. instance_count: Cell, memory_count: Cell, @@ -98,6 +90,7 @@ pub(crate) struct StoreInner { current_poll_cx: Cell<*mut Context<'static>>, out_of_gas_behavior: Cell, context_values: RefCell>>, + limiter: Option>, } #[derive(Copy, Clone)] @@ -129,27 +122,57 @@ impl Hash for HostInfoKey { } impl Store { - /// Creates a new store to be associated with the given [`Engine`]. - pub fn new(engine: &Engine) -> Store { - // Ensure that wasmtime_runtime's signal handlers are configured. 
Note - // that at the `Store` level it means we should perform this - // once-per-thread. Platforms like Unix, however, only require this - // once-per-program. In any case this is safe to call many times and - // each one that's not relevant just won't do anything. - wasmtime_runtime::init_traps(); + /// Creates a new [`Store`] to be associated with the given [`Engine`]. + /// + /// The created [`Store`] will place no additional limits on the size of linear + /// memories or tables at runtime. Linear memories and tables will be allowed to + /// grow to any upper limit specified in their definitions. + /// + /// The store will limit the number of instances, linear memories, and tables created to 10,000. + /// + /// Use [`Store::new_with_limits`] with a [`StoreLimitsBuilder`](crate::StoreLimitsBuilder) to + /// specify different limits for the store. + pub fn new(engine: &Engine) -> Self { + Self::new_(engine, None) + } - Store { + /// Creates a new [`Store`] to be associated with the given [`Engine`] and using the supplied + /// resource limiter. + /// + /// A [`ResourceLimiter`] can be implemented by hosts to control the size of WebAssembly + /// linear memories and tables when a request is made to grow them. + /// + /// [`StoreLimitsBuilder`](crate::StoreLimitsBuilder) can be used to create a + /// [`StoreLimits`](crate::StoreLimits) that implements [`ResourceLimiter`] using + /// static limit values. 
+ /// + /// # Example + /// + /// ```rust + /// # use wasmtime::{Engine, Store, StoreLimitsBuilder}; + /// // Place a limit on linear memories so they cannot grow beyond 1 MiB + /// let engine = Engine::default(); + /// let store = Store::new_with_limits(&engine, StoreLimitsBuilder::new().memory_pages(16).build()); + /// ``` + pub fn new_with_limits(engine: &Engine, limiter: impl ResourceLimiter + 'static) -> Self { + Self::new_(engine, Some(Rc::new(ResourceLimiterProxy(limiter)))) + } + + fn new_(engine: &Engine, limiter: Option>) -> Self { + // Ensure that wasmtime_runtime's signal handlers are configured. This + // is the per-program initialization required for handling traps, such + // as configuring signals, vectored exception handlers, etc. + wasmtime_runtime::init_traps(crate::module::GlobalModuleRegistry::is_wasm_pc); + + Self { inner: Rc::new(StoreInner { engine: engine.clone(), - host_funcs: RefCell::new(HashMap::new()), interrupts: Arc::new(Default::default()), - signatures: RefCell::new(Default::default()), instances: RefCell::new(Vec::new()), signal_handler: RefCell::new(None), externref_activations_table: VMExternRefActivationsTable::new(), - stack_map_registry: StackMapRegistry::default(), - frame_info: Default::default(), - modules: Default::default(), + modules: RefCell::new(ModuleRegistry::default()), + signatures: RefCell::new(SignatureCollection::new(engine.signatures())), instance_count: Default::default(), memory_count: Default::default(), table_count: Default::default(), @@ -160,6 +183,7 @@ impl Store { current_poll_cx: Cell::new(ptr::null_mut()), out_of_gas_behavior: Cell::new(OutOfGas::Trap), context_values: RefCell::new(HashMap::new()), + limiter, }), } } @@ -179,36 +203,8 @@ impl Store { }) } - pub(crate) fn get_host_anyfunc( - &self, - instance: &InstanceHandle, - ty: &FuncType, - trampoline: VMTrampoline, - ) -> *mut VMCallerCheckedAnyfunc { - let mut funcs = self.inner.host_funcs.borrow_mut(); - - let anyfunc = funcs.entry(unsafe { 
instance.clone() }).or_insert_with(|| { - let mut anyfunc = match instance - .lookup_by_declaration(&wasm::EntityIndex::Function(wasm::FuncIndex::from_u32(0))) - { - Export::Function(f) => unsafe { f.anyfunc.as_ref() }.clone(), - _ => unreachable!(), - }; - - // Register the function with this store's signature registry - anyfunc.type_index = self - .inner - .signatures - .borrow_mut() - .register(ty.as_wasm_func_type(), trampoline); - - Box::new(anyfunc) - }); - - &mut **anyfunc - } - /// Returns the [`Engine`] that this store is associated with. + #[inline] pub fn engine(&self) -> &Engine { &self.inner.engine } @@ -219,7 +215,7 @@ impl Store { pub fn get(&self) -> Option<&T> { let values = self.inner.context_values.borrow(); - // Safety: a context value cannot be removed once added and therefore the addres is + // Safety: a context value cannot be removed once added and therefore the address is // stable for the life of the store values .get(&TypeId::of::()) @@ -241,92 +237,39 @@ impl Store { } } - pub(crate) fn signatures(&self) -> &RefCell { + pub(crate) fn limiter(&self) -> &Option> { + &self.inner.limiter + } + + pub(crate) fn signatures(&self) -> &RefCell { &self.inner.signatures } - pub(crate) fn lookup_shared_signature<'a>( - &'a self, - types: &'a TypeTables, - ) -> impl Fn(wasm::SignatureIndex) -> VMSharedSignatureIndex + 'a { - move |index| { - self.signatures() - .borrow() - .lookup(&types.wasm_signatures[index]) - .expect("signature not previously registered") + pub(crate) fn lookup_trampoline(&self, anyfunc: &VMCallerCheckedAnyfunc) -> VMTrampoline { + // Look up the trampoline with the store's trampolines (from `Func`). 
+ if let Some(trampoline) = self + .inner + .signatures + .borrow() + .trampoline(anyfunc.type_index) + { + return trampoline; } - } - pub(crate) fn register_module(&self, module: &Module) { - // All modules register their JIT code in a store for two reasons - // currently: - // - // * First we only catch signals/traps if the program counter falls - // within the jit code of an instantiated wasm module. This ensures - // we don't catch accidental Rust/host segfaults. - // - // * Second when generating a backtrace we'll use this mapping to - // only generate wasm frames for instruction pointers that fall - // within jit code. - self.register_jit_code(module.compiled_module()); + // Look up the trampoline with the registered modules + if let Some(trampoline) = self.inner.modules.borrow().lookup_trampoline(anyfunc) { + return trampoline; + } - // We need to know about all the stack maps of all instantiated modules - // so when performing a GC we know about all wasm frames that we find - // on the stack. - self.register_stack_maps(module.compiled_module()); - - // Signatures are loaded into our `SignatureRegistry` here - // once-per-module (and once-per-signature). This allows us to create - // a `Func` wrapper for any function in the module, which requires that - // we know about the signature and trampoline for all instances. - self.register_signatures(module); - - // And finally with a module being instantiated into this `Store` we - // need to preserve its jit-code. References to this module's code and - // trampolines are not owning-references so it's our responsibility to - // keep it all alive within the `Store`. 
+ // Lastly, check with the engine (for `HostFunc`) self.inner - .modules - .borrow_mut() - .insert(ArcModuleCode(module.compiled_module().code().clone())); - } - - fn register_jit_code(&self, module: &CompiledModule) { - let functions = module.finished_functions(); - let first_pc = match functions.values().next() { - Some(f) => unsafe { (**f).as_ptr() as usize }, - None => return, - }; - // Only register this module if it hasn't already been registered. - let mut info = self.inner.frame_info.borrow_mut(); - if !info.contains_pc(first_pc) { - info.register(module); - } - } - - fn register_stack_maps(&self, module: &CompiledModule) { - self.stack_map_registry() - .register_stack_maps(module.stack_maps().map(|(func, stack_maps)| unsafe { - let ptr = (*func).as_ptr(); - let len = (*func).len(); - let start = ptr as usize; - let end = ptr as usize + len; - let range = start..end; - (range, stack_maps) - })); - } - - fn register_signatures(&self, module: &Module) { - let trampolines = module.compiled_module().trampolines(); - let mut signatures = self.signatures().borrow_mut(); - for (index, wasm) in module.types().wasm_signatures.iter() { - signatures.register(wasm, trampolines[index]); - } + .engine + .host_func_signatures() + .trampoline(anyfunc.type_index) + .expect("trampoline missing") } pub(crate) fn bump_resource_counts(&self, module: &Module) -> Result<()> { - let config = self.engine().config(); - fn bump(slot: &Cell, max: usize, amt: usize, desc: &str) -> Result<()> { let new = slot.get().saturating_add(amt); if new > max { @@ -343,20 +286,11 @@ impl Store { let module = module.env_module(); let memories = module.memory_plans.len() - module.num_imported_memories; let tables = module.table_plans.len() - module.num_imported_tables; + let (max_instances, max_memories, max_tables) = self.limits(); - bump( - &self.inner.instance_count, - config.max_instances, - 1, - "instance", - )?; - bump( - &self.inner.memory_count, - config.max_memories, - memories, - 
"memory", - )?; - bump(&self.inner.table_count, config.max_tables, tables, "table")?; + bump(&self.inner.instance_count, max_instances, 1, "instance")?; + bump(&self.inner.memory_count, max_memories, memories, "memory")?; + bump(&self.inner.table_count, max_tables, tables, "table")?; Ok(()) } @@ -383,7 +317,7 @@ impl Store { .borrow() .iter() .any(|i| i.handle.vmctx_ptr() == handle.vmctx_ptr()) - || self.inner.host_funcs.borrow().get(&handle).is_some() + || self.inner.engine.host_func_anyfunc(&handle).is_some() ); StoreInstanceHandle { store: self.clone(), @@ -400,6 +334,7 @@ impl Store { *self.inner.signal_handler.borrow_mut() = handler; } + #[inline] pub(crate) fn interrupts(&self) -> &VMInterrupts { &self.inner.interrupts } @@ -503,28 +438,27 @@ impl Store { } } + #[inline] pub(crate) fn externref_activations_table(&self) -> &VMExternRefActivationsTable { &self.inner.externref_activations_table } - pub(crate) fn stack_map_registry(&self) -> &StackMapRegistry { - &self.inner.stack_map_registry + #[inline] + pub(crate) fn modules(&self) -> &RefCell { + &self.inner.modules } - pub(crate) fn frame_info(&self) -> &RefCell { - &self.inner.frame_info + #[inline] + pub(crate) fn module_info_lookup(&self) -> &dyn wasmtime_runtime::ModuleInfoLookup { + self.inner.as_ref() } /// Perform garbage collection of `ExternRef`s. pub fn gc(&self) { // For this crate's API, we ensure that `set_stack_canary` invariants - // are upheld for all host-->Wasm calls, and we register every module - // used with this store in `self.inner.stack_map_registry`. + // are upheld for all host-->Wasm calls. 
unsafe { - wasmtime_runtime::gc( - &self.inner.stack_map_registry, - &self.inner.externref_activations_table, - ); + wasmtime_runtime::gc(self.inner.as_ref(), &self.inner.externref_activations_table); } } @@ -655,6 +589,7 @@ impl Store { }); } + #[inline] pub(crate) fn async_support(&self) -> bool { self.inner.engine.config().async_support } @@ -716,7 +651,8 @@ impl Store { } unsafe { - let before = wasmtime_runtime::TlsRestore::take(); + let before = wasmtime_runtime::TlsRestore::take() + .map_err(|e| Trap::from_runtime(self, e))?; let res = (*suspend).suspend(()); before.replace().map_err(|e| Trap::from_runtime(self, e))?; res?; @@ -740,9 +676,15 @@ impl Store { debug_assert!(self.async_support()); debug_assert!(config.async_stack_size > 0); - type SuspendType = wasmtime_fiber::Suspend, (), Result<(), Trap>>; + let stack = self + .inner + .engine + .allocator() + .allocate_fiber_stack() + .map_err(|e| Trap::from(anyhow::Error::from(e)))?; + let mut slot = None; - let func = |keep_going, suspend: &SuspendType| { + let fiber = wasmtime_fiber::Fiber::new(stack, |keep_going, suspend| { // First check and see if we were interrupted/dropped, and only // continue if we haven't been. keep_going?; @@ -760,46 +702,19 @@ impl Store { slot = Some(func()); Ok(()) - }; - - let (fiber, stack) = match self.inner.engine.allocator().allocate_fiber_stack() { - Ok(stack) => { - // Use the returned stack and deallocate it when finished - ( - unsafe { - wasmtime_fiber::Fiber::new_with_stack(stack, func) - .map_err(|e| Trap::from(anyhow::Error::from(e)))? 
- }, - stack, - ) - } - Err(wasmtime_runtime::FiberStackError::NotSupported) => { - // The allocator doesn't support custom fiber stacks for the current platform - // Request that the fiber itself allocate the stack - ( - wasmtime_fiber::Fiber::new(config.async_stack_size, func) - .map_err(|e| Trap::from(anyhow::Error::from(e)))?, - std::ptr::null_mut(), - ) - } - Err(e) => return Err(Trap::from(anyhow::Error::from(e))), - }; + }) + .map_err(|e| Trap::from(anyhow::Error::from(e)))?; // Once we have the fiber representing our synchronous computation, we // wrap that in a custom future implementation which does the // translation from the future protocol to our fiber API. - FiberFuture { - fiber, - store: self, - stack, - } - .await?; + FiberFuture { fiber, store: self }.await?; + return Ok(slot.unwrap()); struct FiberFuture<'a> { fiber: wasmtime_fiber::Fiber<'a, Result<(), Trap>, (), Result<(), Trap>>, store: &'a Store, - stack: *mut u8, } impl Future for FiberFuture<'_> { @@ -807,7 +722,7 @@ impl Store { fn poll(self: Pin<&mut Self>, cx: &mut Context) -> Poll { // We need to carry over this `cx` into our fiber's runtime - // for when it trys to poll sub-futures that are created. Doing + // for when it tries to poll sub-futures that are created. Doing // this must be done unsafely, however, since `cx` is only alive // for this one singular function call. Here we do a `transmute` // to extend the lifetime of `Context` so it can be stored in @@ -864,13 +779,12 @@ impl Store { // callers that they shouldn't be doing that. 
debug_assert!(result.is_ok()); } - if !self.stack.is_null() { - unsafe { - self.store - .engine() - .allocator() - .deallocate_fiber_stack(self.stack) - }; + + unsafe { + self.store + .engine() + .allocator() + .deallocate_fiber_stack(self.fiber.stack()); } } } @@ -934,17 +848,26 @@ impl Store { Err(trap) => unsafe { wasmtime_runtime::raise_user_trap(trap.into()) }, } } + + fn limits(&self) -> (usize, usize, usize) { + self.inner + .limiter + .as_ref() + .map(|l| (l.instances(), l.memories(), l.tables())) + .unwrap_or(( + DEFAULT_INSTANCE_LIMIT, + DEFAULT_MEMORY_LIMIT, + DEFAULT_TABLE_LIMIT, + )) + } } unsafe impl TrapInfo for Store { + #[inline] fn as_any(&self) -> &dyn Any { self } - fn is_wasm_trap(&self, addr: usize) -> bool { - self.frame_info().borrow().lookup_trap_info(addr).is_some() - } - fn custom_signal_handler(&self, call: &dyn Fn(&SignalHandler) -> bool) -> bool { if let Some(handler) = &*self.inner.signal_handler.borrow() { return call(handler); @@ -952,10 +875,6 @@ unsafe impl TrapInfo for Store { false } - fn max_wasm_stack(&self) -> usize { - self.engine().config().max_wasm_stack - } - fn out_of_gas(&self) { match self.inner.out_of_gas_behavior.get() { OutOfGas::Trap => self.out_of_gas_trap(), @@ -978,6 +897,7 @@ unsafe impl TrapInfo for Store { } } + #[inline] fn interrupts(&self) -> &VMInterrupts { &self.inner.interrupts } @@ -999,7 +919,7 @@ impl fmt::Debug for Store { impl Drop for StoreInner { fn drop(&mut self) { let allocator = self.engine.allocator(); - let ondemand = OnDemandInstanceAllocator::new(self.engine.config().mem_creator.clone()); + let ondemand = OnDemandInstanceAllocator::default(); for instance in self.instances.borrow().iter() { unsafe { if instance.ondemand { @@ -1012,6 +932,12 @@ impl Drop for StoreInner { } } +impl wasmtime_runtime::ModuleInfoLookup for StoreInner { + fn lookup(&self, pc: usize) -> Option> { + self.modules.borrow().lookup_module(pc) + } +} + /// A threadsafe handle used to interrupt instances executing 
within a /// particular `Store`. /// @@ -1040,24 +966,6 @@ impl InterruptHandle { } } -// Wrapper struct to implement hash/equality based on the pointer value of the -// `Arc` in question. -struct ArcModuleCode(Arc); - -impl PartialEq for ArcModuleCode { - fn eq(&self, other: &ArcModuleCode) -> bool { - Arc::ptr_eq(&self.0, &other.0) - } -} - -impl Eq for ArcModuleCode {} - -impl Hash for ArcModuleCode { - fn hash(&self, hasher: &mut H) { - Arc::as_ptr(&self.0).hash(hasher) - } -} - struct Reset<'a, T: Copy>(&'a Cell, T); impl Drop for Reset<'_, T> { diff --git a/crates/wasmtime/src/trampoline.rs b/crates/wasmtime/src/trampoline.rs index 0e417e17b2..5ebc436eba 100644 --- a/crates/wasmtime/src/trampoline.rs +++ b/crates/wasmtime/src/trampoline.rs @@ -19,8 +19,8 @@ use std::sync::Arc; use wasmtime_environ::{entity::PrimaryMap, wasm, Module}; use wasmtime_runtime::{ Imports, InstanceAllocationRequest, InstanceAllocator, InstanceHandle, - OnDemandInstanceAllocator, StackMapRegistry, VMExternRefActivationsTable, VMFunctionBody, - VMFunctionImport, VMSharedSignatureIndex, + OnDemandInstanceAllocator, VMExternRefActivationsTable, VMFunctionBody, VMFunctionImport, + VMSharedSignatureIndex, }; /// A wrapper around `wasmtime_runtime::InstanceHandle` which pairs it with the @@ -62,22 +62,25 @@ fn create_handle( imports.functions = func_imports; unsafe { + let config = store.engine().config(); // Use the on-demand allocator when creating handles associated with host objects // The configured instance allocator should only be used when creating module instances // as we don't want host objects to count towards instance limits. 
- let handle = OnDemandInstanceAllocator::new(store.engine().config().mem_creator.clone()) - .allocate(InstanceAllocationRequest { + let handle = OnDemandInstanceAllocator::new(config.mem_creator.clone(), 0).allocate( + InstanceAllocationRequest { module: Arc::new(module), finished_functions: &finished_functions, imports, - lookup_shared_signature: &|_| shared_signature_id.unwrap(), + shared_signatures: shared_signature_id.into(), host_state, interrupts: store.interrupts(), externref_activations_table: store.externref_activations_table() as *const VMExternRefActivationsTable as *mut _, - stack_map_registry: store.stack_map_registry() as *const StackMapRegistry as *mut _, - })?; + module_info_lookup: Some(store.module_info_lookup()), + limiter: store.limiter().as_ref(), + }, + )?; Ok(store.add_instance(handle, true)) } diff --git a/crates/wasmtime/src/trampoline/create_handle.rs b/crates/wasmtime/src/trampoline/create_handle.rs deleted file mode 100644 index 4ecf261b1e..0000000000 --- a/crates/wasmtime/src/trampoline/create_handle.rs +++ /dev/null @@ -1,51 +0,0 @@ -//! Support for a calling of an imported function. 
- -use crate::trampoline::StoreInstanceHandle; -use crate::Store; -use anyhow::Result; -use std::any::Any; -use std::sync::Arc; -use wasmtime_environ::entity::PrimaryMap; -use wasmtime_environ::wasm::DefinedFuncIndex; -use wasmtime_environ::Module; -use wasmtime_runtime::{ - Imports, InstanceAllocationRequest, InstanceAllocator, StackMapRegistry, - VMExternRefActivationsTable, VMFunctionBody, VMFunctionImport, VMSharedSignatureIndex, -}; - -pub(crate) fn create_handle( - module: Module, - store: &Store, - finished_functions: PrimaryMap, - host_state: Box, - func_imports: &[VMFunctionImport], - shared_signature_id: Option, -) -> Result { - let mut imports = Imports::default(); - imports.functions = func_imports; - let module = Arc::new(module); - - unsafe { - // Use the default allocator when creating handles associated with host objects - // The configured instance allocator should only be used when creating module instances - // as we don't want host objects to count towards instance limits. - let handle = store - .engine() - .config() - .default_instance_allocator - .allocate(InstanceAllocationRequest { - module: module.clone(), - finished_functions: &finished_functions, - imports, - lookup_shared_signature: &|_| shared_signature_id.unwrap(), - host_state, - interrupts: store.interrupts(), - externref_activations_table: store.externref_activations_table() - as *const VMExternRefActivationsTable - as *mut _, - stack_map_registry: store.stack_map_registry() as *const StackMapRegistry as *mut _, - })?; - - Ok(store.add_instance(handle, true)) - } -} diff --git a/crates/wasmtime/src/trampoline/func.rs b/crates/wasmtime/src/trampoline/func.rs index c0e33dd2b0..b381c561ea 100644 --- a/crates/wasmtime/src/trampoline/func.rs +++ b/crates/wasmtime/src/trampoline/func.rs @@ -1,6 +1,6 @@ //! Support for a calling of an imported function. 
-use crate::{sig_registry::SignatureRegistry, Config, FuncType, Trap}; +use crate::{Config, FuncType, Store, Trap}; use anyhow::Result; use std::any::Any; use std::cmp; @@ -18,6 +18,7 @@ use wasmtime_jit::trampoline::{ self, binemit, pretty_error, Context, FunctionBuilder, FunctionBuilderContext, }; use wasmtime_jit::CodeMemory; +use wasmtime_jit::{blank_sig, wasmtime_call_conv}; use wasmtime_runtime::{ Imports, InstanceAllocationRequest, InstanceAllocator, InstanceHandle, OnDemandInstanceAllocator, VMContext, VMFunctionBody, VMSharedSignatureIndex, VMTrampoline, @@ -91,16 +92,7 @@ fn make_trampoline( // Mostly reverse copy of the similar method from wasmtime's // wasmtime-jit/src/compiler.rs. let pointer_type = isa.pointer_type(); - let mut stub_sig = ir::Signature::new(isa.frontend_config().default_call_conv); - - // Add the caller/callee `vmctx` parameters. - stub_sig.params.push(ir::AbiParam::special( - pointer_type, - ir::ArgumentPurpose::VMContext, - )); - - // Add the caller `vmctx` parameter. - stub_sig.params.push(ir::AbiParam::new(pointer_type)); + let mut stub_sig = blank_sig(isa, wasmtime_call_conv(isa)); // Add the `values_vec` parameter. stub_sig.params.push(ir::AbiParam::new(pointer_type)); @@ -220,8 +212,15 @@ fn create_function_trampoline( // reference types which requires safepoints. 
let isa = config.target_isa_with_reference_types(); - let pointer_type = isa.pointer_type(); - let sig = ft.get_wasmtime_signature(pointer_type); + let mut sig = blank_sig(&*isa, wasmtime_call_conv(&*isa)); + sig.params.extend( + ft.params() + .map(|p| ir::AbiParam::new(p.get_wasmtime_type())), + ); + sig.returns.extend( + ft.results() + .map(|p| ir::AbiParam::new(p.get_wasmtime_type())), + ); let mut fn_builder_ctx = FunctionBuilderContext::new(); let mut module = Module::new(); @@ -263,28 +262,33 @@ pub fn create_function( ft: &FuncType, func: Box Result<(), Trap>>, config: &Config, - registry: Option<&mut SignatureRegistry>, + store: Option<&Store>, ) -> Result<(InstanceHandle, VMTrampoline)> { let (module, finished_functions, trampoline, trampoline_state) = create_function_trampoline(config, ft, func)?; - // If there is no signature registry, use the default signature index which is + // If there is no store, use the default signature index which is // guaranteed to trap if there is ever an indirect call on the function (should not happen) - let shared_signature_id = registry - .map(|r| r.register(ft.as_wasm_func_type(), trampoline)) + let shared_signature_id = store + .map(|s| { + s.signatures() + .borrow_mut() + .register(ft.as_wasm_func_type(), trampoline) + }) .unwrap_or(VMSharedSignatureIndex::default()); unsafe { Ok(( - OnDemandInstanceAllocator::new(None).allocate(InstanceAllocationRequest { + OnDemandInstanceAllocator::default().allocate(InstanceAllocationRequest { module: Arc::new(module), finished_functions: &finished_functions, imports: Imports::default(), - lookup_shared_signature: &|_| shared_signature_id, + shared_signatures: shared_signature_id.into(), host_state: Box::new(trampoline_state), interrupts: std::ptr::null(), externref_activations_table: std::ptr::null_mut(), - stack_map_registry: std::ptr::null_mut(), + module_info_lookup: None, + limiter: None, })?, trampoline, )) @@ -308,15 +312,16 @@ pub unsafe fn create_raw_function( 
finished_functions.push(func); Ok( - OnDemandInstanceAllocator::new(None).allocate(InstanceAllocationRequest { + OnDemandInstanceAllocator::default().allocate(InstanceAllocationRequest { module: Arc::new(module), finished_functions: &finished_functions, imports: Imports::default(), - lookup_shared_signature: &|_| shared_signature_id, + shared_signatures: shared_signature_id.into(), host_state, interrupts: std::ptr::null(), externref_activations_table: std::ptr::null_mut(), - stack_map_registry: std::ptr::null_mut(), + module_info_lookup: None, + limiter: None, })?, ) } diff --git a/crates/wasmtime/src/trampoline/memory.rs b/crates/wasmtime/src/trampoline/memory.rs index 0876ead225..9f3ea772b0 100644 --- a/crates/wasmtime/src/trampoline/memory.rs +++ b/crates/wasmtime/src/trampoline/memory.rs @@ -37,6 +37,10 @@ impl RuntimeLinearMemory for LinearMemoryProxy { self.mem.size() } + fn maximum(&self) -> Option { + self.mem.maximum() + } + fn grow(&self, delta: u32) -> Option { self.mem.grow(delta) } diff --git a/crates/wasmtime/src/trap.rs b/crates/wasmtime/src/trap.rs index acbca71dbe..1d59aa564c 100644 --- a/crates/wasmtime/src/trap.rs +++ b/crates/wasmtime/src/trap.rs @@ -161,7 +161,7 @@ impl Trap { maybe_interrupted, } => { let mut code = store - .frame_info() + .modules() .borrow() .lookup_trap_info(pc) .map(|info| info.trap_code) @@ -182,7 +182,7 @@ impl Trap { } } - fn new_wasm( + pub(crate) fn new_wasm( store: Option<&Store>, trap_pc: Option, code: ir::TrapCode, @@ -239,7 +239,7 @@ impl Trap { // (the call instruction) so we subtract one as the lookup. 
let pc_to_lookup = if Some(pc) == trap_pc { pc } else { pc - 1 }; if let Some((info, has_unparsed_debuginfo)) = - store.frame_info().borrow().lookup_frame_info(pc_to_lookup) + store.modules().borrow().lookup_frame_info(pc_to_lookup) { wasm_trace.push(info); diff --git a/crates/wasmtime/src/types.rs b/crates/wasmtime/src/types.rs index 5a090d794c..0241aee282 100644 --- a/crates/wasmtime/src/types.rs +++ b/crates/wasmtime/src/types.rs @@ -204,8 +204,7 @@ impl ExternType { ) -> ExternType { match ty { EntityType::Function(idx) => { - let sig = &types.wasm_signatures[*idx]; - FuncType::from_wasm_func_type(sig).into() + FuncType::from_wasm_func_type(types.wasm_signatures[*idx].clone()).into() } EntityType::Global(ty) => GlobalType::from_wasmtime_global(ty).into(), EntityType::Memory(ty) => MemoryType::from_wasmtime_memory(ty).into(), @@ -298,30 +297,8 @@ impl FuncType { &self.sig } - /// Get the Cranelift-compatible function signature. - pub(crate) fn get_wasmtime_signature(&self, pointer_type: ir::Type) -> ir::Signature { - use wasmtime_environ::ir::{AbiParam, ArgumentPurpose, Signature}; - use wasmtime_jit::native; - let call_conv = native::call_conv(); - let mut params = vec![ - AbiParam::special(pointer_type, ArgumentPurpose::VMContext), - AbiParam::new(pointer_type), - ]; - params.extend(self.params().map(|p| AbiParam::new(p.get_wasmtime_type()))); - let returns = self - .results() - .map(|p| AbiParam::new(p.get_wasmtime_type())) - .collect::>(); - - Signature { - params, - returns, - call_conv, - } - } - - pub(crate) fn from_wasm_func_type(sig: &wasm::WasmFuncType) -> FuncType { - FuncType { sig: sig.clone() } + pub(crate) fn from_wasm_func_type(sig: wasm::WasmFuncType) -> FuncType { + Self { sig } } } diff --git a/crates/wasmtime/src/types/matching.rs b/crates/wasmtime/src/types/matching.rs index a767bbb7e1..7405194c12 100644 --- a/crates/wasmtime/src/types/matching.rs +++ b/crates/wasmtime/src/types/matching.rs @@ -1,4 +1,4 @@ -use crate::{Extern, Store}; +use 
crate::{signatures::SignatureCollection, Extern, Store}; use anyhow::{bail, Context, Result}; use wasmtime_environ::wasm::{ EntityType, Global, InstanceTypeIndex, Memory, ModuleTypeIndex, SignatureIndex, Table, @@ -6,6 +6,7 @@ use wasmtime_environ::wasm::{ use wasmtime_jit::TypeTables; pub struct MatchCx<'a> { + pub signatures: &'a SignatureCollection, pub types: &'a TypeTables, pub store: &'a Store, } @@ -70,12 +71,7 @@ impl MatchCx<'_> { } pub fn func(&self, expected: SignatureIndex, actual: &crate::Func) -> Result<()> { - let matches = match self - .store - .signatures() - .borrow() - .lookup(&self.types.wasm_signatures[expected]) - { + let matches = match self.signatures.shared_signature(expected) { Some(idx) => actual.sig_index() == idx, // If our expected signature isn't registered, then there's no way // that `actual` can match it. @@ -114,15 +110,19 @@ impl MatchCx<'_> { let module = actual.compiled_module().module(); self.imports_match( expected, + actual.signatures(), actual.types(), module.imports().map(|(name, field, ty)| { assert!(field.is_none()); // should be true if module linking is enabled (name, ty) }), )?; - self.exports_match(expected_sig.exports, actual.types(), |name| { - module.exports.get(name).map(|idx| module.type_of(*idx)) - })?; + self.exports_match( + expected_sig.exports, + actual.signatures(), + actual.types(), + |name| module.exports.get(name).map(|idx| module.type_of(*idx)), + )?; Ok(()) } @@ -133,6 +133,7 @@ impl MatchCx<'_> { fn imports_match<'a>( &self, expected: ModuleTypeIndex, + actual_signatures: &SignatureCollection, actual_types: &TypeTables, actual_imports: impl Iterator, ) -> Result<()> { @@ -146,10 +147,11 @@ impl MatchCx<'_> { None => bail!("expected type doesn't import {:?}", name), }; MatchCx { + signatures: actual_signatures, types: actual_types, store: self.store, } - .extern_ty_matches(&actual_ty, expected_ty, self.types) + .extern_ty_matches(&actual_ty, expected_ty, self.signatures, self.types) .with_context(|| 
format!("module import {:?} incompatible", name))?; } Ok(()) @@ -160,6 +162,7 @@ impl MatchCx<'_> { fn exports_match( &self, expected: InstanceTypeIndex, + actual_signatures: &SignatureCollection, actual_types: &TypeTables, lookup: impl Fn(&str) -> Option, ) -> Result<()> { @@ -169,7 +172,7 @@ impl MatchCx<'_> { for (name, expected) in self.types.instance_signatures[expected].exports.iter() { match lookup(name) { Some(ty) => self - .extern_ty_matches(expected, &ty, actual_types) + .extern_ty_matches(expected, &ty, actual_signatures, actual_types) .with_context(|| format!("export {:?} incompatible", name))?, None => bail!("failed to find export {:?}", name), } @@ -183,6 +186,7 @@ impl MatchCx<'_> { &self, expected: &EntityType, actual_ty: &EntityType, + actual_signatures: &SignatureCollection, actual_types: &TypeTables, ) -> Result<()> { let actual_desc = match actual_ty { @@ -221,7 +225,7 @@ impl MatchCx<'_> { EntityType::Instance(expected) => match actual_ty { EntityType::Instance(actual) => { let sig = &actual_types.instance_signatures[*actual]; - self.exports_match(*expected, actual_types, |name| { + self.exports_match(*expected, actual_signatures, actual_types, |name| { sig.exports.get(name).cloned() })?; Ok(()) @@ -237,15 +241,19 @@ impl MatchCx<'_> { self.imports_match( *expected, + actual_signatures, actual_types, actual_module_sig .imports .iter() .map(|(module, ty)| (module.as_str(), ty.clone())), )?; - self.exports_match(expected_module_sig.exports, actual_types, |name| { - actual_instance_sig.exports.get(name).cloned() - })?; + self.exports_match( + expected_module_sig.exports, + actual_signatures, + actual_types, + |name| actual_instance_sig.exports.get(name).cloned(), + )?; Ok(()) } _ => bail!("expected module, but found {}", actual_desc), diff --git a/crates/wasmtime/src/values.rs b/crates/wasmtime/src/values.rs index 4dc6474513..7d6adbbedc 100644 --- a/crates/wasmtime/src/values.rs +++ b/crates/wasmtime/src/values.rs @@ -98,7 +98,7 @@ impl Val { let 
externref_ptr = x.inner.as_raw(); store .externref_activations_table() - .insert_with_gc(x.inner, store.stack_map_registry()); + .insert_with_gc(x.inner, store.module_info_lookup()); ptr::write(p as *mut *mut u8, externref_ptr) } Val::FuncRef(f) => ptr::write( diff --git a/crates/wast/Cargo.toml b/crates/wast/Cargo.toml index b2b9cf9752..8d76fe6c9d 100644 --- a/crates/wast/Cargo.toml +++ b/crates/wast/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wasmtime-wast" -version = "0.25.0" +version = "0.26.0" authors = ["The Wasmtime Project Developers"] description = "wast testing support for wasmtime" license = "Apache-2.0 WITH LLVM-exception" @@ -12,8 +12,8 @@ edition = "2018" [dependencies] anyhow = "1.0.19" -wasmtime = { path = "../wasmtime", version = "0.25.0", default-features = false } -wast = "35.0.0" +wasmtime = { path = "../wasmtime", version = "0.26.0", default-features = false } +wast = "35.0.1" [badges] maintenance = { status = "actively-developed" } diff --git a/crates/wast/src/spectest.rs b/crates/wast/src/spectest.rs index ef744fa42d..cd8b7bc9d1 100644 --- a/crates/wast/src/spectest.rs +++ b/crates/wast/src/spectest.rs @@ -39,7 +39,7 @@ pub fn link_spectest(linker: &mut Linker) -> Result<()> { linker.define("spectest", "table", table)?; let ty = MemoryType::new(Limits::new(1, Some(2))); - let memory = Memory::new(linker.store(), ty); + let memory = Memory::new(linker.store(), ty)?; linker.define("spectest", "memory", memory)?; Ok(()) diff --git a/crates/wiggle/Cargo.toml b/crates/wiggle/Cargo.toml index 14f8b0c483..d0c4709ac8 100644 --- a/crates/wiggle/Cargo.toml +++ b/crates/wiggle/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wiggle" -version = "0.25.0" +version = "0.26.0" authors = ["Pat Hickey ", "Jakub Konka ", "Alex Crichton "] edition = "2018" license = "Apache-2.0 WITH LLVM-exception" @@ -13,7 +13,7 @@ include = ["src/**/*", "LICENSE"] [dependencies] thiserror = "1" witx = { path = "../wasi-common/WASI/tools/witx", version = "0.9.0", optional = true } 
-wiggle-macro = { path = "macro", version = "0.25.0" } +wiggle-macro = { path = "macro", version = "0.26.0" } tracing = "0.1.15" bitflags = "1.2" async-trait = "0.1.42" diff --git a/crates/wiggle/borrow/Cargo.toml b/crates/wiggle/borrow/Cargo.toml index bc54deafe3..46ebe25009 100644 --- a/crates/wiggle/borrow/Cargo.toml +++ b/crates/wiggle/borrow/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wiggle-borrow" -version = "0.25.0" +version = "0.26.0" authors = ["Pat Hickey ", "Jakub Konka ", "Alex Crichton "] edition = "2018" license = "Apache-2.0 WITH LLVM-exception" @@ -11,7 +11,7 @@ repository = "https://github.com/bytecodealliance/wasmtime" include = ["src/**/*", "LICENSE"] [dependencies] -wiggle = { path = "..", version = "0.25.0" } +wiggle = { path = "..", version = "0.26.0" } [badges] maintenance = { status = "actively-developed" } diff --git a/crates/wiggle/generate/Cargo.toml b/crates/wiggle/generate/Cargo.toml index 6535506ff1..e065a69187 100644 --- a/crates/wiggle/generate/Cargo.toml +++ b/crates/wiggle/generate/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wiggle-generate" -version = "0.25.0" +version = "0.26.0" authors = ["Pat Hickey ", "Jakub Konka ", "Alex Crichton "] license = "Apache-2.0 WITH LLVM-exception" edition = "2018" diff --git a/crates/wiggle/generate/src/config.rs b/crates/wiggle/generate/src/config.rs index a430a8547a..bed0e5c834 100644 --- a/crates/wiggle/generate/src/config.rs +++ b/crates/wiggle/generate/src/config.rs @@ -47,7 +47,9 @@ impl Parse for ConfigField { } else if lookahead.peek(Token![async]) { input.parse::()?; input.parse::()?; - Ok(ConfigField::Async(input.parse()?)) + Ok(ConfigField::Async(AsyncConf { + functions: input.parse()?, + })) } else { Err(lookahead.error()) } @@ -280,40 +282,64 @@ impl Parse for ErrorConfField { } #[derive(Clone, Default, Debug)] -/// Modules and funcs that should be async -pub struct AsyncConf(HashMap>); +/// Modules and funcs that have async signatures +pub struct AsyncConf { + functions: 
AsyncFunctions, +} -impl AsyncConf { - pub fn is_async(&self, module: &str, function: &str) -> bool { - self.0 - .get(module) - .and_then(|fs| fs.iter().find(|f| *f == function)) - .is_some() +#[derive(Clone, Debug)] +pub enum AsyncFunctions { + Some(HashMap>), + All, +} +impl Default for AsyncFunctions { + fn default() -> Self { + AsyncFunctions::Some(HashMap::default()) } } -impl Parse for AsyncConf { +impl AsyncConf { + pub fn is_async(&self, module: &str, function: &str) -> bool { + match &self.functions { + AsyncFunctions::Some(fs) => fs + .get(module) + .and_then(|fs| fs.iter().find(|f| *f == function)) + .is_some(), + AsyncFunctions::All => true, + } + } +} + +impl Parse for AsyncFunctions { fn parse(input: ParseStream) -> Result { let content; - let _ = braced!(content in input); - let items: Punctuated = - content.parse_terminated(Parse::parse)?; - let mut m: HashMap> = HashMap::new(); - use std::collections::hash_map::Entry; - for i in items { - let function_names = i - .function_names - .iter() - .map(|i| i.to_string()) - .collect::>(); - match m.entry(i.module_name.to_string()) { - Entry::Occupied(o) => o.into_mut().extend(function_names), - Entry::Vacant(v) => { - v.insert(function_names); + let lookahead = input.lookahead1(); + if lookahead.peek(syn::token::Brace) { + let _ = braced!(content in input); + let items: Punctuated = + content.parse_terminated(Parse::parse)?; + let mut functions: HashMap> = HashMap::new(); + use std::collections::hash_map::Entry; + for i in items { + let function_names = i + .function_names + .iter() + .map(|i| i.to_string()) + .collect::>(); + match functions.entry(i.module_name.to_string()) { + Entry::Occupied(o) => o.into_mut().extend(function_names), + Entry::Vacant(v) => { + v.insert(function_names); + } } } + Ok(AsyncFunctions::Some(functions)) + } else if lookahead.peek(Token![*]) { + let _: Token![*] = input.parse().unwrap(); + Ok(AsyncFunctions::All) + } else { + Err(lookahead.error()) } - Ok(AsyncConf(m)) } } diff 
--git a/crates/wiggle/generate/src/funcs.rs b/crates/wiggle/generate/src/funcs.rs index af15709d0b..4fcdcba474 100644 --- a/crates/wiggle/generate/src/funcs.rs +++ b/crates/wiggle/generate/src/funcs.rs @@ -139,10 +139,12 @@ impl witx::Bindgen for Rust<'_> { ) { let rt = self.rt; let wrap_err = |location: &str| { + let modulename = self.module.name.as_str(); let funcname = self.funcname; quote! { |e| { #rt::GuestError::InFunc { + modulename: #modulename, funcname: #funcname, location: #location, err: Box::new(#rt::GuestError::from(e)), diff --git a/crates/wiggle/generate/src/lib.rs b/crates/wiggle/generate/src/lib.rs index 90cca766d1..cecd610b50 100644 --- a/crates/wiggle/generate/src/lib.rs +++ b/crates/wiggle/generate/src/lib.rs @@ -38,17 +38,6 @@ pub fn generate(doc: &witx::Document, names: &Names, settings: &CodegenSettings) } }); - let guest_error_methods = doc.error_types().map(|t| { - let typename = names.type_ref(&t, anon_lifetime()); - let err_method = names.guest_error_conversion_method(&t); - quote!(fn #err_method(&self, e: #rt::GuestError) -> #typename;) - }); - let guest_error_conversion = quote! { - pub trait GuestErrorConversion { - #(#guest_error_methods)* - } - }; - let user_error_methods = settings.errors.iter().map(|errtype| { let abi_typename = names.type_ref(&errtype.abi_type(), anon_lifetime()); let user_typename = errtype.typename(); @@ -82,7 +71,6 @@ pub fn generate(doc: &witx::Document, names: &Names, settings: &CodegenSettings) #(#types)* #(#constants)* - #guest_error_conversion #user_error_conversion } #(#modules)* diff --git a/crates/wiggle/generate/src/module_trait.rs b/crates/wiggle/generate/src/module_trait.rs index 2db27490ab..4ccd1eceff 100644 --- a/crates/wiggle/generate/src/module_trait.rs +++ b/crates/wiggle/generate/src/module_trait.rs @@ -89,8 +89,7 @@ pub fn define_module_trait(names: &Names, m: &Module, settings: &CodegenSettings }); quote! 
{ - use #rt::async_trait; - #[async_trait(?Send)] + #[#rt::async_trait] pub trait #traitname { #(#traitmethods)* } diff --git a/crates/wiggle/generate/src/names.rs b/crates/wiggle/generate/src/names.rs index ae3b6bc74b..087b753d18 100644 --- a/crates/wiggle/generate/src/names.rs +++ b/crates/wiggle/generate/src/names.rs @@ -196,11 +196,6 @@ impl Names { } } - pub fn guest_error_conversion_method(&self, tref: &TypeRef) -> Ident { - let suffix = Self::snake_typename(tref); - format_ident!("into_{}", suffix) - } - pub fn user_error_conversion_method(&self, user_type: &UserErrorType) -> Ident { let abi_type = Self::snake_typename(&user_type.abi_type()); format_ident!( diff --git a/crates/wiggle/macro/Cargo.toml b/crates/wiggle/macro/Cargo.toml index a015675aba..93c6c03c24 100644 --- a/crates/wiggle/macro/Cargo.toml +++ b/crates/wiggle/macro/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wiggle-macro" -version = "0.25.0" +version = "0.26.0" authors = ["Pat Hickey ", "Jakub Konka ", "Alex Crichton "] edition = "2018" license = "Apache-2.0 WITH LLVM-exception" @@ -21,10 +21,11 @@ test = false doctest = false [dependencies] -wiggle-generate = { path = "../generate", version = "0.25.0" } +wiggle-generate = { path = "../generate", version = "0.26.0" } witx = { version = "0.9.0", path = "../../wasi-common/WASI/tools/witx" } quote = "1.0" syn = { version = "1.0", features = ["full"] } +proc-macro2 = "1.0" [dev-dependencies] wiggle = { path = ".." } diff --git a/crates/wiggle/macro/src/lib.rs b/crates/wiggle/macro/src/lib.rs index 09ab2f6353..44e01eb8ba 100644 --- a/crates/wiggle/macro/src/lib.rs +++ b/crates/wiggle/macro/src/lib.rs @@ -83,11 +83,12 @@ use syn::parse_macro_input; /// /// /// The above witx text contains one module called `$example`. So, we must /// /// implement this one method trait for our ctx type. 
-/// #[wiggle::async_trait(?Send)] +/// #[wiggle::async_trait] /// /// We specified in the `async_` field that `example::double_int_return_float` /// /// is an asynchronous method. Therefore, we use the `async_trait` proc macro -/// /// (re-exported by wiggle from the crate of the same name) to define this -/// /// trait, so that `double_int_return_float` can be an `async fn`. +/// /// to define this trait, so that `double_int_return_float` can be an `async fn`. +/// /// `wiggle::async_trait` is defined as `#[async_trait::async_trait(?Send)]` - +/// /// in wiggle, async methods do not have the Send constraint. /// impl example::Example for YourCtxType { /// /// The arrays module has two methods, shown here. /// /// Note that the `GuestPtr` type comes from `wiggle`, @@ -113,17 +114,6 @@ use syn::parse_macro_input; /// } /// } /// -/// /// The `types::GuestErrorConversion` trait is also generated with a method for -/// /// each type used in the `error` position. This trait allows wiggle-generated -/// /// code to convert a `wiggle::GuestError` into the right error type. The trait -/// /// must be implemented for the user's ctx type. -/// -/// impl types::GuestErrorConversion for YourCtxType { -/// fn into_errno(&self, _e: wiggle::GuestError) -> types::Errno { -/// unimplemented!() -/// } -/// } -/// /// /// If you specify a `error` mapping to the macro, you must implement the /// /// `types::UserErrorConversion` for your ctx type as well. This trait gives /// /// you an opportunity to store or log your rich error type, while returning @@ -167,3 +157,13 @@ pub fn from_witx(args: TokenStream) -> TokenStream { TokenStream::from(quote! { #code #metadata }) } + +#[proc_macro_attribute] +pub fn async_trait(attr: TokenStream, item: TokenStream) -> TokenStream { + let _ = parse_macro_input!(attr as syn::parse::Nothing); + let item = proc_macro2::TokenStream::from(item); + TokenStream::from(quote!
{ + #[wiggle::async_trait_crate::async_trait(?Send)] + #item + }) +} diff --git a/crates/wiggle/src/error.rs b/crates/wiggle/src/error.rs index 5cfab66675..e8dd0f5a3b 100644 --- a/crates/wiggle/src/error.rs +++ b/crates/wiggle/src/error.rs @@ -19,20 +19,14 @@ pub enum GuestError { BorrowCheckerOutOfHandles, #[error("Slice length mismatch")] SliceLengthsDiffer, - #[error("In func {funcname}:{location}:")] + #[error("In func {modulename}::{funcname} at {location}: {err}")] InFunc { + modulename: &'static str, funcname: &'static str, location: &'static str, #[source] err: Box, }, - #[error("In data {typename}.{field}:")] - InDataField { - typename: String, - field: String, - #[source] - err: Box, - }, #[error("Invalid UTF-8 encountered: {0:?}")] InvalidUtf8(#[from] ::std::str::Utf8Error), #[error("Int conversion error: {0:?}")] diff --git a/crates/wiggle/src/lib.rs b/crates/wiggle/src/lib.rs index 76d3f0fcbb..f05a65cf5e 100644 --- a/crates/wiggle/src/lib.rs +++ b/crates/wiggle/src/lib.rs @@ -6,9 +6,8 @@ use std::slice; use std::str; use std::sync::Arc; -pub use wiggle_macro::from_witx; -// re-exports so users of wiggle don't need to track the dependency: -pub use async_trait::async_trait; +pub use wiggle_macro::{async_trait, from_witx}; + pub use bitflags; #[cfg(feature = "wiggle_metadata")] @@ -24,6 +23,10 @@ pub use error::GuestError; pub use guest_type::{GuestErrorType, GuestType, GuestTypeTransparent}; pub use region::Region; +pub mod async_trait_crate { + pub use async_trait::*; +} + /// A trait which abstracts how to get at the region of host memory taht /// contains guest memory. 
/// @@ -950,3 +953,40 @@ impl From for Trap { Trap::String(err.to_string()) } } + +pub fn run_in_dummy_executor(future: F) -> F::Output { + use std::pin::Pin; + use std::task::{Context, Poll, RawWaker, RawWakerVTable, Waker}; + + let mut f = Pin::from(Box::new(future)); + let waker = dummy_waker(); + let mut cx = Context::from_waker(&waker); + match f.as_mut().poll(&mut cx) { + Poll::Ready(val) => return val, + Poll::Pending => { + panic!("Cannot wait on pending future: must enable wiggle \"async\" future and execute on an async Store") + } + } + + fn dummy_waker() -> Waker { + return unsafe { Waker::from_raw(clone(5 as *const _)) }; + + unsafe fn clone(ptr: *const ()) -> RawWaker { + assert_eq!(ptr as usize, 5); + const VTABLE: RawWakerVTable = RawWakerVTable::new(clone, wake, wake_by_ref, drop); + RawWaker::new(ptr, &VTABLE) + } + + unsafe fn wake(ptr: *const ()) { + assert_eq!(ptr as usize, 5); + } + + unsafe fn wake_by_ref(ptr: *const ()) { + assert_eq!(ptr as usize, 5); + } + + unsafe fn drop(ptr: *const ()) { + assert_eq!(ptr as usize, 5); + } + } +} diff --git a/crates/wiggle/test-helpers/examples/tracing.rs b/crates/wiggle/test-helpers/examples/tracing.rs index 1e1517d8d9..9930f705c0 100644 --- a/crates/wiggle/test-helpers/examples/tracing.rs +++ b/crates/wiggle/test-helpers/examples/tracing.rs @@ -27,9 +27,7 @@ witx_literal: " errors: { errno => RichError }, }); -// The impl of GuestErrorConversion works just like in every other test where -// we have a single error type with witx `$errno` with the success called `$ok` -impl_errno!(types::Errno, types::GuestErrorConversion); +impl_errno!(types::Errno); /// When the `errors` mapping in witx is non-empty, we need to impl the /// types::UserErrorConversion trait that wiggle generates from that mapping. 
diff --git a/crates/wiggle/test-helpers/src/lib.rs b/crates/wiggle/test-helpers/src/lib.rs index 99571367a5..fa36bee5de 100644 --- a/crates/wiggle/test-helpers/src/lib.rs +++ b/crates/wiggle/test-helpers/src/lib.rs @@ -347,18 +347,11 @@ impl<'a> WasiCtx<'a> { // with these errors. We just push them to vecs. #[macro_export] macro_rules! impl_errno { - ( $errno:ty, $convert:path ) => { + ( $errno:ty ) => { impl wiggle::GuestErrorType for $errno { fn success() -> $errno { <$errno>::Ok } } - impl<'a> $convert for WasiCtx<'a> { - fn into_errno(&self, e: wiggle::GuestError) -> $errno { - eprintln!("GuestError: {:?}", e); - self.guest_errors.borrow_mut().push(e); - <$errno>::InvalidArg - } - } }; } diff --git a/crates/wiggle/tests/atoms.rs b/crates/wiggle/tests/atoms.rs index 9aabbc74c5..8cc2acaa40 100644 --- a/crates/wiggle/tests/atoms.rs +++ b/crates/wiggle/tests/atoms.rs @@ -6,7 +6,7 @@ wiggle::from_witx!({ witx: ["$CARGO_MANIFEST_DIR/tests/atoms.witx"], }); -impl_errno!(types::Errno, types::GuestErrorConversion); +impl_errno!(types::Errno); impl<'a> atoms::Atoms for WasiCtx<'a> { fn int_float_args(&self, an_int: u32, an_float: f32) -> Result<(), types::Errno> { diff --git a/crates/wiggle/tests/atoms_async.rs b/crates/wiggle/tests/atoms_async.rs index bdaf32d796..7d41d3a273 100644 --- a/crates/wiggle/tests/atoms_async.rs +++ b/crates/wiggle/tests/atoms_async.rs @@ -7,14 +7,12 @@ use wiggle_test::{impl_errno, HostMemory, MemArea, WasiCtx}; wiggle::from_witx!({ witx: ["$CARGO_MANIFEST_DIR/tests/atoms.witx"], - async: { - atoms::{int_float_args, double_int_return_float} - } + async: *, }); -impl_errno!(types::Errno, types::GuestErrorConversion); +impl_errno!(types::Errno); -#[wiggle::async_trait(?Send)] +#[wiggle::async_trait] impl<'a> atoms::Atoms for WasiCtx<'a> { async fn int_float_args(&self, an_int: u32, an_float: f32) -> Result<(), types::Errno> { println!("INT FLOAT ARGS: {} {}", an_int, an_float); diff --git a/crates/wiggle/tests/errors.rs 
b/crates/wiggle/tests/errors.rs index ec9291ff53..98cd30edf7 100644 --- a/crates/wiggle/tests/errors.rs +++ b/crates/wiggle/tests/errors.rs @@ -26,9 +26,7 @@ mod convert_just_errno { errors: { errno => RichError }, }); - // The impl of GuestErrorConversion works just like in every other test where - // we have a single error type with witx `$errno` with the success called `$ok` - impl_errno!(types::Errno, types::GuestErrorConversion); + impl_errno!(types::Errno); /// When the `errors` mapping in witx is non-empty, we need to impl the /// types::UserErrorConversion trait that wiggle generates from that mapping. @@ -104,7 +102,7 @@ mod convert_just_errno { /// we use two distinct error types. mod convert_multiple_error_types { pub use super::convert_just_errno::RichError; - use wiggle_test::WasiCtx; + use wiggle_test::{impl_errno, WasiCtx}; /// Test that we can map multiple types of errors. #[derive(Debug, thiserror::Error)] @@ -135,27 +133,8 @@ mod convert_multiple_error_types { errors: { errno => RichError, errno2 => AnotherRichError }, }); - // Can't use the impl_errno! macro as usual here because the conversion - // trait ends up having two methods. - // We aren't going to execute this code, so the bodies are elided. - impl<'a> types::GuestErrorConversion for WasiCtx<'a> { - fn into_errno(&self, _e: wiggle::GuestError) -> types::Errno { - unimplemented!() - } - fn into_errno2(&self, _e: wiggle::GuestError) -> types::Errno2 { - unimplemented!() - } - } - impl wiggle::GuestErrorType for types::Errno { - fn success() -> types::Errno { - ::Ok - } - } - impl wiggle::GuestErrorType for types::Errno2 { - fn success() -> types::Errno2 { - ::Ok - } - } + impl_errno!(types::Errno); + impl_errno!(types::Errno2); // The UserErrorConversion trait will also have two methods for this test. They correspond to // each member of the `errors` mapping. 
diff --git a/crates/wiggle/tests/flags.rs b/crates/wiggle/tests/flags.rs index 1a1f98c6c5..e29cfdeab7 100644 --- a/crates/wiggle/tests/flags.rs +++ b/crates/wiggle/tests/flags.rs @@ -7,7 +7,7 @@ wiggle::from_witx!({ witx: ["$CARGO_MANIFEST_DIR/tests/flags.witx"], }); -impl_errno!(types::Errno, types::GuestErrorConversion); +impl_errno!(types::Errno); impl<'a> flags::Flags for WasiCtx<'a> { fn configure_car( diff --git a/crates/wiggle/tests/handles.rs b/crates/wiggle/tests/handles.rs index 15fb0d36be..6dce915286 100644 --- a/crates/wiggle/tests/handles.rs +++ b/crates/wiggle/tests/handles.rs @@ -8,7 +8,7 @@ wiggle::from_witx!({ witx: ["$CARGO_MANIFEST_DIR/tests/handles.witx"], }); -impl_errno!(types::Errno, types::GuestErrorConversion); +impl_errno!(types::Errno); impl<'a> handle_examples::HandleExamples for WasiCtx<'a> { fn fd_create(&self) -> Result { diff --git a/crates/wiggle/tests/ints.rs b/crates/wiggle/tests/ints.rs index 23310ee561..eb54b02f4e 100644 --- a/crates/wiggle/tests/ints.rs +++ b/crates/wiggle/tests/ints.rs @@ -7,7 +7,7 @@ wiggle::from_witx!({ witx: ["$CARGO_MANIFEST_DIR/tests/ints.witx"], }); -impl_errno!(types::Errno, types::GuestErrorConversion); +impl_errno!(types::Errno); impl<'a> ints::Ints for WasiCtx<'a> { fn cookie_cutter(&self, init_cookie: types::Cookie) -> Result { diff --git a/crates/wiggle/tests/lists.rs b/crates/wiggle/tests/lists.rs index d2f76fd152..e797b5f0ca 100644 --- a/crates/wiggle/tests/lists.rs +++ b/crates/wiggle/tests/lists.rs @@ -6,7 +6,7 @@ wiggle::from_witx!({ witx: ["$CARGO_MANIFEST_DIR/tests/lists.witx"], }); -impl_errno!(types::Errno, types::GuestErrorConversion); +impl_errno!(types::Errno); impl<'a> lists::Lists for WasiCtx<'a> { fn reduce_excuses( diff --git a/crates/wiggle/tests/pointers.rs b/crates/wiggle/tests/pointers.rs index 96e33a734d..7988733074 100644 --- a/crates/wiggle/tests/pointers.rs +++ b/crates/wiggle/tests/pointers.rs @@ -6,7 +6,7 @@ wiggle::from_witx!({ witx: 
["$CARGO_MANIFEST_DIR/tests/pointers.witx"], }); -impl_errno!(types::Errno, types::GuestErrorConversion); +impl_errno!(types::Errno); impl<'a> pointers::Pointers for WasiCtx<'a> { fn pointers_and_enums<'b>( diff --git a/crates/wiggle/tests/records.rs b/crates/wiggle/tests/records.rs index fa448c3dc3..3c61e621a3 100644 --- a/crates/wiggle/tests/records.rs +++ b/crates/wiggle/tests/records.rs @@ -6,7 +6,7 @@ wiggle::from_witx!({ witx: ["$CARGO_MANIFEST_DIR/tests/records.witx"], }); -impl_errno!(types::Errno, types::GuestErrorConversion); +impl_errno!(types::Errno); impl<'a> records::Records for WasiCtx<'a> { fn sum_of_pair(&self, an_pair: &types::PairInts) -> Result { diff --git a/crates/wiggle/tests/strings.rs b/crates/wiggle/tests/strings.rs index 420a5c0a62..e6eab2a9e2 100644 --- a/crates/wiggle/tests/strings.rs +++ b/crates/wiggle/tests/strings.rs @@ -6,7 +6,7 @@ wiggle::from_witx!({ witx: ["$CARGO_MANIFEST_DIR/tests/strings.witx"], }); -impl_errno!(types::Errno, types::GuestErrorConversion); +impl_errno!(types::Errno); impl<'a> strings::Strings for WasiCtx<'a> { fn hello_string(&self, a_string: &GuestPtr) -> Result { diff --git a/crates/wiggle/tests/variant.rs b/crates/wiggle/tests/variant.rs index 5bddfb6aab..d367938106 100644 --- a/crates/wiggle/tests/variant.rs +++ b/crates/wiggle/tests/variant.rs @@ -6,7 +6,7 @@ wiggle::from_witx!({ witx: ["$CARGO_MANIFEST_DIR/tests/variant.witx"], }); -impl_errno!(types::Errno, types::GuestErrorConversion); +impl_errno!(types::Errno); // Avoid panics on overflow fn mult_lose_overflow(a: i32, b: u32) -> i32 { diff --git a/crates/wiggle/tests/wasi.rs b/crates/wiggle/tests/wasi.rs index 6dc6d84ea5..d95f008449 100644 --- a/crates/wiggle/tests/wasi.rs +++ b/crates/wiggle/tests/wasi.rs @@ -1,4 +1,4 @@ -use wiggle::{GuestError, GuestErrorType, GuestPtr, GuestSlice}; +use wiggle::{GuestErrorType, GuestPtr, GuestSlice}; use wiggle_test::WasiCtx; // This test file exists to make sure that the entire `wasi.witx` file can be @@ -31,13 
+31,6 @@ impl GuestErrorType for types::Errno { } } -impl<'a> types::GuestErrorConversion for WasiCtx<'a> { - fn into_errno(&self, e: GuestError) -> types::Errno { - eprintln!("GuestError {:?}", e); - types::Errno::Badf - } -} - impl<'a> crate::wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx<'a> { fn args_get(&self, _argv: &GuestPtr>, _argv_buf: &GuestPtr) -> Result<()> { unimplemented!("args_get") diff --git a/crates/wiggle/wasmtime/Cargo.toml b/crates/wiggle/wasmtime/Cargo.toml index e77c344724..f51ea119bb 100644 --- a/crates/wiggle/wasmtime/Cargo.toml +++ b/crates/wiggle/wasmtime/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wasmtime-wiggle" -version = "0.25.0" +version = "0.26.0" authors = ["Pat Hickey ", "Jakub Konka ", "Alex Crichton "] edition = "2018" license = "Apache-2.0 WITH LLVM-exception" @@ -11,11 +11,11 @@ repository = "https://github.com/bytecodealliance/wasmtime" include = ["src/**/*", "LICENSE"] [dependencies] -wasmtime = { path = "../../wasmtime", version = "0.25.0", default-features = false } -wasmtime-wiggle-macro = { path = "./macro", version = "0.25.0" } +wasmtime = { path = "../../wasmtime", version = "0.26.0", default-features = false } +wasmtime-wiggle-macro = { path = "./macro", version = "0.26.0" } witx = { version = "0.9.0", path = "../../wasi-common/WASI/tools/witx", optional = true } -wiggle = { path = "..", version = "0.25.0" } -wiggle-borrow = { path = "../borrow", version = "0.25.0" } +wiggle = { path = "..", version = "0.26.0" } +wiggle-borrow = { path = "../borrow", version = "0.26.0" } [dev-dependencies] anyhow = "1" @@ -26,6 +26,11 @@ name = "atoms_async" path = "tests/atoms_async.rs" required-features = ["async", "wasmtime/wat"] +[[test]] +name = "atoms_sync" +path = "tests/atoms_sync.rs" +required-features = ["wasmtime/wat"] + [badges] maintenance = { status = "actively-developed" } diff --git a/crates/wiggle/wasmtime/macro/Cargo.toml b/crates/wiggle/wasmtime/macro/Cargo.toml index 71b4080b5c..c1c9494d38 100644 --- 
a/crates/wiggle/wasmtime/macro/Cargo.toml +++ b/crates/wiggle/wasmtime/macro/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wasmtime-wiggle-macro" -version = "0.25.0" +version = "0.26.0" authors = ["Pat Hickey ", "Jakub Konka ", "Alex Crichton "] edition = "2018" license = "Apache-2.0 WITH LLVM-exception" @@ -16,7 +16,7 @@ test = false [dependencies] witx = { version = "0.9.0", path = "../../../wasi-common/WASI/tools/witx" } -wiggle-generate = { path = "../../generate", version = "0.25.0" } +wiggle-generate = { path = "../../generate", version = "0.26.0" } quote = "1.0" syn = { version = "1.0", features = ["full", "extra-traits"] } proc-macro2 = "1.0" diff --git a/crates/wiggle/wasmtime/macro/src/config.rs b/crates/wiggle/wasmtime/macro/src/config.rs index 76a23dd31f..30815817b2 100644 --- a/crates/wiggle/wasmtime/macro/src/config.rs +++ b/crates/wiggle/wasmtime/macro/src/config.rs @@ -1,4 +1,4 @@ -pub use wiggle_generate::config::AsyncConf; +use wiggle_generate::config::AsyncFunctions; use { proc_macro2::Span, std::collections::HashMap, @@ -16,7 +16,6 @@ pub struct Config { pub witx: WitxConf, pub ctx: CtxConf, pub modules: ModulesConf, - #[cfg(feature = "async")] pub async_: AsyncConf, } @@ -26,7 +25,6 @@ pub enum ConfigField { Witx(WitxConf), Ctx(CtxConf), Modules(ModulesConf), - #[cfg(feature = "async")] Async(AsyncConf), } @@ -39,6 +37,7 @@ mod kw { syn::custom_keyword!(name); syn::custom_keyword!(docs); syn::custom_keyword!(function_override); + syn::custom_keyword!(block_on); } impl Parse for ConfigField { @@ -67,17 +66,17 @@ impl Parse for ConfigField { } else if lookahead.peek(Token![async]) { input.parse::()?; input.parse::()?; - #[cfg(feature = "async")] - { - Ok(ConfigField::Async(input.parse()?)) - } - #[cfg(not(feature = "async"))] - { - Err(syn::Error::new( - input.span(), - "async not supported, enable cargo feature \"async\"", - )) - } + Ok(ConfigField::Async(AsyncConf { + blocking: false, + functions: input.parse()?, + })) + } else if 
lookahead.peek(kw::block_on) { + input.parse::()?; + input.parse::()?; + Ok(ConfigField::Async(AsyncConf { + blocking: true, + functions: input.parse()?, + })) } else { Err(lookahead.error()) } @@ -90,7 +89,6 @@ impl Config { let mut witx = None; let mut ctx = None; let mut modules = None; - #[cfg(feature = "async")] let mut async_ = None; for f in fields { match f { @@ -118,7 +116,6 @@ impl Config { } modules = Some(c); } - #[cfg(feature = "async")] ConfigField::Async(c) => { if async_.is_some() { return Err(Error::new(err_loc, "duplicate `async` field")); @@ -132,7 +129,6 @@ impl Config { witx: witx.ok_or_else(|| Error::new(err_loc, "`witx` field required"))?, ctx: ctx.ok_or_else(|| Error::new(err_loc, "`ctx` field required"))?, modules: modules.ok_or_else(|| Error::new(err_loc, "`modules` field required"))?, - #[cfg(feature = "async")] async_: async_.unwrap_or_default(), }) } @@ -276,3 +272,53 @@ impl Parse for ModulesConf { }) } } + +#[derive(Clone, Default, Debug)] +/// Modules and funcs that have async signatures +pub struct AsyncConf { + blocking: bool, + functions: AsyncFunctions, +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum Asyncness { + /// Wiggle function is synchronous, wasmtime Func is synchronous + Sync, + /// Wiggle function is asynchronous, but wasmtime Func is synchronous + Blocking, + /// Wiggle function and wasmtime Func are asynchronous. 
+ Async, +} + +impl Asyncness { + pub fn is_sync(&self) -> bool { + match self { + Asyncness::Sync => true, + _ => false, + } + } +} + +impl AsyncConf { + pub fn is_async(&self, module: &str, function: &str) -> Asyncness { + let a = if self.blocking { + Asyncness::Blocking + } else { + Asyncness::Async + }; + match &self.functions { + AsyncFunctions::Some(fs) => { + if fs + .get(module) + .and_then(|fs| fs.iter().find(|f| *f == function)) + .is_some() + { + a + } else { + Asyncness::Sync + } + } + AsyncFunctions::All => a, + } + } +} diff --git a/crates/wiggle/wasmtime/macro/src/lib.rs b/crates/wiggle/wasmtime/macro/src/lib.rs index 3a064cbae4..01f8a8db48 100644 --- a/crates/wiggle/wasmtime/macro/src/lib.rs +++ b/crates/wiggle/wasmtime/macro/src/lib.rs @@ -6,7 +6,7 @@ use wiggle_generate::Names; mod config; -use config::{AsyncConf, ModuleConf, TargetConf}; +use config::{AsyncConf, Asyncness, ModuleConf, TargetConf}; /// Define the structs required to integrate a Wiggle implementation with Wasmtime. 
/// @@ -48,11 +48,6 @@ pub fn wasmtime_integration(args: TokenStream) -> TokenStream { let doc = config.load_document(); let names = Names::new(quote!(wasmtime_wiggle)); - #[cfg(feature = "async")] - let async_config = config.async_.clone(); - #[cfg(not(feature = "async"))] - let async_config = AsyncConf::default(); - let modules = config.modules.iter().map(|(name, module_conf)| { let module = doc .module(&witx::Id::new(name)) @@ -63,7 +58,7 @@ pub fn wasmtime_integration(args: TokenStream) -> TokenStream { &names, &config.target, &config.ctx.name, - &async_config, + &config.async_, ) }); quote!( #(#modules)* ).into() @@ -107,13 +102,24 @@ fn generate_module( let mut host_funcs = Vec::new(); for f in module.funcs() { + let asyncness = async_conf.is_async(module.name.as_str(), f.name.as_str()); + match asyncness { + Asyncness::Blocking => {} + Asyncness::Async => { + assert!( + cfg!(feature = "async"), + "generating async wasmtime Funcs requires cargo feature \"async\"" + ); + } + _ => {} + } generate_func( &module_id, &f, names, &target_module, ctx_type, - async_conf.is_async(module.name.as_str(), f.name.as_str()), + asyncness, &mut fns, &mut ctor_externs, &mut host_funcs, @@ -135,6 +141,28 @@ contained in the `cx` parameter.", module_conf.name.to_string() ); + let config_adder_definitions = host_funcs.iter().map(|(func_name, body)| { + let adder_func = format_ident!("add_{}_to_config", names.func(&func_name)); + let docs = format!( + "Add the host function for `{}` to a config under a given module and field name.", + func_name.as_str() + ); + quote! { + #[doc = #docs] + pub fn #adder_func(config: &mut wasmtime::Config, module: &str, field: &str) { + #body + } + } + }); + let config_adder_invocations = host_funcs.iter().map(|(func_name, _body)| { + let adder_func = format_ident!("add_{}_to_config", names.func(&func_name)); + let module = module.name.as_str(); + let field = func_name.as_str(); + quote! 
{ + Self::#adder_func(config, #module, #field); + } + }); + quote! { #type_docs pub struct #type_name { @@ -151,6 +179,7 @@ contained in the `cx` parameter.", } } + /// Looks up a field called `name` in this structure, returning it /// if found. /// @@ -175,9 +204,11 @@ contained in the `cx` parameter.", /// /// Host functions will trap if the context is not set in the calling [`wasmtime::Store`]. pub fn add_to_config(config: &mut wasmtime::Config) { - #(#host_funcs)* + #(#config_adder_invocations)* } + #(#config_adder_definitions)* + /// Sets the context in the given store. /// /// Context must be set in the store when using [`add_to_config`] and prior to any @@ -204,11 +235,12 @@ fn generate_func( names: &Names, target_module: &TokenStream2, ctx_type: &syn::Type, - is_async: bool, + asyncness: Asyncness, fns: &mut Vec, ctors: &mut Vec, - host_funcs: &mut Vec, + host_funcs: &mut Vec<(witx::Id, TokenStream2)>, ) { + let rt = names.runtime_mod(); let name_ident = names.func(&func.name); let (params, results) = func.wasm_signature(); @@ -232,8 +264,16 @@ fn generate_func( _ => unimplemented!(), }; - let async_ = if is_async { quote!(async) } else { quote!() }; - let await_ = if is_async { quote!(.await) } else { quote!() }; + let async_ = if asyncness.is_sync() { + quote!() + } else { + quote!(async) + }; + let await_ = if asyncness.is_sync() { + quote!() + } else { + quote!(.await) + }; let runtime = names.runtime_mod(); let fn_ident = format_ident!("{}_{}", module_ident, name_ident); @@ -257,20 +297,36 @@ fn generate_func( } }); - if is_async { - let wrapper = format_ident!("wrap{}_async", params.len()); - ctors.push(quote! { + match asyncness { + Asyncness::Async => { + let wrapper = format_ident!("wrap{}_async", params.len()); + ctors.push(quote! 
{ let #name_ident = wasmtime::Func::#wrapper( store, ctx.clone(), - move |caller: wasmtime::Caller<'_>, my_ctx: &Rc> #(,#arg_decls)*| + move |caller: wasmtime::Caller<'_>, my_ctx: &std::rc::Rc> #(,#arg_decls)*| -> Box>> { Box::new(async move { Self::#fn_ident(&caller, &mut my_ctx.borrow_mut() #(, #arg_names)*).await }) } ); - }); - } else { - ctors.push(quote! { + }); + } + Asyncness::Blocking => { + // Emit a synchronous function. Self::#fn_ident returns a Future, so we need to + // use a dummy executor to let any synchronous code inside there execute correctly. If + // the future ends up Pending, this func will Trap. + ctors.push(quote! { + let my_ctx = ctx.clone(); + let #name_ident = wasmtime::Func::wrap( + store, + move |caller: wasmtime::Caller #(, #arg_decls)*| -> Result<#ret_ty, wasmtime::Trap> { + #rt::run_in_dummy_executor(Self::#fn_ident(&caller, &mut my_ctx.borrow_mut() #(, #arg_names)*)) + } + ); + }); + } + Asyncness::Sync => { + ctors.push(quote! { let my_ctx = ctx.clone(); let #name_ident = wasmtime::Func::wrap( store, @@ -279,40 +335,63 @@ fn generate_func( } ); }); + } } - if is_async { - let wrapper = format_ident!("wrap{}_host_func_async", params.len()); - host_funcs.push(quote! { - config.#wrapper( - stringify!(#module_ident), - stringify!(#name_ident), - move |caller #(,#arg_decls)*| - -> Box>> { - Box::new(async move { - let ctx = caller.store() + let host_wrapper = match asyncness { + Asyncness::Async => { + let wrapper = format_ident!("wrap{}_host_func_async", params.len()); + quote! { + config.#wrapper( + module, + field, + move |caller #(,#arg_decls)*| + -> Box>> { + Box::new(async move { + let ctx = caller.store() + .get::>>() + .ok_or_else(|| wasmtime::Trap::new("context is missing in the store"))?; + let result = Self::#fn_ident(&caller, &mut ctx.borrow_mut() #(, #arg_names)*).await; + result + }) + } + ); + } + } + + Asyncness::Blocking => { + // Emit a synchronous host function. 
Self::#fn_ident returns a Future, so we need to + // use a dummy executor to let any synchronous code inside there execute correctly. If + // the future ends up Pending, this func will Trap. + quote! { + config.wrap_host_func( + module, + field, + move |caller: wasmtime::Caller #(, #arg_decls)*| -> Result<#ret_ty, wasmtime::Trap> { + let ctx = caller + .store() .get::>>() .ok_or_else(|| wasmtime::Trap::new("context is missing in the store"))?; - let result = Self::#fn_ident(&caller, &mut ctx.borrow_mut() #(, #arg_names)*).await; - result - }) - } - ); - }); - } else { - host_funcs.push(quote! { - config.wrap_host_func( - stringify!(#module_ident), - stringify!(#name_ident), - move |caller: wasmtime::Caller #(, #arg_decls)*| -> Result<#ret_ty, wasmtime::Trap> { - let ctx = caller - .store() - .get::>>() - .ok_or_else(|| wasmtime::Trap::new("context is missing in the store"))?; - let result = Self::#fn_ident(&caller, &mut ctx.borrow_mut() #(, #arg_names)*); - result - }, - ); - }); - } + #rt::run_in_dummy_executor(Self::#fn_ident(&caller, &mut ctx.borrow_mut() #(, #arg_names)*)) + }, + ); + } + } + Asyncness::Sync => { + quote! 
{ + config.wrap_host_func( + module, + field, + move |caller: wasmtime::Caller #(, #arg_decls)*| -> Result<#ret_ty, wasmtime::Trap> { + let ctx = caller + .store() + .get::>>() + .ok_or_else(|| wasmtime::Trap::new("context is missing in the store"))?; + Self::#fn_ident(&caller, &mut ctx.borrow_mut() #(, #arg_names)*) + }, + ); + } + } + }; + host_funcs.push((func.name.clone(), host_wrapper)); } diff --git a/crates/wiggle/wasmtime/tests/atoms_async.rs b/crates/wiggle/wasmtime/tests/atoms_async.rs index d8e6d062b4..7dd5b6f5c2 100644 --- a/crates/wiggle/wasmtime/tests/atoms_async.rs +++ b/crates/wiggle/wasmtime/tests/atoms_async.rs @@ -28,7 +28,7 @@ impl wiggle::GuestErrorType for types::Errno { } } -#[wasmtime_wiggle::async_trait(?Send)] +#[wasmtime_wiggle::async_trait] impl atoms::Atoms for Ctx { fn int_float_args(&self, an_int: u32, an_float: f32) -> Result<(), types::Errno> { println!("INT FLOAT ARGS: {} {}", an_int, an_float); diff --git a/crates/wiggle/wasmtime/tests/atoms_sync.rs b/crates/wiggle/wasmtime/tests/atoms_sync.rs new file mode 100644 index 0000000000..eee48f5338 --- /dev/null +++ b/crates/wiggle/wasmtime/tests/atoms_sync.rs @@ -0,0 +1,176 @@ +use std::cell::RefCell; +use std::rc::Rc; + +wasmtime_wiggle::from_witx!({ + witx: ["$CARGO_MANIFEST_DIR/tests/atoms.witx"], + async: { + atoms::{double_int_return_float} + } +}); + +wasmtime_wiggle::wasmtime_integration!({ + target: crate, + witx: ["$CARGO_MANIFEST_DIR/tests/atoms.witx"], + ctx: Ctx, + modules: { atoms => { name: Atoms } }, + block_on: { + atoms::double_int_return_float + } +}); + +pub struct Ctx; +impl wiggle::GuestErrorType for types::Errno { + fn success() -> Self { + types::Errno::Ok + } +} + +#[wasmtime_wiggle::async_trait] +impl atoms::Atoms for Ctx { + fn int_float_args(&self, an_int: u32, an_float: f32) -> Result<(), types::Errno> { + println!("INT FLOAT ARGS: {} {}", an_int, an_float); + Ok(()) + } + async fn double_int_return_float( + &self, + an_int: u32, + ) -> Result { + Ok((an_int 
as f32) * 2.0) + } +} + +fn run_int_float_args(linker: &wasmtime::Linker) { + let shim_mod = shim_module(linker.store()); + let shim_inst = linker.instantiate(&shim_mod).unwrap(); + + let results = shim_inst + .get_func("int_float_args_shim") + .unwrap() + .call(&[0i32.into(), 123.45f32.into()]) + .unwrap(); + + assert_eq!(results.len(), 1, "one return value"); + assert_eq!( + results[0].unwrap_i32(), + types::Errno::Ok as i32, + "int_float_args errno" + ); +} + +fn run_double_int_return_float(linker: &wasmtime::Linker) { + let shim_mod = shim_module(linker.store()); + let shim_inst = linker.instantiate(&shim_mod).unwrap(); + + let input: i32 = 123; + let result_location: i32 = 0; + + let results = shim_inst + .get_func("double_int_return_float_shim") + .unwrap() + .call(&[input.into(), result_location.into()]) + .unwrap(); + + assert_eq!(results.len(), 1, "one return value"); + assert_eq!( + results[0].unwrap_i32(), + types::Errno::Ok as i32, + "double_int_return_float errno" + ); + + // The actual result is in memory: + let mem = shim_inst.get_memory("memory").unwrap(); + let mut result_bytes: [u8; 4] = [0, 0, 0, 0]; + mem.read(result_location as usize, &mut result_bytes) + .unwrap(); + let result = f32::from_le_bytes(result_bytes); + assert_eq!((input * 2) as f32, result); +} + +#[test] +fn test_sync_host_func() { + let store = store(); + + let ctx = Rc::new(RefCell::new(Ctx)); + let atoms = Atoms::new(&store, ctx.clone()); + + let mut linker = wasmtime::Linker::new(&store); + atoms.add_to_linker(&mut linker).unwrap(); + + run_int_float_args(&linker); +} + +#[test] +fn test_async_host_func() { + let store = store(); + + let ctx = Rc::new(RefCell::new(Ctx)); + let atoms = Atoms::new(&store, ctx.clone()); + + let mut linker = wasmtime::Linker::new(&store); + atoms.add_to_linker(&mut linker).unwrap(); + + run_double_int_return_float(&linker); +} + +#[test] +fn test_sync_config_host_func() { + let mut config = wasmtime::Config::new(); + Atoms::add_to_config(&mut 
config); + + let engine = wasmtime::Engine::new(&config).unwrap(); + let store = wasmtime::Store::new(&engine); + + assert!(Atoms::set_context(&store, Ctx).is_ok()); + + let linker = wasmtime::Linker::new(&store); + run_int_float_args(&linker); +} + +#[test] +fn test_async_config_host_func() { + let mut config = wasmtime::Config::new(); + Atoms::add_to_config(&mut config); + + let engine = wasmtime::Engine::new(&config).unwrap(); + let store = wasmtime::Store::new(&engine); + + assert!(Atoms::set_context(&store, Ctx).is_ok()); + + let linker = wasmtime::Linker::new(&store); + run_double_int_return_float(&linker); +} + +fn store() -> wasmtime::Store { + wasmtime::Store::new(&wasmtime::Engine::new(&wasmtime::Config::new()).unwrap()) +} + +// Wiggle expects the caller to have an exported memory. Wasmtime can only +// provide this if the caller is a WebAssembly module, so we need to write +// a shim module: +fn shim_module(store: &wasmtime::Store) -> wasmtime::Module { + wasmtime::Module::new( + store.engine(), + r#" + (module + (memory 1) + (export "memory" (memory 0)) + (import "atoms" "int_float_args" (func $int_float_args (param i32 f32) (result i32))) + (import "atoms" "double_int_return_float" (func $double_int_return_float (param i32 i32) (result i32))) + + (func $int_float_args_shim (param i32 f32) (result i32) + local.get 0 + local.get 1 + call $int_float_args + ) + (func $double_int_return_float_shim (param i32 i32) (result i32) + local.get 0 + local.get 1 + call $double_int_return_float + ) + (export "int_float_args_shim" (func $int_float_args_shim)) + (export "double_int_return_float_shim" (func $double_int_return_float_shim)) + ) + "#, + ) + .unwrap() +} diff --git a/deny.toml b/deny.toml index d9baefa136..a08d581216 100644 --- a/deny.toml +++ b/deny.toml @@ -14,6 +14,7 @@ vulnerability = "deny" unmaintained = "deny" yanked = "deny" ignore = [ + "RUSTSEC-2021-0064" ] # https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html @@ -23,6 +24,7 @@ 
allow = [ "Apache-2.0", "BSD-2-Clause", "CC0-1.0", + "ISC", "MIT", "MPL-2.0", "Zlib", @@ -43,4 +45,7 @@ skip = [ { name = "wast" }, # old one pulled in by witx { name = "itertools" }, # 0.9 pulled in by zstd-sys { name = "quick-error" }, # transitive dependencies + { name = "rustc_version" }, # transitive dependencies of criterion's build script (see https://github.com/japaric/cast.rs/pull/26) + { name = "semver" }, # transitive dependencies of criterion's build script (see https://github.com/japaric/cast.rs/pull/26) + { name = "semver-parser" }, # transitive dependencies of criterion's build script (see https://github.com/japaric/cast.rs/pull/26) ] diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 32b71d3cd8..2a74e30a96 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -17,6 +17,7 @@ - [Linking Modules](./examples-rust-linking.md) - [Debugging](./examples-rust-debugging.md) - [Using Multi-Value](./examples-rust-multi-value.md) + - [Multi-threading](./examples-rust-multithreading.md) - [Embedding in C](./examples-c-embed.md) - [Hello, World!](./examples-c-hello-world.md) - [Calculating the GCD](./examples-c-gcd.md) diff --git a/docs/cli-options.md b/docs/cli-options.md index d1204314ab..be78272a78 100644 --- a/docs/cli-options.md +++ b/docs/cli-options.md @@ -80,3 +80,29 @@ with: ```sh $ wasmtime wasm2obj foo.wasm foo.o ``` + +## `compile` + +This subcommand is used to Ahead-Of-Time (AOT) compile a WebAssembly module to produce +a "compiled wasm" (.cwasm) file. + +The `wasmtime run` subcommand can then be used to run an AOT-compiled WebAssembly module: + +```sh +$ wasmtime compile foo.wasm +$ wasmtime foo.cwasm +``` + +AOT-compiled modules can be run from hosts that are compatible with the target +environment of the AOT-compiled module. + +## `settings` + +This subcommand is used to print the available Cranelift settings for a given target. 
+ +When run without options, it will print the settings for the host target and also +display what Cranelift settings are inferred for the host: + +```sh +$ wasmtime settings +``` diff --git a/docs/contributing-ci.md b/docs/contributing-ci.md index c7b422ca0e..5b7306f661 100644 --- a/docs/contributing-ci.md +++ b/docs/contributing-ci.md @@ -46,7 +46,8 @@ run on CI looks like this: corpus](https://github.com/bytecodealliance/wasmtime-libfuzzer-corpus) and run it through the fuzzers. This is mostly intended to be a pretty quick regression test and testing the fuzzers still build, most of our fuzzing - happens on [oss-fuzz](https://oss-fuzz.com). + happens on [oss-fuzz](https://oss-fuzz.com). Found issues are recorded in + the [oss-fuzz bug tracker](https://bugs.chromium.org/p/oss-fuzz/issues/list?q=-status%3AWontFix%2CDuplicate%20-component%3AInfra%20proj%3Awasmtime&can=1) While we do run more tests here and there, this is the general shape of what you can be expected to get tested on CI for all commits and all PRs. You can of diff --git a/docs/examples-rust-multithreading.md b/docs/examples-rust-multithreading.md new file mode 100644 index 0000000000..5d3fdcac2c --- /dev/null +++ b/docs/examples-rust-multithreading.md @@ -0,0 +1,148 @@ +# Multi-threading + +When using Rust you're effectively immune from a whole class of threading issues +such as data races due to the inherent checks in the compiler and traits like +`Send` and `Sync`. The `wasmtime` API, like other safe Rust APIs, is 100% safe +to use relative to threading if you never have any `unsafe` yourself. In +addition to all of this, however, it's important to be aware of the limitations +of `wasmtime` types and how this might affect your embedding use case. 
+ +## Types that are `Send` and `Sync` + +Wasmtime has a number of types which implement both the `Send` and `Sync` +traits: + +* [`Config`](https://docs.wasmtime.dev/api/wasmtime/struct.Config.html) +* [`Engine`](https://docs.wasmtime.dev/api/wasmtime/struct.Engine.html) +* [`Module`](https://docs.wasmtime.dev/api/wasmtime/struct.Module.html) +* [`Trap`](https://docs.wasmtime.dev/api/wasmtime/struct.Trap.html) +* [`InterruptHandle`](https://docs.wasmtime.dev/api/wasmtime/struct.InterruptHandle.html) +* Type-descriptions of items + * [`ValType`](https://docs.wasmtime.dev/api/wasmtime/struct.ValType.html) + * [`ExportType`](https://docs.wasmtime.dev/api/wasmtime/struct.ExportType.html) + * [`ExternType`](https://docs.wasmtime.dev/api/wasmtime/struct.ExternType.html) + * [`ImportType`](https://docs.wasmtime.dev/api/wasmtime/struct.ImportType.html) + * [`FuncType`](https://docs.wasmtime.dev/api/wasmtime/struct.FuncType.html) + * [`GlobalType`](https://docs.wasmtime.dev/api/wasmtime/struct.GlobalType.html) + * [`MemoryType`](https://docs.wasmtime.dev/api/wasmtime/struct.MemoryType.html) + * [`ModuleType`](https://docs.wasmtime.dev/api/wasmtime/struct.ModuleType.html) + * [`TableType`](https://docs.wasmtime.dev/api/wasmtime/struct.TableType.html) + * [`InstanceType`](https://docs.wasmtime.dev/api/wasmtime/struct.InstanceType.html) + +These types, as the traits imply, are safe to send and share across threads. +Note that the major types to call out here are `Module` and `Engine`. The +`Engine` is important because it enables sharing compilation configuration for +an entire application. Each `Engine` is intended to be long-lived for this +reason. + +Additionally `Module`, the compiled version of a WebAssembly module, is safe to +send and share across threads. This notably means that you can compile a module +once and then instantiate it on multiple threads simultaneously. There's no need +to recompile a module on each thread. 
+ +## Types that are neither `Send` nor `Sync` + +Wasmtime also has a number of types which are thread-"unsafe". These types do +not have the `Send` or `Sync` traits implemented which means that you won't be +able to send them across threads by default. + +* [`Store`](https://docs.wasmtime.dev/api/wasmtime/struct.Store.html) +* [`Linker`](https://docs.wasmtime.dev/api/wasmtime/struct.Linker.html) +* [`Instance`](https://docs.wasmtime.dev/api/wasmtime/struct.Instance.html) +* [`Extern`](https://docs.wasmtime.dev/api/wasmtime/struct.Extern.html) +* [`Func`](https://docs.wasmtime.dev/api/wasmtime/struct.Func.html) +* [`Global`](https://docs.wasmtime.dev/api/wasmtime/struct.Global.html) +* [`Table`](https://docs.wasmtime.dev/api/wasmtime/struct.Table.html) +* [`Memory`](https://docs.wasmtime.dev/api/wasmtime/struct.Memory.html) +* [`Val`](https://docs.wasmtime.dev/api/wasmtime/struct.Val.html) +* [`ExternRef`](https://docs.wasmtime.dev/api/wasmtime/struct.ExternRef.html) + +These types are all considered as "connected to a store", and everything +connected to a store is neither `Send` nor `Sync`. The Rust compiler will not +allow you to have values of these types cross thread boundaries or get shared +between multiple threads. Doing so would require some form of `unsafe` glue. + +It's important to note that the WebAssembly specification itself fundamentally +limits some of the concurrent possibilities here. For example it's not allowed +to concurrently call `global.set` or `table.set` on the same global/table. This +means that Wasmtime is designed to prevent at the very least concurrent usage of +these primitives. + +Apart from the WebAssembly specification, though, Wasmtime additionally has some +fundamental design decisions which result in these types not implementing either +`Send` or `Sync`: + +* All objects are independently-owned `'static` values that internally retain + anything necessary to implement the API provided. 
This necessitates some form + of reference counting, and also requires the usage of non-atomic reference + counting. Once reference counting is used Rust only allows shared references + (`&T`) to the internals, and due to the wasm restriction of disallowing + concurrent usage non-atomic reference counting is used. + +* Insertion of user-defined objects into `Store` does not require all objects to + be either `Send` or `Sync`. For example `Func::wrap` will insert the + host-defined function into the `Store`, but there are no extra trait bounds on + this. Similar restrictions apply to `Store::set` as well. + +* The implementation of `ExternRef` allows arbitrary `'static` types `T` to get + wrapped up and is also implemented with non-atomic reference counting. + +Overall the design decisions of Wasmtime itself leads all of these types to not +implement either the `Send` or `Sync` traits. + +## Multithreading without `Send` + +Due to the lack of `Send` on types like `Store` and everything connected, it's +not always as trivial to add multithreaded execution of WebAssembly to an +embedding of Wasmtime as it is for other Rust code in general. The exact way +that multithreading could work for you depends on your specific embedding, but +some possibilities include: + +* If your workload involves instantiating a singular wasm module on a separate + thread, then it will need to live on that thread and communicate to other + threads via threadsafe means (e.g. channels, locks/queues, etc). + +* If you have something like a multithreaded web server, for example, then the + WebAssembly executed for each request will need to live within the thread that + the original `Store` was created on. This could be multithreaded, though, by + having a pool of threads executing WebAssembly. Each request would have a + scheduling decision of which pool to route to which would be up to the + application. 
In situations such as this it's recommended to [enable fuel + consumption](https://docs.wasmtime.dev/api/wasmtime/struct.Config.html#method.consume_fuel) + as well as [yielding when out of + fuel](https://docs.wasmtime.dev/api/wasmtime/struct.Store.html#method.out_of_fuel_async_yield). + This will ensure that no one request entirely hogs a thread executing + WebAssembly and all requests scheduled onto that thread are able to execute. + It's also worth pointing out that the threads executing WebAssembly may or may + not be the same as the threads performing I/O for your server requests. + +* If absolutely required, Wasmtime is engineered such that it is dynamically safe + to move a `Store` as a whole to a separate thread. This option is not + recommended due to its complexity, but it is one that Wasmtime tests in CI and + considers supported. The principle here is that all objects connected to a + `Store` are safe to move to a separate thread *if and only if*: + + * All objects are moved all at once. For example you can't leave behind + references to a `Func` or perhaps a `Store` in TLS. + + * All host objects living inside of a store (e.g. those inserted via + `Store::set` or `Func::wrap`) implement the `Send` trait. + + If these requirements are met it is technically safe to move a store and its + objects between threads. The reason that this strategy isn't recommended, + however, is that you will receive no assistance from the Rust compiler in + verifying that the transfer across threads is indeed actually safe. This will + require auditing your embedding of Wasmtime itself to ensure it meets these + requirements. + + It's important to note that the requirements here also apply to the futures + returned from `Func::call_async`. These futures are not `Send` due to them + closing over `Store`-related values. 
In addition to the above requirements + though to safely send across threads embedders must *also* ensure that any + host futures returned from `Func::wrapN_async` are actually `Send` and safe to + send across threads. Again, though, there is no compiler assistance in doing + this. + +Overall the recommended story for multithreading with Wasmtime is "don't move a +`Store` between threads" and to architect your application around this +assumption. diff --git a/examples/fuel.c b/examples/fuel.c new file mode 100644 index 0000000000..af3b51782d --- /dev/null +++ b/examples/fuel.c @@ -0,0 +1,132 @@ +/* +Example of instantiating of the WebAssembly module and invoking its exported +function. + +You can compile and run this example on Linux with: + + cargo build --release -p wasmtime-c-api + cc examples/fuel.c \ + -I crates/c-api/include \ + -I crates/c-api/wasm-c-api/include \ + target/release/libwasmtime.a \ + -lpthread -ldl -lm \ + -o fuel + ./fuel + +Note that on Windows and macOS the command will be similar, but you'll need +to tweak the `-lpthread` and such annotations. +*/ + +#include +#include +#include +#include +#include + +static void exit_with_error(const char *message, wasmtime_error_t *error, wasm_trap_t *trap); + +int main() { + wasmtime_error_t *error = NULL; + + wasm_config_t *config = wasm_config_new(); + assert(config != NULL); + wasmtime_config_consume_fuel_set(config, true); + + // Create an *engine*, which is a compilation context, with our configured options. 
+ wasm_engine_t *engine = wasm_engine_new_with_config(config); + assert(engine != NULL); + wasm_store_t *store = wasm_store_new(engine); + assert(store != NULL); + error = wasmtime_store_add_fuel(store, 10000); + if (error != NULL) + exit_with_error("failed to add fuel", error, NULL); + + // Load our input file to parse it next + FILE* file = fopen("examples/fuel.wat", "r"); + if (!file) { + printf("> Error loading file!\n"); + return 1; + } + fseek(file, 0L, SEEK_END); + size_t file_size = ftell(file); + fseek(file, 0L, SEEK_SET); + wasm_byte_vec_t wat; + wasm_byte_vec_new_uninitialized(&wat, file_size); + if (fread(wat.data, file_size, 1, file) != 1) { + printf("> Error loading module!\n"); + return 1; + } + fclose(file); + + // Parse the wat into the binary wasm format + wasm_byte_vec_t wasm; + error = wasmtime_wat2wasm(&wat, &wasm); + if (error != NULL) + exit_with_error("failed to parse wat", error, NULL); + wasm_byte_vec_delete(&wat); + + // Compile and instantiate our module + wasm_module_t *module = NULL; + error = wasmtime_module_new(engine, &wasm, &module); + if (module == NULL) + exit_with_error("failed to compile module", error, NULL); + wasm_byte_vec_delete(&wasm); + wasm_trap_t *trap = NULL; + wasm_instance_t *instance = NULL; + wasm_extern_vec_t imports = WASM_EMPTY_VEC; + error = wasmtime_instance_new(store, module, &imports, &instance, &trap); + if (instance == NULL) + exit_with_error("failed to instantiate", error, trap); + + // Lookup our `fibonacci` export function + wasm_extern_vec_t externs; + wasm_instance_exports(instance, &externs); + assert(externs.size == 1); + wasm_func_t *fibonacci = wasm_extern_as_func(externs.data[0]); + assert(fibonacci != NULL); + + // Call it repeatedly until it fails + for (int n = 1; ; n++) { + uint64_t fuel_before; + wasmtime_store_fuel_consumed(store, &fuel_before); + wasm_val_t params[1] = { WASM_I32_VAL(n) }; + wasm_val_t results[1]; + wasm_val_vec_t params_vec = WASM_ARRAY_VEC(params); + wasm_val_vec_t 
results_vec = WASM_ARRAY_VEC(results); + error = wasmtime_func_call(fibonacci, ¶ms_vec, &results_vec, &trap); + if (error != NULL || trap != NULL) { + printf("Exhausted fuel computing fib(%d)\n", n); + break; + } + + uint64_t fuel_after; + wasmtime_store_fuel_consumed(store, &fuel_after); + assert(results[0].kind == WASM_I32); + printf("fib(%d) = %d [consumed %lld fuel]\n", n, results[0].of.i32, fuel_after - fuel_before); + + error = wasmtime_store_add_fuel(store, fuel_after - fuel_before); + if (error != NULL) + exit_with_error("failed to add fuel", error, NULL); + } + + // Clean up after ourselves at this point + wasm_extern_vec_delete(&externs); + wasm_instance_delete(instance); + wasm_module_delete(module); + wasm_store_delete(store); + wasm_engine_delete(engine); + return 0; +} + +static void exit_with_error(const char *message, wasmtime_error_t *error, wasm_trap_t *trap) { + fprintf(stderr, "error: %s\n", message); + wasm_byte_vec_t error_message; + if (error != NULL) { + wasmtime_error_message(error, &error_message); + } else { + wasm_trap_message(trap, &error_message); + } + fprintf(stderr, "%.*s\n", (int) error_message.size, error_message.data); + wasm_byte_vec_delete(&error_message); + exit(1); +} diff --git a/examples/fuel.rs b/examples/fuel.rs new file mode 100644 index 0000000000..00181a67e1 --- /dev/null +++ b/examples/fuel.rs @@ -0,0 +1,33 @@ +//! Example of limiting a WebAssembly function's runtime using "fuel consumption". + +// You can execute this example with `cargo run --example fuel` + +use anyhow::Result; +use wasmtime::*; + +fn main() -> Result<()> { + let mut config = Config::new(); + config.consume_fuel(true); + let engine = Engine::new(&config)?; + let store = Store::new(&engine); + store.add_fuel(10_000)?; + let module = Module::from_file(store.engine(), "examples/fuel.wat")?; + let instance = Instance::new(&store, &module, &[])?; + + // Invoke `fibonacci` export with higher and higher numbers until we exhaust our fuel. 
+ let fibonacci = instance.get_typed_func::("fibonacci")?; + for n in 1.. { + let fuel_before = store.fuel_consumed().unwrap(); + let output = match fibonacci.call(n) { + Ok(v) => v, + Err(_) => { + println!("Exhausted fuel computing fib({})", n); + break; + } + }; + let fuel_consumed = store.fuel_consumed().unwrap() - fuel_before; + println!("fib({}) = {} [consumed {} fuel]", n, output, fuel_consumed); + store.add_fuel(fuel_consumed)?; + } + Ok(()) +} diff --git a/examples/fuel.wat b/examples/fuel.wat new file mode 100644 index 0000000000..48622c2e21 --- /dev/null +++ b/examples/fuel.wat @@ -0,0 +1,13 @@ +(module + (func $fibonacci (param $n i32) (result i32) + (if + (i32.lt_s (local.get $n) (i32.const 2)) + (return (local.get $n)) + ) + (i32.add + (call $fibonacci (i32.sub (local.get $n) (i32.const 1))) + (call $fibonacci (i32.sub (local.get $n) (i32.const 2))) + ) + ) + (export "fibonacci" (func $fibonacci)) +) diff --git a/examples/linking.rs b/examples/linking.rs index 11f4022bcd..824bb13410 100644 --- a/examples/linking.rs +++ b/examples/linking.rs @@ -3,9 +3,8 @@ // You can execute this example with `cargo run --example linking` use anyhow::Result; -use wasi_cap_std_sync::WasiCtxBuilder; use wasmtime::*; -use wasmtime_wasi::Wasi; +use wasmtime_wasi::sync::{Wasi, WasiCtxBuilder}; fn main() -> Result<()> { let engine = Engine::default(); diff --git a/examples/memory.rs b/examples/memory.rs index 70e1b724a9..e47c249e5f 100644 --- a/examples/memory.rs +++ b/examples/memory.rs @@ -75,7 +75,7 @@ fn main() -> Result<()> { println!("Creating stand-alone memory..."); let memorytype = MemoryType::new(Limits::new(5, Some(5))); - let memory2 = Memory::new(&wasmtime_store, memorytype); + let memory2 = Memory::new(&wasmtime_store, memorytype)?; assert_eq!(memory2.size(), 5); assert!(memory2.grow(1).is_err()); assert!(memory2.grow(0).is_ok()); diff --git a/examples/multi.rs b/examples/multi.rs index cb2d20bf8f..6b224c8e8c 100644 --- a/examples/multi.rs +++ 
b/examples/multi.rs @@ -8,9 +8,11 @@ // You can execute this example with `cargo run --example multi` use anyhow::Result; -use wasmtime::*; +#[cfg(not(feature = "old-x86-backend"))] fn main() -> Result<()> { + use wasmtime::*; + println!("Initializing..."); let engine = Engine::default(); let store = Store::new(&engine); @@ -68,3 +70,8 @@ fn main() -> Result<()> { Ok(()) } + +#[cfg(feature = "old-x86-backend")] +fn main() -> Result<()> { + Ok(()) +} diff --git a/examples/serialize.rs b/examples/serialize.rs index dd30b47a98..70a875c3bf 100644 --- a/examples/serialize.rs +++ b/examples/serialize.rs @@ -29,9 +29,12 @@ fn deserialize(buffer: &[u8]) -> Result<()> { println!("Initializing..."); let store = Store::default(); - // Compile the wasm binary into an in-memory instance of a `Module`. + // Compile the wasm binary into an in-memory instance of a `Module`. Note + // that this is `unsafe` because it is our responsibility for guaranteeing + // that these bytes are valid precompiled module bytes. We know that from + // the structure of this example program. println!("Deserialize module..."); - let module = Module::deserialize(store.engine(), buffer)?; + let module = unsafe { Module::deserialize(store.engine(), buffer)? }; // Here we handle the imports of the module, which in this case is our // `HelloCallback` type and its associated implementation of `Callback. diff --git a/examples/tokio/main.c b/examples/tokio/main.c new file mode 100644 index 0000000000..f6920d8bae --- /dev/null +++ b/examples/tokio/main.c @@ -0,0 +1,5 @@ +int main(int argc, char *argv[]) { + // This example is specific to integrating with Rust's tokio ecosystem, so + // it isn't applicable to C/C++. 
+ return 0; +} diff --git a/examples/tokio/main.rs b/examples/tokio/main.rs new file mode 100644 index 0000000000..491fe754db --- /dev/null +++ b/examples/tokio/main.rs @@ -0,0 +1,163 @@ +use anyhow::{anyhow, Error}; +use std::future::Future; +use tokio::time::Duration; +use wasmtime::{Config, Engine, Linker, Module, Store}; +// For this example we want to use the async version of wasmtime_wasi. +// Notably, this version of wasi uses a scheduler that will async yield +// when sleeping in `poll_oneoff`. +use wasmtime_wasi::tokio::{Wasi, WasiCtxBuilder}; + +#[tokio::main] +async fn main() -> Result<(), Error> { + // Create an environment shared by all wasm execution. This contains + // the `Engine` and the `Module` we are executing. + let env = Environment::new()?; + + // The inputs to run_wasm are `Send`: we can create them here and send + // them to a new task that we spawn. + let inputs1 = Inputs::new(env.clone(), "Gussie"); + let inputs2 = Inputs::new(env.clone(), "Willa"); + let inputs3 = Inputs::new(env, "Sparky"); + + // Spawn some tasks. Insert sleeps before run_wasm so that the + // interleaving is easy to observe. + let join1 = tokio::task::spawn(async move { run_wasm(inputs1).await }); + let join2 = tokio::task::spawn(async move { + tokio::time::sleep(Duration::from_millis(750)).await; + run_wasm(inputs2).await + }); + let join3 = tokio::task::spawn(async move { + tokio::time::sleep(Duration::from_millis(1250)).await; + run_wasm(inputs3).await + }); + + // All tasks should join successfully. + join1.await??; + join2.await??; + join3.await??; + Ok(()) +} + +#[derive(Clone)] +struct Environment { + engine: Engine, + module: Module, +} + +impl Environment { + pub fn new() -> Result { + let mut config = Config::new(); + // We need this engine's `Store`s to be async, and consume fuel, so + // that they can co-operatively yield during execution. + config.async_support(true); + config.consume_fuel(true); + + // Install the host functions for `Wasi`. 
+ Wasi::add_to_config(&mut config); + + let engine = Engine::new(&config)?; + let module = Module::from_file(&engine, "target/wasm32-wasi/debug/tokio-wasi.wasm")?; + + Ok(Self { engine, module }) + } +} + +struct Inputs { + env: Environment, + name: String, +} + +impl Inputs { + fn new(env: Environment, name: &str) -> Self { + Self { + env, + name: name.to_owned(), + } + } +} + +fn run_wasm(inputs: Inputs) -> impl Future> { + use std::pin::Pin; + use std::task::{Context, Poll}; + // IMPORTANT: The current wasmtime API is very challenging to use safely + // on an async runtime. This RFC describes a redesign of the API that will + // resolve these safety issues: + // https://github.com/alexcrichton/rfcs-2/blob/new-api/accepted/new-api.md + + // This is a "marker type future" which simply wraps some other future and + // the only purpose it serves is to forward the implementation of `Future` + // as well as have `unsafe impl Send` for itself, regardless of the + // underlying type. + // + // Note that the actual safety of this relies on the fact that the inputs + // here are `Send`, the outputs (just () in this case) are `Send`, and the + // future itself is safe to resume on other threads. + // + // For an in-depth discussion of the safety of moving Wasmtime's `Store` + // between threads, see + // https://docs.wasmtime.dev/examples-rust-multithreading.html. + struct UnsafeSend(T); + + // Note the `where` clause specifically ensures the output of the future to + // be `Send` is required. We specifically don't require `T` to be `Send` + // since that's the whole point of this function, but we require that + // everything used to construct `T` is `Send` below. 
+ unsafe impl Send for UnsafeSend + where + T: Future, + T::Output: Send, + { + } + impl Future for UnsafeSend { + type Output = T::Output; + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + // Note that this `unsafe` is unrelated to `Send`, it only has to do with "pin + // projection" and should be safe since it's all we do with the `Pin`. + unsafe { self.map_unchecked_mut(|p| &mut p.0).poll(cx) } + } + } + + // This is a crucial assertion that needs to be here. The compiler + // typically checks this for us, but due to our `UnsafeSend` type the + // compiler isn't automatically checking this. The assertion here must + // assert that all arguments to this function are indeed `Send` because + // we're closing over them and sending them to other threads. It's only + // everything *internal* to the computation of this function which doesn't + // have to be `Send`. + fn assert_send(_t: &T) {} + assert_send(&inputs); + + // Wrap up the `_run_wasm` function, which is *not* `Send`, but is safe to + // resume on other threads. + UnsafeSend(_run_wasm(inputs)) +} + +async fn _run_wasm(inputs: Inputs) -> Result<(), Error> { + let store = Store::new(&inputs.env.engine); + + // WebAssembly execution will be paused for an async yield every time it + // consumes 10000 fuel. Fuel will be refilled u32::MAX times. + store.out_of_fuel_async_yield(u32::MAX, 10000); + + Wasi::set_context( + &store, + WasiCtxBuilder::new() + // Let wasi print to this process's stdout. + .inherit_stdout() + // Set an environment variable so the wasm knows its name. + .env("NAME", &inputs.name)? + .build()?, + ) + .map_err(|_| anyhow!("setting wasi context"))?; + + let linker = Linker::new(&store); + + // Instantiate + let instance = linker.instantiate_async(&inputs.env.module).await?; + instance + .get_typed_func::<(), ()>("_start")? 
+ .call_async(()) + .await?; + + Ok(()) +} diff --git a/examples/tokio/wasm/Cargo.toml b/examples/tokio/wasm/Cargo.toml new file mode 100644 index 0000000000..5704f79630 --- /dev/null +++ b/examples/tokio/wasm/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "example-tokio-wasm" +version = "0.0.0" +authors = ["The Wasmtime Project Developers"] +edition = "2018" +publish = false + +[[bin]] +path = "tokio-wasi.rs" +name = "tokio-wasi" diff --git a/examples/tokio/wasm/tokio-wasi.rs b/examples/tokio/wasm/tokio-wasi.rs new file mode 100644 index 0000000000..3431e65a66 --- /dev/null +++ b/examples/tokio/wasm/tokio-wasi.rs @@ -0,0 +1,6 @@ +fn main() { + let name = std::env::var("NAME").unwrap(); + println!("Hello, world! My name is {}", name); + std::thread::sleep(std::time::Duration::from_secs(1)); + println!("Goodbye from {}", name); +} diff --git a/examples/wasi/main.rs b/examples/wasi/main.rs index f1487171f7..0c3f077d93 100644 --- a/examples/wasi/main.rs +++ b/examples/wasi/main.rs @@ -4,9 +4,8 @@ // You can execute this example with `cargo run --example wasi` use anyhow::Result; -use wasi_cap_std_sync::WasiCtxBuilder; use wasmtime::*; -use wasmtime_wasi::Wasi; +use wasmtime_wasi::sync::{Wasi, WasiCtxBuilder}; fn main() -> Result<()> { tracing_subscriber::FmtSubscriber::builder() diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 2b40128d25..ee36b07154 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -13,14 +13,17 @@ cranelift-codegen = { path = "../cranelift/codegen" } cranelift-reader = { path = "../cranelift/reader" } cranelift-wasm = { path = "../cranelift/wasm" } libfuzzer-sys = "0.4.0" -target-lexicon = "0.11" +target-lexicon = "0.12" peepmatic-fuzzing = { path = "../cranelift/peepmatic/crates/fuzzing", optional = true } wasmtime = { path = "../crates/wasmtime" } wasmtime-fuzzing = { path = "../crates/fuzzing" } wasm-smith = "0.4.0" [features] -experimental_x64 = ["wasmtime-fuzzing/experimental_x64"] +# Leave a stub feature with no side-effects in place 
for now: the OSS-Fuzz +# config builds fuzz targets with this feature enabled and we don't want to +# break the build. +experimental_x64 = [] [[bin]] name = "compile" diff --git a/scripts/publish.rs b/scripts/publish.rs index 39a7f04e8c..e121cedc3a 100644 --- a/scripts/publish.rs +++ b/scripts/publish.rs @@ -49,6 +49,7 @@ const CRATES_TO_PUBLISH: &[&str] = &[ // wasi-common "wasi-common", "wasi-cap-std-sync", + "wasi-tokio", // wasmtime "lightbeam", "wasmtime-fiber", diff --git a/src/bin/wasmtime.rs b/src/bin/wasmtime.rs index 89c1078f48..475f2c607b 100644 --- a/src/bin/wasmtime.rs +++ b/src/bin/wasmtime.rs @@ -6,7 +6,7 @@ use anyhow::Result; use structopt::{clap::AppSettings, clap::ErrorKind, StructOpt}; use wasmtime_cli::commands::{ - ConfigCommand, RunCommand, WasmToObjCommand, WastCommand, WASM2OBJ_AFTER_HELP, + CompileCommand, ConfigCommand, RunCommand, SettingsCommand, WasmToObjCommand, WastCommand, }; /// Wasmtime WebAssembly Runtime @@ -38,10 +38,14 @@ enum WasmtimeApp { // !!! IMPORTANT: if subcommands are added or removed, update `parse_module` in `src/commands/run.rs`. !!! /// Controls Wasmtime configuration settings Config(ConfigCommand), + /// Compiles a WebAssembly module. + Compile(CompileCommand), /// Runs a WebAssembly module Run(RunCommand), + /// Displays available Cranelift settings for a target. + Settings(SettingsCommand), /// Translates a WebAssembly module to native object file - #[structopt(name = "wasm2obj", after_help = WASM2OBJ_AFTER_HELP)] + #[structopt(name = "wasm2obj")] WasmToObj(WasmToObjCommand), /// Runs a WebAssembly test script file Wast(WastCommand), @@ -49,10 +53,12 @@ enum WasmtimeApp { impl WasmtimeApp { /// Executes the command. 
- pub fn execute(&self) -> Result<()> { + pub fn execute(self) -> Result<()> { match self { Self::Config(c) => c.execute(), + Self::Compile(c) => c.execute(), Self::Run(c) => c.execute(), + Self::Settings(c) => c.execute(), Self::WasmToObj(c) => c.execute(), Self::Wast(c) => c.execute(), } diff --git a/src/commands.rs b/src/commands.rs index e9891bab99..ebed3d732b 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -1,8 +1,10 @@ //! The module for the Wasmtime CLI commands. +mod compile; mod config; mod run; +mod settings; mod wasm2obj; mod wast; -pub use self::{config::*, run::*, wasm2obj::*, wast::*}; +pub use self::{compile::*, config::*, run::*, settings::*, wasm2obj::*, wast::*}; diff --git a/src/commands/compile.rs b/src/commands/compile.rs new file mode 100644 index 0000000000..9d601ab7a0 --- /dev/null +++ b/src/commands/compile.rs @@ -0,0 +1,274 @@ +//! The module that implements the `wasmtime compile` command. + +use crate::CommonOptions; +use anyhow::{bail, Context, Result}; +use std::fs; +use std::path::PathBuf; +use structopt::{clap::AppSettings, StructOpt}; +use target_lexicon::Triple; +use wasmtime::Engine; + +lazy_static::lazy_static! { + static ref AFTER_HELP: String = { + format!( + "By default, no CPU features or presets will be enabled for the compilation.\n\ + \n\ + {}\ + \n\ + Usage examples:\n\ + \n\ + Compiling a WebAssembly module for the current platform:\n\ + \n \ + wasmtime compile example.wasm + \n\ + Specifying the output file:\n\ + \n \ + wasmtime compile -o output.cwasm input.wasm\n\ + \n\ + Compiling for a specific platform (Linux) and CPU preset (Skylake):\n\ + \n \ + wasmtime compile --target x86_64-unknown-linux --cranelift-enable skylake foo.wasm\n", + crate::FLAG_EXPLANATIONS.as_str() + ) + }; +} + +/// Compiles a WebAssembly module. 
+#[derive(StructOpt)] +#[structopt( + name = "compile", + version = env!("CARGO_PKG_VERSION"), + setting = AppSettings::ColoredHelp, + after_help = AFTER_HELP.as_str() +)] +pub struct CompileCommand { + #[structopt(flatten)] + common: CommonOptions, + + /// Enable support for interrupting WebAssembly code. + #[structopt(long)] + interruptable: bool, + + /// The target triple; default is the host triple + #[structopt(long, value_name = "TARGET")] + target: Option, + + /// The path of the output compiled module; defaults to .cwasm + #[structopt(short = "o", long, value_name = "OUTPUT", parse(from_os_str))] + output: Option, + + /// The path of the WebAssembly to compile + #[structopt(index = 1, value_name = "MODULE", parse(from_os_str))] + module: PathBuf, +} + +impl CompileCommand { + /// Executes the command. + pub fn execute(mut self) -> Result<()> { + self.common.init_logging(); + + let target = self + .target + .take() + .unwrap_or_else(|| Triple::host().to_string()); + + let mut config = self.common.config(Some(&target))?; + config.interruptable(self.interruptable); + + let engine = Engine::new(&config)?; + + if self.module.file_name().is_none() { + bail!( + "'{}' is not a valid input module path", + self.module.display() + ); + } + + let input = fs::read(&self.module).with_context(|| "failed to read input file")?; + + let output = self.output.take().unwrap_or_else(|| { + let mut output: PathBuf = self.module.file_name().unwrap().into(); + output.set_extension("cwasm"); + output + }); + + fs::write(output, engine.precompile_module(&input)?)?; + + Ok(()) + } +} + +#[cfg(test)] +mod test { + use super::*; + use std::io::Write; + use tempfile::NamedTempFile; + use wasmtime::{Instance, Module, Store}; + + #[test] + fn test_successful_compile() -> Result<()> { + let (mut input, input_path) = NamedTempFile::new()?.into_parts(); + input.write_all( + "(module (func (export \"f\") (param i32) (result i32) local.get 0))".as_bytes(), + )?; + drop(input); + + let 
output_path = NamedTempFile::new()?.into_temp_path(); + + let command = CompileCommand::from_iter_safe(vec![ + "compile", + "--disable-logging", + "-o", + output_path.to_str().unwrap(), + input_path.to_str().unwrap(), + ])?; + + command.execute()?; + + let engine = Engine::default(); + let contents = std::fs::read(output_path)?; + let module = unsafe { Module::deserialize(&engine, contents)? }; + let store = Store::new(&engine); + let instance = Instance::new(&store, &module, &[])?; + let f = instance.get_typed_func::("f")?; + assert_eq!(f.call(1234).unwrap(), 1234); + + Ok(()) + } + + #[cfg(target_arch = "x86_64")] + #[test] + fn test_x64_flags_compile() -> Result<()> { + let (mut input, input_path) = NamedTempFile::new()?.into_parts(); + input.write_all("(module)".as_bytes())?; + drop(input); + + let output_path = NamedTempFile::new()?.into_temp_path(); + + // Set all the x64 flags to make sure they work + let command = CompileCommand::from_iter_safe(vec![ + "compile", + "--disable-logging", + "--cranelift-enable", + "has_sse3", + "--cranelift-enable", + "has_ssse3", + "--cranelift-enable", + "has_sse41", + "--cranelift-enable", + "has_sse42", + "--cranelift-enable", + "has_avx", + "--cranelift-enable", + "has_avx2", + "--cranelift-enable", + "has_avx512dq", + "--cranelift-enable", + "has_avx512vl", + "--cranelift-enable", + "has_avx512f", + "--cranelift-enable", + "has_popcnt", + "--cranelift-enable", + "has_bmi1", + "--cranelift-enable", + "has_bmi2", + "--cranelift-enable", + "has_lzcnt", + "-o", + output_path.to_str().unwrap(), + input_path.to_str().unwrap(), + ])?; + + command.execute()?; + + Ok(()) + } + + #[cfg(target_arch = "aarch64")] + #[test] + fn test_aarch64_flags_compile() -> Result<()> { + let (mut input, input_path) = NamedTempFile::new()?.into_parts(); + input.write_all("(module)".as_bytes())?; + drop(input); + + let output_path = NamedTempFile::new()?.into_temp_path(); + + // Set all the aarch64 flags to make sure they work + let command = 
CompileCommand::from_iter_safe(vec![ + "compile", + "--disable-logging", + "--cranelift-enable", + "has_lse", + "-o", + output_path.to_str().unwrap(), + input_path.to_str().unwrap(), + ])?; + + command.execute()?; + + Ok(()) + } + + #[cfg(target_arch = "x86_64")] + #[test] + fn test_unsupported_flags_compile() -> Result<()> { + let (mut input, input_path) = NamedTempFile::new()?.into_parts(); + input.write_all("(module)".as_bytes())?; + drop(input); + + let output_path = NamedTempFile::new()?.into_temp_path(); + + // aarch64 flags should not be supported + let command = CompileCommand::from_iter_safe(vec![ + "compile", + "--disable-logging", + "--cranelift-enable", + "has_lse", + "-o", + output_path.to_str().unwrap(), + input_path.to_str().unwrap(), + ])?; + + assert_eq!( + command.execute().unwrap_err().to_string(), + "No existing setting named 'has_lse'" + ); + + Ok(()) + } + + #[cfg(target_arch = "x86_64")] + #[test] + fn test_x64_presets_compile() -> Result<()> { + let (mut input, input_path) = NamedTempFile::new()?.into_parts(); + input.write_all("(module)".as_bytes())?; + drop(input); + + let output_path = NamedTempFile::new()?.into_temp_path(); + + for preset in &[ + "nehalem", + "haswell", + "broadwell", + "skylake", + "cannonlake", + "icelake", + "znver1", + ] { + let command = CompileCommand::from_iter_safe(vec![ + "compile", + "--disable-logging", + "--cranelift-enable", + preset, + "-o", + output_path.to_str().unwrap(), + input_path.to_str().unwrap(), + ])?; + + command.execute()?; + } + + Ok(()) + } +} diff --git a/src/commands/config.rs b/src/commands/config.rs index 5cafac4e77..a434b86436 100644 --- a/src/commands/config.rs +++ b/src/commands/config.rs @@ -17,7 +17,7 @@ pub enum ConfigCommand { impl ConfigCommand { /// Executes the command. 
- pub fn execute(&self) -> Result<()> { + pub fn execute(self) -> Result<()> { match self { Self::New(c) => c.execute(), } @@ -35,7 +35,7 @@ pub struct ConfigNewCommand { impl ConfigNewCommand { /// Executes the command. - pub fn execute(&self) -> Result<()> { + pub fn execute(self) -> Result<()> { let path = wasmtime_cache::create_new_config(self.path.as_ref())?; println!( diff --git a/src/commands/run.rs b/src/commands/run.rs index 89f9ec120f..639061acad 100644 --- a/src/commands/run.rs +++ b/src/commands/run.rs @@ -1,8 +1,7 @@ //! The module that implements the `wasmtime run` command. -use crate::{init_file_per_thread_logger, CommonOptions}; +use crate::{CommonOptions, WasiModules}; use anyhow::{bail, Context as _, Result}; -use cap_std::fs::Dir; use std::thread; use std::time::Duration; use std::{ @@ -11,9 +10,8 @@ use std::{ process, }; use structopt::{clap::AppSettings, StructOpt}; -use wasi_cap_std_sync::WasiCtxBuilder; use wasmtime::{Engine, Func, Linker, Module, Store, Trap, Val, ValType}; -use wasmtime_wasi::Wasi; +use wasmtime_wasi::sync::{Dir, Wasi, WasiCtxBuilder}; #[cfg(feature = "wasi-nn")] use wasmtime_wasi_nn::{WasiNn, WasiNnCtx}; @@ -27,9 +25,8 @@ use wasmtime_wasi_crypto::{ fn parse_module(s: &OsStr) -> Result { // Do not accept wasmtime subcommand names as the module name match s.to_str() { - Some("help") | Some("config") | Some("run") | Some("wasm2obj") | Some("wast") => { - Err("module name cannot be the same as a subcommand".into()) - } + Some("help") | Some("config") | Some("run") | Some("wasm2obj") | Some("wast") + | Some("compile") => Err("module name cannot be the same as a subcommand".into()), _ => Ok(s.into()), } } @@ -68,13 +65,23 @@ fn parse_preloads(s: &str) -> Result<(String, PathBuf)> { Ok((parts[0].into(), parts[1].into())) } +lazy_static::lazy_static! 
{ + static ref AFTER_HELP: String = { + crate::FLAG_EXPLANATIONS.to_string() + }; +} + /// Runs a WebAssembly module #[derive(StructOpt)] -#[structopt(name = "run", setting = AppSettings::TrailingVarArg)] +#[structopt(name = "run", setting = AppSettings::TrailingVarArg, after_help = AFTER_HELP.as_str())] pub struct RunCommand { #[structopt(flatten)] common: CommonOptions, + /// Allow unknown exports when running commands. + #[structopt(long = "allow-unknown-exports")] + allow_unknown_exports: bool, + /// Grant access to the given host directory #[structopt(long = "dir", number_of_values = 1, value_name = "DIRECTORY")] dirs: Vec, @@ -95,7 +102,7 @@ pub struct RunCommand { #[structopt( index = 1, required = true, - value_name = "WASM_MODULE", + value_name = "MODULE", parse(try_from_os_str = parse_module), )] module: PathBuf, @@ -126,14 +133,9 @@ pub struct RunCommand { impl RunCommand { /// Executes the command. pub fn execute(&self) -> Result<()> { - if self.common.log_to_files { - let prefix = "wasmtime.dbg."; - init_file_per_thread_logger(prefix); - } else { - pretty_env_logger::init(); - } + self.common.init_logging(); - let mut config = self.common.config()?; + let mut config = self.common.config(None)?; if self.wasm_timeout.is_some() { config.interruptable(true); } @@ -145,7 +147,15 @@ impl RunCommand { let argv = self.compute_argv(); let mut linker = Linker::new(&store); - populate_with_wasi(&mut linker, preopen_dirs, &argv, &self.vars)?; + linker.allow_unknown_exports(self.allow_unknown_exports); + + populate_with_wasi( + &mut linker, + preopen_dirs, + &argv, + &self.vars, + &self.common.wasi_modules.unwrap_or(WasiModules::default()), + )?; // Load the preload wasm modules. 
for (name, path) in self.preloads.iter() { @@ -332,10 +342,10 @@ impl RunCommand { match result { Val::I32(i) => println!("{}", i), Val::I64(i) => println!("{}", i), - Val::F32(f) => println!("{}", f), - Val::F64(f) => println!("{}", f), + Val::F32(f) => println!("{}", f32::from_bits(f)), + Val::F64(f) => println!("{}", f64::from_bits(f)), Val::ExternRef(_) => println!(""), - Val::FuncRef(_) => println!(""), + Val::FuncRef(_) => println!(""), Val::V128(i) => println!("{}", i), } } @@ -350,6 +360,7 @@ fn populate_with_wasi( preopen_dirs: Vec<(String, Dir)>, argv: &[String], vars: &[(String, String)], + wasi_modules: &WasiModules, ) -> Result<()> { // Add the current snapshot to the linker. let mut builder = WasiCtxBuilder::new(); @@ -359,25 +370,40 @@ fn populate_with_wasi( builder = builder.preopened_dir(dir, name)?; } - Wasi::new(linker.store(), builder.build()?).add_to_linker(linker)?; - - #[cfg(feature = "wasi-nn")] - { - use std::cell::RefCell; - use std::rc::Rc; - let wasi_nn = WasiNn::new(linker.store(), Rc::new(RefCell::new(WasiNnCtx::new()?))); - wasi_nn.add_to_linker(linker)?; + if wasi_modules.wasi_common { + Wasi::new(linker.store(), builder.build()?).add_to_linker(linker)?; } - #[cfg(feature = "wasi-crypto")] - { - use std::cell::RefCell; - use std::rc::Rc; - let cx_crypto = Rc::new(RefCell::new(WasiCryptoCtx::new())); - WasiCryptoCommon::new(linker.store(), cx_crypto.clone()).add_to_linker(linker)?; - WasiCryptoAsymmetricCommon::new(linker.store(), cx_crypto.clone()).add_to_linker(linker)?; - WasiCryptoSignatures::new(linker.store(), cx_crypto.clone()).add_to_linker(linker)?; - WasiCryptoSymmetric::new(linker.store(), cx_crypto).add_to_linker(linker)?; + if wasi_modules.wasi_nn { + #[cfg(not(feature = "wasi-nn"))] + { + bail!("Cannot enable wasi-nn when the binary is not compiled with this feature."); + } + #[cfg(feature = "wasi-nn")] + { + use std::cell::RefCell; + use std::rc::Rc; + let wasi_nn = WasiNn::new(linker.store(), 
Rc::new(RefCell::new(WasiNnCtx::new()?))); + wasi_nn.add_to_linker(linker)?; + } + } + + if wasi_modules.wasi_crypto { + #[cfg(not(feature = "wasi-crypto"))] + { + bail!("Cannot enable wasi-crypto when the binary is not compiled with this feature."); + } + #[cfg(feature = "wasi-crypto")] + { + use std::cell::RefCell; + use std::rc::Rc; + let cx_crypto = Rc::new(RefCell::new(WasiCryptoCtx::new())); + WasiCryptoCommon::new(linker.store(), cx_crypto.clone()).add_to_linker(linker)?; + WasiCryptoAsymmetricCommon::new(linker.store(), cx_crypto.clone()) + .add_to_linker(linker)?; + WasiCryptoSignatures::new(linker.store(), cx_crypto.clone()).add_to_linker(linker)?; + WasiCryptoSymmetric::new(linker.store(), cx_crypto).add_to_linker(linker)?; + } } Ok(()) diff --git a/src/commands/settings.rs b/src/commands/settings.rs new file mode 100644 index 0000000000..f1949e8c27 --- /dev/null +++ b/src/commands/settings.rs @@ -0,0 +1,98 @@ +//! The module that implements the `wasmtime settings` command. + +use anyhow::{anyhow, Result}; +use std::str::FromStr; +use structopt::StructOpt; +use wasmtime_environ::settings::{self, Setting, SettingKind}; +use wasmtime_jit::native; + +/// Displays available Cranelift settings for a target. +#[derive(StructOpt)] +#[structopt(name = "run")] +pub struct SettingsCommand { + /// The target triple to get the settings for; defaults to the host triple. + #[structopt(long, value_name = "TARGET")] + target: Option, +} + +impl SettingsCommand { + /// Executes the command. + pub fn execute(self) -> Result<()> { + let settings = match &self.target { + Some(target) => { + native::lookup(target_lexicon::Triple::from_str(target).map_err(|e| anyhow!(e))?)? 
+ } + None => native::builder(), + }; + + let mut enums = (Vec::new(), 0, "Enum settings:"); + let mut nums = (Vec::new(), 0, "Numerical settings:"); + let mut bools = (Vec::new(), 0, "Boolean settings:"); + let mut presets = (Vec::new(), 0, "Presets:"); + + for setting in settings.iter() { + let (collection, max, _) = match setting.kind { + SettingKind::Enum => &mut enums, + SettingKind::Num => &mut nums, + SettingKind::Bool => &mut bools, + SettingKind::Preset => &mut presets, + }; + + if setting.name.len() > *max { + *max = setting.name.len(); + } + + collection.push(setting); + } + + if enums.0.is_empty() && nums.0.is_empty() && bools.0.is_empty() && presets.0.is_empty() { + println!("Target '{}' has no settings.", settings.triple()); + return Ok(()); + } + + println!("Cranelift settings for target '{}':", settings.triple()); + + for (collection, max, header) in &mut [enums, nums, bools, presets] { + if collection.is_empty() { + continue; + } + + collection.sort_by_key(|k| k.name); + println!(); + Self::print_settings(header, collection, *max); + } + + if self.target.is_none() { + let isa = settings.finish(settings::Flags::new(settings::builder())); + println!(); + println!("Settings inferred for the current host:"); + + let mut values = isa.isa_flags(); + values.sort_by_key(|k| k.name); + + for value in values { + if value.as_bool().unwrap_or(false) { + println!(" {}", value.name); + } + } + } + + Ok(()) + } + + fn print_settings(header: &str, settings: &[Setting], width: usize) { + println!("{}", header); + for setting in settings { + println!( + " {:width$} {}{}", + setting.name, + setting.description, + setting + .values + .map(|v| format!(" Supported values: {}.", v.join(", "))) + .unwrap_or("".to_string()), + width = width + 2 + ); + } + } +} diff --git a/src/commands/wasm2obj.rs b/src/commands/wasm2obj.rs index 4688f985dc..b6426ed13a 100644 --- a/src/commands/wasm2obj.rs +++ b/src/commands/wasm2obj.rs @@ -1,23 +1,26 @@ //! 
The module that implements the `wasmtime wasm2obj` command. use crate::obj::compile_to_obj; -use crate::{init_file_per_thread_logger, pick_compilation_strategy, CommonOptions}; -use anyhow::{anyhow, Context as _, Result}; +use crate::{parse_target, pick_compilation_strategy, CommonOptions}; +use anyhow::{Context as _, Result}; use std::{ fs::File, io::Write, path::{Path, PathBuf}, - str::FromStr, }; use structopt::{clap::AppSettings, StructOpt}; use target_lexicon::Triple; -/// The after help text for the `wasm2obj` command. -pub const WASM2OBJ_AFTER_HELP: &str = "The translation is dependent on the environment chosen.\n\ - The default is a dummy environment that produces placeholder values."; - -fn parse_target(s: &str) -> Result { - Triple::from_str(&s).map_err(|e| anyhow!(e)) +lazy_static::lazy_static! { + static ref AFTER_HELP: String = { + format!( + "The translation is dependent on the environment chosen.\n\ + The default is a dummy environment that produces placeholder values.\n\ + \n\ + {}", + crate::FLAG_EXPLANATIONS.as_str() + ) + }; } /// Translates a WebAssembly module to native object file @@ -26,7 +29,7 @@ fn parse_target(s: &str) -> Result { name = "wasm2obj", version = env!("CARGO_PKG_VERSION"), setting = AppSettings::ColoredHelp, - after_help = WASM2OBJ_AFTER_HELP, + after_help = AFTER_HELP.as_str(), )] pub struct WasmToObjCommand { #[structopt(flatten)] @@ -47,17 +50,8 @@ pub struct WasmToObjCommand { impl WasmToObjCommand { /// Executes the command. 
- pub fn execute(&self) -> Result<()> { - self.handle_module() - } - - fn handle_module(&self) -> Result<()> { - if self.common.log_to_files { - let prefix = "wasm2obj.dbg."; - init_file_per_thread_logger(prefix); - } else { - pretty_env_logger::init(); - } + pub fn execute(self) -> Result<()> { + self.common.init_logging(); let strategy = pick_compilation_strategy(self.common.cranelift, self.common.lightbeam)?; diff --git a/src/commands/wast.rs b/src/commands/wast.rs index 52dbff3aff..08ad8e0a2a 100644 --- a/src/commands/wast.rs +++ b/src/commands/wast.rs @@ -1,18 +1,25 @@ //! The module that implements the `wasmtime wast` command. -use crate::{init_file_per_thread_logger, CommonOptions}; +use crate::CommonOptions; use anyhow::{Context as _, Result}; use std::path::PathBuf; use structopt::{clap::AppSettings, StructOpt}; use wasmtime::{Engine, Store}; use wasmtime_wast::WastContext; +lazy_static::lazy_static! { + static ref AFTER_HELP: String = { + crate::FLAG_EXPLANATIONS.to_string() + }; +} + /// Runs a WebAssembly test script file #[derive(StructOpt)] #[structopt( name = "wast", version = env!("CARGO_PKG_VERSION"), setting = AppSettings::ColoredHelp, + after_help = AFTER_HELP.as_str(), )] pub struct WastCommand { #[structopt(flatten)] @@ -25,15 +32,10 @@ pub struct WastCommand { impl WastCommand { /// Executes the command. 
- pub fn execute(&self) -> Result<()> { - if self.common.log_to_files { - let prefix = "wast.dbg."; - init_file_per_thread_logger(prefix); - } else { - pretty_env_logger::init(); - } + pub fn execute(self) -> Result<()> { + self.common.init_logging(); - let config = self.common.config()?; + let config = self.common.config(None)?; let store = Store::new(&Engine::new(&config)?); let mut wast_context = WastContext::new(store); diff --git a/src/lib.rs b/src/lib.rs index 39bda21f66..facb0383c2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,12 +23,83 @@ ) )] +const SUPPORTED_WASM_FEATURES: &[(&str, &str)] = &[ + ("all", "enables all supported WebAssembly features"), + ( + "bulk-memory", + "enables support for bulk memory instructions", + ), + ( + "module-linking", + "enables support for the module-linking proposal", + ), + ( + "multi-memory", + "enables support for the multi-memory proposal", + ), + ("multi-value", "enables support for multi-value functions"), + ("reference-types", "enables support for reference types"), + ("simd", "enables support for proposed SIMD instructions"), + ("threads", "enables support for WebAssembly threads"), +]; + +const SUPPORTED_WASI_MODULES: &[(&str, &str)] = &[ + ( + "default", + "enables all stable WASI modules (no experimental modules)", + ), + ( + "wasi-common", + "enables support for the WASI common APIs, see https://github.com/WebAssembly/WASI", + ), + ( + "experimental-wasi-nn", + "enables support for the WASI neural network API (experimental), see https://github.com/WebAssembly/wasi-nn", + ), + ( + "experimental-wasi-crypto", + "enables support for the WASI cryptography APIs (experimental), see https://github.com/WebAssembly/wasi-crypto", + ), +]; + +lazy_static::lazy_static! { + static ref FLAG_EXPLANATIONS: String = { + use std::fmt::Write; + + let mut s = String::new(); + + // Explain --wasm-features. 
+ writeln!(&mut s, "Supported values for `--wasm-features`:").unwrap(); + writeln!(&mut s).unwrap(); + let max = SUPPORTED_WASM_FEATURES.iter().max_by_key(|(name, _)| name.len()).unwrap(); + for (name, desc) in SUPPORTED_WASM_FEATURES.iter() { + writeln!(&mut s, "{:width$} {}", name, desc, width = max.0.len() + 2).unwrap(); + } + writeln!(&mut s).unwrap(); + + // Explain --wasi-modules. + writeln!(&mut s, "Supported values for `--wasi-modules`:").unwrap(); + writeln!(&mut s).unwrap(); + let max = SUPPORTED_WASI_MODULES.iter().max_by_key(|(name, _)| name.len()).unwrap(); + for (name, desc) in SUPPORTED_WASI_MODULES.iter() { + writeln!(&mut s, "{:width$} {}", name, desc, width = max.0.len() + 2).unwrap(); + } + + writeln!(&mut s).unwrap(); + writeln!(&mut s, "Features prefixed with '-' will be disabled.").unwrap(); + + s + }; +} + pub mod commands; mod obj; use anyhow::{bail, Result}; +use std::collections::HashMap; use std::path::PathBuf; use structopt::StructOpt; +use target_lexicon::Triple; use wasmtime::{Config, ProfilingStrategy, Strategy}; pub use obj::compile_to_obj; @@ -91,6 +162,10 @@ struct CommonOptions { #[structopt(long, conflicts_with = "lightbeam")] cranelift: bool, + /// Disable logging. + #[structopt(long, conflicts_with = "log_to_files")] + disable_logging: bool, + /// Log to per-thread log files instead of stderr. 
#[structopt(long)] log_to_files: bool, @@ -103,38 +178,46 @@ struct CommonOptions { #[structopt(long)] disable_cache: bool, - /// Enable support for proposed SIMD instructions - #[structopt(long)] + /// Enable support for proposed SIMD instructions (deprecated; use `--wasm-features=simd`) + #[structopt(long, hidden = true)] enable_simd: bool, - /// Enable support for reference types - #[structopt(long)] - enable_reference_types: Option, + /// Enable support for reference types (deprecated; use `--wasm-features=reference-types`) + #[structopt(long, hidden = true)] + enable_reference_types: bool, - /// Enable support for multi-value functions - #[structopt(long)] - enable_multi_value: Option, + /// Enable support for multi-value functions (deprecated; use `--wasm-features=multi-value`) + #[structopt(long, hidden = true)] + enable_multi_value: bool, - /// Enable support for Wasm threads - #[structopt(long)] + /// Enable support for Wasm threads (deprecated; use `--wasm-features=threads`) + #[structopt(long, hidden = true)] enable_threads: bool, - /// Enable support for bulk memory instructions - #[structopt(long)] - enable_bulk_memory: Option, + /// Enable support for bulk memory instructions (deprecated; use `--wasm-features=bulk-memory`) + #[structopt(long, hidden = true)] + enable_bulk_memory: bool, - /// Enable support for the multi-memory proposal - #[structopt(long)] + /// Enable support for the multi-memory proposal (deprecated; use `--wasm-features=multi-memory`) + #[structopt(long, hidden = true)] enable_multi_memory: bool, - /// Enable support for the module-linking proposal - #[structopt(long)] + /// Enable support for the module-linking proposal (deprecated; use `--wasm-features=module-linking`) + #[structopt(long, hidden = true)] enable_module_linking: bool, - /// Enable all experimental Wasm features - #[structopt(long)] + /// Enable all experimental Wasm features (deprecated; use `--wasm-features=all`) + #[structopt(long, hidden = true)] enable_all: 
bool, + /// Enables or disables WebAssembly features + #[structopt(long, value_name = "FEATURE,FEATURE,...", parse(try_from_str = parse_wasm_features))] + wasm_features: Option, + + /// Enables or disables WASI modules + #[structopt(long, value_name = "MODULE,MODULE,...", parse(try_from_str = parse_wasi_modules))] + wasi_modules: Option, + /// Use Lightbeam for all compilation #[structopt(long, conflicts_with = "cranelift")] lightbeam: bool, @@ -151,30 +234,42 @@ struct CommonOptions { #[structopt(short = "O", long)] optimize: bool, - /// Optimization level for generated functions (0 (none), 1, 2 (most), or s - /// (size)) + /// Optimization level for generated functions + /// Supported levels: 0 (none), 1, 2 (most), or s (size); default is "most" #[structopt( long, + value_name = "LEVEL", parse(try_from_str = parse_opt_level), - default_value = "2", + verbatim_doc_comment, )] - opt_level: wasmtime::OptLevel, + opt_level: Option, - /// Other Cranelift flags to be passed down to Cranelift. - #[structopt(long, parse(try_from_str = parse_cranelift_flag))] - cranelift_flags: Vec, + /// Set a Cranelift setting to a given value. + /// Use `wasmtime settings` to list Cranelift settings for a target. + #[structopt(long = "cranelift-set", value_name = "NAME=VALUE", number_of_values = 1, verbatim_doc_comment, parse(try_from_str = parse_cranelift_flag))] + cranelift_set: Vec<(String, String)>, + + /// Enable a Cranelift boolean setting or preset. + /// Use `wasmtime settings` to list Cranelift settings for a target. + #[structopt( + long, + value_name = "SETTING", + number_of_values = 1, + verbatim_doc_comment + )] + cranelift_enable: Vec, /// Maximum size in bytes of wasm memory before it becomes dynamically /// relocatable instead of up-front-reserved. - #[structopt(long)] + #[structopt(long, value_name = "MAXIMUM")] static_memory_maximum_size: Option, /// Byte size of the guard region after static memories are allocated. 
- #[structopt(long)] + #[structopt(long, value_name = "SIZE")] static_memory_guard_size: Option, /// Byte size of the guard region after dynamic memories are allocated. - #[structopt(long)] + #[structopt(long, value_name = "SIZE")] dynamic_memory_guard_size: Option, /// Enable Cranelift's internal debug verifier (expensive) @@ -187,31 +282,48 @@ struct CommonOptions { } impl CommonOptions { - fn config(&self) -> Result { + fn init_logging(&self) { + if self.disable_logging { + return; + } + if self.log_to_files { + let prefix = "wasmtime.dbg."; + init_file_per_thread_logger(prefix); + } else { + pretty_env_logger::init(); + } + } + + fn config(&self, target: Option<&str>) -> Result { let mut config = Config::new(); + + // Set the target before setting any cranelift options + if let Some(target) = target { + config.target(target)?; + } + config .cranelift_debug_verifier(self.enable_cranelift_debug_verifier) .debug_info(self.debug_info) - .wasm_simd(self.enable_simd || self.enable_all) - .wasm_bulk_memory(self.enable_bulk_memory.unwrap_or(true) || self.enable_all) - .wasm_reference_types( - self.enable_reference_types - .unwrap_or(cfg!(target_arch = "x86_64")) - || self.enable_all, - ) - .wasm_multi_value(self.enable_multi_value.unwrap_or(true) || self.enable_all) - .wasm_threads(self.enable_threads || self.enable_all) - .wasm_multi_memory(self.enable_multi_memory || self.enable_all) - .wasm_module_linking(self.enable_module_linking || self.enable_all) .cranelift_opt_level(self.opt_level()) .strategy(pick_compilation_strategy(self.cranelift, self.lightbeam)?)? .profiler(pick_profiling_strategy(self.jitdump, self.vtune)?)? 
.cranelift_nan_canonicalization(self.enable_cranelift_nan_canonicalization); - for CraneliftFlag { name, value } in &self.cranelift_flags { + + self.enable_wasm_features(&mut config); + + for name in &self.cranelift_enable { unsafe { - config.cranelift_other_flag(name, value)?; + config.cranelift_flag_enable(name)?; } } + + for (name, value) in &self.cranelift_set { + unsafe { + config.cranelift_flag_set(name, value)?; + } + } + if !self.disable_cache { match &self.config { Some(path) => { @@ -222,22 +334,43 @@ impl CommonOptions { } } } + if let Some(max) = self.static_memory_maximum_size { config.static_memory_maximum_size(max); } + if let Some(size) = self.static_memory_guard_size { config.static_memory_guard_size(size); } + if let Some(size) = self.dynamic_memory_guard_size { config.dynamic_memory_guard_size(size); } + Ok(config) } + fn enable_wasm_features(&self, config: &mut Config) { + let features = self.wasm_features.unwrap_or_default(); + + config + .wasm_simd(features.simd || self.enable_simd || self.enable_all) + .wasm_bulk_memory(features.bulk_memory || self.enable_bulk_memory || self.enable_all) + .wasm_reference_types( + features.reference_types || self.enable_reference_types || self.enable_all, + ) + .wasm_multi_value(features.multi_value || self.enable_multi_value || self.enable_all) + .wasm_threads(features.threads || self.enable_threads || self.enable_all) + .wasm_multi_memory(features.multi_memory || self.enable_multi_memory || self.enable_all) + .wasm_module_linking( + features.module_linking || self.enable_module_linking || self.enable_all, + ); + } + fn opt_level(&self) -> wasmtime::OptLevel { match (self.optimize, self.opt_level.clone()) { (true, _) => wasmtime::OptLevel::Speed, - (false, other) => other, + (false, other) => other.unwrap_or(wasmtime::OptLevel::Speed), } } } @@ -255,12 +388,128 @@ fn parse_opt_level(opt_level: &str) -> Result { } } -struct CraneliftFlag { - name: String, - value: String, +fn parse_wasm_features(features: 
&str) -> Result { + let features = features.trim(); + + let mut all = None; + let mut values: HashMap<_, _> = SUPPORTED_WASM_FEATURES + .iter() + .map(|(name, _)| (name.to_string(), None)) + .collect(); + + if features == "all" { + all = Some(true); + } else if features == "-all" { + all = Some(false); + } else { + for feature in features.split(',') { + let feature = feature.trim(); + + if feature.is_empty() { + continue; + } + + let (feature, value) = if feature.starts_with('-') { + (&feature[1..], false) + } else { + (feature, true) + }; + + if feature == "all" { + bail!("'all' cannot be specified with other WebAssembly features"); + } + + match values.get_mut(feature) { + Some(v) => *v = Some(value), + None => bail!("unsupported WebAssembly feature '{}'", feature), + } + } + } + + Ok(wasmparser::WasmFeatures { + reference_types: all.unwrap_or(values["reference-types"].unwrap_or(true)), + multi_value: all.unwrap_or(values["multi-value"].unwrap_or(true)), + bulk_memory: all.unwrap_or(values["bulk-memory"].unwrap_or(true)), + module_linking: all.unwrap_or(values["module-linking"].unwrap_or(false)), + simd: all.unwrap_or(values["simd"].unwrap_or(false)), + threads: all.unwrap_or(values["threads"].unwrap_or(false)), + tail_call: false, + deterministic_only: false, + multi_memory: all.unwrap_or(values["multi-memory"].unwrap_or(false)), + exceptions: false, + memory64: false, + }) } -fn parse_cranelift_flag(name_and_value: &str) -> Result { +fn parse_wasi_modules(modules: &str) -> Result { + let modules = modules.trim(); + match modules { + "default" => Ok(WasiModules::default()), + "-default" => Ok(WasiModules::none()), + _ => { + // Starting from the default set of WASI modules, enable or disable a list of + // comma-separated modules. 
+ let mut wasi_modules = WasiModules::default(); + let mut set = |module: &str, enable: bool| match module { + "" => Ok(()), + "wasi-common" => Ok(wasi_modules.wasi_common = enable), + "experimental-wasi-nn" => Ok(wasi_modules.wasi_nn = enable), + "experimental-wasi-crypto" => Ok(wasi_modules.wasi_crypto = enable), + "default" => bail!("'default' cannot be specified with other WASI modules"), + _ => bail!("unsupported WASI module '{}'", module), + }; + + for module in modules.split(',') { + let module = module.trim(); + let (module, value) = if module.starts_with('-') { + (&module[1..], false) + } else { + (module, true) + }; + set(module, value)?; + } + + Ok(wasi_modules) + } + } +} + +/// Select which WASI modules are available at runtime for use by Wasm programs. +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct WasiModules { + /// Enable the wasi-common implementation; eventually this should be split into its separate + /// parts once the implementation allows for it (e.g. wasi-fs, wasi-clocks, etc.). + pub wasi_common: bool, + + /// Enable the experimental wasi-nn implementation. + pub wasi_nn: bool, + + /// Enable the experimental wasi-crypto implementation. + pub wasi_crypto: bool, +} + +impl Default for WasiModules { + fn default() -> Self { + Self { + wasi_common: true, + wasi_nn: false, + wasi_crypto: false, + } + } +} + +impl WasiModules { + /// Enable no modules. 
+ pub fn none() -> Self { + Self { + wasi_common: false, + wasi_nn: false, + wasi_crypto: false, + } + } +} + +fn parse_cranelift_flag(name_and_value: &str) -> Result<(String, String)> { let mut split = name_and_value.splitn(2, '='); let name = if let Some(name) = split.next() { name.to_string() @@ -272,5 +521,215 @@ fn parse_cranelift_flag(name_and_value: &str) -> Result { } else { bail!("missing value in cranelift flag"); }; - Ok(CraneliftFlag { name, value }) + Ok((name, value)) +} + +fn parse_target(s: &str) -> Result { + use std::str::FromStr; + Triple::from_str(&s).map_err(|e| anyhow::anyhow!(e)) +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_all_features() -> Result<()> { + let options = CommonOptions::from_iter_safe(vec!["foo", "--wasm-features=all"])?; + + let wasmparser::WasmFeatures { + reference_types, + multi_value, + bulk_memory, + module_linking, + simd, + threads, + tail_call, + deterministic_only, + multi_memory, + exceptions, + memory64, + } = options.wasm_features.unwrap(); + + assert!(reference_types); + assert!(multi_value); + assert!(bulk_memory); + assert!(module_linking); + assert!(simd); + assert!(threads); + assert!(!tail_call); // Not supported + assert!(!deterministic_only); // Not supported + assert!(multi_memory); + assert!(!exceptions); // Not supported + assert!(!memory64); // Not supported + + Ok(()) + } + + #[test] + fn test_no_features() -> Result<()> { + let options = CommonOptions::from_iter_safe(vec!["foo", "--wasm-features=-all"])?; + + let wasmparser::WasmFeatures { + reference_types, + multi_value, + bulk_memory, + module_linking, + simd, + threads, + tail_call, + deterministic_only, + multi_memory, + exceptions, + memory64, + } = options.wasm_features.unwrap(); + + assert!(!reference_types); + assert!(!multi_value); + assert!(!bulk_memory); + assert!(!module_linking); + assert!(!simd); + assert!(!threads); + assert!(!tail_call); + assert!(!deterministic_only); + assert!(!multi_memory); + 
assert!(!exceptions); + assert!(!memory64); + + Ok(()) + } + + #[test] + fn test_multiple_features() -> Result<()> { + let options = CommonOptions::from_iter_safe(vec![ + "foo", + "--wasm-features=-reference-types,simd,multi-memory", + ])?; + + let wasmparser::WasmFeatures { + reference_types, + multi_value, + bulk_memory, + module_linking, + simd, + threads, + tail_call, + deterministic_only, + multi_memory, + exceptions, + memory64, + } = options.wasm_features.unwrap(); + + assert!(!reference_types); + assert!(multi_value); + assert!(bulk_memory); + assert!(!module_linking); + assert!(simd); + assert!(!threads); + assert!(!tail_call); // Not supported + assert!(!deterministic_only); // Not supported + assert!(multi_memory); + assert!(!exceptions); // Not supported + assert!(!memory64); // Not supported + + Ok(()) + } + + macro_rules! feature_test { + ($test_name:ident, $name:ident, $flag:literal) => { + #[test] + fn $test_name() -> Result<()> { + let options = + CommonOptions::from_iter_safe(vec!["foo", concat!("--wasm-features=", $flag)])?; + + let wasmparser::WasmFeatures { $name, .. } = options.wasm_features.unwrap(); + + assert!($name); + + let options = CommonOptions::from_iter_safe(vec![ + "foo", + concat!("--wasm-features=-", $flag), + ])?; + + let wasmparser::WasmFeatures { $name, .. 
} = options.wasm_features.unwrap(); + + assert!(!$name); + + Ok(()) + } + }; + } + + feature_test!( + test_reference_types_feature, + reference_types, + "reference-types" + ); + feature_test!(test_multi_value_feature, multi_value, "multi-value"); + feature_test!(test_bulk_memory_feature, bulk_memory, "bulk-memory"); + feature_test!( + test_module_linking_feature, + module_linking, + "module-linking" + ); + feature_test!(test_simd_feature, simd, "simd"); + feature_test!(test_threads_feature, threads, "threads"); + feature_test!(test_multi_memory_feature, multi_memory, "multi-memory"); + + #[test] + fn test_default_modules() { + let options = CommonOptions::from_iter_safe(vec!["foo", "--wasi-modules=default"]).unwrap(); + assert_eq!( + options.wasi_modules.unwrap(), + WasiModules { + wasi_common: true, + wasi_nn: false, + wasi_crypto: false + } + ); + } + + #[test] + fn test_empty_modules() { + let options = CommonOptions::from_iter_safe(vec!["foo", "--wasi-modules="]).unwrap(); + assert_eq!( + options.wasi_modules.unwrap(), + WasiModules { + wasi_common: true, + wasi_nn: false, + wasi_crypto: false + } + ); + } + + #[test] + fn test_some_modules() { + let options = CommonOptions::from_iter_safe(vec![ + "foo", + "--wasi-modules=experimental-wasi-nn,-wasi-common", + ]) + .unwrap(); + assert_eq!( + options.wasi_modules.unwrap(), + WasiModules { + wasi_common: false, + wasi_nn: true, + wasi_crypto: false + } + ); + } + + #[test] + fn test_no_modules() { + let options = + CommonOptions::from_iter_safe(vec!["foo", "--wasi-modules=-default"]).unwrap(); + assert_eq!( + options.wasi_modules.unwrap(), + WasiModules { + wasi_common: false, + wasi_nn: false, + wasi_crypto: false + } + ); + } } diff --git a/tests/all/cli_tests.rs b/tests/all/cli_tests.rs index 5c825cd590..ae09dd97cd 100644 --- a/tests/all/cli_tests.rs +++ b/tests/all/cli_tests.rs @@ -97,6 +97,26 @@ fn run_wasmtime_simple_wat() -> Result<()> { "--disable-cache", "4", ])?; + assert_eq!( + run_wasmtime(&[ + "run", 
+ wasm.path().to_str().unwrap(), + "--invoke", + "get_f32", + "--disable-cache", + ])?, + "100\n" + ); + assert_eq!( + run_wasmtime(&[ + "run", + wasm.path().to_str().unwrap(), + "--invoke", + "get_f64", + "--disable-cache", + ])?, + "100\n" + ); Ok(()) } diff --git a/tests/all/debug/lldb.rs b/tests/all/debug/lldb.rs index 7807732700..3e72eaf49f 100644 --- a/tests/all/debug/lldb.rs +++ b/tests/all/debug/lldb.rs @@ -141,7 +141,7 @@ check: exited with status // Ignore test on new backend. The value this is looking for is // not available at the point that the breakpoint is set when // compiled by the new backend. - not(feature = "experimental_x64"), + feature = "old-x86-backend", ))] pub fn test_debug_dwarf_ptr() -> Result<()> { let output = lldb_with_script( diff --git a/tests/all/debug/translate.rs b/tests/all/debug/translate.rs index 7253989d57..2560a71b03 100644 --- a/tests/all/debug/translate.rs +++ b/tests/all/debug/translate.rs @@ -118,7 +118,7 @@ check: DW_AT_decl_line (10) // Ignore test on new backend. This is a specific test with hardcoded // offsets and the new backend compiles the return basic-block at a different // offset, causing mismatches. 
- not(feature = "experimental_x64"), + feature = "old-x86-backend", ))] fn test_debug_dwarf5_translate_lines() -> Result<()> { check_line_program( diff --git a/tests/all/externals.rs b/tests/all/externals.rs index 9bffa2c08d..0e26166f07 100644 --- a/tests/all/externals.rs +++ b/tests/all/externals.rs @@ -67,7 +67,7 @@ fn cross_store() -> anyhow::Result<()> { let ty = GlobalType::new(ValType::I32, Mutability::Const); let global = Global::new(&store2, ty, Val::I32(0))?; let ty = MemoryType::new(Limits::new(1, None)); - let memory = Memory::new(&store2, ty); + let memory = Memory::new(&store2, ty)?; let ty = TableType::new(ValType::FuncRef, Limits::new(1, None)); let table = Table::new(&store2, ty, Val::FuncRef(None))?; @@ -356,7 +356,7 @@ fn read_write_memory_via_api() { let cfg = Config::new(); let store = Store::new(&Engine::new(&cfg).unwrap()); let ty = MemoryType::new(Limits::new(1, None)); - let mem = Memory::new(&store, ty); + let mem = Memory::new(&store, ty).unwrap(); mem.grow(1).unwrap(); let value = b"hello wasm"; diff --git a/tests/all/func.rs b/tests/all/func.rs index 73166cc891..67e4f9ddc2 100644 --- a/tests/all/func.rs +++ b/tests/all/func.rs @@ -1,4 +1,6 @@ use anyhow::Result; +use std::cell::Cell; +use std::rc::Rc; use std::sync::atomic::{AtomicUsize, Ordering::SeqCst}; use wasmtime::*; @@ -121,9 +123,6 @@ fn signatures_match() { } #[test] -// Note: Cranelift only supports refrerence types (used in the wasm in this -// test) on x64. 
-#[cfg(target_arch = "x86_64")] fn import_works() -> Result<()> { static HITS: AtomicUsize = AtomicUsize::new(0); @@ -551,6 +550,7 @@ fn trampolines_always_valid() -> anyhow::Result<()> { } #[test] +#[cfg(not(feature = "old-x86-backend"))] fn typed_multiple_results() -> anyhow::Result<()> { let store = Store::default(); let module = Module::new( @@ -581,3 +581,223 @@ fn typed_multiple_results() -> anyhow::Result<()> { ); Ok(()) } + +#[test] +fn trap_doesnt_leak() -> anyhow::Result<()> { + struct Canary(Rc>); + + impl Drop for Canary { + fn drop(&mut self) { + self.0.set(true); + } + } + + let store = Store::default(); + + // test that `Func::wrap` is correct + let canary1 = Canary(Rc::new(Cell::new(false))); + let dtor1_run = canary1.0.clone(); + let f1 = Func::wrap(&store, move || -> Result<(), Trap> { + drop(&canary1); + Err(Trap::new("")) + }); + assert!(f1.typed::<(), ()>()?.call(()).is_err()); + assert!(f1.call(&[]).is_err()); + + // test that `Func::new` is correct + let canary2 = Canary(Rc::new(Cell::new(false))); + let dtor2_run = canary2.0.clone(); + let f2 = Func::new(&store, FuncType::new(None, None), move |_, _, _| { + drop(&canary2); + Err(Trap::new("")) + }); + assert!(f2.typed::<(), ()>()?.call(()).is_err()); + assert!(f2.call(&[]).is_err()); + + // drop everything and ensure dtors are run + drop((store, f1, f2)); + assert!(dtor1_run.get()); + assert!(dtor2_run.get()); + Ok(()) +} + +#[test] +#[cfg(not(feature = "old-x86-backend"))] +fn wrap_multiple_results() -> anyhow::Result<()> { + fn test(store: &Store, t: T) -> anyhow::Result<()> + where + T: WasmRet + WasmResults + PartialEq + Copy + std::fmt::Debug + EqualToValues + 'static, + { + let f = Func::wrap(store, move || t); + assert_eq!(f.typed::<(), T>()?.call(())?, t); + assert!(t.eq_values(&f.call(&[])?)); + + let module = Module::new(store.engine(), &T::gen_wasm())?; + let instance = Instance::new(store, &module, &[f.into()])?; + let f = instance.get_func("foo").unwrap(); + + 
assert_eq!(f.typed::<(), T>()?.call(())?, t); + assert!(t.eq_values(&f.call(&[])?)); + Ok(()) + } + + let store = Store::default(); + // 0 element + test(&store, ())?; + + // 1 element + test(&store, (1i32,))?; + test(&store, (2u32,))?; + test(&store, (3i64,))?; + test(&store, (4u64,))?; + test(&store, (5.0f32,))?; + test(&store, (6.0f64,))?; + + // 2 element ... + test(&store, (7i32, 8i32))?; + test(&store, (7i32, 8i64))?; + test(&store, (7i32, 8f32))?; + test(&store, (7i32, 8f64))?; + + test(&store, (7i64, 8i32))?; + test(&store, (7i64, 8i64))?; + test(&store, (7i64, 8f32))?; + test(&store, (7i64, 8f64))?; + + test(&store, (7f32, 8i32))?; + test(&store, (7f32, 8i64))?; + test(&store, (7f32, 8f32))?; + test(&store, (7f32, 8f64))?; + + test(&store, (7f64, 8i32))?; + test(&store, (7f64, 8i64))?; + test(&store, (7f64, 8f32))?; + test(&store, (7f64, 8f64))?; + + // and beyond... + test(&store, (1i32, 2i32, 3i32))?; + test(&store, (1i32, 2f32, 3i32))?; + test(&store, (1f64, 2f32, 3i32))?; + test(&store, (1f64, 2i64, 3i32))?; + test(&store, (1f32, 2f32, 3i64, 4f64))?; + test(&store, (1f64, 2i64, 3i32, 4i64, 5f32))?; + test(&store, (1i32, 2f64, 3i64, 4f64, 5f64, 6f32))?; + test(&store, (1i64, 2i32, 3i64, 4f32, 5f32, 6i32, 7u64))?; + test(&store, (1u32, 2f32, 3u64, 4f64, 5i32, 6f32, 7u64, 8u32))?; + test( + &store, + (1f32, 2f64, 3f32, 4i32, 5u32, 6i64, 7f32, 8i32, 9u64), + )?; + return Ok(()); + + trait EqualToValues { + fn eq_values(&self, values: &[Val]) -> bool; + fn gen_wasm() -> String; + } + + macro_rules! 
equal_tuples { + ($($cnt:tt ($($a:ident),*))*) => ($( + #[allow(non_snake_case)] + impl<$($a: EqualToValue,)*> EqualToValues for ($($a,)*) { + fn eq_values(&self, values: &[Val]) -> bool { + let ($($a,)*) = self; + let mut _values = values.iter(); + _values.len() == $cnt && + $($a.eq_value(_values.next().unwrap()) &&)* + true + } + + fn gen_wasm() -> String { + let mut wasm = String::new(); + wasm.push_str("(module "); + wasm.push_str("(type $t (func (result "); + $( + wasm.push_str($a::wasm_ty()); + wasm.push_str(" "); + )* + wasm.push_str(")))"); + + wasm.push_str("(import \"\" \"\" (func $host (type $t)))"); + wasm.push_str("(func (export \"foo\") (type $t)"); + wasm.push_str("call $host"); + wasm.push_str(")"); + wasm.push_str(")"); + + wasm + } + } + )*) + } + + equal_tuples! { + 0 () + 1 (A1) + 2 (A1, A2) + 3 (A1, A2, A3) + 4 (A1, A2, A3, A4) + 5 (A1, A2, A3, A4, A5) + 6 (A1, A2, A3, A4, A5, A6) + 7 (A1, A2, A3, A4, A5, A6, A7) + 8 (A1, A2, A3, A4, A5, A6, A7, A8) + 9 (A1, A2, A3, A4, A5, A6, A7, A8, A9) + } + + trait EqualToValue { + fn eq_value(&self, value: &Val) -> bool; + fn wasm_ty() -> &'static str; + } + + macro_rules! equal_values { + ($a:ident $($ty:ident $wasm:tt $variant:ident $e:expr,)*) => ($( + impl EqualToValue for $ty { + fn eq_value(&self, val: &Val) -> bool { + if let Val::$variant($a) = *val { + return *self == $e; + } + false + } + + fn wasm_ty() -> &'static str { + $wasm + } + } + )*) + } + + equal_values! 
{ + a + i32 "i32" I32 a, + u32 "i32" I32 a as u32, + i64 "i64" I64 a, + u64 "i64" I64 a as u64, + f32 "f32" F32 f32::from_bits(a), + f64 "f64" F64 f64::from_bits(a), + } +} + +#[test] +fn trampoline_for_declared_elem() -> anyhow::Result<()> { + let engine = Engine::default(); + + let module = Module::new( + &engine, + r#" + (module + (elem declare func $f) + (func $f) + (func (export "g") (result funcref) + (ref.func $f) + ) + ) + "#, + )?; + + let store = Store::new(&engine); + let instance = Instance::new(&store, &module, &[])?; + + let g = instance.get_typed_func::<(), Option>("g")?; + + let func = g.call(())?; + func.unwrap().call(&[])?; + Ok(()) +} diff --git a/tests/all/host_funcs.rs b/tests/all/host_funcs.rs index de94701144..360e2526ad 100644 --- a/tests/all/host_funcs.rs +++ b/tests/all/host_funcs.rs @@ -1,8 +1,7 @@ use anyhow::Result; use std::sync::atomic::{AtomicUsize, Ordering::SeqCst}; -use wasi_cap_std_sync::WasiCtxBuilder; use wasmtime::*; -use wasmtime_wasi::Wasi; +use wasmtime_wasi::{sync::WasiCtxBuilder, Wasi}; #[test] fn async_required() { @@ -219,9 +218,6 @@ fn signatures_match() -> Result<()> { } #[test] -// Note: Cranelift only supports refrerence types (used in the wasm in this -// test) on x64. 
-#[cfg(target_arch = "x86_64")] fn import_works() -> Result<()> { static HITS: AtomicUsize = AtomicUsize::new(0); @@ -328,6 +324,141 @@ fn import_works() -> Result<()> { Ok(()) } +#[test] +fn call_import_many_args() -> Result<()> { + let wasm = wat::parse_str( + r#" + (import "" "host" (func (param i32 i32 i32 i32 i32 i32 i32 i32 i32 i32))) + (func (export "run") + i32.const 1 + i32.const 2 + i32.const 3 + i32.const 4 + i32.const 5 + i32.const 6 + i32.const 7 + i32.const 8 + i32.const 9 + i32.const 10 + call 0 + ) + "#, + )?; + + let mut config = Config::new(); + + config.wrap_host_func( + "", + "host", + |x1: i32, + x2: i32, + x3: i32, + x4: i32, + x5: i32, + x6: i32, + x7: i32, + x8: i32, + x9: i32, + x10: i32| { + assert_eq!(x1, 1); + assert_eq!(x2, 2); + assert_eq!(x3, 3); + assert_eq!(x4, 4); + assert_eq!(x5, 5); + assert_eq!(x6, 6); + assert_eq!(x7, 7); + assert_eq!(x8, 8); + assert_eq!(x9, 9); + assert_eq!(x10, 10); + }, + ); + + let engine = Engine::new(&config)?; + let module = Module::new(&engine, &wasm)?; + + let store = Store::new(&engine); + let instance = Instance::new( + &store, + &module, + &[store + .get_host_func("", "host") + .expect("should be defined") + .into()], + )?; + + let run = instance.get_func("run").unwrap(); + run.call(&[])?; + + Ok(()) +} + +#[test] +fn call_wasm_many_args() -> Result<()> { + let wasm = wat::parse_str( + r#" + (func (export "run") (param i32 i32 i32 i32 i32 i32 i32 i32 i32 i32) + i32.const 1 + get_local 0 + i32.ne + if + unreachable + end + + i32.const 10 + get_local 9 + i32.ne + if + unreachable + end + ) + + (func (export "test") + i32.const 1 + i32.const 2 + i32.const 3 + i32.const 4 + i32.const 5 + i32.const 6 + i32.const 7 + i32.const 8 + i32.const 9 + i32.const 10 + call 0 + ) + "#, + )?; + + let config = Config::new(); + let engine = Engine::new(&config)?; + let module = Module::new(&engine, &wasm)?; + + let store = Store::new(&engine); + let instance = Instance::new(&store, &module, &[])?; + + let run = 
instance.get_func("run").unwrap(); + run.call(&[ + 1.into(), + 2.into(), + 3.into(), + 4.into(), + 5.into(), + 6.into(), + 7.into(), + 8.into(), + 9.into(), + 10.into(), + ])?; + + let typed_run = + instance.get_typed_func::<(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32), ()>("run")?; + typed_run.call((1, 2, 3, 4, 5, 6, 7, 8, 9, 10))?; + + let test = instance.get_func("test").unwrap(); + test.call(&[])?; + + Ok(()) +} + #[test] fn trap_smoke() -> Result<()> { let mut config = Config::default(); diff --git a/tests/all/instance.rs b/tests/all/instance.rs index 83ee52b227..3d8046110c 100644 --- a/tests/all/instance.rs +++ b/tests/all/instance.rs @@ -11,3 +11,23 @@ fn wrong_import_numbers() -> Result<()> { assert!(Instance::new(&store, &module, &[func.clone().into(), func.into()]).is_err()); Ok(()) } + +#[test] +fn initializes_linear_memory() -> Result<()> { + // Test for https://github.com/bytecodealliance/wasmtime/issues/2784 + let wat = r#" + (module + (memory (export "memory") 2) + (data (i32.const 0) "Hello World!") + )"#; + let module = Module::new(&Engine::default(), wat)?; + + let store = Store::new(module.engine()); + let instance = Instance::new(&store, &module, &[])?; + let memory = instance.get_memory("memory").unwrap(); + + let mut bytes = [0; 12]; + memory.read(0, &mut bytes)?; + assert_eq!(bytes, "Hello World!".as_bytes()); + Ok(()) +} diff --git a/tests/all/limits.rs b/tests/all/limits.rs new file mode 100644 index 0000000000..f4c74be612 --- /dev/null +++ b/tests/all/limits.rs @@ -0,0 +1,381 @@ +use anyhow::Result; +use std::cell::RefCell; +use std::rc::Rc; +use wasmtime::*; + +#[test] +fn test_limits() -> Result<()> { + let engine = Engine::default(); + let module = Module::new( + &engine, + r#"(module (memory (export "m") 0) (table (export "t") 0 anyfunc))"#, + )?; + + let store = Store::new_with_limits( + &engine, + StoreLimitsBuilder::new() + .memory_pages(10) + .table_elements(5) + .build(), + ); + + let instance = Instance::new(&store, 
&module, &[])?; + + // Test instance exports and host objects hitting the limit + for memory in std::array::IntoIter::new([ + instance.get_memory("m").unwrap(), + Memory::new(&store, MemoryType::new(Limits::new(0, None)))?, + ]) { + memory.grow(3)?; + memory.grow(5)?; + memory.grow(2)?; + + assert_eq!( + memory.grow(1).map_err(|e| e.to_string()).unwrap_err(), + "failed to grow memory by `1`" + ); + } + + // Test instance exports and host objects hitting the limit + for table in std::array::IntoIter::new([ + instance.get_table("t").unwrap(), + Table::new( + &store, + TableType::new(ValType::FuncRef, Limits::new(0, None)), + Val::FuncRef(None), + )?, + ]) { + table.grow(2, Val::FuncRef(None))?; + table.grow(1, Val::FuncRef(None))?; + table.grow(2, Val::FuncRef(None))?; + + assert_eq!( + table + .grow(1, Val::FuncRef(None)) + .map_err(|e| e.to_string()) + .unwrap_err(), + "failed to grow table by `1`" + ); + } + + Ok(()) +} + +#[test] +fn test_limits_memory_only() -> Result<()> { + let engine = Engine::default(); + let module = Module::new( + &engine, + r#"(module (memory (export "m") 0) (table (export "t") 0 anyfunc))"#, + )?; + + let store = Store::new_with_limits(&engine, StoreLimitsBuilder::new().memory_pages(10).build()); + + let instance = Instance::new(&store, &module, &[])?; + + // Test instance exports and host objects hitting the limit + for memory in std::array::IntoIter::new([ + instance.get_memory("m").unwrap(), + Memory::new(&store, MemoryType::new(Limits::new(0, None)))?, + ]) { + memory.grow(3)?; + memory.grow(5)?; + memory.grow(2)?; + + assert_eq!( + memory.grow(1).map_err(|e| e.to_string()).unwrap_err(), + "failed to grow memory by `1`" + ); + } + + // Test instance exports and host objects *not* hitting the limit + for table in std::array::IntoIter::new([ + instance.get_table("t").unwrap(), + Table::new( + &store, + TableType::new(ValType::FuncRef, Limits::new(0, None)), + Val::FuncRef(None), + )?, + ]) { + table.grow(2, Val::FuncRef(None))?; + 
table.grow(1, Val::FuncRef(None))?; + table.grow(2, Val::FuncRef(None))?; + table.grow(1, Val::FuncRef(None))?; + } + + Ok(()) +} + +#[test] +fn test_initial_memory_limits_exceeded() -> Result<()> { + let engine = Engine::default(); + let module = Module::new(&engine, r#"(module (memory (export "m") 11))"#)?; + + let store = Store::new_with_limits(&engine, StoreLimitsBuilder::new().memory_pages(10).build()); + + match Instance::new(&store, &module, &[]) { + Ok(_) => unreachable!(), + Err(e) => assert_eq!( + e.to_string(), + "Insufficient resources: memory minimum size of 11 pages exceeds memory limits" + ), + } + + match Memory::new(&store, MemoryType::new(Limits::new(25, None))) { + Ok(_) => unreachable!(), + Err(e) => assert_eq!( + e.to_string(), + "Insufficient resources: memory minimum size of 25 pages exceeds memory limits" + ), + } + + Ok(()) +} + +#[test] +fn test_limits_table_only() -> Result<()> { + let engine = Engine::default(); + let module = Module::new( + &engine, + r#"(module (memory (export "m") 0) (table (export "t") 0 anyfunc))"#, + )?; + + let store = + Store::new_with_limits(&engine, StoreLimitsBuilder::new().table_elements(5).build()); + + let instance = Instance::new(&store, &module, &[])?; + + // Test instance exports and host objects *not* hitting the limit + for memory in std::array::IntoIter::new([ + instance.get_memory("m").unwrap(), + Memory::new(&store, MemoryType::new(Limits::new(0, None)))?, + ]) { + memory.grow(3)?; + memory.grow(5)?; + memory.grow(2)?; + memory.grow(1)?; + } + + // Test instance exports and host objects hitting the limit + for table in std::array::IntoIter::new([ + instance.get_table("t").unwrap(), + Table::new( + &store, + TableType::new(ValType::FuncRef, Limits::new(0, None)), + Val::FuncRef(None), + )?, + ]) { + table.grow(2, Val::FuncRef(None))?; + table.grow(1, Val::FuncRef(None))?; + table.grow(2, Val::FuncRef(None))?; + + assert_eq!( + table + .grow(1, Val::FuncRef(None)) + .map_err(|e| e.to_string()) + 
.unwrap_err(), + "failed to grow table by `1`" + ); + } + + Ok(()) +} + +#[test] +fn test_initial_table_limits_exceeded() -> Result<()> { + let engine = Engine::default(); + let module = Module::new(&engine, r#"(module (table (export "t") 23 anyfunc))"#)?; + + let store = + Store::new_with_limits(&engine, StoreLimitsBuilder::new().table_elements(4).build()); + + match Instance::new(&store, &module, &[]) { + Ok(_) => unreachable!(), + Err(e) => assert_eq!( + e.to_string(), + "Insufficient resources: table minimum size of 23 elements exceeds table limits" + ), + } + + match Table::new( + &store, + TableType::new(ValType::FuncRef, Limits::new(99, None)), + Val::FuncRef(None), + ) { + Ok(_) => unreachable!(), + Err(e) => assert_eq!( + e.to_string(), + "Insufficient resources: table minimum size of 99 elements exceeds table limits" + ), + } + + Ok(()) +} + +#[test] +fn test_pooling_allocator_initial_limits_exceeded() -> Result<()> { + let mut config = Config::new(); + config.wasm_multi_memory(true); + config.allocation_strategy(InstanceAllocationStrategy::Pooling { + strategy: PoolingAllocationStrategy::NextAvailable, + module_limits: ModuleLimits { + memories: 2, + ..Default::default() + }, + instance_limits: InstanceLimits { + count: 1, + ..Default::default() + }, + }); + + let engine = Engine::new(&config)?; + let module = Module::new( + &engine, + r#"(module (memory (export "m1") 2) (memory (export "m2") 5))"#, + )?; + + let store = Store::new_with_limits(&engine, StoreLimitsBuilder::new().memory_pages(3).build()); + + match Instance::new(&store, &module, &[]) { + Ok(_) => unreachable!(), + Err(e) => assert_eq!( + e.to_string(), + "Insufficient resources: memory minimum size of 5 pages exceeds memory limits" + ), + } + + // An instance should still be able to be created after the failure above + let module = Module::new(&engine, r#"(module (memory (export "m") 2))"#)?; + + Instance::new(&store, &module, &[])?; + + Ok(()) +} + +struct MemoryContext { + 
host_memory_used: usize, + wasm_memory_used: usize, + memory_limit: usize, + limit_exceeded: bool, + limiter_dropped: bool, +} + +struct HostMemoryLimiter(Rc>); + +impl ResourceLimiter for HostMemoryLimiter { + fn memory_growing(&self, current: u32, desired: u32, maximum: Option) -> bool { + let mut ctx = self.0.borrow_mut(); + + // Check if the desired exceeds a maximum (either from Wasm or from the host) + if desired > maximum.unwrap_or(u32::MAX) { + ctx.limit_exceeded = true; + return false; + } + + assert_eq!(current as usize * 0x10000, ctx.wasm_memory_used); + let desired = desired as usize * 0x10000; + + if desired + ctx.host_memory_used > ctx.memory_limit { + ctx.limit_exceeded = true; + return false; + } + + ctx.wasm_memory_used = desired; + true + } + + fn table_growing(&self, _current: u32, _desired: u32, _maximum: Option) -> bool { + true + } +} + +impl Drop for HostMemoryLimiter { + fn drop(&mut self) { + self.0.borrow_mut().limiter_dropped = true; + } +} + +#[test] +fn test_custom_limiter() -> Result<()> { + let mut config = Config::default(); + + // This approximates a function that would "allocate" resources that the host tracks. + // Here this is a simple function that increments the current host memory "used". 
+ config.wrap_host_func("", "alloc", |caller: Caller, size: u32| -> u32 { + if let Some(ctx) = caller.store().get::>>() { + let mut ctx = ctx.borrow_mut(); + let size = size as usize; + + if size + ctx.host_memory_used + ctx.wasm_memory_used <= ctx.memory_limit { + ctx.host_memory_used += size; + return 1; + } + + ctx.limit_exceeded = true; + } + + 0 + }); + + let engine = Engine::new(&config)?; + let module = Module::new( + &engine, + r#"(module (import "" "alloc" (func $alloc (param i32) (result i32))) (memory (export "m") 0) (func (export "f") (param i32) (result i32) local.get 0 call $alloc))"#, + )?; + + let context = Rc::new(RefCell::new(MemoryContext { + host_memory_used: 0, + wasm_memory_used: 0, + memory_limit: 1 << 20, // 16 wasm pages is the limit for both wasm + host memory + limit_exceeded: false, + limiter_dropped: false, + })); + + let store = Store::new_with_limits(&engine, HostMemoryLimiter(context.clone())); + + assert!(store.set(context.clone()).is_ok()); + + let linker = Linker::new(&store); + let instance = linker.instantiate(&module)?; + let memory = instance.get_memory("m").unwrap(); + + // Grow the memory by 640 KiB + memory.grow(3)?; + memory.grow(5)?; + memory.grow(2)?; + + assert!(!context.borrow().limit_exceeded); + + // Grow the host "memory" by 384 KiB + let f = instance.get_typed_func::("f")?; + + assert_eq!(f.call(1 * 0x10000).unwrap(), 1); + assert_eq!(f.call(3 * 0x10000).unwrap(), 1); + assert_eq!(f.call(2 * 0x10000).unwrap(), 1); + + // Memory is at the maximum, but the limit hasn't been exceeded + assert!(!context.borrow().limit_exceeded); + + // Try to grow the memory again + assert_eq!( + memory.grow(1).map_err(|e| e.to_string()).unwrap_err(), + "failed to grow memory by `1`" + ); + + assert!(context.borrow().limit_exceeded); + + // Try to grow the host "memory" again + assert_eq!(f.call(1).unwrap(), 0); + + assert!(context.borrow().limit_exceeded); + + drop(f); + drop(memory); + drop(instance); + drop(linker); + drop(store); + 
+ assert!(context.borrow().limiter_dropped); + + Ok(()) +} diff --git a/tests/all/linker.rs b/tests/all/linker.rs index 078509774d..1b95663390 100644 --- a/tests/all/linker.rs +++ b/tests/all/linker.rs @@ -27,46 +27,45 @@ fn link_twice_bad() -> Result<()> { let mut linker = Linker::new(&store); // functions - linker.func("", "", || {})?; - assert!(linker.func("", "", || {}).is_err()); + linker.func("f", "", || {})?; + assert!(linker.func("f", "", || {}).is_err()); assert!(linker - .func("", "", || -> Result<(), Trap> { loop {} }) + .func("f", "", || -> Result<(), Trap> { loop {} }) .is_err()); - linker.func("", "", |_: i32| {})?; // globals let ty = GlobalType::new(ValType::I32, Mutability::Const); let global = Global::new(&store, ty, Val::I32(0))?; - linker.define("", "", global.clone())?; - assert!(linker.define("", "", global.clone()).is_err()); + linker.define("g", "1", global.clone())?; + assert!(linker.define("g", "1", global.clone()).is_err()); let ty = GlobalType::new(ValType::I32, Mutability::Var); let global = Global::new(&store, ty, Val::I32(0))?; - linker.define("", "", global.clone())?; - assert!(linker.define("", "", global.clone()).is_err()); + linker.define("g", "2", global.clone())?; + assert!(linker.define("g", "2", global.clone()).is_err()); let ty = GlobalType::new(ValType::I64, Mutability::Const); let global = Global::new(&store, ty, Val::I64(0))?; - linker.define("", "", global.clone())?; - assert!(linker.define("", "", global.clone()).is_err()); + linker.define("g", "3", global.clone())?; + assert!(linker.define("g", "3", global.clone()).is_err()); // memories let ty = MemoryType::new(Limits::new(1, None)); - let memory = Memory::new(&store, ty); - linker.define("", "", memory.clone())?; - assert!(linker.define("", "", memory.clone()).is_err()); + let memory = Memory::new(&store, ty)?; + linker.define("m", "", memory.clone())?; + assert!(linker.define("m", "", memory.clone()).is_err()); let ty = MemoryType::new(Limits::new(2, None)); - let 
memory = Memory::new(&store, ty); - assert!(linker.define("", "", memory.clone()).is_err()); + let memory = Memory::new(&store, ty)?; + assert!(linker.define("m", "", memory.clone()).is_err()); // tables let ty = TableType::new(ValType::FuncRef, Limits::new(1, None)); let table = Table::new(&store, ty, Val::FuncRef(None))?; - linker.define("", "", table.clone())?; - assert!(linker.define("", "", table.clone()).is_err()); + linker.define("t", "", table.clone())?; + assert!(linker.define("t", "", table.clone()).is_err()); let ty = TableType::new(ValType::FuncRef, Limits::new(2, None)); let table = Table::new(&store, ty, Val::FuncRef(None))?; - assert!(linker.define("", "", table.clone()).is_err()); + assert!(linker.define("t", "", table.clone()).is_err()); Ok(()) } @@ -163,6 +162,24 @@ fn module_interposition() -> Result<()> { Ok(()) } +#[test] +fn allow_unknown_exports() -> Result<()> { + let store = Store::default(); + let mut linker = Linker::new(&store); + let module = Module::new( + store.engine(), + r#"(module (func (export "_start")) (global (export "g") i32 (i32.const 0)))"#, + )?; + + assert!(linker.module("module", &module).is_err()); + + let mut linker = Linker::new(&store); + linker.allow_unknown_exports(true); + linker.module("module", &module)?; + + Ok(()) +} + #[test] fn no_leak() -> Result<()> { struct DropMe(Rc>); diff --git a/tests/all/main.rs b/tests/all/main.rs index 4c921e60a3..89c686119e 100644 --- a/tests/all/main.rs +++ b/tests/all/main.rs @@ -13,6 +13,7 @@ mod import_calling_export; mod import_indexes; mod instance; mod invoke_func_via_table; +mod limits; mod linker; mod memory_creator; mod module; diff --git a/tests/all/memory_creator.rs b/tests/all/memory_creator.rs index e877b5e22d..5c49414706 100644 --- a/tests/all/memory_creator.rs +++ b/tests/all/memory_creator.rs @@ -17,6 +17,7 @@ mod not_for_windows { struct CustomMemory { mem: *mut c_void, size: usize, + guard_size: usize, used_wasm_pages: RefCell, glob_page_counter: Arc>, } @@ -43,6 
+44,7 @@ mod not_for_windows { Self { mem, size, + guard_size, used_wasm_pages: RefCell::new(num_wasm_pages), glob_page_counter: glob_counter, } @@ -63,6 +65,10 @@ mod not_for_windows { *self.used_wasm_pages.borrow() } + fn maximum(&self) -> Option { + Some((self.size as u32 - self.guard_size as u32) / WASM_PAGE_SIZE) + } + fn grow(&self, delta: u32) -> Option { let delta_size = (delta as usize).checked_mul(WASM_PAGE_SIZE as usize)?; @@ -70,11 +76,8 @@ mod not_for_windows { let prev_size = (prev_pages as usize).checked_mul(WASM_PAGE_SIZE as usize)?; let new_pages = prev_pages.checked_add(delta)?; - let new_size = (new_pages as usize).checked_mul(WASM_PAGE_SIZE as usize)?; - let guard_size = unsafe { sysconf(_SC_PAGESIZE) as usize }; - - if new_size > self.size - guard_size { + if new_pages > self.maximum().unwrap() { return None; } unsafe { diff --git a/tests/all/module.rs b/tests/all/module.rs index 18b2f72c58..f6b3b5b0d0 100644 --- a/tests/all/module.rs +++ b/tests/all/module.rs @@ -1,57 +1,80 @@ +use anyhow::Result; use wasmtime::*; +#[test] +fn checks_incompatible_target() -> Result<()> { + let mut target = target_lexicon::Triple::host(); + target.operating_system = target_lexicon::OperatingSystem::Unknown; + match Module::new( + &Engine::new(Config::new().target(&target.to_string())?)?, + "(module)", + ) { + Ok(_) => unreachable!(), + Err(e) => assert!(e + .to_string() + .contains("configuration does not match the host")), + } + + Ok(()) +} + #[test] fn caches_across_engines() { - let mut c = Config::new(); - c.cranelift_clear_cpu_flags(); + let c = Config::new(); let bytes = Module::new(&Engine::new(&c).unwrap(), "(module)") .unwrap() .serialize() .unwrap(); - let res = Module::deserialize( - &Engine::new(&Config::new().cranelift_clear_cpu_flags()).unwrap(), - &bytes, - ); - assert!(res.is_ok()); + unsafe { + let res = Module::deserialize(&Engine::new(&Config::new()).unwrap(), &bytes); + assert!(res.is_ok()); - // differ in shared cranelift flags - let res = 
Module::deserialize( - &Engine::new( - &Config::new() - .cranelift_clear_cpu_flags() - .cranelift_nan_canonicalization(true), - ) - .unwrap(), - &bytes, - ); - assert!(res.is_err()); - - // differ in cranelift settings - let res = Module::deserialize( - &Engine::new( - &Config::new() - .cranelift_clear_cpu_flags() - .cranelift_opt_level(OptLevel::None), - ) - .unwrap(), - &bytes, - ); - assert!(res.is_err()); - - // differ in cpu-specific flags - if cfg!(target_arch = "x86_64") { + // differ in shared cranelift flags let res = Module::deserialize( - &Engine::new(unsafe { - &Config::new() - .cranelift_clear_cpu_flags() - .cranelift_other_flag("has_sse3", "true") - .unwrap() - }) - .unwrap(), + &Engine::new(Config::new().cranelift_nan_canonicalization(true)).unwrap(), &bytes, ); assert!(res.is_err()); + + // differ in cranelift settings + let res = Module::deserialize( + &Engine::new(Config::new().cranelift_opt_level(OptLevel::None)).unwrap(), + &bytes, + ); + assert!(res.is_err()); + + // Missing required cpu flags + if cfg!(target_arch = "x86_64") { + let res = Module::deserialize( + &Engine::new( + Config::new() + .target(&target_lexicon::Triple::host().to_string()) + .unwrap(), + ) + .unwrap(), + &bytes, + ); + assert!(res.is_err()); + } } } + +#[test] +fn aot_compiles() -> Result<()> { + let engine = Engine::default(); + let bytes = engine.precompile_module( + "(module (func (export \"f\") (param i32) (result i32) local.get 0))".as_bytes(), + )?; + + let module = unsafe { Module::deserialize(&engine, &bytes)? 
}; + + let store = Store::new(&engine); + let instance = Instance::new(&store, &module, &[])?; + + let f = instance.get_typed_func::("f")?; + assert_eq!(f.call(101).unwrap(), 101); + + Ok(()) +} diff --git a/tests/all/module_linking.rs b/tests/all/module_linking.rs index b010573c6e..28fb802fae 100644 --- a/tests/all/module_linking.rs +++ b/tests/all/module_linking.rs @@ -39,7 +39,9 @@ fn compile() -> Result<()> { assert_eq!(m.imports().len(), 0); assert_eq!(m.exports().len(), 0); let bytes = m.serialize()?; - Module::deserialize(&engine, &bytes)?; + unsafe { + Module::deserialize(&engine, &bytes)?; + } assert_eq!(m.imports().len(), 0); assert_eq!(m.exports().len(), 0); Ok(()) @@ -190,7 +192,6 @@ fn imports_exports() -> Result<()> { fn limit_instances() -> Result<()> { let mut config = Config::new(); config.wasm_module_linking(true); - config.max_instances(10); let engine = Engine::new(&config)?; let module = Module::new( &engine, @@ -216,7 +217,7 @@ fn limit_instances() -> Result<()> { ) "#, )?; - let store = Store::new(&engine); + let store = Store::new_with_limits(&engine, StoreLimitsBuilder::new().instances(10).build()); let err = Instance::new(&store, &module, &[]).err().unwrap(); assert!( err.to_string().contains("resource limit exceeded"), @@ -231,7 +232,6 @@ fn limit_memories() -> Result<()> { let mut config = Config::new(); config.wasm_module_linking(true); config.wasm_multi_memory(true); - config.max_memories(10); let engine = Engine::new(&config)?; let module = Module::new( &engine, @@ -252,7 +252,7 @@ fn limit_memories() -> Result<()> { ) "#, )?; - let store = Store::new(&engine); + let store = Store::new_with_limits(&engine, StoreLimitsBuilder::new().memories(10).build()); let err = Instance::new(&store, &module, &[]).err().unwrap(); assert!( err.to_string().contains("resource limit exceeded"), @@ -266,7 +266,6 @@ fn limit_memories() -> Result<()> { fn limit_tables() -> Result<()> { let mut config = Config::new(); config.wasm_module_linking(true); - 
config.max_tables(10); let engine = Engine::new(&config)?; let module = Module::new( &engine, @@ -287,7 +286,7 @@ fn limit_tables() -> Result<()> { ) "#, )?; - let store = Store::new(&engine); + let store = Store::new_with_limits(&engine, StoreLimitsBuilder::new().tables(10).build()); let err = Instance::new(&store, &module, &[]).err().unwrap(); assert!( err.to_string().contains("resource limit exceeded"), diff --git a/tests/all/module_serialize.rs b/tests/all/module_serialize.rs index a77f62b077..7f68b48a58 100644 --- a/tests/all/module_serialize.rs +++ b/tests/all/module_serialize.rs @@ -6,11 +6,27 @@ fn serialize(engine: &Engine, wat: &'static str) -> Result> { Ok(module.serialize()?) } -fn deserialize_and_instantiate(store: &Store, buffer: &[u8]) -> Result { +unsafe fn deserialize_and_instantiate(store: &Store, buffer: &[u8]) -> Result { let module = Module::deserialize(store.engine(), buffer)?; Ok(Instance::new(&store, &module, &[])?) } +#[test] +fn test_version_mismatch() -> Result<()> { + let engine = Engine::default(); + let mut buffer = serialize(&engine, "(module)")?; + buffer[13 /* header length */ + 1 /* version length */] = 'x' as u8; + + match unsafe { Module::deserialize(&engine, &buffer) } { + Ok(_) => bail!("expected deserialization to fail"), + Err(e) => assert!(e + .to_string() + .starts_with("Module was compiled with incompatible Wasmtime version")), + } + + Ok(()) +} + #[test] fn test_module_serialize_simple() -> Result<()> { let buffer = serialize( @@ -19,7 +35,7 @@ fn test_module_serialize_simple() -> Result<()> { )?; let store = Store::default(); - let instance = deserialize_and_instantiate(&store, &buffer)?; + let instance = unsafe { deserialize_and_instantiate(&store, &buffer)? 
}; let run = instance.get_typed_func::<(), i32>("run")?; let result = run.call(())?; @@ -37,7 +53,7 @@ fn test_module_serialize_fail() -> Result<()> { let mut config = Config::new(); config.cranelift_opt_level(OptLevel::None); let store = Store::new(&Engine::new(&config)?); - match deserialize_and_instantiate(&store, &buffer) { + match unsafe { deserialize_and_instantiate(&store, &buffer) } { Ok(_) => bail!("expected failure at deserialization"), Err(_) => (), } diff --git a/tests/all/traps.rs b/tests/all/traps.rs index b83ac38daa..af702c247b 100644 --- a/tests/all/traps.rs +++ b/tests/all/traps.rs @@ -27,8 +27,7 @@ fn test_trap_return() -> Result<()> { } #[test] -#[cfg_attr(all(target_os = "windows", target_arch = "aarch64"), ignore)] // FIXME(#1642) -#[cfg_attr(all(target_os = "windows", feature = "experimental_x64"), ignore)] // FIXME(#2079) +#[cfg_attr(all(target_os = "macos", target_arch = "aarch64"), ignore)] // TODO #2808 system libunwind is broken on aarch64 fn test_trap_trace() -> Result<()> { let store = Store::default(); let wat = r#" @@ -66,8 +65,7 @@ fn test_trap_trace() -> Result<()> { } #[test] -#[cfg_attr(all(target_os = "windows", target_arch = "aarch64"), ignore)] // FIXME(#1642) -#[cfg_attr(all(target_os = "windows", feature = "experimental_x64"), ignore)] // FIXME(#2079) +#[cfg_attr(all(target_os = "macos", target_arch = "aarch64"), ignore)] // TODO #2808 system libunwind is broken on aarch64 fn test_trap_trace_cb() -> Result<()> { let store = Store::default(); let wat = r#" @@ -99,8 +97,7 @@ fn test_trap_trace_cb() -> Result<()> { } #[test] -#[cfg_attr(all(target_os = "windows", target_arch = "aarch64"), ignore)] // FIXME(#1642) -#[cfg_attr(all(target_os = "windows", feature = "experimental_x64"), ignore)] // FIXME(#2079) +#[cfg_attr(all(target_os = "macos", target_arch = "aarch64"), ignore)] // TODO #2808 system libunwind is broken on aarch64 fn test_trap_stack_overflow() -> Result<()> { let store = Store::default(); let wat = r#" @@ -128,8 
+125,7 @@ fn test_trap_stack_overflow() -> Result<()> { } #[test] -#[cfg_attr(all(target_os = "windows", target_arch = "aarch64"), ignore)] // FIXME(#1642) -#[cfg_attr(all(target_os = "windows", feature = "experimental_x64"), ignore)] // FIXME(#2079) +#[cfg_attr(all(target_os = "macos", target_arch = "aarch64"), ignore)] // TODO #2808 system libunwind is broken on aarch64 fn trap_display_pretty() -> Result<()> { let store = Store::default(); let wat = r#" @@ -161,8 +157,7 @@ wasm backtrace: } #[test] -#[cfg_attr(all(target_os = "windows", target_arch = "aarch64"), ignore)] // FIXME(#1642) -#[cfg_attr(all(target_os = "windows", feature = "experimental_x64"), ignore)] // FIXME(#2079) +#[cfg_attr(all(target_os = "macos", target_arch = "aarch64"), ignore)] // TODO #2808 system libunwind is broken on aarch64 fn trap_display_multi_module() -> Result<()> { let store = Store::default(); let wat = r#" @@ -207,8 +202,6 @@ wasm backtrace: } #[test] -#[cfg_attr(all(target_os = "windows", target_arch = "aarch64"), ignore)] // FIXME(#1642) -#[cfg_attr(all(target_os = "windows", feature = "experimental_x64"), ignore)] // FIXME(#2079) fn trap_start_function_import() -> Result<()> { let store = Store::default(); let binary = wat::parse_str( @@ -235,8 +228,6 @@ fn trap_start_function_import() -> Result<()> { } #[test] -#[cfg_attr(all(target_os = "windows", target_arch = "aarch64"), ignore)] // FIXME(#1642) -#[cfg_attr(all(target_os = "windows", feature = "experimental_x64"), ignore)] // FIXME(#2079) fn rust_panic_import() -> Result<()> { let store = Store::default(); let binary = wat::parse_str( @@ -278,8 +269,6 @@ fn rust_panic_import() -> Result<()> { } #[test] -#[cfg_attr(all(target_os = "windows", target_arch = "aarch64"), ignore)] // FIXME(#1642) -#[cfg_attr(all(target_os = "windows", feature = "experimental_x64"), ignore)] // FIXME(#2079) fn rust_panic_start_function() -> Result<()> { let store = Store::default(); let binary = wat::parse_str( @@ -313,8 +302,6 @@ fn 
rust_panic_start_function() -> Result<()> { } #[test] -#[cfg_attr(all(target_os = "windows", target_arch = "aarch64"), ignore)] // FIXME(#1642) -#[cfg_attr(all(target_os = "windows", feature = "experimental_x64"), ignore)] // FIXME(#2079) fn mismatched_arguments() -> Result<()> { let store = Store::default(); let binary = wat::parse_str( @@ -346,8 +333,6 @@ fn mismatched_arguments() -> Result<()> { } #[test] -#[cfg_attr(all(target_os = "windows", target_arch = "aarch64"), ignore)] // FIXME(#1642) -#[cfg_attr(all(target_os = "windows", feature = "experimental_x64"), ignore)] // FIXME(#2079) fn call_signature_mismatch() -> Result<()> { let store = Store::default(); let binary = wat::parse_str( @@ -378,8 +363,7 @@ fn call_signature_mismatch() -> Result<()> { } #[test] -#[cfg_attr(all(target_os = "windows", target_arch = "aarch64"), ignore)] // FIXME(#1642) -#[cfg_attr(all(target_os = "windows", feature = "experimental_x64"), ignore)] // FIXME(#2079) +#[cfg_attr(all(target_os = "macos", target_arch = "aarch64"), ignore)] // TODO #2808 system libunwind is broken on aarch64 fn start_trap_pretty() -> Result<()> { let store = Store::default(); let wat = r#" @@ -413,6 +397,7 @@ wasm backtrace: } #[test] +#[cfg_attr(all(target_os = "macos", target_arch = "aarch64"), ignore)] // TODO #2808 system libunwind is broken on aarch64 fn present_after_module_drop() -> Result<()> { let store = Store::default(); let module = Module::new(store.engine(), r#"(func (export "foo") unreachable)"#)?; @@ -496,6 +481,7 @@ fn rustc(src: &str) -> Vec { } #[test] +#[cfg_attr(all(target_os = "macos", target_arch = "aarch64"), ignore)] // TODO #2808 system libunwind is broken on aarch64 fn parse_dwarf_info() -> Result<()> { let wasm = rustc( " @@ -512,7 +498,7 @@ fn parse_dwarf_info() -> Result<()> { let mut linker = Linker::new(&store); wasmtime_wasi::Wasi::new( &store, - wasi_cap_std_sync::WasiCtxBuilder::new() + wasmtime_wasi::sync::WasiCtxBuilder::new() .inherit_stdio() .build()?, ) @@ -538,6 
+524,7 @@ fn parse_dwarf_info() -> Result<()> { } #[test] +#[cfg_attr(all(target_os = "macos", target_arch = "aarch64"), ignore)] // TODO #2808 system libunwind is broken on aarch64 fn no_hint_even_with_dwarf_info() -> Result<()> { let mut config = Config::new(); config.wasm_backtrace_details(WasmBacktraceDetails::Disable); @@ -570,6 +557,7 @@ wasm backtrace: } #[test] +#[cfg_attr(all(target_os = "macos", target_arch = "aarch64"), ignore)] // TODO #2808 system libunwind is broken on aarch64 fn hint_with_dwarf_info() -> Result<()> { // Skip this test if the env var is already configure, but in CI we're sure // to run tests without this env var configured. @@ -603,3 +591,39 @@ note: run with `WASMTIME_BACKTRACE_DETAILS=1` environment variable to display mo ); Ok(()) } + +#[test] +fn multithreaded_traps() -> Result<()> { + // Compile and run unreachable on a thread, then moves over the whole store to another thread, + // and make sure traps are still correctly caught after notifying the store of the move. + let instance = { + let store = Store::default(); + let module = Module::new( + store.engine(), + r#"(module (func (export "run") unreachable))"#, + )?; + Instance::new(&store, &module, &[])? + }; + + assert!(instance.get_typed_func::<(), ()>("run")?.call(()).is_err()); + + struct SendInstance { + inner: Instance, + } + unsafe impl Send for SendInstance {} + + let instance = SendInstance { inner: instance }; + + let handle = std::thread::spawn(move || { + let instance = instance.inner; + assert!(instance + .get_typed_func::<(), ()>("run") + .unwrap() + .call(()) + .is_err()); + }); + + handle.join().expect("couldn't join thread"); + + Ok(()) +} diff --git a/tests/host_segfault.rs b/tests/host_segfault.rs index 30f3a4f9ec..e725ff4303 100644 --- a/tests/host_segfault.rs +++ b/tests/host_segfault.rs @@ -10,26 +10,72 @@ // happened or anything like that. 
use std::env; +use std::future::Future; +use std::io::{self, Write}; +use std::pin::Pin; use std::process::{Command, ExitStatus}; +use std::task::{Context, Poll, RawWaker, RawWakerVTable, Waker}; use wasmtime::*; const VAR_NAME: &str = "__TEST_TO_RUN"; -const CONFIRM: &str = "well at least we ran up to the segfault\n"; +const CONFIRM: &str = "well at least we ran up to the crash"; fn segfault() -> ! { unsafe { - print!("{}", CONFIRM); + println!("{}", CONFIRM); + io::stdout().flush().unwrap(); *(0x4 as *mut i32) = 3; unreachable!() } } -fn overrun_the_stack() -> usize { - let mut a = [0u8; 1024]; - if a.as_mut_ptr() as usize == 1 { - return 1; - } else { - return a.as_mut_ptr() as usize + overrun_the_stack(); +fn allocate_stack_space() -> ! { + let _a = [0u8; 1024]; + + for _ in 0..100000 { + allocate_stack_space(); + } + + unreachable!() +} + +fn overrun_the_stack() -> ! { + println!("{}", CONFIRM); + io::stdout().flush().unwrap(); + allocate_stack_space(); +} + +fn run_future(future: F) -> F::Output { + let mut f = Pin::from(Box::new(future)); + let waker = dummy_waker(); + let mut cx = Context::from_waker(&waker); + loop { + match f.as_mut().poll(&mut cx) { + Poll::Ready(val) => break val, + Poll::Pending => {} + } + } +} + +fn dummy_waker() -> Waker { + return unsafe { Waker::from_raw(clone(5 as *const _)) }; + + unsafe fn clone(ptr: *const ()) -> RawWaker { + assert_eq!(ptr as usize, 5); + const VTABLE: RawWakerVTable = RawWakerVTable::new(clone, wake, wake_by_ref, drop); + RawWaker::new(ptr, &VTABLE) + } + + unsafe fn wake(ptr: *const ()) { + assert_eq!(ptr as usize, 5); + } + + unsafe fn wake_by_ref(ptr: *const ()) { + assert_eq!(ptr as usize, 5); + } + + unsafe fn drop(ptr: *const ()) { + assert_eq!(ptr as usize, 5); } } @@ -45,29 +91,77 @@ fn main() { return; } - let tests: &[(&str, fn())] = &[ - ("normal segfault", || segfault()), - ("make instance then segfault", || { - let engine = Engine::default(); - let store = Store::new(&engine); - let module = 
Module::new(&engine, "(module)").unwrap(); - let _instance = Instance::new(&store, &module, &[]).unwrap(); - segfault(); - }), - ("make instance then overrun the stack", || { - let engine = Engine::default(); - let store = Store::new(&engine); - let module = Module::new(&engine, "(module)").unwrap(); - let _instance = Instance::new(&store, &module, &[]).unwrap(); - println!("stack overrun: {}", overrun_the_stack()); - }), - ("segfault in a host function", || { - let engine = Engine::default(); - let store = Store::new(&engine); - let module = Module::new(&engine, r#"(import "" "" (func)) (start 0)"#).unwrap(); - let segfault = Func::wrap(&store, || segfault()); - Instance::new(&store, &module, &[segfault.into()]).unwrap(); - }), + let tests: &[(&str, fn(), bool)] = &[ + ("normal segfault", || segfault(), false), + ( + "make instance then segfault", + || { + let engine = Engine::default(); + let store = Store::new(&engine); + let module = Module::new(&engine, "(module)").unwrap(); + let _instance = Instance::new(&store, &module, &[]).unwrap(); + segfault(); + }, + false, + ), + ( + "make instance then overrun the stack", + || { + let engine = Engine::default(); + let store = Store::new(&engine); + let module = Module::new(&engine, "(module)").unwrap(); + let _instance = Instance::new(&store, &module, &[]).unwrap(); + overrun_the_stack(); + }, + true, + ), + ( + "segfault in a host function", + || { + let engine = Engine::default(); + let store = Store::new(&engine); + let module = Module::new(&engine, r#"(import "" "" (func)) (start 0)"#).unwrap(); + let segfault = Func::wrap(&store, || segfault()); + Instance::new(&store, &module, &[segfault.into()]).unwrap(); + unreachable!(); + }, + false, + ), + ( + "hit async stack guard page", + || { + let mut config = Config::default(); + config.async_support(true); + let engine = Engine::new(&config).unwrap(); + let store = Store::new(&engine); + let f = Func::wrap0_async(&store, (), |_, _| { + Box::new(async { + 
overrun_the_stack(); + }) + }); + run_future(f.call_async(&[])).unwrap(); + unreachable!(); + }, + true, + ), + ( + "hit async stack guard page with pooling allocator", + || { + let mut config = Config::default(); + config.async_support(true); + config.allocation_strategy(InstanceAllocationStrategy::pooling()); + let engine = Engine::new(&config).unwrap(); + let store = Store::new(&engine); + let f = Func::wrap0_async(&store, (), |_, _| { + Box::new(async { + overrun_the_stack(); + }) + }); + run_future(f.call_async(&[])).unwrap(); + unreachable!(); + }, + true, + ), ]; match env::var(VAR_NAME) { Ok(s) => { @@ -79,14 +173,14 @@ fn main() { test(); } Err(_) => { - for (name, _test) in tests { - runtest(name); + for (name, _test, stack_overflow) in tests { + run_test(name, *stack_overflow); } } } } -fn runtest(name: &str) { +fn run_test(name: &str, stack_overflow: bool) { let me = env::current_exe().unwrap(); let mut cmd = Command::new(me); cmd.env(VAR_NAME, name); @@ -94,31 +188,41 @@ fn runtest(name: &str) { let stdout = String::from_utf8_lossy(&output.stdout); let stderr = String::from_utf8_lossy(&output.stderr); let mut desc = format!("got status: {}", output.status); + if !stdout.trim().is_empty() { desc.push_str("\nstdout: ----\n"); desc.push_str(" "); desc.push_str(&stdout.replace("\n", "\n ")); } + if !stderr.trim().is_empty() { desc.push_str("\nstderr: ----\n"); desc.push_str(" "); desc.push_str(&stderr.replace("\n", "\n ")); } - if is_segfault(&output.status) { - assert!( - stdout.ends_with(CONFIRM) && stderr.is_empty(), - "failed to find confirmation in test `{}`\n{}", - name, - desc - ); - } else if name.contains("overrun the stack") { - assert!( - stderr.contains("thread 'main' has overflowed its stack"), - "bad stderr: {}", - stderr - ); + + if stack_overflow { + if is_stack_overflow(&output.status, &stderr) { + assert!( + stdout.trim().ends_with(CONFIRM), + "failed to find confirmation in test `{}`\n{}", + name, + desc + ); + } else { + 
panic!("\n\nexpected a stack overflow on `{}`\n{}\n\n", name, desc); + } } else { - panic!("\n\nexpected a segfault on `{}`\n{}\n\n", name, desc); + if is_segfault(&output.status) { + assert!( + stdout.trim().ends_with(CONFIRM) && stderr.is_empty(), + "failed to find confirmation in test `{}`\n{}", + name, + desc + ); + } else { + panic!("\n\nexpected a segfault on `{}`\n{}\n\n", name, desc); + } } } @@ -127,11 +231,23 @@ fn is_segfault(status: &ExitStatus) -> bool { use std::os::unix::prelude::*; match status.signal() { - Some(libc::SIGSEGV) | Some(libc::SIGBUS) => true, + Some(libc::SIGSEGV) => true, _ => false, } } +#[cfg(unix)] +fn is_stack_overflow(status: &ExitStatus, stderr: &str) -> bool { + use std::os::unix::prelude::*; + + // The main thread might overflow or it might be from a fiber stack (SIGSEGV/SIGBUS) + stderr.contains("thread 'main' has overflowed its stack") + || match status.signal() { + Some(libc::SIGSEGV) | Some(libc::SIGBUS) => true, + _ => false, + } +} + #[cfg(windows)] fn is_segfault(status: &ExitStatus) -> bool { match status.code().map(|s| s as u32) { @@ -139,3 +255,11 @@ fn is_segfault(status: &ExitStatus) -> bool { _ => false, } } + +#[cfg(windows)] +fn is_stack_overflow(status: &ExitStatus, _stderr: &str) -> bool { + match status.code().map(|s| s as u32) { + Some(0xc00000fd) => true, + _ => false, + } +} diff --git a/tests/wasm/simple.wat b/tests/wasm/simple.wat index 7b618ee429..a851dfa00e 100644 --- a/tests/wasm/simple.wat +++ b/tests/wasm/simple.wat @@ -2,4 +2,6 @@ (func (export "simple") (param i32) (result i32) local.get 0 ) -) \ No newline at end of file + (func (export "get_f32") (result f32) f32.const 100) + (func (export "get_f64") (result f64) f64.const 100) +)