properly splatting bytes in emit_small_memset
This commit is contained in:
5
.github/actions/install-rust/main.js
vendored
5
.github/actions/install-rust/main.js
vendored
@@ -30,3 +30,8 @@ set_env("CARGO_INCREMENTAL", "0");
|
||||
// Turn down debuginfo from 2 to 1 to help save disk space
|
||||
set_env("CARGO_PROFILE_DEV_DEBUG", "1");
|
||||
set_env("CARGO_PROFILE_TEST_DEBUG", "1");
|
||||
|
||||
if (process.platform === 'darwin') {
|
||||
set_env("CARGO_PROFILE_DEV_SPLIT_DEBUGINFO", "unpacked");
|
||||
set_env("CARGO_PROFILE_TEST_SPLIT_DEBUGINFO", "unpacked");
|
||||
}
|
||||
|
||||
113
.github/workflows/main.yml
vendored
113
.github/workflows/main.yml
vendored
@@ -48,6 +48,7 @@ jobs:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: true
|
||||
- run: rustup update stable && rustup default stable
|
||||
- run: |
|
||||
set -e
|
||||
curl -L https://github.com/rust-lang-nursery/mdBook/releases/download/v0.4.4/mdbook-v0.4.4-x86_64-unknown-linux-gnu.tar.gz | tar xzf -
|
||||
@@ -73,12 +74,15 @@ jobs:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: true
|
||||
# Note that we use nightly Rust for the doc_cfg feature (enabled via `nightlydoc` above)
|
||||
# This version is an older nightly for the new x64 backend (see below)
|
||||
- uses: ./.github/actions/install-rust
|
||||
with:
|
||||
toolchain: nightly-2020-12-26
|
||||
- run: cargo doc --no-deps --all --exclude wasmtime-cli --exclude test-programs --exclude cranelift-codegen-meta
|
||||
toolchain: nightly-2021-04-11
|
||||
- run: |
|
||||
cargo doc --no-deps --workspace \
|
||||
--exclude wasmtime-cli \
|
||||
--exclude test-programs \
|
||||
--exclude cranelift-codegen-meta \
|
||||
--exclude 'peepmatic*'
|
||||
- run: cargo doc --package cranelift-codegen-meta --document-private-items
|
||||
- uses: actions/upload-artifact@v1
|
||||
with:
|
||||
@@ -165,7 +169,7 @@ jobs:
|
||||
# flags to rustc.
|
||||
- uses: ./.github/actions/install-rust
|
||||
with:
|
||||
toolchain: nightly
|
||||
toolchain: nightly-2021-04-11
|
||||
- run: cargo install cargo-fuzz --vers "^0.8"
|
||||
- run: cargo fetch
|
||||
working-directory: ./fuzz
|
||||
@@ -178,16 +182,9 @@ jobs:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: true
|
||||
- run: rustup update stable && rustup default stable
|
||||
- name: Test `peepmatic`
|
||||
run: |
|
||||
cargo test \
|
||||
--package peepmatic \
|
||||
--package peepmatic-automata \
|
||||
--package peepmatic-fuzzing \
|
||||
--package peepmatic-macro \
|
||||
--package peepmatic-runtime \
|
||||
--package peepmatic-test \
|
||||
--package peepmatic-souper
|
||||
run: cargo test --package 'peepmatic*'
|
||||
- name: Rebuild Peepmatic-based peephole optimizers
|
||||
run: |
|
||||
cargo test \
|
||||
@@ -211,6 +208,7 @@ jobs:
|
||||
name: Test
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
build: [stable, beta, nightly, windows, macos]
|
||||
include:
|
||||
@@ -222,7 +220,7 @@ jobs:
|
||||
rust: beta
|
||||
- build: nightly
|
||||
os: ubuntu-latest
|
||||
rust: nightly
|
||||
rust: nightly-2021-04-11
|
||||
- build: macos
|
||||
os: macos-latest
|
||||
rust: stable
|
||||
@@ -270,18 +268,10 @@ jobs:
|
||||
- run: |
|
||||
cargo test \
|
||||
--features test-programs/test_programs \
|
||||
--all \
|
||||
--exclude lightbeam \
|
||||
--exclude wasmtime-lightbeam \
|
||||
--exclude wasmtime-wasi-nn \
|
||||
--exclude wasmtime-wasi-crypto \
|
||||
--exclude peepmatic \
|
||||
--exclude peepmatic-automata \
|
||||
--exclude peepmatic-fuzzing \
|
||||
--exclude peepmatic-macro \
|
||||
--exclude peepmatic-runtime \
|
||||
--exclude peepmatic-test \
|
||||
--exclude peepmatic-souper
|
||||
--workspace \
|
||||
--exclude '*lightbeam*' \
|
||||
--exclude 'wasmtime-wasi-*' \
|
||||
--exclude 'peepmatic*'
|
||||
env:
|
||||
RUST_BACKTRACE: 1
|
||||
|
||||
@@ -297,7 +287,7 @@ jobs:
|
||||
# Test debug (DWARF) related functionality on new backend.
|
||||
- run: |
|
||||
sudo apt-get update && sudo apt-get install -y gdb lldb
|
||||
cargo test --features experimental_x64 test_debug_dwarf -- --ignored --test-threads 1 --test debug::
|
||||
cargo test test_debug_dwarf -- --ignored --test-threads 1 --test debug::
|
||||
if: matrix.os == 'ubuntu-latest'
|
||||
env:
|
||||
RUST_BACKTRACE: 1
|
||||
@@ -320,13 +310,9 @@ jobs:
|
||||
env:
|
||||
RUST_BACKTRACE: 1
|
||||
|
||||
# Perform all tests (debug mode) for `wasmtime` with the experimental x64
|
||||
# backend. This runs on an older nightly of Rust (because of issues with
|
||||
# unifying Cargo features on stable) on Ubuntu such that it's new enough
|
||||
# to build Wasmtime, but old enough where the -Z options being used
|
||||
# haven't been stabilized yet.
|
||||
# Perform all tests (debug mode) for `wasmtime` with the old x86 backend.
|
||||
test_x64:
|
||||
name: Test x64 new backend
|
||||
name: Test old x86 backend
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
@@ -334,7 +320,7 @@ jobs:
|
||||
submodules: true
|
||||
- uses: ./.github/actions/install-rust
|
||||
with:
|
||||
toolchain: nightly-2020-12-26
|
||||
toolchain: stable
|
||||
- uses: ./.github/actions/define-llvm-env
|
||||
|
||||
# Install wasm32 targets in order to build various tests throughout the
|
||||
@@ -342,43 +328,9 @@ jobs:
|
||||
- run: rustup target add wasm32-wasi
|
||||
- run: rustup target add wasm32-unknown-unknown
|
||||
|
||||
# Run the x64 CI script.
|
||||
- run: ./ci/run-experimental-x64-ci.sh
|
||||
# Run the old x86 backend CI (we will eventually remove this).
|
||||
- run: ./ci/run-old-x86-ci.sh
|
||||
env:
|
||||
CARGO_VERSION: "+nightly-2020-12-26"
|
||||
RUST_BACKTRACE: 1
|
||||
|
||||
# Perform tests on the new x64 backend on Windows as well.
|
||||
test_x64_win:
|
||||
name: Test x64 new backend on Windows
|
||||
runs-on: windows-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: true
|
||||
- uses: ./.github/actions/install-rust
|
||||
with:
|
||||
toolchain: nightly-2020-11-29
|
||||
- uses: ./.github/actions/define-llvm-env
|
||||
|
||||
- name: Install libclang
|
||||
# Note: libclang is pre-installed on the macOS and linux images.
|
||||
if: matrix.os == 'windows-latest'
|
||||
run: |
|
||||
curl https://releases.llvm.org/9.0.0/LLVM-9.0.0-win64.exe -o llvm-installer.exe
|
||||
7z x llvm-installer.exe -oC:/llvm-binary
|
||||
echo LIBCLANG_PATH=C:/llvm-binary/bin/libclang.dll >> $GITHUB_ENV
|
||||
echo C:/llvm-binary/bin >> $GITHUB_PATH
|
||||
|
||||
# Install wasm32 targets in order to build various tests throughout the
|
||||
# repo.
|
||||
- run: rustup target add wasm32-wasi
|
||||
- run: rustup target add wasm32-unknown-unknown
|
||||
|
||||
# Run the x64 CI script.
|
||||
- run: ./ci/run-experimental-x64-ci.sh
|
||||
env:
|
||||
CARGO_VERSION: "+nightly-2020-11-29"
|
||||
RUST_BACKTRACE: 1
|
||||
|
||||
# Build and test the wasi-nn module.
|
||||
@@ -390,8 +342,6 @@ jobs:
|
||||
with:
|
||||
submodules: true
|
||||
- uses: ./.github/actions/install-rust
|
||||
with:
|
||||
toolchain: nightly
|
||||
- run: rustup target add wasm32-wasi
|
||||
- uses: ./.github/actions/install-openvino
|
||||
- run: ./ci/run-wasi-nn-example.sh
|
||||
@@ -433,6 +383,7 @@ jobs:
|
||||
name: Build wasmtime
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- build: x86_64-linux
|
||||
@@ -517,18 +468,10 @@ jobs:
|
||||
$CENTOS cargo test \
|
||||
--features test-programs/test_programs \
|
||||
--release \
|
||||
--all \
|
||||
--exclude lightbeam \
|
||||
--exclude wasmtime-lightbeam \
|
||||
--exclude wasmtime-wasi-nn \
|
||||
--exclude wasmtime-wasi-crypto \
|
||||
--exclude peepmatic \
|
||||
--exclude peepmatic-automata \
|
||||
--exclude peepmatic-fuzzing \
|
||||
--exclude peepmatic-macro \
|
||||
--exclude peepmatic-runtime \
|
||||
--exclude peepmatic-test \
|
||||
--exclude peepmatic-souper \
|
||||
--workspace \
|
||||
--exclude '*lightbeam*' \
|
||||
--exclude 'wasmtime-wasi-*' \
|
||||
--exclude 'peepmatic*' \
|
||||
--exclude wasmtime-fuzz
|
||||
env:
|
||||
RUST_BACKTRACE: 1
|
||||
|
||||
660
Cargo.lock
generated
660
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
54
Cargo.toml
54
Cargo.toml
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "wasmtime-cli"
|
||||
version = "0.25.0"
|
||||
version = "0.26.0"
|
||||
authors = ["The Wasmtime Project Developers"]
|
||||
description = "Command-line interface for Wasmtime"
|
||||
license = "Apache-2.0 WITH LLVM-exception"
|
||||
@@ -22,31 +22,29 @@ doc = false
|
||||
|
||||
[dependencies]
|
||||
# Enable all supported architectures by default.
|
||||
wasmtime = { path = "crates/wasmtime", version = "0.25.0", default-features = false, features = ['cache'] }
|
||||
wasmtime-cache = { path = "crates/cache", version = "0.25.0" }
|
||||
wasmtime-debug = { path = "crates/debug", version = "0.25.0" }
|
||||
wasmtime-environ = { path = "crates/environ", version = "0.25.0" }
|
||||
wasmtime-jit = { path = "crates/jit", version = "0.25.0" }
|
||||
wasmtime-obj = { path = "crates/obj", version = "0.25.0" }
|
||||
wasmtime-wast = { path = "crates/wast", version = "0.25.0" }
|
||||
wasmtime-wasi = { path = "crates/wasi", version = "0.25.0" }
|
||||
wasmtime-wasi-crypto = { path = "crates/wasi-crypto", version = "0.25.0", optional = true }
|
||||
wasmtime-wasi-nn = { path = "crates/wasi-nn", version = "0.25.0", optional = true }
|
||||
wasi-common = { path = "crates/wasi-common", version = "0.25.0" }
|
||||
wasi-cap-std-sync = { path = "crates/wasi-common/cap-std-sync", version = "0.25.0" }
|
||||
wasmtime = { path = "crates/wasmtime", version = "0.26.0", default-features = false, features = ['cache'] }
|
||||
wasmtime-cache = { path = "crates/cache", version = "0.26.0" }
|
||||
wasmtime-debug = { path = "crates/debug", version = "0.26.0" }
|
||||
wasmtime-environ = { path = "crates/environ", version = "0.26.0" }
|
||||
wasmtime-jit = { path = "crates/jit", version = "0.26.0" }
|
||||
wasmtime-obj = { path = "crates/obj", version = "0.26.0" }
|
||||
wasmtime-wast = { path = "crates/wast", version = "0.26.0" }
|
||||
wasmtime-wasi = { path = "crates/wasi", version = "0.26.0" }
|
||||
wasmtime-wasi-crypto = { path = "crates/wasi-crypto", version = "0.26.0", optional = true }
|
||||
wasmtime-wasi-nn = { path = "crates/wasi-nn", version = "0.26.0", optional = true }
|
||||
structopt = { version = "0.3.5", features = ["color", "suggestions"] }
|
||||
object = { version = "0.23.0", default-features = false, features = ["write"] }
|
||||
object = { version = "0.24.0", default-features = false, features = ["write"] }
|
||||
anyhow = "1.0.19"
|
||||
target-lexicon = { version = "0.11.0", default-features = false }
|
||||
target-lexicon = { version = "0.12.0", default-features = false }
|
||||
pretty_env_logger = "0.4.0"
|
||||
file-per-thread-logger = "0.1.1"
|
||||
wat = "1.0.36"
|
||||
wat = "1.0.37"
|
||||
libc = "0.2.60"
|
||||
log = "0.4.8"
|
||||
rayon = "1.2.1"
|
||||
humantime = "2.0.0"
|
||||
wasmparser = "0.76.0"
|
||||
cap-std = "0.13"
|
||||
wasmparser = "0.77.0"
|
||||
lazy_static = "1.4.0"
|
||||
|
||||
[dev-dependencies]
|
||||
env_logger = "0.8.1"
|
||||
@@ -56,6 +54,7 @@ tempfile = "3.1.0"
|
||||
test-programs = { path = "crates/test-programs" }
|
||||
wasmtime-fuzzing = { path = "crates/fuzzing" }
|
||||
wasmtime-runtime = { path = "crates/runtime" }
|
||||
tokio = { version = "1.5.0", features = ["rt", "time", "macros", "rt-multi-thread"] }
|
||||
tracing-subscriber = "0.2.16"
|
||||
wast = "35.0.0"
|
||||
|
||||
@@ -66,6 +65,7 @@ anyhow = "1.0.19"
|
||||
opt-level = 0
|
||||
|
||||
[workspace]
|
||||
resolver = '2'
|
||||
members = [
|
||||
"cranelift",
|
||||
"crates/bench-api",
|
||||
@@ -79,23 +79,29 @@ members = [
|
||||
"crates/wiggle/wasmtime",
|
||||
"crates/wasi-common",
|
||||
"crates/wasi-common/cap-std-sync",
|
||||
"crates/wasi-common/tokio",
|
||||
"examples/fib-debug/wasm",
|
||||
"examples/wasi/wasm",
|
||||
"examples/tokio/wasm",
|
||||
"fuzz",
|
||||
]
|
||||
|
||||
[features]
|
||||
default = ["jitdump", "wasmtime/wat", "wasmtime/parallel-compilation"]
|
||||
default = ["jitdump", "wasmtime/wat", "wasmtime/parallel-compilation", "wasi-nn"]
|
||||
lightbeam = ["wasmtime/lightbeam"]
|
||||
jitdump = ["wasmtime/jitdump"]
|
||||
vtune = ["wasmtime/vtune"]
|
||||
wasi-crypto = ["wasmtime-wasi-crypto"]
|
||||
wasi-nn = ["wasmtime-wasi-nn"]
|
||||
uffd = ["wasmtime/uffd"]
|
||||
all-arch = ["wasmtime/all-arch"]
|
||||
|
||||
# Try the experimental, work-in-progress new x86_64 backend. This is not stable
|
||||
# as of June 2020.
|
||||
experimental_x64 = ["wasmtime-jit/experimental_x64"]
|
||||
# Stub feature that does nothing, for Cargo-features compatibility: the new
|
||||
# backend is the default now.
|
||||
experimental_x64 = []
|
||||
|
||||
# Use the old x86 backend.
|
||||
old-x86-backend = ["wasmtime/old-x86-backend"]
|
||||
|
||||
[badges]
|
||||
maintenance = { status = "actively-developed" }
|
||||
@@ -104,5 +110,9 @@ maintenance = { status = "actively-developed" }
|
||||
name = "host_segfault"
|
||||
harness = false
|
||||
|
||||
[[example]]
|
||||
name = "tokio"
|
||||
required-features = ["wasmtime-wasi/tokio"]
|
||||
|
||||
[profile.dev.package.backtrace]
|
||||
debug = false # FIXME(#1813)
|
||||
|
||||
133
RELEASES.md
133
RELEASES.md
@@ -2,6 +2,137 @@
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
## Unreleased
|
||||
|
||||
### Added
|
||||
|
||||
* Added `Store::with_limits`, `StoreLimits`, and `ResourceLimiter` to the
|
||||
Wasmtime API to help with enforcing resource limits at runtime. The
|
||||
`ResourceLimiter` trait can be implemented by custom resource limiters to
|
||||
decide if linear memories or tables can be grown.
|
||||
|
||||
### Changed
|
||||
|
||||
* Breaking: `Memory::new` has been changed to return `Result` as creating a
|
||||
host memory object is now a fallible operation when the initial size of
|
||||
the memory exceeds the store limits.
|
||||
|
||||
## 0.26.0
|
||||
|
||||
Released 2021-04-05.
|
||||
|
||||
### Added
|
||||
|
||||
* Added the `wasmtime compile` command to support AOT compilation of Wasm
|
||||
modules. This adds the `Engine::precompile_module` method. Also added the
|
||||
`Config::target` method to change the compilation target of the
|
||||
configuration. This can be used in conjunction with
|
||||
`Engine::precompile_module` to target a different host triple than the
|
||||
current one.
|
||||
[#2791](https://github.com/bytecodealliance/wasmtime/pull/2791)
|
||||
|
||||
* Support for macOS on aarch64 (Apple M1 Silicon), including Apple-specific
|
||||
calling convention details and unwinding/exception handling using Mach ports.
|
||||
[#2742](https://github.com/bytecodealliance/wasmtime/pull/2742),
|
||||
[#2723](https://github.com/bytecodealliance/wasmtime/pull/2723)
|
||||
|
||||
* A number of SIMD instruction implementations in the new x86-64 backend.
|
||||
[#2771](https://github.com/bytecodealliance/wasmtime/pull/2771)
|
||||
|
||||
* Added the `Config::cranelift_flag_enable` method to enable setting Cranelift
|
||||
boolean flags or presets in a config.
|
||||
|
||||
* Added CLI option `--cranelift-enable` to enable boolean settings and ISA presets.
|
||||
|
||||
* Deduplicate function signatures in Wasm modules.
|
||||
[#2772](https://github.com/bytecodealliance/wasmtime/pull/2772)
|
||||
|
||||
* Optimize overheads of calling into Wasm functions.
|
||||
[#2757](https://github.com/bytecodealliance/wasmtime/pull/2757),
|
||||
[#2759](https://github.com/bytecodealliance/wasmtime/pull/2759)
|
||||
|
||||
* Improvements related to Module Linking: compile fewer trampolines.
|
||||
|
||||
[#2774](https://github.com/bytecodealliance/wasmtime/pull/2774)
|
||||
|
||||
* Re-export sibling crates from `wasmtime-wasi` to make embedding easier
|
||||
without needing to match crate versions.
|
||||
[#2776](https://github.com/bytecodealliance/wasmtime/pull/2776)
|
||||
|
||||
### Changed
|
||||
|
||||
* Switched the default compiler backend on x86-64 to Cranelift's new backend.
|
||||
This should not have any user-visible effects other than possibly runtime
|
||||
performance improvements. The old backend is still available with the
|
||||
`old-x86-backend` feature flag to the `cranelift-codegen` or `wasmtime`
|
||||
crates, or programmatically with `BackendVariant::Legacy`. We plan to
|
||||
maintain the old backend for at least one more release and ensure it works on
|
||||
CI.
|
||||
[#2718](https://github.com/bytecodealliance/wasmtime/pull/2718)
|
||||
|
||||
* Breaking: `Module::deserialize` has been removed in favor of `Module::new`.
|
||||
|
||||
* Breaking: `Config::cranelift_clear_cpu_flags` was removed. Use `Config::target`
|
||||
to clear the CPU flags for the host's target.
|
||||
|
||||
* Breaking: `Config::cranelift_other_flag` was renamed to `Config::cranelift_flag_set`.
|
||||
|
||||
* CLI changes:
|
||||
* Wasmtime CLI options to enable WebAssembly features have been replaced with
|
||||
a single `--wasm-features` option. The previous options are still
|
||||
supported, but are not displayed in help text.
|
||||
* Breaking: the CLI option `--cranelift-flags` was changed to
|
||||
`--cranelift-set`.
|
||||
* Breaking: the CLI option `--enable-reference-types=false` has been changed
|
||||
to `--wasm-features=-reference-types`.
|
||||
* Breaking: the CLI option `--enable-multi-value=false` has been changed to
|
||||
`--wasm-features=-multi-value`.
|
||||
* Breaking: the CLI option `--enable-bulk-memory=false` has been changed to
|
||||
`--wasm-features=-bulk-memory`.
|
||||
|
||||
* Improved error-reporting in wiggle.
|
||||
[#2760](https://github.com/bytecodealliance/wasmtime/pull/2760)
|
||||
|
||||
* Make WASI sleeping fallible (some systems do not support sleep).
|
||||
[#2756](https://github.com/bytecodealliance/wasmtime/pull/2756)
|
||||
|
||||
* WASI: Support `poll_oneoff` with a sleep.
|
||||
[#2753](https://github.com/bytecodealliance/wasmtime/pull/2753)
|
||||
|
||||
* Allow a `StackMapSink` to be passed when defining functions with
|
||||
`cranelift-module`.
|
||||
[#2739](https://github.com/bytecodealliance/wasmtime/pull/2739)
|
||||
|
||||
* Some refactoring in the new x86-64 backend to prepare for VEX/EVEX (e.g.,
|
||||
AVX-512) instruction encodings to be supported.
|
||||
[#2799](https://github.com/bytecodealliance/wasmtime/pull/2799)
|
||||
|
||||
### Fixed
|
||||
|
||||
* Fixed a corner case in `srem` (signed remainder) in the new x86-64 backend:
|
||||
`INT_MIN % -1` should return `0`, rather than trapping. This only occurred
|
||||
when `avoid_div_traps == false` was set by the embedding.
|
||||
[#2763](https://github.com/bytecodealliance/wasmtime/pull/2763)
|
||||
|
||||
* Fixed a memory leak of the `Store` when an instance traps.
|
||||
[#2803](https://github.com/bytecodealliance/wasmtime/pull/2803)
|
||||
|
||||
* Some fuzzing-related fixes.
|
||||
[#2788](https://github.com/bytecodealliance/wasmtime/pull/2788),
|
||||
[#2770](https://github.com/bytecodealliance/wasmtime/pull/2770)
|
||||
|
||||
* Fixed a memory-initialization bug in the uffd allocator that could copy into the
|
||||
wrong destination under certain conditions. Does not affect the default
|
||||
wasmtime instance allocator.
|
||||
[#2801](https://github.com/bytecodealliance/wasmtime/pull/2801)
|
||||
|
||||
* Fix printing of float values from the Wasmtime CLI.
|
||||
[#2797](https://github.com/bytecodealliance/wasmtime/pull/2797)
|
||||
|
||||
* Remove the ability for the `Linker` to instantiate modules with duplicate
|
||||
import strings of different types.
|
||||
[#2789](https://github.com/bytecodealliance/wasmtime/pull/2789)
|
||||
|
||||
## 0.25.0
|
||||
|
||||
Released 2021-03-16.
|
||||
@@ -39,7 +170,7 @@ Released 2021-03-16.
|
||||
|
||||
### Fixed
|
||||
|
||||
* Interepretation of timestamps in `poll_oneoff` for WASI have been fixed to
|
||||
* Interpretation of timestamps in `poll_oneoff` for WASI has been fixed to
|
||||
correctly use nanoseconds instead of microseconds.
|
||||
[#2717](https://github.com/bytecodealliance/wasmtime/pull/2717)
|
||||
|
||||
|
||||
45
build.rs
45
build.rs
@@ -155,11 +155,8 @@ fn write_testsuite_tests(
|
||||
let testname = extract_name(path);
|
||||
|
||||
writeln!(out, "#[test]")?;
|
||||
if experimental_x64_should_panic(testsuite, &testname, strategy) {
|
||||
writeln!(
|
||||
out,
|
||||
r#"#[cfg_attr(feature = "experimental_x64", should_panic)]"#
|
||||
)?;
|
||||
if x64_should_panic(testsuite, &testname, strategy) {
|
||||
writeln!(out, r#"#[should_panic]"#)?;
|
||||
} else if ignore(testsuite, &testname, strategy) {
|
||||
writeln!(out, "#[ignore]")?;
|
||||
} else if pooling {
|
||||
@@ -186,10 +183,10 @@ fn write_testsuite_tests(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// For experimental_x64 backend features that are not supported yet, mark tests as panicking, so
|
||||
/// For x64 backend features that are not supported yet, mark tests as panicking, so
|
||||
/// they stop "passing" once the features are properly implemented.
|
||||
fn experimental_x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool {
|
||||
if !cfg!(feature = "experimental_x64") || strategy != "Cranelift" {
|
||||
fn x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool {
|
||||
if !platform_is_x64() || strategy != "Cranelift" {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -222,12 +219,10 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
|
||||
_ => (),
|
||||
},
|
||||
"Cranelift" => match (testsuite, testname) {
|
||||
// TODO(#1886): Ignore reference types tests if this isn't x64,
|
||||
// because Cranelift only supports reference types on x64.
|
||||
("reference_types", _) => {
|
||||
return env::var("CARGO_CFG_TARGET_ARCH").unwrap() != "x86_64";
|
||||
}
|
||||
// No simd support yet for s390x.
|
||||
("simd", _) if platform_is_s390x() => return true,
|
||||
|
||||
("simd", _) if cfg!(feature = "old-x86-backend") => return true, // skip all SIMD tests on old backend.
|
||||
// These are new instructions that are not really implemented in any backend.
|
||||
("simd", "simd_i8x16_arith2")
|
||||
| ("simd", "simd_conversions")
|
||||
@@ -240,22 +235,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
|
||||
| ("simd", "simd_i64x2_extmul_i32x4")
|
||||
| ("simd", "simd_int_to_int_extend") => return true,
|
||||
|
||||
// These are only implemented on x64.
|
||||
("simd", "simd_i64x2_arith2") | ("simd", "simd_boolean") => {
|
||||
return !cfg!(feature = "experimental_x64")
|
||||
}
|
||||
|
||||
// These are only implemented on aarch64 and x64.
|
||||
("simd", "simd_i64x2_cmp")
|
||||
| ("simd", "simd_f32x4_pmin_pmax")
|
||||
| ("simd", "simd_f64x2_pmin_pmax")
|
||||
| ("simd", "simd_f32x4_rounding")
|
||||
| ("simd", "simd_f64x2_rounding")
|
||||
| ("simd", "simd_i32x4_dot_i16x8") => {
|
||||
return !(cfg!(feature = "experimental_x64")
|
||||
|| env::var("CARGO_CFG_TARGET_ARCH").unwrap() == "aarch64")
|
||||
}
|
||||
|
||||
_ => {}
|
||||
},
|
||||
_ => panic!("unrecognized strategy"),
|
||||
@@ -263,3 +242,11 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
fn platform_is_x64() -> bool {
|
||||
env::var("CARGO_CFG_TARGET_ARCH").unwrap() == "x86_64"
|
||||
}
|
||||
|
||||
fn platform_is_s390x() -> bool {
|
||||
env::var("CARGO_CFG_TARGET_ARCH").unwrap() == "s390x"
|
||||
}
|
||||
|
||||
@@ -1,36 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Use the Nightly variant of the compiler to properly unify the
|
||||
# experimental_x64 feature across all crates. Once the feature has stabilized
|
||||
# and become the default, we can remove this.
|
||||
CARGO_VERSION=${CARGO_VERSION:-"+nightly"}
|
||||
|
||||
# Some WASI tests seem to have an issue on Windows with symlinks if we run them
|
||||
# with this particular invocation. It's unclear why (nightly toolchain?) but
|
||||
# we're moving to the new backend by default soon enough, and all tests seem to
|
||||
# work with the main test setup, so let's just work around this by skipping
|
||||
# the tests for now.
|
||||
MINGW_EXTRA=""
|
||||
if [ `uname -o` == "Msys" ]; then
|
||||
MINGW_EXTRA="-- --skip wasi_cap_std_sync"
|
||||
fi
|
||||
|
||||
cargo $CARGO_VERSION \
|
||||
--locked \
|
||||
-Zfeatures=all -Zpackage-features \
|
||||
test \
|
||||
--features test-programs/test_programs \
|
||||
--features experimental_x64 \
|
||||
--all \
|
||||
--exclude wasmtime-lightbeam \
|
||||
--exclude wasmtime-wasi-nn \
|
||||
--exclude wasmtime-wasi-crypto \
|
||||
--exclude peepmatic \
|
||||
--exclude peepmatic-automata \
|
||||
--exclude peepmatic-fuzzing \
|
||||
--exclude peepmatic-macro \
|
||||
--exclude peepmatic-runtime \
|
||||
--exclude peepmatic-test \
|
||||
--exclude peepmatic-souper \
|
||||
--exclude lightbeam \
|
||||
$MINGW_EXTRA
|
||||
18
ci/run-old-x86-ci.sh
Executable file
18
ci/run-old-x86-ci.sh
Executable file
@@ -0,0 +1,18 @@
|
||||
#!/bin/bash
|
||||
|
||||
cargo test \
|
||||
--locked \
|
||||
--features test-programs/test_programs \
|
||||
--features old-x86-backend \
|
||||
--all \
|
||||
--exclude wasmtime-lightbeam \
|
||||
--exclude wasmtime-wasi-nn \
|
||||
--exclude wasmtime-wasi-crypto \
|
||||
--exclude peepmatic \
|
||||
--exclude peepmatic-automata \
|
||||
--exclude peepmatic-fuzzing \
|
||||
--exclude peepmatic-macro \
|
||||
--exclude peepmatic-runtime \
|
||||
--exclude peepmatic-test \
|
||||
--exclude peepmatic-souper \
|
||||
--exclude lightbeam
|
||||
@@ -7,4 +7,4 @@ pushd "$RUST_BINDINGS"
|
||||
cargo build --release --target=wasm32-wasi
|
||||
popd
|
||||
|
||||
cargo run --features wasi-crypto -- run "$RUST_BINDINGS/target/wasm32-wasi/release/wasi-crypto-guest.wasm"
|
||||
cargo run --features wasi-crypto -- run "$RUST_BINDINGS/target/wasm32-wasi/release/wasi-crypto-guest.wasm" --wasi-modules=experimental-wasi-crypto
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
# executed with the Wasmtime CLI.
|
||||
set -e
|
||||
WASMTIME_DIR=$(dirname "$0" | xargs dirname)
|
||||
FIXTURE=https://github.com/intel/openvino-rs/raw/main/crates/openvino/tests/fixtures/alexnet
|
||||
FIXTURE=https://github.com/intel/openvino-rs/raw/main/crates/openvino/tests/fixtures/mobilenet
|
||||
if [ -z "${1+x}" ]; then
|
||||
# If no temporary directory is specified, create one.
|
||||
TMP_DIR=$(mktemp -d -t ci-XXXXXXXXXX)
|
||||
@@ -26,9 +26,9 @@ source /opt/intel/openvino/bin/setupvars.sh
|
||||
OPENVINO_INSTALL_DIR=/opt/intel/openvino cargo build -p wasmtime-cli --features wasi-nn
|
||||
|
||||
# Download all necessary test fixtures to the temporary directory.
|
||||
wget --no-clobber --directory-prefix=$TMP_DIR $FIXTURE/alexnet.bin
|
||||
wget --no-clobber --directory-prefix=$TMP_DIR $FIXTURE/alexnet.xml
|
||||
wget --no-clobber --directory-prefix=$TMP_DIR $FIXTURE/tensor-1x3x227x227-f32.bgr
|
||||
wget --no-clobber $FIXTURE/mobilenet.bin --output-document=$TMP_DIR/model.bin
|
||||
wget --no-clobber $FIXTURE/mobilenet.xml --output-document=$TMP_DIR/model.xml
|
||||
wget --no-clobber $FIXTURE/tensor-1x224x224x3-f32.bgr --output-document=$TMP_DIR/tensor.bgr
|
||||
|
||||
# Now build an example that uses the wasi-nn API.
|
||||
pushd $WASMTIME_DIR/crates/wasi-nn/examples/classification-example
|
||||
@@ -37,7 +37,7 @@ cp target/wasm32-wasi/release/wasi-nn-example.wasm $TMP_DIR
|
||||
popd
|
||||
|
||||
# Run the example in Wasmtime (note that the example uses `fixture` as the expected location of the model/tensor files).
|
||||
OPENVINO_INSTALL_DIR=/opt/intel/openvino cargo run --features wasi-nn -- run --mapdir fixture::$TMP_DIR $TMP_DIR/wasi-nn-example.wasm
|
||||
cargo run -- run --mapdir fixture::$TMP_DIR $TMP_DIR/wasi-nn-example.wasm --wasi-modules=experimental-wasi-nn
|
||||
|
||||
# Clean up the temporary directory only if it was not specified (users may want to keep the directory around).
|
||||
if [[ $REMOVE_TMP_DIR -eq 1 ]]; then
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "cranelift-tools"
|
||||
authors = ["The Cranelift Project Developers"]
|
||||
version = "0.66.0"
|
||||
version = "0.73.0"
|
||||
description = "Binaries for testing the Cranelift libraries"
|
||||
license = "Apache-2.0 WITH LLVM-exception"
|
||||
documentation = "https://github.com/bytecodealliance/wasmtime/blob/main/cranelift/docs/index.md"
|
||||
@@ -15,27 +15,27 @@ path = "src/clif-util.rs"
|
||||
|
||||
[dependencies]
|
||||
cfg-if = "1.0"
|
||||
cranelift-codegen = { path = "codegen", version = "0.72.0" }
|
||||
cranelift-entity = { path = "entity", version = "0.72.0" }
|
||||
cranelift-interpreter = { path = "interpreter", version = "0.72.0" }
|
||||
cranelift-reader = { path = "reader", version = "0.72.0" }
|
||||
cranelift-frontend = { path = "frontend", version = "0.72.0" }
|
||||
cranelift-serde = { path = "serde", version = "0.72.0", optional = true }
|
||||
cranelift-wasm = { path = "wasm", version = "0.72.0", optional = true }
|
||||
cranelift-native = { path = "native", version = "0.72.0" }
|
||||
cranelift-filetests = { path = "filetests", version = "0.66.0" }
|
||||
cranelift-module = { path = "module", version = "0.72.0" }
|
||||
cranelift-object = { path = "object", version = "0.72.0" }
|
||||
cranelift-jit = { path = "jit", version = "0.72.0" }
|
||||
cranelift-preopt = { path = "preopt", version = "0.72.0" }
|
||||
cranelift = { path = "umbrella", version = "0.72.0" }
|
||||
cranelift-codegen = { path = "codegen", version = "0.73.0" }
|
||||
cranelift-entity = { path = "entity", version = "0.73.0" }
|
||||
cranelift-interpreter = { path = "interpreter", version = "0.73.0" }
|
||||
cranelift-reader = { path = "reader", version = "0.73.0" }
|
||||
cranelift-frontend = { path = "frontend", version = "0.73.0" }
|
||||
cranelift-serde = { path = "serde", version = "0.73.0", optional = true }
|
||||
cranelift-wasm = { path = "wasm", version = "0.73.0", optional = true }
|
||||
cranelift-native = { path = "native", version = "0.73.0" }
|
||||
cranelift-filetests = { path = "filetests", version = "0.73.0" }
|
||||
cranelift-module = { path = "module", version = "0.73.0" }
|
||||
cranelift-object = { path = "object", version = "0.73.0" }
|
||||
cranelift-jit = { path = "jit", version = "0.73.0" }
|
||||
cranelift-preopt = { path = "preopt", version = "0.73.0" }
|
||||
cranelift = { path = "umbrella", version = "0.73.0" }
|
||||
filecheck = "0.5.0"
|
||||
log = "0.4.8"
|
||||
termcolor = "1.1.2"
|
||||
capstone = { version = "0.7.0", optional = true }
|
||||
wat = { version = "1.0.36", optional = true }
|
||||
target-lexicon = { version = "0.11", features = ["std"] }
|
||||
peepmatic-souper = { path = "./peepmatic/crates/souper", version = "0.72.0", optional = true }
|
||||
target-lexicon = { version = "0.12", features = ["std"] }
|
||||
peepmatic-souper = { path = "./peepmatic/crates/souper", version = "0.73.0", optional = true }
|
||||
pretty_env_logger = "0.4.0"
|
||||
rayon = { version = "1", optional = true }
|
||||
file-per-thread-logger = "0.1.2"
|
||||
@@ -50,6 +50,6 @@ default = ["disas", "wasm", "cranelift-codegen/all-arch", "peepmatic-souper", "s
|
||||
disas = ["capstone"]
|
||||
enable-peepmatic = ["cranelift-codegen/enable-peepmatic", "cranelift-filetests/enable-peepmatic"]
|
||||
wasm = ["wat", "cranelift-wasm"]
|
||||
experimental_x64 = ["cranelift-codegen/x64", "cranelift-filetests/experimental_x64", "cranelift-reader/experimental_x64"]
|
||||
experimental_arm32 = ["cranelift-codegen/arm32", "cranelift-filetests/experimental_arm32"]
|
||||
souper-harvest = ["cranelift-codegen/souper-harvest", "rayon"]
|
||||
all-arch = ["cranelift-codegen/all-arch"]
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
authors = ["The Cranelift Project Developers"]
|
||||
name = "cranelift-bforest"
|
||||
version = "0.72.0"
|
||||
version = "0.73.0"
|
||||
description = "A forest of B+-trees"
|
||||
license = "Apache-2.0 WITH LLVM-exception"
|
||||
documentation = "https://docs.rs/cranelift-bforest"
|
||||
@@ -12,7 +12,7 @@ keywords = ["btree", "forest", "set", "map"]
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
cranelift-entity = { path = "../entity", version = "0.72.0", default-features = false }
|
||||
cranelift-entity = { path = "../entity", version = "0.73.0", default-features = false }
|
||||
|
||||
[badges]
|
||||
maintenance = { status = "experimental" }
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
authors = ["The Cranelift Project Developers"]
|
||||
name = "cranelift-codegen"
|
||||
version = "0.72.0"
|
||||
version = "0.73.0"
|
||||
description = "Low-level code generator library"
|
||||
license = "Apache-2.0 WITH LLVM-exception"
|
||||
documentation = "https://docs.rs/cranelift-codegen"
|
||||
@@ -13,21 +13,19 @@ build = "build.rs"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
cranelift-codegen-shared = { path = "./shared", version = "0.72.0" }
|
||||
cranelift-entity = { path = "../entity", version = "0.72.0" }
|
||||
cranelift-bforest = { path = "../bforest", version = "0.72.0" }
|
||||
cranelift-codegen-shared = { path = "./shared", version = "0.73.0" }
|
||||
cranelift-entity = { path = "../entity", version = "0.73.0" }
|
||||
cranelift-bforest = { path = "../bforest", version = "0.73.0" }
|
||||
hashbrown = { version = "0.9.1", optional = true }
|
||||
target-lexicon = "0.11"
|
||||
target-lexicon = "0.12"
|
||||
log = { version = "0.4.6", default-features = false }
|
||||
serde = { version = "1.0.94", features = ["derive"], optional = true }
|
||||
bincode = { version = "1.2.1", optional = true }
|
||||
gimli = { version = "0.23.0", default-features = false, features = ["write"], optional = true }
|
||||
gimli = { version = "0.24.0", default-features = false, features = ["write"], optional = true }
|
||||
smallvec = { version = "1.6.1" }
|
||||
thiserror = "1.0.4"
|
||||
byteorder = { version = "1.3.2", default-features = false }
|
||||
peepmatic = { path = "../peepmatic", optional = true, version = "0.72.0" }
|
||||
peepmatic-traits = { path = "../peepmatic/crates/traits", optional = true, version = "0.72.0" }
|
||||
peepmatic-runtime = { path = "../peepmatic/crates/runtime", optional = true, version = "0.72.0" }
|
||||
peepmatic = { path = "../peepmatic", optional = true, version = "0.73.0" }
|
||||
peepmatic-traits = { path = "../peepmatic/crates/traits", optional = true, version = "0.73.0" }
|
||||
peepmatic-runtime = { path = "../peepmatic/crates/runtime", optional = true, version = "0.73.0" }
|
||||
regalloc = { version = "0.0.31" }
|
||||
souper-ir = { version = "2.1.0", optional = true }
|
||||
wast = { version = "35.0.0", optional = true }
|
||||
@@ -36,8 +34,11 @@ wast = { version = "35.0.0", optional = true }
|
||||
# machine code. Integration tests that need external dependencies can be
|
||||
# accommodated in `tests`.
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.3"
|
||||
|
||||
[build-dependencies]
|
||||
cranelift-codegen-meta = { path = "meta", version = "0.72.0" }
|
||||
cranelift-codegen-meta = { path = "meta", version = "0.73.0" }
|
||||
|
||||
[features]
|
||||
default = ["std", "unwind"]
|
||||
@@ -63,14 +64,22 @@ unwind = ["gimli"]
|
||||
x86 = []
|
||||
arm64 = []
|
||||
riscv = []
|
||||
x64 = [] # New work-in-progress codegen backend for x86_64 based on the new isel.
|
||||
s390x = []
|
||||
arm32 = [] # Work-in-progress codegen backend for ARM.
|
||||
|
||||
# Stub feature that does nothing, for Cargo-features compatibility: the new
|
||||
# backend is the default now.
|
||||
experimental_x64 = []
|
||||
|
||||
# Make the old x86 backend the default.
|
||||
old-x86-backend = []
|
||||
|
||||
# Option to enable all architectures.
|
||||
all-arch = [
|
||||
"x86",
|
||||
"arm64",
|
||||
"riscv"
|
||||
"riscv",
|
||||
"s390x"
|
||||
]
|
||||
|
||||
# For dependent crates that want to serialize some parts of cranelift
|
||||
@@ -97,3 +106,7 @@ souper-harvest = ["souper-ir", "souper-ir/stringify"]
|
||||
|
||||
[badges]
|
||||
maintenance = { status = "experimental" }
|
||||
|
||||
[[bench]]
|
||||
name = "x64-evex-encoding"
|
||||
harness = false
|
||||
|
||||
138
cranelift/codegen/benches/x64-evex-encoding.rs
Normal file
138
cranelift/codegen/benches/x64-evex-encoding.rs
Normal file
@@ -0,0 +1,138 @@
|
||||
//! Measure instruction encoding latency using various approaches; the
|
||||
//! benchmarking is feature-gated on `x86` since it only measures the encoding
|
||||
//! mechanism of that backend.
|
||||
|
||||
#[cfg(feature = "x86")]
|
||||
mod x86 {
|
||||
use cranelift_codegen::isa::x64::encoding::{
|
||||
evex::{EvexContext, EvexInstruction, EvexMasking, EvexVectorLength, Register},
|
||||
rex::OpcodeMap,
|
||||
rex::{encode_modrm, LegacyPrefixes},
|
||||
ByteSink,
|
||||
};
|
||||
use cranelift_codegen_shared::isa::x86::EncodingBits;
|
||||
use criterion::{criterion_group, Criterion};
|
||||
|
||||
// Define the benchmarks.
|
||||
fn x64_evex_encoding_benchmarks(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("x64 EVEX encoding");
|
||||
let rax = Register::from(0);
|
||||
let rdx = Register::from(2);
|
||||
|
||||
group.bench_function("EvexInstruction (builder pattern)", |b| {
|
||||
let mut sink = vec![];
|
||||
b.iter(|| {
|
||||
sink.clear();
|
||||
EvexInstruction::new()
|
||||
.prefix(LegacyPrefixes::_66)
|
||||
.map(OpcodeMap::_0F38)
|
||||
.w(true)
|
||||
.opcode(0x1F)
|
||||
.reg(rax)
|
||||
.rm(rdx)
|
||||
.length(EvexVectorLength::V128)
|
||||
.encode(&mut sink);
|
||||
});
|
||||
});
|
||||
|
||||
group.bench_function("encode_evex (function pattern)", |b| {
|
||||
let mut sink = vec![];
|
||||
let bits = EncodingBits::new(&[0x66, 0x0f, 0x38, 0x1f], 0, 1);
|
||||
let vvvvv = Register::from(0);
|
||||
b.iter(|| {
|
||||
sink.clear();
|
||||
encode_evex(
|
||||
bits,
|
||||
rax,
|
||||
vvvvv,
|
||||
rdx,
|
||||
EvexContext::Other {
|
||||
length: EvexVectorLength::V128,
|
||||
},
|
||||
EvexMasking::default(),
|
||||
&mut sink,
|
||||
);
|
||||
})
|
||||
});
|
||||
}
|
||||
criterion_group!(benches, x64_evex_encoding_benchmarks);
|
||||
|
||||
/// Using an inner module to feature-gate the benchmarks means that we must
|
||||
/// manually specify how to run the benchmarks (see `criterion_main!`).
|
||||
pub fn run_benchmarks() {
|
||||
criterion::__warn_about_html_reports_feature();
|
||||
criterion::__warn_about_cargo_bench_support_feature();
|
||||
benches();
|
||||
Criterion::default().configure_from_args().final_summary();
|
||||
}
|
||||
|
||||
/// From the legacy x86 backend: a mechanism for encoding an EVEX
|
||||
/// instruction, including the prefixes, the instruction opcode, and the
|
||||
/// ModRM byte. This EVEX encoding function only encodes the `reg` (operand
|
||||
/// 1), `vvvv` (operand 2), `rm` (operand 3) form; other forms are possible
|
||||
/// (see section 2.6.2, Intel Software Development Manual, volume 2A),
|
||||
/// requiring refactoring of this function or separate functions for each
|
||||
/// form (e.g. as for the REX prefix).
|
||||
#[inline(always)]
|
||||
pub fn encode_evex<CS: ByteSink + ?Sized>(
|
||||
enc: EncodingBits,
|
||||
reg: Register,
|
||||
vvvvv: Register,
|
||||
rm: Register,
|
||||
context: EvexContext,
|
||||
masking: EvexMasking,
|
||||
sink: &mut CS,
|
||||
) {
|
||||
let reg: u8 = reg.into();
|
||||
let rm: u8 = rm.into();
|
||||
let vvvvv: u8 = vvvvv.into();
|
||||
|
||||
// EVEX prefix.
|
||||
sink.put1(0x62);
|
||||
|
||||
debug_assert!(enc.mm() < 0b100);
|
||||
let mut p0 = enc.mm() & 0b11;
|
||||
p0 |= evex2(rm, reg) << 4; // bits 3:2 are always unset
|
||||
sink.put1(p0);
|
||||
|
||||
let mut p1 = enc.pp() | 0b100; // bit 2 is always set
|
||||
p1 |= (!(vvvvv) & 0b1111) << 3;
|
||||
p1 |= (enc.rex_w() & 0b1) << 7;
|
||||
sink.put1(p1);
|
||||
|
||||
let mut p2 = masking.aaa_bits();
|
||||
p2 |= (!(vvvvv >> 4) & 0b1) << 3;
|
||||
p2 |= context.bits() << 4;
|
||||
p2 |= masking.z_bit() << 7;
|
||||
sink.put1(p2);
|
||||
|
||||
// Opcode.
|
||||
sink.put1(enc.opcode_byte());
|
||||
|
||||
// ModR/M byte.
|
||||
sink.put1(encode_modrm(3, reg & 7, rm & 7))
|
||||
}
|
||||
|
||||
/// From the legacy x86 backend: encode the RXBR' bits of the EVEX P0 byte.
|
||||
/// For an explanation of these bits, see section 2.6.1 in the Intel
|
||||
/// Software Development Manual, volume 2A. These bits can be used by
|
||||
/// different addressing modes (see section 2.6.2), requiring different
|
||||
/// `vex*` functions than this one.
|
||||
fn evex2(rm: u8, reg: u8) -> u8 {
|
||||
let b = !(rm >> 3) & 1;
|
||||
let x = !(rm >> 4) & 1;
|
||||
let r = !(reg >> 3) & 1;
|
||||
let r_ = !(reg >> 4) & 1;
|
||||
0x00 | r_ | (b << 1) | (x << 2) | (r << 3)
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
#[cfg(feature = "x86")]
|
||||
x86::run_benchmarks();
|
||||
|
||||
#[cfg(not(feature = "x86"))]
|
||||
println!(
|
||||
"Unable to run the x64-evex-encoding benchmark; the `x86` feature must be enabled in Cargo.",
|
||||
);
|
||||
}
|
||||
@@ -1,19 +1,20 @@
|
||||
[package]
|
||||
name = "cranelift-codegen-meta"
|
||||
authors = ["The Cranelift Project Developers"]
|
||||
version = "0.72.0"
|
||||
version = "0.73.0"
|
||||
description = "Metaprogram for cranelift-codegen code generator library"
|
||||
license = "Apache-2.0 WITH LLVM-exception"
|
||||
repository = "https://github.com/bytecodealliance/wasmtime"
|
||||
readme = "README.md"
|
||||
edition = "2018"
|
||||
|
||||
[package.metadata.docs.rs]
|
||||
rustdoc-args = [ "--document-private-items" ]
|
||||
# FIXME(rust-lang/cargo#9300): uncomment once that lands
|
||||
# [package.metadata.docs.rs]
|
||||
# rustdoc-args = [ "--document-private-items" ]
|
||||
|
||||
[dependencies]
|
||||
cranelift-codegen-shared = { path = "../shared", version = "0.72.0" }
|
||||
cranelift-entity = { path = "../../entity", version = "0.72.0" }
|
||||
cranelift-codegen-shared = { path = "../shared", version = "0.73.0" }
|
||||
cranelift-entity = { path = "../../entity", version = "0.73.0" }
|
||||
|
||||
[badges]
|
||||
maintenance = { status = "experimental" }
|
||||
|
||||
@@ -20,6 +20,7 @@ pub(crate) enum SpecificSetting {
|
||||
#[derive(Hash, PartialEq, Eq)]
|
||||
pub(crate) struct Setting {
|
||||
pub name: &'static str,
|
||||
pub description: &'static str,
|
||||
pub comment: &'static str,
|
||||
pub specific: SpecificSetting,
|
||||
pub byte_offset: u8,
|
||||
@@ -88,6 +89,7 @@ impl Into<PresetType> for PresetIndex {
|
||||
#[derive(Hash, PartialEq, Eq)]
|
||||
pub(crate) struct Preset {
|
||||
pub name: &'static str,
|
||||
pub description: &'static str,
|
||||
values: Vec<BoolSettingIndex>,
|
||||
}
|
||||
|
||||
@@ -169,6 +171,7 @@ pub(crate) enum ProtoSpecificSetting {
|
||||
/// This is the information provided during building for a setting.
|
||||
struct ProtoSetting {
|
||||
name: &'static str,
|
||||
description: &'static str,
|
||||
comment: &'static str,
|
||||
specific: ProtoSpecificSetting,
|
||||
}
|
||||
@@ -251,11 +254,13 @@ impl SettingGroupBuilder {
|
||||
fn add_setting(
|
||||
&mut self,
|
||||
name: &'static str,
|
||||
description: &'static str,
|
||||
comment: &'static str,
|
||||
specific: ProtoSpecificSetting,
|
||||
) {
|
||||
self.settings.push(ProtoSetting {
|
||||
name,
|
||||
description,
|
||||
comment,
|
||||
specific,
|
||||
})
|
||||
@@ -264,6 +269,7 @@ impl SettingGroupBuilder {
|
||||
pub fn add_bool(
|
||||
&mut self,
|
||||
name: &'static str,
|
||||
description: &'static str,
|
||||
comment: &'static str,
|
||||
default: bool,
|
||||
) -> BoolSettingIndex {
|
||||
@@ -271,28 +277,55 @@ impl SettingGroupBuilder {
|
||||
self.predicates.is_empty(),
|
||||
"predicates must be added after the boolean settings"
|
||||
);
|
||||
self.add_setting(name, comment, ProtoSpecificSetting::Bool(default));
|
||||
self.add_setting(
|
||||
name,
|
||||
description,
|
||||
comment,
|
||||
ProtoSpecificSetting::Bool(default),
|
||||
);
|
||||
BoolSettingIndex(self.settings.len() - 1)
|
||||
}
|
||||
|
||||
pub fn add_enum(
|
||||
&mut self,
|
||||
name: &'static str,
|
||||
description: &'static str,
|
||||
comment: &'static str,
|
||||
values: Vec<&'static str>,
|
||||
) {
|
||||
self.add_setting(name, comment, ProtoSpecificSetting::Enum(values));
|
||||
self.add_setting(
|
||||
name,
|
||||
description,
|
||||
comment,
|
||||
ProtoSpecificSetting::Enum(values),
|
||||
);
|
||||
}
|
||||
|
||||
pub fn add_num(&mut self, name: &'static str, comment: &'static str, default: u8) {
|
||||
self.add_setting(name, comment, ProtoSpecificSetting::Num(default));
|
||||
pub fn add_num(
|
||||
&mut self,
|
||||
name: &'static str,
|
||||
description: &'static str,
|
||||
comment: &'static str,
|
||||
default: u8,
|
||||
) {
|
||||
self.add_setting(
|
||||
name,
|
||||
description,
|
||||
comment,
|
||||
ProtoSpecificSetting::Num(default),
|
||||
);
|
||||
}
|
||||
|
||||
pub fn add_predicate(&mut self, name: &'static str, node: PredicateNode) {
|
||||
self.predicates.push(ProtoPredicate { name, node });
|
||||
}
|
||||
|
||||
pub fn add_preset(&mut self, name: &'static str, args: Vec<PresetType>) -> PresetIndex {
|
||||
pub fn add_preset(
|
||||
&mut self,
|
||||
name: &'static str,
|
||||
description: &'static str,
|
||||
args: Vec<PresetType>,
|
||||
) -> PresetIndex {
|
||||
let mut values = Vec::new();
|
||||
for arg in args {
|
||||
match arg {
|
||||
@@ -302,7 +335,11 @@ impl SettingGroupBuilder {
|
||||
PresetType::BoolSetting(index) => values.push(index),
|
||||
}
|
||||
}
|
||||
self.presets.push(Preset { name, values });
|
||||
self.presets.push(Preset {
|
||||
name,
|
||||
description,
|
||||
values,
|
||||
});
|
||||
PresetIndex(self.presets.len() - 1)
|
||||
}
|
||||
|
||||
@@ -347,6 +384,7 @@ impl SettingGroupBuilder {
|
||||
|
||||
group.settings.push(Setting {
|
||||
name: s.name,
|
||||
description: s.description,
|
||||
comment: s.comment,
|
||||
byte_offset,
|
||||
specific,
|
||||
@@ -367,6 +405,7 @@ impl SettingGroupBuilder {
|
||||
};
|
||||
group.settings.push(Setting {
|
||||
name: s.name,
|
||||
description: s.description,
|
||||
comment: s.comment,
|
||||
byte_offset: byte_offset + predicate_number / 8,
|
||||
specific: SpecificSetting::Bool(BoolSetting {
|
||||
|
||||
@@ -70,6 +70,33 @@ fn gen_constructor(group: &SettingGroup, parent: ParentGroup, fmt: &mut Formatte
|
||||
fmtln!(fmt, "}");
|
||||
}
|
||||
|
||||
/// Generates the `iter` function.
|
||||
fn gen_iterator(group: &SettingGroup, fmt: &mut Formatter) {
|
||||
fmtln!(fmt, "impl Flags {");
|
||||
fmt.indent(|fmt| {
|
||||
fmt.doc_comment("Iterates the setting values.");
|
||||
fmtln!(fmt, "pub fn iter(&self) -> impl Iterator<Item = Value> {");
|
||||
fmt.indent(|fmt| {
|
||||
fmtln!(fmt, "let mut bytes = [0; {}];", group.settings_size);
|
||||
fmtln!(fmt, "bytes.copy_from_slice(&self.bytes[0..{}]);", group.settings_size);
|
||||
fmtln!(fmt, "DESCRIPTORS.iter().filter_map(move |d| {");
|
||||
fmt.indent(|fmt| {
|
||||
fmtln!(fmt, "let values = match &d.detail {");
|
||||
fmt.indent(|fmt| {
|
||||
fmtln!(fmt, "detail::Detail::Preset => return None,");
|
||||
fmtln!(fmt, "detail::Detail::Enum { last, enumerators } => Some(TEMPLATE.enums(*last, *enumerators)),");
|
||||
fmtln!(fmt, "_ => None");
|
||||
});
|
||||
fmtln!(fmt, "};");
|
||||
fmtln!(fmt, "Some(Value{ name: d.name, detail: d.detail, values, value: bytes[d.offset as usize] })");
|
||||
});
|
||||
fmtln!(fmt, "})");
|
||||
});
|
||||
fmtln!(fmt, "}");
|
||||
});
|
||||
fmtln!(fmt, "}");
|
||||
}
|
||||
|
||||
/// Emit Display and FromStr implementations for enum settings.
|
||||
fn gen_to_and_from_str(name: &str, values: &[&'static str], fmt: &mut Formatter) {
|
||||
fmtln!(fmt, "impl fmt::Display for {} {{", name);
|
||||
@@ -136,7 +163,7 @@ fn gen_enum_types(group: &SettingGroup, fmt: &mut Formatter) {
|
||||
|
||||
/// Emit a getter function for `setting`.
|
||||
fn gen_getter(setting: &Setting, fmt: &mut Formatter) {
|
||||
fmt.doc_comment(setting.comment);
|
||||
fmt.doc_comment(format!("{}\n{}", setting.description, setting.comment));
|
||||
match setting.specific {
|
||||
SpecificSetting::Bool(BoolSetting {
|
||||
predicate_number, ..
|
||||
@@ -254,6 +281,7 @@ fn gen_descriptors(group: &SettingGroup, fmt: &mut Formatter) {
|
||||
fmtln!(fmt, "detail::Descriptor {");
|
||||
fmt.indent(|fmt| {
|
||||
fmtln!(fmt, "name: \"{}\",", setting.name);
|
||||
fmtln!(fmt, "description: \"{}\",", setting.description);
|
||||
fmtln!(fmt, "offset: {},", setting.byte_offset);
|
||||
match setting.specific {
|
||||
SpecificSetting::Bool(BoolSetting { bit_offset, .. }) => {
|
||||
@@ -286,6 +314,7 @@ fn gen_descriptors(group: &SettingGroup, fmt: &mut Formatter) {
|
||||
fmtln!(fmt, "detail::Descriptor {");
|
||||
fmt.indent(|fmt| {
|
||||
fmtln!(fmt, "name: \"{}\",", preset.name);
|
||||
fmtln!(fmt, "description: \"{}\",", preset.description);
|
||||
fmtln!(fmt, "offset: {},", (idx as u8) * group.settings_size);
|
||||
fmtln!(fmt, "detail: detail::Detail::Preset,");
|
||||
});
|
||||
@@ -427,6 +456,7 @@ fn gen_group(group: &SettingGroup, parent: ParentGroup, fmt: &mut Formatter) {
|
||||
fmtln!(fmt, "}");
|
||||
|
||||
gen_constructor(group, parent, fmt);
|
||||
gen_iterator(group, fmt);
|
||||
gen_enum_types(group, fmt);
|
||||
gen_getters(group, fmt);
|
||||
gen_descriptors(group, fmt);
|
||||
|
||||
@@ -9,7 +9,7 @@ use crate::shared::Definitions as SharedDefinitions;
|
||||
|
||||
fn define_settings(_shared: &SettingGroup) -> SettingGroup {
|
||||
let mut setting = SettingGroupBuilder::new("arm64");
|
||||
let has_lse = setting.add_bool("has_lse", "Large System Extensions", false);
|
||||
let has_lse = setting.add_bool("has_lse", "Has Large System Extensions support.", "", false);
|
||||
|
||||
setting.add_predicate("use_lse", predicate!(has_lse));
|
||||
setting.build()
|
||||
|
||||
@@ -6,6 +6,7 @@ use std::fmt;
|
||||
mod arm32;
|
||||
mod arm64;
|
||||
mod riscv;
|
||||
mod s390x;
|
||||
pub(crate) mod x86;
|
||||
|
||||
/// Represents known ISA target.
|
||||
@@ -15,6 +16,7 @@ pub enum Isa {
|
||||
X86,
|
||||
Arm32,
|
||||
Arm64,
|
||||
S390x,
|
||||
}
|
||||
|
||||
impl Isa {
|
||||
@@ -31,6 +33,7 @@ impl Isa {
|
||||
match arch {
|
||||
"riscv" => Some(Isa::Riscv),
|
||||
"aarch64" => Some(Isa::Arm64),
|
||||
"s390x" => Some(Isa::S390x),
|
||||
x if ["x86_64", "i386", "i586", "i686"].contains(&x) => Some(Isa::X86),
|
||||
x if x.starts_with("arm") || arch.starts_with("thumb") => Some(Isa::Arm32),
|
||||
_ => None,
|
||||
@@ -39,7 +42,7 @@ impl Isa {
|
||||
|
||||
/// Returns all supported isa targets.
|
||||
pub fn all() -> &'static [Isa] {
|
||||
&[Isa::Riscv, Isa::X86, Isa::Arm32, Isa::Arm64]
|
||||
&[Isa::Riscv, Isa::X86, Isa::Arm32, Isa::Arm64, Isa::S390x]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -51,6 +54,7 @@ impl fmt::Display for Isa {
|
||||
Isa::X86 => write!(f, "x86"),
|
||||
Isa::Arm32 => write!(f, "arm32"),
|
||||
Isa::Arm64 => write!(f, "arm64"),
|
||||
Isa::S390x => write!(f, "s390x"),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -62,6 +66,7 @@ pub(crate) fn define(isas: &[Isa], shared_defs: &mut SharedDefinitions) -> Vec<T
|
||||
Isa::X86 => x86::define(shared_defs),
|
||||
Isa::Arm32 => arm32::define(shared_defs),
|
||||
Isa::Arm64 => arm64::define(shared_defs),
|
||||
Isa::S390x => s390x::define(shared_defs),
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
@@ -17,33 +17,39 @@ fn define_settings(shared: &SettingGroup) -> SettingGroup {
|
||||
let supports_m = setting.add_bool(
|
||||
"supports_m",
|
||||
"CPU supports the 'M' extension (mul/div)",
|
||||
"",
|
||||
false,
|
||||
);
|
||||
let supports_a = setting.add_bool(
|
||||
"supports_a",
|
||||
"CPU supports the 'A' extension (atomics)",
|
||||
"",
|
||||
false,
|
||||
);
|
||||
let supports_f = setting.add_bool(
|
||||
"supports_f",
|
||||
"CPU supports the 'F' extension (float)",
|
||||
"",
|
||||
false,
|
||||
);
|
||||
let supports_d = setting.add_bool(
|
||||
"supports_d",
|
||||
"CPU supports the 'D' extension (double)",
|
||||
"",
|
||||
false,
|
||||
);
|
||||
|
||||
let enable_m = setting.add_bool(
|
||||
"enable_m",
|
||||
"Enable the use of 'M' instructions if available",
|
||||
"",
|
||||
true,
|
||||
);
|
||||
|
||||
setting.add_bool(
|
||||
"enable_e",
|
||||
"Enable the 'RV32E' instruction set with only 16 registers",
|
||||
"",
|
||||
false,
|
||||
);
|
||||
|
||||
|
||||
31
cranelift/codegen/meta/src/isa/s390x/mod.rs
Normal file
31
cranelift/codegen/meta/src/isa/s390x/mod.rs
Normal file
@@ -0,0 +1,31 @@
|
||||
use crate::cdsl::cpu_modes::CpuMode;
|
||||
use crate::cdsl::instructions::{InstructionGroupBuilder, InstructionPredicateMap};
|
||||
use crate::cdsl::isa::TargetIsa;
|
||||
use crate::cdsl::recipes::Recipes;
|
||||
use crate::cdsl::regs::IsaRegsBuilder;
|
||||
use crate::cdsl::settings::SettingGroupBuilder;
|
||||
|
||||
use crate::shared::Definitions as SharedDefinitions;
|
||||
|
||||
pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
|
||||
let inst_group = InstructionGroupBuilder::new(&mut shared_defs.all_instructions).build();
|
||||
let settings = SettingGroupBuilder::new("s390x").build();
|
||||
let regs = IsaRegsBuilder::new().build();
|
||||
let recipes = Recipes::new();
|
||||
let encodings_predicates = InstructionPredicateMap::new();
|
||||
|
||||
let mut mode = CpuMode::new("s390x");
|
||||
let expand = shared_defs.transform_groups.by_name("expand");
|
||||
mode.legalize_default(expand);
|
||||
let cpu_modes = vec![mode];
|
||||
|
||||
TargetIsa::new(
|
||||
"s390x",
|
||||
inst_group,
|
||||
settings,
|
||||
regs,
|
||||
recipes,
|
||||
cpu_modes,
|
||||
encodings_predicates,
|
||||
)
|
||||
}
|
||||
@@ -4,37 +4,77 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
|
||||
let mut settings = SettingGroupBuilder::new("x86");
|
||||
|
||||
// CPUID.01H:ECX
|
||||
let has_sse3 = settings.add_bool("has_sse3", "SSE3: CPUID.01H:ECX.SSE3[bit 0]", false);
|
||||
let has_ssse3 = settings.add_bool("has_ssse3", "SSSE3: CPUID.01H:ECX.SSSE3[bit 9]", false);
|
||||
let has_sse41 = settings.add_bool("has_sse41", "SSE4.1: CPUID.01H:ECX.SSE4_1[bit 19]", false);
|
||||
let has_sse42 = settings.add_bool("has_sse42", "SSE4.2: CPUID.01H:ECX.SSE4_2[bit 20]", false);
|
||||
let has_avx = settings.add_bool("has_avx", "AVX: CPUID.01H:ECX.AVX[bit 28]", false);
|
||||
let has_avx2 = settings.add_bool("has_avx2", "AVX2: CPUID.07H:EBX.AVX2[bit 5]", false);
|
||||
let has_sse3 = settings.add_bool(
|
||||
"has_sse3",
|
||||
"Has support for SSE3.",
|
||||
"SSE3: CPUID.01H:ECX.SSE3[bit 0]",
|
||||
false,
|
||||
);
|
||||
let has_ssse3 = settings.add_bool(
|
||||
"has_ssse3",
|
||||
"Has support for SSSE3.",
|
||||
"SSSE3: CPUID.01H:ECX.SSSE3[bit 9]",
|
||||
false,
|
||||
);
|
||||
let has_sse41 = settings.add_bool(
|
||||
"has_sse41",
|
||||
"Has support for SSE4.1.",
|
||||
"SSE4.1: CPUID.01H:ECX.SSE4_1[bit 19]",
|
||||
false,
|
||||
);
|
||||
let has_sse42 = settings.add_bool(
|
||||
"has_sse42",
|
||||
"Has support for SSE4.2.",
|
||||
"SSE4.2: CPUID.01H:ECX.SSE4_2[bit 20]",
|
||||
false,
|
||||
);
|
||||
let has_avx = settings.add_bool(
|
||||
"has_avx",
|
||||
"Has support for AVX.",
|
||||
"AVX: CPUID.01H:ECX.AVX[bit 28]",
|
||||
false,
|
||||
);
|
||||
let has_avx2 = settings.add_bool(
|
||||
"has_avx2",
|
||||
"Has support for AVX2.",
|
||||
"AVX2: CPUID.07H:EBX.AVX2[bit 5]",
|
||||
false,
|
||||
);
|
||||
let has_avx512dq = settings.add_bool(
|
||||
"has_avx512dq",
|
||||
"Has support for AVX512DQ.",
|
||||
"AVX512DQ: CPUID.07H:EBX.AVX512DQ[bit 17]",
|
||||
false,
|
||||
);
|
||||
let has_avx512vl = settings.add_bool(
|
||||
"has_avx512vl",
|
||||
"Has support for AVX512VL.",
|
||||
"AVX512VL: CPUID.07H:EBX.AVX512VL[bit 31]",
|
||||
false,
|
||||
);
|
||||
let has_avx512f = settings.add_bool(
|
||||
"has_avx512f",
|
||||
"Has support for AVX512F.",
|
||||
"AVX512F: CPUID.07H:EBX.AVX512F[bit 16]",
|
||||
false,
|
||||
);
|
||||
let has_popcnt = settings.add_bool("has_popcnt", "POPCNT: CPUID.01H:ECX.POPCNT[bit 23]", false);
|
||||
let has_popcnt = settings.add_bool(
|
||||
"has_popcnt",
|
||||
"Has support for POPCNT.",
|
||||
"POPCNT: CPUID.01H:ECX.POPCNT[bit 23]",
|
||||
false,
|
||||
);
|
||||
|
||||
// CPUID.(EAX=07H, ECX=0H):EBX
|
||||
let has_bmi1 = settings.add_bool(
|
||||
"has_bmi1",
|
||||
"Has support for BMI1.",
|
||||
"BMI1: CPUID.(EAX=07H, ECX=0H):EBX.BMI1[bit 3]",
|
||||
false,
|
||||
);
|
||||
let has_bmi2 = settings.add_bool(
|
||||
"has_bmi2",
|
||||
"Has support for BMI2.",
|
||||
"BMI2: CPUID.(EAX=07H, ECX=0H):EBX.BMI2[bit 8]",
|
||||
false,
|
||||
);
|
||||
@@ -42,6 +82,7 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
|
||||
// CPUID.EAX=80000001H:ECX
|
||||
let has_lzcnt = settings.add_bool(
|
||||
"has_lzcnt",
|
||||
"Has support for LZCNT.",
|
||||
"LZCNT: CPUID.EAX=80000001H:ECX.LZCNT[bit 5]",
|
||||
false,
|
||||
);
|
||||
@@ -85,7 +126,7 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
|
||||
settings.add_predicate("use_lzcnt", predicate!(has_lzcnt));
|
||||
|
||||
// Some shared boolean values are used in x86 instruction predicates, so we need to group them
|
||||
// in the same TargetIsa, for compabitibity with code generated by meta-python.
|
||||
// in the same TargetIsa, for compatibility with code generated by meta-python.
|
||||
// TODO Once all the meta generation code has been migrated from Python to Rust, we can put it
|
||||
// back in the shared SettingGroup, and use it in x86 instruction predicates.
|
||||
|
||||
@@ -104,21 +145,40 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
|
||||
|
||||
// Presets corresponding to x86 CPUs.
|
||||
|
||||
settings.add_preset("baseline", preset!());
|
||||
settings.add_preset(
|
||||
"baseline",
|
||||
"A baseline preset with no extensions enabled.",
|
||||
preset!(),
|
||||
);
|
||||
let nehalem = settings.add_preset(
|
||||
"nehalem",
|
||||
"Nehalem microarchitecture.",
|
||||
preset!(has_sse3 && has_ssse3 && has_sse41 && has_sse42 && has_popcnt),
|
||||
);
|
||||
let haswell = settings.add_preset(
|
||||
"haswell",
|
||||
"Haswell microarchitecture.",
|
||||
preset!(nehalem && has_bmi1 && has_bmi2 && has_lzcnt),
|
||||
);
|
||||
let broadwell = settings.add_preset("broadwell", preset!(haswell));
|
||||
let skylake = settings.add_preset("skylake", preset!(broadwell));
|
||||
let cannonlake = settings.add_preset("cannonlake", preset!(skylake));
|
||||
settings.add_preset("icelake", preset!(cannonlake));
|
||||
let broadwell = settings.add_preset(
|
||||
"broadwell",
|
||||
"Broadwell microarchitecture.",
|
||||
preset!(haswell),
|
||||
);
|
||||
let skylake = settings.add_preset("skylake", "Skylake microarchitecture.", preset!(broadwell));
|
||||
let cannonlake = settings.add_preset(
|
||||
"cannonlake",
|
||||
"Cannon Lake microarchitecture.",
|
||||
preset!(skylake),
|
||||
);
|
||||
settings.add_preset(
|
||||
"icelake",
|
||||
"Ice Lake microarchitecture.",
|
||||
preset!(cannonlake),
|
||||
);
|
||||
settings.add_preset(
|
||||
"znver1",
|
||||
"Zen (first generation) microarchitecture.",
|
||||
preset!(
|
||||
has_sse3
|
||||
&& has_ssse3
|
||||
|
||||
@@ -116,6 +116,9 @@ pub fn generate(
|
||||
isa::Isa::Arm64 => {
|
||||
// aarch64 doesn't have platform-specific settings.
|
||||
}
|
||||
isa::Isa::S390x => {
|
||||
// s390x doesn't have platform-specific settings.
|
||||
}
|
||||
isa::Isa::Arm32 | isa::Isa::Riscv => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3582,7 +3582,7 @@ pub(crate) fn define(
|
||||
"fmin_pseudo",
|
||||
r#"
|
||||
Floating point pseudo-minimum, propagating NaNs. This behaves differently from ``fmin``.
|
||||
See https://github.com/WebAssembly/simd/pull/122 for background.
|
||||
See <https://github.com/WebAssembly/simd/pull/122> for background.
|
||||
|
||||
The behaviour is defined as ``fmin_pseudo(a, b) = (b < a) ? b : a``, and the behaviour
|
||||
for zero or NaN inputs follows from the behaviour of ``<`` with such inputs.
|
||||
@@ -3614,7 +3614,7 @@ pub(crate) fn define(
|
||||
"fmax_pseudo",
|
||||
r#"
|
||||
Floating point pseudo-maximum, propagating NaNs. This behaves differently from ``fmax``.
|
||||
See https://github.com/WebAssembly/simd/pull/122 for background.
|
||||
See <https://github.com/WebAssembly/simd/pull/122> for background.
|
||||
|
||||
The behaviour is defined as ``fmax_pseudo(a, b) = (a < b) ? b : a``, and the behaviour
|
||||
for zero or NaN inputs follows from the behaviour of ``<`` with such inputs.
|
||||
@@ -4102,7 +4102,7 @@ pub(crate) fn define(
|
||||
This will double the lane width and halve the number of lanes. So the resulting
|
||||
vector has the same number of bits as `x` and `y` do (individually).
|
||||
|
||||
See https://github.com/WebAssembly/simd/pull/127 for background info.
|
||||
See <https://github.com/WebAssembly/simd/pull/127> for background info.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
@@ -4325,6 +4325,26 @@ pub(crate) fn define(
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"fcvt_low_from_sint",
|
||||
r#"
|
||||
Converts packed signed doubleword integers to packed double precision floating point.
|
||||
|
||||
Considering only the low half of the register, each lane in `x` is interpreted as a
|
||||
signed doubleword integer that is then converted to a double precision float. This
|
||||
instruction differs from fcvt_from_sint in that it converts half the number of lanes
|
||||
which are converted to occupy twice the number of bits. No rounding should be needed
|
||||
for the resulting float.
|
||||
|
||||
The result type will have half the number of vector lanes as the input.
|
||||
"#,
|
||||
&formats.unary,
|
||||
)
|
||||
.operands_in(vec![x])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let WideInt = &TypeVar::new(
|
||||
"WideInt",
|
||||
"An integer type with lanes from `i16` upwards",
|
||||
|
||||
@@ -5,8 +5,8 @@ pub(crate) fn define() -> SettingGroup {
|
||||
|
||||
settings.add_enum(
|
||||
"regalloc",
|
||||
r#"Register allocator to use with the MachInst backend.
|
||||
|
||||
"Register allocator to use with the MachInst backend.",
|
||||
r#"
|
||||
This selects the register allocator as an option among those offered by the `regalloc.rs`
|
||||
crate. Please report register allocation bugs to the maintainers of this crate whenever
|
||||
possible.
|
||||
@@ -38,22 +38,21 @@ pub(crate) fn define() -> SettingGroup {
|
||||
|
||||
settings.add_enum(
|
||||
"opt_level",
|
||||
"Optimization level for generated code.",
|
||||
r#"
|
||||
Optimization level:
|
||||
Supported levels:
|
||||
|
||||
- none: Minimise compile time by disabling most optimizations.
|
||||
- speed: Generate the fastest possible code
|
||||
- speed_and_size: like "speed", but also perform transformations
|
||||
aimed at reducing code size.
|
||||
- `none`: Minimise compile time by disabling most optimizations.
|
||||
- `speed`: Generate the fastest possible code
|
||||
- `speed_and_size`: like "speed", but also perform transformations aimed at reducing code size.
|
||||
"#,
|
||||
vec!["none", "speed", "speed_and_size"],
|
||||
);
|
||||
|
||||
settings.add_bool(
|
||||
"enable_verifier",
|
||||
"Run the Cranelift IR verifier at strategic times during compilation.",
|
||||
r#"
|
||||
Run the Cranelift IR verifier at strategic times during compilation.
|
||||
|
||||
This makes compilation slower but catches many bugs. The verifier is always enabled by
|
||||
default, which is useful during development.
|
||||
"#,
|
||||
@@ -65,15 +64,15 @@ pub(crate) fn define() -> SettingGroup {
|
||||
// `colocated` flag on external functions and global values.
|
||||
settings.add_bool(
|
||||
"is_pic",
|
||||
"Enable Position-Independent Code generation",
|
||||
"Enable Position-Independent Code generation.",
|
||||
"",
|
||||
false,
|
||||
);
|
||||
|
||||
settings.add_bool(
|
||||
"use_colocated_libcalls",
|
||||
"Use colocated libcalls.",
|
||||
r#"
|
||||
Use colocated libcalls.
|
||||
|
||||
Generate code that assumes that libcalls can be declared "colocated",
|
||||
meaning they will be defined along with the current function, such that
|
||||
they can use more efficient addressing.
|
||||
@@ -83,10 +82,8 @@ pub(crate) fn define() -> SettingGroup {
|
||||
|
||||
settings.add_bool(
|
||||
"avoid_div_traps",
|
||||
"Generate explicit checks around native division instructions to avoid their trapping.",
|
||||
r#"
|
||||
Generate explicit checks around native division instructions to avoid
|
||||
their trapping.
|
||||
|
||||
This is primarily used by SpiderMonkey which doesn't install a signal
|
||||
handler for SIGFPE, but expects a SIGILL trap for division by zero.
|
||||
|
||||
@@ -98,9 +95,8 @@ pub(crate) fn define() -> SettingGroup {
|
||||
|
||||
settings.add_bool(
|
||||
"enable_float",
|
||||
"Enable the use of floating-point instructions.",
|
||||
r#"
|
||||
Enable the use of floating-point instructions
|
||||
|
||||
Disabling use of floating-point instructions is not yet implemented.
|
||||
"#,
|
||||
true,
|
||||
@@ -108,9 +104,8 @@ pub(crate) fn define() -> SettingGroup {
|
||||
|
||||
settings.add_bool(
|
||||
"enable_nan_canonicalization",
|
||||
"Enable NaN canonicalization.",
|
||||
r#"
|
||||
Enable NaN canonicalization
|
||||
|
||||
This replaces NaNs with a single canonical value, for users requiring
|
||||
entirely deterministic WebAssembly computation. This is not required
|
||||
by the WebAssembly spec, so it is not enabled by default.
|
||||
@@ -120,8 +115,8 @@ pub(crate) fn define() -> SettingGroup {
|
||||
|
||||
settings.add_bool(
|
||||
"enable_pinned_reg",
|
||||
r#"Enable the use of the pinned register.
|
||||
|
||||
"Enable the use of the pinned register.",
|
||||
r#"
|
||||
This register is excluded from register allocation, and is completely under the control of
|
||||
the end-user. It is possible to read it via the get_pinned_reg instruction, and to set it
|
||||
with the set_pinned_reg instruction.
|
||||
@@ -131,8 +126,8 @@ pub(crate) fn define() -> SettingGroup {
|
||||
|
||||
settings.add_bool(
|
||||
"use_pinned_reg_as_heap_base",
|
||||
r#"Use the pinned register as the heap base.
|
||||
|
||||
"Use the pinned register as the heap base.",
|
||||
r#"
|
||||
Enabling this requires the enable_pinned_reg setting to be set to true. It enables a custom
|
||||
legalization of the `heap_addr` instruction so it will use the pinned register as the heap
|
||||
base, instead of fetching it from a global value.
|
||||
@@ -144,19 +139,24 @@ pub(crate) fn define() -> SettingGroup {
|
||||
false,
|
||||
);
|
||||
|
||||
settings.add_bool("enable_simd", "Enable the use of SIMD instructions.", false);
|
||||
settings.add_bool(
|
||||
"enable_simd",
|
||||
"Enable the use of SIMD instructions.",
|
||||
"",
|
||||
false,
|
||||
);
|
||||
|
||||
settings.add_bool(
|
||||
"enable_atomics",
|
||||
"Enable the use of atomic instructions",
|
||||
"",
|
||||
true,
|
||||
);
|
||||
|
||||
settings.add_bool(
|
||||
"enable_safepoints",
|
||||
"Enable safepoint instruction insertions.",
|
||||
r#"
|
||||
Enable safepoint instruction insertions.
|
||||
|
||||
This will allow the emit_stack_maps() function to insert the safepoint
|
||||
instruction on top of calls and interrupt traps in order to display the
|
||||
live reference values at that point in the program.
|
||||
@@ -166,9 +166,8 @@ pub(crate) fn define() -> SettingGroup {
|
||||
|
||||
settings.add_enum(
|
||||
"tls_model",
|
||||
r#"
|
||||
Defines the model used to perform TLS accesses.
|
||||
"#,
|
||||
"Defines the model used to perform TLS accesses.",
|
||||
"",
|
||||
vec!["none", "elf_gd", "macho", "coff"],
|
||||
);
|
||||
|
||||
@@ -176,9 +175,9 @@ pub(crate) fn define() -> SettingGroup {
|
||||
|
||||
settings.add_enum(
|
||||
"libcall_call_conv",
|
||||
"Defines the calling convention to use for LibCalls call expansion.",
|
||||
r#"
|
||||
Defines the calling convention to use for LibCalls call expansion,
|
||||
since it may be different from the ISA default calling convention.
|
||||
This may be different from the ISA default calling convention.
|
||||
|
||||
The default value is to use the same calling convention as the ISA
|
||||
default calling convention.
|
||||
@@ -192,6 +191,7 @@ pub(crate) fn define() -> SettingGroup {
|
||||
"cold",
|
||||
"system_v",
|
||||
"windows_fastcall",
|
||||
"apple_aarch64",
|
||||
"baldrdash_system_v",
|
||||
"baldrdash_windows",
|
||||
"baldrdash_2020",
|
||||
@@ -201,9 +201,8 @@ pub(crate) fn define() -> SettingGroup {
|
||||
|
||||
settings.add_num(
|
||||
"baldrdash_prologue_words",
|
||||
"Number of pointer-sized words pushed by the baldrdash prologue.",
|
||||
r#"
|
||||
Number of pointer-sized words pushed by the baldrdash prologue.
|
||||
|
||||
Functions with the `baldrdash` calling convention don't generate their
|
||||
own prologue and epilogue. They depend on externally generated code
|
||||
that pushes a fixed number of words in the prologue and restores them
|
||||
@@ -218,9 +217,8 @@ pub(crate) fn define() -> SettingGroup {
|
||||
|
||||
settings.add_bool(
|
||||
"enable_llvm_abi_extensions",
|
||||
"Enable various ABI extensions defined by LLVM's behavior.",
|
||||
r#"
|
||||
Enable various ABI extensions defined by LLVM's behavior.
|
||||
|
||||
In some cases, LLVM's implementation of an ABI (calling convention)
|
||||
goes beyond a standard and supports additional argument types or
|
||||
behavior. This option instructs Cranelift codegen to follow LLVM's
|
||||
@@ -237,12 +235,12 @@ pub(crate) fn define() -> SettingGroup {
|
||||
|
||||
settings.add_bool(
|
||||
"unwind_info",
|
||||
"Generate unwind information.",
|
||||
r#"
|
||||
Generate unwind info. This increases metadata size and compile time,
|
||||
but allows for the debugger to trace frames, is needed for GC tracing
|
||||
that relies on libunwind (such as in Wasmtime), and is
|
||||
unconditionally needed on certain platforms (such as Windows) that
|
||||
must always be able to unwind.
|
||||
This increases metadata size and compile time, but allows for the
|
||||
debugger to trace frames, is needed for GC tracing that relies on
|
||||
libunwind (such as in Wasmtime), and is unconditionally needed on
|
||||
certain platforms (such as Windows) that must always be able to unwind.
|
||||
"#,
|
||||
true,
|
||||
);
|
||||
@@ -252,6 +250,7 @@ pub(crate) fn define() -> SettingGroup {
|
||||
settings.add_bool(
|
||||
"emit_all_ones_funcaddrs",
|
||||
"Emit not-yet-relocated function addresses as all-ones bit patterns.",
|
||||
"",
|
||||
false,
|
||||
);
|
||||
|
||||
@@ -259,27 +258,22 @@ pub(crate) fn define() -> SettingGroup {
|
||||
|
||||
settings.add_bool(
|
||||
"enable_probestack",
|
||||
r#"
|
||||
Enable the use of stack probes, for calling conventions which support this
|
||||
functionality.
|
||||
"#,
|
||||
"Enable the use of stack probes for supported calling conventions.",
|
||||
"",
|
||||
true,
|
||||
);
|
||||
|
||||
settings.add_bool(
|
||||
"probestack_func_adjusts_sp",
|
||||
r#"
|
||||
Set this to true of the stack probe function modifies the stack pointer
|
||||
itself.
|
||||
"#,
|
||||
"Enable if the stack probe adjusts the stack pointer.",
|
||||
"",
|
||||
false,
|
||||
);
|
||||
|
||||
settings.add_num(
|
||||
"probestack_size_log2",
|
||||
"The log2 of the size of the stack guard region.",
|
||||
r#"
|
||||
The log2 of the size of the stack guard region.
|
||||
|
||||
Stack frames larger than this size will have stack overflow checked
|
||||
by calling the probestack function.
|
||||
|
||||
@@ -293,6 +287,7 @@ pub(crate) fn define() -> SettingGroup {
|
||||
settings.add_bool(
|
||||
"enable_jump_tables",
|
||||
"Enable the use of jump tables in generated machine code.",
|
||||
"",
|
||||
true,
|
||||
);
|
||||
|
||||
@@ -300,9 +295,8 @@ pub(crate) fn define() -> SettingGroup {
|
||||
|
||||
settings.add_bool(
|
||||
"enable_heap_access_spectre_mitigation",
|
||||
"Enable Spectre mitigation on heap bounds checks.",
|
||||
r#"
|
||||
Enable Spectre mitigation on heap bounds checks.
|
||||
|
||||
This is a no-op for any heap that needs no bounds checks; e.g.,
|
||||
if the limit is static and the guard region is large enough that
|
||||
the index cannot reach past it.
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
authors = ["The Cranelift Project Developers"]
|
||||
name = "cranelift-codegen-shared"
|
||||
version = "0.72.0"
|
||||
version = "0.73.0"
|
||||
description = "For code shared between cranelift-codegen-meta and cranelift-codegen"
|
||||
license = "Apache-2.0 WITH LLVM-exception"
|
||||
repository = "https://github.com/bytecodealliance/wasmtime"
|
||||
|
||||
@@ -60,6 +60,8 @@ pub enum Reloc {
|
||||
Arm64Call,
|
||||
/// RISC-V call target
|
||||
RiscvCall,
|
||||
/// s390x PC-relative 4-byte offset
|
||||
S390xPCRel32Dbl,
|
||||
|
||||
/// Elf x86_64 32 bit signed PC relative offset to two GOT entries for GD symbol.
|
||||
ElfX86_64TlsGd,
|
||||
@@ -75,6 +77,7 @@ impl fmt::Display for Reloc {
|
||||
match *self {
|
||||
Self::Abs4 => write!(f, "Abs4"),
|
||||
Self::Abs8 => write!(f, "Abs8"),
|
||||
Self::S390xPCRel32Dbl => write!(f, "PCRel32Dbl"),
|
||||
Self::X86PCRel4 => write!(f, "PCRel4"),
|
||||
Self::X86PCRelRodata4 => write!(f, "PCRelRodata4"),
|
||||
Self::X86CallPCRel4 => write!(f, "CallPCRel4"),
|
||||
|
||||
@@ -267,13 +267,7 @@ impl Context {
|
||||
isa: &dyn TargetIsa,
|
||||
) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {
|
||||
if let Some(backend) = isa.get_mach_backend() {
|
||||
use crate::isa::CallConv;
|
||||
use crate::machinst::UnwindInfoKind;
|
||||
let unwind_info_kind = match self.func.signature.call_conv {
|
||||
CallConv::Fast | CallConv::Cold | CallConv::SystemV => UnwindInfoKind::SystemV,
|
||||
CallConv::WindowsFastcall => UnwindInfoKind::Windows,
|
||||
_ => UnwindInfoKind::None,
|
||||
};
|
||||
let unwind_info_kind = isa.unwind_info_kind();
|
||||
let result = self.mach_compile_result.as_ref().unwrap();
|
||||
return backend.emit_unwind_info(result, unwind_info_kind);
|
||||
}
|
||||
|
||||
@@ -5,7 +5,6 @@ use crate::ir::{types, ConstantData, Type};
|
||||
use core::convert::TryInto;
|
||||
use core::fmt::{self, Display, Formatter};
|
||||
use core::ptr;
|
||||
use thiserror::Error;
|
||||
|
||||
/// Represent a data value. Where [Value] is an SSA reference, [DataValue] is the type + value
|
||||
/// that would be referred to by a [Value].
|
||||
@@ -97,15 +96,38 @@ impl DataValue {
|
||||
}
|
||||
|
||||
/// Record failures to cast [DataValue].
|
||||
#[derive(Error, Debug, PartialEq)]
|
||||
#[derive(Debug, PartialEq)]
|
||||
#[allow(missing_docs)]
|
||||
pub enum DataValueCastFailure {
|
||||
#[error("unable to cast data value of type {0} to type {1}")]
|
||||
TryInto(Type, Type),
|
||||
#[error("unable to cast i64({0}) to a data value of type {1}")]
|
||||
FromInteger(i64, Type),
|
||||
}
|
||||
|
||||
// This is manually implementing Error and Display instead of using thiserror to reduce the amount
|
||||
// of dependencies used by Cranelift.
|
||||
impl std::error::Error for DataValueCastFailure {}
|
||||
|
||||
impl Display for DataValueCastFailure {
|
||||
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||||
match self {
|
||||
DataValueCastFailure::TryInto(from, to) => {
|
||||
write!(
|
||||
f,
|
||||
"unable to cast data value of type {} to type {}",
|
||||
from, to
|
||||
)
|
||||
}
|
||||
DataValueCastFailure::FromInteger(val, to) => {
|
||||
write!(
|
||||
f,
|
||||
"unable to cast i64({}) to a data value of type {}",
|
||||
val, to
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper for creating conversion implementations for [DataValue].
|
||||
macro_rules! build_conversion_impl {
|
||||
( $rust_ty:ty, $data_value_ty:ident, $cranelift_ty:ident ) => {
|
||||
|
||||
@@ -146,7 +146,7 @@ impl StackSlot {
|
||||
/// [`VmContext`](super::GlobalValueData::VMContext) using
|
||||
/// [`FuncEnvironment::make_global`](https://docs.rs/cranelift-wasm/*/cranelift_wasm/trait.FuncEnvironment.html#tymethod.make_global).
|
||||
/// - When compiling to native code, you can use it for objects in static memory with
|
||||
/// [`Module::declare_data_in_func`](https://docs.rs/cranelift-module/*/cranelift_module/struct.Module.html#method.declare_data_in_func).
|
||||
/// [`Module::declare_data_in_func`](https://docs.rs/cranelift-module/*/cranelift_module/trait.Module.html#method.declare_data_in_func).
|
||||
/// - For any compilation target, it can be registered with
|
||||
/// [`FunctionBuilder::create_global_value`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.create_global_value).
|
||||
///
|
||||
@@ -264,9 +264,9 @@ impl JumpTable {
|
||||
///
|
||||
/// - [`FunctionBuilder::import_function`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.import_function)
|
||||
/// for external functions
|
||||
/// - [`Module::declare_func_in_func`](https://docs.rs/cranelift-module/*/cranelift_module/struct.Module.html#method.declare_func_in_func)
|
||||
/// - [`Module::declare_func_in_func`](https://docs.rs/cranelift-module/*/cranelift_module/trait.Module.html#method.declare_func_in_func)
|
||||
/// for functions declared elsewhere in the same native
|
||||
/// [`Module`](https://docs.rs/cranelift-module/*/cranelift_module/struct.Module.html)
|
||||
/// [`Module`](https://docs.rs/cranelift-module/*/cranelift_module/trait.Module.html)
|
||||
/// - [`FuncEnvironment::make_direct_func`](https://docs.rs/cranelift-wasm/*/cranelift_wasm/trait.FuncEnvironment.html#tymethod.make_direct_func)
|
||||
/// for functions declared in the same WebAssembly
|
||||
/// [`FuncEnvironment`](https://docs.rs/cranelift-wasm/*/cranelift_wasm/trait.FuncEnvironment.html#tymethod.make_direct_func)
|
||||
|
||||
@@ -135,12 +135,28 @@ impl Into<AMode> for StackAMode {
|
||||
// Returns the size of stack space needed to store the
|
||||
// `int_reg` and `vec_reg`.
|
||||
fn saved_reg_stack_size(
|
||||
call_conv: isa::CallConv,
|
||||
int_reg: &[Writable<RealReg>],
|
||||
vec_reg: &[Writable<RealReg>],
|
||||
) -> (usize, usize) {
|
||||
// Round up to multiple of 2, to keep 16-byte stack alignment.
|
||||
let int_save_bytes = (int_reg.len() + (int_reg.len() & 1)) * 8;
|
||||
let vec_save_bytes = vec_reg.len() * 16;
|
||||
// The Baldrdash ABIs require saving and restoring the whole 16-byte
|
||||
// SIMD & FP registers, so the necessary stack space is always a
|
||||
// multiple of the mandatory 16-byte stack alignment. However, the
|
||||
// Procedure Call Standard for the Arm 64-bit Architecture (AAPCS64,
|
||||
// including several related ABIs such as the one used by Windows)
|
||||
// mandates saving only the bottom 8 bytes of the vector registers,
|
||||
// so in that case we round up the number of registers to ensure proper
|
||||
// stack alignment (similarly to the situation with `int_reg`).
|
||||
let vec_reg_size = if call_conv.extends_baldrdash() { 16 } else { 8 };
|
||||
let vec_save_padding = if call_conv.extends_baldrdash() {
|
||||
0
|
||||
} else {
|
||||
vec_reg.len() & 1
|
||||
};
|
||||
let vec_save_bytes = (vec_reg.len() + vec_save_padding) * vec_reg_size;
|
||||
|
||||
(int_save_bytes, vec_save_bytes)
|
||||
}
|
||||
|
||||
@@ -171,6 +187,21 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
||||
let has_baldrdash_tls = call_conv == isa::CallConv::Baldrdash2020;
|
||||
|
||||
// See AArch64 ABI (https://c9x.me/compile/bib/abi-arm64.pdf), section 5.4.
|
||||
//
|
||||
// MacOS aarch64 is slightly different, see also
|
||||
// https://developer.apple.com/documentation/xcode/writing_arm64_code_for_apple_platforms.
|
||||
// We are diverging from the MacOS aarch64 implementation in the
|
||||
// following ways:
|
||||
// - sign- and zero- extensions of data types less than 32 bits are not
|
||||
// implemented yet.
|
||||
// - i128 arguments passing isn't implemented yet in the standard (non
|
||||
// MacOS) aarch64 ABI.
|
||||
// - we align the arguments stack space to a 16-byte boundary, while
|
||||
// the MacOS allows aligning only on 8 bytes. In practice it means we're
|
||||
// slightly overallocating when calling, which is fine, and doesn't
|
||||
// break our other invariants that the stack is always allocated in
|
||||
// 16-byte chunks.
|
||||
|
||||
let mut next_xreg = 0;
|
||||
let mut next_vreg = 0;
|
||||
let mut next_stack: u64 = 0;
|
||||
@@ -182,18 +213,26 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
||||
next_stack = 16;
|
||||
}
|
||||
|
||||
// Note on return values: on the regular non-baldrdash ABI, we may return values in 8
|
||||
// registers for V128 and I64 registers independently of the number of register values
|
||||
// returned in the other class. That is, we can return values in up to 8 integer and 8
|
||||
// vector registers at once.
|
||||
// In Baldrdash, we can only use one register for return value for all the register
|
||||
// classes. That is, we can't return values in both one integer and one vector register;
|
||||
// only one return value may be in a register.
|
||||
let (max_per_class_reg_vals, mut remaining_reg_vals) = match args_or_rets {
|
||||
ArgsOrRets::Args => (8, 16), // x0-x7 and v0-v7
|
||||
|
||||
let (max_per_class_reg_vals, mut remaining_reg_vals) = match (args_or_rets, is_baldrdash) {
|
||||
(ArgsOrRets::Args, _) => (8, 16), // x0-x7 and v0-v7
|
||||
(ArgsOrRets::Rets, false) => (8, 16), // x0-x7 and v0-v7
|
||||
(ArgsOrRets::Rets, true) => (1, 1), // x0 or v0, but not both
|
||||
// Note on return values: on the regular ABI, we may return values
|
||||
// in 8 registers for V128 and I64 registers independently of the
|
||||
// number of register values returned in the other class. That is,
|
||||
// we can return values in up to 8 integer and
|
||||
// 8 vector registers at once.
|
||||
//
|
||||
// In Baldrdash and Wasmtime, we can only use one register for
|
||||
// return value for all the register classes. That is, we can't
|
||||
// return values in both one integer and one vector register; only
|
||||
// one return value may be in a register.
|
||||
ArgsOrRets::Rets => {
|
||||
if is_baldrdash || call_conv.extends_wasmtime() {
|
||||
(1, 1) // x0 or v0, but not both
|
||||
} else {
|
||||
(8, 16) // x0-x7 and v0-v7
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
for i in 0..params.len() {
|
||||
@@ -264,13 +303,27 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
||||
*next_reg += 1;
|
||||
remaining_reg_vals -= 1;
|
||||
} else {
|
||||
// Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte
|
||||
// stack alignment happens separately after all args.)
|
||||
// Compute the stack slot's size.
|
||||
let size = (ty_bits(param.value_type) / 8) as u64;
|
||||
let size = std::cmp::max(size, 8);
|
||||
// Align.
|
||||
|
||||
let size = if call_conv == isa::CallConv::AppleAarch64
|
||||
|| (call_conv.extends_wasmtime() && args_or_rets == ArgsOrRets::Rets)
|
||||
{
|
||||
// MacOS aarch64 and Wasmtime allow stack slots with
|
||||
// sizes less than 8 bytes. They still need to be
|
||||
// properly aligned on their natural data alignment,
|
||||
// though.
|
||||
size
|
||||
} else {
|
||||
// Every arg takes a minimum slot of 8 bytes. (16-byte stack
|
||||
// alignment happens separately after all args.)
|
||||
std::cmp::max(size, 8)
|
||||
};
|
||||
|
||||
// Align the stack slot.
|
||||
debug_assert!(size.is_power_of_two());
|
||||
next_stack = align_to(next_stack, size);
|
||||
|
||||
ret.push(ABIArg::stack(
|
||||
next_stack as i64,
|
||||
param.value_type,
|
||||
@@ -550,11 +603,13 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
||||
flags: &settings::Flags,
|
||||
clobbers: &Set<Writable<RealReg>>,
|
||||
fixed_frame_storage_size: u32,
|
||||
_outgoing_args_size: u32,
|
||||
) -> (u64, SmallVec<[Inst; 16]>) {
|
||||
let mut insts = SmallVec::new();
|
||||
let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers);
|
||||
|
||||
let (int_save_bytes, vec_save_bytes) = saved_reg_stack_size(&clobbered_int, &clobbered_vec);
|
||||
let (int_save_bytes, vec_save_bytes) =
|
||||
saved_reg_stack_size(call_conv, &clobbered_int, &clobbered_vec);
|
||||
let total_save_bytes = int_save_bytes + vec_save_bytes;
|
||||
let clobber_size = total_save_bytes as i32;
|
||||
|
||||
@@ -583,59 +638,170 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
||||
// `clobber_offset` tracks offset above start-of-clobbers for unwind-info
|
||||
// purposes.
|
||||
let mut clobber_offset = clobber_size as u32;
|
||||
for reg_pair in clobbered_int.chunks(2) {
|
||||
let (r1, r2) = if reg_pair.len() == 2 {
|
||||
// .to_reg().to_reg(): Writable<RealReg> --> RealReg --> Reg
|
||||
(reg_pair[0].to_reg().to_reg(), reg_pair[1].to_reg().to_reg())
|
||||
} else {
|
||||
(reg_pair[0].to_reg().to_reg(), zero_reg())
|
||||
};
|
||||
let clobber_offset_change = 16;
|
||||
let iter = clobbered_int.chunks_exact(2);
|
||||
|
||||
debug_assert!(r1.get_class() == RegClass::I64);
|
||||
debug_assert!(r2.get_class() == RegClass::I64);
|
||||
if let [rd] = iter.remainder() {
|
||||
let rd = rd.to_reg().to_reg();
|
||||
|
||||
// stp r1, r2, [sp, #-16]!
|
||||
insts.push(Inst::StoreP64 {
|
||||
rt: r1,
|
||||
rt2: r2,
|
||||
mem: PairAMode::PreIndexed(
|
||||
debug_assert_eq!(rd.get_class(), RegClass::I64);
|
||||
// str rd, [sp, #-16]!
|
||||
insts.push(Inst::Store64 {
|
||||
rd,
|
||||
mem: AMode::PreIndexed(
|
||||
writable_stack_reg(),
|
||||
SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(),
|
||||
SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
|
||||
),
|
||||
flags: MemFlags::trusted(),
|
||||
});
|
||||
|
||||
if flags.unwind_info() {
|
||||
clobber_offset -= 8;
|
||||
if r2 != zero_reg() {
|
||||
clobber_offset -= clobber_offset_change as u32;
|
||||
insts.push(Inst::Unwind {
|
||||
inst: UnwindInst::SaveReg {
|
||||
clobber_offset,
|
||||
reg: r2.to_real_reg(),
|
||||
},
|
||||
});
|
||||
}
|
||||
clobber_offset -= 8;
|
||||
insts.push(Inst::Unwind {
|
||||
inst: UnwindInst::SaveReg {
|
||||
clobber_offset,
|
||||
reg: r1.to_real_reg(),
|
||||
reg: rd.to_real_reg(),
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
for reg in clobbered_vec.iter() {
|
||||
insts.push(Inst::FpuStore128 {
|
||||
rd: reg.to_reg().to_reg(),
|
||||
mem: AMode::PreIndexed(writable_stack_reg(), SImm9::maybe_from_i64(-16).unwrap()),
|
||||
let mut iter = iter.rev();
|
||||
|
||||
while let Some([rt, rt2]) = iter.next() {
|
||||
// .to_reg().to_reg(): Writable<RealReg> --> RealReg --> Reg
|
||||
let rt = rt.to_reg().to_reg();
|
||||
let rt2 = rt2.to_reg().to_reg();
|
||||
|
||||
debug_assert!(rt.get_class() == RegClass::I64);
|
||||
debug_assert!(rt2.get_class() == RegClass::I64);
|
||||
|
||||
// stp rt, rt2, [sp, #-16]!
|
||||
insts.push(Inst::StoreP64 {
|
||||
rt,
|
||||
rt2,
|
||||
mem: PairAMode::PreIndexed(
|
||||
writable_stack_reg(),
|
||||
SImm7Scaled::maybe_from_i64(-clobber_offset_change, types::I64).unwrap(),
|
||||
),
|
||||
flags: MemFlags::trusted(),
|
||||
});
|
||||
|
||||
if flags.unwind_info() {
|
||||
clobber_offset -= 16;
|
||||
clobber_offset -= clobber_offset_change as u32;
|
||||
insts.push(Inst::Unwind {
|
||||
inst: UnwindInst::SaveReg {
|
||||
clobber_offset,
|
||||
reg: reg.to_reg(),
|
||||
reg: rt.to_real_reg(),
|
||||
},
|
||||
});
|
||||
insts.push(Inst::Unwind {
|
||||
inst: UnwindInst::SaveReg {
|
||||
clobber_offset: clobber_offset + (clobber_offset_change / 2) as u32,
|
||||
reg: rt2.to_real_reg(),
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let store_vec_reg = |rd| {
|
||||
if call_conv.extends_baldrdash() {
|
||||
Inst::FpuStore128 {
|
||||
rd,
|
||||
mem: AMode::PreIndexed(
|
||||
writable_stack_reg(),
|
||||
SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
|
||||
),
|
||||
flags: MemFlags::trusted(),
|
||||
}
|
||||
} else {
|
||||
Inst::FpuStore64 {
|
||||
rd,
|
||||
mem: AMode::PreIndexed(
|
||||
writable_stack_reg(),
|
||||
SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
|
||||
),
|
||||
flags: MemFlags::trusted(),
|
||||
}
|
||||
}
|
||||
};
|
||||
let iter = clobbered_vec.chunks_exact(2);
|
||||
|
||||
if let [rd] = iter.remainder() {
|
||||
let rd = rd.to_reg().to_reg();
|
||||
|
||||
debug_assert_eq!(rd.get_class(), RegClass::V128);
|
||||
insts.push(store_vec_reg(rd));
|
||||
|
||||
if flags.unwind_info() {
|
||||
clobber_offset -= clobber_offset_change as u32;
|
||||
insts.push(Inst::Unwind {
|
||||
inst: UnwindInst::SaveReg {
|
||||
clobber_offset,
|
||||
reg: rd.to_real_reg(),
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let store_vec_reg_pair = |rt, rt2| {
|
||||
if call_conv.extends_baldrdash() {
|
||||
let clobber_offset_change = 32;
|
||||
|
||||
(
|
||||
Inst::FpuStoreP128 {
|
||||
rt,
|
||||
rt2,
|
||||
mem: PairAMode::PreIndexed(
|
||||
writable_stack_reg(),
|
||||
SImm7Scaled::maybe_from_i64(-clobber_offset_change, I8X16).unwrap(),
|
||||
),
|
||||
flags: MemFlags::trusted(),
|
||||
},
|
||||
clobber_offset_change as u32,
|
||||
)
|
||||
} else {
|
||||
let clobber_offset_change = 16;
|
||||
|
||||
(
|
||||
Inst::FpuStoreP64 {
|
||||
rt,
|
||||
rt2,
|
||||
mem: PairAMode::PreIndexed(
|
||||
writable_stack_reg(),
|
||||
SImm7Scaled::maybe_from_i64(-clobber_offset_change, F64).unwrap(),
|
||||
),
|
||||
flags: MemFlags::trusted(),
|
||||
},
|
||||
clobber_offset_change as u32,
|
||||
)
|
||||
}
|
||||
};
|
||||
let mut iter = iter.rev();
|
||||
|
||||
while let Some([rt, rt2]) = iter.next() {
|
||||
let rt = rt.to_reg().to_reg();
|
||||
let rt2 = rt2.to_reg().to_reg();
|
||||
|
||||
debug_assert_eq!(rt.get_class(), RegClass::V128);
|
||||
debug_assert_eq!(rt2.get_class(), RegClass::V128);
|
||||
|
||||
let (inst, clobber_offset_change) = store_vec_reg_pair(rt, rt2);
|
||||
|
||||
insts.push(inst);
|
||||
|
||||
if flags.unwind_info() {
|
||||
clobber_offset -= clobber_offset_change;
|
||||
insts.push(Inst::Unwind {
|
||||
inst: UnwindInst::SaveReg {
|
||||
clobber_offset,
|
||||
reg: rt.to_real_reg(),
|
||||
},
|
||||
});
|
||||
insts.push(Inst::Unwind {
|
||||
inst: UnwindInst::SaveReg {
|
||||
clobber_offset: clobber_offset + clobber_offset_change / 2,
|
||||
reg: rt2.to_real_reg(),
|
||||
},
|
||||
});
|
||||
}
|
||||
@@ -654,6 +820,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
||||
flags: &settings::Flags,
|
||||
clobbers: &Set<Writable<RealReg>>,
|
||||
fixed_frame_storage_size: u32,
|
||||
_outgoing_args_size: u32,
|
||||
) -> SmallVec<[Inst; 16]> {
|
||||
let mut insts = SmallVec::new();
|
||||
let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers);
|
||||
@@ -663,31 +830,83 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
||||
insts.extend(Self::gen_sp_reg_adjust(fixed_frame_storage_size as i32));
|
||||
}
|
||||
|
||||
for reg in clobbered_vec.iter().rev() {
|
||||
insts.push(Inst::FpuLoad128 {
|
||||
rd: Writable::from_reg(reg.to_reg().to_reg()),
|
||||
mem: AMode::PostIndexed(writable_stack_reg(), SImm9::maybe_from_i64(16).unwrap()),
|
||||
let load_vec_reg = |rd| {
|
||||
if call_conv.extends_baldrdash() {
|
||||
Inst::FpuLoad128 {
|
||||
rd,
|
||||
mem: AMode::PostIndexed(
|
||||
writable_stack_reg(),
|
||||
SImm9::maybe_from_i64(16).unwrap(),
|
||||
),
|
||||
flags: MemFlags::trusted(),
|
||||
});
|
||||
}
|
||||
|
||||
for reg_pair in clobbered_int.chunks(2).rev() {
|
||||
let (r1, r2) = if reg_pair.len() == 2 {
|
||||
(
|
||||
reg_pair[0].map(|r| r.to_reg()),
|
||||
reg_pair[1].map(|r| r.to_reg()),
|
||||
)
|
||||
} else {
|
||||
(reg_pair[0].map(|r| r.to_reg()), writable_zero_reg())
|
||||
Inst::FpuLoad64 {
|
||||
rd,
|
||||
mem: AMode::PostIndexed(
|
||||
writable_stack_reg(),
|
||||
SImm9::maybe_from_i64(16).unwrap(),
|
||||
),
|
||||
flags: MemFlags::trusted(),
|
||||
}
|
||||
}
|
||||
};
|
||||
let load_vec_reg_pair = |rt, rt2| {
|
||||
if call_conv.extends_baldrdash() {
|
||||
Inst::FpuLoadP128 {
|
||||
rt,
|
||||
rt2,
|
||||
mem: PairAMode::PostIndexed(
|
||||
writable_stack_reg(),
|
||||
SImm7Scaled::maybe_from_i64(32, I8X16).unwrap(),
|
||||
),
|
||||
flags: MemFlags::trusted(),
|
||||
}
|
||||
} else {
|
||||
Inst::FpuLoadP64 {
|
||||
rt,
|
||||
rt2,
|
||||
mem: PairAMode::PostIndexed(
|
||||
writable_stack_reg(),
|
||||
SImm7Scaled::maybe_from_i64(16, F64).unwrap(),
|
||||
),
|
||||
flags: MemFlags::trusted(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
debug_assert!(r1.to_reg().get_class() == RegClass::I64);
|
||||
debug_assert!(r2.to_reg().get_class() == RegClass::I64);
|
||||
let mut iter = clobbered_vec.chunks_exact(2);
|
||||
|
||||
// ldp r1, r2, [sp], #16
|
||||
while let Some([rt, rt2]) = iter.next() {
|
||||
let rt = rt.map(|r| r.to_reg());
|
||||
let rt2 = rt2.map(|r| r.to_reg());
|
||||
|
||||
debug_assert_eq!(rt.to_reg().get_class(), RegClass::V128);
|
||||
debug_assert_eq!(rt2.to_reg().get_class(), RegClass::V128);
|
||||
insts.push(load_vec_reg_pair(rt, rt2));
|
||||
}
|
||||
|
||||
debug_assert!(iter.remainder().len() <= 1);
|
||||
|
||||
if let [rd] = iter.remainder() {
|
||||
let rd = rd.map(|r| r.to_reg());
|
||||
|
||||
debug_assert_eq!(rd.to_reg().get_class(), RegClass::V128);
|
||||
insts.push(load_vec_reg(rd));
|
||||
}
|
||||
|
||||
let mut iter = clobbered_int.chunks_exact(2);
|
||||
|
||||
while let Some([rt, rt2]) = iter.next() {
|
||||
let rt = rt.map(|r| r.to_reg());
|
||||
let rt2 = rt2.map(|r| r.to_reg());
|
||||
|
||||
debug_assert_eq!(rt.to_reg().get_class(), RegClass::I64);
|
||||
debug_assert_eq!(rt2.to_reg().get_class(), RegClass::I64);
|
||||
// ldp rt, rt2, [sp], #16
|
||||
insts.push(Inst::LoadP64 {
|
||||
rt: r1,
|
||||
rt2: r2,
|
||||
rt,
|
||||
rt2,
|
||||
mem: PairAMode::PostIndexed(
|
||||
writable_stack_reg(),
|
||||
SImm7Scaled::maybe_from_i64(16, I64).unwrap(),
|
||||
@@ -696,6 +915,20 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
||||
});
|
||||
}
|
||||
|
||||
debug_assert!(iter.remainder().len() <= 1);
|
||||
|
||||
if let [rd] = iter.remainder() {
|
||||
let rd = rd.map(|r| r.to_reg());
|
||||
|
||||
debug_assert_eq!(rd.to_reg().get_class(), RegClass::I64);
|
||||
// ldr rd, [sp], #16
|
||||
insts.push(Inst::ULoad64 {
|
||||
rd,
|
||||
mem: AMode::PostIndexed(writable_stack_reg(), SImm9::maybe_from_i64(16).unwrap()),
|
||||
flags: MemFlags::trusted(),
|
||||
});
|
||||
}
|
||||
|
||||
// If this is Baldrdash-2020, restore the callee (i.e., our) TLS
|
||||
// register. We may have allocated it for something else and clobbered
|
||||
// it, but the ABI expects us to leave the TLS register unchanged.
|
||||
|
||||
@@ -258,6 +258,28 @@ fn enc_ldst_vec(q: u32, size: u32, rn: Reg, rt: Writable<Reg>) -> u32 {
|
||||
| machreg_to_vec(rt.to_reg())
|
||||
}
|
||||
|
||||
fn enc_ldst_vec_pair(
|
||||
opc: u32,
|
||||
amode: u32,
|
||||
is_load: bool,
|
||||
simm7: SImm7Scaled,
|
||||
rn: Reg,
|
||||
rt: Reg,
|
||||
rt2: Reg,
|
||||
) -> u32 {
|
||||
debug_assert_eq!(opc & 0b11, opc);
|
||||
debug_assert_eq!(amode & 0b11, amode);
|
||||
|
||||
0b00_10110_00_0_0000000_00000_00000_00000
|
||||
| opc << 30
|
||||
| amode << 23
|
||||
| (is_load as u32) << 22
|
||||
| simm7.bits() << 15
|
||||
| machreg_to_vec(rt2) << 10
|
||||
| machreg_to_gpr(rn) << 5
|
||||
| machreg_to_vec(rt)
|
||||
}
|
||||
|
||||
fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
|
||||
(top11 << 21)
|
||||
| (machreg_to_vec(rm) << 16)
|
||||
@@ -405,6 +427,15 @@ fn enc_vec_rr_misc(qu: u32, size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: R
|
||||
| machreg_to_vec(rd.to_reg())
|
||||
}
|
||||
|
||||
fn enc_vec_rr_pair(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
|
||||
debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
|
||||
|
||||
0b010_11110_11_11000_11011_10_00000_00000
|
||||
| bits_12_16 << 12
|
||||
| machreg_to_vec(rn) << 5
|
||||
| machreg_to_vec(rd.to_reg())
|
||||
}
|
||||
|
||||
fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
|
||||
debug_assert_eq!(q & 0b1, q);
|
||||
debug_assert_eq!(u & 0b1, u);
|
||||
@@ -923,7 +954,7 @@ impl MachInstEmit for Inst {
|
||||
|
||||
let srcloc = state.cur_srcloc();
|
||||
if srcloc != SourceLoc::default() && !flags.notrap() {
|
||||
// Register the offset at which the actual load instruction starts.
|
||||
// Register the offset at which the actual store instruction starts.
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
|
||||
@@ -987,7 +1018,7 @@ impl MachInstEmit for Inst {
|
||||
} => {
|
||||
let srcloc = state.cur_srcloc();
|
||||
if srcloc != SourceLoc::default() && !flags.notrap() {
|
||||
// Register the offset at which the actual load instruction starts.
|
||||
// Register the offset at which the actual store instruction starts.
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
match mem {
|
||||
@@ -1034,6 +1065,120 @@ impl MachInstEmit for Inst {
|
||||
}
|
||||
}
|
||||
}
|
||||
&Inst::FpuLoadP64 {
|
||||
rt,
|
||||
rt2,
|
||||
ref mem,
|
||||
flags,
|
||||
}
|
||||
| &Inst::FpuLoadP128 {
|
||||
rt,
|
||||
rt2,
|
||||
ref mem,
|
||||
flags,
|
||||
} => {
|
||||
let srcloc = state.cur_srcloc();
|
||||
|
||||
if srcloc != SourceLoc::default() && !flags.notrap() {
|
||||
// Register the offset at which the actual load instruction starts.
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
|
||||
let opc = match self {
|
||||
&Inst::FpuLoadP64 { .. } => 0b01,
|
||||
&Inst::FpuLoadP128 { .. } => 0b10,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let rt = rt.to_reg();
|
||||
let rt2 = rt2.to_reg();
|
||||
|
||||
match mem {
|
||||
&PairAMode::SignedOffset(reg, simm7) => {
|
||||
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
|
||||
sink.put4(enc_ldst_vec_pair(opc, 0b10, true, simm7, reg, rt, rt2));
|
||||
}
|
||||
&PairAMode::PreIndexed(reg, simm7) => {
|
||||
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
|
||||
sink.put4(enc_ldst_vec_pair(
|
||||
opc,
|
||||
0b11,
|
||||
true,
|
||||
simm7,
|
||||
reg.to_reg(),
|
||||
rt,
|
||||
rt2,
|
||||
));
|
||||
}
|
||||
&PairAMode::PostIndexed(reg, simm7) => {
|
||||
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
|
||||
sink.put4(enc_ldst_vec_pair(
|
||||
opc,
|
||||
0b01,
|
||||
true,
|
||||
simm7,
|
||||
reg.to_reg(),
|
||||
rt,
|
||||
rt2,
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
&Inst::FpuStoreP64 {
|
||||
rt,
|
||||
rt2,
|
||||
ref mem,
|
||||
flags,
|
||||
}
|
||||
| &Inst::FpuStoreP128 {
|
||||
rt,
|
||||
rt2,
|
||||
ref mem,
|
||||
flags,
|
||||
} => {
|
||||
let srcloc = state.cur_srcloc();
|
||||
|
||||
if srcloc != SourceLoc::default() && !flags.notrap() {
|
||||
// Register the offset at which the actual store instruction starts.
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
|
||||
let opc = match self {
|
||||
&Inst::FpuStoreP64 { .. } => 0b01,
|
||||
&Inst::FpuStoreP128 { .. } => 0b10,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
match mem {
|
||||
&PairAMode::SignedOffset(reg, simm7) => {
|
||||
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
|
||||
sink.put4(enc_ldst_vec_pair(opc, 0b10, false, simm7, reg, rt, rt2));
|
||||
}
|
||||
&PairAMode::PreIndexed(reg, simm7) => {
|
||||
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
|
||||
sink.put4(enc_ldst_vec_pair(
|
||||
opc,
|
||||
0b11,
|
||||
false,
|
||||
simm7,
|
||||
reg.to_reg(),
|
||||
rt,
|
||||
rt2,
|
||||
));
|
||||
}
|
||||
&PairAMode::PostIndexed(reg, simm7) => {
|
||||
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
|
||||
sink.put4(enc_ldst_vec_pair(
|
||||
opc,
|
||||
0b01,
|
||||
false,
|
||||
simm7,
|
||||
reg.to_reg(),
|
||||
rt,
|
||||
rt2,
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
&Inst::Mov64 { rd, rm } => {
|
||||
assert!(rd.to_reg().get_class() == rm.get_class());
|
||||
assert!(rm.get_class() == RegClass::I64);
|
||||
@@ -1492,6 +1637,7 @@ impl MachInstEmit for Inst {
|
||||
debug_assert!(size == VectorSize::Size8x8 || size == VectorSize::Size8x16);
|
||||
(0b0, 0b00101, enc_size)
|
||||
}
|
||||
VecMisc2::Cmeq0 => (0b0, 0b01001, enc_size),
|
||||
};
|
||||
sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));
|
||||
}
|
||||
@@ -1918,6 +2064,13 @@ impl MachInstEmit for Inst {
|
||||
| machreg_to_vec(rd.to_reg()),
|
||||
);
|
||||
}
|
||||
&Inst::VecRRPair { op, rd, rn } => {
|
||||
let bits_12_16 = match op {
|
||||
VecPairOp::Addp => 0b11011,
|
||||
};
|
||||
|
||||
sink.put4(enc_vec_rr_pair(bits_12_16, rd, rn));
|
||||
}
|
||||
&Inst::VecRRR {
|
||||
rd,
|
||||
rn,
|
||||
|
||||
@@ -2311,6 +2311,16 @@ fn test_aarch64_binemit() {
|
||||
"sqxtun v16.8b, v23.8h",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRPair {
|
||||
op: VecPairOp::Addp,
|
||||
rd: writable_vreg(0),
|
||||
rn: vreg(30),
|
||||
},
|
||||
"C0BBF15E",
|
||||
"addp d0, v30.2d",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Sqadd,
|
||||
@@ -3803,6 +3813,17 @@ fn test_aarch64_binemit() {
|
||||
"cnt v23.8b, v5.8b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMisc {
|
||||
op: VecMisc2::Cmeq0,
|
||||
rd: writable_vreg(12),
|
||||
rn: vreg(27),
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"6C9B604E",
|
||||
"cmeq v12.8h, v27.8h, #0",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecLanes {
|
||||
op: VecLanesOp::Uminv,
|
||||
@@ -5105,6 +5126,168 @@ fn test_aarch64_binemit() {
|
||||
"str q16, [x8, x9, LSL #4]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuLoadP64 {
|
||||
rt: writable_vreg(0),
|
||||
rt2: writable_vreg(31),
|
||||
mem: PairAMode::SignedOffset(xreg(0), SImm7Scaled::zero(F64)),
|
||||
flags: MemFlags::trusted(),
|
||||
},
|
||||
"007C406D",
|
||||
"ldp d0, d31, [x0]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuLoadP64 {
|
||||
rt: writable_vreg(19),
|
||||
rt2: writable_vreg(11),
|
||||
mem: PairAMode::PreIndexed(
|
||||
writable_xreg(25),
|
||||
SImm7Scaled::maybe_from_i64(-512, F64).unwrap(),
|
||||
),
|
||||
flags: MemFlags::trusted(),
|
||||
},
|
||||
"332FE06D",
|
||||
"ldp d19, d11, [x25, #-512]!",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuLoadP64 {
|
||||
rt: writable_vreg(7),
|
||||
rt2: writable_vreg(20),
|
||||
mem: PairAMode::PostIndexed(
|
||||
writable_stack_reg(),
|
||||
SImm7Scaled::maybe_from_i64(64, F64).unwrap(),
|
||||
),
|
||||
flags: MemFlags::trusted(),
|
||||
},
|
||||
"E753C46C",
|
||||
"ldp d7, d20, [sp], #64",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuStoreP64 {
|
||||
rt: vreg(4),
|
||||
rt2: vreg(26),
|
||||
mem: PairAMode::SignedOffset(
|
||||
stack_reg(),
|
||||
SImm7Scaled::maybe_from_i64(504, F64).unwrap(),
|
||||
),
|
||||
flags: MemFlags::trusted(),
|
||||
},
|
||||
"E4EB1F6D",
|
||||
"stp d4, d26, [sp, #504]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuStoreP64 {
|
||||
rt: vreg(16),
|
||||
rt2: vreg(8),
|
||||
mem: PairAMode::PreIndexed(
|
||||
writable_xreg(15),
|
||||
SImm7Scaled::maybe_from_i64(48, F64).unwrap(),
|
||||
),
|
||||
flags: MemFlags::trusted(),
|
||||
},
|
||||
"F021836D",
|
||||
"stp d16, d8, [x15, #48]!",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuStoreP64 {
|
||||
rt: vreg(5),
|
||||
rt2: vreg(6),
|
||||
mem: PairAMode::PostIndexed(
|
||||
writable_xreg(28),
|
||||
SImm7Scaled::maybe_from_i64(-32, F64).unwrap(),
|
||||
),
|
||||
flags: MemFlags::trusted(),
|
||||
},
|
||||
"851BBE6C",
|
||||
"stp d5, d6, [x28], #-32",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuLoadP128 {
|
||||
rt: writable_vreg(0),
|
||||
rt2: writable_vreg(17),
|
||||
mem: PairAMode::SignedOffset(xreg(3), SImm7Scaled::zero(I8X16)),
|
||||
flags: MemFlags::trusted(),
|
||||
},
|
||||
"604440AD",
|
||||
"ldp q0, q17, [x3]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuLoadP128 {
|
||||
rt: writable_vreg(29),
|
||||
rt2: writable_vreg(9),
|
||||
mem: PairAMode::PreIndexed(
|
||||
writable_xreg(16),
|
||||
SImm7Scaled::maybe_from_i64(-1024, I8X16).unwrap(),
|
||||
),
|
||||
flags: MemFlags::trusted(),
|
||||
},
|
||||
"1D26E0AD",
|
||||
"ldp q29, q9, [x16, #-1024]!",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuLoadP128 {
|
||||
rt: writable_vreg(10),
|
||||
rt2: writable_vreg(20),
|
||||
mem: PairAMode::PostIndexed(
|
||||
writable_xreg(26),
|
||||
SImm7Scaled::maybe_from_i64(256, I8X16).unwrap(),
|
||||
),
|
||||
flags: MemFlags::trusted(),
|
||||
},
|
||||
"4A53C8AC",
|
||||
"ldp q10, q20, [x26], #256",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuStoreP128 {
|
||||
rt: vreg(9),
|
||||
rt2: vreg(31),
|
||||
mem: PairAMode::SignedOffset(
|
||||
stack_reg(),
|
||||
SImm7Scaled::maybe_from_i64(1008, I8X16).unwrap(),
|
||||
),
|
||||
flags: MemFlags::trusted(),
|
||||
},
|
||||
"E9FF1FAD",
|
||||
"stp q9, q31, [sp, #1008]",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuStoreP128 {
|
||||
rt: vreg(27),
|
||||
rt2: vreg(13),
|
||||
mem: PairAMode::PreIndexed(
|
||||
writable_stack_reg(),
|
||||
SImm7Scaled::maybe_from_i64(-192, I8X16).unwrap(),
|
||||
),
|
||||
flags: MemFlags::trusted(),
|
||||
},
|
||||
"FB37BAAD",
|
||||
"stp q27, q13, [sp, #-192]!",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuStoreP128 {
|
||||
rt: vreg(18),
|
||||
rt2: vreg(22),
|
||||
mem: PairAMode::PostIndexed(
|
||||
writable_xreg(13),
|
||||
SImm7Scaled::maybe_from_i64(304, I8X16).unwrap(),
|
||||
),
|
||||
flags: MemFlags::trusted(),
|
||||
},
|
||||
"B2D989AC",
|
||||
"stp q18, q22, [x13], #304",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::LoadFpuConst64 {
|
||||
rd: writable_vreg(16),
|
||||
|
||||
@@ -73,7 +73,7 @@ impl SImm7Scaled {
|
||||
/// Create a SImm7Scaled from a raw offset and the known scale type, if
|
||||
/// possible.
|
||||
pub fn maybe_from_i64(value: i64, scale_ty: Type) -> Option<SImm7Scaled> {
|
||||
assert!(scale_ty == I64 || scale_ty == I32);
|
||||
assert!(scale_ty == I64 || scale_ty == I32 || scale_ty == F64 || scale_ty == I8X16);
|
||||
let scale = scale_ty.bytes();
|
||||
assert!(scale.is_power_of_two());
|
||||
let scale = i64::from(scale);
|
||||
|
||||
@@ -334,6 +334,8 @@ pub enum VecMisc2 {
|
||||
Frintp,
|
||||
/// Population count per byte
|
||||
Cnt,
|
||||
/// Compare bitwise equal to 0
|
||||
Cmeq0,
|
||||
}
|
||||
|
||||
/// A Vector narrowing operation with two registers.
|
||||
@@ -347,6 +349,13 @@ pub enum VecMiscNarrowOp {
|
||||
Sqxtun,
|
||||
}
|
||||
|
||||
/// The set of operations that take a single source register and operate on
/// a pair of its elements.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum VecPairOp {
    /// Add the pair of elements together.
    Addp,
}
|
||||
|
||||
/// An operation across the lanes of vectors.
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
|
||||
pub enum VecLanesOp {
|
||||
@@ -848,7 +857,34 @@ pub enum Inst {
|
||||
mem: AMode,
|
||||
flags: MemFlags,
|
||||
},
|
||||
|
||||
/// A load of a pair of floating-point registers, double precision (64-bit).
|
||||
FpuLoadP64 {
|
||||
rt: Writable<Reg>,
|
||||
rt2: Writable<Reg>,
|
||||
mem: PairAMode,
|
||||
flags: MemFlags,
|
||||
},
|
||||
/// A store of a pair of floating-point registers, double precision (64-bit).
|
||||
FpuStoreP64 {
|
||||
rt: Reg,
|
||||
rt2: Reg,
|
||||
mem: PairAMode,
|
||||
flags: MemFlags,
|
||||
},
|
||||
/// A load of a pair of floating-point registers, 128-bit.
|
||||
FpuLoadP128 {
|
||||
rt: Writable<Reg>,
|
||||
rt2: Writable<Reg>,
|
||||
mem: PairAMode,
|
||||
flags: MemFlags,
|
||||
},
|
||||
/// A store of a pair of floating-point registers, 128-bit.
|
||||
FpuStoreP128 {
|
||||
rt: Reg,
|
||||
rt2: Reg,
|
||||
mem: PairAMode,
|
||||
flags: MemFlags,
|
||||
},
|
||||
LoadFpuConst64 {
|
||||
rd: Writable<Reg>,
|
||||
const_data: u64,
|
||||
@@ -984,6 +1020,13 @@ pub enum Inst {
|
||||
high_half: bool,
|
||||
},
|
||||
|
||||
/// 1-operand vector instruction that operates on a pair of elements.
|
||||
VecRRPair {
|
||||
op: VecPairOp,
|
||||
rd: Writable<Reg>,
|
||||
rn: Reg,
|
||||
},
|
||||
|
||||
/// A vector ALU op.
|
||||
VecRRR {
|
||||
alu_op: VecALUOp,
|
||||
@@ -1908,6 +1951,34 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
collector.add_use(rd);
|
||||
memarg_regs(mem, collector);
|
||||
}
|
||||
&Inst::FpuLoadP64 {
|
||||
rt, rt2, ref mem, ..
|
||||
} => {
|
||||
collector.add_def(rt);
|
||||
collector.add_def(rt2);
|
||||
pairmemarg_regs(mem, collector);
|
||||
}
|
||||
&Inst::FpuStoreP64 {
|
||||
rt, rt2, ref mem, ..
|
||||
} => {
|
||||
collector.add_use(rt);
|
||||
collector.add_use(rt2);
|
||||
pairmemarg_regs(mem, collector);
|
||||
}
|
||||
&Inst::FpuLoadP128 {
|
||||
rt, rt2, ref mem, ..
|
||||
} => {
|
||||
collector.add_def(rt);
|
||||
collector.add_def(rt2);
|
||||
pairmemarg_regs(mem, collector);
|
||||
}
|
||||
&Inst::FpuStoreP128 {
|
||||
rt, rt2, ref mem, ..
|
||||
} => {
|
||||
collector.add_use(rt);
|
||||
collector.add_use(rt2);
|
||||
pairmemarg_regs(mem, collector);
|
||||
}
|
||||
&Inst::LoadFpuConst64 { rd, .. } | &Inst::LoadFpuConst128 { rd, .. } => {
|
||||
collector.add_def(rd);
|
||||
}
|
||||
@@ -1973,6 +2044,10 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
collector.add_def(rd);
|
||||
}
|
||||
}
|
||||
&Inst::VecRRPair { rd, rn, .. } => {
|
||||
collector.add_def(rd);
|
||||
collector.add_use(rn);
|
||||
}
|
||||
&Inst::VecRRR {
|
||||
alu_op, rd, rn, rm, ..
|
||||
} => {
|
||||
@@ -2590,6 +2665,46 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
map_use(mapper, rd);
|
||||
map_mem(mapper, mem);
|
||||
}
|
||||
&mut Inst::FpuLoadP64 {
|
||||
ref mut rt,
|
||||
ref mut rt2,
|
||||
ref mut mem,
|
||||
..
|
||||
} => {
|
||||
map_def(mapper, rt);
|
||||
map_def(mapper, rt2);
|
||||
map_pairmem(mapper, mem);
|
||||
}
|
||||
&mut Inst::FpuStoreP64 {
|
||||
ref mut rt,
|
||||
ref mut rt2,
|
||||
ref mut mem,
|
||||
..
|
||||
} => {
|
||||
map_use(mapper, rt);
|
||||
map_use(mapper, rt2);
|
||||
map_pairmem(mapper, mem);
|
||||
}
|
||||
&mut Inst::FpuLoadP128 {
|
||||
ref mut rt,
|
||||
ref mut rt2,
|
||||
ref mut mem,
|
||||
..
|
||||
} => {
|
||||
map_def(mapper, rt);
|
||||
map_def(mapper, rt2);
|
||||
map_pairmem(mapper, mem);
|
||||
}
|
||||
&mut Inst::FpuStoreP128 {
|
||||
ref mut rt,
|
||||
ref mut rt2,
|
||||
ref mut mem,
|
||||
..
|
||||
} => {
|
||||
map_use(mapper, rt);
|
||||
map_use(mapper, rt2);
|
||||
map_pairmem(mapper, mem);
|
||||
}
|
||||
&mut Inst::LoadFpuConst64 { ref mut rd, .. } => {
|
||||
map_def(mapper, rd);
|
||||
}
|
||||
@@ -2721,6 +2836,14 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
map_def(mapper, rd);
|
||||
}
|
||||
}
|
||||
&mut Inst::VecRRPair {
|
||||
ref mut rd,
|
||||
ref mut rn,
|
||||
..
|
||||
} => {
|
||||
map_def(mapper, rd);
|
||||
map_use(mapper, rn);
|
||||
}
|
||||
&mut Inst::VecRRR {
|
||||
alu_op,
|
||||
ref mut rd,
|
||||
@@ -3508,6 +3631,42 @@ impl Inst {
|
||||
let mem = mem.show_rru(mb_rru);
|
||||
format!("{}str {}, {}", mem_str, rd, mem)
|
||||
}
|
||||
&Inst::FpuLoadP64 {
|
||||
rt, rt2, ref mem, ..
|
||||
} => {
|
||||
let rt = show_vreg_scalar(rt.to_reg(), mb_rru, ScalarSize::Size64);
|
||||
let rt2 = show_vreg_scalar(rt2.to_reg(), mb_rru, ScalarSize::Size64);
|
||||
let mem = mem.show_rru(mb_rru);
|
||||
|
||||
format!("ldp {}, {}, {}", rt, rt2, mem)
|
||||
}
|
||||
&Inst::FpuStoreP64 {
|
||||
rt, rt2, ref mem, ..
|
||||
} => {
|
||||
let rt = show_vreg_scalar(rt, mb_rru, ScalarSize::Size64);
|
||||
let rt2 = show_vreg_scalar(rt2, mb_rru, ScalarSize::Size64);
|
||||
let mem = mem.show_rru(mb_rru);
|
||||
|
||||
format!("stp {}, {}, {}", rt, rt2, mem)
|
||||
}
|
||||
&Inst::FpuLoadP128 {
|
||||
rt, rt2, ref mem, ..
|
||||
} => {
|
||||
let rt = show_vreg_scalar(rt.to_reg(), mb_rru, ScalarSize::Size128);
|
||||
let rt2 = show_vreg_scalar(rt2.to_reg(), mb_rru, ScalarSize::Size128);
|
||||
let mem = mem.show_rru(mb_rru);
|
||||
|
||||
format!("ldp {}, {}, {}", rt, rt2, mem)
|
||||
}
|
||||
&Inst::FpuStoreP128 {
|
||||
rt, rt2, ref mem, ..
|
||||
} => {
|
||||
let rt = show_vreg_scalar(rt, mb_rru, ScalarSize::Size128);
|
||||
let rt2 = show_vreg_scalar(rt2, mb_rru, ScalarSize::Size128);
|
||||
let mem = mem.show_rru(mb_rru);
|
||||
|
||||
format!("stp {}, {}, {}", rt, rt2, mem)
|
||||
}
|
||||
&Inst::LoadFpuConst64 { rd, const_data } => {
|
||||
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
|
||||
format!(
|
||||
@@ -3725,6 +3884,15 @@ impl Inst {
|
||||
};
|
||||
format!("{} {}, {}", op, rd, rn)
|
||||
}
|
||||
&Inst::VecRRPair { op, rd, rn } => {
|
||||
let op = match op {
|
||||
VecPairOp::Addp => "addp",
|
||||
};
|
||||
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
|
||||
let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size64x2);
|
||||
|
||||
format!("{} {}, {}", op, rd, rn)
|
||||
}
|
||||
&Inst::VecRRR {
|
||||
rd,
|
||||
rn,
|
||||
@@ -3788,43 +3956,44 @@ impl Inst {
|
||||
format!("{} {}, {}, {}", op, rd, rn, rm)
|
||||
}
|
||||
&Inst::VecMisc { op, rd, rn, size } => {
|
||||
let is_shll = op == VecMisc2::Shll;
|
||||
let suffix = match (is_shll, size) {
|
||||
(true, VectorSize::Size8x8) => ", #8",
|
||||
(true, VectorSize::Size16x4) => ", #16",
|
||||
(true, VectorSize::Size32x2) => ", #32",
|
||||
_ => "",
|
||||
};
|
||||
|
||||
let (op, size) = match op {
|
||||
VecMisc2::Not => (
|
||||
"mvn",
|
||||
if size.is_128bits() {
|
||||
let (op, rd_size, size, suffix) = match op {
|
||||
VecMisc2::Not => {
|
||||
let size = if size.is_128bits() {
|
||||
VectorSize::Size8x16
|
||||
} else {
|
||||
VectorSize::Size8x8
|
||||
},
|
||||
),
|
||||
VecMisc2::Neg => ("neg", size),
|
||||
VecMisc2::Abs => ("abs", size),
|
||||
VecMisc2::Fabs => ("fabs", size),
|
||||
VecMisc2::Fneg => ("fneg", size),
|
||||
VecMisc2::Fsqrt => ("fsqrt", size),
|
||||
VecMisc2::Rev64 => ("rev64", size),
|
||||
VecMisc2::Shll => ("shll", size),
|
||||
VecMisc2::Fcvtzs => ("fcvtzs", size),
|
||||
VecMisc2::Fcvtzu => ("fcvtzu", size),
|
||||
VecMisc2::Scvtf => ("scvtf", size),
|
||||
VecMisc2::Ucvtf => ("ucvtf", size),
|
||||
VecMisc2::Frintn => ("frintn", size),
|
||||
VecMisc2::Frintz => ("frintz", size),
|
||||
VecMisc2::Frintm => ("frintm", size),
|
||||
VecMisc2::Frintp => ("frintp", size),
|
||||
VecMisc2::Cnt => ("cnt", size),
|
||||
};
|
||||
|
||||
let rd_size = if is_shll { size.widen() } else { size };
|
||||
|
||||
("mvn", size, size, "")
|
||||
}
|
||||
VecMisc2::Neg => ("neg", size, size, ""),
|
||||
VecMisc2::Abs => ("abs", size, size, ""),
|
||||
VecMisc2::Fabs => ("fabs", size, size, ""),
|
||||
VecMisc2::Fneg => ("fneg", size, size, ""),
|
||||
VecMisc2::Fsqrt => ("fsqrt", size, size, ""),
|
||||
VecMisc2::Rev64 => ("rev64", size, size, ""),
|
||||
VecMisc2::Shll => (
|
||||
"shll",
|
||||
size.widen(),
|
||||
size,
|
||||
match size {
|
||||
VectorSize::Size8x8 => ", #8",
|
||||
VectorSize::Size16x4 => ", #16",
|
||||
VectorSize::Size32x2 => ", #32",
|
||||
_ => panic!("Unexpected vector size: {:?}", size),
|
||||
},
|
||||
),
|
||||
VecMisc2::Fcvtzs => ("fcvtzs", size, size, ""),
|
||||
VecMisc2::Fcvtzu => ("fcvtzu", size, size, ""),
|
||||
VecMisc2::Scvtf => ("scvtf", size, size, ""),
|
||||
VecMisc2::Ucvtf => ("ucvtf", size, size, ""),
|
||||
VecMisc2::Frintn => ("frintn", size, size, ""),
|
||||
VecMisc2::Frintz => ("frintz", size, size, ""),
|
||||
VecMisc2::Frintm => ("frintm", size, size, ""),
|
||||
VecMisc2::Frintp => ("frintp", size, size, ""),
|
||||
VecMisc2::Cnt => ("cnt", size, size, ""),
|
||||
VecMisc2::Cmeq0 => ("cmeq", size, size, ", #0"),
|
||||
};
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size);
|
||||
let rn = show_vreg_vector(rn, mb_rru, size);
|
||||
format!("{} {}, {}{}", op, rd, rn, suffix)
|
||||
|
||||
@@ -56,8 +56,8 @@ impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper {
|
||||
fn sp(&self) -> u16 {
|
||||
regs::stack_reg().get_hw_encoding().into()
|
||||
}
|
||||
fn fp(&self) -> u16 {
|
||||
regs::fp_reg().get_hw_encoding().into()
|
||||
fn fp(&self) -> Option<u16> {
|
||||
Some(regs::fp_reg().get_hw_encoding().into())
|
||||
}
|
||||
fn lr(&self) -> Option<u16> {
|
||||
Some(regs::link_reg().get_hw_encoding().into())
|
||||
|
||||
@@ -1950,6 +1950,40 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::VallTrue if ctx.input_ty(insn, 0) == I64X2 => {
|
||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let tmp = ctx.alloc_tmp(I64X2).only_reg().unwrap();
|
||||
|
||||
// cmeq vtmp.2d, vm.2d, #0
|
||||
// addp dtmp, vtmp.2d
|
||||
// fcmp dtmp, dtmp
|
||||
// cset xd, eq
|
||||
//
|
||||
// Note that after the ADDP the value of the temporary register will
|
||||
// be either 0 when all input elements are true, i.e. non-zero, or a
|
||||
// NaN otherwise (either -1 or -2 when represented as an integer);
|
||||
// NaNs are the only floating-point numbers that compare unequal to
|
||||
// themselves.
|
||||
|
||||
ctx.emit(Inst::VecMisc {
|
||||
op: VecMisc2::Cmeq0,
|
||||
rd: tmp,
|
||||
rn: rm,
|
||||
size: VectorSize::Size64x2,
|
||||
});
|
||||
ctx.emit(Inst::VecRRPair {
|
||||
op: VecPairOp::Addp,
|
||||
rd: tmp,
|
||||
rn: tmp.to_reg(),
|
||||
});
|
||||
ctx.emit(Inst::FpuCmp64 {
|
||||
rn: tmp.to_reg(),
|
||||
rm: tmp.to_reg(),
|
||||
});
|
||||
materialize_bool_result(ctx, insn, rd, Cond::Eq);
|
||||
}
|
||||
|
||||
Opcode::VanyTrue | Opcode::VallTrue => {
|
||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
@@ -2180,6 +2214,47 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
size: VectorSize::Size32x4,
|
||||
});
|
||||
}
|
||||
I64X2 => {
|
||||
// mov dst_r, src_v.d[0]
|
||||
// mov tmp_r0, src_v.d[1]
|
||||
// lsr dst_r, dst_r, #63
|
||||
// lsr tmp_r0, tmp_r0, #63
|
||||
// add dst_r, dst_r, tmp_r0, lsl #1
|
||||
ctx.emit(Inst::MovFromVec {
|
||||
rd: dst_r,
|
||||
rn: src_v,
|
||||
idx: 0,
|
||||
size: VectorSize::Size64x2,
|
||||
});
|
||||
ctx.emit(Inst::MovFromVec {
|
||||
rd: tmp_r0,
|
||||
rn: src_v,
|
||||
idx: 1,
|
||||
size: VectorSize::Size64x2,
|
||||
});
|
||||
ctx.emit(Inst::AluRRImmShift {
|
||||
alu_op: ALUOp::Lsr64,
|
||||
rd: dst_r,
|
||||
rn: dst_r.to_reg(),
|
||||
immshift: ImmShift::maybe_from_u64(63).unwrap(),
|
||||
});
|
||||
ctx.emit(Inst::AluRRImmShift {
|
||||
alu_op: ALUOp::Lsr64,
|
||||
rd: tmp_r0,
|
||||
rn: tmp_r0.to_reg(),
|
||||
immshift: ImmShift::maybe_from_u64(63).unwrap(),
|
||||
});
|
||||
ctx.emit(Inst::AluRRRShift {
|
||||
alu_op: ALUOp::Add32,
|
||||
rd: dst_r,
|
||||
rn: dst_r.to_reg(),
|
||||
rm: tmp_r0.to_reg(),
|
||||
shiftop: ShiftOpAndAmt::new(
|
||||
ShiftOp::LSL,
|
||||
ShiftOpShiftImm::maybe_from_shift(1).unwrap(),
|
||||
),
|
||||
});
|
||||
}
|
||||
_ => panic!("arm64 isel: VhighBits unhandled, ty = {:?}", ty),
|
||||
}
|
||||
}
|
||||
@@ -3013,6 +3088,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
}
|
||||
|
||||
Opcode::TlsValue => unimplemented!("tls_value"),
|
||||
Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -7,10 +7,8 @@ use crate::isa::Builder as IsaBuilder;
|
||||
use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
|
||||
use crate::result::CodegenResult;
|
||||
use crate::settings as shared_settings;
|
||||
|
||||
use alloc::boxed::Box;
|
||||
use alloc::{boxed::Box, vec::Vec};
|
||||
use core::hash::{Hash, Hasher};
|
||||
|
||||
use regalloc::{PrettyPrint, RealRegUniverse};
|
||||
use target_lexicon::{Aarch64Architecture, Architecture, Triple};
|
||||
|
||||
@@ -104,6 +102,10 @@ impl MachBackend for AArch64Backend {
|
||||
&self.flags
|
||||
}
|
||||
|
||||
/// Return the ISA-specific flag values of this backend, gathered from
/// `self.isa_flags` into a freshly allocated vector.
fn isa_flags(&self) -> Vec<shared_settings::Value> {
    self.isa_flags.iter().collect::<Vec<_>>()
}
|
||||
|
||||
fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) {
|
||||
self.flags.hash(&mut hasher);
|
||||
self.isa_flags.hash(&mut hasher);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
//! AArch64 Settings.
|
||||
|
||||
use crate::settings::{self, detail, Builder};
|
||||
use crate::settings::{self, detail, Builder, Value};
|
||||
use core::fmt;
|
||||
|
||||
// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a
|
||||
|
||||
@@ -319,6 +319,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
|
||||
_flags: &settings::Flags,
|
||||
clobbers: &Set<Writable<RealReg>>,
|
||||
fixed_frame_storage_size: u32,
|
||||
_outgoing_args_size: u32,
|
||||
) -> (u64, SmallVec<[Inst; 16]>) {
|
||||
let mut insts = SmallVec::new();
|
||||
if fixed_frame_storage_size > 0 {
|
||||
@@ -348,6 +349,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
|
||||
_flags: &settings::Flags,
|
||||
clobbers: &Set<Writable<RealReg>>,
|
||||
_fixed_frame_storage_size: u32,
|
||||
_outgoing_args_size: u32,
|
||||
) -> SmallVec<[Inst; 16]> {
|
||||
let mut insts = SmallVec::new();
|
||||
let clobbered_vec = get_callee_saves(clobbers);
|
||||
|
||||
@@ -7,7 +7,7 @@ use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter,
|
||||
use crate::result::CodegenResult;
|
||||
use crate::settings;
|
||||
|
||||
use alloc::boxed::Box;
|
||||
use alloc::{boxed::Box, vec::Vec};
|
||||
use core::hash::{Hash, Hasher};
|
||||
use regalloc::{PrettyPrint, RealRegUniverse};
|
||||
use target_lexicon::{Architecture, ArmArchitecture, Triple};
|
||||
@@ -92,6 +92,10 @@ impl MachBackend for Arm32Backend {
|
||||
&self.flags
|
||||
}
|
||||
|
||||
/// Return the ISA-specific flag values of this backend.
///
/// This backend reports none, so the result is always an empty vector.
fn isa_flags(&self) -> Vec<settings::Value> {
    Vec::default()
}
|
||||
|
||||
fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) {
|
||||
self.flags.hash(&mut hasher);
|
||||
}
|
||||
|
||||
@@ -10,23 +10,35 @@ use serde::{Deserialize, Serialize};
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
|
||||
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
|
||||
pub enum CallConv {
|
||||
/// Best performance, not ABI-stable
|
||||
/// Best performance, not ABI-stable.
|
||||
Fast,
|
||||
/// Smallest caller code size, not ABI-stable
|
||||
/// Smallest caller code size, not ABI-stable.
|
||||
Cold,
|
||||
/// System V-style convention used on many platforms
|
||||
/// System V-style convention used on many platforms.
|
||||
SystemV,
|
||||
/// Windows "fastcall" convention, also used for x64 and ARM
|
||||
/// Windows "fastcall" convention, also used for x64 and ARM.
|
||||
WindowsFastcall,
|
||||
/// SpiderMonkey WebAssembly convention on systems using natively SystemV
|
||||
/// Mac aarch64 calling convention, which is a tweaked aarch64 ABI.
|
||||
AppleAarch64,
|
||||
/// SpiderMonkey WebAssembly convention on systems using natively SystemV.
|
||||
BaldrdashSystemV,
|
||||
/// SpiderMonkey WebAssembly convention on Windows
|
||||
/// SpiderMonkey WebAssembly convention on Windows.
|
||||
BaldrdashWindows,
|
||||
/// SpiderMonkey WebAssembly convention for "ABI-2020", with extra TLS
|
||||
/// register slots in the frame.
|
||||
Baldrdash2020,
|
||||
/// Specialized convention for the probestack function
|
||||
/// Specialized convention for the probestack function.
|
||||
Probestack,
|
||||
/// Wasmtime equivalent of SystemV, not ABI-stable.
|
||||
///
|
||||
/// Currently only differs in how multiple return values are handled,
|
||||
/// returning the first return value in a register and everything else
|
||||
/// through a return-pointer.
|
||||
WasmtimeSystemV,
|
||||
/// Wasmtime equivalent of WindowsFastcall, not ABI-stable.
|
||||
///
|
||||
/// Differs from fastcall in the same way as `WasmtimeSystemV`.
|
||||
WasmtimeFastcall,
|
||||
}
|
||||
|
||||
impl CallConv {
|
||||
@@ -36,6 +48,7 @@ impl CallConv {
|
||||
// Default to System V for unknown targets because most everything
|
||||
// uses System V.
|
||||
Ok(CallingConvention::SystemV) | Err(()) => Self::SystemV,
|
||||
Ok(CallingConvention::AppleAarch64) => Self::AppleAarch64,
|
||||
Ok(CallingConvention::WindowsFastcall) => Self::WindowsFastcall,
|
||||
Ok(unimp) => unimplemented!("calling convention: {:?}", unimp),
|
||||
}
|
||||
@@ -49,6 +62,7 @@ impl CallConv {
|
||||
LibcallCallConv::Cold => Self::Cold,
|
||||
LibcallCallConv::SystemV => Self::SystemV,
|
||||
LibcallCallConv::WindowsFastcall => Self::WindowsFastcall,
|
||||
LibcallCallConv::AppleAarch64 => Self::AppleAarch64,
|
||||
LibcallCallConv::BaldrdashSystemV => Self::BaldrdashSystemV,
|
||||
LibcallCallConv::BaldrdashWindows => Self::BaldrdashWindows,
|
||||
LibcallCallConv::Baldrdash2020 => Self::Baldrdash2020,
|
||||
@@ -59,7 +73,7 @@ impl CallConv {
|
||||
/// Is the calling convention extending the Windows Fastcall ABI?
|
||||
pub fn extends_windows_fastcall(self) -> bool {
|
||||
match self {
|
||||
Self::WindowsFastcall | Self::BaldrdashWindows => true,
|
||||
Self::WindowsFastcall | Self::BaldrdashWindows | Self::WasmtimeFastcall => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
@@ -71,6 +85,14 @@ impl CallConv {
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is the calling convention extending the Wasmtime ABI?
|
||||
pub fn extends_wasmtime(self) -> bool {
|
||||
match self {
|
||||
Self::WasmtimeSystemV | Self::WasmtimeFastcall => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for CallConv {
|
||||
@@ -80,10 +102,13 @@ impl fmt::Display for CallConv {
|
||||
Self::Cold => "cold",
|
||||
Self::SystemV => "system_v",
|
||||
Self::WindowsFastcall => "windows_fastcall",
|
||||
Self::AppleAarch64 => "apple_aarch64",
|
||||
Self::BaldrdashSystemV => "baldrdash_system_v",
|
||||
Self::BaldrdashWindows => "baldrdash_windows",
|
||||
Self::Baldrdash2020 => "baldrdash_2020",
|
||||
Self::Probestack => "probestack",
|
||||
Self::WasmtimeSystemV => "wasmtime_system_v",
|
||||
Self::WasmtimeFastcall => "wasmtime_fastcall",
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -96,10 +121,13 @@ impl str::FromStr for CallConv {
|
||||
"cold" => Ok(Self::Cold),
|
||||
"system_v" => Ok(Self::SystemV),
|
||||
"windows_fastcall" => Ok(Self::WindowsFastcall),
|
||||
"apple_aarch64" => Ok(Self::AppleAarch64),
|
||||
"baldrdash_system_v" => Ok(Self::BaldrdashSystemV),
|
||||
"baldrdash_windows" => Ok(Self::BaldrdashWindows),
|
||||
"baldrdash_2020" => Ok(Self::Baldrdash2020),
|
||||
"probestack" => Ok(Self::Probestack),
|
||||
"wasmtime_system_v" => Ok(Self::WasmtimeSystemV),
|
||||
"wasmtime_fastcall" => Ok(Self::WasmtimeFastcall),
|
||||
_ => Err(()),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -57,35 +57,34 @@ use crate::flowgraph;
|
||||
use crate::ir;
|
||||
#[cfg(feature = "unwind")]
|
||||
use crate::isa::unwind::systemv::RegisterMappingError;
|
||||
use crate::machinst::MachBackend;
|
||||
use crate::machinst::{MachBackend, UnwindInfoKind};
|
||||
use crate::regalloc;
|
||||
use crate::result::CodegenResult;
|
||||
use crate::settings;
|
||||
use crate::settings::SetResult;
|
||||
use crate::timing;
|
||||
use alloc::borrow::Cow;
|
||||
use alloc::boxed::Box;
|
||||
use alloc::{borrow::Cow, boxed::Box, vec::Vec};
|
||||
use core::any::Any;
|
||||
use core::fmt;
|
||||
use core::fmt::{Debug, Formatter};
|
||||
use core::hash::Hasher;
|
||||
use target_lexicon::{triple, Architecture, PointerWidth, Triple};
|
||||
use thiserror::Error;
|
||||
use target_lexicon::{triple, Architecture, OperatingSystem, PointerWidth, Triple};
|
||||
|
||||
#[cfg(feature = "riscv")]
|
||||
mod riscv;
|
||||
|
||||
// N.B.: the old x86-64 backend (`x86`) and the new one (`x64`) can both be
|
||||
// included; if the new backend is included, then it is the default backend
|
||||
// returned for an x86-64 triple, but a specific option can request the old
|
||||
// backend. It is important to have the ability to instantiate *both* backends
|
||||
// in the same build so that we can do things like differential fuzzing between
|
||||
// backends, or perhaps offer a runtime configuration flag in the future.
|
||||
// N.B.: the old x86-64 backend (`x86`) and the new one (`x64`) are both
|
||||
// included whenever building with x86 support. The new backend is the default,
|
||||
// but the old can be requested with `BackendVariant::Legacy`. However, if this
|
||||
// crate is built with the `old-x86-backend` feature, then the old backend is
|
||||
// default instead.
|
||||
#[cfg(feature = "x86")]
|
||||
mod x86;
|
||||
|
||||
#[cfg(feature = "x64")]
|
||||
mod x64;
|
||||
// This module is made public here for benchmarking purposes. No guarantees are
|
||||
// made regarding API stability.
|
||||
#[cfg(feature = "x86")]
|
||||
pub mod x64;
|
||||
|
||||
#[cfg(feature = "arm32")]
|
||||
mod arm32;
|
||||
@@ -93,6 +92,9 @@ mod arm32;
|
||||
#[cfg(feature = "arm64")]
|
||||
pub(crate) mod aarch64;
|
||||
|
||||
#[cfg(feature = "s390x")]
|
||||
mod s390x;
|
||||
|
||||
pub mod unwind;
|
||||
|
||||
mod call_conv;
|
||||
@@ -123,7 +125,7 @@ macro_rules! isa_builder {
|
||||
/// The "variant" for a given target. On one platform (x86-64), we have two
|
||||
/// backends, the "old" and "new" one; the new one is the default if included
|
||||
/// in the build configuration and not otherwise specified.
|
||||
#[derive(Clone, Copy)]
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub enum BackendVariant {
|
||||
/// Any backend available.
|
||||
Any,
|
||||
@@ -150,18 +152,19 @@ pub fn lookup_variant(triple: Triple, variant: BackendVariant) -> Result<Builder
|
||||
isa_builder!(x86, (feature = "x86"), triple)
|
||||
}
|
||||
(Architecture::X86_64, BackendVariant::MachInst) => {
|
||||
isa_builder!(x64, (feature = "x64"), triple)
|
||||
isa_builder!(x64, (feature = "x86"), triple)
|
||||
}
|
||||
#[cfg(feature = "x64")]
|
||||
#[cfg(not(feature = "old-x86-backend"))]
|
||||
(Architecture::X86_64, BackendVariant::Any) => {
|
||||
isa_builder!(x64, (feature = "x64"), triple)
|
||||
isa_builder!(x64, (feature = "x86"), triple)
|
||||
}
|
||||
#[cfg(not(feature = "x64"))]
|
||||
#[cfg(feature = "old-x86-backend")]
|
||||
(Architecture::X86_64, BackendVariant::Any) => {
|
||||
isa_builder!(x86, (feature = "x86"), triple)
|
||||
}
|
||||
(Architecture::Arm { .. }, _) => isa_builder!(arm32, (feature = "arm32"), triple),
|
||||
(Architecture::Aarch64 { .. }, _) => isa_builder!(aarch64, (feature = "arm64"), triple),
|
||||
(Architecture::S390x { .. }, _) => isa_builder!(s390x, (feature = "s390x"), triple),
|
||||
_ => Err(LookupError::Unsupported),
|
||||
}
|
||||
}
|
||||
@@ -180,17 +183,30 @@ pub fn lookup_by_name(name: &str) -> Result<Builder, LookupError> {
|
||||
}
|
||||
|
||||
/// Describes reason for target lookup failure
|
||||
#[derive(Error, PartialEq, Eq, Copy, Clone, Debug)]
|
||||
#[derive(PartialEq, Eq, Copy, Clone, Debug)]
|
||||
pub enum LookupError {
|
||||
/// Support for this target was disabled in the current build.
|
||||
#[error("Support for this target is disabled")]
|
||||
SupportDisabled,
|
||||
|
||||
/// Support for this target has not yet been implemented.
|
||||
#[error("Support for this target has not been implemented yet")]
|
||||
Unsupported,
|
||||
}
|
||||
|
||||
// This is manually implementing Error and Display instead of using thiserror to reduce the amount
|
||||
// of dependencies used by Cranelift.
|
||||
impl std::error::Error for LookupError {}
|
||||
|
||||
impl fmt::Display for LookupError {
|
||||
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||||
match self {
|
||||
LookupError::SupportDisabled => write!(f, "Support for this target is disabled"),
|
||||
LookupError::Unsupported => {
|
||||
write!(f, "Support for this target has not been implemented yet")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Builder for a `TargetIsa`.
|
||||
/// Modify the ISA-specific settings before creating the `TargetIsa` trait object with `finish`.
|
||||
#[derive(Clone)]
|
||||
@@ -201,6 +217,16 @@ pub struct Builder {
|
||||
}
|
||||
|
||||
impl Builder {
|
||||
/// Gets the triple for the builder.
|
||||
pub fn triple(&self) -> &Triple {
|
||||
&self.triple
|
||||
}
|
||||
|
||||
/// Iterates the available settings in the builder.
|
||||
pub fn iter(&self) -> impl Iterator<Item = settings::Setting> {
|
||||
self.setup.iter()
|
||||
}
|
||||
|
||||
/// Combine the ISA-specific settings with the provided ISA-independent settings and allocate a
|
||||
/// fully configured `TargetIsa` trait object.
|
||||
pub fn finish(self, shared_flags: settings::Flags) -> Box<dyn TargetIsa> {
|
||||
@@ -265,6 +291,14 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
|
||||
/// Get the ISA-independent flags that were used to make this trait object.
|
||||
fn flags(&self) -> &settings::Flags;
|
||||
|
||||
/// Get the ISA-dependent flag values that were used to make this trait object.
|
||||
fn isa_flags(&self) -> Vec<settings::Value>;
|
||||
|
||||
/// Get the variant of this ISA (Legacy or MachInst).
|
||||
fn variant(&self) -> BackendVariant {
|
||||
BackendVariant::Legacy
|
||||
}
|
||||
|
||||
/// Hashes all flags, both ISA-independent and ISA-specific, into the
|
||||
/// specified hasher.
|
||||
fn hash_all_flags(&self, hasher: &mut dyn Hasher);
|
||||
@@ -460,6 +494,18 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
|
||||
/// IntCC condition for Unsigned Subtraction Overflow (Borrow/Carry).
|
||||
fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC;
|
||||
|
||||
/// Returns the flavor of unwind information emitted for this target.
|
||||
fn unwind_info_kind(&self) -> UnwindInfoKind {
|
||||
match self.triple().operating_system {
|
||||
#[cfg(feature = "unwind")]
|
||||
OperatingSystem::Windows => UnwindInfoKind::Windows,
|
||||
#[cfg(feature = "unwind")]
|
||||
_ => UnwindInfoKind::SystemV,
|
||||
#[cfg(not(feature = "unwind"))]
|
||||
_ => UnwindInfoKind::None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates unwind information for the function.
|
||||
///
|
||||
/// Returns `None` if there is no unwind information for the function.
|
||||
|
||||
@@ -15,8 +15,7 @@ use crate::isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encoding
|
||||
use crate::isa::Builder as IsaBuilder;
|
||||
use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
|
||||
use crate::regalloc;
|
||||
use alloc::borrow::Cow;
|
||||
use alloc::boxed::Box;
|
||||
use alloc::{borrow::Cow, boxed::Box, vec::Vec};
|
||||
use core::any::Any;
|
||||
use core::fmt;
|
||||
use core::hash::{Hash, Hasher};
|
||||
@@ -70,6 +69,10 @@ impl TargetIsa for Isa {
|
||||
&self.shared_flags
|
||||
}
|
||||
|
||||
fn isa_flags(&self) -> Vec<shared_settings::Value> {
|
||||
self.isa_flags.iter().collect()
|
||||
}
|
||||
|
||||
fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) {
|
||||
self.shared_flags.hash(&mut hasher);
|
||||
self.isa_flags.hash(&mut hasher);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
//! RISC-V Settings.
|
||||
|
||||
use crate::settings::{self, detail, Builder};
|
||||
use crate::settings::{self, detail, Builder, Value};
|
||||
use core::fmt;
|
||||
|
||||
// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs`. This file contains a
|
||||
|
||||
770
cranelift/codegen/src/isa/s390x/abi.rs
Normal file
770
cranelift/codegen/src/isa/s390x/abi.rs
Normal file
@@ -0,0 +1,770 @@
|
||||
//! Implementation of a standard S390x ABI.
|
||||
//!
|
||||
//! This machine uses the "vanilla" ABI implementation from abi_impl.rs,
|
||||
//! however a few details are different from the description there:
|
||||
//!
|
||||
//! - On s390x, the caller must provide a "register save area" of 160
|
||||
//! bytes to any function it calls. The called function is free to use
|
||||
//! this space for any purpose; usually to save callee-saved GPRs.
|
||||
//! (Note that while this area is allocated by the caller, it is counted
|
||||
//! as part of the callee's stack frame; in particular, the callee's CFA
|
||||
//! is the top of the register save area, not the incoming SP value.)
|
||||
//!
|
||||
//! - Overflow arguments are passed on the stack starting immediately
|
||||
//! above the register save area. On s390x, this space is allocated
|
||||
//! only once directly in the prologue, using a size large enough to
|
||||
//! hold overflow arguments for every call in the function.
|
||||
//!
|
||||
//! - On s390x we do not use a frame pointer register; instead, every
|
||||
//! element of the stack frame is addressed via (constant) offsets
|
||||
//! from the stack pointer. Note that due to the above (and because
|
||||
//! there are no variable-sized stack allocations in cranelift), the
|
||||
//! value of the stack pointer register never changes after the
|
||||
//! initial allocation in the function prologue.
|
||||
//!
|
||||
//! Overall, the stack frame layout on s390x is as follows:
|
||||
//!
|
||||
//! ```plain
|
||||
//! (high address)
|
||||
//!
|
||||
//! +---------------------------+
|
||||
//! | ... |
|
||||
//! CFA -----> | stack args |
|
||||
//! +---------------------------+
|
||||
//! | ... |
|
||||
//! | 160 bytes reg save area |
|
||||
//! SP at function entry -----> | (used to save GPRs) |
|
||||
//! +---------------------------+
|
||||
//! | ... |
|
||||
//! | clobbered callee-saves |
|
||||
//! | (used to save FPRs) |
|
||||
//! unwind-frame base ----> | (alloc'd by prologue) |
|
||||
//! +---------------------------+
|
||||
//! | ... |
|
||||
//! | spill slots |
|
||||
//! | (accessed via nominal SP) |
|
||||
//! | ... |
|
||||
//! | stack slots |
|
||||
//! | (accessed via nominal SP) |
|
||||
//! nominal SP ---------------> | (alloc'd by prologue) |
|
||||
//! +---------------------------+
|
||||
//! | ... |
|
||||
//! | args for call |
|
||||
//! | outgoing reg save area |
|
||||
//! SP during function ------> | (alloc'd by prologue) |
|
||||
//! +---------------------------+
|
||||
//!
|
||||
//! (low address)
|
||||
//! ```
|
||||
|
||||
use crate::ir;
|
||||
use crate::ir::condcodes::IntCC;
|
||||
use crate::ir::types;
|
||||
use crate::ir::MemFlags;
|
||||
use crate::ir::Type;
|
||||
use crate::isa;
|
||||
use crate::isa::s390x::inst::*;
|
||||
use crate::isa::unwind::UnwindInst;
|
||||
use crate::machinst::*;
|
||||
use crate::settings;
|
||||
use crate::{CodegenError, CodegenResult};
|
||||
use alloc::boxed::Box;
|
||||
use alloc::vec::Vec;
|
||||
use regalloc::{RealReg, Reg, RegClass, Set, Writable};
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::convert::TryFrom;
|
||||
|
||||
// We use a generic implementation that factors out ABI commonalities.
|
||||
|
||||
/// Support for the S390x ABI from the callee side (within a function body).
|
||||
pub type S390xABICallee = ABICalleeImpl<S390xMachineDeps>;
|
||||
|
||||
/// Support for the S390x ABI from the caller side (at a callsite).
|
||||
pub type S390xABICaller = ABICallerImpl<S390xMachineDeps>;
|
||||
|
||||
/// ABI Register usage
|
||||
|
||||
fn in_int_reg(ty: Type) -> bool {
|
||||
match ty {
|
||||
types::I8 | types::I16 | types::I32 | types::I64 | types::R64 => true,
|
||||
types::B1 | types::B8 | types::B16 | types::B32 | types::B64 => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn in_flt_reg(ty: Type) -> bool {
|
||||
match ty {
|
||||
types::F32 | types::F64 => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_intreg_for_arg(idx: usize) -> Option<Reg> {
|
||||
match idx {
|
||||
0 => Some(regs::gpr(2)),
|
||||
1 => Some(regs::gpr(3)),
|
||||
2 => Some(regs::gpr(4)),
|
||||
3 => Some(regs::gpr(5)),
|
||||
4 => Some(regs::gpr(6)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_fltreg_for_arg(idx: usize) -> Option<Reg> {
|
||||
match idx {
|
||||
0 => Some(regs::fpr(0)),
|
||||
1 => Some(regs::fpr(2)),
|
||||
2 => Some(regs::fpr(4)),
|
||||
3 => Some(regs::fpr(6)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_intreg_for_ret(idx: usize) -> Option<Reg> {
|
||||
match idx {
|
||||
0 => Some(regs::gpr(2)),
|
||||
// ABI extension to support multi-value returns:
|
||||
1 => Some(regs::gpr(3)),
|
||||
2 => Some(regs::gpr(4)),
|
||||
3 => Some(regs::gpr(5)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_fltreg_for_ret(idx: usize) -> Option<Reg> {
|
||||
match idx {
|
||||
0 => Some(regs::fpr(0)),
|
||||
// ABI extension to support multi-value returns:
|
||||
1 => Some(regs::fpr(2)),
|
||||
2 => Some(regs::fpr(4)),
|
||||
3 => Some(regs::fpr(6)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// This is the limit for the size of argument and return-value areas on the
|
||||
/// stack. We place a reasonable limit here to avoid integer overflow issues
|
||||
/// with 32-bit arithmetic: for now, 128 MB.
|
||||
static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024;
|
||||
|
||||
impl Into<MemArg> for StackAMode {
|
||||
fn into(self) -> MemArg {
|
||||
match self {
|
||||
StackAMode::FPOffset(off, _ty) => MemArg::InitialSPOffset { off },
|
||||
StackAMode::NominalSPOffset(off, _ty) => MemArg::NominalSPOffset { off },
|
||||
StackAMode::SPOffset(off, _ty) => {
|
||||
MemArg::reg_plus_off(stack_reg(), off, MemFlags::trusted())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// S390x-specific ABI behavior. This struct just serves as an implementation
|
||||
/// point for the trait; it is never actually instantiated.
|
||||
pub struct S390xMachineDeps;
|
||||
|
||||
impl ABIMachineSpec for S390xMachineDeps {
|
||||
type I = Inst;
|
||||
|
||||
fn word_bits() -> u32 {
|
||||
64
|
||||
}
|
||||
|
||||
/// Return required stack alignment in bytes.
|
||||
fn stack_align(_call_conv: isa::CallConv) -> u32 {
|
||||
8
|
||||
}
|
||||
|
||||
fn compute_arg_locs(
|
||||
call_conv: isa::CallConv,
|
||||
_flags: &settings::Flags,
|
||||
params: &[ir::AbiParam],
|
||||
args_or_rets: ArgsOrRets,
|
||||
add_ret_area_ptr: bool,
|
||||
) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
|
||||
let mut next_gpr = 0;
|
||||
let mut next_fpr = 0;
|
||||
let mut next_stack: u64 = 0;
|
||||
let mut ret = vec![];
|
||||
|
||||
if args_or_rets == ArgsOrRets::Args {
|
||||
next_stack = 160;
|
||||
}
|
||||
|
||||
for i in 0..params.len() {
|
||||
let param = ¶ms[i];
|
||||
|
||||
// Validate "purpose".
|
||||
match ¶m.purpose {
|
||||
&ir::ArgumentPurpose::VMContext
|
||||
| &ir::ArgumentPurpose::Normal
|
||||
| &ir::ArgumentPurpose::StackLimit
|
||||
| &ir::ArgumentPurpose::SignatureId => {}
|
||||
_ => panic!(
|
||||
"Unsupported argument purpose {:?} in signature: {:?}",
|
||||
param.purpose, params
|
||||
),
|
||||
}
|
||||
|
||||
let intreg = in_int_reg(param.value_type);
|
||||
let fltreg = in_flt_reg(param.value_type);
|
||||
debug_assert!(intreg || fltreg);
|
||||
debug_assert!(!(intreg && fltreg));
|
||||
|
||||
let (next_reg, candidate) = if intreg {
|
||||
let candidate = match args_or_rets {
|
||||
ArgsOrRets::Args => get_intreg_for_arg(next_gpr),
|
||||
ArgsOrRets::Rets => get_intreg_for_ret(next_gpr),
|
||||
};
|
||||
(&mut next_gpr, candidate)
|
||||
} else {
|
||||
let candidate = match args_or_rets {
|
||||
ArgsOrRets::Args => get_fltreg_for_arg(next_fpr),
|
||||
ArgsOrRets::Rets => get_fltreg_for_ret(next_fpr),
|
||||
};
|
||||
(&mut next_fpr, candidate)
|
||||
};
|
||||
|
||||
// In the Wasmtime ABI only the first return value can be in a register.
|
||||
let candidate =
|
||||
if call_conv.extends_wasmtime() && args_or_rets == ArgsOrRets::Rets && i > 0 {
|
||||
None
|
||||
} else {
|
||||
candidate
|
||||
};
|
||||
|
||||
if let Some(reg) = candidate {
|
||||
ret.push(ABIArg::reg(
|
||||
reg.to_real_reg(),
|
||||
param.value_type,
|
||||
param.extension,
|
||||
param.purpose,
|
||||
));
|
||||
*next_reg += 1;
|
||||
} else {
|
||||
// Compute size. Every argument or return value takes a slot of
|
||||
// at least 8 bytes, except for return values in the Wasmtime ABI.
|
||||
let size = (ty_bits(param.value_type) / 8) as u64;
|
||||
let slot_size = if call_conv.extends_wasmtime() && args_or_rets == ArgsOrRets::Rets
|
||||
{
|
||||
size
|
||||
} else {
|
||||
std::cmp::max(size, 8)
|
||||
};
|
||||
|
||||
// Align the stack slot.
|
||||
debug_assert!(slot_size.is_power_of_two());
|
||||
next_stack = align_to(next_stack, slot_size);
|
||||
|
||||
// If the type is actually of smaller size (and the argument
|
||||
// was not extended), it is passed right-aligned.
|
||||
let offset = if size < slot_size && param.extension == ir::ArgumentExtension::None {
|
||||
slot_size - size
|
||||
} else {
|
||||
0
|
||||
};
|
||||
ret.push(ABIArg::stack(
|
||||
(next_stack + offset) as i64,
|
||||
param.value_type,
|
||||
param.extension,
|
||||
param.purpose,
|
||||
));
|
||||
next_stack += slot_size;
|
||||
}
|
||||
}
|
||||
|
||||
next_stack = align_to(next_stack, 8);
|
||||
|
||||
let extra_arg = if add_ret_area_ptr {
|
||||
debug_assert!(args_or_rets == ArgsOrRets::Args);
|
||||
if let Some(reg) = get_intreg_for_arg(next_gpr) {
|
||||
ret.push(ABIArg::reg(
|
||||
reg.to_real_reg(),
|
||||
types::I64,
|
||||
ir::ArgumentExtension::None,
|
||||
ir::ArgumentPurpose::Normal,
|
||||
));
|
||||
} else {
|
||||
ret.push(ABIArg::stack(
|
||||
next_stack as i64,
|
||||
types::I64,
|
||||
ir::ArgumentExtension::None,
|
||||
ir::ArgumentPurpose::Normal,
|
||||
));
|
||||
next_stack += 8;
|
||||
}
|
||||
Some(ret.len() - 1)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// To avoid overflow issues, limit the arg/return size to something
|
||||
// reasonable -- here, 128 MB.
|
||||
if next_stack > STACK_ARG_RET_SIZE_LIMIT {
|
||||
return Err(CodegenError::ImplLimitExceeded);
|
||||
}
|
||||
|
||||
Ok((ret, next_stack as i64, extra_arg))
|
||||
}
|
||||
|
||||
fn fp_to_arg_offset(_call_conv: isa::CallConv, _flags: &settings::Flags) -> i64 {
|
||||
0
|
||||
}
|
||||
|
||||
fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Inst {
|
||||
Inst::gen_load(into_reg, mem.into(), ty)
|
||||
}
|
||||
|
||||
fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst {
|
||||
Inst::gen_store(mem.into(), from_reg, ty)
|
||||
}
|
||||
|
||||
fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
|
||||
Inst::gen_move(to_reg, from_reg, ty)
|
||||
}
|
||||
|
||||
fn gen_extend(
|
||||
to_reg: Writable<Reg>,
|
||||
from_reg: Reg,
|
||||
signed: bool,
|
||||
from_bits: u8,
|
||||
to_bits: u8,
|
||||
) -> Inst {
|
||||
assert!(from_bits < to_bits);
|
||||
Inst::Extend {
|
||||
rd: to_reg,
|
||||
rn: from_reg,
|
||||
signed,
|
||||
from_bits,
|
||||
to_bits,
|
||||
}
|
||||
}
|
||||
|
||||
fn gen_ret() -> Inst {
|
||||
Inst::Ret { link: gpr(14) }
|
||||
}
|
||||
|
||||
fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallInstVec<Inst> {
|
||||
let mut insts = SmallVec::new();
|
||||
if let Some(imm) = UImm12::maybe_from_u64(imm as u64) {
|
||||
insts.push(Inst::LoadAddr {
|
||||
rd: into_reg,
|
||||
mem: MemArg::BXD12 {
|
||||
base: from_reg,
|
||||
index: zero_reg(),
|
||||
disp: imm,
|
||||
flags: MemFlags::trusted(),
|
||||
},
|
||||
});
|
||||
} else if let Some(imm) = SImm20::maybe_from_i64(imm as i64) {
|
||||
insts.push(Inst::LoadAddr {
|
||||
rd: into_reg,
|
||||
mem: MemArg::BXD20 {
|
||||
base: from_reg,
|
||||
index: zero_reg(),
|
||||
disp: imm,
|
||||
flags: MemFlags::trusted(),
|
||||
},
|
||||
});
|
||||
} else {
|
||||
if from_reg != into_reg.to_reg() {
|
||||
insts.push(Inst::mov64(into_reg, from_reg));
|
||||
}
|
||||
insts.push(Inst::AluRUImm32 {
|
||||
alu_op: ALUOp::Add64,
|
||||
rd: into_reg,
|
||||
imm,
|
||||
});
|
||||
}
|
||||
insts
|
||||
}
|
||||
|
||||
fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Inst> {
|
||||
let mut insts = SmallVec::new();
|
||||
insts.push(Inst::CmpTrapRR {
|
||||
op: CmpOp::CmpL64,
|
||||
rn: stack_reg(),
|
||||
rm: limit_reg,
|
||||
cond: Cond::from_intcc(IntCC::UnsignedLessThanOrEqual),
|
||||
trap_code: ir::TrapCode::StackOverflow,
|
||||
});
|
||||
insts
|
||||
}
|
||||
|
||||
fn gen_epilogue_placeholder() -> Inst {
|
||||
Inst::EpiloguePlaceholder
|
||||
}
|
||||
|
||||
fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>, _ty: Type) -> Inst {
|
||||
let mem = mem.into();
|
||||
Inst::LoadAddr { rd: into_reg, mem }
|
||||
}
|
||||
|
||||
fn get_stacklimit_reg() -> Reg {
|
||||
spilltmp_reg()
|
||||
}
|
||||
|
||||
fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Inst {
|
||||
let mem = MemArg::reg_plus_off(base, offset.into(), MemFlags::trusted());
|
||||
Inst::gen_load(into_reg, mem, ty)
|
||||
}
|
||||
|
||||
fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst {
|
||||
let mem = MemArg::reg_plus_off(base, offset.into(), MemFlags::trusted());
|
||||
Inst::gen_store(mem, from_reg, ty)
|
||||
}
|
||||
|
||||
fn gen_sp_reg_adjust(imm: i32) -> SmallInstVec<Inst> {
|
||||
if imm == 0 {
|
||||
return SmallVec::new();
|
||||
}
|
||||
|
||||
let mut insts = SmallVec::new();
|
||||
if let Ok(imm) = i16::try_from(imm) {
|
||||
insts.push(Inst::AluRSImm16 {
|
||||
alu_op: ALUOp::Add64,
|
||||
rd: writable_stack_reg(),
|
||||
imm,
|
||||
});
|
||||
} else {
|
||||
insts.push(Inst::AluRSImm32 {
|
||||
alu_op: ALUOp::Add64,
|
||||
rd: writable_stack_reg(),
|
||||
imm,
|
||||
});
|
||||
}
|
||||
insts
|
||||
}
|
||||
|
||||
fn gen_nominal_sp_adj(offset: i32) -> Inst {
|
||||
Inst::VirtualSPOffsetAdj {
|
||||
offset: offset.into(),
|
||||
}
|
||||
}
|
||||
|
||||
fn gen_prologue_frame_setup(_flags: &settings::Flags) -> SmallInstVec<Inst> {
|
||||
SmallVec::new()
|
||||
}
|
||||
|
||||
fn gen_epilogue_frame_restore(_flags: &settings::Flags) -> SmallInstVec<Inst> {
|
||||
SmallVec::new()
|
||||
}
|
||||
|
||||
fn gen_probestack(_: u32) -> SmallInstVec<Self::I> {
|
||||
// TODO: implement if we ever require stack probes on an s390x host
|
||||
// (unlikely unless Lucet is ported)
|
||||
smallvec![]
|
||||
}
|
||||
|
||||
// Returns stack bytes used as well as instructions. Does not adjust
|
||||
// nominal SP offset; abi_impl generic code will do that.
|
||||
fn gen_clobber_save(
|
||||
call_conv: isa::CallConv,
|
||||
flags: &settings::Flags,
|
||||
clobbers: &Set<Writable<RealReg>>,
|
||||
fixed_frame_storage_size: u32,
|
||||
outgoing_args_size: u32,
|
||||
) -> (u64, SmallVec<[Inst; 16]>) {
|
||||
let mut insts = SmallVec::new();
|
||||
|
||||
// Collect clobbered registers.
|
||||
let (clobbered_gpr, clobbered_fpr) = get_regs_saved_in_prologue(call_conv, clobbers);
|
||||
let mut first_clobbered_gpr = 16;
|
||||
for reg in clobbered_gpr {
|
||||
let enc = reg.to_reg().get_hw_encoding();
|
||||
if enc < first_clobbered_gpr {
|
||||
first_clobbered_gpr = enc;
|
||||
}
|
||||
}
|
||||
let clobber_size = clobbered_fpr.len() * 8;
|
||||
if flags.unwind_info() {
|
||||
insts.push(Inst::Unwind {
|
||||
inst: UnwindInst::DefineNewFrame {
|
||||
offset_upward_to_caller_sp: 160,
|
||||
offset_downward_to_clobbers: clobber_size as u32,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
// Use STMG to save clobbered GPRs into save area.
|
||||
if first_clobbered_gpr < 16 {
|
||||
let offset = 8 * first_clobbered_gpr as i64;
|
||||
insts.push(Inst::StoreMultiple64 {
|
||||
rt: gpr(first_clobbered_gpr as u8),
|
||||
rt2: gpr(15),
|
||||
addr_reg: stack_reg(),
|
||||
addr_off: SImm20::maybe_from_i64(offset).unwrap(),
|
||||
});
|
||||
}
|
||||
if flags.unwind_info() {
|
||||
for i in first_clobbered_gpr..16 {
|
||||
insts.push(Inst::Unwind {
|
||||
inst: UnwindInst::SaveReg {
|
||||
clobber_offset: clobber_size as u32 + (i * 8) as u32,
|
||||
reg: gpr(i as u8).to_real_reg(),
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Decrement stack pointer.
|
||||
let stack_size =
|
||||
outgoing_args_size as i32 + clobber_size as i32 + fixed_frame_storage_size as i32;
|
||||
insts.extend(Self::gen_sp_reg_adjust(-stack_size));
|
||||
if flags.unwind_info() {
|
||||
insts.push(Inst::Unwind {
|
||||
inst: UnwindInst::StackAlloc {
|
||||
size: stack_size as u32,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
let sp_adj = outgoing_args_size as i32;
|
||||
if sp_adj > 0 {
|
||||
insts.push(Self::gen_nominal_sp_adj(sp_adj));
|
||||
}
|
||||
|
||||
// Save FPRs.
|
||||
for (i, reg) in clobbered_fpr.iter().enumerate() {
|
||||
insts.push(Inst::FpuStore64 {
|
||||
rd: reg.to_reg().to_reg(),
|
||||
mem: MemArg::reg_plus_off(
|
||||
stack_reg(),
|
||||
(i * 8) as i64 + outgoing_args_size as i64 + fixed_frame_storage_size as i64,
|
||||
MemFlags::trusted(),
|
||||
),
|
||||
});
|
||||
if flags.unwind_info() {
|
||||
insts.push(Inst::Unwind {
|
||||
inst: UnwindInst::SaveReg {
|
||||
clobber_offset: (i * 8) as u32,
|
||||
reg: reg.to_reg(),
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
(clobber_size as u64, insts)
|
||||
}
|
||||
|
||||
fn gen_clobber_restore(
|
||||
call_conv: isa::CallConv,
|
||||
_: &settings::Flags,
|
||||
clobbers: &Set<Writable<RealReg>>,
|
||||
fixed_frame_storage_size: u32,
|
||||
outgoing_args_size: u32,
|
||||
) -> SmallVec<[Inst; 16]> {
|
||||
let mut insts = SmallVec::new();
|
||||
|
||||
// Collect clobbered registers.
|
||||
let (clobbered_gpr, clobbered_fpr) = get_regs_saved_in_prologue(call_conv, clobbers);
|
||||
let mut first_clobbered_gpr = 16;
|
||||
for reg in clobbered_gpr {
|
||||
let enc = reg.to_reg().get_hw_encoding();
|
||||
if enc < first_clobbered_gpr {
|
||||
first_clobbered_gpr = enc;
|
||||
}
|
||||
}
|
||||
let clobber_size = clobbered_fpr.len() * 8;
|
||||
|
||||
// Restore FPRs.
|
||||
for (i, reg) in clobbered_fpr.iter().enumerate() {
|
||||
insts.push(Inst::FpuLoad64 {
|
||||
rd: Writable::from_reg(reg.to_reg().to_reg()),
|
||||
mem: MemArg::reg_plus_off(
|
||||
stack_reg(),
|
||||
(i * 8) as i64 + outgoing_args_size as i64 + fixed_frame_storage_size as i64,
|
||||
MemFlags::trusted(),
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
// Increment stack pointer unless it will be restored implicitly.
|
||||
let stack_size =
|
||||
outgoing_args_size as i32 + clobber_size as i32 + fixed_frame_storage_size as i32;
|
||||
let implicit_sp_restore = first_clobbered_gpr < 16
|
||||
&& SImm20::maybe_from_i64(8 * first_clobbered_gpr as i64 + stack_size as i64).is_some();
|
||||
if !implicit_sp_restore {
|
||||
insts.extend(Self::gen_sp_reg_adjust(stack_size));
|
||||
}
|
||||
|
||||
// Use LMG to restore clobbered GPRs from save area.
|
||||
if first_clobbered_gpr < 16 {
|
||||
let mut offset = 8 * first_clobbered_gpr as i64;
|
||||
if implicit_sp_restore {
|
||||
offset += stack_size as i64;
|
||||
}
|
||||
insts.push(Inst::LoadMultiple64 {
|
||||
rt: writable_gpr(first_clobbered_gpr as u8),
|
||||
rt2: writable_gpr(15),
|
||||
addr_reg: stack_reg(),
|
||||
addr_off: SImm20::maybe_from_i64(offset).unwrap(),
|
||||
});
|
||||
}
|
||||
|
||||
insts
|
||||
}
|
||||
|
||||
fn gen_call(
|
||||
dest: &CallDest,
|
||||
uses: Vec<Reg>,
|
||||
defs: Vec<Writable<Reg>>,
|
||||
opcode: ir::Opcode,
|
||||
tmp: Writable<Reg>,
|
||||
_callee_conv: isa::CallConv,
|
||||
_caller_conv: isa::CallConv,
|
||||
) -> SmallVec<[(InstIsSafepoint, Inst); 2]> {
|
||||
let mut insts = SmallVec::new();
|
||||
match &dest {
|
||||
&CallDest::ExtName(ref name, RelocDistance::Near) => insts.push((
|
||||
InstIsSafepoint::Yes,
|
||||
Inst::Call {
|
||||
link: writable_gpr(14),
|
||||
info: Box::new(CallInfo {
|
||||
dest: name.clone(),
|
||||
uses,
|
||||
defs,
|
||||
opcode,
|
||||
}),
|
||||
},
|
||||
)),
|
||||
&CallDest::ExtName(ref name, RelocDistance::Far) => {
|
||||
insts.push((
|
||||
InstIsSafepoint::No,
|
||||
Inst::LoadExtNameFar {
|
||||
rd: tmp,
|
||||
name: Box::new(name.clone()),
|
||||
offset: 0,
|
||||
},
|
||||
));
|
||||
insts.push((
|
||||
InstIsSafepoint::Yes,
|
||||
Inst::CallInd {
|
||||
link: writable_gpr(14),
|
||||
info: Box::new(CallIndInfo {
|
||||
rn: tmp.to_reg(),
|
||||
uses,
|
||||
defs,
|
||||
opcode,
|
||||
}),
|
||||
},
|
||||
));
|
||||
}
|
||||
&CallDest::Reg(reg) => insts.push((
|
||||
InstIsSafepoint::Yes,
|
||||
Inst::CallInd {
|
||||
link: writable_gpr(14),
|
||||
info: Box::new(CallIndInfo {
|
||||
rn: *reg,
|
||||
uses,
|
||||
defs,
|
||||
opcode,
|
||||
}),
|
||||
},
|
||||
)),
|
||||
}
|
||||
|
||||
insts
|
||||
}
|
||||
|
||||
fn gen_memcpy(
|
||||
_call_conv: isa::CallConv,
|
||||
_dst: Reg,
|
||||
_src: Reg,
|
||||
_size: usize,
|
||||
) -> SmallVec<[Self::I; 8]> {
|
||||
unimplemented!("StructArgs not implemented for S390X yet");
|
||||
}
|
||||
|
||||
fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 {
|
||||
// We allocate in terms of 8-byte slots.
|
||||
match (rc, ty) {
|
||||
(RegClass::I64, _) => 1,
|
||||
(RegClass::F64, _) => 1,
|
||||
_ => panic!("Unexpected register class!"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the current virtual-SP offset from an instruction-emission state.
|
||||
fn get_virtual_sp_offset_from_state(s: &EmitState) -> i64 {
|
||||
s.virtual_sp_offset
|
||||
}
|
||||
|
||||
/// Get the nominal-SP-to-FP offset from an instruction-emission state.
|
||||
fn get_nominal_sp_to_fp(s: &EmitState) -> i64 {
|
||||
s.initial_sp_offset
|
||||
}
|
||||
|
||||
fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec<Writable<Reg>> {
|
||||
let mut caller_saved = Vec::new();
|
||||
for i in 0..15 {
|
||||
let x = writable_gpr(i);
|
||||
if is_reg_clobbered_by_call(call_conv_of_callee, x.to_reg().to_real_reg()) {
|
||||
caller_saved.push(x);
|
||||
}
|
||||
}
|
||||
for i in 0..15 {
|
||||
let v = writable_fpr(i);
|
||||
if is_reg_clobbered_by_call(call_conv_of_callee, v.to_reg().to_real_reg()) {
|
||||
caller_saved.push(v);
|
||||
}
|
||||
}
|
||||
caller_saved
|
||||
}
|
||||
|
||||
fn get_ext_mode(
|
||||
_call_conv: isa::CallConv,
|
||||
specified: ir::ArgumentExtension,
|
||||
) -> ir::ArgumentExtension {
|
||||
specified
|
||||
}
|
||||
}
|
||||
|
||||
fn is_reg_saved_in_prologue(_call_conv: isa::CallConv, r: RealReg) -> bool {
|
||||
match r.get_class() {
|
||||
RegClass::I64 => {
|
||||
// r6 - r15 inclusive are callee-saves.
|
||||
r.get_hw_encoding() >= 6 && r.get_hw_encoding() <= 15
|
||||
}
|
||||
RegClass::F64 => {
|
||||
// f8 - f15 inclusive are callee-saves.
|
||||
r.get_hw_encoding() >= 8 && r.get_hw_encoding() <= 15
|
||||
}
|
||||
_ => panic!("Unexpected RegClass"),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_regs_saved_in_prologue(
|
||||
call_conv: isa::CallConv,
|
||||
regs: &Set<Writable<RealReg>>,
|
||||
) -> (Vec<Writable<RealReg>>, Vec<Writable<RealReg>>) {
|
||||
let mut int_saves = vec![];
|
||||
let mut fpr_saves = vec![];
|
||||
for ® in regs.iter() {
|
||||
if is_reg_saved_in_prologue(call_conv, reg.to_reg()) {
|
||||
match reg.to_reg().get_class() {
|
||||
RegClass::I64 => int_saves.push(reg),
|
||||
RegClass::F64 => fpr_saves.push(reg),
|
||||
_ => panic!("Unexpected RegClass"),
|
||||
}
|
||||
}
|
||||
}
|
||||
// Sort registers for deterministic code output.
|
||||
int_saves.sort_by_key(|r| r.to_reg().get_index());
|
||||
fpr_saves.sort_by_key(|r| r.to_reg().get_index());
|
||||
(int_saves, fpr_saves)
|
||||
}
|
||||
|
||||
fn is_reg_clobbered_by_call(_call_conv: isa::CallConv, r: RealReg) -> bool {
|
||||
match r.get_class() {
|
||||
RegClass::I64 => {
|
||||
// r0 - r5 inclusive are caller-saves.
|
||||
r.get_hw_encoding() <= 5
|
||||
}
|
||||
RegClass::F64 => {
|
||||
// f0 - f7 inclusive are caller-saves.
|
||||
r.get_hw_encoding() <= 7
|
||||
}
|
||||
_ => panic!("Unexpected RegClass"),
|
||||
}
|
||||
}
|
||||
317
cranelift/codegen/src/isa/s390x/inst/args.rs
Normal file
317
cranelift/codegen/src/isa/s390x/inst/args.rs
Normal file
@@ -0,0 +1,317 @@
|
||||
//! S390x ISA definitions: instruction arguments.
|
||||
|
||||
// Some variants are never constructed, but we still want them as options in the future.
|
||||
#![allow(dead_code)]
|
||||
|
||||
use crate::ir::condcodes::{FloatCC, IntCC};
|
||||
use crate::ir::MemFlags;
|
||||
use crate::isa::s390x::inst::*;
|
||||
use crate::machinst::MachLabel;
|
||||
|
||||
use regalloc::{PrettyPrint, RealRegUniverse, Reg};
|
||||
|
||||
use std::string::String;
|
||||
|
||||
//=============================================================================
|
||||
// Instruction sub-components (memory addresses): definitions
|
||||
|
||||
/// A memory argument to load/store, encapsulating the possible addressing modes.
///
/// The first group of variants are addressing modes that can be printed
/// directly; the second group are pseudo modes that must be eliminated
/// before printing (the `PrettyPrint` impl panics on them).
#[derive(Clone, Debug)]
pub enum MemArg {
    //
    // Real IBM Z addressing modes:
    //
    /// Base register, index register, and 12-bit unsigned displacement.
    BXD12 {
        base: Reg,
        index: Reg,
        disp: UImm12,
        flags: MemFlags,
    },

    /// Base register, index register, and 20-bit signed displacement.
    BXD20 {
        base: Reg,
        index: Reg,
        disp: SImm20,
        flags: MemFlags,
    },

    /// PC-relative Reference to a label.
    Label { target: BranchTarget },

    /// PC-relative Reference to a near symbol.
    Symbol {
        name: Box<ExternalName>,
        offset: i32,
        flags: MemFlags,
    },

    //
    // Virtual addressing modes that are lowered at emission time:
    //
    /// Arbitrary offset from a register. Converted to generation of large
    /// offsets with multiple instructions as necessary during code emission.
    RegOffset { reg: Reg, off: i64, flags: MemFlags },

    /// Offset from the stack pointer at function entry.
    InitialSPOffset { off: i64 },

    /// Offset from the "nominal stack pointer", which is where the real SP is
    /// just after stack and spill slots are allocated in the function prologue.
    /// At emission time, this is converted to `SPOffset` with a fixup added to
    /// the offset constant. The fixup is a running value that is tracked as
    /// emission iterates through instructions in linear order, and can be
    /// adjusted up and down with [Inst::VirtualSPOffsetAdj].
    ///
    /// The standard ABI is in charge of handling this (by emitting the
    /// adjustment meta-instructions). It maintains the invariant that "nominal
    /// SP" is where the actual SP is after the function prologue and before
    /// clobber pushes. See the diagram in the documentation for
    /// [the ABI module](crate::isa::s390x::abi) for more details.
    NominalSPOffset { off: i64 },
}
|
||||
|
||||
impl MemArg {
|
||||
/// Memory reference using an address in a register.
|
||||
pub fn reg(reg: Reg, flags: MemFlags) -> MemArg {
|
||||
MemArg::BXD12 {
|
||||
base: reg,
|
||||
index: zero_reg(),
|
||||
disp: UImm12::zero(),
|
||||
flags,
|
||||
}
|
||||
}
|
||||
|
||||
/// Memory reference using the sum of two registers as an address.
|
||||
pub fn reg_plus_reg(reg1: Reg, reg2: Reg, flags: MemFlags) -> MemArg {
|
||||
MemArg::BXD12 {
|
||||
base: reg1,
|
||||
index: reg2,
|
||||
disp: UImm12::zero(),
|
||||
flags,
|
||||
}
|
||||
}
|
||||
|
||||
/// Memory reference using the sum of a register an an offset as address.
|
||||
pub fn reg_plus_off(reg: Reg, off: i64, flags: MemFlags) -> MemArg {
|
||||
MemArg::RegOffset { reg, off, flags }
|
||||
}
|
||||
|
||||
pub(crate) fn get_flags(&self) -> MemFlags {
|
||||
match self {
|
||||
MemArg::BXD12 { flags, .. } => *flags,
|
||||
MemArg::BXD20 { flags, .. } => *flags,
|
||||
MemArg::RegOffset { flags, .. } => *flags,
|
||||
MemArg::Label { .. } => MemFlags::trusted(),
|
||||
MemArg::Symbol { flags, .. } => *flags,
|
||||
MemArg::InitialSPOffset { .. } => MemFlags::trusted(),
|
||||
MemArg::NominalSPOffset { .. } => MemFlags::trusted(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn can_trap(&self) -> bool {
|
||||
!self.get_flags().notrap()
|
||||
}
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
// Instruction sub-components (conditions, branches and branch targets):
|
||||
// definitions
|
||||
|
||||
/// Condition for conditional branches.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub struct Cond {
|
||||
mask: u8,
|
||||
}
|
||||
|
||||
impl Cond {
|
||||
pub fn from_mask(mask: u8) -> Cond {
|
||||
assert!(mask >= 1 && mask <= 14);
|
||||
Cond { mask }
|
||||
}
|
||||
|
||||
pub fn from_intcc(cc: IntCC) -> Cond {
|
||||
let mask = match cc {
|
||||
IntCC::Equal => 8,
|
||||
IntCC::NotEqual => 4 | 2,
|
||||
IntCC::SignedGreaterThanOrEqual => 8 | 2,
|
||||
IntCC::SignedGreaterThan => 2,
|
||||
IntCC::SignedLessThanOrEqual => 8 | 4,
|
||||
IntCC::SignedLessThan => 4,
|
||||
IntCC::UnsignedGreaterThanOrEqual => 8 | 2,
|
||||
IntCC::UnsignedGreaterThan => 2,
|
||||
IntCC::UnsignedLessThanOrEqual => 8 | 4,
|
||||
IntCC::UnsignedLessThan => 4,
|
||||
IntCC::Overflow => 1,
|
||||
IntCC::NotOverflow => 8 | 4 | 2,
|
||||
};
|
||||
Cond { mask }
|
||||
}
|
||||
|
||||
pub fn from_floatcc(cc: FloatCC) -> Cond {
|
||||
let mask = match cc {
|
||||
FloatCC::Ordered => 8 | 4 | 2,
|
||||
FloatCC::Unordered => 1,
|
||||
FloatCC::Equal => 8,
|
||||
FloatCC::NotEqual => 4 | 2 | 1,
|
||||
FloatCC::OrderedNotEqual => 4 | 2,
|
||||
FloatCC::UnorderedOrEqual => 8 | 1,
|
||||
FloatCC::LessThan => 4,
|
||||
FloatCC::LessThanOrEqual => 8 | 4,
|
||||
FloatCC::GreaterThan => 2,
|
||||
FloatCC::GreaterThanOrEqual => 8 | 2,
|
||||
FloatCC::UnorderedOrLessThan => 4 | 1,
|
||||
FloatCC::UnorderedOrLessThanOrEqual => 8 | 4 | 1,
|
||||
FloatCC::UnorderedOrGreaterThan => 2 | 1,
|
||||
FloatCC::UnorderedOrGreaterThanOrEqual => 8 | 2 | 1,
|
||||
};
|
||||
Cond { mask }
|
||||
}
|
||||
|
||||
/// Return the inverted condition.
|
||||
pub fn invert(self) -> Cond {
|
||||
Cond {
|
||||
mask: !self.mask & 15,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the machine encoding of this condition.
|
||||
pub fn bits(self) -> u8 {
|
||||
self.mask
|
||||
}
|
||||
}
|
||||
|
||||
/// A branch target. Either unresolved (basic-block index) or resolved (offset
|
||||
/// from end of current instruction).
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub enum BranchTarget {
|
||||
/// An unresolved reference to a Label, as passed into
|
||||
/// `lower_branch_group()`.
|
||||
Label(MachLabel),
|
||||
/// A fixed PC offset.
|
||||
ResolvedOffset(i32),
|
||||
}
|
||||
|
||||
impl BranchTarget {
|
||||
/// Return the target's label, if it is a label-based target.
|
||||
pub fn as_label(self) -> Option<MachLabel> {
|
||||
match self {
|
||||
BranchTarget::Label(l) => Some(l),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the target's offset, if specified, or zero if label-based.
|
||||
pub fn as_ri_offset_or_zero(self) -> u16 {
|
||||
let off = match self {
|
||||
BranchTarget::ResolvedOffset(off) => off >> 1,
|
||||
_ => 0,
|
||||
};
|
||||
assert!(off <= 0x7fff);
|
||||
assert!(off >= -0x8000);
|
||||
off as u16
|
||||
}
|
||||
|
||||
/// Return the target's offset, if specified, or zero if label-based.
|
||||
pub fn as_ril_offset_or_zero(self) -> u32 {
|
||||
let off = match self {
|
||||
BranchTarget::ResolvedOffset(off) => off >> 1,
|
||||
_ => 0,
|
||||
};
|
||||
off as u32
|
||||
}
|
||||
}
|
||||
|
||||
impl PrettyPrint for MemArg {
|
||||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
match self {
|
||||
&MemArg::BXD12 {
|
||||
base, index, disp, ..
|
||||
} => {
|
||||
if base != zero_reg() {
|
||||
if index != zero_reg() {
|
||||
format!(
|
||||
"{}({},{})",
|
||||
disp.show_rru(mb_rru),
|
||||
index.show_rru(mb_rru),
|
||||
base.show_rru(mb_rru)
|
||||
)
|
||||
} else {
|
||||
format!("{}({})", disp.show_rru(mb_rru), base.show_rru(mb_rru))
|
||||
}
|
||||
} else {
|
||||
if index != zero_reg() {
|
||||
format!("{}({},)", disp.show_rru(mb_rru), index.show_rru(mb_rru))
|
||||
} else {
|
||||
format!("{}", disp.show_rru(mb_rru))
|
||||
}
|
||||
}
|
||||
}
|
||||
&MemArg::BXD20 {
|
||||
base, index, disp, ..
|
||||
} => {
|
||||
if base != zero_reg() {
|
||||
if index != zero_reg() {
|
||||
format!(
|
||||
"{}({},{})",
|
||||
disp.show_rru(mb_rru),
|
||||
index.show_rru(mb_rru),
|
||||
base.show_rru(mb_rru)
|
||||
)
|
||||
} else {
|
||||
format!("{}({})", disp.show_rru(mb_rru), base.show_rru(mb_rru))
|
||||
}
|
||||
} else {
|
||||
if index != zero_reg() {
|
||||
format!("{}({},)", disp.show_rru(mb_rru), index.show_rru(mb_rru))
|
||||
} else {
|
||||
format!("{}", disp.show_rru(mb_rru))
|
||||
}
|
||||
}
|
||||
}
|
||||
&MemArg::Label { ref target } => target.show_rru(mb_rru),
|
||||
&MemArg::Symbol {
|
||||
ref name, offset, ..
|
||||
} => format!("{} + {}", name, offset),
|
||||
// Eliminated by `mem_finalize()`.
|
||||
&MemArg::InitialSPOffset { .. }
|
||||
| &MemArg::NominalSPOffset { .. }
|
||||
| &MemArg::RegOffset { .. } => {
|
||||
panic!("Unexpected pseudo mem-arg mode (stack-offset or generic reg-offset)!")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PrettyPrint for Cond {
|
||||
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
let s = match self.mask {
|
||||
1 => "o",
|
||||
2 => "h",
|
||||
3 => "nle",
|
||||
4 => "l",
|
||||
5 => "nhe",
|
||||
6 => "lh",
|
||||
7 => "ne",
|
||||
8 => "e",
|
||||
9 => "nlh",
|
||||
10 => "he",
|
||||
11 => "nl",
|
||||
12 => "le",
|
||||
13 => "nh",
|
||||
14 => "no",
|
||||
_ => unreachable!(),
|
||||
};
|
||||
s.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
impl PrettyPrint for BranchTarget {
|
||||
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
match self {
|
||||
&BranchTarget::Label(label) => format!("label{:?}", label.get()),
|
||||
&BranchTarget::ResolvedOffset(off) => format!("{}", off),
|
||||
}
|
||||
}
|
||||
}
|
||||
1965
cranelift/codegen/src/isa/s390x/inst/emit.rs
Normal file
1965
cranelift/codegen/src/isa/s390x/inst/emit.rs
Normal file
File diff suppressed because it is too large
Load Diff
7140
cranelift/codegen/src/isa/s390x/inst/emit_tests.rs
Normal file
7140
cranelift/codegen/src/isa/s390x/inst/emit_tests.rs
Normal file
File diff suppressed because it is too large
Load Diff
231
cranelift/codegen/src/isa/s390x/inst/imms.rs
Normal file
231
cranelift/codegen/src/isa/s390x/inst/imms.rs
Normal file
@@ -0,0 +1,231 @@
|
||||
//! S390x ISA definitions: immediate constants.
|
||||
|
||||
use regalloc::{PrettyPrint, RealRegUniverse};
|
||||
use std::string::String;
|
||||
|
||||
/// An unsigned 12-bit immediate.
#[derive(Clone, Copy, Debug)]
pub struct UImm12 {
    /// The value.
    value: u16,
}

impl UImm12 {
    /// Build an immediate from `value`, or return `None` if it does not fit
    /// in 12 bits (i.e. is 4096 or larger).
    pub fn maybe_from_u64(value: u64) -> Option<UImm12> {
        match value {
            0..=4095 => Some(UImm12 {
                value: value as u16,
            }),
            _ => None,
        }
    }

    /// Create a zero immediate of this format.
    pub fn zero() -> UImm12 {
        UImm12 { value: 0 }
    }

    /// Bits for encoding.
    pub fn bits(&self) -> u32 {
        self.value as u32
    }
}
|
||||
|
||||
/// A signed 20-bit immediate.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct SImm20 {
|
||||
/// The value.
|
||||
value: i32,
|
||||
}
|
||||
|
||||
impl SImm20 {
|
||||
pub fn maybe_from_i64(value: i64) -> Option<SImm20> {
|
||||
if value >= -524288 && value < 524288 {
|
||||
Some(SImm20 {
|
||||
value: value as i32,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_uimm12(value: UImm12) -> SImm20 {
|
||||
SImm20 {
|
||||
value: value.bits() as i32,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a zero immediate of this format.
|
||||
pub fn zero() -> SImm20 {
|
||||
SImm20 { value: 0 }
|
||||
}
|
||||
|
||||
/// Bits for encoding.
|
||||
pub fn bits(&self) -> u32 {
|
||||
let encoded: u32 = self.value as u32;
|
||||
encoded & 0xfffff
|
||||
}
|
||||
}
|
||||
|
||||
/// A 16-bit immediate with a {0,16,32,48}-bit shift.
#[derive(Clone, Copy, Debug)]
pub struct UImm16Shifted {
    /// The value.
    pub bits: u16,
    /// Result is `bits` shifted 16*shift bits to the left.
    pub shift: u8,
}

impl UImm16Shifted {
    /// Construct a UImm16Shifted from an arbitrary 64-bit constant if possible.
    ///
    /// Succeeds when every set bit of `value` lies inside a single aligned
    /// 16-bit field; the lowest such field is chosen (so zero yields shift 0).
    pub fn maybe_from_u64(value: u64) -> Option<UImm16Shifted> {
        for shift in 0..4u8 {
            let field = 0xffffu64 << (16 * shift);
            if value & !field == 0 {
                return Some(UImm16Shifted {
                    bits: (value >> (16 * shift)) as u16,
                    shift,
                });
            }
        }
        None
    }

    /// Construct from a 16-bit value and a shift amount given in bits.
    ///
    /// `shift` is divided by 16 to obtain the stored shift index (any
    /// remainder is discarded); returns `None` if that index exceeds 3.
    pub fn maybe_with_shift(imm: u16, shift: u8) -> Option<UImm16Shifted> {
        let shift_enc = shift / 16;
        if shift_enc > 3 {
            None
        } else {
            Some(UImm16Shifted {
                bits: imm,
                shift: shift_enc,
            })
        }
    }

    /// Return a copy with every bit of `bits` inverted and the same shift.
    pub fn negate_bits(&self) -> UImm16Shifted {
        UImm16Shifted {
            bits: !self.bits,
            shift: self.shift,
        }
    }

    /// Returns the value that this constant represents.
    pub fn value(&self) -> u64 {
        (self.bits as u64) << (16 * self.shift)
    }
}

/// A 32-bit immediate with a {0,32}-bit shift.
#[derive(Clone, Copy, Debug)]
pub struct UImm32Shifted {
    /// The value.
    pub bits: u32,
    /// Result is `bits` shifted 32*shift bits to the left.
    pub shift: u8,
}

impl UImm32Shifted {
    /// Construct a UImm32Shifted from an arbitrary 64-bit constant if possible.
    ///
    /// Succeeds when every set bit of `value` lies in the low or in the high
    /// 32-bit half; the low half is preferred (so zero yields shift 0).
    pub fn maybe_from_u64(value: u64) -> Option<UImm32Shifted> {
        let mask0 = 0x0000_0000_ffff_ffffu64;
        let mask1 = 0xffff_ffff_0000_0000u64;

        if value == (value & mask0) {
            return Some(UImm32Shifted {
                bits: (value & mask0) as u32,
                shift: 0,
            });
        }
        if value == (value & mask1) {
            return Some(UImm32Shifted {
                bits: ((value >> 32) & mask0) as u32,
                shift: 1,
            });
        }
        None
    }

    /// Construct from a 32-bit value and a shift amount given in bits.
    ///
    /// `shift` is divided by 32 to obtain the stored shift index (any
    /// remainder is discarded). Only indices 0 and 1 exist for a 32-bit
    /// field inside a 64-bit value, so anything larger returns `None`.
    pub fn maybe_with_shift(imm: u32, shift: u8) -> Option<UImm32Shifted> {
        let shift_enc = shift / 32;
        // A limit of 3 (as used for the 16-bit variant) would admit shift
        // indices 2 and 3, for which `value()` would shift a u64 by >= 64.
        if shift_enc > 1 {
            None
        } else {
            Some(UImm32Shifted {
                bits: imm,
                shift: shift_enc,
            })
        }
    }

    /// Re-express a shifted 16-bit immediate as a shifted 32-bit immediate
    /// representing the same 64-bit value.
    pub fn from_uimm16shifted(value: UImm16Shifted) -> UImm32Shifted {
        // An odd 16-bit shift index lands in the upper half of the 32-bit
        // field, an even one in the lower half.
        let within_field = 16 * (value.shift % 2);
        UImm32Shifted {
            bits: (value.bits as u32) << within_field,
            shift: value.shift / 2,
        }
    }

    /// Return a copy with every bit of `bits` inverted and the same shift.
    pub fn negate_bits(&self) -> UImm32Shifted {
        UImm32Shifted {
            bits: !self.bits,
            shift: self.shift,
        }
    }

    /// Returns the value that this constant represents.
    pub fn value(&self) -> u64 {
        (self.bits as u64) << (32 * self.shift)
    }
}
|
||||
|
||||
impl PrettyPrint for UImm12 {
|
||||
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
format!("{}", self.value)
|
||||
}
|
||||
}
|
||||
|
||||
impl PrettyPrint for SImm20 {
|
||||
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
format!("{}", self.value)
|
||||
}
|
||||
}
|
||||
|
||||
impl PrettyPrint for UImm16Shifted {
|
||||
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
format!("{}", self.bits)
|
||||
}
|
||||
}
|
||||
|
||||
impl PrettyPrint for UImm32Shifted {
|
||||
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
format!("{}", self.bits)
|
||||
}
|
||||
}
|
||||
3411
cranelift/codegen/src/isa/s390x/inst/mod.rs
Normal file
3411
cranelift/codegen/src/isa/s390x/inst/mod.rs
Normal file
File diff suppressed because it is too large
Load Diff
168
cranelift/codegen/src/isa/s390x/inst/regs.rs
Normal file
168
cranelift/codegen/src/isa/s390x/inst/regs.rs
Normal file
@@ -0,0 +1,168 @@
|
||||
//! S390x ISA definitions: registers.
|
||||
|
||||
use crate::settings;
|
||||
use regalloc::{RealRegUniverse, Reg, RegClass, RegClassInfo, Writable, NUM_REG_CLASSES};
|
||||
|
||||
//=============================================================================
|
||||
// Registers, the Universe thereof, and printing
|
||||
|
||||
// Register-universe index of each GPR, looked up by hardware register
// number (`gpr()` uses `GPR_INDICES[num]` as the index of `%r<num>`).
// The values must agree with the push order in `create_reg_universe()`,
// which asserts that every register sits at its own index.
#[rustfmt::skip]
const GPR_INDICES: [u8; 16] = [
    // r0 and r1 reserved
    30, 31,
    // r2 - r5 call-clobbered
    16, 17, 18, 19,
    // r6 - r14 call-saved (order reversed)
    28, 27, 26, 25, 24, 23, 22, 21, 20,
    // r15 (SP)
    29,
];
|
||||
|
||||
// Register-universe index of each FPR, looked up by hardware register
// number (`fpr()` uses `FPR_INDICES[num]` as the index of `%f<num>`).
// Even-numbered registers of each group of eight come first in the universe,
// followed by the odd-numbered ones; this must agree with the push order in
// `create_reg_universe()`.
#[rustfmt::skip]
const FPR_INDICES: [u8; 16] = [
    // f0 - f7 as pairs
    0, 4, 1, 5, 2, 6, 3, 7,
    // f8 - f15 as pairs
    8, 12, 9, 13, 10, 14, 11, 15,
];
|
||||
|
||||
/// Get a reference to a GPR (integer register).
|
||||
pub fn gpr(num: u8) -> Reg {
|
||||
assert!(num < 16);
|
||||
Reg::new_real(
|
||||
RegClass::I64,
|
||||
/* enc = */ num,
|
||||
/* index = */ GPR_INDICES[num as usize],
|
||||
)
|
||||
}
|
||||
|
||||
/// Get a writable reference to a GPR.
|
||||
pub fn writable_gpr(num: u8) -> Writable<Reg> {
|
||||
Writable::from_reg(gpr(num))
|
||||
}
|
||||
|
||||
/// Get a reference to a FPR (floating-point register).
|
||||
pub fn fpr(num: u8) -> Reg {
|
||||
assert!(num < 16);
|
||||
Reg::new_real(
|
||||
RegClass::F64,
|
||||
/* enc = */ num,
|
||||
/* index = */ FPR_INDICES[num as usize],
|
||||
)
|
||||
}
|
||||
|
||||
/// Get a writable reference to a V-register.
|
||||
pub fn writable_fpr(num: u8) -> Writable<Reg> {
|
||||
Writable::from_reg(fpr(num))
|
||||
}
|
||||
|
||||
/// Get a reference to the stack-pointer register.
|
||||
pub fn stack_reg() -> Reg {
|
||||
gpr(15)
|
||||
}
|
||||
|
||||
/// Get a writable reference to the stack-pointer register.
|
||||
pub fn writable_stack_reg() -> Writable<Reg> {
|
||||
Writable::from_reg(stack_reg())
|
||||
}
|
||||
|
||||
/// Get a reference to the first temporary, sometimes "spill temporary", register. This register is
|
||||
/// used to compute the address of a spill slot when a direct offset addressing mode from FP is not
|
||||
/// sufficient (+/- 2^11 words). We exclude this register from regalloc and reserve it for this
|
||||
/// purpose for simplicity; otherwise we need a multi-stage analysis where we first determine how
|
||||
/// many spill slots we have, then perhaps remove the reg from the pool and recompute regalloc.
|
||||
///
|
||||
/// We use r1 for this because it's a scratch register but is slightly special (used for linker
|
||||
/// veneers). We're free to use it as long as we don't expect it to live through call instructions.
|
||||
pub fn spilltmp_reg() -> Reg {
|
||||
gpr(1)
|
||||
}
|
||||
|
||||
/// Get a writable reference to the spilltmp reg.
|
||||
pub fn writable_spilltmp_reg() -> Writable<Reg> {
|
||||
Writable::from_reg(spilltmp_reg())
|
||||
}
|
||||
|
||||
pub fn zero_reg() -> Reg {
|
||||
gpr(0)
|
||||
}
|
||||
|
||||
/// Create the register universe for AArch64.
|
||||
pub fn create_reg_universe(_flags: &settings::Flags) -> RealRegUniverse {
|
||||
let mut regs = vec![];
|
||||
let mut allocable_by_class = [None; NUM_REG_CLASSES];
|
||||
|
||||
// Numbering Scheme: we put FPRs first, then GPRs. The GPRs exclude several registers:
|
||||
// r0 (we cannot use this for addressing // FIXME regalloc)
|
||||
// r1 (spilltmp)
|
||||
// r15 (stack pointer)
|
||||
|
||||
// FPRs.
|
||||
let mut base = regs.len();
|
||||
regs.push((fpr(0).to_real_reg(), "%f0".into()));
|
||||
regs.push((fpr(2).to_real_reg(), "%f2".into()));
|
||||
regs.push((fpr(4).to_real_reg(), "%f4".into()));
|
||||
regs.push((fpr(6).to_real_reg(), "%f6".into()));
|
||||
regs.push((fpr(1).to_real_reg(), "%f1".into()));
|
||||
regs.push((fpr(3).to_real_reg(), "%f3".into()));
|
||||
regs.push((fpr(5).to_real_reg(), "%f5".into()));
|
||||
regs.push((fpr(7).to_real_reg(), "%f7".into()));
|
||||
regs.push((fpr(8).to_real_reg(), "%f8".into()));
|
||||
regs.push((fpr(10).to_real_reg(), "%f10".into()));
|
||||
regs.push((fpr(12).to_real_reg(), "%f12".into()));
|
||||
regs.push((fpr(14).to_real_reg(), "%f14".into()));
|
||||
regs.push((fpr(9).to_real_reg(), "%f9".into()));
|
||||
regs.push((fpr(11).to_real_reg(), "%f11".into()));
|
||||
regs.push((fpr(13).to_real_reg(), "%f13".into()));
|
||||
regs.push((fpr(15).to_real_reg(), "%f15".into()));
|
||||
|
||||
allocable_by_class[RegClass::F64.rc_to_usize()] = Some(RegClassInfo {
|
||||
first: base,
|
||||
last: regs.len() - 1,
|
||||
suggested_scratch: Some(fpr(1).get_index()),
|
||||
});
|
||||
|
||||
// Caller-saved GPRs in the SystemV s390x ABI.
|
||||
base = regs.len();
|
||||
regs.push((gpr(2).to_real_reg(), "%r2".into()));
|
||||
regs.push((gpr(3).to_real_reg(), "%r3".into()));
|
||||
regs.push((gpr(4).to_real_reg(), "%r4".into()));
|
||||
regs.push((gpr(5).to_real_reg(), "%r5".into()));
|
||||
|
||||
// Callee-saved GPRs in the SystemV s390x ABI.
|
||||
// We start from r14 downwards in an attempt to allow the
|
||||
// prolog to use as short a STMG as possible.
|
||||
regs.push((gpr(14).to_real_reg(), "%r14".into()));
|
||||
regs.push((gpr(13).to_real_reg(), "%r13".into()));
|
||||
regs.push((gpr(12).to_real_reg(), "%r12".into()));
|
||||
regs.push((gpr(11).to_real_reg(), "%r11".into()));
|
||||
regs.push((gpr(10).to_real_reg(), "%r10".into()));
|
||||
regs.push((gpr(9).to_real_reg(), "%r9".into()));
|
||||
regs.push((gpr(8).to_real_reg(), "%r8".into()));
|
||||
regs.push((gpr(7).to_real_reg(), "%r7".into()));
|
||||
regs.push((gpr(6).to_real_reg(), "%r6".into()));
|
||||
|
||||
allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
|
||||
first: base,
|
||||
last: regs.len() - 1,
|
||||
suggested_scratch: Some(gpr(13).get_index()),
|
||||
});
|
||||
|
||||
// Other regs, not available to the allocator.
|
||||
let allocable = regs.len();
|
||||
regs.push((gpr(15).to_real_reg(), "%r15".into()));
|
||||
regs.push((gpr(0).to_real_reg(), "%r0".into()));
|
||||
regs.push((gpr(1).to_real_reg(), "%r1".into()));
|
||||
|
||||
// Assert sanity: the indices in the register structs must match their
|
||||
// actual indices in the array.
|
||||
for (i, reg) in regs.iter().enumerate() {
|
||||
assert_eq!(i, reg.0.get_index());
|
||||
}
|
||||
|
||||
RealRegUniverse {
|
||||
regs,
|
||||
allocable,
|
||||
allocable_by_class,
|
||||
}
|
||||
}
|
||||
2
cranelift/codegen/src/isa/s390x/inst/unwind.rs
Normal file
2
cranelift/codegen/src/isa/s390x/inst/unwind.rs
Normal file
@@ -0,0 +1,2 @@
|
||||
#[cfg(feature = "unwind")]
|
||||
pub(crate) mod systemv;
|
||||
197
cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs
Normal file
197
cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs
Normal file
@@ -0,0 +1,197 @@
|
||||
//! Unwind information for System V ABI (s390x).
|
||||
|
||||
use crate::isa::unwind::systemv::RegisterMappingError;
|
||||
use gimli::{write::CommonInformationEntry, Encoding, Format, Register};
|
||||
use regalloc::{Reg, RegClass};
|
||||
|
||||
/// Creates a new s390x common information entry (CIE).
|
||||
pub fn create_cie() -> CommonInformationEntry {
|
||||
use gimli::write::CallFrameInstruction;
|
||||
|
||||
let mut entry = CommonInformationEntry::new(
|
||||
Encoding {
|
||||
address_size: 8,
|
||||
format: Format::Dwarf32,
|
||||
version: 1,
|
||||
},
|
||||
1, // Code alignment factor
|
||||
-8, // Data alignment factor
|
||||
Register(14), // Return address column - register %r14
|
||||
);
|
||||
|
||||
// Every frame will start with the call frame address (CFA) at %r15 + 160.
|
||||
entry.add_instruction(CallFrameInstruction::Cfa(Register(15), 160));
|
||||
|
||||
entry
|
||||
}
|
||||
|
||||
/// Map Cranelift registers to their corresponding Gimli registers.
|
||||
pub fn map_reg(reg: Reg) -> Result<Register, RegisterMappingError> {
|
||||
const GPR_MAP: [gimli::Register; 16] = [
|
||||
Register(0),
|
||||
Register(1),
|
||||
Register(2),
|
||||
Register(3),
|
||||
Register(4),
|
||||
Register(5),
|
||||
Register(6),
|
||||
Register(7),
|
||||
Register(8),
|
||||
Register(9),
|
||||
Register(10),
|
||||
Register(11),
|
||||
Register(12),
|
||||
Register(13),
|
||||
Register(14),
|
||||
Register(15),
|
||||
];
|
||||
const FPR_MAP: [gimli::Register; 16] = [
|
||||
Register(16),
|
||||
Register(20),
|
||||
Register(17),
|
||||
Register(21),
|
||||
Register(18),
|
||||
Register(22),
|
||||
Register(19),
|
||||
Register(23),
|
||||
Register(24),
|
||||
Register(28),
|
||||
Register(25),
|
||||
Register(29),
|
||||
Register(26),
|
||||
Register(30),
|
||||
Register(27),
|
||||
Register(31),
|
||||
];
|
||||
|
||||
match reg.get_class() {
|
||||
RegClass::I64 => Ok(GPR_MAP[reg.get_hw_encoding() as usize]),
|
||||
RegClass::F64 => Ok(FPR_MAP[reg.get_hw_encoding() as usize]),
|
||||
_ => Err(RegisterMappingError::UnsupportedRegisterBank("class?")),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct RegisterMapper;
|
||||
|
||||
impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper {
|
||||
fn map(&self, reg: Reg) -> Result<u16, RegisterMappingError> {
|
||||
Ok(map_reg(reg)?.0)
|
||||
}
|
||||
fn sp(&self) -> u16 {
|
||||
Register(15).0
|
||||
}
|
||||
}
|
||||
|
||||
// Tests that compile tiny functions for s390x and compare the debug rendering
// of the resulting frame description entry (FDE) against a fixed string.
#[cfg(test)]
mod tests {
    use crate::cursor::{Cursor, FuncCursor};
    use crate::ir::{
        types, AbiParam, ExternalName, Function, InstBuilder, Signature, StackSlotData,
        StackSlotKind,
    };
    use crate::isa::{lookup, CallConv};
    use crate::settings::{builder, Flags};
    use crate::Context;
    use gimli::write::Address;
    use std::str::FromStr;
    use target_lexicon::triple;

    // A single-block function with one 64-byte explicit stack slot.
    #[test]
    fn test_simple_func() {
        let isa = lookup(triple!("s390x"))
            .expect("expect s390x ISA")
            .finish(Flags::new(builder()));

        let mut context = Context::for_function(create_function(
            CallConv::SystemV,
            Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
        ));

        context.compile(&*isa).expect("expected compilation");

        // Only System V unwind info is acceptable here.
        let fde = match context
            .create_unwind_info(isa.as_ref())
            .expect("can create unwind info")
        {
            Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
                info.to_fde(Address::Constant(1234))
            }
            _ => panic!("expected unwind information"),
        };

        // NOTE(review): 224 presumably is the initial CFA offset (160) plus the
        // 64-byte stack slot -- confirm against the ABI implementation.
        assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 10, lsda: None, instructions: [(4, CfaOffset(224))] }");
    }

    // Builds `fn()` with a single block that returns immediately, optionally
    // registering `stack_slot` on the function.
    fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
        let mut func =
            Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));

        let block0 = func.dfg.make_block();
        let mut pos = FuncCursor::new(&mut func);
        pos.insert_block(block0);
        pos.ins().return_(&[]);

        if let Some(stack_slot) = stack_slot {
            func.stack_slots.push(stack_slot);
        }

        func
    }

    // A function with two separate return blocks and one 64-byte explicit
    // stack slot.
    #[test]
    fn test_multi_return_func() {
        let isa = lookup(triple!("s390x"))
            .expect("expect s390x ISA")
            .finish(Flags::new(builder()));

        let mut context = Context::for_function(create_multi_return_function(
            CallConv::SystemV,
            Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
        ));

        context.compile(&*isa).expect("expected compilation");

        // Only System V unwind info is acceptable here.
        let fde = match context
            .create_unwind_info(isa.as_ref())
            .expect("can create unwind info")
        {
            Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
                info.to_fde(Address::Constant(4321))
            }
            _ => panic!("expected unwind information"),
        };

        // The expected FDE still contains a single CFA adjustment, even though
        // the function has two return sites.
        assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 26, lsda: None, instructions: [(4, CfaOffset(224))] }");
    }

    // Builds `fn(i32)` whose entry block branches to `block2` when the
    // argument is non-zero and otherwise jumps to `block1`; both blocks just
    // return. Optionally registers `stack_slot` on the function.
    fn create_multi_return_function(
        call_conv: CallConv,
        stack_slot: Option<StackSlotData>,
    ) -> Function {
        let mut sig = Signature::new(call_conv);
        sig.params.push(AbiParam::new(types::I32));
        let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig);

        let block0 = func.dfg.make_block();
        let v0 = func.dfg.append_block_param(block0, types::I32);
        let block1 = func.dfg.make_block();
        let block2 = func.dfg.make_block();

        let mut pos = FuncCursor::new(&mut func);
        pos.insert_block(block0);
        pos.ins().brnz(v0, block2, &[]);
        pos.ins().jump(block1, &[]);

        pos.insert_block(block1);
        pos.ins().return_(&[]);

        pos.insert_block(block2);
        pos.ins().return_(&[]);

        if let Some(stack_slot) = stack_slot {
            func.stack_slots.push(stack_slot);
        }

        func
    }
}
|
||||
2839
cranelift/codegen/src/isa/s390x/lower.rs
Normal file
2839
cranelift/codegen/src/isa/s390x/lower.rs
Normal file
File diff suppressed because it is too large
Load Diff
296
cranelift/codegen/src/isa/s390x/mod.rs
Normal file
296
cranelift/codegen/src/isa/s390x/mod.rs
Normal file
@@ -0,0 +1,296 @@
|
||||
//! IBM Z 64-bit Instruction Set Architecture.
|
||||
|
||||
use crate::ir::condcodes::IntCC;
|
||||
use crate::ir::Function;
|
||||
use crate::isa::s390x::settings as s390x_settings;
|
||||
use crate::isa::unwind::systemv::RegisterMappingError;
|
||||
use crate::isa::Builder as IsaBuilder;
|
||||
use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
|
||||
use crate::result::CodegenResult;
|
||||
use crate::settings as shared_settings;
|
||||
|
||||
use alloc::{boxed::Box, vec::Vec};
|
||||
use core::hash::{Hash, Hasher};
|
||||
|
||||
use regalloc::{PrettyPrint, RealRegUniverse, Reg};
|
||||
use target_lexicon::{Architecture, Triple};
|
||||
|
||||
// New backend:
|
||||
mod abi;
|
||||
pub(crate) mod inst;
|
||||
mod lower;
|
||||
mod settings;
|
||||
|
||||
use inst::create_reg_universe;
|
||||
|
||||
use self::inst::EmitInfo;
|
||||
|
||||
/// An IBM Z backend.
pub struct S390xBackend {
    // Target triple this backend was created for.
    triple: Triple,
    // Shared (target-independent) compilation flags.
    flags: shared_settings::Flags,
    // s390x-specific flags.
    isa_flags: s390x_settings::Flags,
    // Register universe, built once from `flags` in `new_with_flags`.
    reg_universe: RealRegUniverse,
}
|
||||
|
||||
impl S390xBackend {
|
||||
/// Create a new IBM Z backend with the given (shared) flags.
|
||||
pub fn new_with_flags(
|
||||
triple: Triple,
|
||||
flags: shared_settings::Flags,
|
||||
isa_flags: s390x_settings::Flags,
|
||||
) -> S390xBackend {
|
||||
let reg_universe = create_reg_universe(&flags);
|
||||
S390xBackend {
|
||||
triple,
|
||||
flags,
|
||||
isa_flags,
|
||||
reg_universe,
|
||||
}
|
||||
}
|
||||
|
||||
/// This performs lowering to VCode, register-allocates the code, computes block layout and
|
||||
/// finalizes branches. The result is ready for binary emission.
|
||||
fn compile_vcode(
|
||||
&self,
|
||||
func: &Function,
|
||||
flags: shared_settings::Flags,
|
||||
) -> CodegenResult<VCode<inst::Inst>> {
|
||||
let emit_info = EmitInfo::new(flags.clone());
|
||||
let abi = Box::new(abi::S390xABICallee::new(func, flags)?);
|
||||
compile::compile::<S390xBackend>(func, self, abi, emit_info)
|
||||
}
|
||||
}
|
||||
|
||||
impl MachBackend for S390xBackend {
|
||||
fn compile_function(
|
||||
&self,
|
||||
func: &Function,
|
||||
want_disasm: bool,
|
||||
) -> CodegenResult<MachCompileResult> {
|
||||
let flags = self.flags();
|
||||
let vcode = self.compile_vcode(func, flags.clone())?;
|
||||
let buffer = vcode.emit();
|
||||
let frame_size = vcode.frame_size();
|
||||
let value_labels_ranges = vcode.value_labels_ranges();
|
||||
let stackslot_offsets = vcode.stackslot_offsets().clone();
|
||||
|
||||
let disasm = if want_disasm {
|
||||
Some(vcode.show_rru(Some(&create_reg_universe(flags))))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let buffer = buffer.finish();
|
||||
|
||||
Ok(MachCompileResult {
|
||||
buffer,
|
||||
frame_size,
|
||||
disasm,
|
||||
value_labels_ranges,
|
||||
stackslot_offsets,
|
||||
})
|
||||
}
|
||||
|
||||
fn name(&self) -> &'static str {
|
||||
"s390x"
|
||||
}
|
||||
|
||||
fn triple(&self) -> Triple {
|
||||
self.triple.clone()
|
||||
}
|
||||
|
||||
fn flags(&self) -> &shared_settings::Flags {
|
||||
&self.flags
|
||||
}
|
||||
|
||||
fn isa_flags(&self) -> Vec<shared_settings::Value> {
|
||||
self.isa_flags.iter().collect()
|
||||
}
|
||||
|
||||
fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) {
|
||||
self.flags.hash(&mut hasher);
|
||||
self.isa_flags.hash(&mut hasher);
|
||||
}
|
||||
|
||||
fn reg_universe(&self) -> &RealRegUniverse {
|
||||
&self.reg_universe
|
||||
}
|
||||
|
||||
fn unsigned_add_overflow_condition(&self) -> IntCC {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn unsigned_sub_overflow_condition(&self) -> IntCC {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
#[cfg(feature = "unwind")]
|
||||
fn emit_unwind_info(
|
||||
&self,
|
||||
result: &MachCompileResult,
|
||||
kind: crate::machinst::UnwindInfoKind,
|
||||
) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {
|
||||
use crate::isa::unwind::UnwindInfo;
|
||||
use crate::machinst::UnwindInfoKind;
|
||||
Ok(match kind {
|
||||
UnwindInfoKind::SystemV => {
|
||||
let mapper = self::inst::unwind::systemv::RegisterMapper;
|
||||
Some(UnwindInfo::SystemV(
|
||||
crate::isa::unwind::systemv::create_unwind_info_from_insts(
|
||||
&result.buffer.unwind_info[..],
|
||||
result.buffer.data.len(),
|
||||
&mapper,
|
||||
)?,
|
||||
))
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(feature = "unwind")]
|
||||
fn create_systemv_cie(&self) -> Option<gimli::write::CommonInformationEntry> {
|
||||
Some(inst::unwind::systemv::create_cie())
|
||||
}
|
||||
|
||||
#[cfg(feature = "unwind")]
|
||||
fn map_reg_to_dwarf(&self, reg: Reg) -> Result<u16, RegisterMappingError> {
|
||||
inst::unwind::systemv::map_reg(reg).map(|reg| reg.0)
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new `isa::Builder`.
|
||||
pub fn isa_builder(triple: Triple) -> IsaBuilder {
|
||||
assert!(triple.architecture == Architecture::S390x);
|
||||
IsaBuilder {
|
||||
triple,
|
||||
setup: s390x_settings::builder(),
|
||||
constructor: |triple, shared_flags, builder| {
|
||||
let isa_flags = s390x_settings::Flags::new(&shared_flags, builder);
|
||||
let backend = S390xBackend::new_with_flags(triple, shared_flags, isa_flags);
|
||||
Box::new(TargetIsaAdapter::new(backend))
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use crate::cursor::{Cursor, FuncCursor};
    use crate::ir::types::*;
    use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature};
    use crate::isa::CallConv;
    use crate::settings;
    use crate::settings::Configurable;
    use core::str::FromStr;
    use target_lexicon::Triple;

    /// Builds an empty SystemV function named `test0` that takes one `i32`
    /// and returns one `i32`.
    fn new_test_function() -> Function {
        let mut sig = Signature::new(CallConv::SystemV);
        sig.params.push(AbiParam::new(I32));
        sig.returns.push(AbiParam::new(I32));
        Function::with_name_signature(ExternalName::testcase("test0"), sig)
    }

    /// Compiles `func` (without disassembly) on an s390x backend configured
    /// with `opt_level=none` and default ISA settings.
    fn compile_unoptimized(func: &Function) -> MachCompileResult {
        let mut shared_builder = settings::builder();
        shared_builder.set("opt_level", "none").unwrap();
        let shared_flags = settings::Flags::new(shared_builder);
        let isa_flags = s390x_settings::Flags::new(&shared_flags, s390x_settings::builder());
        S390xBackend::new_with_flags(Triple::from_str("s390x").unwrap(), shared_flags, isa_flags)
            .compile_function(func, /* want_disasm = */ false)
            .unwrap()
    }

    #[test]
    fn test_compile_function() {
        let mut func = new_test_function();

        let entry = func.dfg.make_block();
        let arg0 = func.dfg.append_block_param(entry, I32);

        let mut pos = FuncCursor::new(&mut func);
        pos.insert_block(entry);
        let konst = pos.ins().iconst(I32, 0x1234);
        let sum = pos.ins().iadd(arg0, konst);
        pos.ins().return_(&[sum]);

        let result = compile_unoptimized(&func);
        let code = &result.buffer.data[..];

        // Expected machine code:
        //   ahi %r2, 0x1234
        //   br %r14
        let golden = vec![0xa7, 0x2a, 0x12, 0x34, 0x07, 0xfe];

        assert_eq!(code, &golden[..]);
    }

    #[test]
    fn test_branch_lowering() {
        let mut func = new_test_function();

        let bb0 = func.dfg.make_block();
        let arg0 = func.dfg.append_block_param(bb0, I32);
        let bb1 = func.dfg.make_block();
        let bb2 = func.dfg.make_block();
        let bb3 = func.dfg.make_block();

        let mut pos = FuncCursor::new(&mut func);

        // bb0: v1 = arg0 + 0x1234; branch to bb1 if non-zero, else bb2.
        pos.insert_block(bb0);
        let v0 = pos.ins().iconst(I32, 0x1234);
        let v1 = pos.ins().iadd(arg0, v0);
        pos.ins().brnz(v1, bb1, &[]);
        pos.ins().jump(bb2, &[]);

        // bb1: branch to bb2 if v1 is non-zero, else bb3.
        pos.insert_block(bb1);
        pos.ins().brnz(v1, bb2, &[]);
        pos.ins().jump(bb3, &[]);

        // bb2: v2 = v1 + 0x1234; loop on bb2 while non-zero, else bb1.
        pos.insert_block(bb2);
        let v2 = pos.ins().iadd(v1, v0);
        pos.ins().brnz(v2, bb2, &[]);
        pos.ins().jump(bb1, &[]);

        // bb3: return v1 - 0x1234.
        pos.insert_block(bb3);
        let v3 = pos.ins().isub(v1, v0);
        pos.ins().return_(&[v3]);

        let result = compile_unoptimized(&func);
        let code = &result.buffer.data[..];

        // FIXME: the branching logic should be optimized more

        // Expected machine code:
        //   ahi %r2, 4660
        //   chi %r2, 0
        //   jglh label1 ; jg label2
        //   jg label6
        //   jg label3
        //   ahik %r3, %r2, 4660
        //   chi %r3, 0
        //   jglh label4 ; jg label5
        //   jg label3
        //   jg label6
        //   chi %r2, 0
        //   jglh label7 ; jg label8
        //   jg label3
        //   ahi %r2, -4660
        //   br %r14
        let golden = vec![
            167, 42, 18, 52, 167, 46, 0, 0, 192, 100, 0, 0, 0, 11, 236, 50, 18, 52, 0, 216, 167,
            62, 0, 0, 192, 100, 255, 255, 255, 251, 167, 46, 0, 0, 192, 100, 255, 255, 255, 246,
            167, 42, 237, 204, 7, 254,
        ];

        assert_eq!(code, &golden[..]);
    }
}
|
||||
9
cranelift/codegen/src/isa/s390x/settings.rs
Normal file
9
cranelift/codegen/src/isa/s390x/settings.rs
Normal file
@@ -0,0 +1,9 @@
|
||||
//! S390X Settings.
|
||||
|
||||
use crate::settings::{self, detail, Builder, Value};
|
||||
use core::fmt;
|
||||
|
||||
// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs`. This file contains a
|
||||
// public `Flags` struct with an impl for all of the settings defined in
|
||||
// `cranelift-codegen/meta/src/isa/s390x/settings.rs`.
|
||||
include!(concat!(env!("OUT_DIR"), "/settings-s390x.rs"));
|
||||
@@ -225,6 +225,11 @@ pub enum UnwindInst {
|
||||
/// the clobber area.
|
||||
offset_downward_to_clobbers: u32,
|
||||
},
|
||||
/// The stack pointer was adjusted to allocate the stack.
|
||||
StackAlloc {
|
||||
/// Size to allocate.
|
||||
size: u32,
|
||||
},
|
||||
/// The stack slot at the given offset from the clobber-area base has been
|
||||
/// used to save the given register.
|
||||
///
|
||||
|
||||
@@ -6,7 +6,6 @@ use crate::isa::unwind::UnwindInst;
|
||||
use crate::result::{CodegenError, CodegenResult};
|
||||
use alloc::vec::Vec;
|
||||
use gimli::write::{Address, FrameDescriptionEntry};
|
||||
use thiserror::Error;
|
||||
|
||||
#[cfg(feature = "enable-serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -15,16 +14,32 @@ type Register = u16;
|
||||
|
||||
/// Enumerate the errors possible in mapping Cranelift registers to their DWARF equivalent.
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Error, Debug, PartialEq, Eq)]
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum RegisterMappingError {
|
||||
#[error("unable to find bank for register info")]
|
||||
MissingBank,
|
||||
#[error("register mapping is currently only implemented for x86_64")]
|
||||
UnsupportedArchitecture,
|
||||
#[error("unsupported register bank: {0}")]
|
||||
UnsupportedRegisterBank(&'static str),
|
||||
}
|
||||
|
||||
// This is manually implementing Error and Display instead of using thiserror to reduce the amount
|
||||
// of dependencies used by Cranelift.
|
||||
impl std::error::Error for RegisterMappingError {}
|
||||
|
||||
impl std::fmt::Display for RegisterMappingError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
match self {
|
||||
RegisterMappingError::MissingBank => write!(f, "unable to find bank for register info"),
|
||||
RegisterMappingError::UnsupportedArchitecture => write!(
|
||||
f,
|
||||
"register mapping is currently only implemented for x86_64"
|
||||
),
|
||||
RegisterMappingError::UnsupportedRegisterBank(bank) => {
|
||||
write!(f, "unsupported register bank: {}", bank)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This mirrors gimli's CallFrameInstruction, but is serializable
|
||||
// This excludes CfaExpression, Expression, ValExpression due to
|
||||
// https://github.com/gimli-rs/gimli/issues/513.
|
||||
@@ -122,8 +137,10 @@ pub(crate) trait RegisterMapper<Reg> {
|
||||
fn map(&self, reg: Reg) -> Result<Register, RegisterMappingError>;
|
||||
/// Gets stack pointer register.
|
||||
fn sp(&self) -> Register;
|
||||
/// Gets the frame pointer register.
|
||||
fn fp(&self) -> Register;
|
||||
/// Gets the frame pointer register, if any.
|
||||
fn fp(&self) -> Option<Register> {
|
||||
None
|
||||
}
|
||||
/// Gets the link register, if any.
|
||||
fn lr(&self) -> Option<Register> {
|
||||
None
|
||||
@@ -151,6 +168,7 @@ pub(crate) fn create_unwind_info_from_insts<MR: RegisterMapper<regalloc::Reg>>(
|
||||
) -> CodegenResult<UnwindInfo> {
|
||||
let mut instructions = vec![];
|
||||
|
||||
let mut cfa_offset = 0;
|
||||
let mut clobber_offset_to_cfa = 0;
|
||||
for &(instruction_offset, ref inst) in insts {
|
||||
match inst {
|
||||
@@ -163,10 +181,14 @@ pub(crate) fn create_unwind_info_from_insts<MR: RegisterMapper<regalloc::Reg>>(
|
||||
instruction_offset,
|
||||
CallFrameInstruction::CfaOffset(offset_upward_to_caller_sp as i32),
|
||||
));
|
||||
// Note that we saved the old FP value on the stack.
|
||||
// Note that we saved the old FP value on the stack. Use of this
|
||||
// operation implies that the target defines a FP register.
|
||||
instructions.push((
|
||||
instruction_offset,
|
||||
CallFrameInstruction::Offset(mr.fp(), -(offset_upward_to_caller_sp as i32)),
|
||||
CallFrameInstruction::Offset(
|
||||
mr.fp().unwrap(),
|
||||
-(offset_upward_to_caller_sp as i32),
|
||||
),
|
||||
));
|
||||
// If there is a link register on this architecture, note that
|
||||
// we saved it as well.
|
||||
@@ -188,15 +210,29 @@ pub(crate) fn create_unwind_info_from_insts<MR: RegisterMapper<regalloc::Reg>>(
|
||||
// Define CFA in terms of FP. Note that we assume it was already
|
||||
// defined correctly in terms of the current SP, and FP has just
|
||||
// been set to the current SP, so we do not need to change the
|
||||
// offset, only the register.
|
||||
instructions.push((
|
||||
instruction_offset,
|
||||
CallFrameInstruction::CfaRegister(mr.fp()),
|
||||
));
|
||||
// offset, only the register. (This is done only if the target
|
||||
// defines a frame pointer register.)
|
||||
if let Some(fp) = mr.fp() {
|
||||
instructions.push((instruction_offset, CallFrameInstruction::CfaRegister(fp)));
|
||||
}
|
||||
// Record initial CFA offset. This will be used with later
|
||||
// StackAlloc calls if we do not have a frame pointer.
|
||||
cfa_offset = offset_upward_to_caller_sp;
|
||||
// Record distance from CFA downward to clobber area so we can
|
||||
// express clobber offsets later in terms of CFA.
|
||||
clobber_offset_to_cfa = offset_upward_to_caller_sp + offset_downward_to_clobbers;
|
||||
}
|
||||
&UnwindInst::StackAlloc { size } => {
|
||||
// If we do not use a frame pointer, we need to update the
|
||||
// CFA offset whenever the stack pointer changes.
|
||||
if mr.fp().is_none() {
|
||||
cfa_offset += size;
|
||||
instructions.push((
|
||||
instruction_offset,
|
||||
CallFrameInstruction::CfaOffset(cfa_offset as i32),
|
||||
));
|
||||
}
|
||||
}
|
||||
&UnwindInst::SaveReg {
|
||||
clobber_offset,
|
||||
reg,
|
||||
|
||||
@@ -3,14 +3,11 @@
|
||||
use crate::isa::unwind::input;
|
||||
use crate::result::{CodegenError, CodegenResult};
|
||||
use alloc::vec::Vec;
|
||||
use byteorder::{ByteOrder, LittleEndian};
|
||||
use log::warn;
|
||||
#[cfg(feature = "enable-serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[cfg(feature = "x64")]
|
||||
use crate::binemit::CodeOffset;
|
||||
#[cfg(feature = "x64")]
|
||||
use crate::isa::unwind::UnwindInst;
|
||||
|
||||
/// Maximum (inclusive) size of a "small" stack allocation
|
||||
@@ -33,20 +30,20 @@ impl<'a> Writer<'a> {
|
||||
self.offset += 1;
|
||||
}
|
||||
|
||||
fn write_u16<T: ByteOrder>(&mut self, v: u16) {
|
||||
T::write_u16(&mut self.buf[self.offset..(self.offset + 2)], v);
|
||||
fn write_u16_le(&mut self, v: u16) {
|
||||
self.buf[self.offset..(self.offset + 2)].copy_from_slice(&v.to_le_bytes());
|
||||
self.offset += 2;
|
||||
}
|
||||
|
||||
fn write_u32<T: ByteOrder>(&mut self, v: u32) {
|
||||
T::write_u32(&mut self.buf[self.offset..(self.offset + 4)], v);
|
||||
fn write_u32_le(&mut self, v: u32) {
|
||||
self.buf[self.offset..(self.offset + 4)].copy_from_slice(&v.to_le_bytes());
|
||||
self.offset += 4;
|
||||
}
|
||||
}
|
||||
|
||||
/// The supported unwind codes for the x64 Windows ABI.
|
||||
///
|
||||
/// See: https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64
|
||||
/// See: <https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64>
|
||||
/// Only what is needed to describe the prologues generated by the Cranelift x86 ISA are represented here.
|
||||
/// Note: the Cranelift x86 ISA RU enum matches the Windows unwind GPR encoding values.
|
||||
#[allow(dead_code)]
|
||||
@@ -123,11 +120,11 @@ impl UnwindCode {
|
||||
let scaled_stack_offset = stack_offset / 16;
|
||||
if scaled_stack_offset <= core::u16::MAX as u32 {
|
||||
writer.write_u8((*reg << 4) | (op_small as u8));
|
||||
writer.write_u16::<LittleEndian>(scaled_stack_offset as u16);
|
||||
writer.write_u16_le(scaled_stack_offset as u16);
|
||||
} else {
|
||||
writer.write_u8((*reg << 4) | (op_large as u8));
|
||||
writer.write_u16::<LittleEndian>(*stack_offset as u16);
|
||||
writer.write_u16::<LittleEndian>((stack_offset >> 16) as u16);
|
||||
writer.write_u16_le(*stack_offset as u16);
|
||||
writer.write_u16_le((stack_offset >> 16) as u16);
|
||||
}
|
||||
}
|
||||
Self::StackAlloc {
|
||||
@@ -145,10 +142,10 @@ impl UnwindCode {
|
||||
);
|
||||
} else if *size <= LARGE_ALLOC_16BIT_MAX_SIZE {
|
||||
writer.write_u8(UnwindOperation::LargeStackAlloc as u8);
|
||||
writer.write_u16::<LittleEndian>((*size / 8) as u16);
|
||||
writer.write_u16_le((*size / 8) as u16);
|
||||
} else {
|
||||
writer.write_u8((1 << 4) | (UnwindOperation::LargeStackAlloc as u8));
|
||||
writer.write_u32::<LittleEndian>(*size);
|
||||
writer.write_u32_le(*size);
|
||||
}
|
||||
}
|
||||
Self::SetFPReg { instruction_offset } => {
|
||||
@@ -195,7 +192,7 @@ pub(crate) trait RegisterMapper<Reg> {
|
||||
/// Represents Windows x64 unwind information.
|
||||
///
|
||||
/// For information about Windows x64 unwind info, see:
|
||||
/// https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64
|
||||
/// <https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64>
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
|
||||
pub struct UnwindInfo {
|
||||
@@ -250,7 +247,7 @@ impl UnwindInfo {
|
||||
|
||||
// To keep a 32-bit alignment, emit 2 bytes of padding if there's an odd number of 16-bit nodes
|
||||
if (node_count & 1) == 1 {
|
||||
writer.write_u16::<LittleEndian>(0);
|
||||
writer.write_u16_le(0);
|
||||
}
|
||||
|
||||
// Ensure the correct number of bytes was emitted
|
||||
@@ -334,10 +331,8 @@ impl UnwindInfo {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "x64")]
|
||||
const UNWIND_RBP_REG: u8 = 5;
|
||||
|
||||
#[cfg(feature = "x64")]
|
||||
pub(crate) fn create_unwind_info_from_insts<MR: RegisterMapper<regalloc::Reg>>(
|
||||
insts: &[(CodeOffset, UnwindInst)],
|
||||
) -> CodegenResult<UnwindInfo> {
|
||||
@@ -360,6 +355,12 @@ pub(crate) fn create_unwind_info_from_insts<MR: RegisterMapper<regalloc::Reg>>(
|
||||
frame_register_offset = ensure_unwind_offset(offset_downward_to_clobbers)?;
|
||||
unwind_codes.push(UnwindCode::SetFPReg { instruction_offset });
|
||||
}
|
||||
&UnwindInst::StackAlloc { size } => {
|
||||
unwind_codes.push(UnwindCode::StackAlloc {
|
||||
instruction_offset,
|
||||
size,
|
||||
});
|
||||
}
|
||||
&UnwindInst::SaveReg {
|
||||
clobber_offset,
|
||||
reg,
|
||||
|
||||
@@ -237,10 +237,20 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
||||
extension: param.extension,
|
||||
});
|
||||
} else {
|
||||
// Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte
|
||||
// stack alignment happens separately after all args.)
|
||||
// Compute size. For the wasmtime ABI it differs from native
|
||||
// ABIs in how multiple values are returned, so we take a
|
||||
// leaf out of arm64's book by not rounding everything up to
|
||||
// 8 bytes. For all ABI arguments, and other ABI returns,
|
||||
// though, each slot takes a minimum of 8 bytes.
|
||||
//
|
||||
// Note that in all cases 16-byte stack alignment happens
|
||||
// separately after all args.
|
||||
let size = (reg_ty.bits() / 8) as u64;
|
||||
let size = std::cmp::max(size, 8);
|
||||
let size = if args_or_rets == ArgsOrRets::Rets && call_conv.extends_wasmtime() {
|
||||
size
|
||||
} else {
|
||||
std::cmp::max(size, 8)
|
||||
};
|
||||
// Align.
|
||||
debug_assert!(size.is_power_of_two());
|
||||
next_stack = align_to(next_stack, size);
|
||||
@@ -490,6 +500,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
||||
flags: &settings::Flags,
|
||||
clobbers: &Set<Writable<RealReg>>,
|
||||
fixed_frame_storage_size: u32,
|
||||
_outgoing_args_size: u32,
|
||||
) -> (u64, SmallVec<[Self::I; 16]>) {
|
||||
let mut insts = SmallVec::new();
|
||||
// Find all clobbered registers that are callee-save.
|
||||
@@ -564,6 +575,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
||||
flags: &settings::Flags,
|
||||
clobbers: &Set<Writable<RealReg>>,
|
||||
fixed_frame_storage_size: u32,
|
||||
_outgoing_args_size: u32,
|
||||
) -> SmallVec<[Self::I; 16]> {
|
||||
let mut insts = SmallVec::new();
|
||||
|
||||
@@ -824,15 +836,7 @@ impl From<StackAMode> for SyntheticAmode {
|
||||
}
|
||||
|
||||
fn get_intreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
|
||||
let is_fastcall = match call_conv {
|
||||
CallConv::Fast
|
||||
| CallConv::Cold
|
||||
| CallConv::SystemV
|
||||
| CallConv::BaldrdashSystemV
|
||||
| CallConv::Baldrdash2020 => false,
|
||||
CallConv::WindowsFastcall => true,
|
||||
_ => panic!("int args only supported for SysV or Fastcall calling convention"),
|
||||
};
|
||||
let is_fastcall = call_conv.extends_windows_fastcall();
|
||||
|
||||
// Fastcall counts by absolute argument number; SysV counts by argument of
|
||||
// this (integer) class.
|
||||
@@ -853,15 +857,7 @@ fn get_intreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Optio
|
||||
}
|
||||
|
||||
fn get_fltreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
|
||||
let is_fastcall = match call_conv {
|
||||
CallConv::Fast
|
||||
| CallConv::Cold
|
||||
| CallConv::SystemV
|
||||
| CallConv::BaldrdashSystemV
|
||||
| CallConv::Baldrdash2020 => false,
|
||||
CallConv::WindowsFastcall => true,
|
||||
_ => panic!("float args only supported for SysV or Fastcall calling convention"),
|
||||
};
|
||||
let is_fastcall = call_conv.extends_windows_fastcall();
|
||||
|
||||
// Fastcall counts by absolute argument number; SysV counts by argument of
|
||||
// this (floating-point) class.
|
||||
@@ -894,7 +890,10 @@ fn get_intreg_for_retval(
|
||||
1 => Some(regs::rdx()),
|
||||
_ => None,
|
||||
},
|
||||
CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => {
|
||||
CallConv::BaldrdashSystemV
|
||||
| CallConv::Baldrdash2020
|
||||
| CallConv::WasmtimeSystemV
|
||||
| CallConv::WasmtimeFastcall => {
|
||||
if intreg_idx == 0 && retval_idx == 0 {
|
||||
Some(regs::rax())
|
||||
} else {
|
||||
@@ -907,6 +906,7 @@ fn get_intreg_for_retval(
|
||||
_ => None,
|
||||
},
|
||||
CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
|
||||
CallConv::AppleAarch64 => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -921,7 +921,10 @@ fn get_fltreg_for_retval(
|
||||
1 => Some(regs::xmm1()),
|
||||
_ => None,
|
||||
},
|
||||
CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => {
|
||||
CallConv::BaldrdashSystemV
|
||||
| CallConv::Baldrdash2020
|
||||
| CallConv::WasmtimeFastcall
|
||||
| CallConv::WasmtimeSystemV => {
|
||||
if fltreg_idx == 0 && retval_idx == 0 {
|
||||
Some(regs::xmm0())
|
||||
} else {
|
||||
@@ -933,6 +936,7 @@ fn get_fltreg_for_retval(
|
||||
_ => None,
|
||||
},
|
||||
CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
|
||||
CallConv::AppleAarch64 => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -990,17 +994,18 @@ fn get_callee_saves(call_conv: &CallConv, regs: &Set<Writable<RealReg>>) -> Vec<
|
||||
CallConv::BaldrdashWindows => {
|
||||
todo!("baldrdash windows");
|
||||
}
|
||||
CallConv::Fast | CallConv::Cold | CallConv::SystemV => regs
|
||||
CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::WasmtimeSystemV => regs
|
||||
.iter()
|
||||
.cloned()
|
||||
.filter(|r| is_callee_save_systemv(r.to_reg()))
|
||||
.collect(),
|
||||
CallConv::WindowsFastcall => regs
|
||||
CallConv::WindowsFastcall | CallConv::WasmtimeFastcall => regs
|
||||
.iter()
|
||||
.cloned()
|
||||
.filter(|r| is_callee_save_fastcall(r.to_reg()))
|
||||
.collect(),
|
||||
CallConv::Probestack => todo!("probestack?"),
|
||||
CallConv::AppleAarch64 => unreachable!(),
|
||||
};
|
||||
// Sort registers for deterministic code output. We can do an unstable sort because the
|
||||
// registers will be unique (there are no dups).
|
||||
|
||||
403
cranelift/codegen/src/isa/x64/encoding/evex.rs
Normal file
403
cranelift/codegen/src/isa/x64/encoding/evex.rs
Normal file
@@ -0,0 +1,403 @@
|
||||
//! Encodes EVEX instructions. These instructions are those added by the AVX-512 extensions. The
|
||||
//! EVEX encoding requires a 4-byte prefix:
|
||||
//!
|
||||
//! Byte 0: 0x62
|
||||
//! ┌───┬───┬───┬───┬───┬───┬───┬───┐
|
||||
//! Byte 1: │ R │ X │ B │ R'│ 0 │ 0 │ m │ m │
|
||||
//! ├───┼───┼───┼───┼───┼───┼───┼───┤
|
||||
//! Byte 2: │ W │ v │ v │ v │ v │ 1 │ p │ p │
|
||||
//! ├───┼───┼───┼───┼───┼───┼───┼───┤
|
||||
//! Byte 3: │ z │ L'│ L │ b │ V'│ a │ a │ a │
|
||||
//! └───┴───┴───┴───┴───┴───┴───┴───┘
|
||||
//!
|
||||
//! The prefix is then followed by the opcode byte, the ModR/M byte, and other optional suffixes
|
||||
//! (e.g. SIB byte, displacements, immediates) based on the instruction (see section 2.6, Intel
|
||||
//! Software Development Manual, volume 2A).
|
||||
use super::rex::{encode_modrm, LegacyPrefixes, OpcodeMap};
|
||||
use super::ByteSink;
|
||||
use core::ops::RangeInclusive;
|
||||
|
||||
/// Constructs an EVEX-encoded instruction using a builder pattern. This approach makes it visually
|
||||
/// easier to transform the manual's syntax, e.g. `EVEX.256.66.0F38.W1 1F /r`, into code:
|
||||
/// `EvexInstruction::new().length(...).prefix(...).map(...).w(true).opcode(0x1F).reg(...).rm(...)`.
|
||||
pub struct EvexInstruction {
|
||||
// The 4-byte EVEX prefix packed into a `u32`; `encode` emits it with a single `put4`.
bits: u32,
|
||||
// The opcode byte, emitted right after the prefix.
opcode: u8,
|
||||
// Register for the ModR/M `reg` field; only its low three bits go into the ModR/M byte.
reg: Register,
|
||||
// Register for the ModR/M `rm` field; only its low three bits go into the ModR/M byte.
rm: Register,
|
||||
}
|
||||
|
||||
/// Because some of the bit flags in the EVEX prefix are reversed and users of `EvexInstruction` may
|
||||
/// choose to skip setting fields, here we set some sane defaults. Note that:
|
||||
/// - the first byte is always `0x62` but you will notice it at the end of the default `bits` value
|
||||
/// implemented--remember the little-endian order
|
||||
/// - some bits are always set to certain values: bits 10-11 to 0, bit 18 to 1
|
||||
/// - the other bits set correspond to reversed bits: R, X, B, R' (byte 1), vvvv (byte 2), V' (byte
|
||||
/// 3).
|
||||
///
|
||||
/// See the `default_emission` test for what these defaults are equivalent to (e.g. using RAX,
|
||||
/// unsetting the W bit, etc.)
|
||||
impl Default for EvexInstruction {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
bits: 0x08_7C_F0_62,
|
||||
opcode: 0,
|
||||
reg: Register::default(),
|
||||
rm: Register::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(non_upper_case_globals)] // This makes it easier to match the bit range names to the manual's names.
|
||||
impl EvexInstruction {
|
||||
/// Construct a default EVEX instruction.
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Set the length of the instruction . Note that there are sets of instructions (i.e. rounding,
|
||||
/// memory broadcast) that modify the same underlying bits--at some point (TODO) we can add a
|
||||
/// way to set those context bits and verify that both are not used (e.g. rounding AND length).
|
||||
/// For now, this method is very convenient.
|
||||
#[inline(always)]
|
||||
pub fn length(mut self, length: EvexVectorLength) -> Self {
|
||||
self.write(Self::LL, EvexContext::Other { length }.bits() as u32);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the legacy prefix byte of the instruction: None | 66 | F0 | F2 | F3. EVEX instructions
|
||||
/// pack these into the prefix, not as separate bytes.
|
||||
#[inline(always)]
|
||||
pub fn prefix(mut self, prefix: LegacyPrefixes) -> Self {
|
||||
self.write(Self::pp, prefix.bits() as u32);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the opcode map byte of the instruction: None | 0F | 0F38 | 0F3A. EVEX instructions pack
|
||||
/// these into the prefix, not as separate bytes.
|
||||
#[inline(always)]
|
||||
pub fn map(mut self, map: OpcodeMap) -> Self {
|
||||
self.write(Self::mm, map.bits() as u32);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the W bit, typically used to indicate an instruction using 64 bits of an operand (e.g.
|
||||
/// 64 bit lanes). EVEX packs this bit in the EVEX prefix; previous encodings used the REX
|
||||
/// prefix.
|
||||
#[inline(always)]
|
||||
pub fn w(mut self, w: bool) -> Self {
|
||||
self.write(Self::W, w as u32);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the instruction opcode byte.
|
||||
#[inline(always)]
|
||||
pub fn opcode(mut self, opcode: u8) -> Self {
|
||||
self.opcode = opcode;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the register to use for the `reg` bits; many instructions use this as the write operand.
|
||||
/// Setting this affects both the ModRM byte (`reg` section) and the EVEX prefix (the extension
|
||||
/// bits for register encodings > 8).
|
||||
#[inline(always)]
|
||||
pub fn reg(mut self, reg: impl Into<Register>) -> Self {
|
||||
self.reg = reg.into();
|
||||
let r = !(self.reg.0 >> 3) & 1;
|
||||
let r_ = !(self.reg.0 >> 4) & 1;
|
||||
self.write(Self::R, r as u32);
|
||||
self.write(Self::R_, r_ as u32);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the mask to use. See section 2.6 in the Intel Software Developer's Manual, volume 2A for
|
||||
/// more details.
|
||||
#[allow(dead_code)]
|
||||
#[inline(always)]
|
||||
pub fn mask(mut self, mask: EvexMasking) -> Self {
|
||||
self.write(Self::aaa, mask.aaa_bits() as u32);
|
||||
self.write(Self::z, mask.z_bit() as u32);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the `vvvvv` register; some instructions allow using this as a second, non-destructive
|
||||
/// source register in 3-operand instructions (e.g. 2 read, 1 write).
|
||||
#[allow(dead_code)]
|
||||
#[inline(always)]
|
||||
pub fn vvvvv(mut self, reg: impl Into<Register>) -> Self {
|
||||
let reg = reg.into();
|
||||
self.write(Self::vvvv, !(reg.0 as u32) & 0b1111);
|
||||
self.write(Self::V_, !(reg.0 as u32 >> 4) & 0b1);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the register to use for the `rm` bits; many instructions use this as the "read from
|
||||
/// register/memory" operand. Currently this does not support memory addressing (TODO).Setting
|
||||
/// this affects both the ModRM byte (`rm` section) and the EVEX prefix (the extension bits for
|
||||
/// register encodings > 8).
|
||||
#[inline(always)]
|
||||
pub fn rm(mut self, reg: impl Into<Register>) -> Self {
|
||||
self.rm = reg.into();
|
||||
let b = !(self.rm.0 >> 3) & 1;
|
||||
let x = !(self.rm.0 >> 4) & 1;
|
||||
self.write(Self::X, x as u32);
|
||||
self.write(Self::B, b as u32);
|
||||
self
|
||||
}
|
||||
|
||||
/// Emit the EVEX-encoded instruction to the code sink:
|
||||
/// - first, the 4-byte EVEX prefix;
|
||||
/// - then, the opcode byte;
|
||||
/// - finally, the ModR/M byte.
|
||||
///
|
||||
/// Eventually this method should support encodings of more than just the reg-reg addressing mode (TODO).
|
||||
pub fn encode<CS: ByteSink + ?Sized>(&self, sink: &mut CS) {
|
||||
sink.put4(self.bits);
|
||||
sink.put1(self.opcode);
|
||||
sink.put1(encode_modrm(3, self.reg.0 & 7, self.rm.0 & 7));
|
||||
}
|
||||
|
||||
// In order to simplify the encoding of the various bit ranges in the prefix, we specify those
|
||||
// ranges according to the table below (extracted from the Intel Software Development Manual,
|
||||
// volume 2A). Remember that, because we pack the 4-byte prefix into a little-endian `u32`, this
|
||||
// chart should be read from right-to-left, top-to-bottom. Note also that we start ranges at bit
|
||||
// 8, leaving bits 0-7 for the mandatory `0x62`.
|
||||
// ┌───┬───┬───┬───┬───┬───┬───┬───┐
|
||||
// Byte 1: │ R │ X │ B │ R'│ 0 │ 0 │ m │ m │
|
||||
// ├───┼───┼───┼───┼───┼───┼───┼───┤
|
||||
// Byte 2: │ W │ v │ v │ v │ v │ 1 │ p │ p │
|
||||
// ├───┼───┼───┼───┼───┼───┼───┼───┤
|
||||
// Byte 3: │ z │ L'│ L │ b │ V'│ a │ a │ a │
|
||||
// └───┴───┴───┴───┴───┴───┴───┴───┘
|
||||
|
||||
// Byte 1:
|
||||
const mm: RangeInclusive<u8> = 8..=9;
|
||||
const R_: RangeInclusive<u8> = 12..=12;
|
||||
const B: RangeInclusive<u8> = 13..=13;
|
||||
const X: RangeInclusive<u8> = 14..=14;
|
||||
const R: RangeInclusive<u8> = 15..=15;
|
||||
|
||||
// Byte 2:
|
||||
const pp: RangeInclusive<u8> = 16..=17;
|
||||
const vvvv: RangeInclusive<u8> = 19..=22;
|
||||
const W: RangeInclusive<u8> = 23..=23;
|
||||
|
||||
// Byte 3:
|
||||
const aaa: RangeInclusive<u8> = 24..=26;
|
||||
const V_: RangeInclusive<u8> = 27..=27;
|
||||
#[allow(dead_code)] // Will be used once broadcast and rounding controls are exposed.
|
||||
const b: RangeInclusive<u8> = 28..=28;
|
||||
const LL: RangeInclusive<u8> = 29..=30;
|
||||
const z: RangeInclusive<u8> = 31..=31;
|
||||
|
||||
// A convenience method for writing the `value` bits to the given range in `self.bits`.
|
||||
#[inline]
|
||||
fn write(&mut self, range: RangeInclusive<u8>, value: u32) {
|
||||
assert!(ExactSizeIterator::len(&range) > 0);
|
||||
let size = range.end() - range.start() + 1; // Calculate the number of bits in the range.
|
||||
let mask: u32 = (1 << size) - 1; // Generate a bit mask.
|
||||
debug_assert!(
|
||||
value <= mask,
|
||||
"The written value should have fewer than {} bits.",
|
||||
size
|
||||
);
|
||||
let mask_complement = !(mask << *range.start()); // Create the bitwise complement for the clear mask.
|
||||
self.bits &= mask_complement; // Clear the bits in `range`; otherwise the OR below may allow previously-set bits to slip through.
|
||||
let value = value << *range.start(); // Place the value in the correct location (assumes `value <= mask`).
|
||||
self.bits |= value; // Modify the bits in `range`.
|
||||
}
|
||||
}
|
||||
|
||||
/// Describe the register index to use. This wrapper is a type-safe way to pass
/// around the registers defined in `inst/regs.rs`.
#[derive(Copy, Clone, Default)]
pub struct Register(u8);

impl From<u8> for Register {
    /// Wrap a raw register index; debug builds assert that it is below 16.
    fn from(reg: u8) -> Self {
        debug_assert!(reg < 16);
        Self(reg)
    }
}

// Implement `From` rather than `Into`: the standard library's blanket impl then
// provides `Into<u8> for Register` for free, so existing `.into()` callers keep
// working while `u8::from(register)` becomes available as well.
impl From<Register> for u8 {
    /// Unwrap the raw register index.
    fn from(reg: Register) -> Self {
        reg.0
    }
}
|
||||
|
||||
/// Defines the EVEX context for the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte). Table 2-36 in
|
||||
/// section 2.6.10 (Intel Software Development Manual, volume 2A) describes how these bits can be
|
||||
/// used together for certain classes of instructions; i.e., special care should be taken to ensure
|
||||
/// that instructions use an applicable correct `EvexContext`. Table 2-39 contains cases where
|
||||
/// opcodes can result in an #UD.
|
||||
#[allow(dead_code, missing_docs)] // Rounding and broadcast modes are not yet used.
|
||||
pub enum EvexContext {
|
||||
RoundingRegToRegFP {
|
||||
rc: EvexRoundingControl,
|
||||
},
|
||||
NoRoundingFP {
|
||||
sae: bool,
|
||||
length: EvexVectorLength,
|
||||
},
|
||||
MemoryOp {
|
||||
broadcast: bool,
|
||||
length: EvexVectorLength,
|
||||
},
|
||||
Other {
|
||||
length: EvexVectorLength,
|
||||
},
|
||||
}
|
||||
|
||||
impl Default for EvexContext {
|
||||
fn default() -> Self {
|
||||
Self::Other {
|
||||
length: EvexVectorLength::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl EvexContext {
|
||||
/// Encode the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte) for merging with the P2 byte.
|
||||
pub fn bits(&self) -> u8 {
|
||||
match self {
|
||||
Self::RoundingRegToRegFP { rc } => 0b001 | rc.bits() << 1,
|
||||
Self::NoRoundingFP { sae, length } => (*sae as u8) | length.bits() << 1,
|
||||
Self::MemoryOp { broadcast, length } => (*broadcast as u8) | length.bits() << 1,
|
||||
Self::Other { length } => length.bits() << 1,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The EVEX format allows choosing a vector length in the `L'` and `L` bits; see `EvexContext`.
#[allow(dead_code, missing_docs)] // Wider-length vectors are not yet used.
pub enum EvexVectorLength {
    V128,
    V256,
    V512,
}

impl EvexVectorLength {
    /// Encode the `L'` and `L` bits for merging with the P2 byte.
    fn bits(&self) -> u8 {
        // 0b11 is reserved (#UD), so only three encodings are produced.
        match *self {
            EvexVectorLength::V128 => 0b00,
            EvexVectorLength::V256 => 0b01,
            EvexVectorLength::V512 => 0b10,
        }
    }
}

impl Default for EvexVectorLength {
    /// The default vector length is 128 bits.
    fn default() -> Self {
        EvexVectorLength::V128
    }
}
|
||||
|
||||
/// The EVEX format allows defining rounding control in the `L'` and `L` bits; see `EvexContext`.
#[allow(dead_code, missing_docs)] // Rounding controls are not yet used.
pub enum EvexRoundingControl {
    RNE,
    RD,
    RU,
    RZ,
}

impl EvexRoundingControl {
    /// Encode the `L'` and `L` bits for merging with the P2 byte.
    fn bits(&self) -> u8 {
        match *self {
            EvexRoundingControl::RNE => 0b00,
            EvexRoundingControl::RD => 0b01,
            EvexRoundingControl::RU => 0b10,
            EvexRoundingControl::RZ => 0b11,
        }
    }
}
|
||||
|
||||
/// Defines the EVEX masking behavior; masking support is described in section 2.6.4 of the Intel
/// Software Development Manual, volume 2A.
#[allow(dead_code, missing_docs)] // Masking is not yet used.
pub enum EvexMasking {
    None,
    Merging { k: u8 },
    Zeroing { k: u8 },
}

impl Default for EvexMasking {
    /// No masking by default.
    fn default() -> Self {
        Self::None
    }
}

impl EvexMasking {
    /// Encode the `z` bit for merging with the P2 byte: 1 for `Zeroing`, 0 otherwise.
    pub fn z_bit(&self) -> u8 {
        if let EvexMasking::Zeroing { .. } = self {
            1
        } else {
            0
        }
    }

    /// Encode the `aaa` bits for merging with the P2 byte.
    ///
    /// `None` encodes as `0b000`; the masking variants encode their `k` value, which debug
    /// builds assert to be at most 7.
    pub fn aaa_bits(&self) -> u8 {
        let mask_reg = match self {
            EvexMasking::None => return 0b000,
            EvexMasking::Merging { k } | EvexMasking::Zeroing { k } => *k,
        };
        debug_assert!(mask_reg <= 7);
        mask_reg
    }
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::isa::x64::inst::regs;
|
||||
use std::vec::Vec;
|
||||
|
||||
// As a sanity test, we verify that the output of `xed-asmparse-main 'vpabsq xmm0{k0},
|
||||
// xmm1'` matches this EVEX encoding machinery.
|
||||
#[test]
|
||||
fn vpabsq() {
|
||||
let dst = regs::xmm0();
|
||||
let src = regs::xmm1();
|
||||
let mut sink0 = Vec::new();
|
||||
|
||||
EvexInstruction::new()
|
||||
.prefix(LegacyPrefixes::_66)
|
||||
.map(OpcodeMap::_0F38)
|
||||
.w(true)
|
||||
.opcode(0x1F)
|
||||
.reg(dst.get_hw_encoding())
|
||||
.rm(src.get_hw_encoding())
|
||||
.length(EvexVectorLength::V128)
|
||||
.encode(&mut sink0);
|
||||
|
||||
assert_eq!(sink0, vec![0x62, 0xf2, 0xfd, 0x08, 0x1f, 0xc1]);
|
||||
}
|
||||
|
||||
/// Verify that the defaults are equivalent to an instruction with a `0x00` opcode using the
|
||||
/// "0" register (i.e. `rax`), with sane defaults for the various configurable parameters. This
|
||||
/// test is more interesting than it may appear because some of the parameters have flipped-bit
|
||||
/// representations (e.g. `vvvv`) so emitting 0s as a default will not work.
|
||||
#[test]
|
||||
fn default_emission() {
|
||||
let mut sink0 = Vec::new();
|
||||
EvexInstruction::new().encode(&mut sink0);
|
||||
|
||||
let mut sink1 = Vec::new();
|
||||
EvexInstruction::new()
|
||||
.length(EvexVectorLength::V128)
|
||||
.prefix(LegacyPrefixes::None)
|
||||
.map(OpcodeMap::None)
|
||||
.w(false)
|
||||
.opcode(0x00)
|
||||
.reg(regs::rax().get_hw_encoding())
|
||||
.rm(regs::rax().get_hw_encoding())
|
||||
.mask(EvexMasking::None)
|
||||
.encode(&mut sink1);
|
||||
|
||||
assert_eq!(sink0, sink1);
|
||||
}
|
||||
}
|
||||
60
cranelift/codegen/src/isa/x64/encoding/mod.rs
Normal file
60
cranelift/codegen/src/isa/x64/encoding/mod.rs
Normal file
@@ -0,0 +1,60 @@
|
||||
//! Contains the encoding machinery for the various x64 instruction formats.
|
||||
use crate::{isa::x64, machinst::MachBuffer};
|
||||
use std::vec::Vec;
|
||||
|
||||
pub mod evex;
|
||||
pub mod rex;
|
||||
pub mod vex;
|
||||
|
||||
/// A destination for encoded instruction bytes. Every encoding format in this
/// module emits its output through this trait.
pub trait ByteSink {
    /// Add 1 byte to the code section.
    fn put1(&mut self, value: u8);

    /// Add 2 bytes to the code section.
    fn put2(&mut self, value: u16);

    /// Add 4 bytes to the code section.
    fn put4(&mut self, value: u32);

    /// Add 8 bytes to the code section.
    fn put8(&mut self, value: u64);
}
|
||||
|
||||
impl ByteSink for MachBuffer<x64::inst::Inst> {
|
||||
fn put1(&mut self, value: u8) {
|
||||
self.put1(value)
|
||||
}
|
||||
|
||||
fn put2(&mut self, value: u16) {
|
||||
self.put2(value)
|
||||
}
|
||||
|
||||
fn put4(&mut self, value: u32) {
|
||||
self.put4(value)
|
||||
}
|
||||
|
||||
fn put8(&mut self, value: u64) {
|
||||
self.put8(value)
|
||||
}
|
||||
}
|
||||
|
||||
/// Provide a convenient implementation for testing.
|
||||
impl ByteSink for Vec<u8> {
|
||||
fn put1(&mut self, v: u8) {
|
||||
self.extend_from_slice(&[v])
|
||||
}
|
||||
|
||||
fn put2(&mut self, v: u16) {
|
||||
self.extend_from_slice(&v.to_le_bytes())
|
||||
}
|
||||
|
||||
fn put4(&mut self, v: u32) {
|
||||
self.extend_from_slice(&v.to_le_bytes())
|
||||
}
|
||||
|
||||
fn put8(&mut self, v: u64) {
|
||||
self.extend_from_slice(&v.to_le_bytes())
|
||||
}
|
||||
}
|
||||
504
cranelift/codegen/src/isa/x64/encoding/rex.rs
Normal file
504
cranelift/codegen/src/isa/x64/encoding/rex.rs
Normal file
@@ -0,0 +1,504 @@
|
||||
//! Encodes instructions in the standard x86 encoding mode. This is called IA-32E mode in the Intel
|
||||
//! manuals but corresponds to the addition of the REX-prefix format (hence the name of this module)
|
||||
//! that allowed encoding instructions in both compatibility mode (32-bit instructions running on a
|
||||
//! 64-bit OS) and in 64-bit mode (using the full 64-bit address space).
|
||||
//!
|
||||
//! For all of the routines that take both a memory-or-reg operand (sometimes called "E" in the
|
||||
//! Intel documentation, see the Intel Developer's manual, vol. 2, section A.2) and a reg-only
|
||||
//! operand ("G" in Intelese), the order is always G first, then E. The term "enc" in the following
|
||||
//! means "hardware register encoding number".
|
||||
|
||||
use crate::{
|
||||
ir::TrapCode,
|
||||
isa::x64::inst::{
|
||||
args::{Amode, OperandSize},
|
||||
regs, EmitInfo, EmitState, Inst, LabelUse,
|
||||
},
|
||||
machinst::{MachBuffer, MachInstEmitInfo},
|
||||
};
|
||||
use regalloc::{Reg, RegClass};
|
||||
|
||||
/// Returns true if `x`, interpreted as a signed 32-bit value and widened to 64 bits,
/// is equal to the sign-extension of its own low 8 bits.
pub(crate) fn low8_will_sign_extend_to_64(x: u32) -> bool {
    let widened = i64::from(x as i32);
    // Truncating to `i8` keeps the low 8 bits; widening again sign-extends them.
    let from_low8 = i64::from(widened as i8);
    widened == from_low8
}
|
||||
|
||||
/// Returns true if `x`, interpreted as a signed 32-bit value, is equal to the
/// sign-extension of its own low 8 bits.
pub(crate) fn low8_will_sign_extend_to_32(x: u32) -> bool {
    let signed = x as i32;
    // Truncating to `i8` keeps the low 8 bits; widening again sign-extends them.
    let from_low8 = i32::from(signed as i8);
    signed == from_low8
}
|
||||
|
||||
/// Encode the ModR/M byte: `m0d` in bits 7:6, `enc_reg_g` in bits 5:3 and `rm_e` in bits 2:0.
///
/// Debug builds assert that each argument fits in its field.
#[inline(always)]
pub fn encode_modrm(m0d: u8, enc_reg_g: u8, rm_e: u8) -> u8 {
    debug_assert!(m0d < 4);
    debug_assert!(enc_reg_g < 8);
    debug_assert!(rm_e < 8);
    let mode_field = (m0d & 0b11) << 6;
    let reg_field = (enc_reg_g & 0b111) << 3;
    let rm_field = rm_e & 0b111;
    mode_field | reg_field | rm_field
}
|
||||
|
||||
/// Encode the SIB byte: `shift` in bits 7:6, `enc_index` in bits 5:3 and `enc_base` in bits 2:0.
///
/// Debug builds assert that each argument fits in its field.
#[inline(always)]
pub(crate) fn encode_sib(shift: u8, enc_index: u8, enc_base: u8) -> u8 {
    debug_assert!(shift < 4);
    debug_assert!(enc_index < 8);
    debug_assert!(enc_base < 8);
    let scale_field = (shift & 0b11) << 6;
    let index_field = (enc_index & 0b111) << 3;
    let base_field = enc_base & 0b111;
    scale_field | index_field | base_field
}
|
||||
|
||||
/// Get the encoding number of a GPR.
|
||||
#[inline(always)]
|
||||
pub(crate) fn int_reg_enc(reg: Reg) -> u8 {
|
||||
debug_assert!(reg.is_real());
|
||||
debug_assert_eq!(reg.get_class(), RegClass::I64);
|
||||
reg.get_hw_encoding()
|
||||
}
|
||||
|
||||
/// Get the encoding number of any register.
|
||||
#[inline(always)]
|
||||
pub(crate) fn reg_enc(reg: Reg) -> u8 {
|
||||
debug_assert!(reg.is_real());
|
||||
reg.get_hw_encoding()
|
||||
}
|
||||
|
||||
/// A small bit field to record a REX prefix specification:
|
||||
/// - bit 0 set to 1 indicates REX.W must be 0 (cleared).
|
||||
/// - bit 1 set to 1 indicates the REX prefix must always be emitted.
|
||||
#[repr(transparent)]
|
||||
#[derive(Clone, Copy)]
|
||||
pub(crate) struct RexFlags(u8);
|
||||
|
||||
impl RexFlags {
|
||||
/// By default, set the W field, and don't always emit.
|
||||
#[inline(always)]
|
||||
pub(crate) fn set_w() -> Self {
|
||||
Self(0)
|
||||
}
|
||||
/// Creates a new RexPrefix for which the REX.W bit will be cleared.
|
||||
#[inline(always)]
|
||||
pub(crate) fn clear_w() -> Self {
|
||||
Self(1)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub(crate) fn always_emit(&mut self) -> &mut Self {
|
||||
self.0 = self.0 | 2;
|
||||
self
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub(crate) fn always_emit_if_8bit_needed(&mut self, reg: Reg) -> &mut Self {
|
||||
let enc_reg = int_reg_enc(reg);
|
||||
if enc_reg >= 4 && enc_reg <= 7 {
|
||||
self.always_emit();
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub(crate) fn must_clear_w(&self) -> bool {
|
||||
(self.0 & 1) != 0
|
||||
}
|
||||
#[inline(always)]
|
||||
pub(crate) fn must_always_emit(&self) -> bool {
|
||||
(self.0 & 2) != 0
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub(crate) fn emit_two_op(&self, sink: &mut MachBuffer<Inst>, enc_g: u8, enc_e: u8) {
|
||||
let w = if self.must_clear_w() { 0 } else { 1 };
|
||||
let r = (enc_g >> 3) & 1;
|
||||
let x = 0;
|
||||
let b = (enc_e >> 3) & 1;
|
||||
let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
|
||||
if rex != 0x40 || self.must_always_emit() {
|
||||
sink.put1(rex);
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn emit_three_op(
|
||||
&self,
|
||||
sink: &mut MachBuffer<Inst>,
|
||||
enc_g: u8,
|
||||
enc_index: u8,
|
||||
enc_base: u8,
|
||||
) {
|
||||
let w = if self.must_clear_w() { 0 } else { 1 };
|
||||
let r = (enc_g >> 3) & 1;
|
||||
let x = (enc_index >> 3) & 1;
|
||||
let b = (enc_base >> 3) & 1;
|
||||
let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
|
||||
if rex != 0x40 || self.must_always_emit() {
|
||||
sink.put1(rex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate the proper Rex flags for the given operand size.
|
||||
impl From<OperandSize> for RexFlags {
|
||||
fn from(size: OperandSize) -> Self {
|
||||
match size {
|
||||
OperandSize::Size64 => RexFlags::set_w(),
|
||||
_ => RexFlags::clear_w(),
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Generate Rex flags for an OperandSize/register tuple.
|
||||
impl From<(OperandSize, Reg)> for RexFlags {
|
||||
fn from((size, reg): (OperandSize, Reg)) -> Self {
|
||||
let mut rex = RexFlags::from(size);
|
||||
if size == OperandSize::Size8 {
|
||||
rex.always_emit_if_8bit_needed(reg);
|
||||
}
|
||||
rex
|
||||
}
|
||||
}
|
||||
|
||||
/// Allows using the same opcode byte in different "opcode maps" to allow for more instruction
/// encodings. See appendix A in the Intel Software Developer's Manual, volume 2A, for more details.
#[allow(missing_docs)]
pub enum OpcodeMap {
    None,
    _0F,
    _0F38,
    _0F3A,
}

impl OpcodeMap {
    /// Normally the opcode map is specified as bytes in the instruction, but some x64 encoding
    /// formats pack this information as bits in a prefix (e.g. EVEX). This returns that
    /// two-bit form.
    pub(crate) fn bits(&self) -> u8 {
        match *self {
            Self::None => 0b00,
            Self::_0F => 0b01,
            Self::_0F38 => 0b10,
            Self::_0F3A => 0b11,
        }
    }
}

impl Default for OpcodeMap {
    /// No opcode map by default.
    fn default() -> Self {
        OpcodeMap::None
    }
}
|
||||
|
||||
/// We may need to include one or more legacy prefix bytes before the REX prefix. This enum
|
||||
/// covers only the small set of possibilities that we actually need.
|
||||
pub enum LegacyPrefixes {
|
||||
/// No prefix bytes.
|
||||
None,
|
||||
/// Operand Size Override -- here, denoting "16-bit operation".
|
||||
_66,
|
||||
/// The Lock prefix.
|
||||
_F0,
|
||||
/// Operand size override and Lock.
|
||||
_66F0,
|
||||
/// REPNE, but no specific meaning here -- is just an opcode extension.
|
||||
_F2,
|
||||
/// REP/REPE, but no specific meaning here -- is just an opcode extension.
|
||||
_F3,
|
||||
/// Operand size override and same effect as F3.
|
||||
_66F3,
|
||||
}
|
||||
|
||||
impl LegacyPrefixes {
|
||||
/// Emit the legacy prefix as bytes (e.g. in REX instructions).
|
||||
#[inline(always)]
|
||||
pub(crate) fn emit(&self, sink: &mut MachBuffer<Inst>) {
|
||||
match self {
|
||||
Self::_66 => sink.put1(0x66),
|
||||
Self::_F0 => sink.put1(0xF0),
|
||||
Self::_66F0 => {
|
||||
// I don't think the order matters, but in any case, this is the same order that
|
||||
// the GNU assembler uses.
|
||||
sink.put1(0x66);
|
||||
sink.put1(0xF0);
|
||||
}
|
||||
Self::_F2 => sink.put1(0xF2),
|
||||
Self::_F3 => sink.put1(0xF3),
|
||||
Self::_66F3 => {
|
||||
sink.put1(0x66);
|
||||
sink.put1(0xF3);
|
||||
}
|
||||
Self::None => (),
|
||||
}
|
||||
}
|
||||
|
||||
/// Emit the legacy prefix as bits (e.g. for EVEX instructions).
|
||||
#[inline(always)]
|
||||
pub(crate) fn bits(&self) -> u8 {
|
||||
match self {
|
||||
Self::None => 0b00,
|
||||
Self::_66 => 0b01,
|
||||
Self::_F3 => 0b10,
|
||||
Self::_F2 => 0b11,
|
||||
_ => panic!(
|
||||
"VEX and EVEX bits can only be extracted from single prefixes: None, 66, F3, F2"
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for LegacyPrefixes {
|
||||
fn default() -> Self {
|
||||
Self::None
|
||||
}
|
||||
}
|
||||
|
||||
/// This is the core 'emit' function for instructions that reference memory.
|
||||
///
|
||||
/// For an instruction that has as operands a reg encoding `enc_g` and a memory address `mem_e`,
|
||||
/// create and emit:
|
||||
/// - first the legacy prefixes, if any
|
||||
/// - then the REX prefix, if needed
|
||||
/// - then caller-supplied opcode byte(s) (`opcodes` and `num_opcodes`),
|
||||
/// - then the MOD/RM byte,
|
||||
/// - then optionally, a SIB byte,
|
||||
/// - and finally optionally an immediate that will be derived from the `mem_e` operand.
|
||||
///
|
||||
/// For most instructions up to and including SSE4.2, that will be the whole instruction: this is
|
||||
/// what we call "standard" instructions, and abbreviate "std" in the name here. VEX-prefixed
|
||||
/// instructions will require their own emitter functions.
|
||||
///
|
||||
/// This will also work for 32-bit x86 instructions, assuming no REX prefix is provided.
|
||||
///
|
||||
/// The opcodes are written in big-endian order for the convenience of callers. For example, if the opcode
|
||||
/// bytes to be emitted are, in this order, F3 0F 27, then the caller should pass `opcodes` ==
|
||||
/// 0xF3_0F_27 and `num_opcodes` == 3.
|
||||
///
|
||||
/// The register operand is represented here not as a `Reg` but as its hardware encoding, `enc_g`.
|
||||
/// `rex` can specify special handling for the REX prefix. By default, the REX prefix will
|
||||
/// indicate a 64-bit operation and will be deleted if it is redundant (0x40). Note that for a
|
||||
/// 64-bit operation, the REX prefix will normally never be redundant, since REX.W must be 1 to
|
||||
/// indicate a 64-bit operation.
|
||||
pub(crate) fn emit_std_enc_mem(
|
||||
sink: &mut MachBuffer<Inst>,
|
||||
state: &EmitState,
|
||||
info: &EmitInfo,
|
||||
prefixes: LegacyPrefixes,
|
||||
opcodes: u32,
|
||||
mut num_opcodes: usize,
|
||||
enc_g: u8,
|
||||
mem_e: &Amode,
|
||||
rex: RexFlags,
|
||||
) {
|
||||
// General comment for this function: the registers in `mem_e` must be
|
||||
// 64-bit integer registers, because they are part of an address
|
||||
// expression. But `enc_g` can be derived from a register of any class.
|
||||
|
||||
let srcloc = state.cur_srcloc();
|
||||
let can_trap = mem_e.can_trap();
|
||||
if can_trap {
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
|
||||
prefixes.emit(sink);
|
||||
|
||||
match mem_e {
|
||||
Amode::ImmReg { simm32, base, .. } => {
|
||||
// If this is an access based off of RSP, it may trap with a stack overflow if it's the
|
||||
// first touch of a new stack page.
|
||||
if *base == regs::rsp() && !can_trap && info.flags().enable_probestack() {
|
||||
sink.add_trap(srcloc, TrapCode::StackOverflow);
|
||||
}
|
||||
|
||||
// First, the REX byte.
|
||||
let enc_e = int_reg_enc(*base);
|
||||
rex.emit_two_op(sink, enc_g, enc_e);
|
||||
|
||||
// Now the opcode(s). These include any other prefixes the caller
|
||||
// hands to us.
|
||||
while num_opcodes > 0 {
|
||||
num_opcodes -= 1;
|
||||
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
|
||||
}
|
||||
|
||||
// Now the mod/rm and associated immediates. This is
|
||||
// significantly complicated due to the multiple special cases.
|
||||
if *simm32 == 0
|
||||
&& enc_e != regs::ENC_RSP
|
||||
&& enc_e != regs::ENC_RBP
|
||||
&& enc_e != regs::ENC_R12
|
||||
&& enc_e != regs::ENC_R13
|
||||
{
|
||||
// FIXME JRS 2020Feb11: those four tests can surely be
|
||||
// replaced by a single mask-and-compare check. We should do
|
||||
// that because this routine is likely to be hot.
|
||||
sink.put1(encode_modrm(0, enc_g & 7, enc_e & 7));
|
||||
} else if *simm32 == 0 && (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12) {
|
||||
sink.put1(encode_modrm(0, enc_g & 7, 4));
|
||||
sink.put1(0x24);
|
||||
} else if low8_will_sign_extend_to_32(*simm32)
|
||||
&& enc_e != regs::ENC_RSP
|
||||
&& enc_e != regs::ENC_R12
|
||||
{
|
||||
sink.put1(encode_modrm(1, enc_g & 7, enc_e & 7));
|
||||
sink.put1((simm32 & 0xFF) as u8);
|
||||
} else if enc_e != regs::ENC_RSP && enc_e != regs::ENC_R12 {
|
||||
sink.put1(encode_modrm(2, enc_g & 7, enc_e & 7));
|
||||
sink.put4(*simm32);
|
||||
} else if (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12)
|
||||
&& low8_will_sign_extend_to_32(*simm32)
|
||||
{
|
||||
// REX.B distinguishes RSP from R12
|
||||
sink.put1(encode_modrm(1, enc_g & 7, 4));
|
||||
sink.put1(0x24);
|
||||
sink.put1((simm32 & 0xFF) as u8);
|
||||
} else if enc_e == regs::ENC_R12 || enc_e == regs::ENC_RSP {
|
||||
//.. wait for test case for RSP case
|
||||
// REX.B distinguishes RSP from R12
|
||||
sink.put1(encode_modrm(2, enc_g & 7, 4));
|
||||
sink.put1(0x24);
|
||||
sink.put4(*simm32);
|
||||
} else {
|
||||
unreachable!("ImmReg");
|
||||
}
|
||||
}
|
||||
|
||||
Amode::ImmRegRegShift {
|
||||
simm32,
|
||||
base: reg_base,
|
||||
index: reg_index,
|
||||
shift,
|
||||
..
|
||||
} => {
|
||||
// If this is an access based off of RSP, it may trap with a stack overflow if it's the
|
||||
// first touch of a new stack page.
|
||||
if *reg_base == regs::rsp() && !can_trap && info.flags().enable_probestack() {
|
||||
sink.add_trap(srcloc, TrapCode::StackOverflow);
|
||||
}
|
||||
|
||||
let enc_base = int_reg_enc(*reg_base);
|
||||
let enc_index = int_reg_enc(*reg_index);
|
||||
|
||||
// The rex byte.
|
||||
rex.emit_three_op(sink, enc_g, enc_index, enc_base);
|
||||
|
||||
// All other prefixes and opcodes.
|
||||
while num_opcodes > 0 {
|
||||
num_opcodes -= 1;
|
||||
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
|
||||
}
|
||||
|
||||
// modrm, SIB, immediates.
|
||||
if low8_will_sign_extend_to_32(*simm32) && enc_index != regs::ENC_RSP {
|
||||
sink.put1(encode_modrm(1, enc_g & 7, 4));
|
||||
sink.put1(encode_sib(*shift, enc_index & 7, enc_base & 7));
|
||||
sink.put1(*simm32 as u8);
|
||||
} else if enc_index != regs::ENC_RSP {
|
||||
sink.put1(encode_modrm(2, enc_g & 7, 4));
|
||||
sink.put1(encode_sib(*shift, enc_index & 7, enc_base & 7));
|
||||
sink.put4(*simm32);
|
||||
} else {
|
||||
panic!("ImmRegRegShift");
|
||||
}
|
||||
}
|
||||
|
||||
Amode::RipRelative { ref target } => {
|
||||
// First, the REX byte, with REX.B = 0.
|
||||
rex.emit_two_op(sink, enc_g, 0);
|
||||
|
||||
// Now the opcode(s). These include any other prefixes the caller
|
||||
// hands to us.
|
||||
while num_opcodes > 0 {
|
||||
num_opcodes -= 1;
|
||||
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
|
||||
}
|
||||
|
||||
// RIP-relative is mod=00, rm=101.
|
||||
sink.put1(encode_modrm(0, enc_g & 7, 0b101));
|
||||
|
||||
let offset = sink.cur_offset();
|
||||
sink.use_label_at_offset(offset, *target, LabelUse::JmpRel32);
|
||||
sink.put4(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// This is the core 'emit' function for instructions that do not reference memory.
|
||||
///
|
||||
/// This is conceptually the same as emit_modrm_sib_enc_ge, except it is for the case where the E
|
||||
/// operand is a register rather than memory. Hence it is much simpler.
|
||||
pub(crate) fn emit_std_enc_enc(
|
||||
sink: &mut MachBuffer<Inst>,
|
||||
prefixes: LegacyPrefixes,
|
||||
opcodes: u32,
|
||||
mut num_opcodes: usize,
|
||||
enc_g: u8,
|
||||
enc_e: u8,
|
||||
rex: RexFlags,
|
||||
) {
|
||||
// EncG and EncE can be derived from registers of any class, and they
|
||||
// don't even have to be from the same class. For example, for an
|
||||
// integer-to-FP conversion insn, one might be RegClass::I64 and the other
|
||||
// RegClass::V128.
|
||||
|
||||
// The legacy prefixes.
|
||||
prefixes.emit(sink);
|
||||
|
||||
// The rex byte.
|
||||
rex.emit_two_op(sink, enc_g, enc_e);
|
||||
|
||||
// All other prefixes and opcodes.
|
||||
while num_opcodes > 0 {
|
||||
num_opcodes -= 1;
|
||||
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
|
||||
}
|
||||
|
||||
// Now the mod/rm byte. The instruction we're generating doesn't access
|
||||
// memory, so there is no SIB byte or immediate -- we're done.
|
||||
sink.put1(encode_modrm(3, enc_g & 7, enc_e & 7));
|
||||
}
|
||||
|
||||
// These are merely wrappers for the above two functions that facilitate passing
|
||||
// actual `Reg`s rather than their encodings.
|
||||
|
||||
pub(crate) fn emit_std_reg_mem(
|
||||
sink: &mut MachBuffer<Inst>,
|
||||
state: &EmitState,
|
||||
info: &EmitInfo,
|
||||
prefixes: LegacyPrefixes,
|
||||
opcodes: u32,
|
||||
num_opcodes: usize,
|
||||
reg_g: Reg,
|
||||
mem_e: &Amode,
|
||||
rex: RexFlags,
|
||||
) {
|
||||
let enc_g = reg_enc(reg_g);
|
||||
emit_std_enc_mem(
|
||||
sink,
|
||||
state,
|
||||
info,
|
||||
prefixes,
|
||||
opcodes,
|
||||
num_opcodes,
|
||||
enc_g,
|
||||
mem_e,
|
||||
rex,
|
||||
);
|
||||
}
|
||||
|
||||
pub(crate) fn emit_std_reg_reg(
|
||||
sink: &mut MachBuffer<Inst>,
|
||||
prefixes: LegacyPrefixes,
|
||||
opcodes: u32,
|
||||
num_opcodes: usize,
|
||||
reg_g: Reg,
|
||||
reg_e: Reg,
|
||||
rex: RexFlags,
|
||||
) {
|
||||
let enc_g = reg_enc(reg_g);
|
||||
let enc_e = reg_enc(reg_e);
|
||||
emit_std_enc_enc(sink, prefixes, opcodes, num_opcodes, enc_g, enc_e, rex);
|
||||
}
|
||||
|
||||
/// Write a suitable number of bits from an imm64 to the sink.
|
||||
pub(crate) fn emit_simm(sink: &mut MachBuffer<Inst>, size: u8, simm32: u32) {
|
||||
match size {
|
||||
8 | 4 => sink.put4(simm32),
|
||||
2 => sink.put2(simm32 as u16),
|
||||
1 => sink.put1(simm32 as u8),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
2
cranelift/codegen/src/isa/x64/encoding/vex.rs
Normal file
2
cranelift/codegen/src/isa/x64/encoding/vex.rs
Normal file
@@ -0,0 +1,2 @@
|
||||
//! Encodes VEX instructions. These instructions are those added by the Advanced Vector Extensions
|
||||
//! (AVX).
|
||||
@@ -10,6 +10,7 @@ use regalloc::{
|
||||
PrettyPrint, PrettyPrintSized, RealRegUniverse, Reg, RegClass, RegUsageCollector,
|
||||
RegUsageMapper, Writable,
|
||||
};
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::fmt;
|
||||
use std::string::String;
|
||||
|
||||
@@ -411,12 +412,12 @@ pub enum UnaryRmROpcode {
|
||||
}
|
||||
|
||||
impl UnaryRmROpcode {
|
||||
pub(crate) fn available_from(&self) -> Option<InstructionSet> {
|
||||
pub(crate) fn available_from(&self) -> SmallVec<[InstructionSet; 2]> {
|
||||
match self {
|
||||
UnaryRmROpcode::Bsr | UnaryRmROpcode::Bsf => None,
|
||||
UnaryRmROpcode::Lzcnt => Some(InstructionSet::Lzcnt),
|
||||
UnaryRmROpcode::Tzcnt => Some(InstructionSet::BMI1),
|
||||
UnaryRmROpcode::Popcnt => Some(InstructionSet::Popcnt),
|
||||
UnaryRmROpcode::Bsr | UnaryRmROpcode::Bsf => smallvec![],
|
||||
UnaryRmROpcode::Lzcnt => smallvec![InstructionSet::Lzcnt],
|
||||
UnaryRmROpcode::Tzcnt => smallvec![InstructionSet::BMI1],
|
||||
UnaryRmROpcode::Popcnt => smallvec![InstructionSet::Popcnt],
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -447,6 +448,7 @@ pub enum CmpOpcode {
|
||||
Test,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum InstructionSet {
|
||||
SSE,
|
||||
SSE2,
|
||||
@@ -458,10 +460,13 @@ pub(crate) enum InstructionSet {
|
||||
BMI1,
|
||||
#[allow(dead_code)] // never constructed (yet).
|
||||
BMI2,
|
||||
AVX512F,
|
||||
AVX512VL,
|
||||
}
|
||||
|
||||
/// Some SSE operations requiring 2 operands r/m and r.
|
||||
#[derive(Clone, Copy, PartialEq)]
|
||||
#[allow(dead_code)] // some variants here aren't used just yet
|
||||
pub enum SseOpcode {
|
||||
Addps,
|
||||
Addpd,
|
||||
@@ -479,6 +484,7 @@ pub enum SseOpcode {
|
||||
Cmpss,
|
||||
Cmpsd,
|
||||
Cvtdq2ps,
|
||||
Cvtdq2pd,
|
||||
Cvtsd2ss,
|
||||
Cvtsd2si,
|
||||
Cvtsi2ss,
|
||||
@@ -672,6 +678,7 @@ impl SseOpcode {
|
||||
| SseOpcode::Cmpsd
|
||||
| SseOpcode::Comisd
|
||||
| SseOpcode::Cvtdq2ps
|
||||
| SseOpcode::Cvtdq2pd
|
||||
| SseOpcode::Cvtsd2ss
|
||||
| SseOpcode::Cvtsd2si
|
||||
| SseOpcode::Cvtsi2sd
|
||||
@@ -827,6 +834,7 @@ impl fmt::Debug for SseOpcode {
|
||||
SseOpcode::Comiss => "comiss",
|
||||
SseOpcode::Comisd => "comisd",
|
||||
SseOpcode::Cvtdq2ps => "cvtdq2ps",
|
||||
SseOpcode::Cvtdq2pd => "cvtdq2pd",
|
||||
SseOpcode::Cvtsd2ss => "cvtsd2ss",
|
||||
SseOpcode::Cvtsd2si => "cvtsd2si",
|
||||
SseOpcode::Cvtsi2ss => "cvtsi2ss",
|
||||
@@ -983,6 +991,35 @@ impl fmt::Display for SseOpcode {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum Avx512Opcode {
|
||||
Vpabsq,
|
||||
}
|
||||
|
||||
impl Avx512Opcode {
|
||||
/// Which `InstructionSet`s support the opcode?
|
||||
pub(crate) fn available_from(&self) -> SmallVec<[InstructionSet; 2]> {
|
||||
match self {
|
||||
Avx512Opcode::Vpabsq => smallvec![InstructionSet::AVX512F, InstructionSet::AVX512VL],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for Avx512Opcode {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||
let name = match self {
|
||||
Avx512Opcode::Vpabsq => "vpabsq",
|
||||
};
|
||||
write!(fmt, "{}", name)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Avx512Opcode {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fmt::Debug::fmt(self, f)
|
||||
}
|
||||
}
|
||||
|
||||
/// This defines the ways a value can be extended: either signed- or zero-extension, or none for
|
||||
/// types that are not extended. Contrast with [ExtMode], which defines the widths from and to which
|
||||
/// values can be extended.
|
||||
|
||||
@@ -2,449 +2,18 @@ use crate::binemit::{Addend, Reloc};
|
||||
use crate::ir::immediates::{Ieee32, Ieee64};
|
||||
use crate::ir::LibCall;
|
||||
use crate::ir::TrapCode;
|
||||
use crate::isa::x64::encoding::evex::{EvexInstruction, EvexVectorLength};
|
||||
use crate::isa::x64::encoding::rex::{
|
||||
emit_simm, emit_std_enc_enc, emit_std_enc_mem, emit_std_reg_mem, emit_std_reg_reg, int_reg_enc,
|
||||
low8_will_sign_extend_to_32, low8_will_sign_extend_to_64, reg_enc, LegacyPrefixes, OpcodeMap,
|
||||
RexFlags,
|
||||
};
|
||||
use crate::isa::x64::inst::args::*;
|
||||
use crate::isa::x64::inst::*;
|
||||
use crate::machinst::{inst_common, MachBuffer, MachInstEmit, MachLabel};
|
||||
use core::convert::TryInto;
|
||||
use log::debug;
|
||||
use regalloc::{Reg, RegClass, Writable};
|
||||
|
||||
fn low8_will_sign_extend_to_64(x: u32) -> bool {
|
||||
let xs = (x as i32) as i64;
|
||||
xs == ((xs << 56) >> 56)
|
||||
}
|
||||
|
||||
fn low8_will_sign_extend_to_32(x: u32) -> bool {
|
||||
let xs = x as i32;
|
||||
xs == ((xs << 24) >> 24)
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
// Instructions and subcomponents: emission
|
||||
|
||||
// For all of the routines that take both a memory-or-reg operand (sometimes
|
||||
// called "E" in the Intel documentation) and a reg-only operand ("G" in
|
||||
// Intelese), the order is always G first, then E.
|
||||
//
|
||||
// "enc" in the following means "hardware register encoding number".
|
||||
|
||||
#[inline(always)]
|
||||
fn encode_modrm(m0d: u8, enc_reg_g: u8, rm_e: u8) -> u8 {
|
||||
debug_assert!(m0d < 4);
|
||||
debug_assert!(enc_reg_g < 8);
|
||||
debug_assert!(rm_e < 8);
|
||||
((m0d & 3) << 6) | ((enc_reg_g & 7) << 3) | (rm_e & 7)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn encode_sib(shift: u8, enc_index: u8, enc_base: u8) -> u8 {
|
||||
debug_assert!(shift < 4);
|
||||
debug_assert!(enc_index < 8);
|
||||
debug_assert!(enc_base < 8);
|
||||
((shift & 3) << 6) | ((enc_index & 7) << 3) | (enc_base & 7)
|
||||
}
|
||||
|
||||
/// Get the encoding number of a GPR.
|
||||
#[inline(always)]
|
||||
fn int_reg_enc(reg: Reg) -> u8 {
|
||||
debug_assert!(reg.is_real());
|
||||
debug_assert_eq!(reg.get_class(), RegClass::I64);
|
||||
reg.get_hw_encoding()
|
||||
}
|
||||
|
||||
/// Get the encoding number of any register.
|
||||
#[inline(always)]
|
||||
fn reg_enc(reg: Reg) -> u8 {
|
||||
debug_assert!(reg.is_real());
|
||||
reg.get_hw_encoding()
|
||||
}
|
||||
|
||||
/// A small bit field to record a REX prefix specification:
|
||||
/// - bit 0 set to 1 indicates REX.W must be 0 (cleared).
|
||||
/// - bit 1 set to 1 indicates the REX prefix must always be emitted.
|
||||
#[repr(transparent)]
|
||||
#[derive(Clone, Copy)]
|
||||
struct RexFlags(u8);
|
||||
|
||||
impl RexFlags {
|
||||
/// By default, set the W field, and don't always emit.
|
||||
#[inline(always)]
|
||||
fn set_w() -> Self {
|
||||
Self(0)
|
||||
}
|
||||
/// Creates a new RexPrefix for which the REX.W bit will be cleared.
|
||||
#[inline(always)]
|
||||
fn clear_w() -> Self {
|
||||
Self(1)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn always_emit(&mut self) -> &mut Self {
|
||||
self.0 = self.0 | 2;
|
||||
self
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn always_emit_if_8bit_needed(&mut self, reg: Reg) -> &mut Self {
|
||||
let enc_reg = int_reg_enc(reg);
|
||||
if enc_reg >= 4 && enc_reg <= 7 {
|
||||
self.always_emit();
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn must_clear_w(&self) -> bool {
|
||||
(self.0 & 1) != 0
|
||||
}
|
||||
#[inline(always)]
|
||||
fn must_always_emit(&self) -> bool {
|
||||
(self.0 & 2) != 0
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn emit_two_op(&self, sink: &mut MachBuffer<Inst>, enc_g: u8, enc_e: u8) {
|
||||
let w = if self.must_clear_w() { 0 } else { 1 };
|
||||
let r = (enc_g >> 3) & 1;
|
||||
let x = 0;
|
||||
let b = (enc_e >> 3) & 1;
|
||||
let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
|
||||
if rex != 0x40 || self.must_always_emit() {
|
||||
sink.put1(rex);
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn emit_three_op(&self, sink: &mut MachBuffer<Inst>, enc_g: u8, enc_index: u8, enc_base: u8) {
|
||||
let w = if self.must_clear_w() { 0 } else { 1 };
|
||||
let r = (enc_g >> 3) & 1;
|
||||
let x = (enc_index >> 3) & 1;
|
||||
let b = (enc_base >> 3) & 1;
|
||||
let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
|
||||
if rex != 0x40 || self.must_always_emit() {
|
||||
sink.put1(rex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate the proper Rex flags for the given operand size.
|
||||
impl From<OperandSize> for RexFlags {
|
||||
fn from(size: OperandSize) -> Self {
|
||||
match size {
|
||||
OperandSize::Size64 => RexFlags::set_w(),
|
||||
_ => RexFlags::clear_w(),
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Generate Rex flags for an OperandSize/register tuple.
|
||||
impl From<(OperandSize, Reg)> for RexFlags {
|
||||
fn from((size, reg): (OperandSize, Reg)) -> Self {
|
||||
let mut rex = RexFlags::from(size);
|
||||
if size == OperandSize::Size8 {
|
||||
rex.always_emit_if_8bit_needed(reg);
|
||||
}
|
||||
rex
|
||||
}
|
||||
}
|
||||
|
||||
/// We may need to include one or more legacy prefix bytes before the REX prefix. This enum
|
||||
/// covers only the small set of possibilities that we actually need.
|
||||
enum LegacyPrefixes {
|
||||
/// No prefix bytes.
|
||||
None,
|
||||
/// Operand Size Override -- here, denoting "16-bit operation".
|
||||
_66,
|
||||
/// The Lock prefix.
|
||||
_F0,
|
||||
/// Operand size override and Lock.
|
||||
_66F0,
|
||||
/// REPNE, but no specific meaning here -- is just an opcode extension.
|
||||
_F2,
|
||||
/// REP/REPE, but no specific meaning here -- is just an opcode extension.
|
||||
_F3,
|
||||
/// Operand size override and same effect as F3.
|
||||
_66F3,
|
||||
}
|
||||
|
||||
impl LegacyPrefixes {
|
||||
#[inline(always)]
|
||||
fn emit(&self, sink: &mut MachBuffer<Inst>) {
|
||||
match self {
|
||||
LegacyPrefixes::_66 => sink.put1(0x66),
|
||||
LegacyPrefixes::_F0 => sink.put1(0xF0),
|
||||
LegacyPrefixes::_66F0 => {
|
||||
// I don't think the order matters, but in any case, this is the same order that
|
||||
// the GNU assembler uses.
|
||||
sink.put1(0x66);
|
||||
sink.put1(0xF0);
|
||||
}
|
||||
LegacyPrefixes::_F2 => sink.put1(0xF2),
|
||||
LegacyPrefixes::_F3 => sink.put1(0xF3),
|
||||
LegacyPrefixes::_66F3 => {
|
||||
sink.put1(0x66);
|
||||
sink.put1(0xF3);
|
||||
}
|
||||
LegacyPrefixes::None => (),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// This is the core 'emit' function for instructions that reference memory.
|
||||
///
|
||||
/// For an instruction that has as operands a reg encoding `enc_g` and a memory address `mem_e`,
|
||||
/// create and emit:
|
||||
/// - first the legacy prefixes, if any
|
||||
/// - then the REX prefix, if needed
|
||||
/// - then caller-supplied opcode byte(s) (`opcodes` and `num_opcodes`),
|
||||
/// - then the MOD/RM byte,
|
||||
/// - then optionally, a SIB byte,
|
||||
/// - and finally optionally an immediate that will be derived from the `mem_e` operand.
|
||||
///
|
||||
/// For most instructions up to and including SSE4.2, that will be the whole instruction: this is
|
||||
/// what we call "standard" instructions, and abbreviate "std" in the name here. VEX-prefixed
|
||||
/// instructions will require their own emitter functions.
|
||||
///
|
||||
/// This will also work for 32-bits x86 instructions, assuming no REX prefix is provided.
|
||||
///
|
||||
/// The opcodes are written bigendianly for the convenience of callers. For example, if the opcode
|
||||
/// bytes to be emitted are, in this order, F3 0F 27, then the caller should pass `opcodes` ==
|
||||
/// 0xF3_0F_27 and `num_opcodes` == 3.
|
||||
///
|
||||
/// The register operand is represented here not as a `Reg` but as its hardware encoding, `enc_g`.
|
||||
/// `rex` can specify special handling for the REX prefix. By default, the REX prefix will
|
||||
/// indicate a 64-bit operation and will be deleted if it is redundant (0x40). Note that for a
|
||||
/// 64-bit operation, the REX prefix will normally never be redundant, since REX.W must be 1 to
|
||||
/// indicate a 64-bit operation.
|
||||
fn emit_std_enc_mem(
|
||||
sink: &mut MachBuffer<Inst>,
|
||||
state: &EmitState,
|
||||
info: &EmitInfo,
|
||||
prefixes: LegacyPrefixes,
|
||||
opcodes: u32,
|
||||
mut num_opcodes: usize,
|
||||
enc_g: u8,
|
||||
mem_e: &Amode,
|
||||
rex: RexFlags,
|
||||
) {
|
||||
// General comment for this function: the registers in `mem_e` must be
|
||||
// 64-bit integer registers, because they are part of an address
|
||||
// expression. But `enc_g` can be derived from a register of any class.
|
||||
|
||||
let srcloc = state.cur_srcloc();
|
||||
let can_trap = mem_e.can_trap();
|
||||
if can_trap {
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
|
||||
prefixes.emit(sink);
|
||||
|
||||
match mem_e {
|
||||
Amode::ImmReg { simm32, base, .. } => {
|
||||
// If this is an access based off of RSP, it may trap with a stack overflow if it's the
|
||||
// first touch of a new stack page.
|
||||
if *base == regs::rsp() && !can_trap && info.flags().enable_probestack() {
|
||||
sink.add_trap(srcloc, TrapCode::StackOverflow);
|
||||
}
|
||||
|
||||
// First, the REX byte.
|
||||
let enc_e = int_reg_enc(*base);
|
||||
rex.emit_two_op(sink, enc_g, enc_e);
|
||||
|
||||
// Now the opcode(s). These include any other prefixes the caller
|
||||
// hands to us.
|
||||
while num_opcodes > 0 {
|
||||
num_opcodes -= 1;
|
||||
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
|
||||
}
|
||||
|
||||
// Now the mod/rm and associated immediates. This is
|
||||
// significantly complicated due to the multiple special cases.
|
||||
if *simm32 == 0
|
||||
&& enc_e != regs::ENC_RSP
|
||||
&& enc_e != regs::ENC_RBP
|
||||
&& enc_e != regs::ENC_R12
|
||||
&& enc_e != regs::ENC_R13
|
||||
{
|
||||
// FIXME JRS 2020Feb11: those four tests can surely be
|
||||
// replaced by a single mask-and-compare check. We should do
|
||||
// that because this routine is likely to be hot.
|
||||
sink.put1(encode_modrm(0, enc_g & 7, enc_e & 7));
|
||||
} else if *simm32 == 0 && (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12) {
|
||||
sink.put1(encode_modrm(0, enc_g & 7, 4));
|
||||
sink.put1(0x24);
|
||||
} else if low8_will_sign_extend_to_32(*simm32)
|
||||
&& enc_e != regs::ENC_RSP
|
||||
&& enc_e != regs::ENC_R12
|
||||
{
|
||||
sink.put1(encode_modrm(1, enc_g & 7, enc_e & 7));
|
||||
sink.put1((simm32 & 0xFF) as u8);
|
||||
} else if enc_e != regs::ENC_RSP && enc_e != regs::ENC_R12 {
|
||||
sink.put1(encode_modrm(2, enc_g & 7, enc_e & 7));
|
||||
sink.put4(*simm32);
|
||||
} else if (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12)
|
||||
&& low8_will_sign_extend_to_32(*simm32)
|
||||
{
|
||||
// REX.B distinguishes RSP from R12
|
||||
sink.put1(encode_modrm(1, enc_g & 7, 4));
|
||||
sink.put1(0x24);
|
||||
sink.put1((simm32 & 0xFF) as u8);
|
||||
} else if enc_e == regs::ENC_R12 || enc_e == regs::ENC_RSP {
|
||||
//.. wait for test case for RSP case
|
||||
// REX.B distinguishes RSP from R12
|
||||
sink.put1(encode_modrm(2, enc_g & 7, 4));
|
||||
sink.put1(0x24);
|
||||
sink.put4(*simm32);
|
||||
} else {
|
||||
unreachable!("ImmReg");
|
||||
}
|
||||
}
|
||||
|
||||
Amode::ImmRegRegShift {
|
||||
simm32,
|
||||
base: reg_base,
|
||||
index: reg_index,
|
||||
shift,
|
||||
..
|
||||
} => {
|
||||
// If this is an access based off of RSP, it may trap with a stack overflow if it's the
|
||||
// first touch of a new stack page.
|
||||
if *reg_base == regs::rsp() && !can_trap && info.flags().enable_probestack() {
|
||||
sink.add_trap(srcloc, TrapCode::StackOverflow);
|
||||
}
|
||||
|
||||
let enc_base = int_reg_enc(*reg_base);
|
||||
let enc_index = int_reg_enc(*reg_index);
|
||||
|
||||
// The rex byte.
|
||||
rex.emit_three_op(sink, enc_g, enc_index, enc_base);
|
||||
|
||||
// All other prefixes and opcodes.
|
||||
while num_opcodes > 0 {
|
||||
num_opcodes -= 1;
|
||||
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
|
||||
}
|
||||
|
||||
// modrm, SIB, immediates.
|
||||
if low8_will_sign_extend_to_32(*simm32) && enc_index != regs::ENC_RSP {
|
||||
sink.put1(encode_modrm(1, enc_g & 7, 4));
|
||||
sink.put1(encode_sib(*shift, enc_index & 7, enc_base & 7));
|
||||
sink.put1(*simm32 as u8);
|
||||
} else if enc_index != regs::ENC_RSP {
|
||||
sink.put1(encode_modrm(2, enc_g & 7, 4));
|
||||
sink.put1(encode_sib(*shift, enc_index & 7, enc_base & 7));
|
||||
sink.put4(*simm32);
|
||||
} else {
|
||||
panic!("ImmRegRegShift");
|
||||
}
|
||||
}
|
||||
|
||||
Amode::RipRelative { ref target } => {
|
||||
// First, the REX byte, with REX.B = 0.
|
||||
rex.emit_two_op(sink, enc_g, 0);
|
||||
|
||||
// Now the opcode(s). These include any other prefixes the caller
|
||||
// hands to us.
|
||||
while num_opcodes > 0 {
|
||||
num_opcodes -= 1;
|
||||
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
|
||||
}
|
||||
|
||||
// RIP-relative is mod=00, rm=101.
|
||||
sink.put1(encode_modrm(0, enc_g & 7, 0b101));
|
||||
|
||||
let offset = sink.cur_offset();
|
||||
sink.use_label_at_offset(offset, *target, LabelUse::JmpRel32);
|
||||
sink.put4(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// This is the core 'emit' function for instructions that do not reference memory.
|
||||
///
|
||||
/// This is conceptually the same as emit_modrm_sib_enc_ge, except it is for the case where the E
|
||||
/// operand is a register rather than memory. Hence it is much simpler.
|
||||
fn emit_std_enc_enc(
|
||||
sink: &mut MachBuffer<Inst>,
|
||||
prefixes: LegacyPrefixes,
|
||||
opcodes: u32,
|
||||
mut num_opcodes: usize,
|
||||
enc_g: u8,
|
||||
enc_e: u8,
|
||||
rex: RexFlags,
|
||||
) {
|
||||
// EncG and EncE can be derived from registers of any class, and they
|
||||
// don't even have to be from the same class. For example, for an
|
||||
// integer-to-FP conversion insn, one might be RegClass::I64 and the other
|
||||
// RegClass::V128.
|
||||
|
||||
// The legacy prefixes.
|
||||
prefixes.emit(sink);
|
||||
|
||||
// The rex byte.
|
||||
rex.emit_two_op(sink, enc_g, enc_e);
|
||||
|
||||
// All other prefixes and opcodes.
|
||||
while num_opcodes > 0 {
|
||||
num_opcodes -= 1;
|
||||
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
|
||||
}
|
||||
|
||||
// Now the mod/rm byte. The instruction we're generating doesn't access
|
||||
// memory, so there is no SIB byte or immediate -- we're done.
|
||||
sink.put1(encode_modrm(3, enc_g & 7, enc_e & 7));
|
||||
}
|
||||
|
||||
// These are merely wrappers for the above two functions that facilitate passing
|
||||
// actual `Reg`s rather than their encodings.
|
||||
|
||||
fn emit_std_reg_mem(
|
||||
sink: &mut MachBuffer<Inst>,
|
||||
state: &EmitState,
|
||||
info: &EmitInfo,
|
||||
prefixes: LegacyPrefixes,
|
||||
opcodes: u32,
|
||||
num_opcodes: usize,
|
||||
reg_g: Reg,
|
||||
mem_e: &Amode,
|
||||
rex: RexFlags,
|
||||
) {
|
||||
let enc_g = reg_enc(reg_g);
|
||||
emit_std_enc_mem(
|
||||
sink,
|
||||
state,
|
||||
info,
|
||||
prefixes,
|
||||
opcodes,
|
||||
num_opcodes,
|
||||
enc_g,
|
||||
mem_e,
|
||||
rex,
|
||||
);
|
||||
}
|
||||
|
||||
fn emit_std_reg_reg(
|
||||
sink: &mut MachBuffer<Inst>,
|
||||
prefixes: LegacyPrefixes,
|
||||
opcodes: u32,
|
||||
num_opcodes: usize,
|
||||
reg_g: Reg,
|
||||
reg_e: Reg,
|
||||
rex: RexFlags,
|
||||
) {
|
||||
let enc_g = reg_enc(reg_g);
|
||||
let enc_e = reg_enc(reg_e);
|
||||
emit_std_enc_enc(sink, prefixes, opcodes, num_opcodes, enc_g, enc_e, rex);
|
||||
}
|
||||
|
||||
/// Write a suitable number of bits from an imm64 to the sink.
|
||||
fn emit_simm(sink: &mut MachBuffer<Inst>, size: u8, simm32: u32) {
|
||||
match size {
|
||||
8 | 4 => sink.put4(simm32),
|
||||
2 => sink.put2(simm32 as u16),
|
||||
1 => sink.put1(simm32 as u8),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
use regalloc::{Reg, Writable};
|
||||
|
||||
/// A small helper to generate a signed conversion instruction.
|
||||
fn emit_signed_cvt(
|
||||
@@ -546,18 +115,30 @@ pub(crate) fn emit(
|
||||
info: &EmitInfo,
|
||||
state: &mut EmitState,
|
||||
) {
|
||||
if let Some(iset_requirement) = inst.isa_requirement() {
|
||||
let matches_isa_flags = |iset_requirement: &InstructionSet| -> bool {
|
||||
match iset_requirement {
|
||||
// Cranelift assumes SSE2 at least.
|
||||
InstructionSet::SSE | InstructionSet::SSE2 => {}
|
||||
InstructionSet::SSSE3 => assert!(info.isa_flags.use_ssse3()),
|
||||
InstructionSet::SSE41 => assert!(info.isa_flags.use_sse41()),
|
||||
InstructionSet::SSE42 => assert!(info.isa_flags.use_sse42()),
|
||||
InstructionSet::Popcnt => assert!(info.isa_flags.use_popcnt()),
|
||||
InstructionSet::Lzcnt => assert!(info.isa_flags.use_lzcnt()),
|
||||
InstructionSet::BMI1 => assert!(info.isa_flags.use_bmi1()),
|
||||
InstructionSet::BMI2 => assert!(info.isa_flags.has_bmi2()),
|
||||
InstructionSet::SSE | InstructionSet::SSE2 => true,
|
||||
InstructionSet::SSSE3 => info.isa_flags.use_ssse3(),
|
||||
InstructionSet::SSE41 => info.isa_flags.use_sse41(),
|
||||
InstructionSet::SSE42 => info.isa_flags.use_sse42(),
|
||||
InstructionSet::Popcnt => info.isa_flags.use_popcnt(),
|
||||
InstructionSet::Lzcnt => info.isa_flags.use_lzcnt(),
|
||||
InstructionSet::BMI1 => info.isa_flags.use_bmi1(),
|
||||
InstructionSet::BMI2 => info.isa_flags.has_bmi2(),
|
||||
InstructionSet::AVX512F => info.isa_flags.has_avx512f(),
|
||||
InstructionSet::AVX512VL => info.isa_flags.has_avx512vl(),
|
||||
}
|
||||
};
|
||||
|
||||
// Certain instructions may be present in more than one ISA feature set; we must at least match
|
||||
// one of them in the target CPU.
|
||||
let isa_requirements = inst.available_in_any_isa();
|
||||
if !isa_requirements.is_empty() && !isa_requirements.iter().any(matches_isa_flags) {
|
||||
panic!(
|
||||
"Cannot emit inst '{:?}' for target; failed to match ISA requirements: {:?}",
|
||||
inst, isa_requirements
|
||||
)
|
||||
}
|
||||
|
||||
match inst {
|
||||
@@ -887,7 +468,6 @@ pub(crate) fn emit(
|
||||
// idiv %divisor
|
||||
//
|
||||
// $done:
|
||||
debug_assert!(info.flags().avoid_div_traps());
|
||||
|
||||
// Check if the divisor is zero, first.
|
||||
let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0), divisor.to_reg());
|
||||
@@ -911,7 +491,7 @@ pub(crate) fn emit(
|
||||
// x % -1 = 0; put the result into the destination, $rdx.
|
||||
let done_label = sink.get_label();
|
||||
|
||||
let inst = Inst::imm(*size, 0, Writable::from_reg(regs::rdx()));
|
||||
let inst = Inst::imm(OperandSize::Size64, 0, Writable::from_reg(regs::rdx()));
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
let inst = Inst::jmp_known(done_label);
|
||||
@@ -951,11 +531,6 @@ pub(crate) fn emit(
|
||||
sink.bind_label(do_op);
|
||||
}
|
||||
|
||||
assert!(
|
||||
*size != OperandSize::Size8,
|
||||
"CheckedDivOrRemSeq for i8 is not yet implemented"
|
||||
);
|
||||
|
||||
// Fill in the high parts:
|
||||
if kind.is_signed() {
|
||||
// sign-extend the sign-bit of rax into rdx, for signed opcodes.
|
||||
@@ -1769,6 +1344,7 @@ pub(crate) fn emit(
|
||||
let rex = RexFlags::clear_w();
|
||||
|
||||
let (prefix, opcode, num_opcodes) = match op {
|
||||
SseOpcode::Cvtdq2pd => (LegacyPrefixes::_F3, 0x0FE6, 2),
|
||||
SseOpcode::Cvtss2sd => (LegacyPrefixes::_F3, 0x0F5A, 2),
|
||||
SseOpcode::Cvtsd2ss => (LegacyPrefixes::_F2, 0x0F5A, 2),
|
||||
SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F28, 2),
|
||||
@@ -1830,6 +1406,24 @@ pub(crate) fn emit(
|
||||
};
|
||||
}
|
||||
|
||||
Inst::XmmUnaryRmREvex { op, src, dst } => {
|
||||
let opcode = match op {
|
||||
Avx512Opcode::Vpabsq => 0x1f,
|
||||
};
|
||||
match src {
|
||||
RegMem::Reg { reg: src } => EvexInstruction::new()
|
||||
.length(EvexVectorLength::V128)
|
||||
.prefix(LegacyPrefixes::_66)
|
||||
.map(OpcodeMap::_0F38)
|
||||
.w(true)
|
||||
.opcode(opcode)
|
||||
.reg(dst.to_reg().get_hw_encoding())
|
||||
.rm(src.get_hw_encoding())
|
||||
.encode(sink),
|
||||
_ => todo!(),
|
||||
};
|
||||
}
|
||||
|
||||
Inst::XmmRmR {
|
||||
op,
|
||||
src: src_e,
|
||||
|
||||
@@ -3859,6 +3859,18 @@ fn test_x64_emit() {
|
||||
"pabsd %xmm10, %xmm11",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_unary_rm_r(SseOpcode::Cvtdq2pd, RegMem::reg(xmm2), w_xmm8),
|
||||
"F3440FE6C2",
|
||||
"cvtdq2pd %xmm2, %xmm8",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_unary_rm_r_evex(Avx512Opcode::Vpabsq, RegMem::reg(xmm2), w_xmm8),
|
||||
"6272FD081FC2",
|
||||
"vpabsq %xmm2, %xmm8",
|
||||
));
|
||||
|
||||
// Xmm to int conversions, and conversely.
|
||||
|
||||
insns.push((
|
||||
@@ -4270,6 +4282,7 @@ fn test_x64_emit() {
|
||||
let mut isa_flag_builder = x64::settings::builder();
|
||||
isa_flag_builder.enable("has_ssse3").unwrap();
|
||||
isa_flag_builder.enable("has_sse41").unwrap();
|
||||
isa_flag_builder.enable("has_avx512f").unwrap();
|
||||
let isa_flags = x64::settings::Flags::new(&flags, isa_flag_builder);
|
||||
|
||||
let rru = regs::create_reg_universe_systemv(&flags);
|
||||
|
||||
@@ -14,7 +14,7 @@ use regalloc::{
|
||||
PrettyPrint, PrettyPrintSized, RealRegUniverse, Reg, RegClass, RegUsageCollector,
|
||||
RegUsageMapper, SpillSlot, VirtualReg, Writable,
|
||||
};
|
||||
use smallvec::SmallVec;
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::fmt;
|
||||
use std::string::{String, ToString};
|
||||
|
||||
@@ -224,6 +224,12 @@ pub enum Inst {
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
XmmUnaryRmREvex {
|
||||
op: Avx512Opcode,
|
||||
src: RegMem,
|
||||
dst: Writable<Reg>,
|
||||
},
|
||||
|
||||
/// XMM (scalar or vector) unary op (from xmm to reg/mem): stores, movd, movq
|
||||
XmmMovRM {
|
||||
op: SseOpcode,
|
||||
@@ -501,7 +507,11 @@ pub(crate) fn low32_will_sign_extend_to_64(x: u64) -> bool {
|
||||
}
|
||||
|
||||
impl Inst {
|
||||
fn isa_requirement(&self) -> Option<InstructionSet> {
|
||||
/// Retrieve a list of ISA feature sets in which the instruction is available. An empty list
|
||||
/// indicates that the instruction is available in the baseline feature set (i.e. SSE2 and
|
||||
/// below); more than one `InstructionSet` in the list indicates that the instruction is present
|
||||
/// in *any* of the included ISA feature sets.
|
||||
fn available_in_any_isa(&self) -> SmallVec<[InstructionSet; 2]> {
|
||||
match self {
|
||||
// These instructions are part of SSE2, which is a basic requirement in Cranelift, and
|
||||
// don't have to be checked.
|
||||
@@ -554,7 +564,7 @@ impl Inst {
|
||||
| Inst::ElfTlsGetAddr { .. }
|
||||
| Inst::MachOTlsGetAddr { .. }
|
||||
| Inst::ValueLabelMarker { .. }
|
||||
| Inst::Unwind { .. } => None,
|
||||
| Inst::Unwind { .. } => smallvec![],
|
||||
|
||||
Inst::UnaryRmR { op, .. } => op.available_from(),
|
||||
|
||||
@@ -565,7 +575,9 @@ impl Inst {
|
||||
| Inst::XmmRmR { op, .. }
|
||||
| Inst::XmmRmRImm { op, .. }
|
||||
| Inst::XmmToGpr { op, .. }
|
||||
| Inst::XmmUnaryRmR { op, .. } => Some(op.available_from()),
|
||||
| Inst::XmmUnaryRmR { op, .. } => smallvec![op.available_from()],
|
||||
|
||||
Inst::XmmUnaryRmREvex { op, .. } => op.available_from(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -700,6 +712,12 @@ impl Inst {
|
||||
Inst::XmmUnaryRmR { op, src, dst }
|
||||
}
|
||||
|
||||
pub(crate) fn xmm_unary_rm_r_evex(op: Avx512Opcode, src: RegMem, dst: Writable<Reg>) -> Inst {
|
||||
src.assert_regclass_is(RegClass::V128);
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::V128);
|
||||
Inst::XmmUnaryRmREvex { op, src, dst }
|
||||
}
|
||||
|
||||
pub(crate) fn xmm_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Self {
|
||||
src.assert_regclass_is(RegClass::V128);
|
||||
debug_assert!(dst.to_reg().get_class() == RegClass::V128);
|
||||
@@ -1121,11 +1139,7 @@ impl Inst {
|
||||
pub(crate) fn store(ty: Type, from_reg: Reg, to_addr: impl Into<SyntheticAmode>) -> Inst {
|
||||
let rc = from_reg.get_class();
|
||||
match rc {
|
||||
RegClass::I64 => {
|
||||
// Always store the full register, to ensure that the high bits are properly set
|
||||
// when doing a full reload.
|
||||
Inst::mov_r_m(OperandSize::Size64, from_reg, to_addr)
|
||||
}
|
||||
RegClass::I64 => Inst::mov_r_m(OperandSize::from_ty(ty), from_reg, to_addr),
|
||||
RegClass::V128 => {
|
||||
let opcode = match ty {
|
||||
types::F32 => SseOpcode::Movss,
|
||||
@@ -1390,6 +1404,13 @@ impl PrettyPrint for Inst {
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, 8),
|
||||
),
|
||||
|
||||
Inst::XmmUnaryRmREvex { op, src, dst, .. } => format!(
|
||||
"{} {}, {}",
|
||||
ljustify(op.to_string()),
|
||||
src.show_rru_sized(mb_rru, 8),
|
||||
show_ireg_sized(dst.to_reg(), mb_rru, 8),
|
||||
),
|
||||
|
||||
Inst::XmmMovRM { op, src, dst, .. } => format!(
|
||||
"{} {}, {}",
|
||||
ljustify(op.to_string()),
|
||||
@@ -1862,7 +1883,9 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
collector.add_def(Writable::from_reg(regs::rdx()));
|
||||
}
|
||||
},
|
||||
Inst::UnaryRmR { src, dst, .. } | Inst::XmmUnaryRmR { src, dst, .. } => {
|
||||
Inst::UnaryRmR { src, dst, .. }
|
||||
| Inst::XmmUnaryRmR { src, dst, .. }
|
||||
| Inst::XmmUnaryRmREvex { src, dst, .. } => {
|
||||
src.get_regs_as_uses(collector);
|
||||
collector.add_def(*dst);
|
||||
}
|
||||
@@ -2209,6 +2232,11 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
ref mut dst,
|
||||
..
|
||||
}
|
||||
| Inst::XmmUnaryRmREvex {
|
||||
ref mut src,
|
||||
ref mut dst,
|
||||
..
|
||||
}
|
||||
| Inst::UnaryRmR {
|
||||
ref mut src,
|
||||
ref mut dst,
|
||||
@@ -2827,7 +2855,7 @@ impl EmitState {
|
||||
self.stack_map = None;
|
||||
}
|
||||
|
||||
fn cur_srcloc(&self) -> SourceLoc {
|
||||
pub(crate) fn cur_srcloc(&self) -> SourceLoc {
|
||||
self.cur_srcloc
|
||||
}
|
||||
}
|
||||
|
||||
@@ -89,8 +89,8 @@ impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper {
|
||||
fn sp(&self) -> u16 {
|
||||
X86_64::RSP.0
|
||||
}
|
||||
fn fp(&self) -> u16 {
|
||||
X86_64::RBP.0
|
||||
fn fp(&self) -> Option<u16> {
|
||||
Some(X86_64::RBP.0)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -109,6 +109,7 @@ mod tests {
|
||||
use target_lexicon::triple;
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(feature = "old-x86-backend", ignore)]
|
||||
fn test_simple_func() {
|
||||
let isa = lookup(triple!("x86_64"))
|
||||
.expect("expect x86 ISA")
|
||||
@@ -151,6 +152,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(feature = "old-x86-backend", ignore)]
|
||||
fn test_multi_return_func() {
|
||||
let isa = lookup(triple!("x86_64"))
|
||||
.expect("expect x86 ISA")
|
||||
|
||||
@@ -204,6 +204,7 @@ enum ExtSpec {
|
||||
ZeroExtendTo32,
|
||||
ZeroExtendTo64,
|
||||
SignExtendTo32,
|
||||
#[allow(dead_code)] // not used just yet but may be used in the future!
|
||||
SignExtendTo64,
|
||||
}
|
||||
|
||||
@@ -1854,10 +1855,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
let ty = ty.unwrap();
|
||||
if ty == types::I64X2 {
|
||||
// This lowering could be a single instruction with AVX512F/VL's VPABSQ instruction.
|
||||
// Instead, we use a separate register, `tmp`, to contain the results of `0 - src`
|
||||
// and then blend in those results with `BLENDVPD` if the MSB of `tmp` was set to 1
|
||||
// (i.e. if `tmp` was negative or, conversely, if `src` was originally positive).
|
||||
if isa_flags.use_avx512f_simd() || isa_flags.use_avx512vl_simd() {
|
||||
ctx.emit(Inst::xmm_unary_rm_r_evex(Avx512Opcode::Vpabsq, src, dst));
|
||||
} else {
|
||||
// If `VPABSQ` from AVX512 is unavailable, we use a separate register, `tmp`, to
|
||||
// contain the results of `0 - src` and then blend in those results with
|
||||
// `BLENDVPD` if the MSB of `tmp` was set to 1 (i.e. if `tmp` was negative or,
|
||||
// conversely, if `src` was originally positive).
|
||||
|
||||
// Emit all 0s into the `tmp` register.
|
||||
let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
|
||||
@@ -1873,6 +1877,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
ty,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Blendvpd, src, dst));
|
||||
}
|
||||
} else if ty.is_vector() {
|
||||
let opcode = match ty {
|
||||
types::I8X16 => SseOpcode::Pabsb,
|
||||
@@ -2041,7 +2046,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
}
|
||||
ctx.emit(Inst::shift_r(size, shift_kind, count, dst));
|
||||
} else if dst_ty == types::I128 {
|
||||
let amt_src = put_input_in_reg(ctx, inputs[1]);
|
||||
let amt_src = put_input_in_regs(ctx, inputs[1]).regs()[0];
|
||||
let src = put_input_in_regs(ctx, inputs[0]);
|
||||
let dst = get_output_reg(ctx, outputs[0]);
|
||||
|
||||
@@ -3914,7 +3919,15 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
ctx.emit(Inst::xmm_rm_r(opcode, RegMem::from(dst), dst));
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::FcvtLowFromSint => {
|
||||
let src = RegMem::reg(put_input_in_reg(ctx, inputs[0]));
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_unary_rm_r(
|
||||
SseOpcode::Cvtdq2pd,
|
||||
RegMem::from(src),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
Opcode::FcvtFromUint => {
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
let ty = ty.unwrap();
|
||||
@@ -4813,28 +4826,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
|
||||
if elem_ty == types::I128 {
|
||||
let srcs = put_input_in_regs(ctx, inputs[0]);
|
||||
ctx.emit(Inst::mov_r_m(
|
||||
OperandSize::Size64,
|
||||
srcs.regs()[0],
|
||||
addr.clone(),
|
||||
));
|
||||
ctx.emit(Inst::mov_r_m(
|
||||
OperandSize::Size64,
|
||||
srcs.regs()[1],
|
||||
addr.offset(8),
|
||||
));
|
||||
ctx.emit(Inst::store(types::I64, srcs.regs()[0], addr.clone()));
|
||||
ctx.emit(Inst::store(types::I64, srcs.regs()[1], addr.offset(8)));
|
||||
} else {
|
||||
let src = put_input_in_reg(ctx, inputs[0]);
|
||||
|
||||
ctx.emit(match elem_ty {
|
||||
types::F32 => Inst::xmm_mov_r_m(SseOpcode::Movss, src, addr),
|
||||
types::F64 => Inst::xmm_mov_r_m(SseOpcode::Movsd, src, addr),
|
||||
_ if elem_ty.is_vector() && elem_ty.bits() == 128 => {
|
||||
// TODO Specialize for different types: MOVUPD, MOVDQU, etc.
|
||||
Inst::xmm_mov_r_m(SseOpcode::Movups, src, addr)
|
||||
}
|
||||
_ => Inst::mov_r_m(OperandSize::from_ty(elem_ty), src, addr),
|
||||
});
|
||||
ctx.emit(Inst::store(elem_ty, src, addr));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4938,7 +4934,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
let ty_access = ctx.input_ty(insn, 0);
|
||||
assert!(is_valid_atomic_transaction_ty(ty_access));
|
||||
|
||||
ctx.emit(Inst::mov_r_m(OperandSize::from_ty(ty_access), data, addr));
|
||||
ctx.emit(Inst::store(ty_access, data, addr));
|
||||
ctx.emit(Inst::Fence {
|
||||
kind: FenceKind::MFence,
|
||||
});
|
||||
@@ -5181,7 +5177,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
input_ty,
|
||||
));
|
||||
|
||||
if flags.avoid_div_traps() {
|
||||
// Always do explicit checks for `srem`: otherwise, INT_MIN % -1 is not handled properly.
|
||||
if flags.avoid_div_traps() || op == Opcode::Srem {
|
||||
// A vcode meta-instruction is used to lower the inline checks, since they embed
|
||||
// pc-relative offsets that must not change, thus requiring regalloc to not
|
||||
// interfere by introducing spills and reloads.
|
||||
|
||||
@@ -9,7 +9,7 @@ use crate::isa::Builder as IsaBuilder;
|
||||
use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
|
||||
use crate::result::CodegenResult;
|
||||
use crate::settings::{self as shared_settings, Flags};
|
||||
use alloc::boxed::Box;
|
||||
use alloc::{boxed::Box, vec::Vec};
|
||||
use core::hash::{Hash, Hasher};
|
||||
use regalloc::{PrettyPrint, RealRegUniverse, Reg};
|
||||
use target_lexicon::Triple;
|
||||
@@ -18,6 +18,7 @@ use target_lexicon::Triple;
|
||||
use crate::isa::unwind::systemv;
|
||||
|
||||
mod abi;
|
||||
pub mod encoding;
|
||||
mod inst;
|
||||
mod lower;
|
||||
mod settings;
|
||||
@@ -85,6 +86,10 @@ impl MachBackend for X64Backend {
|
||||
&self.flags
|
||||
}
|
||||
|
||||
fn isa_flags(&self) -> Vec<shared_settings::Value> {
|
||||
self.x64_flags.iter().collect()
|
||||
}
|
||||
|
||||
fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) {
|
||||
self.flags.hash(&mut hasher);
|
||||
self.x64_flags.hash(&mut hasher);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
//! x86 Settings.
|
||||
|
||||
use crate::settings::{self, detail, Builder};
|
||||
use crate::settings::{self, detail, Builder, Value};
|
||||
use core::fmt;
|
||||
|
||||
// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a
|
||||
|
||||
@@ -503,15 +503,18 @@ fn callee_saved_regs_used(isa: &dyn TargetIsa, func: &ir::Function) -> RegisterS
|
||||
pub fn prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> {
|
||||
match func.signature.call_conv {
|
||||
// For now, just translate fast and cold as system_v.
|
||||
CallConv::Fast | CallConv::Cold | CallConv::SystemV => {
|
||||
CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::WasmtimeSystemV => {
|
||||
system_v_prologue_epilogue(func, isa)
|
||||
}
|
||||
CallConv::WindowsFastcall => fastcall_prologue_epilogue(func, isa),
|
||||
CallConv::WindowsFastcall | CallConv::WasmtimeFastcall => {
|
||||
fastcall_prologue_epilogue(func, isa)
|
||||
}
|
||||
CallConv::BaldrdashSystemV | CallConv::BaldrdashWindows => {
|
||||
baldrdash_prologue_epilogue(func, isa)
|
||||
}
|
||||
CallConv::Probestack => unimplemented!("probestack calling convention"),
|
||||
CallConv::Baldrdash2020 => unimplemented!("Baldrdash ABI 2020"),
|
||||
CallConv::AppleAarch64 => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1083,16 +1086,17 @@ pub fn create_unwind_info(
|
||||
isa: &dyn TargetIsa,
|
||||
) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {
|
||||
use crate::isa::unwind::UnwindInfo;
|
||||
use crate::machinst::UnwindInfoKind;
|
||||
|
||||
// Assumption: RBP is being used as the frame pointer for both calling conventions
|
||||
// In the future, we should be omitting frame pointer as an optimization, so this will change
|
||||
Ok(match func.signature.call_conv {
|
||||
CallConv::Fast | CallConv::Cold | CallConv::SystemV => {
|
||||
Ok(match isa.unwind_info_kind() {
|
||||
UnwindInfoKind::SystemV => {
|
||||
super::unwind::systemv::create_unwind_info(func, isa)?.map(|u| UnwindInfo::SystemV(u))
|
||||
}
|
||||
CallConv::WindowsFastcall => {
|
||||
UnwindInfoKind::Windows => {
|
||||
super::unwind::winx64::create_unwind_info(func, isa)?.map(|u| UnwindInfo::WindowsX64(u))
|
||||
}
|
||||
_ => None,
|
||||
UnwindInfoKind::None => None,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -21,8 +21,7 @@ use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
|
||||
use crate::regalloc;
|
||||
use crate::result::CodegenResult;
|
||||
use crate::timing;
|
||||
use alloc::borrow::Cow;
|
||||
use alloc::boxed::Box;
|
||||
use alloc::{borrow::Cow, boxed::Box, vec::Vec};
|
||||
use core::any::Any;
|
||||
use core::fmt;
|
||||
use core::hash::{Hash, Hasher};
|
||||
@@ -79,6 +78,10 @@ impl TargetIsa for Isa {
|
||||
&self.shared_flags
|
||||
}
|
||||
|
||||
fn isa_flags(&self) -> Vec<shared_settings::Value> {
|
||||
self.isa_flags.iter().collect()
|
||||
}
|
||||
|
||||
fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) {
|
||||
self.shared_flags.hash(&mut hasher);
|
||||
self.isa_flags.hash(&mut hasher);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
//! x86 Settings.
|
||||
|
||||
use crate::settings::{self, detail, Builder};
|
||||
use crate::settings::{self, detail, Builder, Value};
|
||||
use core::fmt;
|
||||
|
||||
// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
use crate::ir::Function;
|
||||
use crate::isa::{
|
||||
unwind::systemv::{RegisterMappingError, UnwindInfo},
|
||||
CallConv, RegUnit, TargetIsa,
|
||||
RegUnit, TargetIsa,
|
||||
};
|
||||
use crate::result::CodegenResult;
|
||||
use gimli::{write::CommonInformationEntry, Encoding, Format, Register, X86_64};
|
||||
@@ -97,8 +97,8 @@ pub(crate) fn create_unwind_info(
|
||||
isa: &dyn TargetIsa,
|
||||
) -> CodegenResult<Option<UnwindInfo>> {
|
||||
// Only System V-like calling conventions are supported
|
||||
match func.signature.call_conv {
|
||||
CallConv::Fast | CallConv::Cold | CallConv::SystemV => {}
|
||||
match isa.unwind_info_kind() {
|
||||
crate::machinst::UnwindInfoKind::SystemV => {}
|
||||
_ => return Ok(None),
|
||||
}
|
||||
|
||||
@@ -121,8 +121,8 @@ pub(crate) fn create_unwind_info(
|
||||
fn sp(&self) -> u16 {
|
||||
X86_64::RSP.0
|
||||
}
|
||||
fn fp(&self) -> u16 {
|
||||
X86_64::RBP.0
|
||||
fn fp(&self) -> Option<u16> {
|
||||
Some(X86_64::RBP.0)
|
||||
}
|
||||
}
|
||||
let map = RegisterMapper(isa);
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
use crate::ir::Function;
|
||||
use crate::isa::x86::registers::{FPR, GPR};
|
||||
use crate::isa::{unwind::winx64::UnwindInfo, CallConv, RegUnit, TargetIsa};
|
||||
use crate::isa::{unwind::winx64::UnwindInfo, RegUnit, TargetIsa};
|
||||
use crate::result::CodegenResult;
|
||||
|
||||
pub(crate) fn create_unwind_info(
|
||||
@@ -10,7 +10,7 @@ pub(crate) fn create_unwind_info(
|
||||
isa: &dyn TargetIsa,
|
||||
) -> CodegenResult<Option<UnwindInfo>> {
|
||||
// Only Windows fastcall is supported for unwind information
|
||||
if func.signature.call_conv != CallConv::WindowsFastcall || func.prologue_end.is_none() {
|
||||
if !func.signature.call_conv.extends_windows_fastcall() || func.prologue_end.is_none() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
|
||||
@@ -97,6 +97,7 @@ mod inst_predicates;
|
||||
mod iterators;
|
||||
mod legalizer;
|
||||
mod licm;
|
||||
mod log;
|
||||
mod nan_canonicalization;
|
||||
mod partition_slice;
|
||||
mod postopt;
|
||||
|
||||
39
cranelift/codegen/src/log.rs
Normal file
39
cranelift/codegen/src/log.rs
Normal file
@@ -0,0 +1,39 @@
|
||||
//! This module implements deferred display helpers.
|
||||
//!
|
||||
//! These are particularly useful in logging contexts, where the maximum logging level filter might
|
||||
//! be enabled, but we don't want the arguments to be evaluated early:
|
||||
//!
|
||||
//! ```
|
||||
//! log::set_max_level(log::LevelFilter::max());
|
||||
//! fn expensive_calculation() -> String {
|
||||
//! "a string that is very slow to generate".into()
|
||||
//! }
|
||||
//! log::debug!("{}", expensive_calculation());
|
||||
//! ```
|
||||
//!
|
||||
//! If the associated log implementation filters out log debug entries, the expensive calculation
|
||||
//! would have been spurious. In this case, we can wrap the expensive computation within an
|
||||
//! `DeferredDisplay`, so that the computation only happens when the actual `fmt` function is
|
||||
//! called.
|
||||
|
||||
use core::fmt;
|
||||
|
||||
pub(crate) struct DeferredDisplay<F>(F);
|
||||
|
||||
impl<F: Fn() -> T, T: fmt::Display> DeferredDisplay<F> {
|
||||
pub(crate) fn new(f: F) -> Self {
|
||||
Self(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl<F: Fn() -> T, T: fmt::Display> fmt::Display for DeferredDisplay<F> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
self.0().fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl<F: Fn() -> T, T: fmt::Debug> fmt::Debug for DeferredDisplay<F> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
self.0().fmt(f)
|
||||
}
|
||||
}
|
||||
@@ -30,6 +30,12 @@ pub trait ABICallee {
|
||||
/// Access the (possibly legalized) signature.
|
||||
fn signature(&self) -> &Signature;
|
||||
|
||||
/// Accumulate outgoing arguments. This ensures that at least SIZE bytes
|
||||
/// are allocated in the prologue to be available for use in function calls
|
||||
/// to hold arguments and/or return values. If this function is called
|
||||
/// multiple times, the maximum of all SIZE values will be available.
|
||||
fn accumulate_outgoing_args_size(&mut self, size: u32);
|
||||
|
||||
/// Get the settings controlling this function's compilation.
|
||||
fn flags(&self) -> &settings::Flags;
|
||||
|
||||
@@ -189,9 +195,6 @@ pub trait ABICallee {
|
||||
from_slot: SpillSlot,
|
||||
ty: Option<Type>,
|
||||
) -> Self::I;
|
||||
|
||||
/// Desired unwind info type.
|
||||
fn unwind_info_kind(&self) -> UnwindInfoKind;
|
||||
}
|
||||
|
||||
/// Trait implemented by an object that tracks ABI-related state and can
|
||||
@@ -245,6 +248,13 @@ pub trait ABICaller {
|
||||
/// Emit code to post-adjust the stack, after call return and return-value copies.
|
||||
fn emit_stack_post_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C);
|
||||
|
||||
/// Accumulate outgoing arguments. This ensures that the caller (as
|
||||
/// identified via the CTX argument) allocates enough space in the
|
||||
/// prologue to hold all arguments and return values for this call.
|
||||
/// There is no code emitted at the call site, everything is done
|
||||
/// in the caller's function prologue.
|
||||
fn accumulate_outgoing_args_size<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C);
|
||||
|
||||
/// Emit the call itself.
|
||||
///
|
||||
/// The returned instruction should have proper use- and def-sets according
|
||||
|
||||
@@ -102,7 +102,7 @@
|
||||
//! support the SpiderMonkey Wasm ABI. For details of the multi-value return
|
||||
//! ABI, see:
|
||||
//!
|
||||
//! https://searchfox.org/mozilla-central/rev/bc3600def806859c31b2c7ac06e3d69271052a89/js/src/wasm/WasmStubs.h#134
|
||||
//! <https://searchfox.org/mozilla-central/rev/bc3600def806859c31b2c7ac06e3d69271052a89/js/src/wasm/WasmStubs.h#134>
|
||||
//!
|
||||
//! In brief:
|
||||
//! - Return values are processed in *reverse* order.
|
||||
@@ -444,6 +444,7 @@ pub trait ABIMachineSpec {
|
||||
flags: &settings::Flags,
|
||||
clobbers: &Set<Writable<RealReg>>,
|
||||
fixed_frame_storage_size: u32,
|
||||
outgoing_args_size: u32,
|
||||
) -> (u64, SmallVec<[Self::I; 16]>);
|
||||
|
||||
/// Generate a clobber-restore sequence. This sequence should perform the
|
||||
@@ -455,6 +456,7 @@ pub trait ABIMachineSpec {
|
||||
flags: &settings::Flags,
|
||||
clobbers: &Set<Writable<RealReg>>,
|
||||
fixed_frame_storage_size: u32,
|
||||
outgoing_args_size: u32,
|
||||
) -> SmallVec<[Self::I; 16]>;
|
||||
|
||||
/// Generate a call instruction/sequence. This method is provided one
|
||||
@@ -576,6 +578,8 @@ pub struct ABICalleeImpl<M: ABIMachineSpec> {
|
||||
stackslots: PrimaryMap<StackSlot, u32>,
|
||||
/// Total stack size of all stackslots.
|
||||
stackslots_size: u32,
|
||||
/// Stack size to be reserved for outgoing arguments.
|
||||
outgoing_args_size: u32,
|
||||
/// Clobbered registers, from regalloc.
|
||||
clobbered: Set<Writable<RealReg>>,
|
||||
/// Total number of spillslots, from regalloc.
|
||||
@@ -646,7 +650,9 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> {
|
||||
|| call_conv == isa::CallConv::Fast
|
||||
|| call_conv == isa::CallConv::Cold
|
||||
|| call_conv.extends_baldrdash()
|
||||
|| call_conv.extends_windows_fastcall(),
|
||||
|| call_conv.extends_windows_fastcall()
|
||||
|| call_conv == isa::CallConv::AppleAarch64
|
||||
|| call_conv == isa::CallConv::WasmtimeSystemV,
|
||||
"Unsupported calling convention: {:?}",
|
||||
call_conv
|
||||
);
|
||||
@@ -689,6 +695,7 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> {
|
||||
sig,
|
||||
stackslots,
|
||||
stackslots_size: stack_offset,
|
||||
outgoing_args_size: 0,
|
||||
clobbered: Set::empty(),
|
||||
spillslots: None,
|
||||
fixed_frame_storage_size: 0,
|
||||
@@ -915,6 +922,12 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
}
|
||||
}
|
||||
|
||||
fn accumulate_outgoing_args_size(&mut self, size: u32) {
|
||||
if size > self.outgoing_args_size {
|
||||
self.outgoing_args_size = size;
|
||||
}
|
||||
}
|
||||
|
||||
fn flags(&self) -> &settings::Flags {
|
||||
&self.flags
|
||||
}
|
||||
@@ -1196,6 +1209,15 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
let spill_off = islot * M::word_bytes() as i64;
|
||||
let sp_off = self.stackslots_size as i64 + spill_off;
|
||||
trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
|
||||
|
||||
// Integer types smaller than word size have been spilled as words below,
|
||||
// and therefore must be reloaded in the same type.
|
||||
let ty = if ty.is_int() && ty.bytes() < M::word_bytes() {
|
||||
M::word_type()
|
||||
} else {
|
||||
ty
|
||||
};
|
||||
|
||||
gen_load_stack_multi::<M>(StackAMode::NominalSPOffset(sp_off, ty), into_regs, ty)
|
||||
}
|
||||
|
||||
@@ -1211,6 +1233,19 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
let spill_off = islot * M::word_bytes() as i64;
|
||||
let sp_off = self.stackslots_size as i64 + spill_off;
|
||||
trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
|
||||
|
||||
// When reloading from a spill slot, we might have lost information about real integer
|
||||
// types. For instance, on the x64 backend, a zero-extension can become spurious and
|
||||
// optimized into a move, causing vregs of types I32 and I64 to share the same coalescing
|
||||
// equivalency class. As a matter of fact, such a value can be spilled as an I32 and later
|
||||
// reloaded as an I64; to make sure the high bits are always defined, do a word-sized store
|
||||
// all the time, in this case.
|
||||
let ty = if ty.is_int() && ty.bytes() < M::word_bytes() {
|
||||
M::word_type()
|
||||
} else {
|
||||
ty
|
||||
};
|
||||
|
||||
gen_store_stack_multi::<M>(StackAMode::NominalSPOffset(sp_off, ty), from_regs, ty)
|
||||
}
|
||||
|
||||
@@ -1283,11 +1318,12 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
}
|
||||
|
||||
// Save clobbered registers.
|
||||
let (_, clobber_insts) = M::gen_clobber_save(
|
||||
let (clobber_size, clobber_insts) = M::gen_clobber_save(
|
||||
self.call_conv,
|
||||
&self.flags,
|
||||
&self.clobbered,
|
||||
self.fixed_frame_storage_size,
|
||||
self.outgoing_args_size,
|
||||
);
|
||||
insts.extend(clobber_insts);
|
||||
|
||||
@@ -1302,7 +1338,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
// [crate::machinst::abi_impl](this module) for more details
|
||||
// on stackframe layout and nominal SP maintenance.
|
||||
|
||||
self.total_frame_size = Some(total_stacksize);
|
||||
self.total_frame_size = Some(total_stacksize + clobber_size as u32);
|
||||
insts
|
||||
}
|
||||
|
||||
@@ -1315,6 +1351,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
&self.flags,
|
||||
&self.clobbered,
|
||||
self.fixed_frame_storage_size,
|
||||
self.outgoing_args_size,
|
||||
));
|
||||
|
||||
// N.B.: we do *not* emit a nominal SP adjustment here, because (i) there will be no
|
||||
@@ -1369,18 +1406,6 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
.next()
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn unwind_info_kind(&self) -> UnwindInfoKind {
|
||||
match self.sig.call_conv {
|
||||
#[cfg(feature = "unwind")]
|
||||
isa::CallConv::Fast | isa::CallConv::Cold | isa::CallConv::SystemV => {
|
||||
UnwindInfoKind::SystemV
|
||||
}
|
||||
#[cfg(feature = "unwind")]
|
||||
isa::CallConv::WindowsFastcall => UnwindInfoKind::Windows,
|
||||
_ => UnwindInfoKind::None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn abisig_to_uses_and_defs<M: ABIMachineSpec>(sig: &ABISig) -> (Vec<Reg>, Vec<Writable<Reg>>) {
|
||||
@@ -1529,6 +1554,11 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
|
||||
}
|
||||
}
|
||||
|
||||
fn accumulate_outgoing_args_size<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C) {
|
||||
let off = self.sig.stack_arg_space + self.sig.stack_ret_space;
|
||||
ctx.abi().accumulate_outgoing_args_size(off as u32);
|
||||
}
|
||||
|
||||
fn emit_stack_pre_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C) {
|
||||
let off = self.sig.stack_arg_space + self.sig.stack_ret_space;
|
||||
adjust_stack_and_nominal_sp::<M, C>(ctx, off as i32, /* is_sub = */ true)
|
||||
|
||||
@@ -2,10 +2,12 @@
|
||||
|
||||
use crate::binemit;
|
||||
use crate::ir;
|
||||
use crate::isa::{EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa};
|
||||
use crate::isa::{
|
||||
BackendVariant, EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa,
|
||||
};
|
||||
use crate::machinst::*;
|
||||
use crate::regalloc::RegisterSet;
|
||||
use crate::settings::Flags;
|
||||
use crate::settings::{self, Flags};
|
||||
|
||||
#[cfg(feature = "testing_hooks")]
|
||||
use crate::regalloc::RegDiversions;
|
||||
@@ -14,7 +16,6 @@ use crate::regalloc::RegDiversions;
|
||||
use crate::isa::unwind::systemv::RegisterMappingError;
|
||||
|
||||
use core::any::Any;
|
||||
use core::hash::Hasher;
|
||||
use std::borrow::Cow;
|
||||
use std::fmt;
|
||||
use target_lexicon::Triple;
|
||||
@@ -59,8 +60,16 @@ impl TargetIsa for TargetIsaAdapter {
|
||||
self.backend.flags()
|
||||
}
|
||||
|
||||
fn isa_flags(&self) -> Vec<settings::Value> {
|
||||
self.backend.isa_flags()
|
||||
}
|
||||
|
||||
fn variant(&self) -> BackendVariant {
|
||||
BackendVariant::MachInst
|
||||
}
|
||||
|
||||
fn hash_all_flags(&self, hasher: &mut dyn Hasher) {
|
||||
self.backend.hash_all_flags(hasher)
|
||||
self.backend.hash_all_flags(hasher);
|
||||
}
|
||||
|
||||
fn register_info(&self) -> RegInfo {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
//! Compilation backend pipeline: optimized IR to VCode / binemit.
|
||||
|
||||
use crate::ir::Function;
|
||||
use crate::log::DeferredDisplay;
|
||||
use crate::machinst::*;
|
||||
use crate::settings;
|
||||
use crate::timing;
|
||||
@@ -29,9 +30,11 @@ where
|
||||
lower.lower(b)?
|
||||
};
|
||||
|
||||
// Creating the vcode string representation may be costly for large functions, so defer its
|
||||
// rendering.
|
||||
debug!(
|
||||
"vcode from lowering: \n{}",
|
||||
vcode.show_rru(Some(b.reg_universe()))
|
||||
DeferredDisplay::new(|| vcode.show_rru(Some(b.reg_universe())))
|
||||
);
|
||||
|
||||
// Perform register allocation.
|
||||
@@ -103,7 +106,7 @@ where
|
||||
|
||||
debug!(
|
||||
"vcode after regalloc: final version:\n{}",
|
||||
vcode.show_rru(Some(b.reg_universe()))
|
||||
DeferredDisplay::new(|| vcode.show_rru(Some(b.reg_universe())))
|
||||
);
|
||||
|
||||
Ok(vcode)
|
||||
|
||||
@@ -64,18 +64,18 @@ use crate::binemit::{CodeInfo, CodeOffset, StackMap};
|
||||
use crate::ir::condcodes::IntCC;
|
||||
use crate::ir::{Function, SourceLoc, StackSlot, Type, ValueLabel};
|
||||
use crate::result::CodegenResult;
|
||||
use crate::settings::Flags;
|
||||
use crate::settings::{self, Flags};
|
||||
use crate::value_label::ValueLabelsRanges;
|
||||
use alloc::boxed::Box;
|
||||
use alloc::vec::Vec;
|
||||
use core::fmt::Debug;
|
||||
use core::hash::Hasher;
|
||||
use cranelift_entity::PrimaryMap;
|
||||
use regalloc::RegUsageCollector;
|
||||
use regalloc::{
|
||||
RealReg, RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, VirtualReg, Writable,
|
||||
};
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::hash::Hasher;
|
||||
use std::string::String;
|
||||
use target_lexicon::Triple;
|
||||
|
||||
@@ -368,8 +368,10 @@ pub trait MachBackend {
|
||||
/// Return flags for this backend.
|
||||
fn flags(&self) -> &Flags;
|
||||
|
||||
/// Hashes all flags, both ISA-independent and ISA-specific, into the
|
||||
/// specified hasher.
|
||||
/// Get the ISA-dependent flag values that were used to make this trait object.
|
||||
fn isa_flags(&self) -> Vec<settings::Value>;
|
||||
|
||||
/// Hashes all flags, both ISA-independent and ISA-dependent, into the specified hasher.
|
||||
fn hash_all_flags(&self, hasher: &mut dyn Hasher);
|
||||
|
||||
/// Return triple for this backend.
|
||||
|
||||
@@ -66,7 +66,7 @@
|
||||
//! Our current implementation uses a sorted array of compressed intervals, represented by their
|
||||
//! boundaries (Block, Inst), sorted by Block. This is a simple data structure, enables coalescing of
|
||||
//! intervals easily, and shows some nice performance behavior. See
|
||||
//! https://github.com/bytecodealliance/cranelift/issues/1084 for benchmarks against using a
|
||||
//! <https://github.com/bytecodealliance/cranelift/issues/1084> for benchmarks against using a
|
||||
//! bforest::Map<Block, Inst>.
|
||||
//!
|
||||
//! ## block ordering
|
||||
@@ -112,7 +112,7 @@
|
||||
//! the necessary API to make coalescing easy, nor does it optimize for our types' sizes.
|
||||
//!
|
||||
//! Even the specialized `bforest::Map<Block, Inst>` implementation is slower than a plain sorted
|
||||
//! array, see https://github.com/bytecodealliance/cranelift/issues/1084 for details.
|
||||
//! array, see <https://github.com/bytecodealliance/cranelift/issues/1084> for details.
|
||||
|
||||
use crate::entity::SparseMapValue;
|
||||
use crate::ir::{Block, ExpandedProgramPoint, Inst, Layout, ProgramOrder, ProgramPoint, Value};
|
||||
|
||||
@@ -2,19 +2,17 @@
|
||||
|
||||
use crate::verifier::VerifierErrors;
|
||||
use std::string::String;
|
||||
use thiserror::Error;
|
||||
|
||||
/// A compilation error.
|
||||
///
|
||||
/// When Cranelift fails to compile a function, it will return one of these error codes.
|
||||
#[derive(Error, Debug, PartialEq, Eq)]
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum CodegenError {
|
||||
/// A list of IR verifier errors.
|
||||
///
|
||||
/// This always represents a bug, either in the code that generated IR for Cranelift, or a bug
|
||||
/// in Cranelift itself.
|
||||
#[error("Verifier errors")]
|
||||
Verifier(#[from] VerifierErrors),
|
||||
Verifier(VerifierErrors),
|
||||
|
||||
/// An implementation limit was exceeded.
|
||||
///
|
||||
@@ -22,27 +20,57 @@ pub enum CodegenError {
|
||||
/// limits][limits] that cause compilation to fail when they are exceeded.
|
||||
///
|
||||
/// [limits]: https://github.com/bytecodealliance/wasmtime/blob/main/cranelift/docs/ir.md#implementation-limits
|
||||
#[error("Implementation limit exceeded")]
|
||||
ImplLimitExceeded,
|
||||
|
||||
/// The code size for the function is too large.
|
||||
///
|
||||
/// Different target ISAs may impose a limit on the size of a compiled function. If that limit
|
||||
/// is exceeded, compilation fails.
|
||||
#[error("Code for function is too large")]
|
||||
CodeTooLarge,
|
||||
|
||||
/// Something is not supported by the code generator. This might be an indication that a
|
||||
/// feature is used without explicitly enabling it, or that something is temporarily
|
||||
/// unsupported by a given target backend.
|
||||
#[error("Unsupported feature: {0}")]
|
||||
Unsupported(String),
|
||||
|
||||
/// A failure to map Cranelift register representation to a DWARF register representation.
|
||||
#[cfg(feature = "unwind")]
|
||||
#[error("Register mapping error")]
|
||||
RegisterMappingError(crate::isa::unwind::systemv::RegisterMappingError),
|
||||
}
|
||||
|
||||
/// A convenient alias for a `Result` that uses `CodegenError` as the error type.
|
||||
pub type CodegenResult<T> = Result<T, CodegenError>;
|
||||
|
||||
// This is manually implementing Error and Display instead of using thiserror to reduce the amount
|
||||
// of dependencies used by Cranelift.
|
||||
impl std::error::Error for CodegenError {
|
||||
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
||||
match self {
|
||||
CodegenError::Verifier(source) => Some(source),
|
||||
CodegenError::ImplLimitExceeded { .. }
|
||||
| CodegenError::CodeTooLarge { .. }
|
||||
| CodegenError::Unsupported { .. } => None,
|
||||
#[cfg(feature = "unwind")]
|
||||
CodegenError::RegisterMappingError { .. } => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for CodegenError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
match self {
|
||||
CodegenError::Verifier(_) => write!(f, "Verifier errors"),
|
||||
CodegenError::ImplLimitExceeded => write!(f, "Implementation limit exceeded"),
|
||||
CodegenError::CodeTooLarge => write!(f, "Code for function is too large"),
|
||||
CodegenError::Unsupported(feature) => write!(f, "Unsupported feature: {}", feature),
|
||||
#[cfg(feature = "unwind")]
|
||||
CodegenError::RegisterMappingError(_0) => write!(f, "Register mapping error"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<VerifierErrors> for CodegenError {
|
||||
fn from(source: VerifierErrors) -> Self {
|
||||
CodegenError::Verifier { 0: source }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,7 +26,6 @@ use alloc::boxed::Box;
|
||||
use alloc::string::{String, ToString};
|
||||
use core::fmt;
|
||||
use core::str;
|
||||
use thiserror::Error;
|
||||
|
||||
/// A string-based configurator for settings groups.
|
||||
///
|
||||
@@ -44,6 +43,78 @@ pub trait Configurable {
|
||||
fn enable(&mut self, name: &str) -> SetResult<()>;
|
||||
}
|
||||
|
||||
/// Represents the kind of setting.
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||
pub enum SettingKind {
|
||||
/// The setting is an enumeration.
|
||||
Enum,
|
||||
/// The setting is a number.
|
||||
Num,
|
||||
/// The setting is a boolean.
|
||||
Bool,
|
||||
/// The setting is a preset.
|
||||
Preset,
|
||||
}
|
||||
|
||||
/// Represents an available builder setting.
|
||||
///
|
||||
/// This is used for iterating settings in a builder.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct Setting {
|
||||
/// The name of the setting.
|
||||
pub name: &'static str,
|
||||
/// The description of the setting.
|
||||
pub description: &'static str,
|
||||
/// The kind of the setting.
|
||||
pub kind: SettingKind,
|
||||
/// The supported values of the setting (for enum values).
|
||||
pub values: Option<&'static [&'static str]>,
|
||||
}
|
||||
|
||||
/// Represents a setting value.
|
||||
///
|
||||
/// This is used for iterating values in `Flags`.
|
||||
pub struct Value {
|
||||
/// The name of the setting associated with this value.
|
||||
pub name: &'static str,
|
||||
pub(crate) detail: detail::Detail,
|
||||
pub(crate) values: Option<&'static [&'static str]>,
|
||||
pub(crate) value: u8,
|
||||
}
|
||||
|
||||
impl Value {
|
||||
/// Gets the kind of setting.
|
||||
pub fn kind(&self) -> SettingKind {
|
||||
match &self.detail {
|
||||
detail::Detail::Enum { .. } => SettingKind::Enum,
|
||||
detail::Detail::Num => SettingKind::Num,
|
||||
detail::Detail::Bool { .. } => SettingKind::Bool,
|
||||
detail::Detail::Preset => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Gets the enum value if the value is from an enum setting.
|
||||
pub fn as_enum(&self) -> Option<&'static str> {
|
||||
self.values.map(|v| v[self.value as usize])
|
||||
}
|
||||
|
||||
/// Gets the numerical value if the value is from a num setting.
|
||||
pub fn as_num(&self) -> Option<u8> {
|
||||
match &self.detail {
|
||||
detail::Detail::Num => Some(self.value),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Gets the boolean value if the value is from a boolean setting.
|
||||
pub fn as_bool(&self) -> Option<bool> {
|
||||
match &self.detail {
|
||||
detail::Detail::Bool { bit } => Some(self.value & (1 << bit) != 0),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Collect settings values based on a template.
|
||||
#[derive(Clone, Hash)]
|
||||
pub struct Builder {
|
||||
@@ -66,6 +137,30 @@ impl Builder {
|
||||
self.bytes
|
||||
}
|
||||
|
||||
/// Iterates the available settings in the builder.
|
||||
pub fn iter(&self) -> impl Iterator<Item = Setting> {
|
||||
let template = self.template;
|
||||
|
||||
template.descriptors.iter().map(move |d| {
|
||||
let (kind, values) = match d.detail {
|
||||
detail::Detail::Enum { last, enumerators } => {
|
||||
let values = template.enums(last, enumerators);
|
||||
(SettingKind::Enum, Some(values))
|
||||
}
|
||||
detail::Detail::Num => (SettingKind::Num, None),
|
||||
detail::Detail::Bool { .. } => (SettingKind::Bool, None),
|
||||
detail::Detail::Preset => (SettingKind::Preset, None),
|
||||
};
|
||||
|
||||
Setting {
|
||||
name: d.name,
|
||||
description: d.description,
|
||||
kind,
|
||||
values,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Set the value of a single bit.
|
||||
fn set_bit(&mut self, offset: usize, bit: u8, value: bool) {
|
||||
let byte = &mut self.bytes[offset];
|
||||
@@ -165,21 +260,34 @@ impl Configurable for Builder {
|
||||
}
|
||||
|
||||
/// An error produced when changing a setting.
|
||||
#[derive(Error, Debug, PartialEq, Eq)]
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum SetError {
|
||||
/// No setting by this name exists.
|
||||
#[error("No existing setting named '{0}'")]
|
||||
BadName(String),
|
||||
|
||||
/// Type mismatch for setting (e.g., setting an enum setting as a bool).
|
||||
#[error("Trying to set a setting with the wrong type")]
|
||||
BadType,
|
||||
|
||||
/// This is not a valid value for this setting.
|
||||
#[error("Unexpected value for a setting, expected {0}")]
|
||||
BadValue(String),
|
||||
}
|
||||
|
||||
impl std::error::Error for SetError {}
|
||||
|
||||
impl fmt::Display for SetError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
SetError::BadName(name) => write!(f, "No existing setting named '{}'", name),
|
||||
SetError::BadType => {
|
||||
write!(f, "Trying to set a setting with the wrong type")
|
||||
}
|
||||
SetError::BadValue(value) => {
|
||||
write!(f, "Unexpected value for a setting, expected {}", value)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A result returned when changing a setting.
|
||||
pub type SetResult<T> = Result<T, SetError>;
|
||||
|
||||
@@ -288,6 +396,9 @@ pub mod detail {
|
||||
/// Lower snake-case name of setting as defined in meta.
|
||||
pub name: &'static str,
|
||||
|
||||
/// The description of the setting.
|
||||
pub description: &'static str,
|
||||
|
||||
/// Offset of byte containing this setting.
|
||||
pub offset: u32,
|
||||
|
||||
|
||||
@@ -80,7 +80,6 @@ use alloc::vec::Vec;
|
||||
use core::cmp::Ordering;
|
||||
use core::fmt::{self, Display, Formatter, Write};
|
||||
use log::debug;
|
||||
use thiserror::Error;
|
||||
|
||||
pub use self::cssa::verify_cssa;
|
||||
pub use self::liveness::verify_liveness;
|
||||
@@ -92,8 +91,7 @@ mod liveness;
|
||||
mod locations;
|
||||
|
||||
/// A verifier error.
|
||||
#[derive(Error, Debug, PartialEq, Eq, Clone)]
|
||||
#[error("{}{}: {}", .location, format_context(.context), .message)]
|
||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||
pub struct VerifierError {
|
||||
/// The entity causing the verifier error.
|
||||
pub location: AnyEntity,
|
||||
@@ -104,11 +102,16 @@ pub struct VerifierError {
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
/// Helper for formatting Verifier::Error context.
|
||||
fn format_context(context: &Option<String>) -> String {
|
||||
match context {
|
||||
None => "".to_string(),
|
||||
Some(c) => format!(" ({})", c),
|
||||
// This is manually implementing Error and Display instead of using thiserror to reduce the amount
|
||||
// of dependencies used by Cranelift.
|
||||
impl std::error::Error for VerifierError {}
|
||||
|
||||
impl Display for VerifierError {
|
||||
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||||
match &self.context {
|
||||
None => write!(f, "{}: {}", self.location, self.message),
|
||||
Some(context) => write!(f, "{} ({}): {}", self.location, context, self.message),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -175,9 +178,13 @@ pub type VerifierStepResult<T> = Result<T, ()>;
|
||||
pub type VerifierResult<T> = Result<T, VerifierErrors>;
|
||||
|
||||
/// List of verifier errors.
|
||||
#[derive(Error, Debug, Default, PartialEq, Eq, Clone)]
|
||||
#[derive(Debug, Default, PartialEq, Eq, Clone)]
|
||||
pub struct VerifierErrors(pub Vec<VerifierError>);
|
||||
|
||||
// This is manually implementing Error and Display instead of using thiserror to reduce the amount
|
||||
// of dependencies used by Cranelift.
|
||||
impl std::error::Error for VerifierErrors {}
|
||||
|
||||
impl VerifierErrors {
|
||||
/// Return a new `VerifierErrors` struct.
|
||||
#[inline]
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
authors = ["The Cranelift Project Developers"]
|
||||
name = "cranelift-entity"
|
||||
version = "0.72.0"
|
||||
version = "0.73.0"
|
||||
description = "Data structures using entity references as mapping keys"
|
||||
license = "Apache-2.0 WITH LLVM-exception"
|
||||
documentation = "https://docs.rs/cranelift-entity"
|
||||
|
||||
@@ -70,21 +70,25 @@ macro_rules! entity_impl {
|
||||
// Basic traits.
|
||||
($entity:ident) => {
|
||||
impl $crate::EntityRef for $entity {
|
||||
#[inline]
|
||||
fn new(index: usize) -> Self {
|
||||
debug_assert!(index < ($crate::__core::u32::MAX as usize));
|
||||
$entity(index as u32)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn index(self) -> usize {
|
||||
self.0 as usize
|
||||
}
|
||||
}
|
||||
|
||||
impl $crate::packed_option::ReservedValue for $entity {
|
||||
#[inline]
|
||||
fn reserved_value() -> $entity {
|
||||
$entity($crate::__core::u32::MAX)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_reserved_value(&self) -> bool {
|
||||
self.0 == $crate::__core::u32::MAX
|
||||
}
|
||||
@@ -93,6 +97,7 @@ macro_rules! entity_impl {
|
||||
impl $entity {
|
||||
/// Create a new instance from a `u32`.
|
||||
#[allow(dead_code)]
|
||||
#[inline]
|
||||
pub fn from_u32(x: u32) -> Self {
|
||||
debug_assert!(x < $crate::__core::u32::MAX);
|
||||
$entity(x)
|
||||
@@ -100,6 +105,7 @@ macro_rules! entity_impl {
|
||||
|
||||
/// Return the underlying index value as a `u32`.
|
||||
#[allow(dead_code)]
|
||||
#[inline]
|
||||
pub fn as_u32(self) -> u32 {
|
||||
self.0
|
||||
}
|
||||
|
||||
@@ -148,6 +148,28 @@ where
|
||||
pub fn into_boxed_slice(self) -> BoxedSlice<K, V> {
|
||||
unsafe { BoxedSlice::<K, V>::from_raw(Box::<[V]>::into_raw(self.elems.into_boxed_slice())) }
|
||||
}
|
||||
|
||||
/// Performs a binary search on the values with a key extraction function.
|
||||
///
|
||||
/// Assumes that the values are sorted by the key extracted by the function.
|
||||
///
|
||||
/// If the value is found then `Ok(K)` is returned, containing the entity key
|
||||
/// of the matching value.
|
||||
///
|
||||
/// If there are multiple matches, then any one of the matches could be returned.
|
||||
///
|
||||
/// If the value is not found then `Err(K)` is returned, containing the entity key
|
||||
/// where a matching element could be inserted while maintaining sorted order.
|
||||
pub fn binary_search_values_by_key<'a, B, F>(&'a self, b: &B, f: F) -> Result<K, K>
|
||||
where
|
||||
F: FnMut(&'a V) -> B,
|
||||
B: Ord,
|
||||
{
|
||||
self.elems
|
||||
.binary_search_by_key(b, f)
|
||||
.map(|i| K::new(i))
|
||||
.map_err(|i| K::new(i))
|
||||
}
|
||||
}
|
||||
|
||||
impl<K, V> Default for PrimaryMap<K, V>
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "cranelift-filetests"
|
||||
authors = ["The Cranelift Project Developers"]
|
||||
version = "0.66.0"
|
||||
version = "0.73.0"
|
||||
description = "Test driver and implementations of the filetest commands"
|
||||
license = "Apache-2.0 WITH LLVM-exception"
|
||||
documentation = "https://docs.rs/cranelift-filetests"
|
||||
@@ -10,24 +10,22 @@ publish = false
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
cranelift-codegen = { path = "../codegen", version = "0.72.0", features = ["testing_hooks"] }
|
||||
cranelift-frontend = { path = "../frontend", version = "0.72.0" }
|
||||
cranelift-interpreter = { path = "../interpreter", version = "0.72.0" }
|
||||
cranelift-native = { path = "../native", version = "0.72.0" }
|
||||
cranelift-reader = { path = "../reader", version = "0.72.0" }
|
||||
cranelift-preopt = { path = "../preopt", version = "0.72.0" }
|
||||
byteorder = { version = "1.3.2", default-features = false }
|
||||
cranelift-codegen = { path = "../codegen", version = "0.73.0", features = ["testing_hooks"] }
|
||||
cranelift-frontend = { path = "../frontend", version = "0.73.0" }
|
||||
cranelift-interpreter = { path = "../interpreter", version = "0.73.0" }
|
||||
cranelift-native = { path = "../native", version = "0.73.0" }
|
||||
cranelift-reader = { path = "../reader", version = "0.73.0" }
|
||||
cranelift-preopt = { path = "../preopt", version = "0.73.0" }
|
||||
file-per-thread-logger = "0.1.2"
|
||||
filecheck = "0.5.0"
|
||||
gimli = { version = "0.23.0", default-features = false, features = ["read"] }
|
||||
gimli = { version = "0.24.0", default-features = false, features = ["read"] }
|
||||
log = "0.4.6"
|
||||
memmap2 = "0.2.1"
|
||||
num_cpus = "1.8.0"
|
||||
target-lexicon = "0.11"
|
||||
target-lexicon = "0.12"
|
||||
thiserror = "1.0.15"
|
||||
anyhow = "1.0.32"
|
||||
|
||||
[features]
|
||||
enable-peepmatic = []
|
||||
experimental_arm32 = []
|
||||
experimental_x64 = []
|
||||
|
||||
@@ -77,22 +77,72 @@ block0(v0: f64):
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: str q8, [sp, #-16]!
|
||||
; nextln: str q9, [sp, #-16]!
|
||||
; nextln: str q10, [sp, #-16]!
|
||||
; nextln: str q11, [sp, #-16]!
|
||||
; nextln: str q12, [sp, #-16]!
|
||||
; nextln: str q13, [sp, #-16]!
|
||||
; nextln: str q14, [sp, #-16]!
|
||||
; nextln: str q15, [sp, #-16]!
|
||||
; nextln: stp d14, d15, [sp, #-16]!
|
||||
; nextln: stp d12, d13, [sp, #-16]!
|
||||
; nextln: stp d10, d11, [sp, #-16]!
|
||||
; nextln: stp d8, d9, [sp, #-16]!
|
||||
|
||||
; check: ldr q15, [sp], #16
|
||||
; nextln: ldr q14, [sp], #16
|
||||
; nextln: ldr q13, [sp], #16
|
||||
; nextln: ldr q12, [sp], #16
|
||||
; nextln: ldr q11, [sp], #16
|
||||
; nextln: ldr q10, [sp], #16
|
||||
; nextln: ldr q9, [sp], #16
|
||||
; nextln: ldr q8, [sp], #16
|
||||
; check: ldp d8, d9, [sp], #16
|
||||
; nextln: ldp d10, d11, [sp], #16
|
||||
; nextln: ldp d12, d13, [sp], #16
|
||||
; nextln: ldp d14, d15, [sp], #16
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %f2(i64) -> i64 {
|
||||
block0(v0: i64):
|
||||
v1 = iadd.i64 v0, v0
|
||||
v2 = iadd.i64 v0, v1
|
||||
v3 = iadd.i64 v0, v2
|
||||
v4 = iadd.i64 v0, v3
|
||||
v5 = iadd.i64 v0, v4
|
||||
v6 = iadd.i64 v0, v5
|
||||
v7 = iadd.i64 v0, v6
|
||||
v8 = iadd.i64 v0, v7
|
||||
v9 = iadd.i64 v0, v8
|
||||
v10 = iadd.i64 v0, v9
|
||||
v11 = iadd.i64 v0, v10
|
||||
v12 = iadd.i64 v0, v11
|
||||
v13 = iadd.i64 v0, v12
|
||||
v14 = iadd.i64 v0, v13
|
||||
v15 = iadd.i64 v0, v14
|
||||
v16 = iadd.i64 v0, v15
|
||||
v17 = iadd.i64 v0, v16
|
||||
v18 = iadd.i64 v0, v17
|
||||
|
||||
v19 = iadd.i64 v0, v1
|
||||
v20 = iadd.i64 v2, v3
|
||||
v21 = iadd.i64 v4, v5
|
||||
v22 = iadd.i64 v6, v7
|
||||
v23 = iadd.i64 v8, v9
|
||||
v24 = iadd.i64 v10, v11
|
||||
v25 = iadd.i64 v12, v13
|
||||
v26 = iadd.i64 v14, v15
|
||||
v27 = iadd.i64 v16, v17
|
||||
|
||||
v28 = iadd.i64 v18, v19
|
||||
v29 = iadd.i64 v20, v21
|
||||
v30 = iadd.i64 v22, v23
|
||||
v31 = iadd.i64 v24, v25
|
||||
v32 = iadd.i64 v26, v27
|
||||
|
||||
v33 = iadd.i64 v28, v29
|
||||
v34 = iadd.i64 v30, v31
|
||||
|
||||
v35 = iadd.i64 v32, v33
|
||||
v36 = iadd.i64 v34, v35
|
||||
|
||||
return v36
|
||||
}
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: str x22, [sp, #-16]!
|
||||
; nextln: stp x19, x20, [sp, #-16]!
|
||||
; nextln: add x1, x0, x0
|
||||
|
||||
; check: add x0, x1, x0
|
||||
; nextln: ldp x19, x20, [sp], #16
|
||||
; nextln: ldr x22, [sp], #16
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
1136
cranelift/filetests/filetests/isa/s390x/arithmetic.clif
Normal file
1136
cranelift/filetests/filetests/isa/s390x/arithmetic.clif
Normal file
File diff suppressed because it is too large
Load Diff
243
cranelift/filetests/filetests/isa/s390x/bitops.clif
Normal file
243
cranelift/filetests/filetests/isa/s390x/bitops.clif
Normal file
@@ -0,0 +1,243 @@
|
||||
test compile
|
||||
target s390x
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; BITREV
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
; FIXME: bitrev not yet implemented
|
||||
|
||||
;function %bitrev_i64(i64) -> i64 {
|
||||
;block0(v0: i64):
|
||||
; v1 = bitrev v0
|
||||
; return v1
|
||||
;}
|
||||
;
|
||||
;function %bitrev_i32(i32) -> i32 {
|
||||
;block0(v0: i32):
|
||||
; v1 = bitrev v0
|
||||
; return v1
|
||||
;}
|
||||
;
|
||||
;function %bitrev_i16(i16) -> i16 {
|
||||
;block0(v0: i16):
|
||||
; v1 = bitrev v0
|
||||
; return v1
|
||||
;}
|
||||
;
|
||||
;function %bitrev_i8(i8) -> i8 {
|
||||
;block0(v0: i8):
|
||||
; v1 = bitrev v0
|
||||
; return v1
|
||||
;}
|
||||
;
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; CLZ
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
function %clz_i64(i64) -> i64 {
|
||||
block0(v0: i64):
|
||||
v1 = clz v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: flogr %r0, %r2
|
||||
; nextln: lgr %r2, %r0
|
||||
; nextln: br %r14
|
||||
|
||||
function %clz_i32(i32) -> i32 {
|
||||
block0(v0: i32):
|
||||
v1 = clz v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: llgfr %r2, %r2
|
||||
; nextln: flogr %r0, %r2
|
||||
; nextln: lr %r2, %r0
|
||||
; nextln: ahi %r2, -32
|
||||
; nextln: br %r14
|
||||
|
||||
function %clz_i16(i16) -> i16 {
|
||||
block0(v0: i16):
|
||||
v1 = clz v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: llghr %r2, %r2
|
||||
; nextln: flogr %r0, %r2
|
||||
; nextln: lr %r2, %r0
|
||||
; nextln: ahi %r2, -48
|
||||
; nextln: br %r14
|
||||
|
||||
function %clz_i8(i8) -> i8 {
|
||||
block0(v0: i8):
|
||||
v1 = clz v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: llgcr %r2, %r2
|
||||
; nextln: flogr %r0, %r2
|
||||
; nextln: lr %r2, %r0
|
||||
; nextln: ahi %r2, -56
|
||||
; nextln: br %r14
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; CLS
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
function %cls_i64(i64) -> i64 {
|
||||
block0(v0: i64):
|
||||
v1 = cls v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: srag %r3, %r2, 63
|
||||
; nextln: xgr %r3, %r2
|
||||
; nextln: flogr %r0, %r2
|
||||
; nextln: lgr %r2, %r0
|
||||
; nextln: br %r14
|
||||
|
||||
function %cls_i32(i32) -> i32 {
|
||||
block0(v0: i32):
|
||||
v1 = cls v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: lgfr %r2, %r2
|
||||
; nextln: srag %r3, %r2, 63
|
||||
; nextln: xgr %r3, %r2
|
||||
; nextln: flogr %r0, %r2
|
||||
; nextln: lr %r2, %r0
|
||||
; nextln: ahi %r2, -32
|
||||
; nextln: br %r14
|
||||
|
||||
function %cls_i16(i16) -> i16 {
|
||||
block0(v0: i16):
|
||||
v1 = cls v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: lghr %r2, %r2
|
||||
; nextln: srag %r3, %r2, 63
|
||||
; nextln: xgr %r3, %r2
|
||||
; nextln: flogr %r0, %r2
|
||||
; nextln: lr %r2, %r0
|
||||
; nextln: ahi %r2, -48
|
||||
; nextln: br %r14
|
||||
|
||||
function %cls_i8(i8) -> i8 {
|
||||
block0(v0: i8):
|
||||
v1 = cls v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: lgbr %r2, %r2
|
||||
; nextln: srag %r3, %r2, 63
|
||||
; nextln: xgr %r3, %r2
|
||||
; nextln: flogr %r0, %r2
|
||||
; nextln: lr %r2, %r0
|
||||
; nextln: ahi %r2, -56
|
||||
; nextln: br %r14
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; CTZ
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
function %ctz_i64(i64) -> i64 {
|
||||
block0(v0: i64):
|
||||
v1 = ctz v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: lcgr %r3, %r2
|
||||
; nextln: ngrk %r2, %r3, %r2
|
||||
; nextln: flogr %r0, %r2
|
||||
; nextln: locghie %r0, -1
|
||||
; nextln: lghi %r2, 63
|
||||
; nextln: sgr %r2, %r0
|
||||
; nextln: br %r14
|
||||
|
||||
function %ctz_i32(i32) -> i32 {
|
||||
block0(v0: i32):
|
||||
v1 = ctz v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: oihl %r2, 1
|
||||
; nextln: lcgr %r3, %r2
|
||||
; nextln: ngrk %r2, %r3, %r2
|
||||
; nextln: flogr %r0, %r2
|
||||
; nextln: lhi %r2, 63
|
||||
; nextln: sr %r2, %r0
|
||||
; nextln: br %r14
|
||||
|
||||
function %ctz_i16(i16) -> i16 {
|
||||
block0(v0: i16):
|
||||
v1 = ctz v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: oilh %r2, 1
|
||||
; nextln: lcgr %r3, %r2
|
||||
; nextln: ngrk %r2, %r3, %r2
|
||||
; nextln: flogr %r0, %r2
|
||||
; nextln: lhi %r2, 63
|
||||
; nextln: sr %r2, %r0
|
||||
; nextln: br %r14
|
||||
|
||||
function %ctz_i8(i8) -> i8 {
|
||||
block0(v0: i8):
|
||||
v1 = ctz v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: oill %r2, 256
|
||||
; nextln: lcgr %r3, %r2
|
||||
; nextln: ngrk %r2, %r3, %r2
|
||||
; nextln: flogr %r0, %r2
|
||||
; nextln: lhi %r2, 63
|
||||
; nextln: sr %r2, %r0
|
||||
; nextln: br %r14
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; POPCNT
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
function %popcnt_i64(i64) -> i64 {
|
||||
block0(v0: i64):
|
||||
v1 = popcnt v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: popcnt %r2, %r2, 8
|
||||
; nextln: br %r14
|
||||
|
||||
function %popcnt_i32(i32) -> i32 {
|
||||
block0(v0: i32):
|
||||
v1 = popcnt v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: llgfr %r2, %r2
|
||||
; nextln: popcnt %r2, %r2, 8
|
||||
; nextln: br %r14
|
||||
|
||||
function %popcnt_i16(i16) -> i16 {
|
||||
block0(v0: i16):
|
||||
v1 = popcnt v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: llghr %r2, %r2
|
||||
; nextln: popcnt %r2, %r2, 8
|
||||
; nextln: br %r14
|
||||
|
||||
function %popcnt_i8(i8) -> i8 {
|
||||
block0(v0: i8):
|
||||
v1 = popcnt v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: popcnt %r2, %r2
|
||||
; nextln: br %r14
|
||||
490
cranelift/filetests/filetests/isa/s390x/bitwise.clif
Normal file
490
cranelift/filetests/filetests/isa/s390x/bitwise.clif
Normal file
@@ -0,0 +1,490 @@
|
||||
|
||||
test compile
|
||||
target s390x
|
||||
|
||||
; FIXME: add immediate operand versions
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; BAND
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
function %band_i64(i64, i64) -> i64 {
|
||||
block0(v0: i64, v1: i64):
|
||||
v2 = band.i64 v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: ngr %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
function %band_i64_mem(i64, i64) -> i64 {
|
||||
block0(v0: i64, v1: i64):
|
||||
v2 = load.i64 v1
|
||||
v3 = band.i64 v0, v2
|
||||
return v3
|
||||
}
|
||||
|
||||
; check: ng %r2, 0(%r3)
|
||||
; nextln: br %r14
|
||||
|
||||
function %band_i32(i32, i32) -> i32 {
|
||||
block0(v0: i32, v1: i32):
|
||||
v2 = band.i32 v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: nr %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
function %band_i32_mem(i32, i64) -> i32 {
|
||||
block0(v0: i32, v1: i64):
|
||||
v2 = load.i32 v1
|
||||
v3 = band.i32 v0, v2
|
||||
return v3
|
||||
}
|
||||
|
||||
; check: n %r2, 0(%r3)
|
||||
; nextln: br %r14
|
||||
|
||||
function %band_i32_memoff(i32, i64) -> i32 {
|
||||
block0(v0: i32, v1: i64):
|
||||
v2 = load.i32 v1+4096
|
||||
v3 = band.i32 v0, v2
|
||||
return v3
|
||||
}
|
||||
|
||||
; check: ny %r2, 4096(%r3)
|
||||
; nextln: br %r14
|
||||
|
||||
function %band_i16(i16, i16) -> i16 {
|
||||
block0(v0: i16, v1: i16):
|
||||
v2 = band.i16 v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: nr %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
function %band_i16_mem(i16, i64) -> i16 {
|
||||
block0(v0: i16, v1: i64):
|
||||
v2 = load.i16 v1
|
||||
v3 = band.i16 v0, v2
|
||||
return v3
|
||||
}
|
||||
|
||||
; check: llh %r3, 0(%r3)
|
||||
; nextln: nr %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
function %band_i8(i8, i8) -> i8 {
|
||||
block0(v0: i8, v1: i8):
|
||||
v2 = band.i8 v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: nr %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
function %band_i8_mem(i8, i64) -> i8 {
|
||||
block0(v0: i8, v1: i64):
|
||||
v2 = load.i8 v1
|
||||
v3 = band.i8 v0, v2
|
||||
return v3
|
||||
}
|
||||
|
||||
; check: llc %r3, 0(%r3)
|
||||
; nextln: nr %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; BOR
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
function %bor_i64(i64, i64) -> i64 {
|
||||
block0(v0: i64, v1: i64):
|
||||
v2 = bor.i64 v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: ogr %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
function %bor_i64_mem(i64, i64) -> i64 {
|
||||
block0(v0: i64, v1: i64):
|
||||
v2 = load.i64 v1
|
||||
v3 = bor.i64 v0, v2
|
||||
return v3
|
||||
}
|
||||
|
||||
; check: og %r2, 0(%r3)
|
||||
; nextln: br %r14
|
||||
|
||||
function %bor_i32(i32, i32) -> i32 {
|
||||
block0(v0: i32, v1: i32):
|
||||
v2 = bor.i32 v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: or %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
function %bor_i32_mem(i32, i64) -> i32 {
|
||||
block0(v0: i32, v1: i64):
|
||||
v2 = load.i32 v1
|
||||
v3 = bor.i32 v0, v2
|
||||
return v3
|
||||
}
|
||||
|
||||
; check: o %r2, 0(%r3)
|
||||
; nextln: br %r14
|
||||
|
||||
function %bor_i32_memoff(i32, i64) -> i32 {
|
||||
block0(v0: i32, v1: i64):
|
||||
v2 = load.i32 v1+4096
|
||||
v3 = bor.i32 v0, v2
|
||||
return v3
|
||||
}
|
||||
|
||||
; check: oy %r2, 4096(%r3)
|
||||
; nextln: br %r14
|
||||
|
||||
function %bor_i16(i16, i16) -> i16 {
|
||||
block0(v0: i16, v1: i16):
|
||||
v2 = bor.i16 v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: or %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
function %bor_i16_mem(i16, i64) -> i16 {
|
||||
block0(v0: i16, v1: i64):
|
||||
v2 = load.i16 v1
|
||||
v3 = bor.i16 v0, v2
|
||||
return v3
|
||||
}
|
||||
|
||||
; check: llh %r3, 0(%r3)
|
||||
; nextln: or %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
function %bor_i8(i8, i8) -> i8 {
|
||||
block0(v0: i8, v1: i8):
|
||||
v2 = bor.i8 v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: or %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
function %bor_i8_mem(i8, i64) -> i8 {
|
||||
block0(v0: i8, v1: i64):
|
||||
v2 = load.i8 v1
|
||||
v3 = bor.i8 v0, v2
|
||||
return v3
|
||||
}
|
||||
|
||||
; check: llc %r3, 0(%r3)
|
||||
; nextln: or %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; BXOR
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
function %bxor_i64(i64, i64) -> i64 {
|
||||
block0(v0: i64, v1: i64):
|
||||
v2 = bxor.i64 v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: xgr %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
function %bxor_i64_mem(i64, i64) -> i64 {
|
||||
block0(v0: i64, v1: i64):
|
||||
v2 = load.i64 v1
|
||||
v3 = bxor.i64 v0, v2
|
||||
return v3
|
||||
}
|
||||
|
||||
; check: xg %r2, 0(%r3)
|
||||
; nextln: br %r14
|
||||
|
||||
function %bxor_i32(i32, i32) -> i32 {
|
||||
block0(v0: i32, v1: i32):
|
||||
v2 = bxor.i32 v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: xr %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
function %bxor_i32_mem(i32, i64) -> i32 {
|
||||
block0(v0: i32, v1: i64):
|
||||
v2 = load.i32 v1
|
||||
v3 = bxor.i32 v0, v2
|
||||
return v3
|
||||
}
|
||||
|
||||
; check: x %r2, 0(%r3)
|
||||
; nextln: br %r14
|
||||
|
||||
function %bxor_i32_memoff(i32, i64) -> i32 {
|
||||
block0(v0: i32, v1: i64):
|
||||
v2 = load.i32 v1+4096
|
||||
v3 = bxor.i32 v0, v2
|
||||
return v3
|
||||
}
|
||||
|
||||
; check: xy %r2, 4096(%r3)
|
||||
; nextln: br %r14
|
||||
|
||||
function %bxor_i16(i16, i16) -> i16 {
|
||||
block0(v0: i16, v1: i16):
|
||||
v2 = bxor.i16 v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: xr %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
function %bxor_i16_mem(i16, i64) -> i16 {
|
||||
block0(v0: i16, v1: i64):
|
||||
v2 = load.i16 v1
|
||||
v3 = bxor.i16 v0, v2
|
||||
return v3
|
||||
}
|
||||
|
||||
; check: llh %r3, 0(%r3)
|
||||
; nextln: xr %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
function %bxor_i8(i8, i8) -> i8 {
|
||||
block0(v0: i8, v1: i8):
|
||||
v2 = bxor.i8 v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: xr %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
function %bxor_i8_mem(i8, i64) -> i8 {
|
||||
block0(v0: i8, v1: i64):
|
||||
v2 = load.i8 v1
|
||||
v3 = bxor.i8 v0, v2
|
||||
return v3
|
||||
}
|
||||
|
||||
; check: llc %r3, 0(%r3)
|
||||
; nextln: xr %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; BAND_NOT
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
function %band_not_i64(i64, i64) -> i64 {
|
||||
block0(v0: i64, v1: i64):
|
||||
v2 = band_not.i64 v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: nngrk %r2, %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
function %band_not_i32(i32, i32) -> i32 {
|
||||
block0(v0: i32, v1: i32):
|
||||
v2 = band_not.i32 v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: nnrk %r2, %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
function %band_not_i16(i16, i16) -> i16 {
|
||||
block0(v0: i16, v1: i16):
|
||||
v2 = band_not.i16 v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: nnrk %r2, %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
function %band_not_i8(i8, i8) -> i8 {
|
||||
block0(v0: i8, v1: i8):
|
||||
v2 = band_not.i8 v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: nnrk %r2, %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; BOR_NOT
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
function %bor_not_i64(i64, i64) -> i64 {
|
||||
block0(v0: i64, v1: i64):
|
||||
v2 = bor_not.i64 v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: nogrk %r2, %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
function %bor_not_i32(i32, i32) -> i32 {
|
||||
block0(v0: i32, v1: i32):
|
||||
v2 = bor_not.i32 v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: nork %r2, %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
function %bor_not_i16(i16, i16) -> i16 {
|
||||
block0(v0: i16, v1: i16):
|
||||
v2 = bor_not.i16 v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: nork %r2, %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
function %bor_not_i8(i8, i8) -> i8 {
|
||||
block0(v0: i8, v1: i8):
|
||||
v2 = bor_not.i8 v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: nork %r2, %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; BXOR_NOT
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
function %bxor_not_i64(i64, i64) -> i64 {
|
||||
block0(v0: i64, v1: i64):
|
||||
v2 = bxor_not.i64 v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: nxgrk %r2, %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
function %bxor_not_i32(i32, i32) -> i32 {
|
||||
block0(v0: i32, v1: i32):
|
||||
v2 = bxor_not.i32 v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: nxrk %r2, %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
function %bxor_not_i16(i16, i16) -> i16 {
|
||||
block0(v0: i16, v1: i16):
|
||||
v2 = bxor_not.i16 v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: nxrk %r2, %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
function %bxor_not_i8(i8, i8) -> i8 {
|
||||
block0(v0: i8, v1: i8):
|
||||
v2 = bxor_not.i8 v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: nxrk %r2, %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; BNOT
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
function %bnot_i64(i64) -> i64 {
|
||||
block0(v0: i64):
|
||||
v1 = bnot.i64 v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: nogrk %r2, %r2, %r2
|
||||
; nextln: br %r14
|
||||
|
||||
function %bnot_i32(i32) -> i32 {
|
||||
block0(v0: i32):
|
||||
v1 = bnot.i32 v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: nork %r2, %r2, %r2
|
||||
; nextln: br %r14
|
||||
|
||||
function %bnot_i16(i16) -> i16 {
|
||||
block0(v0: i16):
|
||||
v1 = bnot.i16 v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: nork %r2, %r2, %r2
|
||||
; nextln: br %r14
|
||||
|
||||
function %bnot_i8(i8) -> i8 {
|
||||
block0(v0: i8):
|
||||
v1 = bnot.i8 v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: nork %r2, %r2, %r2
|
||||
; nextln: br %r14
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; BITSELECT
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
function %bitselect_i64(i64, i64, i64) -> i64 {
|
||||
block0(v0: i64, v1: i64, v2: i64):
|
||||
v3 = bitselect.i64 v0, v1, v2
|
||||
return v3
|
||||
}
|
||||
|
||||
; check: ngr %r3, %r2
|
||||
; nextln: nngrk %r2, %r4, %r2
|
||||
; nextln: ogr %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
function %bitselect_i32(i32, i32, i32) -> i32 {
|
||||
block0(v0: i32, v1: i32, v2: i32):
|
||||
v3 = bitselect.i32 v0, v1, v2
|
||||
return v3
|
||||
}
|
||||
|
||||
; check: nr %r3, %r2
|
||||
; nextln: nnrk %r2, %r4, %r2
|
||||
; nextln: or %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
function %bitselect_i16(i16, i16, i16) -> i16 {
|
||||
block0(v0: i16, v1: i16, v2: i16):
|
||||
v3 = bitselect.i16 v0, v1, v2
|
||||
return v3
|
||||
}
|
||||
|
||||
; check: nr %r3, %r2
|
||||
; nextln: nnrk %r2, %r4, %r2
|
||||
; nextln: or %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
function %bitselect_i8(i8, i8, i8) -> i8 {
|
||||
block0(v0: i8, v1: i8, v2: i8):
|
||||
v3 = bitselect.i8 v0, v1, v2
|
||||
return v3
|
||||
}
|
||||
|
||||
; check: nr %r3, %r2
|
||||
; nextln: nnrk %r2, %r4, %r2
|
||||
; nextln: or %r2, %r3
|
||||
; nextln: br %r14
|
||||
|
||||
113
cranelift/filetests/filetests/isa/s390x/call.clif
Normal file
113
cranelift/filetests/filetests/isa/s390x/call.clif
Normal file
@@ -0,0 +1,113 @@
|
||||
test compile
|
||||
target s390x
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; CALL
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
function %call(i64) -> i64 {
|
||||
fn0 = %g(i64) -> i64
|
||||
|
||||
block0(v0: i64):
|
||||
v1 = call fn0(v0)
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: stmg %r14, %r15, 112(%r15)
|
||||
; nextln: aghi %r15, -160
|
||||
; nextln: virtual_sp_offset_adjust 160
|
||||
; nextln: bras %r1, 12 ; data %g + 0 ; lg %r3, 0(%r1)
|
||||
; nextln: basr %r14, %r3
|
||||
; nextln: lmg %r14, %r15, 272(%r15)
|
||||
; nextln: br %r14
|
||||
|
||||
function %call_uext(i32) -> i64 {
|
||||
fn0 = %g(i32 uext) -> i64
|
||||
|
||||
block0(v0: i32):
|
||||
v1 = call fn0(v0)
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: stmg %r14, %r15, 112(%r15)
|
||||
; nextln: aghi %r15, -160
|
||||
; nextln: virtual_sp_offset_adjust 160
|
||||
; nextln: llgfr %r2, %r2
|
||||
; nextln: bras %r1, 12 ; data %g + 0 ; lg %r3, 0(%r1)
|
||||
; nextln: basr %r14, %r3
|
||||
; nextln: lmg %r14, %r15, 272(%r15)
|
||||
; nextln: br %r14
|
||||
|
||||
function %ret_uext(i32) -> i32 uext {
|
||||
block0(v0: i32):
|
||||
return v0
|
||||
}
|
||||
|
||||
; check: llgfr %r2, %r2
|
||||
; nextln: br %r14
|
||||
|
||||
function %call_uext(i32) -> i64 {
|
||||
fn0 = %g(i32 sext) -> i64
|
||||
|
||||
block0(v0: i32):
|
||||
v1 = call fn0(v0)
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: stmg %r14, %r15, 112(%r15)
|
||||
; nextln: aghi %r15, -160
|
||||
; nextln: virtual_sp_offset_adjust 160
|
||||
; nextln: lgfr %r2, %r2
|
||||
; nextln: bras %r1, 12 ; data %g + 0 ; lg %r3, 0(%r1)
|
||||
; nextln: basr %r14, %r3
|
||||
; nextln: lmg %r14, %r15, 272(%r15)
|
||||
; nextln: br %r14
|
||||
|
||||
function %ret_uext(i32) -> i32 sext {
|
||||
block0(v0: i32):
|
||||
return v0
|
||||
}
|
||||
|
||||
; check: lgfr %r2, %r2
|
||||
; nextln: br %r14
|
||||
|
||||
function %call_colocated(i64) -> i64 {
|
||||
fn0 = colocated %g(i64) -> i64
|
||||
|
||||
block0(v0: i64):
|
||||
v1 = call fn0(v0)
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: stmg %r14, %r15, 112(%r15)
|
||||
; nextln: aghi %r15, -160
|
||||
; nextln: virtual_sp_offset_adjust 160
|
||||
; nextln: brasl %r14, %g
|
||||
; nextln: lmg %r14, %r15, 272(%r15)
|
||||
; nextln: br %r14
|
||||
|
||||
function %f2(i32) -> i64 {
|
||||
fn0 = %g(i32 uext) -> i64
|
||||
|
||||
block0(v0: i32):
|
||||
v1 = call fn0(v0)
|
||||
return v1
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; CALL_INDIRECT
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
function %call_indirect(i64, i64) -> i64 {
|
||||
sig0 = (i64) -> i64
|
||||
block0(v0: i64, v1: i64):
|
||||
v2 = call_indirect.i64 sig0, v1(v0)
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: stmg %r14, %r15, 112(%r15)
|
||||
; nextln: aghi %r15, -160
|
||||
; nextln: virtual_sp_offset_adjust 160
|
||||
; nextln: basr %r14, %r3
|
||||
; nextln: lmg %r14, %r15, 272(%r15)
|
||||
; nextln: br %r14
|
||||
62
cranelift/filetests/filetests/isa/s390x/condbr.clif
Normal file
62
cranelift/filetests/filetests/isa/s390x/condbr.clif
Normal file
@@ -0,0 +1,62 @@
|
||||
test compile
|
||||
target s390x
|
||||
|
||||
function %f(i64, i64) -> b1 {
|
||||
block0(v0: i64, v1: i64):
|
||||
v2 = icmp eq v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: clgr %r2, %r3
|
||||
; nextln: lhi %r2, 0
|
||||
; nextln: lochie %r2, 1
|
||||
; nextln: br %r14
|
||||
|
||||
function %f(i64, i64) -> i64 {
|
||||
block0(v0: i64, v1: i64):
|
||||
v2 = icmp eq v0, v1
|
||||
brnz v2, block1
|
||||
jump block2
|
||||
|
||||
block1:
|
||||
v4 = iconst.i64 1
|
||||
return v4
|
||||
|
||||
block2:
|
||||
v5 = iconst.i64 2
|
||||
return v5
|
||||
}
|
||||
|
||||
; check: Block 0:
|
||||
; check: clgr %r2, %r3
|
||||
; nextln: jge label1 ; jg label2
|
||||
; check: Block 1:
|
||||
; check: lghi %r2, 1
|
||||
; nextln: br %r14
|
||||
; check: Block 2:
|
||||
; check: lghi %r2, 2
|
||||
; nextln: br %r14
|
||||
|
||||
function %f(i64, i64) -> i64 {
|
||||
block0(v0: i64, v1: i64):
|
||||
v2 = icmp eq v0, v1
|
||||
brnz v2, block1
|
||||
jump block1
|
||||
|
||||
block1:
|
||||
v4 = iconst.i64 1
|
||||
return v4
|
||||
}
|
||||
|
||||
; FIXME: Should optimize away branches
|
||||
|
||||
; check: Block 0:
|
||||
; check: clgr %r2, %r3
|
||||
; nextln: jge label1 ; jg label2
|
||||
; check: Block 1:
|
||||
; check: jg label3
|
||||
; check: Block 2:
|
||||
; check: jg label3
|
||||
; check: Block 3:
|
||||
; check: lghi %r2, 1
|
||||
; nextln: br %r14
|
||||
43
cranelift/filetests/filetests/isa/s390x/condops.clif
Normal file
43
cranelift/filetests/filetests/isa/s390x/condops.clif
Normal file
@@ -0,0 +1,43 @@
|
||||
test compile
|
||||
target s390x
|
||||
|
||||
function %f(i8, i64, i64) -> i64 {
|
||||
block0(v0: i8, v1: i64, v2: i64):
|
||||
v3 = iconst.i8 42
|
||||
v4 = icmp eq v0, v3
|
||||
v5 = select.i64 v4, v1, v2
|
||||
return v5
|
||||
}
|
||||
|
||||
; check: llcr %r2, %r2
|
||||
; nextln: clfi %r2, 42
|
||||
; nextln: locgre %r4, %r3
|
||||
; nextln: lgr %r2, %r4
|
||||
; nextln: br %r14
|
||||
|
||||
function %g(b1, i8, i8) -> i8 {
|
||||
block0(v0: b1, v1: i8, v2: i8):
|
||||
v3 = select.i8 v0, v1, v2
|
||||
return v3
|
||||
}
|
||||
|
||||
; FIXME: optimize i8/i16 compares
|
||||
|
||||
; check: llcr %r2, %r2
|
||||
; nextln: chi %r2, 0
|
||||
; nextln: locrlh %r4, %r3
|
||||
; nextln: lr %r2, %r4
|
||||
; nextln: br %r14
|
||||
|
||||
function %i(i32, i8, i8) -> i8 {
|
||||
block0(v0: i32, v1: i8, v2: i8):
|
||||
v3 = iconst.i32 42
|
||||
v4 = icmp.i32 eq v0, v3
|
||||
v5 = select.i8 v4, v1, v2
|
||||
return v5
|
||||
}
|
||||
|
||||
; check: clfi %r2, 42
|
||||
; nextln: locre %r4, %r3
|
||||
; nextln: lr %r2, %r4
|
||||
; nextln: br %r14
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user