properly splatting bytes in emit_small_memset

This commit is contained in:
MaxGraey
2021-05-13 22:02:17 +03:00
614 changed files with 40658 additions and 7141 deletions

View File

@@ -30,3 +30,8 @@ set_env("CARGO_INCREMENTAL", "0");
// Turn down debuginfo from 2 to 1 to help save disk space
set_env("CARGO_PROFILE_DEV_DEBUG", "1");
set_env("CARGO_PROFILE_TEST_DEBUG", "1");
// On macOS (`process.platform` is 'darwin') additionally request "unpacked"
// split debuginfo for both the dev and test profiles.
// NOTE(review): presumably this avoids the cost of bundling debuginfo on
// macOS builders; the motivation is not visible here, so confirm.
if (process.platform === 'darwin') {
set_env("CARGO_PROFILE_DEV_SPLIT_DEBUGINFO", "unpacked");
set_env("CARGO_PROFILE_TEST_SPLIT_DEBUGINFO", "unpacked");
}

View File

@@ -48,6 +48,7 @@ jobs:
- uses: actions/checkout@v2
with:
submodules: true
- run: rustup update stable && rustup default stable
- run: |
set -e
curl -L https://github.com/rust-lang-nursery/mdBook/releases/download/v0.4.4/mdbook-v0.4.4-x86_64-unknown-linux-gnu.tar.gz | tar xzf -
@@ -73,12 +74,15 @@ jobs:
- uses: actions/checkout@v2
with:
submodules: true
# Note that we use nightly Rust for the doc_cfg feature (enabled via `nightlydoc` above)
# This version is an older nightly for the new x64 backend (see below)
- uses: ./.github/actions/install-rust
with:
toolchain: nightly-2020-12-26
- run: cargo doc --no-deps --all --exclude wasmtime-cli --exclude test-programs --exclude cranelift-codegen-meta
toolchain: nightly-2021-04-11
- run: |
cargo doc --no-deps --workspace \
--exclude wasmtime-cli \
--exclude test-programs \
--exclude cranelift-codegen-meta \
--exclude 'peepmatic*'
- run: cargo doc --package cranelift-codegen-meta --document-private-items
- uses: actions/upload-artifact@v1
with:
@@ -165,7 +169,7 @@ jobs:
# flags to rustc.
- uses: ./.github/actions/install-rust
with:
toolchain: nightly
toolchain: nightly-2021-04-11
- run: cargo install cargo-fuzz --vers "^0.8"
- run: cargo fetch
working-directory: ./fuzz
@@ -178,16 +182,9 @@ jobs:
- uses: actions/checkout@v2
with:
submodules: true
- run: rustup update stable && rustup default stable
- name: Test `peepmatic`
run: |
cargo test \
--package peepmatic \
--package peepmatic-automata \
--package peepmatic-fuzzing \
--package peepmatic-macro \
--package peepmatic-runtime \
--package peepmatic-test \
--package peepmatic-souper
run: cargo test --package 'peepmatic*'
- name: Rebuild Peepmatic-based peephole optimizers
run: |
cargo test \
@@ -211,6 +208,7 @@ jobs:
name: Test
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
build: [stable, beta, nightly, windows, macos]
include:
@@ -222,7 +220,7 @@ jobs:
rust: beta
- build: nightly
os: ubuntu-latest
rust: nightly
rust: nightly-2021-04-11
- build: macos
os: macos-latest
rust: stable
@@ -270,18 +268,10 @@ jobs:
- run: |
cargo test \
--features test-programs/test_programs \
--all \
--exclude lightbeam \
--exclude wasmtime-lightbeam \
--exclude wasmtime-wasi-nn \
--exclude wasmtime-wasi-crypto \
--exclude peepmatic \
--exclude peepmatic-automata \
--exclude peepmatic-fuzzing \
--exclude peepmatic-macro \
--exclude peepmatic-runtime \
--exclude peepmatic-test \
--exclude peepmatic-souper
--workspace \
--exclude '*lightbeam*' \
--exclude 'wasmtime-wasi-*' \
--exclude 'peepmatic*'
env:
RUST_BACKTRACE: 1
@@ -297,7 +287,7 @@ jobs:
# Test debug (DWARF) related functionality on new backend.
- run: |
sudo apt-get update && sudo apt-get install -y gdb lldb
cargo test --features experimental_x64 test_debug_dwarf -- --ignored --test-threads 1 --test debug::
cargo test test_debug_dwarf -- --ignored --test-threads 1 --test debug::
if: matrix.os == 'ubuntu-latest'
env:
RUST_BACKTRACE: 1
@@ -320,13 +310,9 @@ jobs:
env:
RUST_BACKTRACE: 1
# Perform all tests (debug mode) for `wasmtime` with the experimental x64
# backend. This runs on an older nightly of Rust (because of issues with
# unifying Cargo features on stable) on Ubuntu such that it's new enough
# to build Wasmtime, but old enough where the -Z options being used
# haven't been stabilized yet.
# Perform all tests (debug mode) for `wasmtime` with the old x86 backend.
test_x64:
name: Test x64 new backend
name: Test old x86 backend
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
@@ -334,7 +320,7 @@ jobs:
submodules: true
- uses: ./.github/actions/install-rust
with:
toolchain: nightly-2020-12-26
toolchain: stable
- uses: ./.github/actions/define-llvm-env
# Install wasm32 targets in order to build various tests throughout the
@@ -342,43 +328,9 @@ jobs:
- run: rustup target add wasm32-wasi
- run: rustup target add wasm32-unknown-unknown
# Run the x64 CI script.
- run: ./ci/run-experimental-x64-ci.sh
# Run the old x86 backend CI (we will eventually remove this).
- run: ./ci/run-old-x86-ci.sh
env:
CARGO_VERSION: "+nightly-2020-12-26"
RUST_BACKTRACE: 1
# Perform tests on the new x64 backend on Windows as well.
test_x64_win:
name: Test x64 new backend on Windows
runs-on: windows-latest
steps:
- uses: actions/checkout@v2
with:
submodules: true
- uses: ./.github/actions/install-rust
with:
toolchain: nightly-2020-11-29
- uses: ./.github/actions/define-llvm-env
- name: Install libclang
# Note: libclang is pre-installed on the macOS and linux images.
if: matrix.os == 'windows-latest'
run: |
curl https://releases.llvm.org/9.0.0/LLVM-9.0.0-win64.exe -o llvm-installer.exe
7z x llvm-installer.exe -oC:/llvm-binary
echo LIBCLANG_PATH=C:/llvm-binary/bin/libclang.dll >> $GITHUB_ENV
echo C:/llvm-binary/bin >> $GITHUB_PATH
# Install wasm32 targets in order to build various tests throughout the
# repo.
- run: rustup target add wasm32-wasi
- run: rustup target add wasm32-unknown-unknown
# Run the x64 CI script.
- run: ./ci/run-experimental-x64-ci.sh
env:
CARGO_VERSION: "+nightly-2020-11-29"
RUST_BACKTRACE: 1
# Build and test the wasi-nn module.
@@ -390,8 +342,6 @@ jobs:
with:
submodules: true
- uses: ./.github/actions/install-rust
with:
toolchain: nightly
- run: rustup target add wasm32-wasi
- uses: ./.github/actions/install-openvino
- run: ./ci/run-wasi-nn-example.sh
@@ -433,6 +383,7 @@ jobs:
name: Build wasmtime
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
include:
- build: x86_64-linux
@@ -517,18 +468,10 @@ jobs:
$CENTOS cargo test \
--features test-programs/test_programs \
--release \
--all \
--exclude lightbeam \
--exclude wasmtime-lightbeam \
--exclude wasmtime-wasi-nn \
--exclude wasmtime-wasi-crypto \
--exclude peepmatic \
--exclude peepmatic-automata \
--exclude peepmatic-fuzzing \
--exclude peepmatic-macro \
--exclude peepmatic-runtime \
--exclude peepmatic-test \
--exclude peepmatic-souper \
--workspace \
--exclude '*lightbeam*' \
--exclude 'wasmtime-wasi-*' \
--exclude 'peepmatic*' \
--exclude wasmtime-fuzz
env:
RUST_BACKTRACE: 1

660
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
[package]
name = "wasmtime-cli"
version = "0.25.0"
version = "0.26.0"
authors = ["The Wasmtime Project Developers"]
description = "Command-line interface for Wasmtime"
license = "Apache-2.0 WITH LLVM-exception"
@@ -22,31 +22,29 @@ doc = false
[dependencies]
# Enable all supported architectures by default.
wasmtime = { path = "crates/wasmtime", version = "0.25.0", default-features = false, features = ['cache'] }
wasmtime-cache = { path = "crates/cache", version = "0.25.0" }
wasmtime-debug = { path = "crates/debug", version = "0.25.0" }
wasmtime-environ = { path = "crates/environ", version = "0.25.0" }
wasmtime-jit = { path = "crates/jit", version = "0.25.0" }
wasmtime-obj = { path = "crates/obj", version = "0.25.0" }
wasmtime-wast = { path = "crates/wast", version = "0.25.0" }
wasmtime-wasi = { path = "crates/wasi", version = "0.25.0" }
wasmtime-wasi-crypto = { path = "crates/wasi-crypto", version = "0.25.0", optional = true }
wasmtime-wasi-nn = { path = "crates/wasi-nn", version = "0.25.0", optional = true }
wasi-common = { path = "crates/wasi-common", version = "0.25.0" }
wasi-cap-std-sync = { path = "crates/wasi-common/cap-std-sync", version = "0.25.0" }
wasmtime = { path = "crates/wasmtime", version = "0.26.0", default-features = false, features = ['cache'] }
wasmtime-cache = { path = "crates/cache", version = "0.26.0" }
wasmtime-debug = { path = "crates/debug", version = "0.26.0" }
wasmtime-environ = { path = "crates/environ", version = "0.26.0" }
wasmtime-jit = { path = "crates/jit", version = "0.26.0" }
wasmtime-obj = { path = "crates/obj", version = "0.26.0" }
wasmtime-wast = { path = "crates/wast", version = "0.26.0" }
wasmtime-wasi = { path = "crates/wasi", version = "0.26.0" }
wasmtime-wasi-crypto = { path = "crates/wasi-crypto", version = "0.26.0", optional = true }
wasmtime-wasi-nn = { path = "crates/wasi-nn", version = "0.26.0", optional = true }
structopt = { version = "0.3.5", features = ["color", "suggestions"] }
object = { version = "0.23.0", default-features = false, features = ["write"] }
object = { version = "0.24.0", default-features = false, features = ["write"] }
anyhow = "1.0.19"
target-lexicon = { version = "0.11.0", default-features = false }
target-lexicon = { version = "0.12.0", default-features = false }
pretty_env_logger = "0.4.0"
file-per-thread-logger = "0.1.1"
wat = "1.0.36"
wat = "1.0.37"
libc = "0.2.60"
log = "0.4.8"
rayon = "1.2.1"
humantime = "2.0.0"
wasmparser = "0.76.0"
cap-std = "0.13"
wasmparser = "0.77.0"
lazy_static = "1.4.0"
[dev-dependencies]
env_logger = "0.8.1"
@@ -56,6 +54,7 @@ tempfile = "3.1.0"
test-programs = { path = "crates/test-programs" }
wasmtime-fuzzing = { path = "crates/fuzzing" }
wasmtime-runtime = { path = "crates/runtime" }
tokio = { version = "1.5.0", features = ["rt", "time", "macros", "rt-multi-thread"] }
tracing-subscriber = "0.2.16"
wast = "35.0.0"
@@ -66,6 +65,7 @@ anyhow = "1.0.19"
opt-level = 0
[workspace]
resolver = '2'
members = [
"cranelift",
"crates/bench-api",
@@ -79,23 +79,29 @@ members = [
"crates/wiggle/wasmtime",
"crates/wasi-common",
"crates/wasi-common/cap-std-sync",
"crates/wasi-common/tokio",
"examples/fib-debug/wasm",
"examples/wasi/wasm",
"examples/tokio/wasm",
"fuzz",
]
[features]
default = ["jitdump", "wasmtime/wat", "wasmtime/parallel-compilation"]
default = ["jitdump", "wasmtime/wat", "wasmtime/parallel-compilation", "wasi-nn"]
lightbeam = ["wasmtime/lightbeam"]
jitdump = ["wasmtime/jitdump"]
vtune = ["wasmtime/vtune"]
wasi-crypto = ["wasmtime-wasi-crypto"]
wasi-nn = ["wasmtime-wasi-nn"]
uffd = ["wasmtime/uffd"]
all-arch = ["wasmtime/all-arch"]
# Try the experimental, work-in-progress new x86_64 backend. This is not stable
# as of June 2020.
experimental_x64 = ["wasmtime-jit/experimental_x64"]
# Stub feature that does nothing, for Cargo-features compatibility: the new
# backend is the default now.
experimental_x64 = []
# Use the old x86 backend.
old-x86-backend = ["wasmtime/old-x86-backend"]
[badges]
maintenance = { status = "actively-developed" }
@@ -104,5 +110,9 @@ maintenance = { status = "actively-developed" }
name = "host_segfault"
harness = false
[[example]]
name = "tokio"
required-features = ["wasmtime-wasi/tokio"]
[profile.dev.package.backtrace]
debug = false # FIXME(#1813)

View File

@@ -2,6 +2,137 @@
--------------------------------------------------------------------------------
## Unreleased
### Added
* Added `Store::with_limits`, `StoreLimits`, and `ResourceLimiter` to the
Wasmtime API to help with enforcing resource limits at runtime. The
`ResourceLimiter` trait can be implemented by custom resource limiters to
decide if linear memories or tables can be grown.
### Changed
* Breaking: `Memory::new` has been changed to return `Result` as creating a
host memory object is now a fallible operation when the initial size of
the memory exceeds the store limits.
## 0.26.0
Released 2021-04-05.
### Added
* Added the `wasmtime compile` command to support AOT compilation of Wasm
modules. This adds the `Engine::precompile_module` method. Also added the
`Config::target` method to change the compilation target of the
configuration. This can be used in conjunction with
`Engine::precompile_module` to target a different host triple than the
current one.
[#2791](https://github.com/bytecodealliance/wasmtime/pull/2791)
* Support for macOS on aarch64 (Apple M1 Silicon), including Apple-specific
calling convention details and unwinding/exception handling using Mach ports.
[#2742](https://github.com/bytecodealliance/wasmtime/pull/2742),
[#2723](https://github.com/bytecodealliance/wasmtime/pull/2723)
* A number of SIMD instruction implementations in the new x86-64 backend.
[#2771](https://github.com/bytecodealliance/wasmtime/pull/2771)
* Added the `Config::cranelift_flag_enable` method to enable setting Cranelift
boolean flags or presets in a config.
* Added CLI option `--cranelift-enable` to enable boolean settings and ISA presets.
* Deduplicate function signatures in Wasm modules.
[#2772](https://github.com/bytecodealliance/wasmtime/pull/2772)
* Optimize overheads of calling into Wasm functions.
[#2757](https://github.com/bytecodealliance/wasmtime/pull/2757),
[#2759](https://github.com/bytecodealliance/wasmtime/pull/2759)
* Improvements related to Module Linking: compile fewer trampolines.
[#2774](https://github.com/bytecodealliance/wasmtime/pull/2774)
* Re-export sibling crates from `wasmtime-wasi` to make embedding easier
without needing to match crate versions.
[#2776](https://github.com/bytecodealliance/wasmtime/pull/2776)
### Changed
* Switched the default compiler backend on x86-64 to Cranelift's new backend.
This should not have any user-visible effects other than possibly runtime
performance improvements. The old backend is still available with the
`old-x86-backend` feature flag to the `cranelift-codegen` or `wasmtime`
crates, or programmatically with `BackendVariant::Legacy`. We plan to
maintain the old backend for at least one more release and ensure it works on
CI.
[#2718](https://github.com/bytecodealliance/wasmtime/pull/2718)
* Breaking: `Module::deserialize` has been removed in favor of `Module::new`.
* Breaking: `Config::cranelift_clear_cpu_flags` was removed. Use `Config::target`
to clear the CPU flags for the host's target.
* Breaking: `Config::cranelift_other_flag` was renamed to `Config::cranelift_flag_set`.
* CLI changes:
* Wasmtime CLI options to enable WebAssembly features have been replaced with
a single `--wasm-features` option. The previous options are still
supported, but are not displayed in help text.
* Breaking: the CLI option `--cranelift-flags` was changed to
`--cranelift-set`.
* Breaking: the CLI option `--enable-reference-types=false` has been changed
to `--wasm-features=-reference-types`.
* Breaking: the CLI option `--enable-multi-value=false` has been changed to
`--wasm-features=-multi-value`.
* Breaking: the CLI option `--enable-bulk-memory=false` has been changed to
`--wasm-features=-bulk-memory`.
* Improved error-reporting in wiggle.
[#2760](https://github.com/bytecodealliance/wasmtime/pull/2760)
* Make WASI sleeping fallible (some systems do not support sleep).
[#2756](https://github.com/bytecodealliance/wasmtime/pull/2756)
* WASI: Support `poll_oneoff` with a sleep.
[#2753](https://github.com/bytecodealliance/wasmtime/pull/2753)
* Allow a `StackMapSink` to be passed when defining functions with
`cranelift-module`.
[#2739](https://github.com/bytecodealliance/wasmtime/pull/2739)
* Some refactoring in new x86-64 backend to prepare for VEX/EVEX (e.g.,
AVX-512) instruction encodings to be supported.
[#2799](https://github.com/bytecodealliance/wasmtime/pull/2799)
### Fixed
* Fixed a corner case in `srem` (signed remainder) in the new x86-64 backend:
`INT_MIN % -1` should return `0`, rather than trapping. This only occurred
when `avoid_div_traps == false` was set by the embedding.
[#2763](https://github.com/bytecodealliance/wasmtime/pull/2763)
* Fixed a memory leak of the `Store` when an instance traps.
[#2803](https://github.com/bytecodealliance/wasmtime/pull/2803)
* Some fuzzing-related fixes.
[#2788](https://github.com/bytecodealliance/wasmtime/pull/2788),
[#2770](https://github.com/bytecodealliance/wasmtime/pull/2770)
* Fixed memory-initialization bug in uffd allocator that could copy into the
wrong destination under certain conditions. Does not affect the default
wasmtime instance allocator.
[#2801](https://github.com/bytecodealliance/wasmtime/pull/2801)
* Fix printing of float values from the Wasmtime CLI.
[#2797](https://github.com/bytecodealliance/wasmtime/pull/2797)
* Remove the ability for the `Linker` to instantiate modules with duplicate
import strings of different types.
[#2789](https://github.com/bytecodealliance/wasmtime/pull/2789)
## 0.25.0
Released 2021-03-16.
@@ -39,7 +170,7 @@ Released 2021-03-16.
### Fixed
* Interepretation of timestamps in `poll_oneoff` for WASI have been fixed to
* Interpretation of timestamps in `poll_oneoff` for WASI has been fixed to
correctly use nanoseconds instead of microseconds.
[#2717](https://github.com/bytecodealliance/wasmtime/pull/2717)

View File

@@ -155,11 +155,8 @@ fn write_testsuite_tests(
let testname = extract_name(path);
writeln!(out, "#[test]")?;
if experimental_x64_should_panic(testsuite, &testname, strategy) {
writeln!(
out,
r#"#[cfg_attr(feature = "experimental_x64", should_panic)]"#
)?;
if x64_should_panic(testsuite, &testname, strategy) {
writeln!(out, r#"#[should_panic]"#)?;
} else if ignore(testsuite, &testname, strategy) {
writeln!(out, "#[ignore]")?;
} else if pooling {
@@ -186,10 +183,10 @@ fn write_testsuite_tests(
Ok(())
}
/// For experimental_x64 backend features that are not supported yet, mark tests as panicking, so
/// For x64 backend features that are not supported yet, mark tests as panicking, so
/// they stop "passing" once the features are properly implemented.
fn experimental_x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool {
if !cfg!(feature = "experimental_x64") || strategy != "Cranelift" {
fn x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool {
if !platform_is_x64() || strategy != "Cranelift" {
return false;
}
@@ -222,12 +219,10 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
_ => (),
},
"Cranelift" => match (testsuite, testname) {
// TODO(#1886): Ignore reference types tests if this isn't x64,
// because Cranelift only supports reference types on x64.
("reference_types", _) => {
return env::var("CARGO_CFG_TARGET_ARCH").unwrap() != "x86_64";
}
// No simd support yet for s390x.
("simd", _) if platform_is_s390x() => return true,
("simd", _) if cfg!(feature = "old-x86-backend") => return true, // skip all SIMD tests on old backend.
// These are new instructions that are not really implemented in any backend.
("simd", "simd_i8x16_arith2")
| ("simd", "simd_conversions")
@@ -240,22 +235,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
| ("simd", "simd_i64x2_extmul_i32x4")
| ("simd", "simd_int_to_int_extend") => return true,
// These are only implemented on x64.
("simd", "simd_i64x2_arith2") | ("simd", "simd_boolean") => {
return !cfg!(feature = "experimental_x64")
}
// These are only implemented on aarch64 and x64.
("simd", "simd_i64x2_cmp")
| ("simd", "simd_f32x4_pmin_pmax")
| ("simd", "simd_f64x2_pmin_pmax")
| ("simd", "simd_f32x4_rounding")
| ("simd", "simd_f64x2_rounding")
| ("simd", "simd_i32x4_dot_i16x8") => {
return !(cfg!(feature = "experimental_x64")
|| env::var("CARGO_CFG_TARGET_ARCH").unwrap() == "aarch64")
}
_ => {}
},
_ => panic!("unrecognized strategy"),
@@ -263,3 +242,11 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
false
}
/// Reports whether the compilation target architecture is `x86_64`.
///
/// The architecture is read from the `CARGO_CFG_TARGET_ARCH` environment
/// variable; this panics if that variable is unset or not valid Unicode.
fn platform_is_x64() -> bool {
    let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap();
    target_arch == "x86_64"
}
/// Reports whether the compilation target architecture is `s390x`.
///
/// The architecture is read from the `CARGO_CFG_TARGET_ARCH` environment
/// variable; this panics if that variable is unset or not valid Unicode.
fn platform_is_s390x() -> bool {
    let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap();
    target_arch == "s390x"
}

View File

@@ -1,36 +0,0 @@
#!/bin/bash
# Use the Nightly variant of the compiler to properly unify the
# experimental_x64 feature across all crates. Once the feature has stabilized
# and become the default, we can remove this.
CARGO_VERSION=${CARGO_VERSION:-"+nightly"}
# Some WASI tests seem to have an issue on Windows with symlinks if we run them
# with this particular invocation. It's unclear why (nightly toolchain?) but
# we're moving to the new backend by default soon enough, and all tests seem to
# work with the main test setup, so let's just work around this by skipping
# the tests for now.
MINGW_EXTRA=""
if [ `uname -o` == "Msys" ]; then
MINGW_EXTRA="-- --skip wasi_cap_std_sync"
fi
cargo $CARGO_VERSION \
--locked \
-Zfeatures=all -Zpackage-features \
test \
--features test-programs/test_programs \
--features experimental_x64 \
--all \
--exclude wasmtime-lightbeam \
--exclude wasmtime-wasi-nn \
--exclude wasmtime-wasi-crypto \
--exclude peepmatic \
--exclude peepmatic-automata \
--exclude peepmatic-fuzzing \
--exclude peepmatic-macro \
--exclude peepmatic-runtime \
--exclude peepmatic-test \
--exclude peepmatic-souper \
--exclude lightbeam \
$MINGW_EXTRA

18
ci/run-old-x86-ci.sh Executable file
View File

@@ -0,0 +1,18 @@
#!/bin/bash

# Run the workspace test suite with the `old-x86-backend` feature enabled.
#
# `--locked` makes Cargo fail rather than update Cargo.lock. The lightbeam,
# wasi-nn, wasi-crypto and peepmatic crates are excluded from this run.
#
# `--workspace` is used instead of `--all`, which Cargo documents as a
# deprecated alias for the same flag.
cargo test \
  --locked \
  --features test-programs/test_programs \
  --features old-x86-backend \
  --workspace \
  --exclude wasmtime-lightbeam \
  --exclude wasmtime-wasi-nn \
  --exclude wasmtime-wasi-crypto \
  --exclude peepmatic \
  --exclude peepmatic-automata \
  --exclude peepmatic-fuzzing \
  --exclude peepmatic-macro \
  --exclude peepmatic-runtime \
  --exclude peepmatic-test \
  --exclude peepmatic-souper \
  --exclude lightbeam

View File

@@ -7,4 +7,4 @@ pushd "$RUST_BINDINGS"
cargo build --release --target=wasm32-wasi
popd
cargo run --features wasi-crypto -- run "$RUST_BINDINGS/target/wasm32-wasi/release/wasi-crypto-guest.wasm"
cargo run --features wasi-crypto -- run "$RUST_BINDINGS/target/wasm32-wasi/release/wasi-crypto-guest.wasm" --wasi-modules=experimental-wasi-crypto

View File

@@ -7,7 +7,7 @@
# executed with the Wasmtime CLI.
set -e
WASMTIME_DIR=$(dirname "$0" | xargs dirname)
FIXTURE=https://github.com/intel/openvino-rs/raw/main/crates/openvino/tests/fixtures/alexnet
FIXTURE=https://github.com/intel/openvino-rs/raw/main/crates/openvino/tests/fixtures/mobilenet
if [ -z "${1+x}" ]; then
# If no temporary directory is specified, create one.
TMP_DIR=$(mktemp -d -t ci-XXXXXXXXXX)
@@ -26,9 +26,9 @@ source /opt/intel/openvino/bin/setupvars.sh
OPENVINO_INSTALL_DIR=/opt/intel/openvino cargo build -p wasmtime-cli --features wasi-nn
# Download all necessary test fixtures to the temporary directory.
wget --no-clobber --directory-prefix=$TMP_DIR $FIXTURE/alexnet.bin
wget --no-clobber --directory-prefix=$TMP_DIR $FIXTURE/alexnet.xml
wget --no-clobber --directory-prefix=$TMP_DIR $FIXTURE/tensor-1x3x227x227-f32.bgr
wget --no-clobber $FIXTURE/mobilenet.bin --output-document=$TMP_DIR/model.bin
wget --no-clobber $FIXTURE/mobilenet.xml --output-document=$TMP_DIR/model.xml
wget --no-clobber $FIXTURE/tensor-1x224x224x3-f32.bgr --output-document=$TMP_DIR/tensor.bgr
# Now build an example that uses the wasi-nn API.
pushd $WASMTIME_DIR/crates/wasi-nn/examples/classification-example
@@ -37,7 +37,7 @@ cp target/wasm32-wasi/release/wasi-nn-example.wasm $TMP_DIR
popd
# Run the example in Wasmtime (note that the example uses `fixture` as the expected location of the model/tensor files).
OPENVINO_INSTALL_DIR=/opt/intel/openvino cargo run --features wasi-nn -- run --mapdir fixture::$TMP_DIR $TMP_DIR/wasi-nn-example.wasm
cargo run -- run --mapdir fixture::$TMP_DIR $TMP_DIR/wasi-nn-example.wasm --wasi-modules=experimental-wasi-nn
# Clean up the temporary directory only if it was not specified (users may want to keep the directory around).
if [[ $REMOVE_TMP_DIR -eq 1 ]]; then

View File

@@ -1,7 +1,7 @@
[package]
name = "cranelift-tools"
authors = ["The Cranelift Project Developers"]
version = "0.66.0"
version = "0.73.0"
description = "Binaries for testing the Cranelift libraries"
license = "Apache-2.0 WITH LLVM-exception"
documentation = "https://github.com/bytecodealliance/wasmtime/blob/main/cranelift/docs/index.md"
@@ -15,27 +15,27 @@ path = "src/clif-util.rs"
[dependencies]
cfg-if = "1.0"
cranelift-codegen = { path = "codegen", version = "0.72.0" }
cranelift-entity = { path = "entity", version = "0.72.0" }
cranelift-interpreter = { path = "interpreter", version = "0.72.0" }
cranelift-reader = { path = "reader", version = "0.72.0" }
cranelift-frontend = { path = "frontend", version = "0.72.0" }
cranelift-serde = { path = "serde", version = "0.72.0", optional = true }
cranelift-wasm = { path = "wasm", version = "0.72.0", optional = true }
cranelift-native = { path = "native", version = "0.72.0" }
cranelift-filetests = { path = "filetests", version = "0.66.0" }
cranelift-module = { path = "module", version = "0.72.0" }
cranelift-object = { path = "object", version = "0.72.0" }
cranelift-jit = { path = "jit", version = "0.72.0" }
cranelift-preopt = { path = "preopt", version = "0.72.0" }
cranelift = { path = "umbrella", version = "0.72.0" }
cranelift-codegen = { path = "codegen", version = "0.73.0" }
cranelift-entity = { path = "entity", version = "0.73.0" }
cranelift-interpreter = { path = "interpreter", version = "0.73.0" }
cranelift-reader = { path = "reader", version = "0.73.0" }
cranelift-frontend = { path = "frontend", version = "0.73.0" }
cranelift-serde = { path = "serde", version = "0.73.0", optional = true }
cranelift-wasm = { path = "wasm", version = "0.73.0", optional = true }
cranelift-native = { path = "native", version = "0.73.0" }
cranelift-filetests = { path = "filetests", version = "0.73.0" }
cranelift-module = { path = "module", version = "0.73.0" }
cranelift-object = { path = "object", version = "0.73.0" }
cranelift-jit = { path = "jit", version = "0.73.0" }
cranelift-preopt = { path = "preopt", version = "0.73.0" }
cranelift = { path = "umbrella", version = "0.73.0" }
filecheck = "0.5.0"
log = "0.4.8"
termcolor = "1.1.2"
capstone = { version = "0.7.0", optional = true }
wat = { version = "1.0.36", optional = true }
target-lexicon = { version = "0.11", features = ["std"] }
peepmatic-souper = { path = "./peepmatic/crates/souper", version = "0.72.0", optional = true }
target-lexicon = { version = "0.12", features = ["std"] }
peepmatic-souper = { path = "./peepmatic/crates/souper", version = "0.73.0", optional = true }
pretty_env_logger = "0.4.0"
rayon = { version = "1", optional = true }
file-per-thread-logger = "0.1.2"
@@ -50,6 +50,6 @@ default = ["disas", "wasm", "cranelift-codegen/all-arch", "peepmatic-souper", "s
disas = ["capstone"]
enable-peepmatic = ["cranelift-codegen/enable-peepmatic", "cranelift-filetests/enable-peepmatic"]
wasm = ["wat", "cranelift-wasm"]
experimental_x64 = ["cranelift-codegen/x64", "cranelift-filetests/experimental_x64", "cranelift-reader/experimental_x64"]
experimental_arm32 = ["cranelift-codegen/arm32", "cranelift-filetests/experimental_arm32"]
souper-harvest = ["cranelift-codegen/souper-harvest", "rayon"]
all-arch = ["cranelift-codegen/all-arch"]

View File

@@ -1,7 +1,7 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-bforest"
version = "0.72.0"
version = "0.73.0"
description = "A forest of B+-trees"
license = "Apache-2.0 WITH LLVM-exception"
documentation = "https://docs.rs/cranelift-bforest"
@@ -12,7 +12,7 @@ keywords = ["btree", "forest", "set", "map"]
edition = "2018"
[dependencies]
cranelift-entity = { path = "../entity", version = "0.72.0", default-features = false }
cranelift-entity = { path = "../entity", version = "0.73.0", default-features = false }
[badges]
maintenance = { status = "experimental" }

View File

@@ -1,7 +1,7 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-codegen"
version = "0.72.0"
version = "0.73.0"
description = "Low-level code generator library"
license = "Apache-2.0 WITH LLVM-exception"
documentation = "https://docs.rs/cranelift-codegen"
@@ -13,21 +13,19 @@ build = "build.rs"
edition = "2018"
[dependencies]
cranelift-codegen-shared = { path = "./shared", version = "0.72.0" }
cranelift-entity = { path = "../entity", version = "0.72.0" }
cranelift-bforest = { path = "../bforest", version = "0.72.0" }
cranelift-codegen-shared = { path = "./shared", version = "0.73.0" }
cranelift-entity = { path = "../entity", version = "0.73.0" }
cranelift-bforest = { path = "../bforest", version = "0.73.0" }
hashbrown = { version = "0.9.1", optional = true }
target-lexicon = "0.11"
target-lexicon = "0.12"
log = { version = "0.4.6", default-features = false }
serde = { version = "1.0.94", features = ["derive"], optional = true }
bincode = { version = "1.2.1", optional = true }
gimli = { version = "0.23.0", default-features = false, features = ["write"], optional = true }
gimli = { version = "0.24.0", default-features = false, features = ["write"], optional = true }
smallvec = { version = "1.6.1" }
thiserror = "1.0.4"
byteorder = { version = "1.3.2", default-features = false }
peepmatic = { path = "../peepmatic", optional = true, version = "0.72.0" }
peepmatic-traits = { path = "../peepmatic/crates/traits", optional = true, version = "0.72.0" }
peepmatic-runtime = { path = "../peepmatic/crates/runtime", optional = true, version = "0.72.0" }
peepmatic = { path = "../peepmatic", optional = true, version = "0.73.0" }
peepmatic-traits = { path = "../peepmatic/crates/traits", optional = true, version = "0.73.0" }
peepmatic-runtime = { path = "../peepmatic/crates/runtime", optional = true, version = "0.73.0" }
regalloc = { version = "0.0.31" }
souper-ir = { version = "2.1.0", optional = true }
wast = { version = "35.0.0", optional = true }
@@ -36,8 +34,11 @@ wast = { version = "35.0.0", optional = true }
# machine code. Integration tests that need external dependencies can be
# accommodated in `tests`.
[dev-dependencies]
criterion = "0.3"
[build-dependencies]
cranelift-codegen-meta = { path = "meta", version = "0.72.0" }
cranelift-codegen-meta = { path = "meta", version = "0.73.0" }
[features]
default = ["std", "unwind"]
@@ -63,14 +64,22 @@ unwind = ["gimli"]
x86 = []
arm64 = []
riscv = []
x64 = [] # New work-in-progress codegen backend for x86_64 based on the new isel.
s390x = []
arm32 = [] # Work-in-progress codegen backend for ARM.
# Stub feature that does nothing, for Cargo-features compatibility: the new
# backend is the default now.
experimental_x64 = []
# Make the old x86 backend the default.
old-x86-backend = []
# Option to enable all architectures.
all-arch = [
"x86",
"arm64",
"riscv"
"riscv",
"s390x"
]
# For dependent crates that want to serialize some parts of cranelift
@@ -97,3 +106,7 @@ souper-harvest = ["souper-ir", "souper-ir/stringify"]
[badges]
maintenance = { status = "experimental" }
[[bench]]
name = "x64-evex-encoding"
harness = false

View File

@@ -0,0 +1,138 @@
//! Measure instruction encoding latency using various approaches; the
//! benchmarking is feature-gated on `x86` since it only measures the encoding
//! mechanism of that backend.
#[cfg(feature = "x86")]
mod x86 {
use cranelift_codegen::isa::x64::encoding::{
evex::{EvexContext, EvexInstruction, EvexMasking, EvexVectorLength, Register},
rex::OpcodeMap,
rex::{encode_modrm, LegacyPrefixes},
ByteSink,
};
use cranelift_codegen_shared::isa::x86::EncodingBits;
use criterion::{criterion_group, Criterion};
/// Registers two benchmarks in the Criterion group "x64 EVEX encoding".
///
/// Both benchmarks encode into a byte vector that is cleared at the start of
/// every iteration:
/// - "EvexInstruction (builder pattern)" builds and encodes an
///   `EvexInstruction` through its chained setters;
/// - "encode_evex (function pattern)" calls the free function `encode_evex`
///   with pre-built `EncodingBits`.
fn x64_evex_encoding_benchmarks(c: &mut Criterion) {
    let mut evex_group = c.benchmark_group("x64 EVEX encoding");

    // Registers shared by both benchmarks, built from register numbers 0 and 2.
    let rax = Register::from(0);
    let rdx = Register::from(2);

    evex_group.bench_function("EvexInstruction (builder pattern)", |bencher| {
        let mut buffer = vec![];
        bencher.iter(|| {
            buffer.clear();
            let instruction = EvexInstruction::new()
                .prefix(LegacyPrefixes::_66)
                .map(OpcodeMap::_0F38)
                .w(true)
                .opcode(0x1F)
                .reg(rax)
                .rm(rdx)
                .length(EvexVectorLength::V128);
            instruction.encode(&mut buffer);
        });
    });

    evex_group.bench_function("encode_evex (function pattern)", |bencher| {
        let mut buffer = vec![];
        let encoding = EncodingBits::new(&[0x66, 0x0f, 0x38, 0x1f], 0, 1);
        let vvvvv_reg = Register::from(0);
        bencher.iter(|| {
            buffer.clear();
            // The context and masking values are constructed inside the
            // measured closure, exactly as the call arguments were before.
            let context = EvexContext::Other {
                length: EvexVectorLength::V128,
            };
            let masking = EvexMasking::default();
            encode_evex(encoding, rax, vvvvv_reg, rdx, context, masking, &mut buffer);
        })
    });
}
criterion_group!(benches, x64_evex_encoding_benchmarks);
/// Runs the `benches` group and prints Criterion's final summary.
///
/// Because the benchmarks live in an inner, feature-gated module, the steps
/// for running them are spelled out by hand here (see `criterion_main!`).
pub fn run_benchmarks() {
    // Emit Criterion's feature-related warnings before running anything.
    criterion::__warn_about_html_reports_feature();
    criterion::__warn_about_cargo_bench_support_feature();

    // Run the benchmark group.
    benches();

    // Build a Criterion configured from the command-line arguments and ask
    // it for the final summary.
    let criterion = Criterion::default().configure_from_args();
    criterion.final_summary();
}
/// From the legacy x86 backend: encodes one EVEX instruction into `sink`,
/// covering the prefix bytes, the opcode byte and the ModRM byte.
///
/// Only the `reg` (operand 1), `vvvv` (operand 2), `rm` (operand 3) form is
/// handled; the other forms (see section 2.6.2, Intel Software Development
/// Manual, volume 2A) would need this function to be refactored or separate
/// functions to be written for each form (e.g. as for the REX prefix).
///
/// Six bytes are emitted, in this order: `0x62`, P0, P1, P2, the opcode and
/// the ModRM byte.
#[inline(always)]
pub fn encode_evex<CS: ByteSink + ?Sized>(
    enc: EncodingBits,
    reg: Register,
    vvvvv: Register,
    rm: Register,
    context: EvexContext,
    masking: EvexMasking,
    sink: &mut CS,
) {
    let reg_enc: u8 = reg.into();
    let rm_enc: u8 = rm.into();
    let vvvvv_enc: u8 = vvvvv.into();

    // EVEX prefix.
    sink.put1(0x62);

    // P0: `mm` in bits 1:0, bits 3:2 always unset, RXBR' in bits 7:4.
    debug_assert!(enc.mm() < 0b100);
    let byte_p0 = (enc.mm() & 0b11) | (evex2(rm_enc, reg_enc) << 4);
    sink.put1(byte_p0);

    // P1: `pp` in the low bits, bit 2 always set, the inverted low four bits
    // of `vvvvv` in bits 6:3 and W in bit 7.
    let byte_p1 = enc.pp() | 0b100 | ((!vvvvv_enc & 0b1111) << 3) | ((enc.rex_w() & 0b1) << 7);
    sink.put1(byte_p1);

    // P2: masking `aaa` bits, the inverted fifth bit of `vvvvv` in bit 3, the
    // context bits starting at bit 4 and the masking `z` bit in bit 7.
    let byte_p2 = masking.aaa_bits()
        | ((!(vvvvv_enc >> 4) & 0b1) << 3)
        | (context.bits() << 4)
        | (masking.z_bit() << 7);
    sink.put1(byte_p2);

    // Opcode.
    sink.put1(enc.opcode_byte());

    // ModR/M byte: mode 3 with the low three bits of each register number.
    sink.put1(encode_modrm(3, reg_enc & 7, rm_enc & 7));
}
/// From the legacy x86 backend: computes the RXBR' bits of the EVEX P0 byte.
///
/// Each bit is the inverted value of one bit of a register number: R' is bit 4
/// of `reg`, B is bit 3 of `rm`, X is bit 4 of `rm` and R is bit 3 of `reg`.
/// The result holds R' in bit 0, B in bit 1, X in bit 2 and R in bit 3.
///
/// For an explanation of these bits, see section 2.6.1 in the Intel Software
/// Development Manual, volume 2A. Other addressing modes (see section 2.6.2)
/// may use these bits differently and would need their own `evex*` helpers.
fn evex2(rm: u8, reg: u8) -> u8 {
    // Extract bit `index` of `value` and invert it.
    let inverted_bit = |value: u8, index: u8| !(value >> index) & 1;

    let r_prime = inverted_bit(reg, 4);
    let b = inverted_bit(rm, 3);
    let x = inverted_bit(rm, 4);
    let r = inverted_bit(reg, 3);

    (r << 3) | (x << 2) | (b << 1) | r_prime
}
}
/// Benchmark entry point: runs the benchmarks when the `x86` Cargo feature is
/// enabled, and otherwise prints a message explaining why nothing was run.
fn main() {
// Exactly one of the two statements below is compiled in, selected by the
// `x86` feature.
#[cfg(feature = "x86")]
x86::run_benchmarks();
#[cfg(not(feature = "x86"))]
println!(
"Unable to run the x64-evex-encoding benchmark; the `x86` feature must be enabled in Cargo.",
);
}

View File

@@ -1,19 +1,20 @@
[package]
name = "cranelift-codegen-meta"
authors = ["The Cranelift Project Developers"]
version = "0.72.0"
version = "0.73.0"
description = "Metaprogram for cranelift-codegen code generator library"
license = "Apache-2.0 WITH LLVM-exception"
repository = "https://github.com/bytecodealliance/wasmtime"
readme = "README.md"
edition = "2018"
[package.metadata.docs.rs]
rustdoc-args = [ "--document-private-items" ]
# FIXME(rust-lang/cargo#9300): uncomment once that lands
# [package.metadata.docs.rs]
# rustdoc-args = [ "--document-private-items" ]
[dependencies]
cranelift-codegen-shared = { path = "../shared", version = "0.72.0" }
cranelift-entity = { path = "../../entity", version = "0.72.0" }
cranelift-codegen-shared = { path = "../shared", version = "0.73.0" }
cranelift-entity = { path = "../../entity", version = "0.73.0" }
[badges]
maintenance = { status = "experimental" }

View File

@@ -20,6 +20,7 @@ pub(crate) enum SpecificSetting {
#[derive(Hash, PartialEq, Eq)]
pub(crate) struct Setting {
pub name: &'static str,
pub description: &'static str,
pub comment: &'static str,
pub specific: SpecificSetting,
pub byte_offset: u8,
@@ -88,6 +89,7 @@ impl Into<PresetType> for PresetIndex {
#[derive(Hash, PartialEq, Eq)]
pub(crate) struct Preset {
pub name: &'static str,
pub description: &'static str,
values: Vec<BoolSettingIndex>,
}
@@ -169,6 +171,7 @@ pub(crate) enum ProtoSpecificSetting {
/// This is the information provided during building for a setting.
struct ProtoSetting {
name: &'static str,
description: &'static str,
comment: &'static str,
specific: ProtoSpecificSetting,
}
@@ -251,11 +254,13 @@ impl SettingGroupBuilder {
fn add_setting(
&mut self,
name: &'static str,
description: &'static str,
comment: &'static str,
specific: ProtoSpecificSetting,
) {
self.settings.push(ProtoSetting {
name,
description,
comment,
specific,
})
@@ -264,6 +269,7 @@ impl SettingGroupBuilder {
pub fn add_bool(
&mut self,
name: &'static str,
description: &'static str,
comment: &'static str,
default: bool,
) -> BoolSettingIndex {
@@ -271,28 +277,55 @@ impl SettingGroupBuilder {
self.predicates.is_empty(),
"predicates must be added after the boolean settings"
);
self.add_setting(name, comment, ProtoSpecificSetting::Bool(default));
self.add_setting(
name,
description,
comment,
ProtoSpecificSetting::Bool(default),
);
BoolSettingIndex(self.settings.len() - 1)
}
pub fn add_enum(
&mut self,
name: &'static str,
description: &'static str,
comment: &'static str,
values: Vec<&'static str>,
) {
self.add_setting(name, comment, ProtoSpecificSetting::Enum(values));
self.add_setting(
name,
description,
comment,
ProtoSpecificSetting::Enum(values),
);
}
pub fn add_num(&mut self, name: &'static str, comment: &'static str, default: u8) {
self.add_setting(name, comment, ProtoSpecificSetting::Num(default));
pub fn add_num(
&mut self,
name: &'static str,
description: &'static str,
comment: &'static str,
default: u8,
) {
self.add_setting(
name,
description,
comment,
ProtoSpecificSetting::Num(default),
);
}
pub fn add_predicate(&mut self, name: &'static str, node: PredicateNode) {
self.predicates.push(ProtoPredicate { name, node });
}
pub fn add_preset(&mut self, name: &'static str, args: Vec<PresetType>) -> PresetIndex {
pub fn add_preset(
&mut self,
name: &'static str,
description: &'static str,
args: Vec<PresetType>,
) -> PresetIndex {
let mut values = Vec::new();
for arg in args {
match arg {
@@ -302,7 +335,11 @@ impl SettingGroupBuilder {
PresetType::BoolSetting(index) => values.push(index),
}
}
self.presets.push(Preset { name, values });
self.presets.push(Preset {
name,
description,
values,
});
PresetIndex(self.presets.len() - 1)
}
@@ -347,6 +384,7 @@ impl SettingGroupBuilder {
group.settings.push(Setting {
name: s.name,
description: s.description,
comment: s.comment,
byte_offset,
specific,
@@ -367,6 +405,7 @@ impl SettingGroupBuilder {
};
group.settings.push(Setting {
name: s.name,
description: s.description,
comment: s.comment,
byte_offset: byte_offset + predicate_number / 8,
specific: SpecificSetting::Bool(BoolSetting {

View File

@@ -70,6 +70,33 @@ fn gen_constructor(group: &SettingGroup, parent: ParentGroup, fmt: &mut Formatte
fmtln!(fmt, "}");
}
/// Generates the `iter` function.
///
/// Emits an `impl Flags` block containing
/// `pub fn iter(&self) -> impl Iterator<Item = Value>`. The only field of
/// `group` read here is `settings_size`, which sizes the byte array copied out
/// of `self.bytes` in the generated code.
fn gen_iterator(group: &SettingGroup, fmt: &mut Formatter) {
fmtln!(fmt, "impl Flags {");
fmt.indent(|fmt| {
fmt.doc_comment("Iterates the setting values.");
fmtln!(fmt, "pub fn iter(&self) -> impl Iterator<Item = Value> {");
fmt.indent(|fmt| {
// The generated code copies the first `settings_size` bytes into a local
// array, which the `move` closure emitted below then captures.
fmtln!(fmt, "let mut bytes = [0; {}];", group.settings_size);
fmtln!(fmt, "bytes.copy_from_slice(&self.bytes[0..{}]);", group.settings_size);
fmtln!(fmt, "DESCRIPTORS.iter().filter_map(move |d| {");
fmt.indent(|fmt| {
// Generated match: preset descriptors are skipped, enum descriptors carry
// their enumerator list, and every other kind has no value list.
fmtln!(fmt, "let values = match &d.detail {");
fmt.indent(|fmt| {
fmtln!(fmt, "detail::Detail::Preset => return None,");
fmtln!(fmt, "detail::Detail::Enum { last, enumerators } => Some(TEMPLATE.enums(*last, *enumerators)),");
fmtln!(fmt, "_ => None");
});
fmtln!(fmt, "};");
// Each yielded `Value` carries the byte found at the descriptor's offset.
fmtln!(fmt, "Some(Value{ name: d.name, detail: d.detail, values, value: bytes[d.offset as usize] })");
});
fmtln!(fmt, "})");
});
fmtln!(fmt, "}");
});
fmtln!(fmt, "}");
}
/// Emit Display and FromStr implementations for enum settings.
fn gen_to_and_from_str(name: &str, values: &[&'static str], fmt: &mut Formatter) {
fmtln!(fmt, "impl fmt::Display for {} {{", name);
@@ -136,7 +163,7 @@ fn gen_enum_types(group: &SettingGroup, fmt: &mut Formatter) {
/// Emit a getter function for `setting`.
fn gen_getter(setting: &Setting, fmt: &mut Formatter) {
fmt.doc_comment(setting.comment);
fmt.doc_comment(format!("{}\n{}", setting.description, setting.comment));
match setting.specific {
SpecificSetting::Bool(BoolSetting {
predicate_number, ..
@@ -254,6 +281,7 @@ fn gen_descriptors(group: &SettingGroup, fmt: &mut Formatter) {
fmtln!(fmt, "detail::Descriptor {");
fmt.indent(|fmt| {
fmtln!(fmt, "name: \"{}\",", setting.name);
fmtln!(fmt, "description: \"{}\",", setting.description);
fmtln!(fmt, "offset: {},", setting.byte_offset);
match setting.specific {
SpecificSetting::Bool(BoolSetting { bit_offset, .. }) => {
@@ -286,6 +314,7 @@ fn gen_descriptors(group: &SettingGroup, fmt: &mut Formatter) {
fmtln!(fmt, "detail::Descriptor {");
fmt.indent(|fmt| {
fmtln!(fmt, "name: \"{}\",", preset.name);
fmtln!(fmt, "description: \"{}\",", preset.description);
fmtln!(fmt, "offset: {},", (idx as u8) * group.settings_size);
fmtln!(fmt, "detail: detail::Detail::Preset,");
});
@@ -427,6 +456,7 @@ fn gen_group(group: &SettingGroup, parent: ParentGroup, fmt: &mut Formatter) {
fmtln!(fmt, "}");
gen_constructor(group, parent, fmt);
gen_iterator(group, fmt);
gen_enum_types(group, fmt);
gen_getters(group, fmt);
gen_descriptors(group, fmt);

View File

@@ -9,7 +9,7 @@ use crate::shared::Definitions as SharedDefinitions;
fn define_settings(_shared: &SettingGroup) -> SettingGroup {
let mut setting = SettingGroupBuilder::new("arm64");
let has_lse = setting.add_bool("has_lse", "Large System Extensions", false);
let has_lse = setting.add_bool("has_lse", "Has Large System Extensions support.", "", false);
setting.add_predicate("use_lse", predicate!(has_lse));
setting.build()

View File

@@ -6,6 +6,7 @@ use std::fmt;
mod arm32;
mod arm64;
mod riscv;
mod s390x;
pub(crate) mod x86;
/// Represents known ISA target.
@@ -15,6 +16,7 @@ pub enum Isa {
X86,
Arm32,
Arm64,
S390x,
}
impl Isa {
@@ -31,6 +33,7 @@ impl Isa {
match arch {
"riscv" => Some(Isa::Riscv),
"aarch64" => Some(Isa::Arm64),
"s390x" => Some(Isa::S390x),
x if ["x86_64", "i386", "i586", "i686"].contains(&x) => Some(Isa::X86),
x if x.starts_with("arm") || arch.starts_with("thumb") => Some(Isa::Arm32),
_ => None,
@@ -39,7 +42,7 @@ impl Isa {
/// Returns all supported isa targets.
pub fn all() -> &'static [Isa] {
&[Isa::Riscv, Isa::X86, Isa::Arm32, Isa::Arm64]
&[Isa::Riscv, Isa::X86, Isa::Arm32, Isa::Arm64, Isa::S390x]
}
}
@@ -51,6 +54,7 @@ impl fmt::Display for Isa {
Isa::X86 => write!(f, "x86"),
Isa::Arm32 => write!(f, "arm32"),
Isa::Arm64 => write!(f, "arm64"),
Isa::S390x => write!(f, "s390x"),
}
}
}
@@ -62,6 +66,7 @@ pub(crate) fn define(isas: &[Isa], shared_defs: &mut SharedDefinitions) -> Vec<T
Isa::X86 => x86::define(shared_defs),
Isa::Arm32 => arm32::define(shared_defs),
Isa::Arm64 => arm64::define(shared_defs),
Isa::S390x => s390x::define(shared_defs),
})
.collect()
}

View File

@@ -17,33 +17,39 @@ fn define_settings(shared: &SettingGroup) -> SettingGroup {
let supports_m = setting.add_bool(
"supports_m",
"CPU supports the 'M' extension (mul/div)",
"",
false,
);
let supports_a = setting.add_bool(
"supports_a",
"CPU supports the 'A' extension (atomics)",
"",
false,
);
let supports_f = setting.add_bool(
"supports_f",
"CPU supports the 'F' extension (float)",
"",
false,
);
let supports_d = setting.add_bool(
"supports_d",
"CPU supports the 'D' extension (double)",
"",
false,
);
let enable_m = setting.add_bool(
"enable_m",
"Enable the use of 'M' instructions if available",
"",
true,
);
setting.add_bool(
"enable_e",
"Enable the 'RV32E' instruction set with only 16 registers",
"",
false,
);

View File

@@ -0,0 +1,31 @@
use crate::cdsl::cpu_modes::CpuMode;
use crate::cdsl::instructions::{InstructionGroupBuilder, InstructionPredicateMap};
use crate::cdsl::isa::TargetIsa;
use crate::cdsl::recipes::Recipes;
use crate::cdsl::regs::IsaRegsBuilder;
use crate::cdsl::settings::SettingGroupBuilder;
use crate::shared::Definitions as SharedDefinitions;
/// Builds the `TargetIsa` description named "s390x".
///
/// The instruction group is built from the shared instruction list. The
/// setting group, register set, recipes and encoding-predicate map are created
/// without adding anything to them. A single CPU mode, also named "s390x",
/// uses the shared "expand" transform group as its default legalization.
pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
    let inst_group = InstructionGroupBuilder::new(&mut shared_defs.all_instructions).build();

    // The only CPU mode; it legalizes through the shared "expand" group.
    let mut s390x_mode = CpuMode::new("s390x");
    s390x_mode.legalize_default(shared_defs.transform_groups.by_name("expand"));

    TargetIsa::new(
        "s390x",
        inst_group,
        SettingGroupBuilder::new("s390x").build(),
        IsaRegsBuilder::new().build(),
        Recipes::new(),
        vec![s390x_mode],
        InstructionPredicateMap::new(),
    )
}

View File

@@ -4,37 +4,77 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
let mut settings = SettingGroupBuilder::new("x86");
// CPUID.01H:ECX
let has_sse3 = settings.add_bool("has_sse3", "SSE3: CPUID.01H:ECX.SSE3[bit 0]", false);
let has_ssse3 = settings.add_bool("has_ssse3", "SSSE3: CPUID.01H:ECX.SSSE3[bit 9]", false);
let has_sse41 = settings.add_bool("has_sse41", "SSE4.1: CPUID.01H:ECX.SSE4_1[bit 19]", false);
let has_sse42 = settings.add_bool("has_sse42", "SSE4.2: CPUID.01H:ECX.SSE4_2[bit 20]", false);
let has_avx = settings.add_bool("has_avx", "AVX: CPUID.01H:ECX.AVX[bit 28]", false);
let has_avx2 = settings.add_bool("has_avx2", "AVX2: CPUID.07H:EBX.AVX2[bit 5]", false);
let has_sse3 = settings.add_bool(
"has_sse3",
"Has support for SSE3.",
"SSE3: CPUID.01H:ECX.SSE3[bit 0]",
false,
);
let has_ssse3 = settings.add_bool(
"has_ssse3",
"Has support for SSSE3.",
"SSSE3: CPUID.01H:ECX.SSSE3[bit 9]",
false,
);
let has_sse41 = settings.add_bool(
"has_sse41",
"Has support for SSE4.1.",
"SSE4.1: CPUID.01H:ECX.SSE4_1[bit 19]",
false,
);
let has_sse42 = settings.add_bool(
"has_sse42",
"Has support for SSE4.2.",
"SSE4.2: CPUID.01H:ECX.SSE4_2[bit 20]",
false,
);
let has_avx = settings.add_bool(
"has_avx",
"Has support for AVX.",
"AVX: CPUID.01H:ECX.AVX[bit 28]",
false,
);
let has_avx2 = settings.add_bool(
"has_avx2",
"Has support for AVX2.",
"AVX2: CPUID.07H:EBX.AVX2[bit 5]",
false,
);
let has_avx512dq = settings.add_bool(
"has_avx512dq",
"Has support for AVX512DQ.",
"AVX512DQ: CPUID.07H:EBX.AVX512DQ[bit 17]",
false,
);
let has_avx512vl = settings.add_bool(
"has_avx512vl",
"Has support for AVX512VL.",
"AVX512VL: CPUID.07H:EBX.AVX512VL[bit 31]",
false,
);
let has_avx512f = settings.add_bool(
"has_avx512f",
"Has support for AVX512F.",
"AVX512F: CPUID.07H:EBX.AVX512F[bit 16]",
false,
);
let has_popcnt = settings.add_bool("has_popcnt", "POPCNT: CPUID.01H:ECX.POPCNT[bit 23]", false);
let has_popcnt = settings.add_bool(
"has_popcnt",
"Has support for POPCNT.",
"POPCNT: CPUID.01H:ECX.POPCNT[bit 23]",
false,
);
// CPUID.(EAX=07H, ECX=0H):EBX
let has_bmi1 = settings.add_bool(
"has_bmi1",
"Has support for BMI1.",
"BMI1: CPUID.(EAX=07H, ECX=0H):EBX.BMI1[bit 3]",
false,
);
let has_bmi2 = settings.add_bool(
"has_bmi2",
"Has support for BMI2.",
"BMI2: CPUID.(EAX=07H, ECX=0H):EBX.BMI2[bit 8]",
false,
);
@@ -42,6 +82,7 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
// CPUID.EAX=80000001H:ECX
let has_lzcnt = settings.add_bool(
"has_lzcnt",
"Has support for LZCNT.",
"LZCNT: CPUID.EAX=80000001H:ECX.LZCNT[bit 5]",
false,
);
@@ -85,7 +126,7 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
settings.add_predicate("use_lzcnt", predicate!(has_lzcnt));
// Some shared boolean values are used in x86 instruction predicates, so we need to group them
// in the same TargetIsa, for compabitibity with code generated by meta-python.
// in the same TargetIsa, for compatibility with code generated by meta-python.
// TODO Once all the meta generation code has been migrated from Python to Rust, we can put it
// back in the shared SettingGroup, and use it in x86 instruction predicates.
@@ -104,21 +145,40 @@ pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
// Presets corresponding to x86 CPUs.
settings.add_preset("baseline", preset!());
settings.add_preset(
"baseline",
"A baseline preset with no extensions enabled.",
preset!(),
);
let nehalem = settings.add_preset(
"nehalem",
"Nehalem microarchitecture.",
preset!(has_sse3 && has_ssse3 && has_sse41 && has_sse42 && has_popcnt),
);
let haswell = settings.add_preset(
"haswell",
"Haswell microarchitecture.",
preset!(nehalem && has_bmi1 && has_bmi2 && has_lzcnt),
);
let broadwell = settings.add_preset("broadwell", preset!(haswell));
let skylake = settings.add_preset("skylake", preset!(broadwell));
let cannonlake = settings.add_preset("cannonlake", preset!(skylake));
settings.add_preset("icelake", preset!(cannonlake));
let broadwell = settings.add_preset(
"broadwell",
"Broadwell microarchitecture.",
preset!(haswell),
);
let skylake = settings.add_preset("skylake", "Skylake microarchitecture.", preset!(broadwell));
// Preset for Intel's Cannon Lake microarchitecture (spelled with two n's, as
// in the preset name); it enables the same settings as the `skylake` preset.
let cannonlake = settings.add_preset(
"cannonlake",
"Cannon Lake microarchitecture.",
preset!(skylake),
);
settings.add_preset(
"icelake",
"Ice Lake microarchitecture.",
preset!(cannonlake),
);
settings.add_preset(
"znver1",
"Zen (first generation) microarchitecture.",
preset!(
has_sse3
&& has_ssse3

View File

@@ -116,6 +116,9 @@ pub fn generate(
isa::Isa::Arm64 => {
// aarch64 doesn't have platform-specific settings.
}
isa::Isa::S390x => {
// s390x doesn't have platform-specific settings.
}
isa::Isa::Arm32 | isa::Isa::Riscv => todo!(),
}
}

View File

@@ -3582,7 +3582,7 @@ pub(crate) fn define(
"fmin_pseudo",
r#"
Floating point pseudo-minimum, propagating NaNs. This behaves differently from ``fmin``.
See https://github.com/WebAssembly/simd/pull/122 for background.
See <https://github.com/WebAssembly/simd/pull/122> for background.
The behaviour is defined as ``fmin_pseudo(a, b) = (b < a) ? b : a``, and the behaviour
for zero or NaN inputs follows from the behaviour of ``<`` with such inputs.
@@ -3614,7 +3614,7 @@ pub(crate) fn define(
"fmax_pseudo",
r#"
Floating point pseudo-maximum, propagating NaNs. This behaves differently from ``fmax``.
See https://github.com/WebAssembly/simd/pull/122 for background.
See <https://github.com/WebAssembly/simd/pull/122> for background.
The behaviour is defined as ``fmax_pseudo(a, b) = (a < b) ? b : a``, and the behaviour
for zero or NaN inputs follows from the behaviour of ``<`` with such inputs.
@@ -4102,7 +4102,7 @@ pub(crate) fn define(
This will double the lane width and halve the number of lanes. So the resulting
vector has the same number of bits as `x` and `y` do (individually).
See https://github.com/WebAssembly/simd/pull/127 for background info.
See <https://github.com/WebAssembly/simd/pull/127> for background info.
"#,
&formats.binary,
)
@@ -4325,6 +4325,26 @@ pub(crate) fn define(
.operands_out(vec![a]),
);
ig.push(
Inst::new(
"fcvt_low_from_sint",
r#"
Converts packed signed doubleword integers to packed double precision floating point.
Considering only the low half of the register, each lane in `x` is interpreted as a
signed doubleword integer that is then converted to a double precision float. This
instruction differs from fcvt_from_sint in that it converts half the number of lanes
which are converted to occupy twice the number of bits. No rounding should be needed
for the resulting float.
The result type will have half the number of vector lanes as the input.
"#,
&formats.unary,
)
.operands_in(vec![x])
.operands_out(vec![a]),
);
let WideInt = &TypeVar::new(
"WideInt",
"An integer type with lanes from `i16` upwards",

View File

@@ -5,8 +5,8 @@ pub(crate) fn define() -> SettingGroup {
settings.add_enum(
"regalloc",
r#"Register allocator to use with the MachInst backend.
"Register allocator to use with the MachInst backend.",
r#"
This selects the register allocator as an option among those offered by the `regalloc.rs`
crate. Please report register allocation bugs to the maintainers of this crate whenever
possible.
@@ -38,22 +38,21 @@ pub(crate) fn define() -> SettingGroup {
settings.add_enum(
"opt_level",
"Optimization level for generated code.",
r#"
Optimization level:
Supported levels:
- none: Minimise compile time by disabling most optimizations.
- speed: Generate the fastest possible code
- speed_and_size: like "speed", but also perform transformations
aimed at reducing code size.
- `none`: Minimise compile time by disabling most optimizations.
- `speed`: Generate the fastest possible code
- `speed_and_size`: like "speed", but also perform transformations aimed at reducing code size.
"#,
vec!["none", "speed", "speed_and_size"],
);
settings.add_bool(
"enable_verifier",
"Run the Cranelift IR verifier at strategic times during compilation.",
r#"
Run the Cranelift IR verifier at strategic times during compilation.
This makes compilation slower but catches many bugs. The verifier is always enabled by
default, which is useful during development.
"#,
@@ -65,15 +64,15 @@ pub(crate) fn define() -> SettingGroup {
// `colocated` flag on external functions and global values.
settings.add_bool(
"is_pic",
"Enable Position-Independent Code generation",
"Enable Position-Independent Code generation.",
"",
false,
);
settings.add_bool(
"use_colocated_libcalls",
"Use colocated libcalls.",
r#"
Use colocated libcalls.
Generate code that assumes that libcalls can be declared "colocated",
meaning they will be defined along with the current function, such that
they can use more efficient addressing.
@@ -83,10 +82,8 @@ pub(crate) fn define() -> SettingGroup {
settings.add_bool(
"avoid_div_traps",
"Generate explicit checks around native division instructions to avoid their trapping.",
r#"
Generate explicit checks around native division instructions to avoid
their trapping.
This is primarily used by SpiderMonkey which doesn't install a signal
handler for SIGFPE, but expects a SIGILL trap for division by zero.
@@ -98,9 +95,8 @@ pub(crate) fn define() -> SettingGroup {
settings.add_bool(
"enable_float",
"Enable the use of floating-point instructions.",
r#"
Enable the use of floating-point instructions
Disabling use of floating-point instructions is not yet implemented.
"#,
true,
@@ -108,9 +104,8 @@ pub(crate) fn define() -> SettingGroup {
settings.add_bool(
"enable_nan_canonicalization",
"Enable NaN canonicalization.",
r#"
Enable NaN canonicalization
This replaces NaNs with a single canonical value, for users requiring
entirely deterministic WebAssembly computation. This is not required
by the WebAssembly spec, so it is not enabled by default.
@@ -120,8 +115,8 @@ pub(crate) fn define() -> SettingGroup {
settings.add_bool(
"enable_pinned_reg",
r#"Enable the use of the pinned register.
"Enable the use of the pinned register.",
r#"
This register is excluded from register allocation, and is completely under the control of
the end-user. It is possible to read it via the get_pinned_reg instruction, and to set it
with the set_pinned_reg instruction.
@@ -131,8 +126,8 @@ pub(crate) fn define() -> SettingGroup {
settings.add_bool(
"use_pinned_reg_as_heap_base",
r#"Use the pinned register as the heap base.
"Use the pinned register as the heap base.",
r#"
Enabling this requires the enable_pinned_reg setting to be set to true. It enables a custom
legalization of the `heap_addr` instruction so it will use the pinned register as the heap
base, instead of fetching it from a global value.
@@ -144,19 +139,24 @@ pub(crate) fn define() -> SettingGroup {
false,
);
settings.add_bool("enable_simd", "Enable the use of SIMD instructions.", false);
settings.add_bool(
"enable_simd",
"Enable the use of SIMD instructions.",
"",
false,
);
settings.add_bool(
"enable_atomics",
"Enable the use of atomic instructions",
"",
true,
);
settings.add_bool(
"enable_safepoints",
"Enable safepoint instruction insertions.",
r#"
Enable safepoint instruction insertions.
This will allow the emit_stack_maps() function to insert the safepoint
instruction on top of calls and interrupt traps in order to display the
live reference values at that point in the program.
@@ -166,9 +166,8 @@ pub(crate) fn define() -> SettingGroup {
settings.add_enum(
"tls_model",
r#"
Defines the model used to perform TLS accesses.
"#,
"Defines the model used to perform TLS accesses.",
"",
vec!["none", "elf_gd", "macho", "coff"],
);
@@ -176,9 +175,9 @@ pub(crate) fn define() -> SettingGroup {
settings.add_enum(
"libcall_call_conv",
"Defines the calling convention to use for LibCalls call expansion.",
r#"
Defines the calling convention to use for LibCalls call expansion,
since it may be different from the ISA default calling convention.
This may be different from the ISA default calling convention.
The default value is to use the same calling convention as the ISA
default calling convention.
@@ -192,6 +191,7 @@ pub(crate) fn define() -> SettingGroup {
"cold",
"system_v",
"windows_fastcall",
"apple_aarch64",
"baldrdash_system_v",
"baldrdash_windows",
"baldrdash_2020",
@@ -201,9 +201,8 @@ pub(crate) fn define() -> SettingGroup {
settings.add_num(
"baldrdash_prologue_words",
"Number of pointer-sized words pushed by the baldrdash prologue.",
r#"
Number of pointer-sized words pushed by the baldrdash prologue.
Functions with the `baldrdash` calling convention don't generate their
own prologue and epilogue. They depend on externally generated code
that pushes a fixed number of words in the prologue and restores them
@@ -218,9 +217,8 @@ pub(crate) fn define() -> SettingGroup {
settings.add_bool(
"enable_llvm_abi_extensions",
"Enable various ABI extensions defined by LLVM's behavior.",
r#"
Enable various ABI extensions defined by LLVM's behavior.
In some cases, LLVM's implementation of an ABI (calling convention)
goes beyond a standard and supports additional argument types or
behavior. This option instructs Cranelift codegen to follow LLVM's
@@ -237,12 +235,12 @@ pub(crate) fn define() -> SettingGroup {
settings.add_bool(
"unwind_info",
"Generate unwind information.",
r#"
Generate unwind info. This increases metadata size and compile time,
but allows for the debugger to trace frames, is needed for GC tracing
that relies on libunwind (such as in Wasmtime), and is
unconditionally needed on certain platforms (such as Windows) that
must always be able to unwind.
This increases metadata size and compile time, but allows for the
debugger to trace frames, is needed for GC tracing that relies on
libunwind (such as in Wasmtime), and is unconditionally needed on
certain platforms (such as Windows) that must always be able to unwind.
"#,
true,
);
@@ -252,6 +250,7 @@ pub(crate) fn define() -> SettingGroup {
settings.add_bool(
"emit_all_ones_funcaddrs",
"Emit not-yet-relocated function addresses as all-ones bit patterns.",
"",
false,
);
@@ -259,27 +258,22 @@ pub(crate) fn define() -> SettingGroup {
settings.add_bool(
"enable_probestack",
r#"
Enable the use of stack probes, for calling conventions which support this
functionality.
"#,
"Enable the use of stack probes for supported calling conventions.",
"",
true,
);
settings.add_bool(
"probestack_func_adjusts_sp",
r#"
Set this to true of the stack probe function modifies the stack pointer
itself.
"#,
"Enable if the stack probe adjusts the stack pointer.",
"",
false,
);
settings.add_num(
"probestack_size_log2",
"The log2 of the size of the stack guard region.",
r#"
The log2 of the size of the stack guard region.
Stack frames larger than this size will have stack overflow checked
by calling the probestack function.
@@ -293,6 +287,7 @@ pub(crate) fn define() -> SettingGroup {
settings.add_bool(
"enable_jump_tables",
"Enable the use of jump tables in generated machine code.",
"",
true,
);
@@ -300,9 +295,8 @@ pub(crate) fn define() -> SettingGroup {
settings.add_bool(
"enable_heap_access_spectre_mitigation",
"Enable Spectre mitigation on heap bounds checks.",
r#"
Enable Spectre mitigation on heap bounds checks.
This is a no-op for any heap that needs no bounds checks; e.g.,
if the limit is static and the guard region is large enough that
the index cannot reach past it.

View File

@@ -1,7 +1,7 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-codegen-shared"
version = "0.72.0"
version = "0.73.0"
description = "For code shared between cranelift-codegen-meta and cranelift-codegen"
license = "Apache-2.0 WITH LLVM-exception"
repository = "https://github.com/bytecodealliance/wasmtime"

View File

@@ -60,6 +60,8 @@ pub enum Reloc {
Arm64Call,
/// RISC-V call target
RiscvCall,
/// s390x PC-relative 4-byte offset
S390xPCRel32Dbl,
/// Elf x86_64 32 bit signed PC relative offset to two GOT entries for GD symbol.
ElfX86_64TlsGd,
@@ -75,6 +77,7 @@ impl fmt::Display for Reloc {
match *self {
Self::Abs4 => write!(f, "Abs4"),
Self::Abs8 => write!(f, "Abs8"),
Self::S390xPCRel32Dbl => write!(f, "PCRel32Dbl"),
Self::X86PCRel4 => write!(f, "PCRel4"),
Self::X86PCRelRodata4 => write!(f, "PCRelRodata4"),
Self::X86CallPCRel4 => write!(f, "CallPCRel4"),

View File

@@ -267,13 +267,7 @@ impl Context {
isa: &dyn TargetIsa,
) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {
if let Some(backend) = isa.get_mach_backend() {
use crate::isa::CallConv;
use crate::machinst::UnwindInfoKind;
let unwind_info_kind = match self.func.signature.call_conv {
CallConv::Fast | CallConv::Cold | CallConv::SystemV => UnwindInfoKind::SystemV,
CallConv::WindowsFastcall => UnwindInfoKind::Windows,
_ => UnwindInfoKind::None,
};
let unwind_info_kind = isa.unwind_info_kind();
let result = self.mach_compile_result.as_ref().unwrap();
return backend.emit_unwind_info(result, unwind_info_kind);
}

View File

@@ -5,7 +5,6 @@ use crate::ir::{types, ConstantData, Type};
use core::convert::TryInto;
use core::fmt::{self, Display, Formatter};
use core::ptr;
use thiserror::Error;
/// Represent a data value. Where [Value] is an SSA reference, [DataValue] is the type + value
/// that would be referred to by a [Value].
@@ -97,15 +96,38 @@ impl DataValue {
}
/// Record failures to cast [DataValue].
#[derive(Error, Debug, PartialEq)]
#[derive(Debug, PartialEq)]
#[allow(missing_docs)]
pub enum DataValueCastFailure {
#[error("unable to cast data value of type {0} to type {1}")]
TryInto(Type, Type),
#[error("unable to cast i64({0}) to a data value of type {1}")]
FromInteger(i64, Type),
}
// This is manually implementing Error and Display instead of using thiserror to reduce the amount
// of dependencies used by Cranelift.
impl std::error::Error for DataValueCastFailure {}
impl Display for DataValueCastFailure {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
match self {
DataValueCastFailure::TryInto(from, to) => {
write!(
f,
"unable to cast data value of type {} to type {}",
from, to
)
}
DataValueCastFailure::FromInteger(val, to) => {
write!(
f,
"unable to cast i64({}) to a data value of type {}",
val, to
)
}
}
}
}
/// Helper for creating conversion implementations for [DataValue].
macro_rules! build_conversion_impl {
( $rust_ty:ty, $data_value_ty:ident, $cranelift_ty:ident ) => {

View File

@@ -146,7 +146,7 @@ impl StackSlot {
/// [`VmContext`](super::GlobalValueData::VMContext) using
/// [`FuncEnvironment::make_global`](https://docs.rs/cranelift-wasm/*/cranelift_wasm/trait.FuncEnvironment.html#tymethod.make_global).
/// - When compiling to native code, you can use it for objects in static memory with
/// [`Module::declare_data_in_func`](https://docs.rs/cranelift-module/*/cranelift_module/struct.Module.html#method.declare_data_in_func).
/// [`Module::declare_data_in_func`](https://docs.rs/cranelift-module/*/cranelift_module/trait.Module.html#method.declare_data_in_func).
/// - For any compilation target, it can be registered with
/// [`FunctionBuilder::create_global_value`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.create_global_value).
///
@@ -264,9 +264,9 @@ impl JumpTable {
///
/// - [`FunctionBuilder::import_function`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.import_function)
/// for external functions
/// - [`Module::declare_func_in_func`](https://docs.rs/cranelift-module/*/cranelift_module/struct.Module.html#method.declare_func_in_func)
/// - [`Module::declare_func_in_func`](https://docs.rs/cranelift-module/*/cranelift_module/trait.Module.html#method.declare_func_in_func)
/// for functions declared elsewhere in the same native
/// [`Module`](https://docs.rs/cranelift-module/*/cranelift_module/struct.Module.html)
/// [`Module`](https://docs.rs/cranelift-module/*/cranelift_module/trait.Module.html)
/// - [`FuncEnvironment::make_direct_func`](https://docs.rs/cranelift-wasm/*/cranelift_wasm/trait.FuncEnvironment.html#tymethod.make_direct_func)
/// for functions declared in the same WebAssembly
/// [`FuncEnvironment`](https://docs.rs/cranelift-wasm/*/cranelift_wasm/trait.FuncEnvironment.html#tymethod.make_direct_func)

View File

@@ -135,12 +135,28 @@ impl Into<AMode> for StackAMode {
/// Returns the number of bytes of stack space needed to save the clobbered
/// integer registers (`int_reg`) and vector registers (`vec_reg`), as an
/// `(int_save_bytes, vec_save_bytes)` pair.
///
/// The per-register size of the vector save area depends on `call_conv`; see
/// the comment in the body.
fn saved_reg_stack_size(
    call_conv: isa::CallConv,
    int_reg: &[Writable<RealReg>],
    vec_reg: &[Writable<RealReg>],
) -> (usize, usize) {
    // Round up to multiple of 2, to keep 16-byte stack alignment.
    let int_save_bytes = (int_reg.len() + (int_reg.len() & 1)) * 8;

    // The Baldrdash ABIs require saving and restoring the whole 16-byte
    // SIMD & FP registers, so the necessary stack space is always a
    // multiple of the mandatory 16-byte stack alignment. However, the
    // Procedure Call Standard for the Arm 64-bit Architecture (AAPCS64,
    // including several related ABIs such as the one used by Windows)
    // mandates saving only the bottom 8 bytes of the vector registers,
    // so in that case we round up the number of registers to ensure proper
    // stack alignment (similarly to the situation with `int_reg`).
    //
    // Both the slot size and the padding follow from the same ABI decision,
    // so they are selected together.
    let (vec_reg_size, vec_save_padding) = if call_conv.extends_baldrdash() {
        (16, 0)
    } else {
        (8, vec_reg.len() & 1)
    };
    let vec_save_bytes = (vec_reg.len() + vec_save_padding) * vec_reg_size;

    (int_save_bytes, vec_save_bytes)
}
@@ -171,6 +187,21 @@ impl ABIMachineSpec for AArch64MachineDeps {
let has_baldrdash_tls = call_conv == isa::CallConv::Baldrdash2020;
// See AArch64 ABI (https://c9x.me/compile/bib/abi-arm64.pdf), sections 5.4.
//
// MacOS aarch64 is slightly different, see also
// https://developer.apple.com/documentation/xcode/writing_arm64_code_for_apple_platforms.
// We are diverging from the MacOS aarch64 implementation in the
// following ways:
// - sign- and zero- extensions of data types less than 32 bits are not
// implemented yet.
// - i128 arguments passing isn't implemented yet in the standard (non
// MacOS) aarch64 ABI.
// - we align the arguments stack space to a 16-bytes boundary, while
// the MacOS allows aligning only on 8 bytes. In practice it means we're
// slightly overallocating when calling, which is fine, and doesn't
// break our other invariants that the stack is always allocated in
// 16-bytes chunks.
let mut next_xreg = 0;
let mut next_vreg = 0;
let mut next_stack: u64 = 0;
@@ -182,18 +213,26 @@ impl ABIMachineSpec for AArch64MachineDeps {
next_stack = 16;
}
// Note on return values: on the regular non-baldrdash ABI, we may return values in 8
// registers for V128 and I64 registers independently of the number of register values
// returned in the other class. That is, we can return values in up to 8 integer and 8
// vector registers at once.
// In Baldrdash, we can only use one register for return value for all the register
// classes. That is, we can't return values in both one integer and one vector register;
// only one return value may be in a register.
let (max_per_class_reg_vals, mut remaining_reg_vals) = match args_or_rets {
ArgsOrRets::Args => (8, 16), // x0-x7 and v0-v7
let (max_per_class_reg_vals, mut remaining_reg_vals) = match (args_or_rets, is_baldrdash) {
(ArgsOrRets::Args, _) => (8, 16), // x0-x7 and v0-v7
(ArgsOrRets::Rets, false) => (8, 16), // x0-x7 and v0-v7
(ArgsOrRets::Rets, true) => (1, 1), // x0 or v0, but not both
// Note on return values: on the regular ABI, we may return values
// in 8 registers for V128 and I64 registers independently of the
// number of register values returned in the other class. That is,
// we can return values in up to 8 integer and
// 8 vector registers at once.
//
// In Baldrdash and Wasmtime, we can only use one register for
// return value for all the register classes. That is, we can't
// return values in both one integer and one vector register; only
// one return value may be in a register.
ArgsOrRets::Rets => {
if is_baldrdash || call_conv.extends_wasmtime() {
(1, 1) // x0 or v0, but not both
} else {
(8, 16) // x0-x7 and v0-v7
}
}
};
for i in 0..params.len() {
@@ -264,13 +303,27 @@ impl ABIMachineSpec for AArch64MachineDeps {
*next_reg += 1;
remaining_reg_vals -= 1;
} else {
// Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte
// stack alignment happens separately after all args.)
// Compute the stack slot's size.
let size = (ty_bits(param.value_type) / 8) as u64;
let size = std::cmp::max(size, 8);
// Align.
let size = if call_conv == isa::CallConv::AppleAarch64
|| (call_conv.extends_wasmtime() && args_or_rets == ArgsOrRets::Rets)
{
// MacOS aarch64 and Wasmtime allow stack slots with
// sizes less than 8 bytes. They still need to be
// properly aligned on their natural data alignment,
// though.
size
} else {
// Every arg takes a minimum slot of 8 bytes. (16-byte stack
// alignment happens separately after all args.)
std::cmp::max(size, 8)
};
// Align the stack slot.
debug_assert!(size.is_power_of_two());
next_stack = align_to(next_stack, size);
ret.push(ABIArg::stack(
next_stack as i64,
param.value_type,
@@ -550,11 +603,13 @@ impl ABIMachineSpec for AArch64MachineDeps {
flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> (u64, SmallVec<[Inst; 16]>) {
let mut insts = SmallVec::new();
let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers);
let (int_save_bytes, vec_save_bytes) = saved_reg_stack_size(&clobbered_int, &clobbered_vec);
let (int_save_bytes, vec_save_bytes) =
saved_reg_stack_size(call_conv, &clobbered_int, &clobbered_vec);
let total_save_bytes = int_save_bytes + vec_save_bytes;
let clobber_size = total_save_bytes as i32;
@@ -583,59 +638,170 @@ impl ABIMachineSpec for AArch64MachineDeps {
// `clobber_offset` tracks offset above start-of-clobbers for unwind-info
// purposes.
let mut clobber_offset = clobber_size as u32;
for reg_pair in clobbered_int.chunks(2) {
let (r1, r2) = if reg_pair.len() == 2 {
// .to_reg().to_reg(): Writable<RealReg> --> RealReg --> Reg
(reg_pair[0].to_reg().to_reg(), reg_pair[1].to_reg().to_reg())
} else {
(reg_pair[0].to_reg().to_reg(), zero_reg())
};
let clobber_offset_change = 16;
let iter = clobbered_int.chunks_exact(2);
debug_assert!(r1.get_class() == RegClass::I64);
debug_assert!(r2.get_class() == RegClass::I64);
if let [rd] = iter.remainder() {
let rd = rd.to_reg().to_reg();
// stp r1, r2, [sp, #-16]!
insts.push(Inst::StoreP64 {
rt: r1,
rt2: r2,
mem: PairAMode::PreIndexed(
debug_assert_eq!(rd.get_class(), RegClass::I64);
// str rd, [sp, #-16]!
insts.push(Inst::Store64 {
rd,
mem: AMode::PreIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(),
SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
),
flags: MemFlags::trusted(),
});
if flags.unwind_info() {
clobber_offset -= 8;
if r2 != zero_reg() {
clobber_offset -= clobber_offset_change as u32;
insts.push(Inst::Unwind {
inst: UnwindInst::SaveReg {
clobber_offset,
reg: r2.to_real_reg(),
},
});
}
clobber_offset -= 8;
insts.push(Inst::Unwind {
inst: UnwindInst::SaveReg {
clobber_offset,
reg: r1.to_real_reg(),
reg: rd.to_real_reg(),
},
});
}
}
for reg in clobbered_vec.iter() {
insts.push(Inst::FpuStore128 {
rd: reg.to_reg().to_reg(),
mem: AMode::PreIndexed(writable_stack_reg(), SImm9::maybe_from_i64(-16).unwrap()),
let mut iter = iter.rev();
while let Some([rt, rt2]) = iter.next() {
// .to_reg().to_reg(): Writable<RealReg> --> RealReg --> Reg
let rt = rt.to_reg().to_reg();
let rt2 = rt2.to_reg().to_reg();
debug_assert!(rt.get_class() == RegClass::I64);
debug_assert!(rt2.get_class() == RegClass::I64);
// stp rt, rt2, [sp, #-16]!
insts.push(Inst::StoreP64 {
rt,
rt2,
mem: PairAMode::PreIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(-clobber_offset_change, types::I64).unwrap(),
),
flags: MemFlags::trusted(),
});
if flags.unwind_info() {
clobber_offset -= 16;
clobber_offset -= clobber_offset_change as u32;
insts.push(Inst::Unwind {
inst: UnwindInst::SaveReg {
clobber_offset,
reg: reg.to_reg(),
reg: rt.to_real_reg(),
},
});
insts.push(Inst::Unwind {
inst: UnwindInst::SaveReg {
clobber_offset: clobber_offset + (clobber_offset_change / 2) as u32,
reg: rt2.to_real_reg(),
},
});
}
}
let store_vec_reg = |rd| {
if call_conv.extends_baldrdash() {
Inst::FpuStore128 {
rd,
mem: AMode::PreIndexed(
writable_stack_reg(),
SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
),
flags: MemFlags::trusted(),
}
} else {
Inst::FpuStore64 {
rd,
mem: AMode::PreIndexed(
writable_stack_reg(),
SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
),
flags: MemFlags::trusted(),
}
}
};
let iter = clobbered_vec.chunks_exact(2);
if let [rd] = iter.remainder() {
let rd = rd.to_reg().to_reg();
debug_assert_eq!(rd.get_class(), RegClass::V128);
insts.push(store_vec_reg(rd));
if flags.unwind_info() {
clobber_offset -= clobber_offset_change as u32;
insts.push(Inst::Unwind {
inst: UnwindInst::SaveReg {
clobber_offset,
reg: rd.to_real_reg(),
},
});
}
}
let store_vec_reg_pair = |rt, rt2| {
if call_conv.extends_baldrdash() {
let clobber_offset_change = 32;
(
Inst::FpuStoreP128 {
rt,
rt2,
mem: PairAMode::PreIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(-clobber_offset_change, I8X16).unwrap(),
),
flags: MemFlags::trusted(),
},
clobber_offset_change as u32,
)
} else {
let clobber_offset_change = 16;
(
Inst::FpuStoreP64 {
rt,
rt2,
mem: PairAMode::PreIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(-clobber_offset_change, F64).unwrap(),
),
flags: MemFlags::trusted(),
},
clobber_offset_change as u32,
)
}
};
let mut iter = iter.rev();
while let Some([rt, rt2]) = iter.next() {
let rt = rt.to_reg().to_reg();
let rt2 = rt2.to_reg().to_reg();
debug_assert_eq!(rt.get_class(), RegClass::V128);
debug_assert_eq!(rt2.get_class(), RegClass::V128);
let (inst, clobber_offset_change) = store_vec_reg_pair(rt, rt2);
insts.push(inst);
if flags.unwind_info() {
clobber_offset -= clobber_offset_change;
insts.push(Inst::Unwind {
inst: UnwindInst::SaveReg {
clobber_offset,
reg: rt.to_real_reg(),
},
});
insts.push(Inst::Unwind {
inst: UnwindInst::SaveReg {
clobber_offset: clobber_offset + clobber_offset_change / 2,
reg: rt2.to_real_reg(),
},
});
}
@@ -654,6 +820,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> SmallVec<[Inst; 16]> {
let mut insts = SmallVec::new();
let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers);
@@ -663,31 +830,83 @@ impl ABIMachineSpec for AArch64MachineDeps {
insts.extend(Self::gen_sp_reg_adjust(fixed_frame_storage_size as i32));
}
for reg in clobbered_vec.iter().rev() {
insts.push(Inst::FpuLoad128 {
rd: Writable::from_reg(reg.to_reg().to_reg()),
mem: AMode::PostIndexed(writable_stack_reg(), SImm9::maybe_from_i64(16).unwrap()),
let load_vec_reg = |rd| {
if call_conv.extends_baldrdash() {
Inst::FpuLoad128 {
rd,
mem: AMode::PostIndexed(
writable_stack_reg(),
SImm9::maybe_from_i64(16).unwrap(),
),
flags: MemFlags::trusted(),
});
}
for reg_pair in clobbered_int.chunks(2).rev() {
let (r1, r2) = if reg_pair.len() == 2 {
(
reg_pair[0].map(|r| r.to_reg()),
reg_pair[1].map(|r| r.to_reg()),
)
} else {
(reg_pair[0].map(|r| r.to_reg()), writable_zero_reg())
Inst::FpuLoad64 {
rd,
mem: AMode::PostIndexed(
writable_stack_reg(),
SImm9::maybe_from_i64(16).unwrap(),
),
flags: MemFlags::trusted(),
}
}
};
let load_vec_reg_pair = |rt, rt2| {
if call_conv.extends_baldrdash() {
Inst::FpuLoadP128 {
rt,
rt2,
mem: PairAMode::PostIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(32, I8X16).unwrap(),
),
flags: MemFlags::trusted(),
}
} else {
Inst::FpuLoadP64 {
rt,
rt2,
mem: PairAMode::PostIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(16, F64).unwrap(),
),
flags: MemFlags::trusted(),
}
}
};
debug_assert!(r1.to_reg().get_class() == RegClass::I64);
debug_assert!(r2.to_reg().get_class() == RegClass::I64);
let mut iter = clobbered_vec.chunks_exact(2);
// ldp r1, r2, [sp], #16
while let Some([rt, rt2]) = iter.next() {
let rt = rt.map(|r| r.to_reg());
let rt2 = rt2.map(|r| r.to_reg());
debug_assert_eq!(rt.to_reg().get_class(), RegClass::V128);
debug_assert_eq!(rt2.to_reg().get_class(), RegClass::V128);
insts.push(load_vec_reg_pair(rt, rt2));
}
debug_assert!(iter.remainder().len() <= 1);
if let [rd] = iter.remainder() {
let rd = rd.map(|r| r.to_reg());
debug_assert_eq!(rd.to_reg().get_class(), RegClass::V128);
insts.push(load_vec_reg(rd));
}
let mut iter = clobbered_int.chunks_exact(2);
while let Some([rt, rt2]) = iter.next() {
let rt = rt.map(|r| r.to_reg());
let rt2 = rt2.map(|r| r.to_reg());
debug_assert_eq!(rt.to_reg().get_class(), RegClass::I64);
debug_assert_eq!(rt2.to_reg().get_class(), RegClass::I64);
// ldp rt, rt2, [sp], #16
insts.push(Inst::LoadP64 {
rt: r1,
rt2: r2,
rt,
rt2,
mem: PairAMode::PostIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(16, I64).unwrap(),
@@ -696,6 +915,20 @@ impl ABIMachineSpec for AArch64MachineDeps {
});
}
debug_assert!(iter.remainder().len() <= 1);
if let [rd] = iter.remainder() {
let rd = rd.map(|r| r.to_reg());
debug_assert_eq!(rd.to_reg().get_class(), RegClass::I64);
// ldr rd, [sp], #16
insts.push(Inst::ULoad64 {
rd,
mem: AMode::PostIndexed(writable_stack_reg(), SImm9::maybe_from_i64(16).unwrap()),
flags: MemFlags::trusted(),
});
}
// If this is Baldrdash-2020, restore the callee (i.e., our) TLS
// register. We may have allocated it for something else and clobbered
// it, but the ABI expects us to leave the TLS register unchanged.

View File

@@ -258,6 +258,28 @@ fn enc_ldst_vec(q: u32, size: u32, rn: Reg, rt: Writable<Reg>) -> u32 {
| machreg_to_vec(rt.to_reg())
}
/// Encodes a load or store of a pair of vector (SIMD & FP) registers.
///
/// * `opc` - 2-bit field placed at bit 30 (callers in this file pass `0b01`
///   for the 64-bit pair instructions and `0b10` for the 128-bit ones).
/// * `amode` - 2-bit field placed at bit 23 (callers in this file pass `0b10`
///   for signed-offset, `0b11` for pre-indexed and `0b01` for post-indexed
///   addressing).
/// * `is_load` - sets bit 22 when the instruction is a load.
/// * `simm7` - the scaled immediate, whose encoded bits are placed at bit 15.
/// * `rn` - the base address register.
/// * `rt`, `rt2` - the first and second vector registers of the pair.
fn enc_ldst_vec_pair(
    opc: u32,
    amode: u32,
    is_load: bool,
    simm7: SImm7Scaled,
    rn: Reg,
    rt: Reg,
    rt2: Reg,
) -> u32 {
    debug_assert_eq!(opc & 0b11, opc);
    debug_assert_eq!(amode & 0b11, amode);

    // Fixed bits of this instruction class; every variable field is zero.
    const FIXED_BITS: u32 = 0b00_10110_00_0_0000000_00000_00000_00000;
    let load_bit = u32::from(is_load);

    FIXED_BITS
        | (opc << 30)
        | (amode << 23)
        | (load_bit << 22)
        | (simm7.bits() << 15)
        | (machreg_to_vec(rt2) << 10)
        | (machreg_to_gpr(rn) << 5)
        | machreg_to_vec(rt)
}
fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
(top11 << 21)
| (machreg_to_vec(rm) << 16)
@@ -405,6 +427,15 @@ fn enc_vec_rr_misc(qu: u32, size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: R
| machreg_to_vec(rd.to_reg())
}
/// Encodes a one-operand vector instruction that operates on a pair of
/// elements of `rn` and writes its result to `rd`.
///
/// `bits_12_16` is the 5-bit opcode field placed at bits 16:12 of the
/// instruction word.
fn enc_vec_rr_pair(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
    debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);

    // The opcode field (bits 16:12) is left zeroed in the fixed bits so that
    // it is determined solely by `bits_12_16`. Previously the constant
    // already contained `11011` there, which made the parameter ineffective
    // and would have corrupted any other opcode via the bitwise OR.
    0b010_11110_11_11000_00000_10_00000_00000
        | bits_12_16 << 12
        | machreg_to_vec(rn) << 5
        | machreg_to_vec(rd.to_reg())
}
fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
debug_assert_eq!(q & 0b1, q);
debug_assert_eq!(u & 0b1, u);
@@ -923,7 +954,7 @@ impl MachInstEmit for Inst {
let srcloc = state.cur_srcloc();
if srcloc != SourceLoc::default() && !flags.notrap() {
// Register the offset at which the actual load instruction starts.
// Register the offset at which the actual store instruction starts.
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
}
@@ -987,7 +1018,7 @@ impl MachInstEmit for Inst {
} => {
let srcloc = state.cur_srcloc();
if srcloc != SourceLoc::default() && !flags.notrap() {
// Register the offset at which the actual load instruction starts.
// Register the offset at which the actual store instruction starts.
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
}
match mem {
@@ -1034,6 +1065,120 @@ impl MachInstEmit for Inst {
}
}
}
&Inst::FpuLoadP64 {
rt,
rt2,
ref mem,
flags,
}
| &Inst::FpuLoadP128 {
rt,
rt2,
ref mem,
flags,
} => {
let srcloc = state.cur_srcloc();
if srcloc != SourceLoc::default() && !flags.notrap() {
// Register the offset at which the actual load instruction starts.
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
}
let opc = match self {
&Inst::FpuLoadP64 { .. } => 0b01,
&Inst::FpuLoadP128 { .. } => 0b10,
_ => unreachable!(),
};
let rt = rt.to_reg();
let rt2 = rt2.to_reg();
match mem {
&PairAMode::SignedOffset(reg, simm7) => {
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
sink.put4(enc_ldst_vec_pair(opc, 0b10, true, simm7, reg, rt, rt2));
}
&PairAMode::PreIndexed(reg, simm7) => {
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
sink.put4(enc_ldst_vec_pair(
opc,
0b11,
true,
simm7,
reg.to_reg(),
rt,
rt2,
));
}
&PairAMode::PostIndexed(reg, simm7) => {
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
sink.put4(enc_ldst_vec_pair(
opc,
0b01,
true,
simm7,
reg.to_reg(),
rt,
rt2,
));
}
}
}
&Inst::FpuStoreP64 {
rt,
rt2,
ref mem,
flags,
}
| &Inst::FpuStoreP128 {
rt,
rt2,
ref mem,
flags,
} => {
let srcloc = state.cur_srcloc();
if srcloc != SourceLoc::default() && !flags.notrap() {
// Register the offset at which the actual store instruction starts.
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
}
let opc = match self {
&Inst::FpuStoreP64 { .. } => 0b01,
&Inst::FpuStoreP128 { .. } => 0b10,
_ => unreachable!(),
};
match mem {
&PairAMode::SignedOffset(reg, simm7) => {
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
sink.put4(enc_ldst_vec_pair(opc, 0b10, false, simm7, reg, rt, rt2));
}
&PairAMode::PreIndexed(reg, simm7) => {
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
sink.put4(enc_ldst_vec_pair(
opc,
0b11,
false,
simm7,
reg.to_reg(),
rt,
rt2,
));
}
&PairAMode::PostIndexed(reg, simm7) => {
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
sink.put4(enc_ldst_vec_pair(
opc,
0b01,
false,
simm7,
reg.to_reg(),
rt,
rt2,
));
}
}
}
&Inst::Mov64 { rd, rm } => {
assert!(rd.to_reg().get_class() == rm.get_class());
assert!(rm.get_class() == RegClass::I64);
@@ -1492,6 +1637,7 @@ impl MachInstEmit for Inst {
debug_assert!(size == VectorSize::Size8x8 || size == VectorSize::Size8x16);
(0b0, 0b00101, enc_size)
}
VecMisc2::Cmeq0 => (0b0, 0b01001, enc_size),
};
sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));
}
@@ -1918,6 +2064,13 @@ impl MachInstEmit for Inst {
| machreg_to_vec(rd.to_reg()),
);
}
&Inst::VecRRPair { op, rd, rn } => {
let bits_12_16 = match op {
VecPairOp::Addp => 0b11011,
};
sink.put4(enc_vec_rr_pair(bits_12_16, rd, rn));
}
&Inst::VecRRR {
rd,
rn,

View File

@@ -2311,6 +2311,16 @@ fn test_aarch64_binemit() {
"sqxtun v16.8b, v23.8h",
));
insns.push((
Inst::VecRRPair {
op: VecPairOp::Addp,
rd: writable_vreg(0),
rn: vreg(30),
},
"C0BBF15E",
"addp d0, v30.2d",
));
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Sqadd,
@@ -3803,6 +3813,17 @@ fn test_aarch64_binemit() {
"cnt v23.8b, v5.8b",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Cmeq0,
rd: writable_vreg(12),
rn: vreg(27),
size: VectorSize::Size16x8,
},
"6C9B604E",
"cmeq v12.8h, v27.8h, #0",
));
insns.push((
Inst::VecLanes {
op: VecLanesOp::Uminv,
@@ -5105,6 +5126,168 @@ fn test_aarch64_binemit() {
"str q16, [x8, x9, LSL #4]",
));
insns.push((
Inst::FpuLoadP64 {
rt: writable_vreg(0),
rt2: writable_vreg(31),
mem: PairAMode::SignedOffset(xreg(0), SImm7Scaled::zero(F64)),
flags: MemFlags::trusted(),
},
"007C406D",
"ldp d0, d31, [x0]",
));
insns.push((
Inst::FpuLoadP64 {
rt: writable_vreg(19),
rt2: writable_vreg(11),
mem: PairAMode::PreIndexed(
writable_xreg(25),
SImm7Scaled::maybe_from_i64(-512, F64).unwrap(),
),
flags: MemFlags::trusted(),
},
"332FE06D",
"ldp d19, d11, [x25, #-512]!",
));
insns.push((
Inst::FpuLoadP64 {
rt: writable_vreg(7),
rt2: writable_vreg(20),
mem: PairAMode::PostIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(64, F64).unwrap(),
),
flags: MemFlags::trusted(),
},
"E753C46C",
"ldp d7, d20, [sp], #64",
));
insns.push((
Inst::FpuStoreP64 {
rt: vreg(4),
rt2: vreg(26),
mem: PairAMode::SignedOffset(
stack_reg(),
SImm7Scaled::maybe_from_i64(504, F64).unwrap(),
),
flags: MemFlags::trusted(),
},
"E4EB1F6D",
"stp d4, d26, [sp, #504]",
));
insns.push((
Inst::FpuStoreP64 {
rt: vreg(16),
rt2: vreg(8),
mem: PairAMode::PreIndexed(
writable_xreg(15),
SImm7Scaled::maybe_from_i64(48, F64).unwrap(),
),
flags: MemFlags::trusted(),
},
"F021836D",
"stp d16, d8, [x15, #48]!",
));
insns.push((
Inst::FpuStoreP64 {
rt: vreg(5),
rt2: vreg(6),
mem: PairAMode::PostIndexed(
writable_xreg(28),
SImm7Scaled::maybe_from_i64(-32, F64).unwrap(),
),
flags: MemFlags::trusted(),
},
"851BBE6C",
"stp d5, d6, [x28], #-32",
));
insns.push((
Inst::FpuLoadP128 {
rt: writable_vreg(0),
rt2: writable_vreg(17),
mem: PairAMode::SignedOffset(xreg(3), SImm7Scaled::zero(I8X16)),
flags: MemFlags::trusted(),
},
"604440AD",
"ldp q0, q17, [x3]",
));
insns.push((
Inst::FpuLoadP128 {
rt: writable_vreg(29),
rt2: writable_vreg(9),
mem: PairAMode::PreIndexed(
writable_xreg(16),
SImm7Scaled::maybe_from_i64(-1024, I8X16).unwrap(),
),
flags: MemFlags::trusted(),
},
"1D26E0AD",
"ldp q29, q9, [x16, #-1024]!",
));
insns.push((
Inst::FpuLoadP128 {
rt: writable_vreg(10),
rt2: writable_vreg(20),
mem: PairAMode::PostIndexed(
writable_xreg(26),
SImm7Scaled::maybe_from_i64(256, I8X16).unwrap(),
),
flags: MemFlags::trusted(),
},
"4A53C8AC",
"ldp q10, q20, [x26], #256",
));
insns.push((
Inst::FpuStoreP128 {
rt: vreg(9),
rt2: vreg(31),
mem: PairAMode::SignedOffset(
stack_reg(),
SImm7Scaled::maybe_from_i64(1008, I8X16).unwrap(),
),
flags: MemFlags::trusted(),
},
"E9FF1FAD",
"stp q9, q31, [sp, #1008]",
));
insns.push((
Inst::FpuStoreP128 {
rt: vreg(27),
rt2: vreg(13),
mem: PairAMode::PreIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(-192, I8X16).unwrap(),
),
flags: MemFlags::trusted(),
},
"FB37BAAD",
"stp q27, q13, [sp, #-192]!",
));
insns.push((
Inst::FpuStoreP128 {
rt: vreg(18),
rt2: vreg(22),
mem: PairAMode::PostIndexed(
writable_xreg(13),
SImm7Scaled::maybe_from_i64(304, I8X16).unwrap(),
),
flags: MemFlags::trusted(),
},
"B2D989AC",
"stp q18, q22, [x13], #304",
));
insns.push((
Inst::LoadFpuConst64 {
rd: writable_vreg(16),

View File

@@ -73,7 +73,7 @@ impl SImm7Scaled {
/// Create a SImm7Scaled from a raw offset and the known scale type, if
/// possible.
pub fn maybe_from_i64(value: i64, scale_ty: Type) -> Option<SImm7Scaled> {
assert!(scale_ty == I64 || scale_ty == I32);
assert!(scale_ty == I64 || scale_ty == I32 || scale_ty == F64 || scale_ty == I8X16);
let scale = scale_ty.bytes();
assert!(scale.is_power_of_two());
let scale = i64::from(scale);

View File

@@ -334,6 +334,8 @@ pub enum VecMisc2 {
Frintp,
/// Population count per byte
Cnt,
/// Compare bitwise equal to 0
Cmeq0,
}
/// A Vector narrowing operation with two registers.
@@ -347,6 +349,13 @@ pub enum VecMiscNarrowOp {
Sqxtun,
}
/// A vector operation on a pair of elements with one register.
///
/// Used as the `op` field of `Inst::VecRRPair`, which takes a single source
/// register and a single destination register.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecPairOp {
    /// Add pair of elements (printed with the `addp` mnemonic).
    Addp,
}
/// An operation across the lanes of vectors.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecLanesOp {
@@ -848,7 +857,34 @@ pub enum Inst {
mem: AMode,
flags: MemFlags,
},
/// A load of a pair of floating-point registers, double precision (64-bit).
FpuLoadP64 {
rt: Writable<Reg>,
rt2: Writable<Reg>,
mem: PairAMode,
flags: MemFlags,
},
/// A store of a pair of floating-point registers, double precision (64-bit).
FpuStoreP64 {
rt: Reg,
rt2: Reg,
mem: PairAMode,
flags: MemFlags,
},
/// A load of a pair of floating-point registers, 128-bit.
FpuLoadP128 {
rt: Writable<Reg>,
rt2: Writable<Reg>,
mem: PairAMode,
flags: MemFlags,
},
/// A store of a pair of floating-point registers, 128-bit.
FpuStoreP128 {
rt: Reg,
rt2: Reg,
mem: PairAMode,
flags: MemFlags,
},
LoadFpuConst64 {
rd: Writable<Reg>,
const_data: u64,
@@ -984,6 +1020,13 @@ pub enum Inst {
high_half: bool,
},
/// 1-operand vector instruction that operates on a pair of elements.
VecRRPair {
op: VecPairOp,
rd: Writable<Reg>,
rn: Reg,
},
/// A vector ALU op.
VecRRR {
alu_op: VecALUOp,
@@ -1908,6 +1951,34 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_use(rd);
memarg_regs(mem, collector);
}
&Inst::FpuLoadP64 {
rt, rt2, ref mem, ..
} => {
collector.add_def(rt);
collector.add_def(rt2);
pairmemarg_regs(mem, collector);
}
&Inst::FpuStoreP64 {
rt, rt2, ref mem, ..
} => {
collector.add_use(rt);
collector.add_use(rt2);
pairmemarg_regs(mem, collector);
}
&Inst::FpuLoadP128 {
rt, rt2, ref mem, ..
} => {
collector.add_def(rt);
collector.add_def(rt2);
pairmemarg_regs(mem, collector);
}
&Inst::FpuStoreP128 {
rt, rt2, ref mem, ..
} => {
collector.add_use(rt);
collector.add_use(rt2);
pairmemarg_regs(mem, collector);
}
&Inst::LoadFpuConst64 { rd, .. } | &Inst::LoadFpuConst128 { rd, .. } => {
collector.add_def(rd);
}
@@ -1973,6 +2044,10 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_def(rd);
}
}
&Inst::VecRRPair { rd, rn, .. } => {
collector.add_def(rd);
collector.add_use(rn);
}
&Inst::VecRRR {
alu_op, rd, rn, rm, ..
} => {
@@ -2590,6 +2665,46 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
map_use(mapper, rd);
map_mem(mapper, mem);
}
&mut Inst::FpuLoadP64 {
ref mut rt,
ref mut rt2,
ref mut mem,
..
} => {
map_def(mapper, rt);
map_def(mapper, rt2);
map_pairmem(mapper, mem);
}
&mut Inst::FpuStoreP64 {
ref mut rt,
ref mut rt2,
ref mut mem,
..
} => {
map_use(mapper, rt);
map_use(mapper, rt2);
map_pairmem(mapper, mem);
}
&mut Inst::FpuLoadP128 {
ref mut rt,
ref mut rt2,
ref mut mem,
..
} => {
map_def(mapper, rt);
map_def(mapper, rt2);
map_pairmem(mapper, mem);
}
&mut Inst::FpuStoreP128 {
ref mut rt,
ref mut rt2,
ref mut mem,
..
} => {
map_use(mapper, rt);
map_use(mapper, rt2);
map_pairmem(mapper, mem);
}
&mut Inst::LoadFpuConst64 { ref mut rd, .. } => {
map_def(mapper, rd);
}
@@ -2721,6 +2836,14 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
map_def(mapper, rd);
}
}
&mut Inst::VecRRPair {
ref mut rd,
ref mut rn,
..
} => {
map_def(mapper, rd);
map_use(mapper, rn);
}
&mut Inst::VecRRR {
alu_op,
ref mut rd,
@@ -3508,6 +3631,42 @@ impl Inst {
let mem = mem.show_rru(mb_rru);
format!("{}str {}, {}", mem_str, rd, mem)
}
&Inst::FpuLoadP64 {
rt, rt2, ref mem, ..
} => {
let rt = show_vreg_scalar(rt.to_reg(), mb_rru, ScalarSize::Size64);
let rt2 = show_vreg_scalar(rt2.to_reg(), mb_rru, ScalarSize::Size64);
let mem = mem.show_rru(mb_rru);
format!("ldp {}, {}, {}", rt, rt2, mem)
}
&Inst::FpuStoreP64 {
rt, rt2, ref mem, ..
} => {
let rt = show_vreg_scalar(rt, mb_rru, ScalarSize::Size64);
let rt2 = show_vreg_scalar(rt2, mb_rru, ScalarSize::Size64);
let mem = mem.show_rru(mb_rru);
format!("stp {}, {}, {}", rt, rt2, mem)
}
&Inst::FpuLoadP128 {
rt, rt2, ref mem, ..
} => {
let rt = show_vreg_scalar(rt.to_reg(), mb_rru, ScalarSize::Size128);
let rt2 = show_vreg_scalar(rt2.to_reg(), mb_rru, ScalarSize::Size128);
let mem = mem.show_rru(mb_rru);
format!("ldp {}, {}, {}", rt, rt2, mem)
}
&Inst::FpuStoreP128 {
rt, rt2, ref mem, ..
} => {
let rt = show_vreg_scalar(rt, mb_rru, ScalarSize::Size128);
let rt2 = show_vreg_scalar(rt2, mb_rru, ScalarSize::Size128);
let mem = mem.show_rru(mb_rru);
format!("stp {}, {}, {}", rt, rt2, mem)
}
&Inst::LoadFpuConst64 { rd, const_data } => {
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
format!(
@@ -3725,6 +3884,15 @@ impl Inst {
};
format!("{} {}, {}", op, rd, rn)
}
&Inst::VecRRPair { op, rd, rn } => {
let op = match op {
VecPairOp::Addp => "addp",
};
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size64x2);
format!("{} {}, {}", op, rd, rn)
}
&Inst::VecRRR {
rd,
rn,
@@ -3788,43 +3956,44 @@ impl Inst {
format!("{} {}, {}, {}", op, rd, rn, rm)
}
&Inst::VecMisc { op, rd, rn, size } => {
let is_shll = op == VecMisc2::Shll;
let suffix = match (is_shll, size) {
(true, VectorSize::Size8x8) => ", #8",
(true, VectorSize::Size16x4) => ", #16",
(true, VectorSize::Size32x2) => ", #32",
_ => "",
};
let (op, size) = match op {
VecMisc2::Not => (
"mvn",
if size.is_128bits() {
let (op, rd_size, size, suffix) = match op {
VecMisc2::Not => {
let size = if size.is_128bits() {
VectorSize::Size8x16
} else {
VectorSize::Size8x8
},
),
VecMisc2::Neg => ("neg", size),
VecMisc2::Abs => ("abs", size),
VecMisc2::Fabs => ("fabs", size),
VecMisc2::Fneg => ("fneg", size),
VecMisc2::Fsqrt => ("fsqrt", size),
VecMisc2::Rev64 => ("rev64", size),
VecMisc2::Shll => ("shll", size),
VecMisc2::Fcvtzs => ("fcvtzs", size),
VecMisc2::Fcvtzu => ("fcvtzu", size),
VecMisc2::Scvtf => ("scvtf", size),
VecMisc2::Ucvtf => ("ucvtf", size),
VecMisc2::Frintn => ("frintn", size),
VecMisc2::Frintz => ("frintz", size),
VecMisc2::Frintm => ("frintm", size),
VecMisc2::Frintp => ("frintp", size),
VecMisc2::Cnt => ("cnt", size),
};
let rd_size = if is_shll { size.widen() } else { size };
("mvn", size, size, "")
}
VecMisc2::Neg => ("neg", size, size, ""),
VecMisc2::Abs => ("abs", size, size, ""),
VecMisc2::Fabs => ("fabs", size, size, ""),
VecMisc2::Fneg => ("fneg", size, size, ""),
VecMisc2::Fsqrt => ("fsqrt", size, size, ""),
VecMisc2::Rev64 => ("rev64", size, size, ""),
VecMisc2::Shll => (
"shll",
size.widen(),
size,
match size {
VectorSize::Size8x8 => ", #8",
VectorSize::Size16x4 => ", #16",
VectorSize::Size32x2 => ", #32",
_ => panic!("Unexpected vector size: {:?}", size),
},
),
VecMisc2::Fcvtzs => ("fcvtzs", size, size, ""),
VecMisc2::Fcvtzu => ("fcvtzu", size, size, ""),
VecMisc2::Scvtf => ("scvtf", size, size, ""),
VecMisc2::Ucvtf => ("ucvtf", size, size, ""),
VecMisc2::Frintn => ("frintn", size, size, ""),
VecMisc2::Frintz => ("frintz", size, size, ""),
VecMisc2::Frintm => ("frintm", size, size, ""),
VecMisc2::Frintp => ("frintp", size, size, ""),
VecMisc2::Cnt => ("cnt", size, size, ""),
VecMisc2::Cmeq0 => ("cmeq", size, size, ", #0"),
};
let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size);
let rn = show_vreg_vector(rn, mb_rru, size);
format!("{} {}, {}{}", op, rd, rn, suffix)

View File

@@ -56,8 +56,8 @@ impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper {
fn sp(&self) -> u16 {
regs::stack_reg().get_hw_encoding().into()
}
fn fp(&self) -> u16 {
regs::fp_reg().get_hw_encoding().into()
fn fp(&self) -> Option<u16> {
Some(regs::fp_reg().get_hw_encoding().into())
}
fn lr(&self) -> Option<u16> {
Some(regs::link_reg().get_hw_encoding().into())

View File

@@ -1950,6 +1950,40 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
}
Opcode::VallTrue if ctx.input_ty(insn, 0) == I64X2 => {
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let tmp = ctx.alloc_tmp(I64X2).only_reg().unwrap();
// cmeq vtmp.2d, vm.2d, #0
// addp dtmp, vtmp.2d
// fcmp dtmp, dtmp
// cset xd, eq
//
// Note that after the ADDP the value of the temporary register will
// be either 0 when all input elements are true, i.e. non-zero, or a
// NaN otherwise (either -1 or -2 when represented as an integer);
// NaNs are the only floating-point numbers that compare unequal to
// themselves.
ctx.emit(Inst::VecMisc {
op: VecMisc2::Cmeq0,
rd: tmp,
rn: rm,
size: VectorSize::Size64x2,
});
ctx.emit(Inst::VecRRPair {
op: VecPairOp::Addp,
rd: tmp,
rn: tmp.to_reg(),
});
ctx.emit(Inst::FpuCmp64 {
rn: tmp.to_reg(),
rm: tmp.to_reg(),
});
materialize_bool_result(ctx, insn, rd, Cond::Eq);
}
Opcode::VanyTrue | Opcode::VallTrue => {
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
@@ -2180,6 +2214,47 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
size: VectorSize::Size32x4,
});
}
I64X2 => {
// mov dst_r, src_v.d[0]
// mov tmp_r0, src_v.d[1]
// lsr dst_r, dst_r, #63
// lsr tmp_r0, tmp_r0, #63
// add dst_r, dst_r, tmp_r0, lsl #1
ctx.emit(Inst::MovFromVec {
rd: dst_r,
rn: src_v,
idx: 0,
size: VectorSize::Size64x2,
});
ctx.emit(Inst::MovFromVec {
rd: tmp_r0,
rn: src_v,
idx: 1,
size: VectorSize::Size64x2,
});
ctx.emit(Inst::AluRRImmShift {
alu_op: ALUOp::Lsr64,
rd: dst_r,
rn: dst_r.to_reg(),
immshift: ImmShift::maybe_from_u64(63).unwrap(),
});
ctx.emit(Inst::AluRRImmShift {
alu_op: ALUOp::Lsr64,
rd: tmp_r0,
rn: tmp_r0.to_reg(),
immshift: ImmShift::maybe_from_u64(63).unwrap(),
});
ctx.emit(Inst::AluRRRShift {
alu_op: ALUOp::Add32,
rd: dst_r,
rn: dst_r.to_reg(),
rm: tmp_r0.to_reg(),
shiftop: ShiftOpAndAmt::new(
ShiftOp::LSL,
ShiftOpShiftImm::maybe_from_shift(1).unwrap(),
),
});
}
_ => panic!("arm64 isel: VhighBits unhandled, ty = {:?}", ty),
}
}
@@ -3013,6 +3088,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::TlsValue => unimplemented!("tls_value"),
Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),
}
Ok(())

View File

@@ -7,10 +7,8 @@ use crate::isa::Builder as IsaBuilder;
use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
use crate::result::CodegenResult;
use crate::settings as shared_settings;
use alloc::boxed::Box;
use alloc::{boxed::Box, vec::Vec};
use core::hash::{Hash, Hasher};
use regalloc::{PrettyPrint, RealRegUniverse};
use target_lexicon::{Aarch64Architecture, Architecture, Triple};
@@ -104,6 +102,10 @@ impl MachBackend for AArch64Backend {
&self.flags
}
/// Collect the values yielded by iterating this backend's ISA-specific flags.
fn isa_flags(&self) -> Vec<shared_settings::Value> {
    let values = self.isa_flags.iter();
    values.collect()
}
fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) {
self.flags.hash(&mut hasher);
self.isa_flags.hash(&mut hasher);

View File

@@ -1,6 +1,6 @@
//! AArch64 Settings.
use crate::settings::{self, detail, Builder};
use crate::settings::{self, detail, Builder, Value};
use core::fmt;
// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a

View File

@@ -319,6 +319,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
_flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> (u64, SmallVec<[Inst; 16]>) {
let mut insts = SmallVec::new();
if fixed_frame_storage_size > 0 {
@@ -348,6 +349,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
_flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
_fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> SmallVec<[Inst; 16]> {
let mut insts = SmallVec::new();
let clobbered_vec = get_callee_saves(clobbers);

View File

@@ -7,7 +7,7 @@ use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter,
use crate::result::CodegenResult;
use crate::settings;
use alloc::boxed::Box;
use alloc::{boxed::Box, vec::Vec};
use core::hash::{Hash, Hasher};
use regalloc::{PrettyPrint, RealRegUniverse};
use target_lexicon::{Architecture, ArmArchitecture, Triple};
@@ -92,6 +92,10 @@ impl MachBackend for Arm32Backend {
&self.flags
}
/// Get the ISA-dependent flag values for this backend.
///
/// This implementation reports none: the returned list is always empty.
fn isa_flags(&self) -> Vec<settings::Value> {
    Vec::new()
}
/// Hash this backend's flags into `hasher`.
///
/// Only `self.flags` is fed to the hasher here.
fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) {
    self.flags.hash(&mut hasher);
}

View File

@@ -10,23 +10,35 @@ use serde::{Deserialize, Serialize};
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub enum CallConv {
/// Best performance, not ABI-stable
/// Best performance, not ABI-stable.
Fast,
/// Smallest caller code size, not ABI-stable
/// Smallest caller code size, not ABI-stable.
Cold,
/// System V-style convention used on many platforms
/// System V-style convention used on many platforms.
SystemV,
/// Windows "fastcall" convention, also used for x64 and ARM
/// Windows "fastcall" convention, also used for x64 and ARM.
WindowsFastcall,
/// SpiderMonkey WebAssembly convention on systems using natively SystemV
/// Mac aarch64 calling convention, which is a tweaked aarch64 ABI.
AppleAarch64,
/// SpiderMonkey WebAssembly convention on systems using natively SystemV.
BaldrdashSystemV,
/// SpiderMonkey WebAssembly convention on Windows
/// SpiderMonkey WebAssembly convention on Windows.
BaldrdashWindows,
/// SpiderMonkey WebAssembly convention for "ABI-2020", with extra TLS
/// register slots in the frame.
Baldrdash2020,
/// Specialized convention for the probestack function
/// Specialized convention for the probestack function.
Probestack,
/// Wasmtime equivalent of SystemV, not ABI-stable.
///
/// Currently only differs in how multiple return values are handled,
/// returning the first return value in a register and everything else
/// through a return-pointer.
WasmtimeSystemV,
/// Wasmtime equivalent of WindowsFastcall, not ABI-stable.
///
/// Differs from fastcall in the same way as `WasmtimeSystemV`.
WasmtimeFastcall,
}
impl CallConv {
@@ -36,6 +48,7 @@ impl CallConv {
// Default to System V for unknown targets because most everything
// uses System V.
Ok(CallingConvention::SystemV) | Err(()) => Self::SystemV,
Ok(CallingConvention::AppleAarch64) => Self::AppleAarch64,
Ok(CallingConvention::WindowsFastcall) => Self::WindowsFastcall,
Ok(unimp) => unimplemented!("calling convention: {:?}", unimp),
}
@@ -49,6 +62,7 @@ impl CallConv {
LibcallCallConv::Cold => Self::Cold,
LibcallCallConv::SystemV => Self::SystemV,
LibcallCallConv::WindowsFastcall => Self::WindowsFastcall,
LibcallCallConv::AppleAarch64 => Self::AppleAarch64,
LibcallCallConv::BaldrdashSystemV => Self::BaldrdashSystemV,
LibcallCallConv::BaldrdashWindows => Self::BaldrdashWindows,
LibcallCallConv::Baldrdash2020 => Self::Baldrdash2020,
@@ -59,7 +73,7 @@ impl CallConv {
/// Is the calling convention extending the Windows Fastcall ABI?
pub fn extends_windows_fastcall(self) -> bool {
match self {
Self::WindowsFastcall | Self::BaldrdashWindows => true,
Self::WindowsFastcall | Self::BaldrdashWindows | Self::WasmtimeFastcall => true,
_ => false,
}
}
@@ -71,6 +85,14 @@ impl CallConv {
_ => false,
}
}
/// Is this one of the Wasmtime calling conventions
/// (`WasmtimeSystemV` or `WasmtimeFastcall`)?
pub fn extends_wasmtime(self) -> bool {
    matches!(self, Self::WasmtimeSystemV | Self::WasmtimeFastcall)
}
}
impl fmt::Display for CallConv {
@@ -80,10 +102,13 @@ impl fmt::Display for CallConv {
Self::Cold => "cold",
Self::SystemV => "system_v",
Self::WindowsFastcall => "windows_fastcall",
Self::AppleAarch64 => "apple_aarch64",
Self::BaldrdashSystemV => "baldrdash_system_v",
Self::BaldrdashWindows => "baldrdash_windows",
Self::Baldrdash2020 => "baldrdash_2020",
Self::Probestack => "probestack",
Self::WasmtimeSystemV => "wasmtime_system_v",
Self::WasmtimeFastcall => "wasmtime_fastcall",
})
}
}
@@ -96,10 +121,13 @@ impl str::FromStr for CallConv {
"cold" => Ok(Self::Cold),
"system_v" => Ok(Self::SystemV),
"windows_fastcall" => Ok(Self::WindowsFastcall),
"apple_aarch64" => Ok(Self::AppleAarch64),
"baldrdash_system_v" => Ok(Self::BaldrdashSystemV),
"baldrdash_windows" => Ok(Self::BaldrdashWindows),
"baldrdash_2020" => Ok(Self::Baldrdash2020),
"probestack" => Ok(Self::Probestack),
"wasmtime_system_v" => Ok(Self::WasmtimeSystemV),
"wasmtime_fastcall" => Ok(Self::WasmtimeFastcall),
_ => Err(()),
}
}

View File

@@ -57,35 +57,34 @@ use crate::flowgraph;
use crate::ir;
#[cfg(feature = "unwind")]
use crate::isa::unwind::systemv::RegisterMappingError;
use crate::machinst::MachBackend;
use crate::machinst::{MachBackend, UnwindInfoKind};
use crate::regalloc;
use crate::result::CodegenResult;
use crate::settings;
use crate::settings::SetResult;
use crate::timing;
use alloc::borrow::Cow;
use alloc::boxed::Box;
use alloc::{borrow::Cow, boxed::Box, vec::Vec};
use core::any::Any;
use core::fmt;
use core::fmt::{Debug, Formatter};
use core::hash::Hasher;
use target_lexicon::{triple, Architecture, PointerWidth, Triple};
use thiserror::Error;
use target_lexicon::{triple, Architecture, OperatingSystem, PointerWidth, Triple};
#[cfg(feature = "riscv")]
mod riscv;
// N.B.: the old x86-64 backend (`x86`) and the new one (`x64`) can both be
// included; if the new backend is included, then it is the default backend
// returned for an x86-64 triple, but a specific option can request the old
// backend. It is important to have the ability to instantiate *both* backends
// in the same build so that we can do things like differential fuzzing between
// backends, or perhaps offer a runtime configuration flag in the future.
// N.B.: the old x86-64 backend (`x86`) and the new one (`x64`) are both
// included whenever building with x86 support. The new backend is the default,
// but the old can be requested with `BackendVariant::Legacy`. However, if this
// crate is built with the `old-x86-backend` feature, then the old backend is
// default instead.
#[cfg(feature = "x86")]
mod x86;
#[cfg(feature = "x64")]
mod x64;
// This module is made public here for benchmarking purposes. No guarantees are
// made regarding API stability.
#[cfg(feature = "x86")]
pub mod x64;
#[cfg(feature = "arm32")]
mod arm32;
@@ -93,6 +92,9 @@ mod arm32;
#[cfg(feature = "arm64")]
pub(crate) mod aarch64;
#[cfg(feature = "s390x")]
mod s390x;
pub mod unwind;
mod call_conv;
@@ -123,7 +125,7 @@ macro_rules! isa_builder {
/// The "variant" for a given target. On one platform (x86-64), we have two
/// backends, the "old" and "new" one; the new one is the default if included
/// in the build configuration and not otherwise specified.
#[derive(Clone, Copy)]
#[derive(Clone, Copy, Debug)]
pub enum BackendVariant {
/// Any backend available.
Any,
@@ -150,18 +152,19 @@ pub fn lookup_variant(triple: Triple, variant: BackendVariant) -> Result<Builder
isa_builder!(x86, (feature = "x86"), triple)
}
(Architecture::X86_64, BackendVariant::MachInst) => {
isa_builder!(x64, (feature = "x64"), triple)
isa_builder!(x64, (feature = "x86"), triple)
}
#[cfg(feature = "x64")]
#[cfg(not(feature = "old-x86-backend"))]
(Architecture::X86_64, BackendVariant::Any) => {
isa_builder!(x64, (feature = "x64"), triple)
isa_builder!(x64, (feature = "x86"), triple)
}
#[cfg(not(feature = "x64"))]
#[cfg(feature = "old-x86-backend")]
(Architecture::X86_64, BackendVariant::Any) => {
isa_builder!(x86, (feature = "x86"), triple)
}
(Architecture::Arm { .. }, _) => isa_builder!(arm32, (feature = "arm32"), triple),
(Architecture::Aarch64 { .. }, _) => isa_builder!(aarch64, (feature = "arm64"), triple),
(Architecture::S390x { .. }, _) => isa_builder!(s390x, (feature = "s390x"), triple),
_ => Err(LookupError::Unsupported),
}
}
@@ -180,17 +183,30 @@ pub fn lookup_by_name(name: &str) -> Result<Builder, LookupError> {
}
/// Describes reason for target lookup failure
#[derive(Error, PartialEq, Eq, Copy, Clone, Debug)]
#[derive(PartialEq, Eq, Copy, Clone, Debug)]
pub enum LookupError {
/// Support for this target was disabled in the current build.
#[error("Support for this target is disabled")]
SupportDisabled,
/// Support for this target has not yet been implemented.
#[error("Support for this target has not been implemented yet")]
Unsupported,
}
// Error and Display are implemented manually instead of using thiserror to reduce the number
// of dependencies used by Cranelift.
impl std::error::Error for LookupError {}
impl fmt::Display for LookupError {
    /// Write the human-readable reason for the lookup failure.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        // Pick the fixed message first, then emit it in one place.
        let message = match *self {
            LookupError::SupportDisabled => "Support for this target is disabled",
            LookupError::Unsupported => "Support for this target has not been implemented yet",
        };
        f.write_str(message)
    }
}
/// Builder for a `TargetIsa`.
/// Modify the ISA-specific settings before creating the `TargetIsa` trait object with `finish`.
#[derive(Clone)]
@@ -201,6 +217,16 @@ pub struct Builder {
}
impl Builder {
/// Gets the triple for the builder.
pub fn triple(&self) -> &Triple {
&self.triple
}
/// Iterates the available settings in the builder.
///
/// Delegates to the `iter` method of the builder's `setup` field.
pub fn iter(&self) -> impl Iterator<Item = settings::Setting> {
    self.setup.iter()
}
/// Combine the ISA-specific settings with the provided ISA-independent settings and allocate a
/// fully configured `TargetIsa` trait object.
pub fn finish(self, shared_flags: settings::Flags) -> Box<dyn TargetIsa> {
@@ -265,6 +291,14 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
/// Get the ISA-independent flags that were used to make this trait object.
fn flags(&self) -> &settings::Flags;
/// Get the ISA-dependent flag values that were used to make this trait object.
fn isa_flags(&self) -> Vec<settings::Value>;
/// Get the variant of this ISA (Legacy or MachInst).
///
/// This default implementation reports `BackendVariant::Legacy`.
// NOTE(review): presumably MachInst-based backends override this; confirm
// against the `TargetIsaAdapter` implementation.
fn variant(&self) -> BackendVariant {
    BackendVariant::Legacy
}
/// Hashes all flags, both ISA-independent and ISA-specific, into the
/// specified hasher.
fn hash_all_flags(&self, hasher: &mut dyn Hasher);
@@ -460,6 +494,18 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
/// IntCC condition for Unsigned Subtraction Overflow (Borrow/Carry).
fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC;
/// Returns the flavor of unwind information emitted for this target.
///
/// The choice is keyed on the operating system of the target triple. With
/// the `unwind` feature enabled, Windows targets use
/// `UnwindInfoKind::Windows` and every other operating system uses
/// `UnwindInfoKind::SystemV`. Without the feature, the result is always
/// `UnwindInfoKind::None`.
fn unwind_info_kind(&self) -> UnwindInfoKind {
    match self.triple().operating_system {
        // Exactly one of the two wildcard arms below survives `cfg`
        // evaluation, so the match is exhaustive in both configurations.
        #[cfg(feature = "unwind")]
        OperatingSystem::Windows => UnwindInfoKind::Windows,
        #[cfg(feature = "unwind")]
        _ => UnwindInfoKind::SystemV,
        #[cfg(not(feature = "unwind"))]
        _ => UnwindInfoKind::None,
    }
}
/// Creates unwind information for the function.
///
/// Returns `None` if there is no unwind information for the function.

View File

@@ -15,8 +15,7 @@ use crate::isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encoding
use crate::isa::Builder as IsaBuilder;
use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
use crate::regalloc;
use alloc::borrow::Cow;
use alloc::boxed::Box;
use alloc::{borrow::Cow, boxed::Box, vec::Vec};
use core::any::Any;
use core::fmt;
use core::hash::{Hash, Hasher};
@@ -70,6 +69,10 @@ impl TargetIsa for Isa {
&self.shared_flags
}
/// Collect the values yielded by iterating this ISA's specific flags.
fn isa_flags(&self) -> Vec<shared_settings::Value> {
    let values = self.isa_flags.iter();
    values.collect()
}
fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) {
self.shared_flags.hash(&mut hasher);
self.isa_flags.hash(&mut hasher);

View File

@@ -1,6 +1,6 @@
//! RISC-V Settings.
use crate::settings::{self, detail, Builder};
use crate::settings::{self, detail, Builder, Value};
use core::fmt;
// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs`. This file contains a

View File

@@ -0,0 +1,770 @@
//! Implementation of a standard S390x ABI.
//!
//! This machine uses the "vanilla" ABI implementation from abi_impl.rs,
//! however a few details are different from the description there:
//!
//! - On s390x, the caller must provide a "register save area" of 160
//! bytes to any function it calls. The called function is free to use
//! this space for any purpose; usually to save callee-saved GPRs.
//! (Note that while this area is allocated by the caller, it is counted
//! as part of the callee's stack frame; in particular, the callee's CFA
//! is the top of the register save area, not the incoming SP value.)
//!
//! - Overflow arguments are passed on the stack starting immediately
//! above the register save area. On s390x, this space is allocated
//! only once directly in the prologue, using a size large enough to
//! hold overflow arguments for every call in the function.
//!
//! - On s390x we do not use a frame pointer register; instead, every
//! element of the stack frame is addressed via (constant) offsets
//! from the stack pointer. Note that due to the above (and because
//! there are no variable-sized stack allocations in cranelift), the
//! value of the stack pointer register never changes after the
//! initial allocation in the function prologue.
//!
//! Overall, the stack frame layout on s390x is as follows:
//!
//! ```plain
//! (high address)
//!
//! +---------------------------+
//! | ... |
//! CFA -----> | stack args |
//! +---------------------------+
//! | ... |
//! | 160 bytes reg save area |
//! SP at function entry -----> | (used to save GPRs) |
//! +---------------------------+
//! | ... |
//! | clobbered callee-saves |
//! | (used to save FPRs) |
//! unwind-frame base ----> | (alloc'd by prologue) |
//! +---------------------------+
//! | ... |
//! | spill slots |
//! | (accessed via nominal SP) |
//! | ... |
//! | stack slots |
//! | (accessed via nominal SP) |
//! nominal SP ---------------> | (alloc'd by prologue) |
//! +---------------------------+
//! | ... |
//! | args for call |
//! | outgoing reg save area |
//! SP during function ------> | (alloc'd by prologue) |
//! +---------------------------+
//!
//! (low address)
//! ```
use crate::ir;
use crate::ir::condcodes::IntCC;
use crate::ir::types;
use crate::ir::MemFlags;
use crate::ir::Type;
use crate::isa;
use crate::isa::s390x::inst::*;
use crate::isa::unwind::UnwindInst;
use crate::machinst::*;
use crate::settings;
use crate::{CodegenError, CodegenResult};
use alloc::boxed::Box;
use alloc::vec::Vec;
use regalloc::{RealReg, Reg, RegClass, Set, Writable};
use smallvec::{smallvec, SmallVec};
use std::convert::TryFrom;
// We use a generic implementation that factors out ABI commonalities.
/// Support for the S390x ABI from the callee side (within a function body).
pub type S390xABICallee = ABICalleeImpl<S390xMachineDeps>;
/// Support for the S390x ABI from the caller side (at a callsite).
pub type S390xABICaller = ABICallerImpl<S390xMachineDeps>;
/// ABI register usage: is a value of type `ty` assigned to a general-purpose
/// register? True for the integer types I8..I64, the reference type R64 and
/// the boolean types B1..B64.
fn in_int_reg(ty: Type) -> bool {
    matches!(
        ty,
        types::I8
            | types::I16
            | types::I32
            | types::I64
            | types::R64
            | types::B1
            | types::B8
            | types::B16
            | types::B32
            | types::B64
    )
}
/// ABI register usage: is a value of type `ty` assigned to a floating-point
/// register? True only for F32 and F64.
fn in_flt_reg(ty: Type) -> bool {
    matches!(ty, types::F32 | types::F64)
}
/// Register for the `idx`-th integer argument: GPR 2 through GPR 6 for the
/// first five, `None` once the argument registers are exhausted.
fn get_intreg_for_arg(idx: usize) -> Option<Reg> {
    let gpr_num = match idx {
        0 => 2,
        1 => 3,
        2 => 4,
        3 => 5,
        4 => 6,
        _ => return None,
    };
    Some(regs::gpr(gpr_num))
}
/// Register for the `idx`-th floating-point argument: FPR 0, 2, 4 and 6 for
/// the first four, `None` once the argument registers are exhausted.
fn get_fltreg_for_arg(idx: usize) -> Option<Reg> {
    let fpr_num = match idx {
        0 => 0,
        1 => 2,
        2 => 4,
        3 => 6,
        _ => return None,
    };
    Some(regs::fpr(fpr_num))
}
/// Register for the `idx`-th integer return value, or `None` if it must go
/// elsewhere.
fn get_intreg_for_ret(idx: usize) -> Option<Reg> {
    let gpr_num = match idx {
        0 => 2,
        // ABI extension to support multi-value returns:
        1 => 3,
        2 => 4,
        3 => 5,
        _ => return None,
    };
    Some(regs::gpr(gpr_num))
}
/// Register for the `idx`-th floating-point return value, or `None` if it
/// must go elsewhere.
fn get_fltreg_for_ret(idx: usize) -> Option<Reg> {
    let fpr_num = match idx {
        0 => 0,
        // ABI extension to support multi-value returns:
        1 => 2,
        2 => 4,
        3 => 6,
        _ => return None,
    };
    Some(regs::fpr(fpr_num))
}
/// This is the limit for the size of argument and return-value areas on the
/// stack. We place a reasonable limit here to avoid integer overflow issues
/// with 32-bit arithmetic: for now, 128 MB.
static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024;
/// Lower the generic ABI stack addressing modes to s390x memory arguments.
///
/// This is implemented as `From<StackAMode> for MemArg` rather than
/// `Into<MemArg> for StackAMode`: the standard library's blanket impl then
/// provides `StackAMode: Into<MemArg>` automatically, so existing `.into()`
/// call sites keep working while `MemArg::from(..)` becomes available too.
impl From<StackAMode> for MemArg {
    fn from(mode: StackAMode) -> MemArg {
        match mode {
            // "FP"-relative accesses are expressed relative to the stack
            // pointer value at function entry.
            StackAMode::FPOffset(off, _ty) => MemArg::InitialSPOffset { off },
            StackAMode::NominalSPOffset(off, _ty) => MemArg::NominalSPOffset { off },
            // Real-SP-relative accesses use the stack register directly.
            StackAMode::SPOffset(off, _ty) => {
                MemArg::reg_plus_off(stack_reg(), off, MemFlags::trusted())
            }
        }
    }
}
/// S390x-specific ABI behavior. This struct just serves as an implementation
/// point for the trait; it is never actually instantiated.
pub struct S390xMachineDeps;
impl ABIMachineSpec for S390xMachineDeps {
type I = Inst;
/// Width of a machine word in bits: 64 on s390x.
fn word_bits() -> u32 {
    64
}
/// Return required stack alignment in bytes.
///
/// This is 8 for every calling convention; the argument is ignored.
fn stack_align(_call_conv: isa::CallConv) -> u32 {
    8
}
/// Assign a register or stack location to every entry of `params`.
///
/// `args_or_rets` selects whether `params` describes arguments or return
/// values. When `add_ret_area_ptr` is set, one extra I64 argument is appended
/// for the return-area pointer.
///
/// Returns the list of locations, the stack offset reached after the last
/// stack slot (rounded up to 8 bytes), and the index of the return-area
/// pointer argument if one was added. Fails with `ImplLimitExceeded` when the
/// stack area grows beyond `STACK_ARG_RET_SIZE_LIMIT`.
///
/// Panics on an argument purpose other than `VMContext`, `Normal`,
/// `StackLimit` or `SignatureId`.
fn compute_arg_locs(
    call_conv: isa::CallConv,
    _flags: &settings::Flags,
    params: &[ir::AbiParam],
    args_or_rets: ArgsOrRets,
    add_ret_area_ptr: bool,
) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
    let for_args = args_or_rets == ArgsOrRets::Args;
    // Return values in the Wasmtime ABI are treated specially below.
    let wasmtime_rets = call_conv.extends_wasmtime() && args_or_rets == ArgsOrRets::Rets;

    let mut next_gpr = 0;
    let mut next_fpr = 0;
    // Stack arguments start at offset 160; return values start at 0.
    let mut next_stack: u64 = if for_args { 160 } else { 0 };
    let mut ret = vec![];

    for (i, param) in params.iter().enumerate() {
        // Validate "purpose".
        match param.purpose {
            ir::ArgumentPurpose::VMContext
            | ir::ArgumentPurpose::Normal
            | ir::ArgumentPurpose::StackLimit
            | ir::ArgumentPurpose::SignatureId => {}
            _ => panic!(
                "Unsupported argument purpose {:?} in signature: {:?}",
                param.purpose, params
            ),
        }

        let intreg = in_int_reg(param.value_type);
        let fltreg = in_flt_reg(param.value_type);
        debug_assert!(intreg || fltreg);
        debug_assert!(!(intreg && fltreg));

        // Pick the register class counter and the next free register of
        // that class, if any is left.
        let (reg_counter, reg_choice) = if intreg {
            let choice = if for_args {
                get_intreg_for_arg(next_gpr)
            } else {
                get_intreg_for_ret(next_gpr)
            };
            (&mut next_gpr, choice)
        } else {
            let choice = if for_args {
                get_fltreg_for_arg(next_fpr)
            } else {
                get_fltreg_for_ret(next_fpr)
            };
            (&mut next_fpr, choice)
        };

        // In the Wasmtime ABI only the first return value can be in a register.
        let reg_choice = if wasmtime_rets && i > 0 {
            None
        } else {
            reg_choice
        };

        match reg_choice {
            Some(reg) => {
                ret.push(ABIArg::reg(
                    reg.to_real_reg(),
                    param.value_type,
                    param.extension,
                    param.purpose,
                ));
                *reg_counter += 1;
            }
            None => {
                // Every argument or return value takes a slot of at least
                // 8 bytes, except for return values in the Wasmtime ABI.
                let size = (ty_bits(param.value_type) / 8) as u64;
                let slot_size = if wasmtime_rets {
                    size
                } else {
                    std::cmp::max(size, 8)
                };

                // Align the stack slot.
                debug_assert!(slot_size.is_power_of_two());
                next_stack = align_to(next_stack, slot_size);

                // A value smaller than its slot that was not extended is
                // passed right-aligned within the slot.
                let unextended = param.extension == ir::ArgumentExtension::None;
                let offset = if size < slot_size && unextended {
                    slot_size - size
                } else {
                    0
                };
                ret.push(ABIArg::stack(
                    (next_stack + offset) as i64,
                    param.value_type,
                    param.extension,
                    param.purpose,
                ));
                next_stack += slot_size;
            }
        }
    }

    next_stack = align_to(next_stack, 8);

    let extra_arg = if add_ret_area_ptr {
        debug_assert!(args_or_rets == ArgsOrRets::Args);
        match get_intreg_for_arg(next_gpr) {
            Some(reg) => ret.push(ABIArg::reg(
                reg.to_real_reg(),
                types::I64,
                ir::ArgumentExtension::None,
                ir::ArgumentPurpose::Normal,
            )),
            None => {
                ret.push(ABIArg::stack(
                    next_stack as i64,
                    types::I64,
                    ir::ArgumentExtension::None,
                    ir::ArgumentPurpose::Normal,
                ));
                next_stack += 8;
            }
        }
        Some(ret.len() - 1)
    } else {
        None
    };

    // To avoid overflow issues, limit the arg/return size to something
    // reasonable -- here, 128 MB.
    if next_stack > STACK_ARG_RET_SIZE_LIMIT {
        return Err(CodegenError::ImplLimitExceeded);
    }

    Ok((ret, next_stack as i64, extra_arg))
}
/// Offset from the "FP" reference point to the argument area.
///
/// Always 0 here, regardless of calling convention or flags.
fn fp_to_arg_offset(_call_conv: isa::CallConv, _flags: &settings::Flags) -> i64 {
    0
}
fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Inst {
Inst::gen_load(into_reg, mem.into(), ty)
}
/// Generate a store of `from_reg` (of type `ty`) to the stack location `mem`.
fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst {
    // `mem.into()` converts the generic stack addressing mode to a `MemArg`.
    Inst::gen_store(mem.into(), from_reg, ty)
}
/// Generate a register-to-register move of a value of type `ty`;
/// delegates to `Inst::gen_move`.
fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
    Inst::gen_move(to_reg, from_reg, ty)
}
/// Generate an `Inst::Extend` that widens `from_reg` from `from_bits` to
/// `to_bits` into `to_reg`, sign-extending when `signed` is true.
///
/// Panics if `from_bits` is not strictly smaller than `to_bits`.
fn gen_extend(
    to_reg: Writable<Reg>,
    from_reg: Reg,
    signed: bool,
    from_bits: u8,
    to_bits: u8,
) -> Inst {
    assert!(from_bits < to_bits);
    Inst::Extend {
        rd: to_reg,
        rn: from_reg,
        signed,
        from_bits,
        to_bits,
    }
}
/// Generate a return instruction, using GPR 14 as the link register.
fn gen_ret() -> Inst {
    Inst::Ret { link: gpr(14) }
}
/// Generate instructions computing `into_reg = from_reg + imm`.
///
/// Immediates that fit a 12-bit unsigned or 20-bit signed displacement use a
/// single load-address instruction. Anything larger falls back to a move
/// (when the registers differ) followed by a 64-bit add of the 32-bit
/// unsigned immediate.
fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallInstVec<Inst> {
    let mut insts = SmallVec::new();

    // Try the two displacement encodings first, shortest form first.
    let load_addr_mem = match UImm12::maybe_from_u64(imm as u64) {
        Some(disp) => Some(MemArg::BXD12 {
            base: from_reg,
            index: zero_reg(),
            disp,
            flags: MemFlags::trusted(),
        }),
        None => SImm20::maybe_from_i64(imm as i64).map(|disp| MemArg::BXD20 {
            base: from_reg,
            index: zero_reg(),
            disp,
            flags: MemFlags::trusted(),
        }),
    };

    match load_addr_mem {
        Some(mem) => insts.push(Inst::LoadAddr { rd: into_reg, mem }),
        None => {
            // The add works in place on `into_reg`, so copy the source there
            // first unless it already is the destination.
            if from_reg != into_reg.to_reg() {
                insts.push(Inst::mov64(into_reg, from_reg));
            }
            insts.push(Inst::AluRUImm32 {
                alu_op: ALUOp::Add64,
                rd: into_reg,
                imm,
            });
        }
    }
    insts
}
/// Generate a compare-and-trap that raises `StackOverflow` when the stack
/// pointer is unsigned-less-than-or-equal to `limit_reg`.
fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Inst> {
    let overflow_check = Inst::CmpTrapRR {
        op: CmpOp::CmpL64,
        rn: stack_reg(),
        rm: limit_reg,
        cond: Cond::from_intcc(IntCC::UnsignedLessThanOrEqual),
        trap_code: ir::TrapCode::StackOverflow,
    };
    let mut insts = SmallVec::new();
    insts.push(overflow_check);
    insts
}
/// Generate the `EpiloguePlaceholder` pseudo-instruction.
fn gen_epilogue_placeholder() -> Inst {
    Inst::EpiloguePlaceholder
}
/// Generate a load-address instruction that materializes the address of the
/// stack location `mem` in `into_reg`. The type argument is unused.
fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>, _ty: Type) -> Inst {
    Inst::LoadAddr {
        rd: into_reg,
        mem: mem.into(),
    }
}
/// Register used to hold the stack limit: the one returned by `spilltmp_reg()`.
fn get_stacklimit_reg() -> Reg {
    spilltmp_reg()
}
/// Generate a load of type `ty` from `base + offset` into `into_reg`, using
/// trusted memory flags.
fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Inst {
    Inst::gen_load(
        into_reg,
        MemArg::reg_plus_off(base, offset.into(), MemFlags::trusted()),
        ty,
    )
}
/// Generate a store of `from_reg` (of type `ty`) to `base + offset`, using
/// trusted memory flags.
fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst {
    Inst::gen_store(
        MemArg::reg_plus_off(base, offset.into(), MemFlags::trusted()),
        from_reg,
        ty,
    )
}
/// Generate the instruction that adds `imm` to the stack pointer.
///
/// Returns an empty list for a zero adjustment. Otherwise the 16-bit signed
/// immediate form is used when `imm` fits, and the 32-bit form when it does not.
fn gen_sp_reg_adjust(imm: i32) -> SmallInstVec<Inst> {
    let mut insts = SmallVec::new();
    if imm != 0 {
        let adjust = match i16::try_from(imm) {
            Ok(imm) => Inst::AluRSImm16 {
                alu_op: ALUOp::Add64,
                rd: writable_stack_reg(),
                imm,
            },
            Err(_) => Inst::AluRSImm32 {
                alu_op: ALUOp::Add64,
                rd: writable_stack_reg(),
                imm,
            },
        };
        insts.push(adjust);
    }
    insts
}
/// Generate the `VirtualSPOffsetAdj` pseudo-instruction carrying `offset`
/// (converted with `.into()` to the field's type).
fn gen_nominal_sp_adj(offset: i32) -> Inst {
    Inst::VirtualSPOffsetAdj {
        offset: offset.into(),
    }
}
/// Frame setup for the prologue: emits no instructions on s390x.
fn gen_prologue_frame_setup(_flags: &settings::Flags) -> SmallInstVec<Inst> {
    SmallVec::new()
}
/// Frame restore for the epilogue: emits no instructions on s390x.
fn gen_epilogue_frame_restore(_flags: &settings::Flags) -> SmallInstVec<Inst> {
    SmallVec::new()
}
/// Stack probe generation: currently emits nothing; the argument is ignored.
fn gen_probestack(_: u32) -> SmallInstVec<Self::I> {
    // TODO: implement if we ever require stack probes on an s390x host
    // (unlikely unless Lucet is ported)
    smallvec![]
}
/// Generate the prologue code that saves clobbered callee-saved registers
/// and allocates the stack frame.
///
/// Emitted in order: a store-multiple of GPRs from the lowest clobbered one
/// through r15 at offset `8 * <first GPR>` from the incoming SP; the SP
/// decrement covering outgoing args, FPR clobbers and fixed frame storage; a
/// nominal-SP adjustment for the outgoing-args area; then one 64-bit store per
/// clobbered FPR. Unwind pseudo-instructions are interleaved when
/// `flags.unwind_info()` is set.
///
/// Returns the number of bytes used for FPR clobbers (8 per register)
/// together with the instructions.
fn gen_clobber_save(
    call_conv: isa::CallConv,
    flags: &settings::Flags,
    clobbers: &Set<Writable<RealReg>>,
    fixed_frame_storage_size: u32,
    outgoing_args_size: u32,
) -> (u64, SmallVec<[Inst; 16]>) {
    let emit_unwind = flags.unwind_info();

    // Collect clobbered registers.
    let (clobbered_gpr, clobbered_fpr) = get_regs_saved_in_prologue(call_conv, clobbers);
    // Lowest clobbered GPR encoding, or 16 when no GPR needs saving.
    let first_clobbered_gpr = clobbered_gpr
        .iter()
        .map(|reg| reg.to_reg().get_hw_encoding())
        .fold(16, |lowest, enc| if enc < lowest { enc } else { lowest });
    let clobber_size = clobbered_fpr.len() * 8;
    // FPR save slots sit above the outgoing-args and fixed-frame areas.
    let fpr_area_base = outgoing_args_size as i64 + fixed_frame_storage_size as i64;

    let mut insts = SmallVec::new();

    if emit_unwind {
        insts.push(Inst::Unwind {
            inst: UnwindInst::DefineNewFrame {
                offset_upward_to_caller_sp: 160,
                offset_downward_to_clobbers: clobber_size as u32,
            },
        });
    }

    // Use STMG to save clobbered GPRs into save area.
    if first_clobbered_gpr < 16 {
        let offset = 8 * first_clobbered_gpr as i64;
        insts.push(Inst::StoreMultiple64 {
            rt: gpr(first_clobbered_gpr as u8),
            rt2: gpr(15),
            addr_reg: stack_reg(),
            addr_off: SImm20::maybe_from_i64(offset).unwrap(),
        });
    }
    if emit_unwind {
        insts.extend((first_clobbered_gpr..16).map(|i| Inst::Unwind {
            inst: UnwindInst::SaveReg {
                clobber_offset: clobber_size as u32 + (i * 8) as u32,
                reg: gpr(i as u8).to_real_reg(),
            },
        }));
    }

    // Decrement stack pointer.
    let stack_size =
        outgoing_args_size as i32 + clobber_size as i32 + fixed_frame_storage_size as i32;
    insts.extend(Self::gen_sp_reg_adjust(-stack_size));
    if emit_unwind {
        insts.push(Inst::Unwind {
            inst: UnwindInst::StackAlloc {
                size: stack_size as u32,
            },
        });
    }

    let sp_adj = outgoing_args_size as i32;
    if sp_adj > 0 {
        insts.push(Self::gen_nominal_sp_adj(sp_adj));
    }

    // Save FPRs.
    for (i, reg) in clobbered_fpr.iter().enumerate() {
        let slot = (i * 8) as i64 + fpr_area_base;
        insts.push(Inst::FpuStore64 {
            rd: reg.to_reg().to_reg(),
            mem: MemArg::reg_plus_off(stack_reg(), slot, MemFlags::trusted()),
        });
        if emit_unwind {
            insts.push(Inst::Unwind {
                inst: UnwindInst::SaveReg {
                    clobber_offset: (i * 8) as u32,
                    reg: reg.to_reg(),
                },
            });
        }
    }

    (clobber_size as u64, insts)
}
/// Generate the epilogue code that restores clobbered callee-saved registers
/// and releases the stack frame.
///
/// FPRs are reloaded first. GPRs from the lowest clobbered one through r15 are
/// then restored with a single load-multiple. Because that range includes the
/// stack pointer, the explicit SP increment is skipped whenever the combined
/// offset (save-area offset plus frame size) fits a 20-bit signed displacement.
fn gen_clobber_restore(
    call_conv: isa::CallConv,
    _: &settings::Flags,
    clobbers: &Set<Writable<RealReg>>,
    fixed_frame_storage_size: u32,
    outgoing_args_size: u32,
) -> SmallVec<[Inst; 16]> {
    // Collect clobbered registers.
    let (clobbered_gpr, clobbered_fpr) = get_regs_saved_in_prologue(call_conv, clobbers);
    // Lowest clobbered GPR encoding, or 16 when no GPR was saved.
    let first_clobbered_gpr = clobbered_gpr
        .iter()
        .map(|reg| reg.to_reg().get_hw_encoding())
        .fold(16, |lowest, enc| if enc < lowest { enc } else { lowest });
    let clobber_size = clobbered_fpr.len() * 8;
    let fpr_area_base = outgoing_args_size as i64 + fixed_frame_storage_size as i64;

    let mut insts = SmallVec::new();

    // Restore FPRs.
    for (i, reg) in clobbered_fpr.iter().enumerate() {
        let slot = (i * 8) as i64 + fpr_area_base;
        insts.push(Inst::FpuLoad64 {
            rd: Writable::from_reg(reg.to_reg().to_reg()),
            mem: MemArg::reg_plus_off(stack_reg(), slot, MemFlags::trusted()),
        });
    }

    // Increment stack pointer unless it will be restored implicitly.
    let stack_size =
        outgoing_args_size as i32 + clobber_size as i32 + fixed_frame_storage_size as i32;
    let gpr_save_offset = 8 * first_clobbered_gpr as i64;
    let implicit_sp_restore = first_clobbered_gpr < 16
        && SImm20::maybe_from_i64(gpr_save_offset + stack_size as i64).is_some();
    if !implicit_sp_restore {
        insts.extend(Self::gen_sp_reg_adjust(stack_size));
    }

    // Use LMG to restore clobbered GPRs from save area.
    if first_clobbered_gpr < 16 {
        // With an implicit restore SP still points at the bottom of the
        // frame, so the frame size is folded into the displacement.
        let offset = if implicit_sp_restore {
            gpr_save_offset + stack_size as i64
        } else {
            gpr_save_offset
        };
        insts.push(Inst::LoadMultiple64 {
            rt: writable_gpr(first_clobbered_gpr as u8),
            rt2: writable_gpr(15),
            addr_reg: stack_reg(),
            addr_off: SImm20::maybe_from_i64(offset).unwrap(),
        });
    }

    insts
}
/// Generate the instruction sequence for a call to `dest`.
///
/// A near external name becomes a direct call. A far external name is first
/// loaded into `tmp` (not a safepoint) and then called indirectly. A register
/// destination is called indirectly as-is. The call itself is always marked
/// as a safepoint and links through GPR 14. Both calling-convention
/// arguments are unused.
fn gen_call(
    dest: &CallDest,
    uses: Vec<Reg>,
    defs: Vec<Writable<Reg>>,
    opcode: ir::Opcode,
    tmp: Writable<Reg>,
    _callee_conv: isa::CallConv,
    _caller_conv: isa::CallConv,
) -> SmallVec<[(InstIsSafepoint, Inst); 2]> {
    let link = writable_gpr(14);
    let mut insts = SmallVec::new();

    let call = match dest {
        CallDest::ExtName(name, RelocDistance::Near) => Inst::Call {
            link,
            info: Box::new(CallInfo {
                dest: name.clone(),
                uses,
                defs,
                opcode,
            }),
        },
        CallDest::ExtName(name, RelocDistance::Far) => {
            // Materialize the far symbol's address before the indirect call.
            insts.push((
                InstIsSafepoint::No,
                Inst::LoadExtNameFar {
                    rd: tmp,
                    name: Box::new(name.clone()),
                    offset: 0,
                },
            ));
            Inst::CallInd {
                link,
                info: Box::new(CallIndInfo {
                    rn: tmp.to_reg(),
                    uses,
                    defs,
                    opcode,
                }),
            }
        }
        CallDest::Reg(reg) => Inst::CallInd {
            link,
            info: Box::new(CallIndInfo {
                rn: *reg,
                uses,
                defs,
                opcode,
            }),
        },
    };
    insts.push((InstIsSafepoint::Yes, call));

    insts
}
/// Not supported on s390x yet: always panics via `unimplemented!`.
/// All arguments are ignored.
fn gen_memcpy(
    _call_conv: isa::CallConv,
    _dst: Reg,
    _src: Reg,
    _size: usize,
) -> SmallVec<[Self::I; 8]> {
    unimplemented!("StructArgs not implemented for S390X yet");
}
/// Number of 8-byte spill slots needed for a value of register class `rc`.
///
/// Both supported classes (`I64` and `F64`) need exactly one slot, whatever
/// the value type. Panics on any other register class.
fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 {
    // The type does not influence the slot count.
    let _ = ty;
    match rc {
        RegClass::I64 | RegClass::F64 => 1,
        _ => panic!("Unexpected register class!"),
    }
}
/// Get the current virtual-SP offset from an instruction-emission state.
///
/// Reads the `virtual_sp_offset` field of the state.
fn get_virtual_sp_offset_from_state(s: &EmitState) -> i64 {
    s.virtual_sp_offset
}
/// Get the nominal-SP-to-FP offset from an instruction-emission state.
///
/// On s390x this is the state's `initial_sp_offset` field.
fn get_nominal_sp_to_fp(s: &EmitState) -> i64 {
    s.initial_sp_offset
}
/// List the registers a call with the given callee convention may clobber.
///
/// Scans GPR 0..14 followed by FPR 0..14 and keeps those for which
/// `is_reg_clobbered_by_call` holds, preserving that order.
fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec<Writable<Reg>> {
    let gprs = (0..15).map(|i| writable_gpr(i));
    let fprs = (0..15).map(|i| writable_fpr(i));
    gprs.chain(fprs)
        .filter(|reg| is_reg_clobbered_by_call(call_conv_of_callee, reg.to_reg().to_real_reg()))
        .collect()
}
/// Extension mode to apply to an argument: the `specified` mode is returned
/// unchanged, regardless of calling convention.
fn get_ext_mode(
    _call_conv: isa::CallConv,
    specified: ir::ArgumentExtension,
) -> ir::ArgumentExtension {
    specified
}
}
/// Is `r` a callee-saved register, i.e. one the prologue must save when it
/// is clobbered? The calling convention is ignored. Panics on a register
/// class other than `I64` or `F64`.
fn is_reg_saved_in_prologue(_call_conv: isa::CallConv, r: RealReg) -> bool {
    // Lowest callee-saved encoding of the class; the range extends
    // through encoding 15 for both classes.
    let first_saved = match r.get_class() {
        // r6 - r15 inclusive are callee-saves.
        RegClass::I64 => 6,
        // f8 - f15 inclusive are callee-saves.
        RegClass::F64 => 8,
        _ => panic!("Unexpected RegClass"),
    };
    let enc = r.get_hw_encoding();
    enc >= first_saved && enc <= 15
}
/// Split the callee-saved members of `regs` into (GPRs, FPRs), each sorted
/// by register index.
fn get_regs_saved_in_prologue(
    call_conv: isa::CallConv,
    regs: &Set<Writable<RealReg>>,
) -> (Vec<Writable<RealReg>>, Vec<Writable<RealReg>>) {
    let mut gpr_list = vec![];
    let mut fpr_list = vec![];
    let callee_saved = regs
        .iter()
        .copied()
        .filter(|reg| is_reg_saved_in_prologue(call_conv, reg.to_reg()));
    for reg in callee_saved {
        let bucket = match reg.to_reg().get_class() {
            RegClass::I64 => &mut gpr_list,
            RegClass::F64 => &mut fpr_list,
            _ => panic!("Unexpected RegClass"),
        };
        bucket.push(reg);
    }
    // Sort registers for deterministic code output.
    gpr_list.sort_by_key(|r| r.to_reg().get_index());
    fpr_list.sort_by_key(|r| r.to_reg().get_index());
    (gpr_list, fpr_list)
}
/// Is `r` a caller-saved register, i.e. one a call may clobber? The calling
/// convention is ignored. Panics on a register class other than `I64` or
/// `F64`.
fn is_reg_clobbered_by_call(_call_conv: isa::CallConv, r: RealReg) -> bool {
    // Highest caller-saved encoding of the class.
    let last_clobbered = match r.get_class() {
        // r0 - r5 inclusive are caller-saves.
        RegClass::I64 => 5,
        // f0 - f7 inclusive are caller-saves.
        RegClass::F64 => 7,
        _ => panic!("Unexpected RegClass"),
    };
    r.get_hw_encoding() <= last_clobbered
}

View File

@@ -0,0 +1,317 @@
//! S390x ISA definitions: instruction arguments.
// Some variants are never constructed, but we still want them as options in the future.
#![allow(dead_code)]
use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::MemFlags;
use crate::isa::s390x::inst::*;
use crate::machinst::MachLabel;
use regalloc::{PrettyPrint, RealRegUniverse, Reg};
use std::string::String;
//=============================================================================
// Instruction sub-components (memory addresses): definitions
/// A memory argument to load/store, encapsulating the possible addressing modes.
///
/// The first group of variants are addressing modes the hardware provides;
/// the second group are virtual modes that only exist until code emission.
#[derive(Clone, Debug)]
pub enum MemArg {
//
// Real IBM Z addressing modes:
//
/// Base register, index register, and 12-bit unsigned displacement.
BXD12 {
base: Reg,
index: Reg,
disp: UImm12,
flags: MemFlags,
},
/// Base register, index register, and 20-bit signed displacement.
BXD20 {
base: Reg,
index: Reg,
disp: SImm20,
flags: MemFlags,
},
/// PC-relative reference to a label.
Label { target: BranchTarget },
/// PC-relative reference to a near symbol.
Symbol {
name: Box<ExternalName>,
offset: i32,
flags: MemFlags,
},
//
// Virtual addressing modes that are lowered at emission time:
//
/// Arbitrary offset from a register. Converted to generation of large
/// offsets with multiple instructions as necessary during code emission.
RegOffset { reg: Reg, off: i64, flags: MemFlags },
/// Offset from the stack pointer at function entry.
InitialSPOffset { off: i64 },
/// Offset from the "nominal stack pointer", which is where the real SP is
/// just after stack and spill slots are allocated in the function prologue.
/// At emission time, this is converted to `SPOffset` with a fixup added to
/// the offset constant. The fixup is a running value that is tracked as
/// emission iterates through instructions in linear order, and can be
/// adjusted up and down with [Inst::VirtualSPOffsetAdj].
///
/// The standard ABI is in charge of handling this (by emitting the
/// adjustment meta-instructions). It maintains the invariant that "nominal
/// SP" is where the actual SP is after the function prologue and before
/// clobber pushes. See the diagram in the documentation for
/// [the ABI module](crate::isa::s390x::abi) for more details.
NominalSPOffset { off: i64 },
}
impl MemArg {
    /// Build a memory reference whose address is held in `reg`.
    ///
    /// This is the `BXD12` form with `zero_reg()` as the index and a zero
    /// displacement.
    pub fn reg(reg: Reg, flags: MemFlags) -> MemArg {
        MemArg::reg_plus_reg(reg, zero_reg(), flags)
    }

    /// Build a memory reference whose address is the sum of two registers.
    ///
    /// `reg1` becomes the base, `reg2` the index, and the displacement is zero.
    pub fn reg_plus_reg(reg1: Reg, reg2: Reg, flags: MemFlags) -> MemArg {
        let disp = UImm12::zero();
        MemArg::BXD12 {
            base: reg1,
            index: reg2,
            disp,
            flags,
        }
    }

    /// Build a memory reference whose address is a register plus an
    /// arbitrary offset (the virtual `RegOffset` mode).
    pub fn reg_plus_off(reg: Reg, off: i64, flags: MemFlags) -> MemArg {
        MemArg::RegOffset { reg, off, flags }
    }

    /// Return the memory flags attached to this reference.
    ///
    /// Modes that carry no flags of their own (labels and the two
    /// stack-pointer-relative modes) report `MemFlags::trusted()`.
    pub(crate) fn get_flags(&self) -> MemFlags {
        match self {
            MemArg::BXD12 { flags, .. }
            | MemArg::BXD20 { flags, .. }
            | MemArg::RegOffset { flags, .. }
            | MemArg::Symbol { flags, .. } => *flags,
            MemArg::Label { .. }
            | MemArg::InitialSPOffset { .. }
            | MemArg::NominalSPOffset { .. } => MemFlags::trusted(),
        }
    }

    /// Return true unless the flags of this reference are marked `notrap`.
    pub(crate) fn can_trap(&self) -> bool {
        let flags = self.get_flags();
        !flags.notrap()
    }
}
//=============================================================================
// Instruction sub-components (conditions, branches and branch targets):
// definitions
/// Condition for conditional branches.
///
/// The condition is stored as a 4-bit mask; see the private constants on
/// the impl for the meaning this file assigns to each bit.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Cond {
    mask: u8,
}

impl Cond {
    /// Mask bit selected by the "equal" conditions.
    const EQUAL: u8 = 8;
    /// Mask bit selected by the "less than" conditions.
    const LOW: u8 = 4;
    /// Mask bit selected by the "greater than" conditions.
    const HIGH: u8 = 2;
    /// Mask bit selected by integer overflow and floating-point unordered.
    const OVERFLOW_OR_UNORDERED: u8 = 1;

    /// Build a condition directly from a mask in the range 1..=14.
    ///
    /// Panics on 0 or on any value above 14.
    pub fn from_mask(mask: u8) -> Cond {
        assert!(mask >= 1 && mask <= 14);
        Cond { mask }
    }

    /// Translate an integer condition code into a branch condition.
    ///
    /// Signed and unsigned variants of the same comparison map to the same
    /// mask.
    pub fn from_intcc(cc: IntCC) -> Cond {
        let mask = match cc {
            IntCC::Equal => Self::EQUAL,
            IntCC::NotEqual => Self::LOW | Self::HIGH,
            IntCC::SignedGreaterThanOrEqual | IntCC::UnsignedGreaterThanOrEqual => {
                Self::EQUAL | Self::HIGH
            }
            IntCC::SignedGreaterThan | IntCC::UnsignedGreaterThan => Self::HIGH,
            IntCC::SignedLessThanOrEqual | IntCC::UnsignedLessThanOrEqual => {
                Self::EQUAL | Self::LOW
            }
            IntCC::SignedLessThan | IntCC::UnsignedLessThan => Self::LOW,
            IntCC::Overflow => Self::OVERFLOW_OR_UNORDERED,
            IntCC::NotOverflow => Self::EQUAL | Self::LOW | Self::HIGH,
        };
        Cond { mask }
    }

    /// Translate a floating-point condition code into a branch condition.
    pub fn from_floatcc(cc: FloatCC) -> Cond {
        let unordered = Self::OVERFLOW_OR_UNORDERED;
        let mask = match cc {
            FloatCC::Ordered => Self::EQUAL | Self::LOW | Self::HIGH,
            FloatCC::Unordered => unordered,
            FloatCC::Equal => Self::EQUAL,
            FloatCC::NotEqual => Self::LOW | Self::HIGH | unordered,
            FloatCC::OrderedNotEqual => Self::LOW | Self::HIGH,
            FloatCC::UnorderedOrEqual => Self::EQUAL | unordered,
            FloatCC::LessThan => Self::LOW,
            FloatCC::LessThanOrEqual => Self::EQUAL | Self::LOW,
            FloatCC::GreaterThan => Self::HIGH,
            FloatCC::GreaterThanOrEqual => Self::EQUAL | Self::HIGH,
            FloatCC::UnorderedOrLessThan => Self::LOW | unordered,
            FloatCC::UnorderedOrLessThanOrEqual => Self::EQUAL | Self::LOW | unordered,
            FloatCC::UnorderedOrGreaterThan => Self::HIGH | unordered,
            FloatCC::UnorderedOrGreaterThanOrEqual => Self::EQUAL | Self::HIGH | unordered,
        };
        Cond { mask }
    }

    /// Return the inverted condition: every one of the four mask bits is
    /// flipped.
    pub fn invert(self) -> Cond {
        let flipped = !self.mask & 0x0f;
        Cond { mask: flipped }
    }

    /// Return the machine encoding of this condition.
    pub fn bits(self) -> u8 {
        self.mask
    }
}
/// A branch target. Either unresolved (basic-block index) or resolved (offset
/// from end of current instruction).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum BranchTarget {
    /// An unresolved reference to a Label, as passed into
    /// `lower_branch_group()`.
    Label(MachLabel),
    /// A fixed PC offset.
    ResolvedOffset(i32),
}

impl BranchTarget {
    /// Return the label of a label-based target, or `None` for a resolved
    /// offset.
    pub fn as_label(self) -> Option<MachLabel> {
        if let BranchTarget::Label(label) = self {
            Some(label)
        } else {
            None
        }
    }

    /// Return the resolved offset, halved, as a 16-bit field, or zero for a
    /// label-based target.
    ///
    /// Panics if the halved offset does not fit in a signed 16-bit value.
    pub fn as_ri_offset_or_zero(self) -> u16 {
        let off = if let BranchTarget::ResolvedOffset(bytes) = self {
            bytes >> 1
        } else {
            0
        };
        assert!(off <= 0x7fff);
        assert!(off >= -0x8000);
        off as u16
    }

    /// Return the resolved offset, halved, as a 32-bit field, or zero for a
    /// label-based target.
    pub fn as_ril_offset_or_zero(self) -> u32 {
        let off = if let BranchTarget::ResolvedOffset(bytes) = self {
            bytes >> 1
        } else {
            0
        };
        off as u32
    }
}
impl PrettyPrint for MemArg {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
match self {
&MemArg::BXD12 {
base, index, disp, ..
} => {
if base != zero_reg() {
if index != zero_reg() {
format!(
"{}({},{})",
disp.show_rru(mb_rru),
index.show_rru(mb_rru),
base.show_rru(mb_rru)
)
} else {
format!("{}({})", disp.show_rru(mb_rru), base.show_rru(mb_rru))
}
} else {
if index != zero_reg() {
format!("{}({},)", disp.show_rru(mb_rru), index.show_rru(mb_rru))
} else {
format!("{}", disp.show_rru(mb_rru))
}
}
}
&MemArg::BXD20 {
base, index, disp, ..
} => {
if base != zero_reg() {
if index != zero_reg() {
format!(
"{}({},{})",
disp.show_rru(mb_rru),
index.show_rru(mb_rru),
base.show_rru(mb_rru)
)
} else {
format!("{}({})", disp.show_rru(mb_rru), base.show_rru(mb_rru))
}
} else {
if index != zero_reg() {
format!("{}({},)", disp.show_rru(mb_rru), index.show_rru(mb_rru))
} else {
format!("{}", disp.show_rru(mb_rru))
}
}
}
&MemArg::Label { ref target } => target.show_rru(mb_rru),
&MemArg::Symbol {
ref name, offset, ..
} => format!("{} + {}", name, offset),
// Eliminated by `mem_finalize()`.
&MemArg::InitialSPOffset { .. }
| &MemArg::NominalSPOffset { .. }
| &MemArg::RegOffset { .. } => {
panic!("Unexpected pseudo mem-arg mode (stack-offset or generic reg-offset)!")
}
}
}
}
impl PrettyPrint for Cond {
    /// Render the condition as a mnemonic suffix (for example `e`, `ne`,
    /// `lh`).
    ///
    /// Masks 0 and 15 have no suffix and are treated as unreachable.
    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
        // Suffix for mask value `i + 1`.
        const SUFFIXES: [&str; 14] = [
            "o", "h", "nle", "l", "nhe", "lh", "ne", "e", "nlh", "he", "nl", "le", "nh", "no",
        ];
        match self.mask {
            m @ 1..=14 => SUFFIXES[usize::from(m) - 1].to_string(),
            _ => unreachable!(),
        }
    }
}
impl PrettyPrint for BranchTarget {
    /// Render a label target as `label<N>` and a resolved target as its
    /// decimal offset.
    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
        match *self {
            BranchTarget::Label(label) => format!("label{:?}", label.get()),
            BranchTarget::ResolvedOffset(off) => format!("{}", off),
        }
    }
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,231 @@
//! S390x ISA definitions: immediate constants.
use regalloc::{PrettyPrint, RealRegUniverse};
use std::string::String;
/// An unsigned 12-bit immediate.
#[derive(Clone, Copy, Debug)]
pub struct UImm12 {
    /// The value.
    value: u16,
}

impl UImm12 {
    /// Wrap `value` if it fits in 12 unsigned bits (0..=4095); otherwise
    /// return `None`.
    pub fn maybe_from_u64(value: u64) -> Option<UImm12> {
        if value >= 4096 {
            return None;
        }
        Some(UImm12 {
            value: value as u16,
        })
    }

    /// Create a zero immediate of this format.
    pub fn zero() -> UImm12 {
        UImm12 { value: 0 }
    }

    /// Bits for encoding.
    pub fn bits(&self) -> u32 {
        self.value.into()
    }
}
/// A signed 20-bit immediate.
#[derive(Clone, Copy, Debug)]
pub struct SImm20 {
    /// The value.
    value: i32,
}

impl SImm20 {
    /// Wrap `value` if it fits in 20 signed bits (-524288..=524287);
    /// otherwise return `None`.
    pub fn maybe_from_i64(value: i64) -> Option<SImm20> {
        let in_range = (-(1i64 << 19)..(1i64 << 19)).contains(&value);
        if !in_range {
            return None;
        }
        Some(SImm20 {
            value: value as i32,
        })
    }

    /// Widen an unsigned 12-bit immediate; this always succeeds.
    pub fn from_uimm12(value: UImm12) -> SImm20 {
        let widened = value.bits() as i32;
        SImm20 { value: widened }
    }

    /// Create a zero immediate of this format.
    pub fn zero() -> SImm20 {
        SImm20 { value: 0 }
    }

    /// Bits for encoding: the low 20 bits of the two's-complement value.
    pub fn bits(&self) -> u32 {
        (self.value as u32) & 0x000f_ffff
    }
}
/// A 16-bit immediate with a {0,16,32,48}-bit shift.
#[derive(Clone, Copy, Debug)]
pub struct UImm16Shifted {
    /// The value.
    pub bits: u16,
    /// Result is `bits` shifted 16*shift bits to the left.
    pub shift: u8,
}

impl UImm16Shifted {
    /// Construct a UImm16Shifted from an arbitrary 64-bit constant if possible.
    ///
    /// Succeeds when every set bit of `value` lies inside one aligned 16-bit
    /// field; the lowest such field is chosen (so zero yields shift 0).
    pub fn maybe_from_u64(value: u64) -> Option<UImm16Shifted> {
        (0..4u8).find_map(|shift| {
            let amount = 16 * u32::from(shift);
            let field = 0xffffu64 << amount;
            if value & !field == 0 {
                Some(UImm16Shifted {
                    bits: (value >> amount) as u16,
                    shift,
                })
            } else {
                None
            }
        })
    }

    /// Construct a UImm16Shifted from a 16-bit value and a shift given in
    /// bits.
    ///
    /// `shift` must be one of 0, 16, 32 or 48; any other amount returns
    /// `None`. Amounts that are not a multiple of 16 used to be rounded down
    /// silently, which produced a different constant than the one requested.
    pub fn maybe_with_shift(imm: u16, shift: u8) -> Option<UImm16Shifted> {
        if shift % 16 != 0 || shift > 48 {
            return None;
        }
        Some(UImm16Shifted {
            bits: imm,
            shift: shift / 16,
        })
    }

    /// Return the immediate with all 16 value bits inverted and the same shift.
    pub fn negate_bits(&self) -> UImm16Shifted {
        UImm16Shifted {
            bits: !self.bits,
            shift: self.shift,
        }
    }

    /// Returns the value that this constant represents.
    pub fn value(&self) -> u64 {
        u64::from(self.bits) << (16 * u32::from(self.shift))
    }
}

/// A 32-bit immediate with a {0,32}-bit shift.
#[derive(Clone, Copy, Debug)]
pub struct UImm32Shifted {
    /// The value.
    pub bits: u32,
    /// Result is `bits` shifted 32*shift bits to the left.
    pub shift: u8,
}

impl UImm32Shifted {
    /// Construct a UImm32Shifted from an arbitrary 64-bit constant if possible.
    ///
    /// Succeeds when every set bit of `value` lies in the low or in the high
    /// 32-bit half; the low half is preferred (so zero yields shift 0).
    pub fn maybe_from_u64(value: u64) -> Option<UImm32Shifted> {
        (0..2u8).find_map(|shift| {
            let amount = 32 * u32::from(shift);
            let field = 0xffff_ffffu64 << amount;
            if value & !field == 0 {
                Some(UImm32Shifted {
                    bits: (value >> amount) as u32,
                    shift,
                })
            } else {
                None
            }
        })
    }

    /// Construct a UImm32Shifted from a 32-bit value and a shift given in
    /// bits.
    ///
    /// `shift` must be 0 or 32; any other amount returns `None`. The bound
    /// used to allow shift encodings up to 3 (64 and 96 bits), which this
    /// format cannot express and which made `value()` shift a `u64` by 64
    /// bits or more.
    pub fn maybe_with_shift(imm: u32, shift: u8) -> Option<UImm32Shifted> {
        if shift % 32 != 0 || shift > 32 {
            return None;
        }
        Some(UImm32Shifted {
            bits: imm,
            shift: shift / 32,
        })
    }

    /// Re-express a shifted 16-bit immediate as a shifted 32-bit immediate
    /// that represents the same 64-bit value.
    pub fn from_uimm16shifted(value: UImm16Shifted) -> UImm32Shifted {
        // An odd 16-bit shift selects the upper half of the 32-bit field.
        let within_field = 16 * u32::from(value.shift % 2);
        UImm32Shifted {
            bits: u32::from(value.bits) << within_field,
            shift: value.shift / 2,
        }
    }

    /// Return the immediate with all 32 value bits inverted and the same shift.
    pub fn negate_bits(&self) -> UImm32Shifted {
        UImm32Shifted {
            bits: !self.bits,
            shift: self.shift,
        }
    }

    /// Returns the value that this constant represents.
    pub fn value(&self) -> u64 {
        u64::from(self.bits) << (32 * u32::from(self.shift))
    }
}
impl PrettyPrint for UImm12 {
    /// Render the immediate as a decimal number.
    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
        self.value.to_string()
    }
}
impl PrettyPrint for SImm20 {
    /// Render the immediate as a signed decimal number.
    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
        self.value.to_string()
    }
}
impl PrettyPrint for UImm16Shifted {
    /// Render the unshifted 16-bit field as a decimal number; the shift is
    /// not printed.
    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
        self.bits.to_string()
    }
}
impl PrettyPrint for UImm32Shifted {
    /// Render the unshifted 32-bit field as a decimal number; the shift is
    /// not printed.
    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
        self.bits.to_string()
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,168 @@
//! S390x ISA definitions: registers.
use crate::settings;
use regalloc::{RealRegUniverse, Reg, RegClass, RegClassInfo, Writable, NUM_REG_CLASSES};
//=============================================================================
// Registers, the Universe thereof, and printing
// Register-universe index of each GPR, addressed by hardware encoding
// (entry `n` belongs to `%rn`). The values must agree with the order in
// which `create_reg_universe` pushes the GPRs; that function asserts it.
#[rustfmt::skip]
const GPR_INDICES: [u8; 16] = [
// r0 and r1 reserved
30, 31,
// r2 - r5 call-clobbered
16, 17, 18, 19,
// r6 - r14 call-saved (order reversed)
28, 27, 26, 25, 24, 23, 22, 21, 20,
// r15 (SP)
29,
];
// Register-universe index of each FPR, addressed by hardware encoding
// (entry `n` belongs to `%fn`). The values must agree with the order in
// which `create_reg_universe` pushes the FPRs; that function asserts it.
#[rustfmt::skip]
const FPR_INDICES: [u8; 16] = [
// f0 - f7 as pairs
0, 4, 1, 5, 2, 6, 3, 7,
// f8 - f15 as pairs
8, 12, 9, 13, 10, 14, 11, 15,
];
/// Get a reference to a GPR (integer register).
///
/// `num` is the hardware encoding and must be below 16, otherwise this
/// panics. The register's universe index is looked up in `GPR_INDICES`.
pub fn gpr(num: u8) -> Reg {
    assert!(num < 16);
    let index = GPR_INDICES[usize::from(num)];
    Reg::new_real(RegClass::I64, num, index)
}
/// Get a writable reference to a GPR.
///
/// `num` is the hardware encoding, subject to the same bound as in `gpr`.
pub fn writable_gpr(num: u8) -> Writable<Reg> {
Writable::from_reg(gpr(num))
}
/// Get a reference to a FPR (floating-point register).
///
/// `num` is the hardware encoding and must be below 16, otherwise this
/// panics. The register's universe index is looked up in `FPR_INDICES`.
pub fn fpr(num: u8) -> Reg {
    assert!(num < 16);
    let index = FPR_INDICES[usize::from(num)];
    Reg::new_real(RegClass::F64, num, index)
}
/// Get a writable reference to a FPR.
///
/// `num` is the hardware encoding, subject to the same bound as in `fpr`.
pub fn writable_fpr(num: u8) -> Writable<Reg> {
Writable::from_reg(fpr(num))
}
/// Get a reference to the stack-pointer register (`%r15`).
pub fn stack_reg() -> Reg {
gpr(15)
}
/// Get a writable reference to the stack-pointer register (`%r15`).
pub fn writable_stack_reg() -> Writable<Reg> {
Writable::from_reg(stack_reg())
}
/// Get a reference to the first temporary, sometimes "spill temporary", register. This register is
/// used to compute the address of a spill slot when a direct offset addressing mode from FP is not
/// sufficient (+/- 2^11 words). We exclude this register from regalloc and reserve it for this
/// purpose for simplicity; otherwise we need a multi-stage analysis where we first determine how
/// many spill slots we have, then perhaps remove the reg from the pool and recompute regalloc.
///
/// We use r1 for this because it's a scratch register but is slightly special (used for linker
/// veneers). We're free to use it as long as we don't expect it to live through call instructions.
///
/// NOTE(review): the "+/- 2^11 words" range quoted above does not obviously correspond to the
/// 12-bit unsigned / 20-bit signed displacements this backend defines -- confirm or reword.
pub fn spilltmp_reg() -> Reg {
gpr(1)
}
/// Get a writable reference to the spilltmp reg (`%r1`).
pub fn writable_spilltmp_reg() -> Writable<Reg> {
Writable::from_reg(spilltmp_reg())
}
/// Get a reference to `%r0`.
///
/// `create_reg_universe` keeps this register out of the allocatable set.
/// NOTE(review): it appears to serve as the "no register" placeholder in
/// address operands -- confirm against the users of this function.
pub fn zero_reg() -> Reg {
gpr(0)
}
/// Create the register universe for s390x.
///
/// Registers are pushed in universe-index order: the 16 FPRs, then the
/// allocatable GPRs, then the three GPRs that are withheld from the
/// allocator. The push order must agree with `FPR_INDICES` / `GPR_INDICES`;
/// the loop at the end asserts this. `_flags` is currently unused.
pub fn create_reg_universe(_flags: &settings::Flags) -> RealRegUniverse {
let mut regs = vec![];
let mut allocable_by_class = [None; NUM_REG_CLASSES];
// Numbering Scheme: we put FPRs first, then GPRs. The GPRs exclude several registers:
// r0 (we cannot use this for addressing // FIXME regalloc)
// r1 (spilltmp)
// r15 (stack pointer)
// FPRs.
let mut base = regs.len();
regs.push((fpr(0).to_real_reg(), "%f0".into()));
regs.push((fpr(2).to_real_reg(), "%f2".into()));
regs.push((fpr(4).to_real_reg(), "%f4".into()));
regs.push((fpr(6).to_real_reg(), "%f6".into()));
regs.push((fpr(1).to_real_reg(), "%f1".into()));
regs.push((fpr(3).to_real_reg(), "%f3".into()));
regs.push((fpr(5).to_real_reg(), "%f5".into()));
regs.push((fpr(7).to_real_reg(), "%f7".into()));
regs.push((fpr(8).to_real_reg(), "%f8".into()));
regs.push((fpr(10).to_real_reg(), "%f10".into()));
regs.push((fpr(12).to_real_reg(), "%f12".into()));
regs.push((fpr(14).to_real_reg(), "%f14".into()));
regs.push((fpr(9).to_real_reg(), "%f9".into()));
regs.push((fpr(11).to_real_reg(), "%f11".into()));
regs.push((fpr(13).to_real_reg(), "%f13".into()));
regs.push((fpr(15).to_real_reg(), "%f15".into()));
allocable_by_class[RegClass::F64.rc_to_usize()] = Some(RegClassInfo {
first: base,
last: regs.len() - 1,
suggested_scratch: Some(fpr(1).get_index()),
});
// Caller-saved GPRs in the SystemV s390x ABI.
base = regs.len();
regs.push((gpr(2).to_real_reg(), "%r2".into()));
regs.push((gpr(3).to_real_reg(), "%r3".into()));
regs.push((gpr(4).to_real_reg(), "%r4".into()));
regs.push((gpr(5).to_real_reg(), "%r5".into()));
// Callee-saved GPRs in the SystemV s390x ABI.
// We start from r14 downwards in an attempt to allow the
// prolog to use as short a STMG as possible.
regs.push((gpr(14).to_real_reg(), "%r14".into()));
regs.push((gpr(13).to_real_reg(), "%r13".into()));
regs.push((gpr(12).to_real_reg(), "%r12".into()));
regs.push((gpr(11).to_real_reg(), "%r11".into()));
regs.push((gpr(10).to_real_reg(), "%r10".into()));
regs.push((gpr(9).to_real_reg(), "%r9".into()));
regs.push((gpr(8).to_real_reg(), "%r8".into()));
regs.push((gpr(7).to_real_reg(), "%r7".into()));
regs.push((gpr(6).to_real_reg(), "%r6".into()));
allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
first: base,
last: regs.len() - 1,
suggested_scratch: Some(gpr(13).get_index()),
});
// Other regs, not available to the allocator.
// Everything pushed from this point on has an index >= `allocable`.
let allocable = regs.len();
regs.push((gpr(15).to_real_reg(), "%r15".into()));
regs.push((gpr(0).to_real_reg(), "%r0".into()));
regs.push((gpr(1).to_real_reg(), "%r1".into()));
// Assert sanity: the indices in the register structs must match their
// actual indices in the array.
for (i, reg) in regs.iter().enumerate() {
assert_eq!(i, reg.0.get_index());
}
RealRegUniverse {
regs,
allocable,
allocable_by_class,
}
}

View File

@@ -0,0 +1,2 @@
#[cfg(feature = "unwind")]
pub(crate) mod systemv;

View File

@@ -0,0 +1,197 @@
//! Unwind information for System V ABI (s390x).
use crate::isa::unwind::systemv::RegisterMappingError;
use gimli::{write::CommonInformationEntry, Encoding, Format, Register};
use regalloc::{Reg, RegClass};
/// Creates a new s390x common information entry (CIE).
///
/// The entry uses 8-byte addresses in 32-bit DWARF format, version 1,
/// names `%r14` as the return-address column and starts with the CFA at
/// `%r15 + 160`.
pub fn create_cie() -> CommonInformationEntry {
    use gimli::write::CallFrameInstruction;

    let encoding = Encoding {
        address_size: 8,
        format: Format::Dwarf32,
        version: 1,
    };
    let code_alignment_factor = 1;
    let data_alignment_factor = -8;
    // Return address column - register %r14
    let return_address_register = Register(14);

    let mut cie = CommonInformationEntry::new(
        encoding,
        code_alignment_factor,
        data_alignment_factor,
        return_address_register,
    );

    // Every frame will start with the call frame address (CFA) at %r15 + 160.
    cie.add_instruction(CallFrameInstruction::Cfa(Register(15), 160));
    cie
}
/// Map Cranelift registers to their corresponding Gimli registers.
///
/// GPR `n` maps to DWARF register `n`. FPRs map into DWARF registers 16..=31
/// through the `FPR_MAP` table below. Any other register class yields
/// `RegisterMappingError::UnsupportedRegisterBank`.
pub fn map_reg(reg: Reg) -> Result<Register, RegisterMappingError> {
    // DWARF register for each GPR, indexed by hardware encoding.
    #[rustfmt::skip]
    const GPR_MAP: [gimli::Register; 16] = [
        Register(0), Register(1), Register(2), Register(3),
        Register(4), Register(5), Register(6), Register(7),
        Register(8), Register(9), Register(10), Register(11),
        Register(12), Register(13), Register(14), Register(15),
    ];
    // DWARF register for each FPR, indexed by hardware encoding.
    #[rustfmt::skip]
    const FPR_MAP: [gimli::Register; 16] = [
        Register(16), Register(20), Register(17), Register(21),
        Register(18), Register(22), Register(19), Register(23),
        Register(24), Register(28), Register(25), Register(29),
        Register(26), Register(30), Register(27), Register(31),
    ];

    // Pick the table first; the hardware encoding is only read for the two
    // supported classes.
    let table: &[gimli::Register; 16] = match reg.get_class() {
        RegClass::I64 => &GPR_MAP,
        RegClass::F64 => &FPR_MAP,
        _ => return Err(RegisterMappingError::UnsupportedRegisterBank("class?")),
    };
    Ok(table[reg.get_hw_encoding() as usize])
}
pub(crate) struct RegisterMapper;
impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper {
fn map(&self, reg: Reg) -> Result<u16, RegisterMappingError> {
Ok(map_reg(reg)?.0)
}
fn sp(&self) -> u16 {
Register(15).0
}
}
// Unwind-info tests: each one compiles a small function for s390x and
// compares the Debug rendering of the resulting FDE with a golden string.
#[cfg(test)]
mod tests {
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::{
types, AbiParam, ExternalName, Function, InstBuilder, Signature, StackSlotData,
StackSlotKind,
};
use crate::isa::{lookup, CallConv};
use crate::settings::{builder, Flags};
use crate::Context;
use gimli::write::Address;
use std::str::FromStr;
use target_lexicon::triple;
/// A single-block function with one 64-byte explicit stack slot.
#[test]
fn test_simple_func() {
let isa = lookup(triple!("s390x"))
.expect("expect s390x ISA")
.finish(Flags::new(builder()));
let mut context = Context::for_function(create_function(
CallConv::SystemV,
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
));
context.compile(&*isa).expect("expected compilation");
let fde = match context
.create_unwind_info(isa.as_ref())
.expect("can create unwind info")
{
Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
info.to_fde(Address::Constant(1234))
}
_ => panic!("expected unwind information"),
};
assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 10, lsda: None, instructions: [(4, CfaOffset(224))] }");
}
/// Build a function that only returns, optionally declaring `stack_slot`.
fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
let mut func =
Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
let block0 = func.dfg.make_block();
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(block0);
pos.ins().return_(&[]);
if let Some(stack_slot) = stack_slot {
func.stack_slots.push(stack_slot);
}
func
}
/// A function with two return blocks and one 64-byte explicit stack slot.
#[test]
fn test_multi_return_func() {
let isa = lookup(triple!("s390x"))
.expect("expect s390x ISA")
.finish(Flags::new(builder()));
let mut context = Context::for_function(create_multi_return_function(
CallConv::SystemV,
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
));
context.compile(&*isa).expect("expected compilation");
let fde = match context
.create_unwind_info(isa.as_ref())
.expect("can create unwind info")
{
Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
info.to_fde(Address::Constant(4321))
}
_ => panic!("expected unwind information"),
};
assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 26, lsda: None, instructions: [(4, CfaOffset(224))] }");
}
/// Build a function taking one `i32` that branches on it to one of two
/// blocks, each of which returns; optionally declares `stack_slot`.
fn create_multi_return_function(
call_conv: CallConv,
stack_slot: Option<StackSlotData>,
) -> Function {
let mut sig = Signature::new(call_conv);
sig.params.push(AbiParam::new(types::I32));
let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig);
let block0 = func.dfg.make_block();
let v0 = func.dfg.append_block_param(block0, types::I32);
let block1 = func.dfg.make_block();
let block2 = func.dfg.make_block();
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(block0);
pos.ins().brnz(v0, block2, &[]);
pos.ins().jump(block1, &[]);
pos.insert_block(block1);
pos.ins().return_(&[]);
pos.insert_block(block2);
pos.ins().return_(&[]);
if let Some(stack_slot) = stack_slot {
func.stack_slots.push(stack_slot);
}
func
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,296 @@
//! IBM Z 64-bit Instruction Set Architecture.
use crate::ir::condcodes::IntCC;
use crate::ir::Function;
use crate::isa::s390x::settings as s390x_settings;
use crate::isa::unwind::systemv::RegisterMappingError;
use crate::isa::Builder as IsaBuilder;
use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
use crate::result::CodegenResult;
use crate::settings as shared_settings;
use alloc::{boxed::Box, vec::Vec};
use core::hash::{Hash, Hasher};
use regalloc::{PrettyPrint, RealRegUniverse, Reg};
use target_lexicon::{Architecture, Triple};
// New backend:
mod abi;
pub(crate) mod inst;
mod lower;
mod settings;
use inst::create_reg_universe;
use self::inst::EmitInfo;
/// An IBM Z backend.
pub struct S390xBackend {
    /// Target triple this backend was created for.
    triple: Triple,
    /// Shared (target-independent) compilation flags.
    flags: shared_settings::Flags,
    /// s390x-specific flags.
    isa_flags: s390x_settings::Flags,
    /// Register universe, built once from `flags` at construction time.
    reg_universe: RealRegUniverse,
}

impl S390xBackend {
    /// Create a new IBM Z backend with the given (shared) flags.
    pub fn new_with_flags(
        triple: Triple,
        flags: shared_settings::Flags,
        isa_flags: s390x_settings::Flags,
    ) -> S390xBackend {
        S390xBackend {
            // Built first, while `flags` can still be borrowed.
            reg_universe: create_reg_universe(&flags),
            triple,
            flags,
            isa_flags,
        }
    }

    /// This performs lowering to VCode, register-allocates the code, computes block layout and
    /// finalizes branches. The result is ready for binary emission.
    fn compile_vcode(
        &self,
        func: &Function,
        flags: shared_settings::Flags,
    ) -> CodegenResult<VCode<inst::Inst>> {
        // The emission info takes its own copy of the flags; the ABI object
        // consumes the original.
        let emit_info = EmitInfo::new(flags.clone());
        let callee = abi::S390xABICallee::new(func, flags)?;
        compile::compile::<S390xBackend>(func, self, Box::new(callee), emit_info)
    }
}
impl MachBackend for S390xBackend {
    /// Compile `func` to machine code.
    ///
    /// When `want_disasm` is set, the result also carries a textual
    /// rendering of the final VCode.
    fn compile_function(
        &self,
        func: &Function,
        want_disasm: bool,
    ) -> CodegenResult<MachCompileResult> {
        let flags = self.flags();
        let vcode = self.compile_vcode(func, flags.clone())?;
        let buffer = vcode.emit();
        let frame_size = vcode.frame_size();
        let value_labels_ranges = vcode.value_labels_ranges();
        let stackslot_offsets = vcode.stackslot_offsets().clone();

        let disasm = if want_disasm {
            // Reuse the universe built from these same flags in
            // `new_with_flags` instead of constructing a fresh one (and
            // re-allocating every register name) for each disassembly.
            Some(vcode.show_rru(Some(&self.reg_universe)))
        } else {
            None
        };

        let buffer = buffer.finish();

        Ok(MachCompileResult {
            buffer,
            frame_size,
            disasm,
            value_labels_ranges,
            stackslot_offsets,
        })
    }

    /// Name of this backend.
    fn name(&self) -> &'static str {
        "s390x"
    }

    /// Target triple this backend compiles for.
    fn triple(&self) -> Triple {
        self.triple.clone()
    }

    /// Shared (target-independent) flags.
    fn flags(&self) -> &shared_settings::Flags {
        &self.flags
    }

    /// All s390x-specific flag values.
    fn isa_flags(&self) -> Vec<shared_settings::Value> {
        self.isa_flags.iter().collect()
    }

    /// Feed both the shared and the ISA-specific flags into `hasher`.
    fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) {
        self.flags.hash(&mut hasher);
        self.isa_flags.hash(&mut hasher);
    }

    /// Register universe built when the backend was created.
    fn reg_universe(&self) -> &RealRegUniverse {
        &self.reg_universe
    }

    /// Not implemented for this backend yet; calling it panics.
    fn unsigned_add_overflow_condition(&self) -> IntCC {
        unimplemented!()
    }

    /// Not implemented for this backend yet; calling it panics.
    fn unsigned_sub_overflow_condition(&self) -> IntCC {
        unimplemented!()
    }

    /// Build unwind information of the requested `kind` from a compile
    /// result.
    ///
    /// Only System V unwind info is produced; every other kind yields
    /// `Ok(None)`.
    #[cfg(feature = "unwind")]
    fn emit_unwind_info(
        &self,
        result: &MachCompileResult,
        kind: crate::machinst::UnwindInfoKind,
    ) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {
        use crate::isa::unwind::UnwindInfo;
        use crate::machinst::UnwindInfoKind;
        Ok(match kind {
            UnwindInfoKind::SystemV => {
                let mapper = self::inst::unwind::systemv::RegisterMapper;
                Some(UnwindInfo::SystemV(
                    crate::isa::unwind::systemv::create_unwind_info_from_insts(
                        &result.buffer.unwind_info[..],
                        result.buffer.data.len(),
                        &mapper,
                    )?,
                ))
            }
            _ => None,
        })
    }

    /// Create the System V common information entry for s390x.
    #[cfg(feature = "unwind")]
    fn create_systemv_cie(&self) -> Option<gimli::write::CommonInformationEntry> {
        Some(inst::unwind::systemv::create_cie())
    }

    /// Map a machine register to its DWARF register number.
    #[cfg(feature = "unwind")]
    fn map_reg_to_dwarf(&self, reg: Reg) -> Result<u16, RegisterMappingError> {
        inst::unwind::systemv::map_reg(reg).map(|reg| reg.0)
    }
}
/// Create a new `isa::Builder`.
///
/// Panics unless `triple` names the s390x architecture. The constructor
/// stored in the builder derives the ISA-specific flags from the shared
/// flags and wraps a new `S390xBackend` in a `TargetIsaAdapter`.
pub fn isa_builder(triple: Triple) -> IsaBuilder {
    assert!(triple.architecture == Architecture::S390x);
    let setup = s390x_settings::builder();
    IsaBuilder {
        triple,
        setup,
        constructor: |triple, shared_flags, builder| {
            let isa_flags = s390x_settings::Flags::new(&shared_flags, builder);
            Box::new(TargetIsaAdapter::new(S390xBackend::new_with_flags(
                triple,
                shared_flags,
                isa_flags,
            )))
        },
    }
}
// End-to-end tests: build a small CLIF function, compile it with
// `opt_level` set to "none", and compare the emitted bytes with golden
// machine code.
#[cfg(test)]
mod test {
use super::*;
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::types::*;
use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature};
use crate::isa::CallConv;
use crate::settings;
use crate::settings::Configurable;
use core::str::FromStr;
use target_lexicon::Triple;
/// Compiles `arg0 + 0x1234` in a single block and checks the emitted bytes.
#[test]
fn test_compile_function() {
let name = ExternalName::testcase("test0");
let mut sig = Signature::new(CallConv::SystemV);
sig.params.push(AbiParam::new(I32));
sig.returns.push(AbiParam::new(I32));
let mut func = Function::with_name_signature(name, sig);
let bb0 = func.dfg.make_block();
let arg0 = func.dfg.append_block_param(bb0, I32);
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(bb0);
let v0 = pos.ins().iconst(I32, 0x1234);
let v1 = pos.ins().iadd(arg0, v0);
pos.ins().return_(&[v1]);
let mut shared_flags_builder = settings::builder();
shared_flags_builder.set("opt_level", "none").unwrap();
let shared_flags = settings::Flags::new(shared_flags_builder);
let isa_flags = s390x_settings::Flags::new(&shared_flags, s390x_settings::builder());
let backend = S390xBackend::new_with_flags(
Triple::from_str("s390x").unwrap(),
shared_flags,
isa_flags,
);
let result = backend
.compile_function(&mut func, /* want_disasm = */ false)
.unwrap();
let code = &result.buffer.data[..];
// ahi %r2, 0x1234
// br %r14
let golden = vec![0xa7, 0x2a, 0x12, 0x34, 0x07, 0xfe];
assert_eq!(code, &golden[..]);
}
/// Compiles a four-block function with conditional branches between the
/// blocks and checks the emitted bytes.
#[test]
fn test_branch_lowering() {
let name = ExternalName::testcase("test0");
let mut sig = Signature::new(CallConv::SystemV);
sig.params.push(AbiParam::new(I32));
sig.returns.push(AbiParam::new(I32));
let mut func = Function::with_name_signature(name, sig);
let bb0 = func.dfg.make_block();
let arg0 = func.dfg.append_block_param(bb0, I32);
let bb1 = func.dfg.make_block();
let bb2 = func.dfg.make_block();
let bb3 = func.dfg.make_block();
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(bb0);
let v0 = pos.ins().iconst(I32, 0x1234);
let v1 = pos.ins().iadd(arg0, v0);
pos.ins().brnz(v1, bb1, &[]);
pos.ins().jump(bb2, &[]);
pos.insert_block(bb1);
pos.ins().brnz(v1, bb2, &[]);
pos.ins().jump(bb3, &[]);
pos.insert_block(bb2);
let v2 = pos.ins().iadd(v1, v0);
pos.ins().brnz(v2, bb2, &[]);
pos.ins().jump(bb1, &[]);
pos.insert_block(bb3);
let v3 = pos.ins().isub(v1, v0);
pos.ins().return_(&[v3]);
let mut shared_flags_builder = settings::builder();
shared_flags_builder.set("opt_level", "none").unwrap();
let shared_flags = settings::Flags::new(shared_flags_builder);
let isa_flags = s390x_settings::Flags::new(&shared_flags, s390x_settings::builder());
let backend = S390xBackend::new_with_flags(
Triple::from_str("s390x").unwrap(),
shared_flags,
isa_flags,
);
let result = backend
.compile_function(&mut func, /* want_disasm = */ false)
.unwrap();
let code = &result.buffer.data[..];
// FIXME: the branching logic should be optimized more
// ahi %r2, 4660
// chi %r2, 0
// jglh label1 ; jg label2
// jg label6
// jg label3
// ahik %r3, %r2, 4660
// chi %r3, 0
// jglh label4 ; jg label5
// jg label3
// jg label6
// chi %r2, 0
// jglh label7 ; jg label8
// jg label3
// ahi %r2, -4660
// br %r14
// Golden bytes are listed in decimal.
let golden = vec![
167, 42, 18, 52, 167, 46, 0, 0, 192, 100, 0, 0, 0, 11, 236, 50, 18, 52, 0, 216, 167,
62, 0, 0, 192, 100, 255, 255, 255, 251, 167, 46, 0, 0, 192, 100, 255, 255, 255, 246,
167, 42, 237, 204, 7, 254,
];
assert_eq!(code, &golden[..]);
}
}

View File

@@ -0,0 +1,9 @@
//! S390X Settings.
use crate::settings::{self, detail, Builder, Value};
use core::fmt;
// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a
// public `Flags` struct with an impl for all of the settings defined in
// `cranelift-codegen/meta/src/isa/s390x/settings.rs`.
include!(concat!(env!("OUT_DIR"), "/settings-s390x.rs"));

View File

@@ -225,6 +225,11 @@ pub enum UnwindInst {
/// the clobber area.
offset_downward_to_clobbers: u32,
},
/// The stack pointer was adjusted to allocate the stack.
StackAlloc {
/// Size to allocate.
size: u32,
},
/// The stack slot at the given offset from the clobber-area base has been
/// used to save the given register.
///

View File

@@ -6,7 +6,6 @@ use crate::isa::unwind::UnwindInst;
use crate::result::{CodegenError, CodegenResult};
use alloc::vec::Vec;
use gimli::write::{Address, FrameDescriptionEntry};
use thiserror::Error;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
@@ -15,16 +14,32 @@ type Register = u16;
/// Enumerate the errors possible in mapping Cranelift registers to their DWARF equivalent.
#[allow(missing_docs)]
#[derive(Error, Debug, PartialEq, Eq)]
#[derive(Debug, PartialEq, Eq)]
pub enum RegisterMappingError {
#[error("unable to find bank for register info")]
MissingBank,
#[error("register mapping is currently only implemented for x86_64")]
UnsupportedArchitecture,
#[error("unsupported register bank: {0}")]
UnsupportedRegisterBank(&'static str),
}
// This is manually implementing Error and Display instead of using thiserror to reduce the amount
// of dependencies used by Cranelift.
impl std::error::Error for RegisterMappingError {}
impl std::fmt::Display for RegisterMappingError {
    /// Write the human-readable message for each error variant.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match self {
            RegisterMappingError::MissingBank => {
                f.write_str("unable to find bank for register info")
            }
            RegisterMappingError::UnsupportedArchitecture => {
                f.write_str("register mapping is currently only implemented for x86_64")
            }
            RegisterMappingError::UnsupportedRegisterBank(bank) => {
                write!(f, "unsupported register bank: {}", bank)
            }
        }
    }
}
// This mirrors gimli's CallFrameInstruction, but is serializable
// This excludes CfaExpression, Expression, ValExpression due to
// https://github.com/gimli-rs/gimli/issues/513.
@@ -122,8 +137,10 @@ pub(crate) trait RegisterMapper<Reg> {
fn map(&self, reg: Reg) -> Result<Register, RegisterMappingError>;
/// Gets stack pointer register.
fn sp(&self) -> Register;
/// Gets the frame pointer register.
fn fp(&self) -> Register;
/// Gets the frame pointer register, if any.
fn fp(&self) -> Option<Register> {
None
}
/// Gets the link register, if any.
fn lr(&self) -> Option<Register> {
None
@@ -151,6 +168,7 @@ pub(crate) fn create_unwind_info_from_insts<MR: RegisterMapper<regalloc::Reg>>(
) -> CodegenResult<UnwindInfo> {
let mut instructions = vec![];
let mut cfa_offset = 0;
let mut clobber_offset_to_cfa = 0;
for &(instruction_offset, ref inst) in insts {
match inst {
@@ -163,10 +181,14 @@ pub(crate) fn create_unwind_info_from_insts<MR: RegisterMapper<regalloc::Reg>>(
instruction_offset,
CallFrameInstruction::CfaOffset(offset_upward_to_caller_sp as i32),
));
// Note that we saved the old FP value on the stack.
// Note that we saved the old FP value on the stack. Use of this
// operation implies that the target defines a FP register.
instructions.push((
instruction_offset,
CallFrameInstruction::Offset(mr.fp(), -(offset_upward_to_caller_sp as i32)),
CallFrameInstruction::Offset(
mr.fp().unwrap(),
-(offset_upward_to_caller_sp as i32),
),
));
// If there is a link register on this architecture, note that
// we saved it as well.
@@ -188,15 +210,29 @@ pub(crate) fn create_unwind_info_from_insts<MR: RegisterMapper<regalloc::Reg>>(
// Define CFA in terms of FP. Note that we assume it was already
// defined correctly in terms of the current SP, and FP has just
// been set to the current SP, so we do not need to change the
// offset, only the register.
instructions.push((
instruction_offset,
CallFrameInstruction::CfaRegister(mr.fp()),
));
// offset, only the register. (This is done only if the target
// defines a frame pointer register.)
if let Some(fp) = mr.fp() {
instructions.push((instruction_offset, CallFrameInstruction::CfaRegister(fp)));
}
// Record initial CFA offset. This will be used with later
// StackAlloc calls if we do not have a frame pointer.
cfa_offset = offset_upward_to_caller_sp;
// Record distance from CFA downward to clobber area so we can
// express clobber offsets later in terms of CFA.
clobber_offset_to_cfa = offset_upward_to_caller_sp + offset_downward_to_clobbers;
}
&UnwindInst::StackAlloc { size } => {
// If we do not use a frame pointer, we need to update the
// CFA offset whenever the stack pointer changes.
if mr.fp().is_none() {
cfa_offset += size;
instructions.push((
instruction_offset,
CallFrameInstruction::CfaOffset(cfa_offset as i32),
));
}
}
&UnwindInst::SaveReg {
clobber_offset,
reg,

View File

@@ -3,14 +3,11 @@
use crate::isa::unwind::input;
use crate::result::{CodegenError, CodegenResult};
use alloc::vec::Vec;
use byteorder::{ByteOrder, LittleEndian};
use log::warn;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
#[cfg(feature = "x64")]
use crate::binemit::CodeOffset;
#[cfg(feature = "x64")]
use crate::isa::unwind::UnwindInst;
/// Maximum (inclusive) size of a "small" stack allocation
@@ -33,20 +30,20 @@ impl<'a> Writer<'a> {
self.offset += 1;
}
fn write_u16<T: ByteOrder>(&mut self, v: u16) {
T::write_u16(&mut self.buf[self.offset..(self.offset + 2)], v);
fn write_u16_le(&mut self, v: u16) {
self.buf[self.offset..(self.offset + 2)].copy_from_slice(&v.to_le_bytes());
self.offset += 2;
}
fn write_u32<T: ByteOrder>(&mut self, v: u32) {
T::write_u32(&mut self.buf[self.offset..(self.offset + 4)], v);
fn write_u32_le(&mut self, v: u32) {
self.buf[self.offset..(self.offset + 4)].copy_from_slice(&v.to_le_bytes());
self.offset += 4;
}
}
/// The supported unwind codes for the x64 Windows ABI.
///
/// See: https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64
/// See: <https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64>
/// Only what is needed to describe the prologues generated by the Cranelift x86 ISA are represented here.
/// Note: the Cranelift x86 ISA RU enum matches the Windows unwind GPR encoding values.
#[allow(dead_code)]
@@ -123,11 +120,11 @@ impl UnwindCode {
let scaled_stack_offset = stack_offset / 16;
if scaled_stack_offset <= core::u16::MAX as u32 {
writer.write_u8((*reg << 4) | (op_small as u8));
writer.write_u16::<LittleEndian>(scaled_stack_offset as u16);
writer.write_u16_le(scaled_stack_offset as u16);
} else {
writer.write_u8((*reg << 4) | (op_large as u8));
writer.write_u16::<LittleEndian>(*stack_offset as u16);
writer.write_u16::<LittleEndian>((stack_offset >> 16) as u16);
writer.write_u16_le(*stack_offset as u16);
writer.write_u16_le((stack_offset >> 16) as u16);
}
}
Self::StackAlloc {
@@ -145,10 +142,10 @@ impl UnwindCode {
);
} else if *size <= LARGE_ALLOC_16BIT_MAX_SIZE {
writer.write_u8(UnwindOperation::LargeStackAlloc as u8);
writer.write_u16::<LittleEndian>((*size / 8) as u16);
writer.write_u16_le((*size / 8) as u16);
} else {
writer.write_u8((1 << 4) | (UnwindOperation::LargeStackAlloc as u8));
writer.write_u32::<LittleEndian>(*size);
writer.write_u32_le(*size);
}
}
Self::SetFPReg { instruction_offset } => {
@@ -195,7 +192,7 @@ pub(crate) trait RegisterMapper<Reg> {
/// Represents Windows x64 unwind information.
///
/// For information about Windows x64 unwind info, see:
/// https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64
/// <https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64>
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct UnwindInfo {
@@ -250,7 +247,7 @@ impl UnwindInfo {
// To keep a 32-bit alignment, emit 2 bytes of padding if there's an odd number of 16-bit nodes
if (node_count & 1) == 1 {
writer.write_u16::<LittleEndian>(0);
writer.write_u16_le(0);
}
// Ensure the correct number of bytes was emitted
@@ -334,10 +331,8 @@ impl UnwindInfo {
}
}
#[cfg(feature = "x64")]
const UNWIND_RBP_REG: u8 = 5;
#[cfg(feature = "x64")]
pub(crate) fn create_unwind_info_from_insts<MR: RegisterMapper<regalloc::Reg>>(
insts: &[(CodeOffset, UnwindInst)],
) -> CodegenResult<UnwindInfo> {
@@ -360,6 +355,12 @@ pub(crate) fn create_unwind_info_from_insts<MR: RegisterMapper<regalloc::Reg>>(
frame_register_offset = ensure_unwind_offset(offset_downward_to_clobbers)?;
unwind_codes.push(UnwindCode::SetFPReg { instruction_offset });
}
&UnwindInst::StackAlloc { size } => {
unwind_codes.push(UnwindCode::StackAlloc {
instruction_offset,
size,
});
}
&UnwindInst::SaveReg {
clobber_offset,
reg,

View File

@@ -237,10 +237,20 @@ impl ABIMachineSpec for X64ABIMachineSpec {
extension: param.extension,
});
} else {
// Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte
// stack alignment happens separately after all args.)
// Compute size. For the wasmtime ABI it differs from native
// ABIs in how multiple values are returned, so we take a
// leaf out of arm64's book by not rounding everything up to
// 8 bytes. For all ABI arguments, and other ABI returns,
// though, each slot takes a minimum of 8 bytes.
//
// Note that in all cases 16-byte stack alignment happens
// separately after all args.
let size = (reg_ty.bits() / 8) as u64;
let size = std::cmp::max(size, 8);
let size = if args_or_rets == ArgsOrRets::Rets && call_conv.extends_wasmtime() {
size
} else {
std::cmp::max(size, 8)
};
// Align.
debug_assert!(size.is_power_of_two());
next_stack = align_to(next_stack, size);
@@ -490,6 +500,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> (u64, SmallVec<[Self::I; 16]>) {
let mut insts = SmallVec::new();
// Find all clobbered registers that are callee-save.
@@ -564,6 +575,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> SmallVec<[Self::I; 16]> {
let mut insts = SmallVec::new();
@@ -824,15 +836,7 @@ impl From<StackAMode> for SyntheticAmode {
}
fn get_intreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
let is_fastcall = match call_conv {
CallConv::Fast
| CallConv::Cold
| CallConv::SystemV
| CallConv::BaldrdashSystemV
| CallConv::Baldrdash2020 => false,
CallConv::WindowsFastcall => true,
_ => panic!("int args only supported for SysV or Fastcall calling convention"),
};
let is_fastcall = call_conv.extends_windows_fastcall();
// Fastcall counts by absolute argument number; SysV counts by argument of
// this (integer) class.
@@ -853,15 +857,7 @@ fn get_intreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Optio
}
fn get_fltreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
let is_fastcall = match call_conv {
CallConv::Fast
| CallConv::Cold
| CallConv::SystemV
| CallConv::BaldrdashSystemV
| CallConv::Baldrdash2020 => false,
CallConv::WindowsFastcall => true,
_ => panic!("float args only supported for SysV or Fastcall calling convention"),
};
let is_fastcall = call_conv.extends_windows_fastcall();
// Fastcall counts by absolute argument number; SysV counts by argument of
// this (floating-point) class.
@@ -894,7 +890,10 @@ fn get_intreg_for_retval(
1 => Some(regs::rdx()),
_ => None,
},
CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => {
CallConv::BaldrdashSystemV
| CallConv::Baldrdash2020
| CallConv::WasmtimeSystemV
| CallConv::WasmtimeFastcall => {
if intreg_idx == 0 && retval_idx == 0 {
Some(regs::rax())
} else {
@@ -907,6 +906,7 @@ fn get_intreg_for_retval(
_ => None,
},
CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
CallConv::AppleAarch64 => unreachable!(),
}
}
@@ -921,7 +921,10 @@ fn get_fltreg_for_retval(
1 => Some(regs::xmm1()),
_ => None,
},
CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => {
CallConv::BaldrdashSystemV
| CallConv::Baldrdash2020
| CallConv::WasmtimeFastcall
| CallConv::WasmtimeSystemV => {
if fltreg_idx == 0 && retval_idx == 0 {
Some(regs::xmm0())
} else {
@@ -933,6 +936,7 @@ fn get_fltreg_for_retval(
_ => None,
},
CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
CallConv::AppleAarch64 => unreachable!(),
}
}
@@ -990,17 +994,18 @@ fn get_callee_saves(call_conv: &CallConv, regs: &Set<Writable<RealReg>>) -> Vec<
CallConv::BaldrdashWindows => {
todo!("baldrdash windows");
}
CallConv::Fast | CallConv::Cold | CallConv::SystemV => regs
CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::WasmtimeSystemV => regs
.iter()
.cloned()
.filter(|r| is_callee_save_systemv(r.to_reg()))
.collect(),
CallConv::WindowsFastcall => regs
CallConv::WindowsFastcall | CallConv::WasmtimeFastcall => regs
.iter()
.cloned()
.filter(|r| is_callee_save_fastcall(r.to_reg()))
.collect(),
CallConv::Probestack => todo!("probestack?"),
CallConv::AppleAarch64 => unreachable!(),
};
// Sort registers for deterministic code output. We can do an unstable sort because the
// registers will be unique (there are no dups).

View File

@@ -0,0 +1,403 @@
//! Encodes EVEX instructions. These instructions are those added by the AVX-512 extensions. The
//! EVEX encoding requires a 4-byte prefix:
//!
//! Byte 0: 0x62
//! ┌───┬───┬───┬───┬───┬───┬───┬───┐
//! Byte 1: │ R │ X │ B │ R'│ 0 │ 0 │ m │ m │
//! ├───┼───┼───┼───┼───┼───┼───┼───┤
//! Byte 2: │ W │ v │ v │ v │ v │ 1 │ p │ p │
//! ├───┼───┼───┼───┼───┼───┼───┼───┤
//! Byte 3: │ z │ L'│ L │ b │ V'│ a │ a │ a │
//! └───┴───┴───┴───┴───┴───┴───┴───┘
//!
//! The prefix is then followed by the opcode byte, the ModR/M byte, and other optional suffixes
//! (e.g. SIB byte, displacements, immediates) based on the instruction (see section 2.6, Intel
//! Software Development Manual, volume 2A).
use super::rex::{encode_modrm, LegacyPrefixes, OpcodeMap};
use super::ByteSink;
use core::ops::RangeInclusive;
/// Constructs an EVEX-encoded instruction using a builder pattern. This approach makes it visually
/// easier to transform something in the manual's syntax, `EVEX.256.66.0F38.W1 1F /r`, to code:
/// `EvexInstruction::new().length(...).prefix(...).map(...).w(true).opcode(0x1F).reg(...).rm(...)`.
pub struct EvexInstruction {
// The 4-byte EVEX prefix packed into a `u32`; it is emitted with a single `put4`, so the
// mandatory first byte (`0x62`) lives in the lowest 8 bits.
bits: u32,
// The opcode byte, emitted immediately after the prefix.
opcode: u8,
// The register whose low 3 bits fill the `reg` field of the ModR/M byte.
reg: Register,
// The register whose low 3 bits fill the `rm` field of the ModR/M byte.
rm: Register,
}
/// Because some of the bit flags in the EVEX prefix are reversed and users of `EvexInstruction` may
/// choose to skip setting fields, here we set some sane defaults. Note that:
/// - the first byte is always `0x62` but you will notice it at the end of the default `bits` value
/// implemented--remember the little-endian order
/// - some bits are always set to certain values: bits 10-11 to 0, bit 18 to 1
/// - the other bits set correspond to reversed bits: R, X, B, R' (byte 1), vvvv (byte 2), V' (byte
/// 3).
///
/// See the `default_emission` test for what these defaults are equivalent to (e.g. using RAX,
/// unsetting the W bit, etc.)
impl Default for EvexInstruction {
fn default() -> Self {
Self {
bits: 0x08_7C_F0_62,
opcode: 0,
reg: Register::default(),
rm: Register::default(),
}
}
}
#[allow(non_upper_case_globals)] // This makes it easier to match the bit range names to the manual's names.
impl EvexInstruction {
/// Construct a default EVEX instruction.
pub fn new() -> Self {
Self::default()
}
/// Set the vector length of the instruction. Note that there are sets of instructions (i.e.
/// rounding, memory broadcast) that modify the same underlying bits--at some point (TODO) we can
/// add a way to set those context bits and verify that both are not used (e.g. rounding AND
/// length). For now, this method is very convenient.
///
/// Only the two `L'L` bits are written; the neighbouring `b` bit is left untouched.
#[inline(always)]
pub fn length(mut self, length: EvexVectorLength) -> Self {
    // `Self::LL` spans exactly the two `L'L` bits, so the raw two-bit length encoding must be
    // written. `EvexContext::bits()` returns the three-bit `L'Lb` field (the length shifted left
    // past `b`); writing that into the two-bit range encoded V256 as V512 and overflowed the
    // range for V512.
    self.write(Self::LL, length.bits() as u32);
    self
}
/// Select the legacy prefix of the instruction: None | 66 | F0 | F2 | F3. In the EVEX encoding
/// the prefix is folded into the `pp` bits instead of being emitted as a separate byte.
#[inline(always)]
pub fn prefix(mut self, prefix: LegacyPrefixes) -> Self {
    let pp_bits = u32::from(prefix.bits());
    self.write(Self::pp, pp_bits);
    self
}
/// Select the opcode map of the instruction: None | 0F | 0F38 | 0F3A. In the EVEX encoding the
/// map is folded into the `mm` bits instead of being emitted as separate bytes.
#[inline(always)]
pub fn map(mut self, map: OpcodeMap) -> Self {
    let mm_bits = u32::from(map.bits());
    self.write(Self::mm, mm_bits);
    self
}
/// Set or clear the W bit, which typically marks an instruction as using 64 bits of an operand
/// (e.g. 64 bit lanes). EVEX carries this bit inside its own prefix; previous encodings used the
/// REX prefix.
#[inline(always)]
pub fn w(mut self, w: bool) -> Self {
    let w_bit = u32::from(w);
    self.write(Self::W, w_bit);
    self
}
/// Set the instruction opcode byte.
#[inline(always)]
pub fn opcode(mut self, opcode: u8) -> Self {
self.opcode = opcode;
self
}
/// Choose the register for the `reg` bits; many instructions use this as the write operand. The
/// choice is reflected both in the ModR/M byte (`reg` section, low three bits) and in the EVEX
/// prefix (the `R` and `R'` extension bits, which carry bits 3 and 4 of the encoding).
#[inline(always)]
pub fn reg(mut self, reg: impl Into<Register>) -> Self {
    self.reg = reg.into();
    // `R` and `R'` are stored inverted, so flip the whole encoding once up front.
    let flipped = !self.reg.0;
    self.write(Self::R, u32::from((flipped >> 3) & 1));
    self.write(Self::R_, u32::from((flipped >> 4) & 1));
    self
}
/// Configure masking: the `aaa` bits receive the mask's `aaa_bits()` and the `z` bit receives
/// its `z_bit()`. See section 2.6 in the Intel Software Developer's Manual, volume 2A for more
/// details.
#[allow(dead_code)]
#[inline(always)]
pub fn mask(mut self, mask: EvexMasking) -> Self {
    let opmask = u32::from(mask.aaa_bits());
    let zeroing = u32::from(mask.z_bit());
    self.write(Self::aaa, opmask);
    self.write(Self::z, zeroing);
    self
}
/// Choose the `vvvvv` register; some instructions allow using this as a second, non-destructive
/// source register in 3-operand instructions (e.g. 2 read, 1 write). The low four bits of the
/// encoding go to `vvvv` and bit 4 goes to `V'`; both fields are stored inverted.
#[allow(dead_code)]
#[inline(always)]
pub fn vvvvv(mut self, reg: impl Into<Register>) -> Self {
    let reg: Register = reg.into();
    let flipped = !u32::from(reg.0);
    self.write(Self::vvvv, flipped & 0b1111);
    self.write(Self::V_, (flipped >> 4) & 0b1);
    self
}
/// Choose the register for the `rm` bits; many instructions use this as the "read from
/// register/memory" operand. Currently this does not support memory addressing (TODO). The
/// choice is reflected both in the ModR/M byte (`rm` section, low three bits) and in the EVEX
/// prefix (the `B` and `X` extension bits, which carry bits 3 and 4 of the encoding).
#[inline(always)]
pub fn rm(mut self, reg: impl Into<Register>) -> Self {
    self.rm = reg.into();
    // `B` and `X` are stored inverted, so flip the whole encoding once up front.
    let flipped = !self.rm.0;
    self.write(Self::X, u32::from((flipped >> 4) & 1));
    self.write(Self::B, u32::from((flipped >> 3) & 1));
    self
}
/// Write the EVEX-encoded instruction into `sink`, in this order:
/// - the 4-byte EVEX prefix;
/// - the opcode byte;
/// - the ModR/M byte, always built with `mod = 0b11` from the low three bits of `reg` and `rm`.
///
/// Eventually this method should support encodings of more than just the reg-reg addressing mode (TODO).
pub fn encode<CS: ByteSink + ?Sized>(&self, sink: &mut CS) {
    let reg_low = self.reg.0 & 7;
    let rm_low = self.rm.0 & 7;
    let modrm = encode_modrm(0b11, reg_low, rm_low);
    sink.put4(self.bits);
    sink.put1(self.opcode);
    sink.put1(modrm);
}
// In order to simplify the encoding of the various bit ranges in the prefix, we specify those
// ranges according to the table below (extracted from the Intel Software Development Manual,
// volume 2A). Remember that, because we pack the 4-byte prefix into a little-endian `u32`, this
// chart should be read from right-to-left, top-to-bottom. Note also that we start ranges at bit
// 8, leaving bits 0-7 for the mandatory `0x62`.
// ┌───┬───┬───┬───┬───┬───┬───┬───┐
// Byte 1: │ R │ X │ B │ R'│ 0 │ 0 │ m │ m │
// ├───┼───┼───┼───┼───┼───┼───┼───┤
// Byte 2: │ W │ v │ v │ v │ v │ 1 │ p │ p │
// ├───┼───┼───┼───┼───┼───┼───┼───┤
// Byte 3: │ z │ L'│ L │ b │ V'│ a │ a │ a │
// └───┴───┴───┴───┴───┴───┴───┴───┘
// Each constant below is an inclusive range of bit positions within `bits`, consumed by `write`.
// Byte 1:
// `mm` is written by `map`; `R`/`R_` by `reg`; `B`/`X` by `rm`.
const mm: RangeInclusive<u8> = 8..=9;
const R_: RangeInclusive<u8> = 12..=12;
const B: RangeInclusive<u8> = 13..=13;
const X: RangeInclusive<u8> = 14..=14;
const R: RangeInclusive<u8> = 15..=15;
// Byte 2:
// `pp` is written by `prefix`; `vvvv` by `vvvvv`; `W` by `w`.
const pp: RangeInclusive<u8> = 16..=17;
const vvvv: RangeInclusive<u8> = 19..=22;
const W: RangeInclusive<u8> = 23..=23;
// Byte 3:
// `aaa` and `z` are written by `mask`; `V_` by `vvvvv`; `LL` by `length`.
const aaa: RangeInclusive<u8> = 24..=26;
const V_: RangeInclusive<u8> = 27..=27;
#[allow(dead_code)] // Will be used once broadcast and rounding controls are exposed.
const b: RangeInclusive<u8> = 28..=28;
const LL: RangeInclusive<u8> = 29..=30;
const z: RangeInclusive<u8> = 31..=31;
// Store `value` into the bit positions of `self.bits` named by `range`, replacing whatever those
// positions held before and leaving every other bit untouched.
#[inline]
fn write(&mut self, range: RangeInclusive<u8>, value: u32) {
    assert!(ExactSizeIterator::len(&range) > 0);
    let (lo, hi) = (*range.start(), *range.end());
    let width = hi - lo + 1; // Number of bits covered by the range.
    let field_max: u32 = (1 << width) - 1; // All-ones value of that width.
    debug_assert!(
        value <= field_max,
        "The written value should have fewer than {} bits.",
        width
    );
    // Clear the field first so stale bits cannot survive, then drop the new value in place
    // (this assumes `value <= field_max`).
    let cleared = self.bits & !(field_max << lo);
    self.bits = cleared | (value << lo);
}
}
/// Describe the register index to use. This wrapper is a type-safe way to pass
/// around the registers defined in `inst/regs.rs`.
#[derive(Copy, Clone, Default)]
pub struct Register(u8);

/// Wrap a raw hardware encoding. Debug builds assert that the encoding is below 16.
impl From<u8> for Register {
    fn from(reg: u8) -> Self {
        debug_assert!(reg < 16);
        Self(reg)
    }
}

/// Unwrap the raw hardware encoding.
///
/// Implemented as `From<Register> for u8` rather than `Into<u8> for Register`: the standard
/// library's blanket impl then provides `Into<u8>` for free, so existing `.into()` callers keep
/// working and `u8::from(register)` becomes available as well.
impl From<Register> for u8 {
    fn from(reg: Register) -> Self {
        reg.0
    }
}
/// Defines the EVEX context for the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte). Table 2-36 in
/// section 2.6.10 (Intel Software Development Manual, volume 2A) describes how these bits can be
/// used together for certain classes of instructions; i.e., special care should be taken to ensure
/// that instructions use an applicable correct `EvexContext`. Table 2-39 contains cases where
/// opcodes can result in an #UD.
#[allow(dead_code, missing_docs)] // Rounding and broadcast modes are not yet used.
pub enum EvexContext {
    // `b` is set and `L'L` carries the rounding control.
    RoundingRegToRegFP {
        rc: EvexRoundingControl,
    },
    // `b` carries `sae` and `L'L` carries the vector length.
    NoRoundingFP {
        sae: bool,
        length: EvexVectorLength,
    },
    // `b` carries `broadcast` and `L'L` carries the vector length.
    MemoryOp {
        broadcast: bool,
        length: EvexVectorLength,
    },
    // `b` is clear and `L'L` carries the vector length.
    Other {
        length: EvexVectorLength,
    },
}

impl Default for EvexContext {
    fn default() -> Self {
        let length = EvexVectorLength::default();
        EvexContext::Other { length }
    }
}

impl EvexContext {
    /// Encode the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte) for merging with the P2 byte.
    /// The result is three bits wide: `b` in bit 0 and `L'L` in bits 2:1.
    pub fn bits(&self) -> u8 {
        let (b_bit, upper) = match self {
            Self::RoundingRegToRegFP { rc } => (1, rc.bits()),
            Self::NoRoundingFP { sae, length } => (u8::from(*sae), length.bits()),
            Self::MemoryOp { broadcast, length } => (u8::from(*broadcast), length.bits()),
            Self::Other { length } => (0, length.bits()),
        };
        b_bit | (upper << 1)
    }
}
/// The vector lengths selectable through the EVEX `L'` and `L` bits; see `EvexContext`.
#[allow(dead_code, missing_docs)] // Wider-length vectors are not yet used.
pub enum EvexVectorLength {
    V128,
    V256,
    V512,
}

impl EvexVectorLength {
    /// Produce the two-bit `L'L` encoding for merging with the P2 byte.
    fn bits(&self) -> u8 {
        let encoding: u8 = match self {
            EvexVectorLength::V128 => 0,
            EvexVectorLength::V256 => 1,
            EvexVectorLength::V512 => 2,
            // The remaining encoding, 3, is reserved (#UD).
        };
        encoding
    }
}

impl Default for EvexVectorLength {
    fn default() -> Self {
        EvexVectorLength::V128
    }
}
/// The rounding controls selectable through the EVEX `L'` and `L` bits; see `EvexContext`.
#[allow(dead_code, missing_docs)] // Rounding controls are not yet used.
pub enum EvexRoundingControl {
    RNE,
    RD,
    RU,
    RZ,
}

impl EvexRoundingControl {
    /// Produce the two-bit `L'L` encoding for merging with the P2 byte.
    fn bits(&self) -> u8 {
        let encoding: u8 = match self {
            EvexRoundingControl::RNE => 0,
            EvexRoundingControl::RD => 1,
            EvexRoundingControl::RU => 2,
            EvexRoundingControl::RZ => 3,
        };
        encoding
    }
}
/// The EVEX masking behaviors; masking support is described in section 2.6.4 of the Intel
/// Software Development Manual, volume 2A.
#[allow(dead_code, missing_docs)] // Masking is not yet used.
pub enum EvexMasking {
    None,
    Merging { k: u8 },
    Zeroing { k: u8 },
}

impl Default for EvexMasking {
    fn default() -> Self {
        Self::None
    }
}

impl EvexMasking {
    /// Produce the `z` bit for merging with the P2 byte: 1 for `Zeroing`, 0 otherwise.
    pub fn z_bit(&self) -> u8 {
        matches!(self, Self::Zeroing { .. }) as u8
    }

    /// Produce the `aaa` bits for merging with the P2 byte: `k` when a mask register is given,
    /// 0 for `None`. Debug builds assert that `k` fits in three bits.
    pub fn aaa_bits(&self) -> u8 {
        match *self {
            Self::Merging { k } | Self::Zeroing { k } => {
                debug_assert!(k <= 7);
                k
            }
            Self::None => 0,
        }
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::isa::x64::inst::regs;
use std::vec::Vec;
// As a sanity test, we verify that the output of `xed-asmparse-main 'vpabsq xmm0{k0},
// xmm1'` matches this EVEX encoding machinery.
#[test]
fn vpabsq() {
let dst = regs::xmm0();
let src = regs::xmm1();
let mut sink0 = Vec::new();
EvexInstruction::new()
.prefix(LegacyPrefixes::_66)
.map(OpcodeMap::_0F38)
.w(true)
.opcode(0x1F)
.reg(dst.get_hw_encoding())
.rm(src.get_hw_encoding())
.length(EvexVectorLength::V128)
.encode(&mut sink0);
// Expected layout: the 4-byte EVEX prefix (62 f2 fd 08), the opcode (1f), then the ModR/M
// byte (c1).
assert_eq!(sink0, vec![0x62, 0xf2, 0xfd, 0x08, 0x1f, 0xc1]);
}
/// Verify that the defaults are equivalent to an instruction with a `0x00` opcode using the
/// "0" register (i.e. `rax`), with sane defaults for the various configurable parameters. This
/// test is more interesting than it may appear because some of the parameters have flipped-bit
/// representations (e.g. `vvvvv`) so emitting 0s as a default will not work.
#[test]
fn default_emission() {
// Untouched builder: relies entirely on `Default`.
let mut sink0 = Vec::new();
EvexInstruction::new().encode(&mut sink0);
// Same instruction with every setter except `vvvvv` called explicitly.
let mut sink1 = Vec::new();
EvexInstruction::new()
.length(EvexVectorLength::V128)
.prefix(LegacyPrefixes::None)
.map(OpcodeMap::None)
.w(false)
.opcode(0x00)
.reg(regs::rax().get_hw_encoding())
.rm(regs::rax().get_hw_encoding())
.mask(EvexMasking::None)
.encode(&mut sink1);
assert_eq!(sink0, sink1);
}
}

View File

@@ -0,0 +1,60 @@
//! Contains the encoding machinery for the various x64 instruction formats.
use crate::{isa::x64, machinst::MachBuffer};
use std::vec::Vec;
pub mod evex;
pub mod rex;
pub mod vex;
/// A destination for encoded bytes. Every encoding format in this module writes its output
/// through this trait.
pub trait ByteSink {
    /// Add 1 byte to the code section.
    fn put1(&mut self, byte: u8);

    /// Add 2 bytes to the code section.
    fn put2(&mut self, value: u16);

    /// Add 4 bytes to the code section.
    fn put4(&mut self, value: u32);

    /// Add 8 bytes to the code section.
    fn put8(&mut self, value: u64);
}
/// Lets the x64 `MachBuffer` be used wherever a `ByteSink` is expected by forwarding each method
/// to the buffer's method of the same name.
// NOTE(review): the `self.putN(..)` calls below presumably resolve to `MachBuffer`'s own inherent
// methods rather than back to this trait impl (which would recurse) -- confirm that `MachBuffer`
// defines inherent `put1`/`put2`/`put4`/`put8`.
impl ByteSink for MachBuffer<x64::inst::Inst> {
fn put1(&mut self, value: u8) {
self.put1(value)
}
fn put2(&mut self, value: u16) {
self.put2(value)
}
fn put4(&mut self, value: u32) {
self.put4(value)
}
fn put8(&mut self, value: u64) {
self.put8(value)
}
}
/// A plain byte vector as a sink, convenient for testing. Multi-byte values are appended in
/// little-endian order.
impl ByteSink for Vec<u8> {
    fn put1(&mut self, v: u8) {
        self.push(v)
    }

    fn put2(&mut self, v: u16) {
        let bytes = v.to_le_bytes();
        self.extend_from_slice(&bytes)
    }

    fn put4(&mut self, v: u32) {
        let bytes = v.to_le_bytes();
        self.extend_from_slice(&bytes)
    }

    fn put8(&mut self, v: u64) {
        let bytes = v.to_le_bytes();
        self.extend_from_slice(&bytes)
    }
}

View File

@@ -0,0 +1,504 @@
//! Encodes instructions in the standard x86 encoding mode. This is called IA-32E mode in the Intel
//! manuals but corresponds to the addition of the REX-prefix format (hence the name of this module)
//! that allowed encoding instructions in both compatibility mode (32-bit instructions running on a
//! 64-bit OS) and in 64-bit mode (using the full 64-bit address space).
//!
//! For all of the routines that take both a memory-or-reg operand (sometimes called "E" in the
//! Intel documentation, see the Intel Developer's manual, vol. 2, section A.2) and a reg-only
//! operand ("G" in Intelese), the order is always G first, then E. The term "enc" in the following
//! means "hardware register encoding number".
use crate::{
ir::TrapCode,
isa::x64::inst::{
args::{Amode, OperandSize},
regs, EmitInfo, EmitState, Inst, LabelUse,
},
machinst::{MachBuffer, MachInstEmitInfo},
};
use regalloc::{Reg, RegClass};
/// Returns true if `x`, reinterpreted as an `i32` and widened to 64 bits, is exactly what
/// sign-extending only its low 8 bits to 64 bits would produce.
pub(crate) fn low8_will_sign_extend_to_64(x: u32) -> bool {
    let full = i64::from(x as i32);
    let from_low8 = i64::from(x as i8);
    full == from_low8
}
/// Returns true if `x`, reinterpreted as an `i32`, is exactly what sign-extending only its low
/// 8 bits to 32 bits would produce.
pub(crate) fn low8_will_sign_extend_to_32(x: u32) -> bool {
    let full = x as i32;
    let from_low8 = i32::from(x as i8);
    full == from_low8
}
/// Build the ModR/M byte: `m0d` lands in bits 7:6, `enc_reg_g` in bits 5:3 and `rm_e` in bits
/// 2:0. Debug builds assert that each argument fits its field.
#[inline(always)]
pub fn encode_modrm(m0d: u8, enc_reg_g: u8, rm_e: u8) -> u8 {
    debug_assert!(m0d < 4);
    debug_assert!(enc_reg_g < 8);
    debug_assert!(rm_e < 8);
    let mode_field = (m0d & 0b11) << 6;
    let reg_field = (enc_reg_g & 0b111) << 3;
    let rm_field = rm_e & 0b111;
    mode_field | reg_field | rm_field
}
/// Build the SIB byte: `shift` lands in bits 7:6, `enc_index` in bits 5:3 and `enc_base` in bits
/// 2:0. Debug builds assert that each argument fits its field.
#[inline(always)]
pub(crate) fn encode_sib(shift: u8, enc_index: u8, enc_base: u8) -> u8 {
    debug_assert!(shift < 4);
    debug_assert!(enc_index < 8);
    debug_assert!(enc_base < 8);
    let scale_field = (shift & 0b11) << 6;
    let index_field = (enc_index & 0b111) << 3;
    let base_field = enc_base & 0b111;
    scale_field | index_field | base_field
}
/// Return the hardware encoding number of a general-purpose register. Debug builds assert that
/// `reg` is a real register of class `I64`.
#[inline(always)]
pub(crate) fn int_reg_enc(reg: Reg) -> u8 {
    debug_assert!(reg.is_real());
    debug_assert_eq!(reg.get_class(), RegClass::I64);
    let hw_enc: u8 = reg.get_hw_encoding();
    hw_enc
}
/// Return the hardware encoding number of a register of any class. Debug builds assert that
/// `reg` is a real register.
#[inline(always)]
pub(crate) fn reg_enc(reg: Reg) -> u8 {
    debug_assert!(reg.is_real());
    let hw_enc: u8 = reg.get_hw_encoding();
    hw_enc
}
/// A small bit field describing how an instruction's REX prefix must be emitted:
/// - bit 0 set to 1 indicates REX.W must be 0 (cleared).
/// - bit 1 set to 1 indicates the REX prefix must always be emitted.
#[repr(transparent)]
#[derive(Clone, Copy)]
pub(crate) struct RexFlags(u8);

impl RexFlags {
    /// Bit 0: REX.W must be cleared.
    const CLEAR_W_BIT: u8 = 0b01;
    /// Bit 1: the prefix must be emitted even when it would be the redundant `0x40`.
    const ALWAYS_EMIT_BIT: u8 = 0b10;

    /// The default flags: REX.W is set, and the prefix is emitted only when needed.
    #[inline(always)]
    pub(crate) fn set_w() -> Self {
        RexFlags(0)
    }

    /// Flags for which the REX.W bit will be cleared.
    #[inline(always)]
    pub(crate) fn clear_w() -> Self {
        RexFlags(Self::CLEAR_W_BIT)
    }

    /// Require the REX prefix to be emitted even if it turns out to be `0x40`.
    #[inline(always)]
    pub(crate) fn always_emit(&mut self) -> &mut Self {
        self.0 |= Self::ALWAYS_EMIT_BIT;
        self
    }

    /// Require the REX prefix whenever `reg` has a hardware encoding in 4..=7.
    // NOTE(review): presumably because, without a REX prefix, those encodings select
    // AH/CH/DH/BH in 8-bit operations -- confirm against the Intel manual.
    #[inline(always)]
    pub(crate) fn always_emit_if_8bit_needed(&mut self, reg: Reg) -> &mut Self {
        if (4..=7).contains(&int_reg_enc(reg)) {
            self.always_emit();
        }
        self
    }

    /// Whether REX.W must be cleared.
    #[inline(always)]
    pub(crate) fn must_clear_w(&self) -> bool {
        self.0 & Self::CLEAR_W_BIT == Self::CLEAR_W_BIT
    }

    /// Whether the REX prefix must be emitted unconditionally.
    #[inline(always)]
    pub(crate) fn must_always_emit(&self) -> bool {
        self.0 & Self::ALWAYS_EMIT_BIT == Self::ALWAYS_EMIT_BIT
    }

    /// Emit the REX prefix (if required) for an instruction with a `G` register `enc_g` and an
    /// `E` register `enc_e`; REX.X is always 0 in this form.
    #[inline(always)]
    pub(crate) fn emit_two_op(&self, sink: &mut MachBuffer<Inst>, enc_g: u8, enc_e: u8) {
        // Identical to the three-operand form with an index encoding whose bit 3 is clear.
        self.emit_three_op(sink, enc_g, 0, enc_e)
    }

    /// Emit the REX prefix (if required) for an instruction with a `G` register `enc_g`, an index
    /// register `enc_index` and a base register `enc_base`. Bit 3 of each encoding feeds REX.R,
    /// REX.X and REX.B respectively. The prefix is skipped when it would be `0x40` unless
    /// emission was forced.
    #[inline(always)]
    pub fn emit_three_op(
        &self,
        sink: &mut MachBuffer<Inst>,
        enc_g: u8,
        enc_index: u8,
        enc_base: u8,
    ) {
        let w_bit: u8 = if self.must_clear_w() { 0 } else { 1 };
        let r_bit = (enc_g >> 3) & 1;
        let x_bit = (enc_index >> 3) & 1;
        let b_bit = (enc_base >> 3) & 1;
        let prefix = 0x40 | (w_bit << 3) | (r_bit << 2) | (x_bit << 1) | b_bit;
        if self.must_always_emit() || prefix != 0x40 {
            sink.put1(prefix);
        }
    }
}
/// Derive the Rex flags from an operand size: REX.W is set for 64-bit operands and cleared for
/// every other size.
impl From<OperandSize> for RexFlags {
    fn from(size: OperandSize) -> Self {
        if let OperandSize::Size64 = size {
            RexFlags::set_w()
        } else {
            RexFlags::clear_w()
        }
    }
}
/// Derive the Rex flags from an operand size and a register: start from the size-only flags and,
/// for 8-bit operands, additionally force the prefix when the register requires it.
impl From<(OperandSize, Reg)> for RexFlags {
    fn from((size, reg): (OperandSize, Reg)) -> Self {
        let mut flags = RexFlags::from(size);
        if let OperandSize::Size8 = size {
            flags.always_emit_if_8bit_needed(reg);
        }
        flags
    }
}
/// Selects the "opcode map" an opcode byte belongs to; the same byte can mean different
/// instructions in different maps. See appendix A in the Intel Software Developer's Manual,
/// volume 2A, for more details.
#[allow(missing_docs)]
pub enum OpcodeMap {
    None,
    _0F,
    _0F38,
    _0F3A,
}

impl OpcodeMap {
    /// Return the two-bit form of the map. Normally the opcode map is specified as bytes in the
    /// instruction, but some x64 encoding formats pack this information as bits in a prefix
    /// (e.g. EVEX).
    pub(crate) fn bits(&self) -> u8 {
        let encoding: u8 = match self {
            Self::None => 0,
            Self::_0F => 1,
            Self::_0F38 => 2,
            Self::_0F3A => 3,
        };
        encoding
    }
}

impl Default for OpcodeMap {
    fn default() -> Self {
        OpcodeMap::None
    }
}
/// The legacy prefix bytes that may have to precede the REX prefix. Only the small set of
/// combinations that we actually need is covered.
pub enum LegacyPrefixes {
    /// No prefix bytes.
    None,
    /// Operand Size Override -- here, denoting "16-bit operation".
    _66,
    /// The Lock prefix.
    _F0,
    /// Operand size override and Lock.
    _66F0,
    /// REPNE, but no specific meaning here -- is just an opcode extension.
    _F2,
    /// REP/REPE, but no specific meaning here -- is just an opcode extension.
    _F3,
    /// Operand size override and same effect as F3.
    _66F3,
}

impl LegacyPrefixes {
    /// Write the prefix to `sink` as literal bytes (e.g. in REX instructions).
    #[inline(always)]
    pub(crate) fn emit(&self, sink: &mut MachBuffer<Inst>) {
        let bytes: &[u8] = match self {
            Self::None => &[],
            Self::_66 => &[0x66],
            Self::_F0 => &[0xF0],
            // I don't think the order matters, but in any case, this is the same order that
            // the GNU assembler uses.
            Self::_66F0 => &[0x66, 0xF0],
            Self::_F2 => &[0xF2],
            Self::_F3 => &[0xF3],
            Self::_66F3 => &[0x66, 0xF3],
        };
        for &byte in bytes {
            sink.put1(byte);
        }
    }

    /// Return the prefix as a two-bit field (e.g. for EVEX instructions). Only the single
    /// prefixes None, 66, F3 and F2 have such a form; any other variant panics.
    #[inline(always)]
    pub(crate) fn bits(&self) -> u8 {
        match self {
            Self::None => 0,
            Self::_66 => 1,
            Self::_F3 => 2,
            Self::_F2 => 3,
            Self::_F0 | Self::_66F0 | Self::_66F3 => panic!(
                "VEX and EVEX bits can only be extracted from single prefixes: None, 66, F3, F2"
            ),
        }
    }
}

impl Default for LegacyPrefixes {
    fn default() -> Self {
        LegacyPrefixes::None
    }
}
/// This is the core 'emit' function for instructions that reference memory.
///
/// For an instruction that has as operands a reg encoding `enc_g` and a memory address `mem_e`,
/// create and emit:
/// - first the legacy prefixes, if any
/// - then the REX prefix, if needed
/// - then caller-supplied opcode byte(s) (`opcodes` and `num_opcodes`),
/// - then the MOD/RM byte,
/// - then optionally, a SIB byte,
/// - and finally optionally an immediate that will be derived from the `mem_e` operand.
///
/// For most instructions up to and including SSE4.2, that will be the whole instruction: this is
/// what we call "standard" instructions, and abbreviate "std" in the name here. VEX-prefixed
/// instructions will require their own emitter functions.
///
/// This will also work for 32-bits x86 instructions, assuming no REX prefix is provided.
///
/// The opcodes are written bigendianly for the convenience of callers. For example, if the opcode
/// bytes to be emitted are, in this order, F3 0F 27, then the caller should pass `opcodes` ==
/// 0xF3_0F_27 and `num_opcodes` == 3.
///
/// The register operand is represented here not as a `Reg` but as its hardware encoding, `enc_g`.
/// `rex` can specify special handling for the REX prefix. By default, the REX prefix will
/// indicate a 64-bit operation and will be deleted if it is redundant (0x40). Note that for a
/// 64-bit operation, the REX prefix will normally never be redundant, since REX.W must be 1 to
/// indicate a 64-bit operation.
pub(crate) fn emit_std_enc_mem(
sink: &mut MachBuffer<Inst>,
state: &EmitState,
info: &EmitInfo,
prefixes: LegacyPrefixes,
opcodes: u32,
mut num_opcodes: usize,
enc_g: u8,
mem_e: &Amode,
rex: RexFlags,
) {
// General comment for this function: the registers in `mem_e` must be
// 64-bit integer registers, because they are part of an address
// expression. But `enc_g` can be derived from a register of any class.
let srcloc = state.cur_srcloc();
let can_trap = mem_e.can_trap();
if can_trap {
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
}
prefixes.emit(sink);
match mem_e {
Amode::ImmReg { simm32, base, .. } => {
// If this is an access based off of RSP, it may trap with a stack overflow if it's the
// first touch of a new stack page.
if *base == regs::rsp() && !can_trap && info.flags().enable_probestack() {
sink.add_trap(srcloc, TrapCode::StackOverflow);
}
// First, the REX byte.
let enc_e = int_reg_enc(*base);
rex.emit_two_op(sink, enc_g, enc_e);
// Now the opcode(s). These include any other prefixes the caller
// hands to us.
while num_opcodes > 0 {
num_opcodes -= 1;
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
}
// Now the mod/rm and associated immediates. This is
// significantly complicated due to the multiple special cases.
if *simm32 == 0
&& enc_e != regs::ENC_RSP
&& enc_e != regs::ENC_RBP
&& enc_e != regs::ENC_R12
&& enc_e != regs::ENC_R13
{
// FIXME JRS 2020Feb11: those four tests can surely be
// replaced by a single mask-and-compare check. We should do
// that because this routine is likely to be hot.
sink.put1(encode_modrm(0, enc_g & 7, enc_e & 7));
} else if *simm32 == 0 && (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12) {
sink.put1(encode_modrm(0, enc_g & 7, 4));
sink.put1(0x24);
} else if low8_will_sign_extend_to_32(*simm32)
&& enc_e != regs::ENC_RSP
&& enc_e != regs::ENC_R12
{
sink.put1(encode_modrm(1, enc_g & 7, enc_e & 7));
sink.put1((simm32 & 0xFF) as u8);
} else if enc_e != regs::ENC_RSP && enc_e != regs::ENC_R12 {
sink.put1(encode_modrm(2, enc_g & 7, enc_e & 7));
sink.put4(*simm32);
} else if (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12)
&& low8_will_sign_extend_to_32(*simm32)
{
// REX.B distinguishes RSP from R12
sink.put1(encode_modrm(1, enc_g & 7, 4));
sink.put1(0x24);
sink.put1((simm32 & 0xFF) as u8);
} else if enc_e == regs::ENC_R12 || enc_e == regs::ENC_RSP {
//.. wait for test case for RSP case
// REX.B distinguishes RSP from R12
sink.put1(encode_modrm(2, enc_g & 7, 4));
sink.put1(0x24);
sink.put4(*simm32);
} else {
unreachable!("ImmReg");
}
}
Amode::ImmRegRegShift {
simm32,
base: reg_base,
index: reg_index,
shift,
..
} => {
// If this is an access based off of RSP, it may trap with a stack overflow if it's the
// first touch of a new stack page.
if *reg_base == regs::rsp() && !can_trap && info.flags().enable_probestack() {
sink.add_trap(srcloc, TrapCode::StackOverflow);
}
let enc_base = int_reg_enc(*reg_base);
let enc_index = int_reg_enc(*reg_index);
// The rex byte.
rex.emit_three_op(sink, enc_g, enc_index, enc_base);
// All other prefixes and opcodes.
while num_opcodes > 0 {
num_opcodes -= 1;
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
}
// modrm, SIB, immediates.
if low8_will_sign_extend_to_32(*simm32) && enc_index != regs::ENC_RSP {
sink.put1(encode_modrm(1, enc_g & 7, 4));
sink.put1(encode_sib(*shift, enc_index & 7, enc_base & 7));
sink.put1(*simm32 as u8);
} else if enc_index != regs::ENC_RSP {
sink.put1(encode_modrm(2, enc_g & 7, 4));
sink.put1(encode_sib(*shift, enc_index & 7, enc_base & 7));
sink.put4(*simm32);
} else {
panic!("ImmRegRegShift");
}
}
Amode::RipRelative { ref target } => {
// First, the REX byte, with REX.B = 0.
rex.emit_two_op(sink, enc_g, 0);
// Now the opcode(s). These include any other prefixes the caller
// hands to us.
while num_opcodes > 0 {
num_opcodes -= 1;
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
}
// RIP-relative is mod=00, rm=101.
sink.put1(encode_modrm(0, enc_g & 7, 0b101));
let offset = sink.cur_offset();
sink.use_label_at_offset(offset, *target, LabelUse::JmpRel32);
sink.put4(0);
}
}
}
/// Core emitter for "standard" instructions that do not reference memory.
///
/// This is the register-only counterpart of `emit_std_enc_mem`: the E operand is a register
/// (`enc_e`) rather than a memory address, so the encoding is much simpler. Writes, in order: the
/// legacy prefixes, the REX prefix (if needed), the low `num_opcodes` bytes of `opcodes` (most
/// significant first), and a MOD/RM byte with mod = 3.
pub(crate) fn emit_std_enc_enc(
    sink: &mut MachBuffer<Inst>,
    prefixes: LegacyPrefixes,
    opcodes: u32,
    num_opcodes: usize,
    enc_g: u8,
    enc_e: u8,
    rex: RexFlags,
) {
    // `enc_g` and `enc_e` may come from registers of any class, and not necessarily the same
    // one: for an integer-to-FP conversion, one might be RegClass::I64 and the other
    // RegClass::V128.

    // Legacy prefixes first, then REX.
    prefixes.emit(sink);
    rex.emit_two_op(sink, enc_g, enc_e);

    // All other prefixes and opcodes, taken from `opcodes` high byte first.
    for byte_idx in (0..num_opcodes).rev() {
        let opcode_byte = (opcodes >> (byte_idx << 3)) & 0xFF;
        sink.put1(opcode_byte as u8);
    }

    // Register-direct MOD/RM. No memory is accessed, so no SIB byte or displacement follows.
    let modrm = encode_modrm(3, enc_g & 7, enc_e & 7);
    sink.put1(modrm);
}
// The two functions below are merely wrappers around `emit_std_enc_mem` and `emit_std_enc_enc`
// that accept actual `Reg`s rather than their hardware encodings.

/// Wrapper around `emit_std_enc_mem` that takes the G operand as a `Reg`.
pub(crate) fn emit_std_reg_mem(
    sink: &mut MachBuffer<Inst>,
    state: &EmitState,
    info: &EmitInfo,
    prefixes: LegacyPrefixes,
    opcodes: u32,
    num_opcodes: usize,
    reg_g: Reg,
    mem_e: &Amode,
    rex: RexFlags,
) {
    emit_std_enc_mem(
        sink,
        state,
        info,
        prefixes,
        opcodes,
        num_opcodes,
        reg_enc(reg_g),
        mem_e,
        rex,
    );
}
/// Wrapper around `emit_std_enc_enc` that takes both operands as `Reg`s and forwards their
/// hardware encodings.
pub(crate) fn emit_std_reg_reg(
    sink: &mut MachBuffer<Inst>,
    prefixes: LegacyPrefixes,
    opcodes: u32,
    num_opcodes: usize,
    reg_g: Reg,
    reg_e: Reg,
    rex: RexFlags,
) {
    emit_std_enc_enc(
        sink,
        prefixes,
        opcodes,
        num_opcodes,
        reg_enc(reg_g),
        reg_enc(reg_e),
        rex,
    );
}
/// Write an immediate taken from `simm32` to the sink, using a width selected by `size`.
///
/// - `size` of 4 or 8: all four bytes of `simm32` (even for 8, only 32 bits are written);
/// - `size` of 2: the low 16 bits;
/// - `size` of 1: the low 8 bits.
///
/// Any other `size` is treated as unreachable.
pub(crate) fn emit_simm(sink: &mut MachBuffer<Inst>, size: u8, simm32: u32) {
    if size == 8 || size == 4 {
        sink.put4(simm32);
    } else if size == 2 {
        sink.put2(simm32 as u16);
    } else if size == 1 {
        sink.put1(simm32 as u8);
    } else {
        unreachable!();
    }
}

View File

@@ -0,0 +1,2 @@
//! Encodes VEX instructions. These instructions are those added by the Advanced Vector Extensions
//! (AVX).

View File

@@ -10,6 +10,7 @@ use regalloc::{
PrettyPrint, PrettyPrintSized, RealRegUniverse, Reg, RegClass, RegUsageCollector,
RegUsageMapper, Writable,
};
use smallvec::{smallvec, SmallVec};
use std::fmt;
use std::string::String;
@@ -411,12 +412,12 @@ pub enum UnaryRmROpcode {
}
impl UnaryRmROpcode {
pub(crate) fn available_from(&self) -> Option<InstructionSet> {
pub(crate) fn available_from(&self) -> SmallVec<[InstructionSet; 2]> {
match self {
UnaryRmROpcode::Bsr | UnaryRmROpcode::Bsf => None,
UnaryRmROpcode::Lzcnt => Some(InstructionSet::Lzcnt),
UnaryRmROpcode::Tzcnt => Some(InstructionSet::BMI1),
UnaryRmROpcode::Popcnt => Some(InstructionSet::Popcnt),
UnaryRmROpcode::Bsr | UnaryRmROpcode::Bsf => smallvec![],
UnaryRmROpcode::Lzcnt => smallvec![InstructionSet::Lzcnt],
UnaryRmROpcode::Tzcnt => smallvec![InstructionSet::BMI1],
UnaryRmROpcode::Popcnt => smallvec![InstructionSet::Popcnt],
}
}
}
@@ -447,6 +448,7 @@ pub enum CmpOpcode {
Test,
}
#[derive(Debug)]
pub(crate) enum InstructionSet {
SSE,
SSE2,
@@ -458,10 +460,13 @@ pub(crate) enum InstructionSet {
BMI1,
#[allow(dead_code)] // never constructed (yet).
BMI2,
AVX512F,
AVX512VL,
}
/// Some SSE operations requiring 2 operands r/m and r.
#[derive(Clone, Copy, PartialEq)]
#[allow(dead_code)] // some variants here aren't used just yet
pub enum SseOpcode {
Addps,
Addpd,
@@ -479,6 +484,7 @@ pub enum SseOpcode {
Cmpss,
Cmpsd,
Cvtdq2ps,
Cvtdq2pd,
Cvtsd2ss,
Cvtsd2si,
Cvtsi2ss,
@@ -672,6 +678,7 @@ impl SseOpcode {
| SseOpcode::Cmpsd
| SseOpcode::Comisd
| SseOpcode::Cvtdq2ps
| SseOpcode::Cvtdq2pd
| SseOpcode::Cvtsd2ss
| SseOpcode::Cvtsd2si
| SseOpcode::Cvtsi2sd
@@ -827,6 +834,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Comiss => "comiss",
SseOpcode::Comisd => "comisd",
SseOpcode::Cvtdq2ps => "cvtdq2ps",
SseOpcode::Cvtdq2pd => "cvtdq2pd",
SseOpcode::Cvtsd2ss => "cvtsd2ss",
SseOpcode::Cvtsd2si => "cvtsd2si",
SseOpcode::Cvtsi2ss => "cvtsi2ss",
@@ -983,6 +991,35 @@ impl fmt::Display for SseOpcode {
}
}
/// Opcodes for AVX-512 instructions. Each opcode reports the `InstructionSet`s it is available
/// in via `available_from`, and is printed as its lower-case mnemonic.
#[derive(Clone)]
pub enum Avx512Opcode {
/// `vpabsq`; listed as available in `AVX512F` and `AVX512VL`.
Vpabsq,
}
impl Avx512Opcode {
/// Which `InstructionSet`s support the opcode?
pub(crate) fn available_from(&self) -> SmallVec<[InstructionSet; 2]> {
match self {
Avx512Opcode::Vpabsq => smallvec![InstructionSet::AVX512F, InstructionSet::AVX512VL],
}
}
}
/// Debug-prints the opcode as its lower-case mnemonic.
impl fmt::Debug for Avx512Opcode {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        fmt.write_str(match self {
            Avx512Opcode::Vpabsq => "vpabsq",
        })
    }
}
/// Displays the opcode exactly as its `Debug` representation.
impl fmt::Display for Avx512Opcode {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{:?}", self)
    }
}
/// This defines the ways a value can be extended: either signed- or zero-extension, or none for
/// types that are not extended. Contrast with [ExtMode], which defines the widths from and to which
/// values can be extended.

View File

@@ -2,449 +2,18 @@ use crate::binemit::{Addend, Reloc};
use crate::ir::immediates::{Ieee32, Ieee64};
use crate::ir::LibCall;
use crate::ir::TrapCode;
use crate::isa::x64::encoding::evex::{EvexInstruction, EvexVectorLength};
use crate::isa::x64::encoding::rex::{
emit_simm, emit_std_enc_enc, emit_std_enc_mem, emit_std_reg_mem, emit_std_reg_reg, int_reg_enc,
low8_will_sign_extend_to_32, low8_will_sign_extend_to_64, reg_enc, LegacyPrefixes, OpcodeMap,
RexFlags,
};
use crate::isa::x64::inst::args::*;
use crate::isa::x64::inst::*;
use crate::machinst::{inst_common, MachBuffer, MachInstEmit, MachLabel};
use core::convert::TryInto;
use log::debug;
use regalloc::{Reg, RegClass, Writable};
/// Returns true if `x`, reinterpreted as an `i32` and sign-extended to 64 bits, equals the
/// sign-extension of its own low 8 bits (i.e. the value fits in a signed 8-bit immediate).
fn low8_will_sign_extend_to_64(x: u32) -> bool {
    let widened = i64::from(x as i32);
    // Truncating to `i8` and widening again is the same as shifting left then right by 56.
    widened == i64::from(widened as i8)
}
/// Returns true if `x`, reinterpreted as an `i32`, equals the sign-extension of its own low
/// 8 bits (i.e. the value fits in a signed 8-bit immediate).
fn low8_will_sign_extend_to_32(x: u32) -> bool {
    let signed = x as i32;
    // Truncating to `i8` and widening again is the same as shifting left then right by 24.
    signed == i32::from(signed as i8)
}
//=============================================================================
// Instructions and subcomponents: emission
// For all of the routines that take both a memory-or-reg operand (sometimes
// called "E" in the Intel documentation) and a reg-only operand ("G" in
// Intelese), the order is always G first, then E.
//
// "enc" in the following means "hardware register encoding number".
/// Pack a MOD/RM byte: `m0d` in bits 7..6, `enc_reg_g` in bits 5..3, `rm_e` in bits 2..0.
///
/// Out-of-range fields are caught by debug assertions; otherwise each field is masked to its
/// width.
#[inline(always)]
fn encode_modrm(m0d: u8, enc_reg_g: u8, rm_e: u8) -> u8 {
    debug_assert!(m0d < 4);
    debug_assert!(enc_reg_g < 8);
    debug_assert!(rm_e < 8);
    let mod_field = (m0d & 3) << 6;
    let reg_field = (enc_reg_g & 7) << 3;
    let rm_field = rm_e & 7;
    mod_field | reg_field | rm_field
}
/// Pack a SIB byte: `shift` in bits 7..6, `enc_index` in bits 5..3, `enc_base` in bits 2..0.
///
/// Out-of-range fields are caught by debug assertions; otherwise each field is masked to its
/// width.
#[inline(always)]
fn encode_sib(shift: u8, enc_index: u8, enc_base: u8) -> u8 {
    debug_assert!(shift < 4);
    debug_assert!(enc_index < 8);
    debug_assert!(enc_base < 8);
    let scale_field = (shift & 3) << 6;
    let index_field = (enc_index & 7) << 3;
    let base_field = enc_base & 7;
    scale_field | index_field | base_field
}
/// Get the encoding number of a GPR.
#[inline(always)]
fn int_reg_enc(reg: Reg) -> u8 {
debug_assert!(reg.is_real());
debug_assert_eq!(reg.get_class(), RegClass::I64);
reg.get_hw_encoding()
}
/// Get the encoding number of any register.
///
/// Unlike `int_reg_enc`, no particular register class is required; debug builds only assert
/// that `reg` is a real register.
#[inline(always)]
fn reg_enc(reg: Reg) -> u8 {
debug_assert!(reg.is_real());
reg.get_hw_encoding()
}
/// A compact REX prefix specification, stored as a bit field:
/// - bit 0 set means REX.W must be 0 (cleared);
/// - bit 1 set means the REX prefix must always be emitted, even when it would be 0x40.
#[repr(transparent)]
#[derive(Clone, Copy)]
struct RexFlags(u8);

impl RexFlags {
    /// The default specification: REX.W is set, and the prefix is emitted only when needed.
    #[inline(always)]
    fn set_w() -> Self {
        RexFlags(0)
    }

    /// A specification for which the REX.W bit will be cleared.
    #[inline(always)]
    fn clear_w() -> Self {
        RexFlags(1)
    }

    /// Request that the REX prefix is emitted unconditionally.
    #[inline(always)]
    fn always_emit(&mut self) -> &mut Self {
        self.0 |= 2;
        self
    }

    /// Request unconditional emission when `reg` has a hardware encoding in 4..=7.
    ///
    /// Presumably these are the registers whose 8-bit forms are only reachable with a REX
    /// prefix present -- confirm against the ISA manual.
    #[inline(always)]
    fn always_emit_if_8bit_needed(&mut self, reg: Reg) -> &mut Self {
        if (4..=7).contains(&int_reg_enc(reg)) {
            self.always_emit();
        }
        self
    }

    /// True when REX.W must be 0.
    #[inline(always)]
    fn must_clear_w(&self) -> bool {
        self.0 & 1 == 1
    }

    /// True when the prefix must be emitted even if it is the redundant 0x40.
    #[inline(always)]
    fn must_always_emit(&self) -> bool {
        self.0 & 2 == 2
    }

    /// Emit the REX prefix for a G/E operand pair: REX.R comes from `enc_g`, REX.B from `enc_e`,
    /// and REX.X is 0. Nothing is written for a redundant 0x40 unless emission is forced.
    #[inline(always)]
    fn emit_two_op(&self, sink: &mut MachBuffer<Inst>, enc_g: u8, enc_e: u8) {
        // An index encoding of 0 contributes REX.X = 0, which is the two-operand form.
        self.emit_three_op(sink, enc_g, 0, enc_e)
    }

    /// Emit the REX prefix for a G operand plus index and base registers: REX.R comes from
    /// `enc_g`, REX.X from `enc_index`, REX.B from `enc_base`. Nothing is written for a
    /// redundant 0x40 unless emission is forced.
    #[inline(always)]
    fn emit_three_op(&self, sink: &mut MachBuffer<Inst>, enc_g: u8, enc_index: u8, enc_base: u8) {
        let mut rex: u8 = 0x40;
        if !self.must_clear_w() {
            rex |= 1 << 3;
        }
        // Bit 3 of each hardware encoding goes into the corresponding REX bit.
        rex |= ((enc_g >> 3) & 1) << 2;
        rex |= ((enc_index >> 3) & 1) << 1;
        rex |= (enc_base >> 3) & 1;
        if self.must_always_emit() || rex != 0x40 {
            sink.put1(rex);
        }
    }
}
/// Derive the REX flags from an operand size: REX.W is set for `Size64` and cleared for every
/// other size.
impl From<OperandSize> for RexFlags {
    fn from(size: OperandSize) -> Self {
        if let OperandSize::Size64 = size {
            RexFlags::set_w()
        } else {
            RexFlags::clear_w()
        }
    }
}
/// Derive the REX flags from an operand size and a register: as for a bare `OperandSize`, plus,
/// for `Size8`, forced emission of the prefix when the register calls for it (see
/// `always_emit_if_8bit_needed`).
impl From<(OperandSize, Reg)> for RexFlags {
    fn from((size, reg): (OperandSize, Reg)) -> Self {
        let mut flags = RexFlags::from(size);
        if let OperandSize::Size8 = size {
            flags.always_emit_if_8bit_needed(reg);
        }
        flags
    }
}
/// We may need to include one or more legacy prefix bytes before the REX prefix. This enum
/// covers only the small set of possibilities that we actually need.
enum LegacyPrefixes {
/// No prefix bytes.
None,
/// Operand Size Override -- here, denoting "16-bit operation".
_66,
/// The Lock prefix.
_F0,
/// Operand size override and Lock.
_66F0,
/// REPNE, but no specific meaning here -- is just an opcode extension.
_F2,
/// REP/REPE, but no specific meaning here -- is just an opcode extension.
_F3,
/// Operand size override and same effect as F3.
_66F3,
}
impl LegacyPrefixes {
#[inline(always)]
fn emit(&self, sink: &mut MachBuffer<Inst>) {
match self {
LegacyPrefixes::_66 => sink.put1(0x66),
LegacyPrefixes::_F0 => sink.put1(0xF0),
LegacyPrefixes::_66F0 => {
// I don't think the order matters, but in any case, this is the same order that
// the GNU assembler uses.
sink.put1(0x66);
sink.put1(0xF0);
}
LegacyPrefixes::_F2 => sink.put1(0xF2),
LegacyPrefixes::_F3 => sink.put1(0xF3),
LegacyPrefixes::_66F3 => {
sink.put1(0x66);
sink.put1(0xF3);
}
LegacyPrefixes::None => (),
}
}
}
/// This is the core 'emit' function for instructions that reference memory.
///
/// For an instruction that has as operands a reg encoding `enc_g` and a memory address `mem_e`,
/// create and emit:
/// - first the legacy prefixes, if any
/// - then the REX prefix, if needed
/// - then caller-supplied opcode byte(s) (`opcodes` and `num_opcodes`),
/// - then the MOD/RM byte,
/// - then optionally, a SIB byte,
/// - and finally optionally an immediate that will be derived from the `mem_e` operand.
///
/// For most instructions up to and including SSE4.2, that will be the whole instruction: this is
/// what we call "standard" instructions, and abbreviate "std" in the name here. VEX-prefixed
/// instructions will require their own emitter functions.
///
/// This will also work for 32-bits x86 instructions, assuming no REX prefix is provided.
///
/// The opcodes are written bigendianly for the convenience of callers. For example, if the opcode
/// bytes to be emitted are, in this order, F3 0F 27, then the caller should pass `opcodes` ==
/// 0xF3_0F_27 and `num_opcodes` == 3.
///
/// The register operand is represented here not as a `Reg` but as its hardware encoding, `enc_g`.
/// `rex` can specify special handling for the REX prefix. By default, the REX prefix will
/// indicate a 64-bit operation and will be deleted if it is redundant (0x40). Note that for a
/// 64-bit operation, the REX prefix will normally never be redundant, since REX.W must be 1 to
/// indicate a 64-bit operation.
fn emit_std_enc_mem(
sink: &mut MachBuffer<Inst>,
state: &EmitState,
info: &EmitInfo,
prefixes: LegacyPrefixes,
opcodes: u32,
mut num_opcodes: usize,
enc_g: u8,
mem_e: &Amode,
rex: RexFlags,
) {
// General comment for this function: the registers in `mem_e` must be
// 64-bit integer registers, because they are part of an address
// expression. But `enc_g` can be derived from a register of any class.
let srcloc = state.cur_srcloc();
let can_trap = mem_e.can_trap();
if can_trap {
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
}
prefixes.emit(sink);
match mem_e {
Amode::ImmReg { simm32, base, .. } => {
// If this is an access based off of RSP, it may trap with a stack overflow if it's the
// first touch of a new stack page.
if *base == regs::rsp() && !can_trap && info.flags().enable_probestack() {
sink.add_trap(srcloc, TrapCode::StackOverflow);
}
// First, the REX byte.
let enc_e = int_reg_enc(*base);
rex.emit_two_op(sink, enc_g, enc_e);
// Now the opcode(s). These include any other prefixes the caller
// hands to us.
while num_opcodes > 0 {
num_opcodes -= 1;
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
}
// Now the mod/rm and associated immediates. This is
// significantly complicated due to the multiple special cases.
if *simm32 == 0
&& enc_e != regs::ENC_RSP
&& enc_e != regs::ENC_RBP
&& enc_e != regs::ENC_R12
&& enc_e != regs::ENC_R13
{
// FIXME JRS 2020Feb11: those four tests can surely be
// replaced by a single mask-and-compare check. We should do
// that because this routine is likely to be hot.
sink.put1(encode_modrm(0, enc_g & 7, enc_e & 7));
} else if *simm32 == 0 && (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12) {
sink.put1(encode_modrm(0, enc_g & 7, 4));
sink.put1(0x24);
} else if low8_will_sign_extend_to_32(*simm32)
&& enc_e != regs::ENC_RSP
&& enc_e != regs::ENC_R12
{
sink.put1(encode_modrm(1, enc_g & 7, enc_e & 7));
sink.put1((simm32 & 0xFF) as u8);
} else if enc_e != regs::ENC_RSP && enc_e != regs::ENC_R12 {
sink.put1(encode_modrm(2, enc_g & 7, enc_e & 7));
sink.put4(*simm32);
} else if (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12)
&& low8_will_sign_extend_to_32(*simm32)
{
// REX.B distinguishes RSP from R12
sink.put1(encode_modrm(1, enc_g & 7, 4));
sink.put1(0x24);
sink.put1((simm32 & 0xFF) as u8);
} else if enc_e == regs::ENC_R12 || enc_e == regs::ENC_RSP {
//.. wait for test case for RSP case
// REX.B distinguishes RSP from R12
sink.put1(encode_modrm(2, enc_g & 7, 4));
sink.put1(0x24);
sink.put4(*simm32);
} else {
unreachable!("ImmReg");
}
}
Amode::ImmRegRegShift {
simm32,
base: reg_base,
index: reg_index,
shift,
..
} => {
// If this is an access based off of RSP, it may trap with a stack overflow if it's the
// first touch of a new stack page.
if *reg_base == regs::rsp() && !can_trap && info.flags().enable_probestack() {
sink.add_trap(srcloc, TrapCode::StackOverflow);
}
let enc_base = int_reg_enc(*reg_base);
let enc_index = int_reg_enc(*reg_index);
// The rex byte.
rex.emit_three_op(sink, enc_g, enc_index, enc_base);
// All other prefixes and opcodes.
while num_opcodes > 0 {
num_opcodes -= 1;
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
}
// modrm, SIB, immediates.
if low8_will_sign_extend_to_32(*simm32) && enc_index != regs::ENC_RSP {
sink.put1(encode_modrm(1, enc_g & 7, 4));
sink.put1(encode_sib(*shift, enc_index & 7, enc_base & 7));
sink.put1(*simm32 as u8);
} else if enc_index != regs::ENC_RSP {
sink.put1(encode_modrm(2, enc_g & 7, 4));
sink.put1(encode_sib(*shift, enc_index & 7, enc_base & 7));
sink.put4(*simm32);
} else {
panic!("ImmRegRegShift");
}
}
Amode::RipRelative { ref target } => {
// First, the REX byte, with REX.B = 0.
rex.emit_two_op(sink, enc_g, 0);
// Now the opcode(s). These include any other prefixes the caller
// hands to us.
while num_opcodes > 0 {
num_opcodes -= 1;
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
}
// RIP-relative is mod=00, rm=101.
sink.put1(encode_modrm(0, enc_g & 7, 0b101));
let offset = sink.cur_offset();
sink.use_label_at_offset(offset, *target, LabelUse::JmpRel32);
sink.put4(0);
}
}
}
/// This is the core 'emit' function for instructions that do not reference memory.
///
/// This is conceptually the same as emit_modrm_sib_enc_ge, except it is for the case where the E
/// operand is a register rather than memory. Hence it is much simpler.
fn emit_std_enc_enc(
sink: &mut MachBuffer<Inst>,
prefixes: LegacyPrefixes,
opcodes: u32,
mut num_opcodes: usize,
enc_g: u8,
enc_e: u8,
rex: RexFlags,
) {
// EncG and EncE can be derived from registers of any class, and they
// don't even have to be from the same class. For example, for an
// integer-to-FP conversion insn, one might be RegClass::I64 and the other
// RegClass::V128.
// The legacy prefixes.
prefixes.emit(sink);
// The rex byte.
rex.emit_two_op(sink, enc_g, enc_e);
// All other prefixes and opcodes.
while num_opcodes > 0 {
num_opcodes -= 1;
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
}
// Now the mod/rm byte. The instruction we're generating doesn't access
// memory, so there is no SIB byte or immediate -- we're done.
sink.put1(encode_modrm(3, enc_g & 7, enc_e & 7));
}
// These are merely wrappers for the above two functions that facilitate passing
// actual `Reg`s rather than their encodings.
fn emit_std_reg_mem(
sink: &mut MachBuffer<Inst>,
state: &EmitState,
info: &EmitInfo,
prefixes: LegacyPrefixes,
opcodes: u32,
num_opcodes: usize,
reg_g: Reg,
mem_e: &Amode,
rex: RexFlags,
) {
let enc_g = reg_enc(reg_g);
emit_std_enc_mem(
sink,
state,
info,
prefixes,
opcodes,
num_opcodes,
enc_g,
mem_e,
rex,
);
}
fn emit_std_reg_reg(
sink: &mut MachBuffer<Inst>,
prefixes: LegacyPrefixes,
opcodes: u32,
num_opcodes: usize,
reg_g: Reg,
reg_e: Reg,
rex: RexFlags,
) {
let enc_g = reg_enc(reg_g);
let enc_e = reg_enc(reg_e);
emit_std_enc_enc(sink, prefixes, opcodes, num_opcodes, enc_g, enc_e, rex);
}
/// Write a suitable number of bits from an imm64 to the sink.
fn emit_simm(sink: &mut MachBuffer<Inst>, size: u8, simm32: u32) {
match size {
8 | 4 => sink.put4(simm32),
2 => sink.put2(simm32 as u16),
1 => sink.put1(simm32 as u8),
_ => unreachable!(),
}
}
use regalloc::{Reg, Writable};
/// A small helper to generate a signed conversion instruction.
fn emit_signed_cvt(
@@ -546,18 +115,30 @@ pub(crate) fn emit(
info: &EmitInfo,
state: &mut EmitState,
) {
if let Some(iset_requirement) = inst.isa_requirement() {
let matches_isa_flags = |iset_requirement: &InstructionSet| -> bool {
match iset_requirement {
// Cranelift assumes SSE2 at least.
InstructionSet::SSE | InstructionSet::SSE2 => {}
InstructionSet::SSSE3 => assert!(info.isa_flags.use_ssse3()),
InstructionSet::SSE41 => assert!(info.isa_flags.use_sse41()),
InstructionSet::SSE42 => assert!(info.isa_flags.use_sse42()),
InstructionSet::Popcnt => assert!(info.isa_flags.use_popcnt()),
InstructionSet::Lzcnt => assert!(info.isa_flags.use_lzcnt()),
InstructionSet::BMI1 => assert!(info.isa_flags.use_bmi1()),
InstructionSet::BMI2 => assert!(info.isa_flags.has_bmi2()),
InstructionSet::SSE | InstructionSet::SSE2 => true,
InstructionSet::SSSE3 => info.isa_flags.use_ssse3(),
InstructionSet::SSE41 => info.isa_flags.use_sse41(),
InstructionSet::SSE42 => info.isa_flags.use_sse42(),
InstructionSet::Popcnt => info.isa_flags.use_popcnt(),
InstructionSet::Lzcnt => info.isa_flags.use_lzcnt(),
InstructionSet::BMI1 => info.isa_flags.use_bmi1(),
InstructionSet::BMI2 => info.isa_flags.has_bmi2(),
InstructionSet::AVX512F => info.isa_flags.has_avx512f(),
InstructionSet::AVX512VL => info.isa_flags.has_avx512vl(),
}
};
// Certain instructions may be present in more than one ISA feature set; we must at least match
// one of them in the target CPU.
let isa_requirements = inst.available_in_any_isa();
if !isa_requirements.is_empty() && !isa_requirements.iter().any(matches_isa_flags) {
panic!(
"Cannot emit inst '{:?}' for target; failed to match ISA requirements: {:?}",
inst, isa_requirements
)
}
match inst {
@@ -887,7 +468,6 @@ pub(crate) fn emit(
// idiv %divisor
//
// $done:
debug_assert!(info.flags().avoid_div_traps());
// Check if the divisor is zero, first.
let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0), divisor.to_reg());
@@ -911,7 +491,7 @@ pub(crate) fn emit(
// x % -1 = 0; put the result into the destination, $rdx.
let done_label = sink.get_label();
let inst = Inst::imm(*size, 0, Writable::from_reg(regs::rdx()));
let inst = Inst::imm(OperandSize::Size64, 0, Writable::from_reg(regs::rdx()));
inst.emit(sink, info, state);
let inst = Inst::jmp_known(done_label);
@@ -951,11 +531,6 @@ pub(crate) fn emit(
sink.bind_label(do_op);
}
assert!(
*size != OperandSize::Size8,
"CheckedDivOrRemSeq for i8 is not yet implemented"
);
// Fill in the high parts:
if kind.is_signed() {
// sign-extend the sign-bit of rax into rdx, for signed opcodes.
@@ -1769,6 +1344,7 @@ pub(crate) fn emit(
let rex = RexFlags::clear_w();
let (prefix, opcode, num_opcodes) = match op {
SseOpcode::Cvtdq2pd => (LegacyPrefixes::_F3, 0x0FE6, 2),
SseOpcode::Cvtss2sd => (LegacyPrefixes::_F3, 0x0F5A, 2),
SseOpcode::Cvtsd2ss => (LegacyPrefixes::_F2, 0x0F5A, 2),
SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F28, 2),
@@ -1830,6 +1406,24 @@ pub(crate) fn emit(
};
}
Inst::XmmUnaryRmREvex { op, src, dst } => {
let opcode = match op {
Avx512Opcode::Vpabsq => 0x1f,
};
match src {
RegMem::Reg { reg: src } => EvexInstruction::new()
.length(EvexVectorLength::V128)
.prefix(LegacyPrefixes::_66)
.map(OpcodeMap::_0F38)
.w(true)
.opcode(opcode)
.reg(dst.to_reg().get_hw_encoding())
.rm(src.get_hw_encoding())
.encode(sink),
_ => todo!(),
};
}
Inst::XmmRmR {
op,
src: src_e,

View File

@@ -3859,6 +3859,18 @@ fn test_x64_emit() {
"pabsd %xmm10, %xmm11",
));
insns.push((
Inst::xmm_unary_rm_r(SseOpcode::Cvtdq2pd, RegMem::reg(xmm2), w_xmm8),
"F3440FE6C2",
"cvtdq2pd %xmm2, %xmm8",
));
insns.push((
Inst::xmm_unary_rm_r_evex(Avx512Opcode::Vpabsq, RegMem::reg(xmm2), w_xmm8),
"6272FD081FC2",
"vpabsq %xmm2, %xmm8",
));
// Xmm to int conversions, and conversely.
insns.push((
@@ -4270,6 +4282,7 @@ fn test_x64_emit() {
let mut isa_flag_builder = x64::settings::builder();
isa_flag_builder.enable("has_ssse3").unwrap();
isa_flag_builder.enable("has_sse41").unwrap();
isa_flag_builder.enable("has_avx512f").unwrap();
let isa_flags = x64::settings::Flags::new(&flags, isa_flag_builder);
let rru = regs::create_reg_universe_systemv(&flags);

View File

@@ -14,7 +14,7 @@ use regalloc::{
PrettyPrint, PrettyPrintSized, RealRegUniverse, Reg, RegClass, RegUsageCollector,
RegUsageMapper, SpillSlot, VirtualReg, Writable,
};
use smallvec::SmallVec;
use smallvec::{smallvec, SmallVec};
use std::fmt;
use std::string::{String, ToString};
@@ -224,6 +224,12 @@ pub enum Inst {
dst: Writable<Reg>,
},
XmmUnaryRmREvex {
op: Avx512Opcode,
src: RegMem,
dst: Writable<Reg>,
},
/// XMM (scalar or vector) unary op (from xmm to reg/mem): stores, movd, movq
XmmMovRM {
op: SseOpcode,
@@ -501,7 +507,11 @@ pub(crate) fn low32_will_sign_extend_to_64(x: u64) -> bool {
}
impl Inst {
fn isa_requirement(&self) -> Option<InstructionSet> {
/// Retrieve a list of ISA feature sets in which the instruction is available. An empty list
/// indicates that the instruction is available in the baseline feature set (i.e. SSE2 and
/// below); more than one `InstructionSet` in the list indicates that the instruction is present
/// in *any* of the included ISA feature sets.
fn available_in_any_isa(&self) -> SmallVec<[InstructionSet; 2]> {
match self {
// These instructions are part of SSE2, which is a basic requirement in Cranelift, and
// don't have to be checked.
@@ -554,7 +564,7 @@ impl Inst {
| Inst::ElfTlsGetAddr { .. }
| Inst::MachOTlsGetAddr { .. }
| Inst::ValueLabelMarker { .. }
| Inst::Unwind { .. } => None,
| Inst::Unwind { .. } => smallvec![],
Inst::UnaryRmR { op, .. } => op.available_from(),
@@ -565,7 +575,9 @@ impl Inst {
| Inst::XmmRmR { op, .. }
| Inst::XmmRmRImm { op, .. }
| Inst::XmmToGpr { op, .. }
| Inst::XmmUnaryRmR { op, .. } => Some(op.available_from()),
| Inst::XmmUnaryRmR { op, .. } => smallvec![op.available_from()],
Inst::XmmUnaryRmREvex { op, .. } => op.available_from(),
}
}
}
@@ -700,6 +712,12 @@ impl Inst {
Inst::XmmUnaryRmR { op, src, dst }
}
/// Build an `Inst::XmmUnaryRmREvex` from an AVX-512 opcode, a source operand and a destination
/// register. `src` is checked with `assert_regclass_is(RegClass::V128)` and debug builds assert
/// that `dst` is of class `RegClass::V128`.
pub(crate) fn xmm_unary_rm_r_evex(op: Avx512Opcode, src: RegMem, dst: Writable<Reg>) -> Inst {
src.assert_regclass_is(RegClass::V128);
debug_assert!(dst.to_reg().get_class() == RegClass::V128);
Inst::XmmUnaryRmREvex { op, src, dst }
}
pub(crate) fn xmm_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Self {
src.assert_regclass_is(RegClass::V128);
debug_assert!(dst.to_reg().get_class() == RegClass::V128);
@@ -1121,11 +1139,7 @@ impl Inst {
pub(crate) fn store(ty: Type, from_reg: Reg, to_addr: impl Into<SyntheticAmode>) -> Inst {
let rc = from_reg.get_class();
match rc {
RegClass::I64 => {
// Always store the full register, to ensure that the high bits are properly set
// when doing a full reload.
Inst::mov_r_m(OperandSize::Size64, from_reg, to_addr)
}
RegClass::I64 => Inst::mov_r_m(OperandSize::from_ty(ty), from_reg, to_addr),
RegClass::V128 => {
let opcode = match ty {
types::F32 => SseOpcode::Movss,
@@ -1390,6 +1404,13 @@ impl PrettyPrint for Inst {
show_ireg_sized(dst.to_reg(), mb_rru, 8),
),
Inst::XmmUnaryRmREvex { op, src, dst, .. } => format!(
"{} {}, {}",
ljustify(op.to_string()),
src.show_rru_sized(mb_rru, 8),
show_ireg_sized(dst.to_reg(), mb_rru, 8),
),
Inst::XmmMovRM { op, src, dst, .. } => format!(
"{} {}, {}",
ljustify(op.to_string()),
@@ -1862,7 +1883,9 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_def(Writable::from_reg(regs::rdx()));
}
},
Inst::UnaryRmR { src, dst, .. } | Inst::XmmUnaryRmR { src, dst, .. } => {
Inst::UnaryRmR { src, dst, .. }
| Inst::XmmUnaryRmR { src, dst, .. }
| Inst::XmmUnaryRmREvex { src, dst, .. } => {
src.get_regs_as_uses(collector);
collector.add_def(*dst);
}
@@ -2209,6 +2232,11 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
ref mut dst,
..
}
| Inst::XmmUnaryRmREvex {
ref mut src,
ref mut dst,
..
}
| Inst::UnaryRmR {
ref mut src,
ref mut dst,
@@ -2827,7 +2855,7 @@ impl EmitState {
self.stack_map = None;
}
fn cur_srcloc(&self) -> SourceLoc {
pub(crate) fn cur_srcloc(&self) -> SourceLoc {
self.cur_srcloc
}
}

View File

@@ -89,8 +89,8 @@ impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper {
fn sp(&self) -> u16 {
X86_64::RSP.0
}
fn fp(&self) -> u16 {
X86_64::RBP.0
fn fp(&self) -> Option<u16> {
Some(X86_64::RBP.0)
}
}
@@ -109,6 +109,7 @@ mod tests {
use target_lexicon::triple;
#[test]
#[cfg_attr(feature = "old-x86-backend", ignore)]
fn test_simple_func() {
let isa = lookup(triple!("x86_64"))
.expect("expect x86 ISA")
@@ -151,6 +152,7 @@ mod tests {
}
#[test]
#[cfg_attr(feature = "old-x86-backend", ignore)]
fn test_multi_return_func() {
let isa = lookup(triple!("x86_64"))
.expect("expect x86 ISA")

View File

@@ -204,6 +204,7 @@ enum ExtSpec {
ZeroExtendTo32,
ZeroExtendTo64,
SignExtendTo32,
#[allow(dead_code)] // not used just yet but may be used in the future!
SignExtendTo64,
}
@@ -1854,10 +1855,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let ty = ty.unwrap();
if ty == types::I64X2 {
// This lowering could be a single instruction with AVX512F/VL's VPABSQ instruction.
// Instead, we use a separate register, `tmp`, to contain the results of `0 - src`
// and then blend in those results with `BLENDVPD` if the MSB of `tmp` was set to 1
// (i.e. if `tmp` was negative or, conversely, if `src` was originally positive).
if isa_flags.use_avx512f_simd() || isa_flags.use_avx512vl_simd() {
ctx.emit(Inst::xmm_unary_rm_r_evex(Avx512Opcode::Vpabsq, src, dst));
} else {
// If `VPABSQ` from AVX512 is unavailable, we use a separate register, `tmp`, to
// contain the results of `0 - src` and then blend in those results with
// `BLENDVPD` if the MSB of `tmp` was set to 1 (i.e. if `tmp` was negative or,
// conversely, if `src` was originally positive).
// Emit all 0s into the `tmp` register.
let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
@@ -1873,6 +1877,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ty,
));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Blendvpd, src, dst));
}
} else if ty.is_vector() {
let opcode = match ty {
types::I8X16 => SseOpcode::Pabsb,
@@ -2041,7 +2046,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
ctx.emit(Inst::shift_r(size, shift_kind, count, dst));
} else if dst_ty == types::I128 {
let amt_src = put_input_in_reg(ctx, inputs[1]);
let amt_src = put_input_in_regs(ctx, inputs[1]).regs()[0];
let src = put_input_in_regs(ctx, inputs[0]);
let dst = get_output_reg(ctx, outputs[0]);
@@ -3914,7 +3919,15 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::xmm_rm_r(opcode, RegMem::from(dst), dst));
}
}
Opcode::FcvtLowFromSint => {
let src = RegMem::reg(put_input_in_reg(ctx, inputs[0]));
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
ctx.emit(Inst::xmm_unary_rm_r(
SseOpcode::Cvtdq2pd,
RegMem::from(src),
dst,
));
}
Opcode::FcvtFromUint => {
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let ty = ty.unwrap();
@@ -4813,28 +4826,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
if elem_ty == types::I128 {
let srcs = put_input_in_regs(ctx, inputs[0]);
ctx.emit(Inst::mov_r_m(
OperandSize::Size64,
srcs.regs()[0],
addr.clone(),
));
ctx.emit(Inst::mov_r_m(
OperandSize::Size64,
srcs.regs()[1],
addr.offset(8),
));
ctx.emit(Inst::store(types::I64, srcs.regs()[0], addr.clone()));
ctx.emit(Inst::store(types::I64, srcs.regs()[1], addr.offset(8)));
} else {
let src = put_input_in_reg(ctx, inputs[0]);
ctx.emit(match elem_ty {
types::F32 => Inst::xmm_mov_r_m(SseOpcode::Movss, src, addr),
types::F64 => Inst::xmm_mov_r_m(SseOpcode::Movsd, src, addr),
_ if elem_ty.is_vector() && elem_ty.bits() == 128 => {
// TODO Specialize for different types: MOVUPD, MOVDQU, etc.
Inst::xmm_mov_r_m(SseOpcode::Movups, src, addr)
}
_ => Inst::mov_r_m(OperandSize::from_ty(elem_ty), src, addr),
});
ctx.emit(Inst::store(elem_ty, src, addr));
}
}
@@ -4938,7 +4934,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let ty_access = ctx.input_ty(insn, 0);
assert!(is_valid_atomic_transaction_ty(ty_access));
ctx.emit(Inst::mov_r_m(OperandSize::from_ty(ty_access), data, addr));
ctx.emit(Inst::store(ty_access, data, addr));
ctx.emit(Inst::Fence {
kind: FenceKind::MFence,
});
@@ -5181,7 +5177,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
input_ty,
));
if flags.avoid_div_traps() {
// Always do explicit checks for `srem`: otherwise, INT_MIN % -1 is not handled properly.
if flags.avoid_div_traps() || op == Opcode::Srem {
// A vcode meta-instruction is used to lower the inline checks, since they embed
// pc-relative offsets that must not change, thus requiring regalloc to not
// interfere by introducing spills and reloads.

View File

@@ -9,7 +9,7 @@ use crate::isa::Builder as IsaBuilder;
use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
use crate::result::CodegenResult;
use crate::settings::{self as shared_settings, Flags};
use alloc::boxed::Box;
use alloc::{boxed::Box, vec::Vec};
use core::hash::{Hash, Hasher};
use regalloc::{PrettyPrint, RealRegUniverse, Reg};
use target_lexicon::Triple;
@@ -18,6 +18,7 @@ use target_lexicon::Triple;
use crate::isa::unwind::systemv;
mod abi;
pub mod encoding;
mod inst;
mod lower;
mod settings;
@@ -85,6 +86,10 @@ impl MachBackend for X64Backend {
&self.flags
}
// Gathers every value yielded by iterating `self.x64_flags` into a `Vec`.
fn isa_flags(&self) -> Vec<shared_settings::Value> {
self.x64_flags.iter().collect()
}
fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) {
self.flags.hash(&mut hasher);
self.x64_flags.hash(&mut hasher);

View File

@@ -1,6 +1,6 @@
//! x86 Settings.
use crate::settings::{self, detail, Builder};
use crate::settings::{self, detail, Builder, Value};
use core::fmt;
// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a

View File

@@ -503,15 +503,18 @@ fn callee_saved_regs_used(isa: &dyn TargetIsa, func: &ir::Function) -> RegisterS
pub fn prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> {
match func.signature.call_conv {
// For now, just translate fast and cold as system_v.
CallConv::Fast | CallConv::Cold | CallConv::SystemV => {
CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::WasmtimeSystemV => {
system_v_prologue_epilogue(func, isa)
}
CallConv::WindowsFastcall => fastcall_prologue_epilogue(func, isa),
CallConv::WindowsFastcall | CallConv::WasmtimeFastcall => {
fastcall_prologue_epilogue(func, isa)
}
CallConv::BaldrdashSystemV | CallConv::BaldrdashWindows => {
baldrdash_prologue_epilogue(func, isa)
}
CallConv::Probestack => unimplemented!("probestack calling convention"),
CallConv::Baldrdash2020 => unimplemented!("Baldrdash ABI 2020"),
CallConv::AppleAarch64 => unreachable!(),
}
}
@@ -1083,16 +1086,17 @@ pub fn create_unwind_info(
isa: &dyn TargetIsa,
) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {
use crate::isa::unwind::UnwindInfo;
use crate::machinst::UnwindInfoKind;
// Assumption: RBP is being used as the frame pointer for both calling conventions
// In the future, we should be omitting frame pointer as an optimization, so this will change
Ok(match func.signature.call_conv {
CallConv::Fast | CallConv::Cold | CallConv::SystemV => {
Ok(match isa.unwind_info_kind() {
UnwindInfoKind::SystemV => {
super::unwind::systemv::create_unwind_info(func, isa)?.map(|u| UnwindInfo::SystemV(u))
}
CallConv::WindowsFastcall => {
UnwindInfoKind::Windows => {
super::unwind::winx64::create_unwind_info(func, isa)?.map(|u| UnwindInfo::WindowsX64(u))
}
_ => None,
UnwindInfoKind::None => None,
})
}

View File

@@ -21,8 +21,7 @@ use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
use crate::regalloc;
use crate::result::CodegenResult;
use crate::timing;
use alloc::borrow::Cow;
use alloc::boxed::Box;
use alloc::{borrow::Cow, boxed::Box, vec::Vec};
use core::any::Any;
use core::fmt;
use core::hash::{Hash, Hasher};
@@ -79,6 +78,10 @@ impl TargetIsa for Isa {
&self.shared_flags
}
// Gathers every value yielded by iterating `self.isa_flags` into a `Vec`.
fn isa_flags(&self) -> Vec<shared_settings::Value> {
self.isa_flags.iter().collect()
}
fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) {
self.shared_flags.hash(&mut hasher);
self.isa_flags.hash(&mut hasher);

View File

@@ -1,6 +1,6 @@
//! x86 Settings.
use crate::settings::{self, detail, Builder};
use crate::settings::{self, detail, Builder, Value};
use core::fmt;
// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a

View File

@@ -3,7 +3,7 @@
use crate::ir::Function;
use crate::isa::{
unwind::systemv::{RegisterMappingError, UnwindInfo},
CallConv, RegUnit, TargetIsa,
RegUnit, TargetIsa,
};
use crate::result::CodegenResult;
use gimli::{write::CommonInformationEntry, Encoding, Format, Register, X86_64};
@@ -97,8 +97,8 @@ pub(crate) fn create_unwind_info(
isa: &dyn TargetIsa,
) -> CodegenResult<Option<UnwindInfo>> {
// Only System V-like calling conventions are supported
match func.signature.call_conv {
CallConv::Fast | CallConv::Cold | CallConv::SystemV => {}
match isa.unwind_info_kind() {
crate::machinst::UnwindInfoKind::SystemV => {}
_ => return Ok(None),
}
@@ -121,8 +121,8 @@ pub(crate) fn create_unwind_info(
fn sp(&self) -> u16 {
X86_64::RSP.0
}
fn fp(&self) -> u16 {
X86_64::RBP.0
fn fp(&self) -> Option<u16> {
Some(X86_64::RBP.0)
}
}
let map = RegisterMapper(isa);

View File

@@ -2,7 +2,7 @@
use crate::ir::Function;
use crate::isa::x86::registers::{FPR, GPR};
use crate::isa::{unwind::winx64::UnwindInfo, CallConv, RegUnit, TargetIsa};
use crate::isa::{unwind::winx64::UnwindInfo, RegUnit, TargetIsa};
use crate::result::CodegenResult;
pub(crate) fn create_unwind_info(
@@ -10,7 +10,7 @@ pub(crate) fn create_unwind_info(
isa: &dyn TargetIsa,
) -> CodegenResult<Option<UnwindInfo>> {
// Only Windows fastcall is supported for unwind information
if func.signature.call_conv != CallConv::WindowsFastcall || func.prologue_end.is_none() {
if !func.signature.call_conv.extends_windows_fastcall() || func.prologue_end.is_none() {
return Ok(None);
}

View File

@@ -97,6 +97,7 @@ mod inst_predicates;
mod iterators;
mod legalizer;
mod licm;
mod log;
mod nan_canonicalization;
mod partition_slice;
mod postopt;

View File

@@ -0,0 +1,39 @@
//! This module implements deferred display helpers.
//!
//! These are particularly useful in logging contexts, where the maximum logging level filter might
//! be enabled, but we don't want the arguments to be evaluated early:
//!
//! ```
//! log::set_max_level(log::LevelFilter::max());
//! fn expensive_calculation() -> String {
//! "a string that is very slow to generate".into()
//! }
//! log::debug!("{}", expensive_calculation());
//! ```
//!
//! If the associated log implementation filters out log debug entries, the expensive calculation
//! would have been spurious. In this case, we can wrap the expensive computation within a
//! `DeferredDisplay`, so that the computation only happens when the actual `fmt` function is
//! called.
use core::fmt;
/// Wraps a closure so that the value it produces is only computed at the moment the wrapper
/// itself is formatted (via `Display` or `Debug`).
pub(crate) struct DeferredDisplay<F>(F);

impl<F, T> DeferredDisplay<F>
where
    F: Fn() -> T,
    T: fmt::Display,
{
    /// Stores `f` without invoking it.
    pub(crate) fn new(f: F) -> Self {
        DeferredDisplay(f)
    }
}

impl<F, T> fmt::Display for DeferredDisplay<F>
where
    F: Fn() -> T,
    T: fmt::Display,
{
    /// Runs the closure and forwards its result (and the formatter options) to `Display`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let produced = (self.0)();
        fmt::Display::fmt(&produced, f)
    }
}

impl<F, T> fmt::Debug for DeferredDisplay<F>
where
    F: Fn() -> T,
    T: fmt::Debug,
{
    /// Runs the closure and forwards its result (and the formatter options) to `Debug`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let produced = (self.0)();
        fmt::Debug::fmt(&produced, f)
    }
}

View File

@@ -30,6 +30,12 @@ pub trait ABICallee {
/// Access the (possibly legalized) signature.
fn signature(&self) -> &Signature;
/// Accumulate outgoing arguments. This ensures that at least SIZE bytes
/// are allocated in the prologue to be available for use in function calls
/// to hold arguments and/or return values. If this function is called
/// multiple times, the maximum of all SIZE values will be available.
fn accumulate_outgoing_args_size(&mut self, size: u32);
/// Get the settings controlling this function's compilation.
fn flags(&self) -> &settings::Flags;
@@ -189,9 +195,6 @@ pub trait ABICallee {
from_slot: SpillSlot,
ty: Option<Type>,
) -> Self::I;
/// Desired unwind info type.
fn unwind_info_kind(&self) -> UnwindInfoKind;
}
/// Trait implemented by an object that tracks ABI-related state and can
@@ -245,6 +248,13 @@ pub trait ABICaller {
/// Emit code to post-adjust the stack, after call return and return-value copies.
fn emit_stack_post_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C);
/// Accumulate outgoing arguments. This ensures that the caller (as
/// identified via the CTX argument) allocates enough space in the
/// prologue to hold all arguments and return values for this call.
/// There is no code emitted at the call site, everything is done
/// in the caller's function prologue.
fn accumulate_outgoing_args_size<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C);
/// Emit the call itself.
///
/// The returned instruction should have proper use- and def-sets according

View File

@@ -102,7 +102,7 @@
//! support the SpiderMonkey Wasm ABI. For details of the multi-value return
//! ABI, see:
//!
//! https://searchfox.org/mozilla-central/rev/bc3600def806859c31b2c7ac06e3d69271052a89/js/src/wasm/WasmStubs.h#134
//! <https://searchfox.org/mozilla-central/rev/bc3600def806859c31b2c7ac06e3d69271052a89/js/src/wasm/WasmStubs.h#134>
//!
//! In brief:
//! - Return values are processed in *reverse* order.
@@ -444,6 +444,7 @@ pub trait ABIMachineSpec {
flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
fixed_frame_storage_size: u32,
outgoing_args_size: u32,
) -> (u64, SmallVec<[Self::I; 16]>);
/// Generate a clobber-restore sequence. This sequence should perform the
@@ -455,6 +456,7 @@ pub trait ABIMachineSpec {
flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
fixed_frame_storage_size: u32,
outgoing_args_size: u32,
) -> SmallVec<[Self::I; 16]>;
/// Generate a call instruction/sequence. This method is provided one
@@ -576,6 +578,8 @@ pub struct ABICalleeImpl<M: ABIMachineSpec> {
stackslots: PrimaryMap<StackSlot, u32>,
/// Total stack size of all stackslots.
stackslots_size: u32,
/// Stack size to be reserved for outgoing arguments.
outgoing_args_size: u32,
/// Clobbered registers, from regalloc.
clobbered: Set<Writable<RealReg>>,
/// Total number of spillslots, from regalloc.
@@ -646,7 +650,9 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> {
|| call_conv == isa::CallConv::Fast
|| call_conv == isa::CallConv::Cold
|| call_conv.extends_baldrdash()
|| call_conv.extends_windows_fastcall(),
|| call_conv.extends_windows_fastcall()
|| call_conv == isa::CallConv::AppleAarch64
|| call_conv == isa::CallConv::WasmtimeSystemV,
"Unsupported calling convention: {:?}",
call_conv
);
@@ -689,6 +695,7 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> {
sig,
stackslots,
stackslots_size: stack_offset,
outgoing_args_size: 0,
clobbered: Set::empty(),
spillslots: None,
fixed_frame_storage_size: 0,
@@ -915,6 +922,12 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
}
}
fn accumulate_outgoing_args_size(&mut self, size: u32) {
    // Track the largest request seen so far; smaller requests leave the field unchanged.
    self.outgoing_args_size = self.outgoing_args_size.max(size);
}
fn flags(&self) -> &settings::Flags {
&self.flags
}
@@ -1196,6 +1209,15 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
let spill_off = islot * M::word_bytes() as i64;
let sp_off = self.stackslots_size as i64 + spill_off;
trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
// Integer types smaller than word size have been spilled as words below,
// and therefore must be reloaded in the same type.
let ty = if ty.is_int() && ty.bytes() < M::word_bytes() {
M::word_type()
} else {
ty
};
gen_load_stack_multi::<M>(StackAMode::NominalSPOffset(sp_off, ty), into_regs, ty)
}
@@ -1211,6 +1233,19 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
let spill_off = islot * M::word_bytes() as i64;
let sp_off = self.stackslots_size as i64 + spill_off;
trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
// When reloading from a spill slot, we might have lost information about real integer
// types. For instance, on the x64 backend, a zero-extension can become spurious and
// optimized into a move, causing vregs of types I32 and I64 to share the same coalescing
// equivalency class. As a matter of fact, such a value can be spilled as an I32 and later
// reloaded as an I64; to make sure the high bits are always defined, do a word-sized store
// all the time, in this case.
let ty = if ty.is_int() && ty.bytes() < M::word_bytes() {
M::word_type()
} else {
ty
};
gen_store_stack_multi::<M>(StackAMode::NominalSPOffset(sp_off, ty), from_regs, ty)
}
@@ -1283,11 +1318,12 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
}
// Save clobbered registers.
let (_, clobber_insts) = M::gen_clobber_save(
let (clobber_size, clobber_insts) = M::gen_clobber_save(
self.call_conv,
&self.flags,
&self.clobbered,
self.fixed_frame_storage_size,
self.outgoing_args_size,
);
insts.extend(clobber_insts);
@@ -1302,7 +1338,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
// [crate::machinst::abi_impl](this module) for more details
// on stackframe layout and nominal SP maintenance.
self.total_frame_size = Some(total_stacksize);
self.total_frame_size = Some(total_stacksize + clobber_size as u32);
insts
}
@@ -1315,6 +1351,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
&self.flags,
&self.clobbered,
self.fixed_frame_storage_size,
self.outgoing_args_size,
));
// N.B.: we do *not* emit a nominal SP adjustment here, because (i) there will be no
@@ -1369,18 +1406,6 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
.next()
.unwrap()
}
fn unwind_info_kind(&self) -> UnwindInfoKind {
match self.sig.call_conv {
#[cfg(feature = "unwind")]
isa::CallConv::Fast | isa::CallConv::Cold | isa::CallConv::SystemV => {
UnwindInfoKind::SystemV
}
#[cfg(feature = "unwind")]
isa::CallConv::WindowsFastcall => UnwindInfoKind::Windows,
_ => UnwindInfoKind::None,
}
}
}
fn abisig_to_uses_and_defs<M: ABIMachineSpec>(sig: &ABISig) -> (Vec<Reg>, Vec<Writable<Reg>>) {
@@ -1529,6 +1554,11 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
}
}
fn accumulate_outgoing_args_size<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C) {
    // Sum of the signature's stack argument space and stack return space, reported to the
    // context's ABI object.
    let sig = &self.sig;
    let needed = (sig.stack_arg_space + sig.stack_ret_space) as u32;
    ctx.abi().accumulate_outgoing_args_size(needed);
}
fn emit_stack_pre_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C) {
let off = self.sig.stack_arg_space + self.sig.stack_ret_space;
adjust_stack_and_nominal_sp::<M, C>(ctx, off as i32, /* is_sub = */ true)

View File

@@ -2,10 +2,12 @@
use crate::binemit;
use crate::ir;
use crate::isa::{EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa};
use crate::isa::{
BackendVariant, EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa,
};
use crate::machinst::*;
use crate::regalloc::RegisterSet;
use crate::settings::Flags;
use crate::settings::{self, Flags};
#[cfg(feature = "testing_hooks")]
use crate::regalloc::RegDiversions;
@@ -14,7 +16,6 @@ use crate::regalloc::RegDiversions;
use crate::isa::unwind::systemv::RegisterMappingError;
use core::any::Any;
use core::hash::Hasher;
use std::borrow::Cow;
use std::fmt;
use target_lexicon::Triple;
@@ -59,8 +60,16 @@ impl TargetIsa for TargetIsaAdapter {
self.backend.flags()
}
// Delegates to the wrapped backend's `isa_flags`.
fn isa_flags(&self) -> Vec<settings::Value> {
self.backend.isa_flags()
}
// The adapter always reports `BackendVariant::MachInst`.
fn variant(&self) -> BackendVariant {
BackendVariant::MachInst
}
fn hash_all_flags(&self, hasher: &mut dyn Hasher) {
self.backend.hash_all_flags(hasher)
self.backend.hash_all_flags(hasher);
}
fn register_info(&self) -> RegInfo {

View File

@@ -1,6 +1,7 @@
//! Compilation backend pipeline: optimized IR to VCode / binemit.
use crate::ir::Function;
use crate::log::DeferredDisplay;
use crate::machinst::*;
use crate::settings;
use crate::timing;
@@ -29,9 +30,11 @@ where
lower.lower(b)?
};
// Creating the vcode string representation may be costly for large functions, so defer its
// rendering.
debug!(
"vcode from lowering: \n{}",
vcode.show_rru(Some(b.reg_universe()))
DeferredDisplay::new(|| vcode.show_rru(Some(b.reg_universe())))
);
// Perform register allocation.
@@ -103,7 +106,7 @@ where
debug!(
"vcode after regalloc: final version:\n{}",
vcode.show_rru(Some(b.reg_universe()))
DeferredDisplay::new(|| vcode.show_rru(Some(b.reg_universe())))
);
Ok(vcode)

View File

@@ -64,18 +64,18 @@ use crate::binemit::{CodeInfo, CodeOffset, StackMap};
use crate::ir::condcodes::IntCC;
use crate::ir::{Function, SourceLoc, StackSlot, Type, ValueLabel};
use crate::result::CodegenResult;
use crate::settings::Flags;
use crate::settings::{self, Flags};
use crate::value_label::ValueLabelsRanges;
use alloc::boxed::Box;
use alloc::vec::Vec;
use core::fmt::Debug;
use core::hash::Hasher;
use cranelift_entity::PrimaryMap;
use regalloc::RegUsageCollector;
use regalloc::{
RealReg, RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, VirtualReg, Writable,
};
use smallvec::{smallvec, SmallVec};
use std::hash::Hasher;
use std::string::String;
use target_lexicon::Triple;
@@ -368,8 +368,10 @@ pub trait MachBackend {
/// Return flags for this backend.
fn flags(&self) -> &Flags;
/// Hashes all flags, both ISA-independent and ISA-specific, into the
/// specified hasher.
/// Get the ISA-dependent flag values that were used to make this trait object.
fn isa_flags(&self) -> Vec<settings::Value>;
/// Hashes all flags, both ISA-independent and ISA-dependent, into the specified hasher.
fn hash_all_flags(&self, hasher: &mut dyn Hasher);
/// Return triple for this backend.

View File

@@ -66,7 +66,7 @@
//! Our current implementation uses a sorted array of compressed intervals, represented by their
//! boundaries (Block, Inst), sorted by Block. This is a simple data structure, enables coalescing of
//! intervals easily, and shows some nice performance behavior. See
//! https://github.com/bytecodealliance/cranelift/issues/1084 for benchmarks against using a
//! <https://github.com/bytecodealliance/cranelift/issues/1084> for benchmarks against using a
//! bforest::Map<Block, Inst>.
//!
//! ## block ordering
@@ -112,7 +112,7 @@
//! the necessary API to make coalescing easy, nor does it optimize for our types' sizes.
//!
//! Even the specialized `bforest::Map<Block, Inst>` implementation is slower than a plain sorted
//! array, see https://github.com/bytecodealliance/cranelift/issues/1084 for details.
//! array, see <https://github.com/bytecodealliance/cranelift/issues/1084> for details.
use crate::entity::SparseMapValue;
use crate::ir::{Block, ExpandedProgramPoint, Inst, Layout, ProgramOrder, ProgramPoint, Value};

View File

@@ -2,19 +2,17 @@
use crate::verifier::VerifierErrors;
use std::string::String;
use thiserror::Error;
/// A compilation error.
///
/// When Cranelift fails to compile a function, it will return one of these error codes.
#[derive(Error, Debug, PartialEq, Eq)]
#[derive(Debug, PartialEq, Eq)]
pub enum CodegenError {
/// A list of IR verifier errors.
///
/// This always represents a bug, either in the code that generated IR for Cranelift, or a bug
/// in Cranelift itself.
#[error("Verifier errors")]
Verifier(#[from] VerifierErrors),
Verifier(VerifierErrors),
/// An implementation limit was exceeded.
///
@@ -22,27 +20,57 @@ pub enum CodegenError {
/// limits][limits] that cause compilation to fail when they are exceeded.
///
/// [limits]: https://github.com/bytecodealliance/wasmtime/blob/main/cranelift/docs/ir.md#implementation-limits
#[error("Implementation limit exceeded")]
ImplLimitExceeded,
/// The code size for the function is too large.
///
/// Different target ISAs may impose a limit on the size of a compiled function. If that limit
/// is exceeded, compilation fails.
#[error("Code for function is too large")]
CodeTooLarge,
/// Something is not supported by the code generator. This might be an indication that a
/// feature is used without explicitly enabling it, or that something is temporarily
/// unsupported by a given target backend.
#[error("Unsupported feature: {0}")]
Unsupported(String),
/// A failure to map Cranelift register representation to a DWARF register representation.
#[cfg(feature = "unwind")]
#[error("Register mapping error")]
RegisterMappingError(crate::isa::unwind::systemv::RegisterMappingError),
}
/// A convenient alias for a `Result` that uses `CodegenError` as the error type.
pub type CodegenResult<T> = Result<T, CodegenError>;
// This is manually implementing Error and Display instead of using thiserror to reduce the amount
// of dependencies used by Cranelift.
impl std::error::Error for CodegenError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
CodegenError::Verifier(source) => Some(source),
CodegenError::ImplLimitExceeded { .. }
| CodegenError::CodeTooLarge { .. }
| CodegenError::Unsupported { .. } => None,
#[cfg(feature = "unwind")]
CodegenError::RegisterMappingError { .. } => None,
}
}
}
impl std::fmt::Display for CodegenError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
CodegenError::Verifier(_) => write!(f, "Verifier errors"),
CodegenError::ImplLimitExceeded => write!(f, "Implementation limit exceeded"),
CodegenError::CodeTooLarge => write!(f, "Code for function is too large"),
CodegenError::Unsupported(feature) => write!(f, "Unsupported feature: {}", feature),
#[cfg(feature = "unwind")]
CodegenError::RegisterMappingError(_0) => write!(f, "Register mapping error"),
}
}
}
impl From<VerifierErrors> for CodegenError {
fn from(source: VerifierErrors) -> Self {
CodegenError::Verifier { 0: source }
}
}

View File

@@ -26,7 +26,6 @@ use alloc::boxed::Box;
use alloc::string::{String, ToString};
use core::fmt;
use core::str;
use thiserror::Error;
/// A string-based configurator for settings groups.
///
@@ -44,6 +43,78 @@ pub trait Configurable {
fn enable(&mut self, name: &str) -> SetResult<()>;
}
/// Represents the kind of setting.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SettingKind {
/// The setting is an enumeration.
Enum,
/// The setting is a number.
Num,
/// The setting is a boolean.
Bool,
/// The setting is a preset.
Preset,
}
/// Represents an available builder setting.
///
/// This is used for iterating settings in a builder.
#[derive(Clone, Copy, Debug)]
pub struct Setting {
/// The name of the setting.
pub name: &'static str,
/// The description of the setting.
pub description: &'static str,
/// The kind of the setting.
pub kind: SettingKind,
/// The supported values of the setting (for enum values).
pub values: Option<&'static [&'static str]>,
}
/// Represents a setting value.
///
/// This is used for iterating values in `Flags`.
pub struct Value {
/// The name of the setting associated with this value.
pub name: &'static str,
pub(crate) detail: detail::Detail,
pub(crate) values: Option<&'static [&'static str]>,
pub(crate) value: u8,
}
impl Value {
    /// Gets the kind of setting.
    ///
    /// Panics (via `unreachable!`) if the value's detail is a preset.
    pub fn kind(&self) -> SettingKind {
        match &self.detail {
            detail::Detail::Bool { .. } => SettingKind::Bool,
            detail::Detail::Num => SettingKind::Num,
            detail::Detail::Enum { .. } => SettingKind::Enum,
            detail::Detail::Preset => unreachable!(),
        }
    }

    /// Gets the enum value if the value is from an enum setting.
    pub fn as_enum(&self) -> Option<&'static str> {
        // `values` is only populated when there are enumerator names to index into.
        let names = self.values?;
        Some(names[usize::from(self.value)])
    }

    /// Gets the numerical value if the value is from a num setting.
    pub fn as_num(&self) -> Option<u8> {
        if let detail::Detail::Num = &self.detail {
            Some(self.value)
        } else {
            None
        }
    }

    /// Gets the boolean value if the value is from a boolean setting.
    pub fn as_bool(&self) -> Option<bool> {
        if let detail::Detail::Bool { bit } = &self.detail {
            // The boolean is the single bit `bit` of the stored byte.
            Some(self.value & (1 << bit) != 0)
        } else {
            None
        }
    }
}
/// Collect settings values based on a template.
#[derive(Clone, Hash)]
pub struct Builder {
@@ -66,6 +137,30 @@ impl Builder {
self.bytes
}
/// Iterates the available settings in the builder.
///
/// Each descriptor of the builder's template is turned into a `Setting`; only enum settings
/// carry a list of supported values.
pub fn iter(&self) -> impl Iterator<Item = Setting> {
    let tmpl = self.template;

    tmpl.descriptors.iter().map(move |desc| {
        let (kind, values) = match desc.detail {
            detail::Detail::Bool { .. } => (SettingKind::Bool, None),
            detail::Detail::Num => (SettingKind::Num, None),
            detail::Detail::Preset => (SettingKind::Preset, None),
            detail::Detail::Enum { last, enumerators } => {
                (SettingKind::Enum, Some(tmpl.enums(last, enumerators)))
            }
        };

        Setting {
            name: desc.name,
            description: desc.description,
            kind,
            values,
        }
    })
}
/// Set the value of a single bit.
fn set_bit(&mut self, offset: usize, bit: u8, value: bool) {
let byte = &mut self.bytes[offset];
@@ -165,21 +260,34 @@ impl Configurable for Builder {
}
/// An error produced when changing a setting.
#[derive(Error, Debug, PartialEq, Eq)]
#[derive(Debug, PartialEq, Eq)]
pub enum SetError {
/// No setting by this name exists.
#[error("No existing setting named '{0}'")]
BadName(String),
/// Type mismatch for setting (e.g., setting an enum setting as a bool).
#[error("Trying to set a setting with the wrong type")]
BadType,
/// This is not a valid value for this setting.
#[error("Unexpected value for a setting, expected {0}")]
BadValue(String),
}
impl std::error::Error for SetError {}
impl fmt::Display for SetError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
SetError::BadName(name) => write!(f, "No existing setting named '{}'", name),
SetError::BadType => {
write!(f, "Trying to set a setting with the wrong type")
}
SetError::BadValue(value) => {
write!(f, "Unexpected value for a setting, expected {}", value)
}
}
}
}
/// A result returned when changing a setting.
pub type SetResult<T> = Result<T, SetError>;
@@ -288,6 +396,9 @@ pub mod detail {
/// Lower snake-case name of setting as defined in meta.
pub name: &'static str,
/// The description of the setting.
pub description: &'static str,
/// Offset of byte containing this setting.
pub offset: u32,

View File

@@ -80,7 +80,6 @@ use alloc::vec::Vec;
use core::cmp::Ordering;
use core::fmt::{self, Display, Formatter, Write};
use log::debug;
use thiserror::Error;
pub use self::cssa::verify_cssa;
pub use self::liveness::verify_liveness;
@@ -92,8 +91,7 @@ mod liveness;
mod locations;
/// A verifier error.
#[derive(Error, Debug, PartialEq, Eq, Clone)]
#[error("{}{}: {}", .location, format_context(.context), .message)]
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct VerifierError {
/// The entity causing the verifier error.
pub location: AnyEntity,
@@ -104,11 +102,16 @@ pub struct VerifierError {
pub message: String,
}
/// Helper for formatting Verifier::Error context.
fn format_context(context: &Option<String>) -> String {
match context {
None => "".to_string(),
Some(c) => format!(" ({})", c),
// This is manually implementing Error and Display instead of using thiserror to reduce the amount
// of dependencies used by Cranelift.
impl std::error::Error for VerifierError {}
impl Display for VerifierError {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
match &self.context {
None => write!(f, "{}: {}", self.location, self.message),
Some(context) => write!(f, "{} ({}): {}", self.location, context, self.message),
}
}
}
@@ -175,9 +178,13 @@ pub type VerifierStepResult<T> = Result<T, ()>;
pub type VerifierResult<T> = Result<T, VerifierErrors>;
/// List of verifier errors.
#[derive(Error, Debug, Default, PartialEq, Eq, Clone)]
#[derive(Debug, Default, PartialEq, Eq, Clone)]
pub struct VerifierErrors(pub Vec<VerifierError>);
// This is manually implementing Error and Display instead of using thiserror to reduce the amount
// of dependencies used by Cranelift.
impl std::error::Error for VerifierErrors {}
impl VerifierErrors {
/// Return a new `VerifierErrors` struct.
#[inline]

View File

@@ -1,7 +1,7 @@
[package]
authors = ["The Cranelift Project Developers"]
name = "cranelift-entity"
version = "0.72.0"
version = "0.73.0"
description = "Data structures using entity references as mapping keys"
license = "Apache-2.0 WITH LLVM-exception"
documentation = "https://docs.rs/cranelift-entity"

View File

@@ -70,21 +70,25 @@ macro_rules! entity_impl {
// Basic traits.
($entity:ident) => {
impl $crate::EntityRef for $entity {
#[inline]
fn new(index: usize) -> Self {
debug_assert!(index < ($crate::__core::u32::MAX as usize));
$entity(index as u32)
}
#[inline]
fn index(self) -> usize {
self.0 as usize
}
}
impl $crate::packed_option::ReservedValue for $entity {
#[inline]
fn reserved_value() -> $entity {
$entity($crate::__core::u32::MAX)
}
#[inline]
fn is_reserved_value(&self) -> bool {
self.0 == $crate::__core::u32::MAX
}
@@ -93,6 +97,7 @@ macro_rules! entity_impl {
impl $entity {
/// Create a new instance from a `u32`.
#[allow(dead_code)]
#[inline]
pub fn from_u32(x: u32) -> Self {
debug_assert!(x < $crate::__core::u32::MAX);
$entity(x)
@@ -100,6 +105,7 @@ macro_rules! entity_impl {
/// Return the underlying index value as a `u32`.
#[allow(dead_code)]
#[inline]
pub fn as_u32(self) -> u32 {
self.0
}

View File

@@ -148,6 +148,28 @@ where
pub fn into_boxed_slice(self) -> BoxedSlice<K, V> {
unsafe { BoxedSlice::<K, V>::from_raw(Box::<[V]>::into_raw(self.elems.into_boxed_slice())) }
}
/// Performs a binary search on the values with a key extraction function.
///
/// Assumes that the values are sorted by the key extracted by the function.
///
/// If the value is found then `Ok(K)` is returned, containing the entity key
/// of the matching value.
///
/// If there are multiple matches, then any one of the matches could be returned.
///
/// If the value is not found then `Err(K)` is returned, containing the entity key
/// where a matching element could be inserted while maintaining sorted order.
pub fn binary_search_values_by_key<'a, B, F>(&'a self, b: &B, f: F) -> Result<K, K>
where
    F: FnMut(&'a V) -> B,
    B: Ord,
{
    // The slice search reports a plain `usize` position for both a hit and a
    // miss; either way it is converted into an entity key of type `K`.
    self.elems
        .binary_search_by_key(b, f)
        .map(K::new)
        .map_err(K::new)
}
}
impl<K, V> Default for PrimaryMap<K, V>

View File

@@ -1,7 +1,7 @@
[package]
name = "cranelift-filetests"
authors = ["The Cranelift Project Developers"]
version = "0.66.0"
version = "0.73.0"
description = "Test driver and implementations of the filetest commands"
license = "Apache-2.0 WITH LLVM-exception"
documentation = "https://docs.rs/cranelift-filetests"
@@ -10,24 +10,22 @@ publish = false
edition = "2018"
[dependencies]
cranelift-codegen = { path = "../codegen", version = "0.72.0", features = ["testing_hooks"] }
cranelift-frontend = { path = "../frontend", version = "0.72.0" }
cranelift-interpreter = { path = "../interpreter", version = "0.72.0" }
cranelift-native = { path = "../native", version = "0.72.0" }
cranelift-reader = { path = "../reader", version = "0.72.0" }
cranelift-preopt = { path = "../preopt", version = "0.72.0" }
byteorder = { version = "1.3.2", default-features = false }
cranelift-codegen = { path = "../codegen", version = "0.73.0", features = ["testing_hooks"] }
cranelift-frontend = { path = "../frontend", version = "0.73.0" }
cranelift-interpreter = { path = "../interpreter", version = "0.73.0" }
cranelift-native = { path = "../native", version = "0.73.0" }
cranelift-reader = { path = "../reader", version = "0.73.0" }
cranelift-preopt = { path = "../preopt", version = "0.73.0" }
file-per-thread-logger = "0.1.2"
filecheck = "0.5.0"
gimli = { version = "0.23.0", default-features = false, features = ["read"] }
gimli = { version = "0.24.0", default-features = false, features = ["read"] }
log = "0.4.6"
memmap2 = "0.2.1"
num_cpus = "1.8.0"
target-lexicon = "0.11"
target-lexicon = "0.12"
thiserror = "1.0.15"
anyhow = "1.0.32"
[features]
enable-peepmatic = []
experimental_arm32 = []
experimental_x64 = []

View File

@@ -77,22 +77,72 @@ block0(v0: f64):
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: str q8, [sp, #-16]!
; nextln: str q9, [sp, #-16]!
; nextln: str q10, [sp, #-16]!
; nextln: str q11, [sp, #-16]!
; nextln: str q12, [sp, #-16]!
; nextln: str q13, [sp, #-16]!
; nextln: str q14, [sp, #-16]!
; nextln: str q15, [sp, #-16]!
; nextln: stp d14, d15, [sp, #-16]!
; nextln: stp d12, d13, [sp, #-16]!
; nextln: stp d10, d11, [sp, #-16]!
; nextln: stp d8, d9, [sp, #-16]!
; check: ldr q15, [sp], #16
; nextln: ldr q14, [sp], #16
; nextln: ldr q13, [sp], #16
; nextln: ldr q12, [sp], #16
; nextln: ldr q11, [sp], #16
; nextln: ldr q10, [sp], #16
; nextln: ldr q9, [sp], #16
; nextln: ldr q8, [sp], #16
; check: ldp d8, d9, [sp], #16
; nextln: ldp d10, d11, [sp], #16
; nextln: ldp d12, d13, [sp], #16
; nextln: ldp d14, d15, [sp], #16
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %f2(i64) -> i64 {
; v1..v18 form a chain of sums that are all consumed again by the pairwise
; reduction (v19..v36) below, so many values are live at the same time.
; NOTE(review): presumably this is meant to create enough register pressure
; that callee-saved integer registers get used -- the expected prologue and
; epilogue following this function save/restore x19, x20 and x22. Confirm.
block0(v0: i64):
v1 = iadd.i64 v0, v0
v2 = iadd.i64 v0, v1
v3 = iadd.i64 v0, v2
v4 = iadd.i64 v0, v3
v5 = iadd.i64 v0, v4
v6 = iadd.i64 v0, v5
v7 = iadd.i64 v0, v6
v8 = iadd.i64 v0, v7
v9 = iadd.i64 v0, v8
v10 = iadd.i64 v0, v9
v11 = iadd.i64 v0, v10
v12 = iadd.i64 v0, v11
v13 = iadd.i64 v0, v12
v14 = iadd.i64 v0, v13
v15 = iadd.i64 v0, v14
v16 = iadd.i64 v0, v15
v17 = iadd.i64 v0, v16
v18 = iadd.i64 v0, v17
; Pairwise reduction of the chain values down to a single result.
v19 = iadd.i64 v0, v1
v20 = iadd.i64 v2, v3
v21 = iadd.i64 v4, v5
v22 = iadd.i64 v6, v7
v23 = iadd.i64 v8, v9
v24 = iadd.i64 v10, v11
v25 = iadd.i64 v12, v13
v26 = iadd.i64 v14, v15
v27 = iadd.i64 v16, v17
v28 = iadd.i64 v18, v19
v29 = iadd.i64 v20, v21
v30 = iadd.i64 v22, v23
v31 = iadd.i64 v24, v25
v32 = iadd.i64 v26, v27
v33 = iadd.i64 v28, v29
v34 = iadd.i64 v30, v31
v35 = iadd.i64 v32, v33
v36 = iadd.i64 v34, v35
return v36
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: str x22, [sp, #-16]!
; nextln: stp x19, x20, [sp, #-16]!
; nextln: add x1, x0, x0
; check: add x0, x1, x0
; nextln: ldp x19, x20, [sp], #16
; nextln: ldr x22, [sp], #16
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,243 @@
test compile
target s390x
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; BITREV
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; FIXME: bitrev not yet implemented
;function %bitrev_i64(i64) -> i64 {
;block0(v0: i64):
; v1 = bitrev v0
; return v1
;}
;
;function %bitrev_i32(i32) -> i32 {
;block0(v0: i32):
; v1 = bitrev v0
; return v1
;}
;
;function %bitrev_i16(i16) -> i16 {
;block0(v0: i16):
; v1 = bitrev v0
; return v1
;}
;
;function %bitrev_i8(i8) -> i8 {
;block0(v0: i8):
; v1 = bitrev v0
; return v1
;}
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; CLZ
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %clz_i64(i64) -> i64 {
block0(v0: i64):
v1 = clz v0
return v1
}
; check: flogr %r0, %r2
; nextln: lgr %r2, %r0
; nextln: br %r14
function %clz_i32(i32) -> i32 {
block0(v0: i32):
v1 = clz v0
return v1
}
; check: llgfr %r2, %r2
; nextln: flogr %r0, %r2
; nextln: lr %r2, %r0
; nextln: ahi %r2, -32
; nextln: br %r14
function %clz_i16(i16) -> i16 {
block0(v0: i16):
v1 = clz v0
return v1
}
; check: llghr %r2, %r2
; nextln: flogr %r0, %r2
; nextln: lr %r2, %r0
; nextln: ahi %r2, -48
; nextln: br %r14
function %clz_i8(i8) -> i8 {
block0(v0: i8):
v1 = clz v0
return v1
}
; check: llgcr %r2, %r2
; nextln: flogr %r0, %r2
; nextln: lr %r2, %r0
; nextln: ahi %r2, -56
; nextln: br %r14
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; CLS
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %cls_i64(i64) -> i64 {
block0(v0: i64):
v1 = cls v0
return v1
}
; check: srag %r3, %r2, 63
; nextln: xgr %r3, %r2
; nextln: flogr %r0, %r2
; nextln: lgr %r2, %r0
; nextln: br %r14
function %cls_i32(i32) -> i32 {
block0(v0: i32):
v1 = cls v0
return v1
}
; check: lgfr %r2, %r2
; nextln: srag %r3, %r2, 63
; nextln: xgr %r3, %r2
; nextln: flogr %r0, %r2
; nextln: lr %r2, %r0
; nextln: ahi %r2, -32
; nextln: br %r14
function %cls_i16(i16) -> i16 {
block0(v0: i16):
v1 = cls v0
return v1
}
; check: lghr %r2, %r2
; nextln: srag %r3, %r2, 63
; nextln: xgr %r3, %r2
; nextln: flogr %r0, %r2
; nextln: lr %r2, %r0
; nextln: ahi %r2, -48
; nextln: br %r14
function %cls_i8(i8) -> i8 {
block0(v0: i8):
v1 = cls v0
return v1
}
; check: lgbr %r2, %r2
; nextln: srag %r3, %r2, 63
; nextln: xgr %r3, %r2
; nextln: flogr %r0, %r2
; nextln: lr %r2, %r0
; nextln: ahi %r2, -56
; nextln: br %r14
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; CTZ
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %ctz_i64(i64) -> i64 {
block0(v0: i64):
v1 = ctz v0
return v1
}
; check: lcgr %r3, %r2
; nextln: ngrk %r2, %r3, %r2
; nextln: flogr %r0, %r2
; nextln: locghie %r0, -1
; nextln: lghi %r2, 63
; nextln: sgr %r2, %r0
; nextln: br %r14
function %ctz_i32(i32) -> i32 {
block0(v0: i32):
v1 = ctz v0
return v1
}
; check: oihl %r2, 1
; nextln: lcgr %r3, %r2
; nextln: ngrk %r2, %r3, %r2
; nextln: flogr %r0, %r2
; nextln: lhi %r2, 63
; nextln: sr %r2, %r0
; nextln: br %r14
function %ctz_i16(i16) -> i16 {
block0(v0: i16):
v1 = ctz v0
return v1
}
; check: oilh %r2, 1
; nextln: lcgr %r3, %r2
; nextln: ngrk %r2, %r3, %r2
; nextln: flogr %r0, %r2
; nextln: lhi %r2, 63
; nextln: sr %r2, %r0
; nextln: br %r14
function %ctz_i8(i8) -> i8 {
block0(v0: i8):
v1 = ctz v0
return v1
}
; check: oill %r2, 256
; nextln: lcgr %r3, %r2
; nextln: ngrk %r2, %r3, %r2
; nextln: flogr %r0, %r2
; nextln: lhi %r2, 63
; nextln: sr %r2, %r0
; nextln: br %r14
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; POPCNT
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %popcnt_i64(i64) -> i64 {
block0(v0: i64):
v1 = popcnt v0
return v1
}
; check: popcnt %r2, %r2, 8
; nextln: br %r14
function %popcnt_i32(i32) -> i32 {
block0(v0: i32):
v1 = popcnt v0
return v1
}
; check: llgfr %r2, %r2
; nextln: popcnt %r2, %r2, 8
; nextln: br %r14
function %popcnt_i16(i16) -> i16 {
block0(v0: i16):
v1 = popcnt v0
return v1
}
; check: llghr %r2, %r2
; nextln: popcnt %r2, %r2, 8
; nextln: br %r14
function %popcnt_i8(i8) -> i8 {
block0(v0: i8):
v1 = popcnt v0
return v1
}
; check: popcnt %r2, %r2
; nextln: br %r14

View File

@@ -0,0 +1,490 @@
test compile
target s390x
; FIXME: add immediate operand versions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; BAND
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %band_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = band.i64 v0, v1
return v2
}
; check: ngr %r2, %r3
; nextln: br %r14
function %band_i64_mem(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = load.i64 v1
v3 = band.i64 v0, v2
return v3
}
; check: ng %r2, 0(%r3)
; nextln: br %r14
function %band_i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = band.i32 v0, v1
return v2
}
; check: nr %r2, %r3
; nextln: br %r14
function %band_i32_mem(i32, i64) -> i32 {
block0(v0: i32, v1: i64):
v2 = load.i32 v1
v3 = band.i32 v0, v2
return v3
}
; check: n %r2, 0(%r3)
; nextln: br %r14
function %band_i32_memoff(i32, i64) -> i32 {
block0(v0: i32, v1: i64):
v2 = load.i32 v1+4096
v3 = band.i32 v0, v2
return v3
}
; check: ny %r2, 4096(%r3)
; nextln: br %r14
function %band_i16(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = band.i16 v0, v1
return v2
}
; check: nr %r2, %r3
; nextln: br %r14
function %band_i16_mem(i16, i64) -> i16 {
block0(v0: i16, v1: i64):
v2 = load.i16 v1
v3 = band.i16 v0, v2
return v3
}
; check: llh %r3, 0(%r3)
; nextln: nr %r2, %r3
; nextln: br %r14
function %band_i8(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = band.i8 v0, v1
return v2
}
; check: nr %r2, %r3
; nextln: br %r14
function %band_i8_mem(i8, i64) -> i8 {
block0(v0: i8, v1: i64):
v2 = load.i8 v1
v3 = band.i8 v0, v2
return v3
}
; check: llc %r3, 0(%r3)
; nextln: nr %r2, %r3
; nextln: br %r14
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; BOR
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %bor_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bor.i64 v0, v1
return v2
}
; check: ogr %r2, %r3
; nextln: br %r14
function %bor_i64_mem(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = load.i64 v1
v3 = bor.i64 v0, v2
return v3
}
; check: og %r2, 0(%r3)
; nextln: br %r14
function %bor_i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = bor.i32 v0, v1
return v2
}
; check: or %r2, %r3
; nextln: br %r14
function %bor_i32_mem(i32, i64) -> i32 {
block0(v0: i32, v1: i64):
v2 = load.i32 v1
v3 = bor.i32 v0, v2
return v3
}
; check: o %r2, 0(%r3)
; nextln: br %r14
function %bor_i32_memoff(i32, i64) -> i32 {
block0(v0: i32, v1: i64):
v2 = load.i32 v1+4096
v3 = bor.i32 v0, v2
return v3
}
; check: oy %r2, 4096(%r3)
; nextln: br %r14
function %bor_i16(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = bor.i16 v0, v1
return v2
}
; check: or %r2, %r3
; nextln: br %r14
function %bor_i16_mem(i16, i64) -> i16 {
block0(v0: i16, v1: i64):
v2 = load.i16 v1
v3 = bor.i16 v0, v2
return v3
}
; check: llh %r3, 0(%r3)
; nextln: or %r2, %r3
; nextln: br %r14
function %bor_i8(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = bor.i8 v0, v1
return v2
}
; check: or %r2, %r3
; nextln: br %r14
function %bor_i8_mem(i8, i64) -> i8 {
block0(v0: i8, v1: i64):
v2 = load.i8 v1
v3 = bor.i8 v0, v2
return v3
}
; check: llc %r3, 0(%r3)
; nextln: or %r2, %r3
; nextln: br %r14
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; BXOR
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %bxor_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bxor.i64 v0, v1
return v2
}
; check: xgr %r2, %r3
; nextln: br %r14
function %bxor_i64_mem(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = load.i64 v1
v3 = bxor.i64 v0, v2
return v3
}
; check: xg %r2, 0(%r3)
; nextln: br %r14
function %bxor_i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = bxor.i32 v0, v1
return v2
}
; check: xr %r2, %r3
; nextln: br %r14
function %bxor_i32_mem(i32, i64) -> i32 {
block0(v0: i32, v1: i64):
v2 = load.i32 v1
v3 = bxor.i32 v0, v2
return v3
}
; check: x %r2, 0(%r3)
; nextln: br %r14
function %bxor_i32_memoff(i32, i64) -> i32 {
block0(v0: i32, v1: i64):
v2 = load.i32 v1+4096
v3 = bxor.i32 v0, v2
return v3
}
; check: xy %r2, 4096(%r3)
; nextln: br %r14
function %bxor_i16(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = bxor.i16 v0, v1
return v2
}
; check: xr %r2, %r3
; nextln: br %r14
function %bxor_i16_mem(i16, i64) -> i16 {
block0(v0: i16, v1: i64):
v2 = load.i16 v1
v3 = bxor.i16 v0, v2
return v3
}
; check: llh %r3, 0(%r3)
; nextln: xr %r2, %r3
; nextln: br %r14
function %bxor_i8(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = bxor.i8 v0, v1
return v2
}
; check: xr %r2, %r3
; nextln: br %r14
function %bxor_i8_mem(i8, i64) -> i8 {
block0(v0: i8, v1: i64):
v2 = load.i8 v1
v3 = bxor.i8 v0, v2
return v3
}
; check: llc %r3, 0(%r3)
; nextln: xr %r2, %r3
; nextln: br %r14
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; BAND_NOT
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %band_not_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = band_not.i64 v0, v1
return v2
}
; check: nngrk %r2, %r2, %r3
; nextln: br %r14
function %band_not_i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = band_not.i32 v0, v1
return v2
}
; check: nnrk %r2, %r2, %r3
; nextln: br %r14
function %band_not_i16(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = band_not.i16 v0, v1
return v2
}
; check: nnrk %r2, %r2, %r3
; nextln: br %r14
function %band_not_i8(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = band_not.i8 v0, v1
return v2
}
; check: nnrk %r2, %r2, %r3
; nextln: br %r14
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; BOR_NOT
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %bor_not_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bor_not.i64 v0, v1
return v2
}
; check: nogrk %r2, %r2, %r3
; nextln: br %r14
function %bor_not_i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = bor_not.i32 v0, v1
return v2
}
; check: nork %r2, %r2, %r3
; nextln: br %r14
function %bor_not_i16(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = bor_not.i16 v0, v1
return v2
}
; check: nork %r2, %r2, %r3
; nextln: br %r14
function %bor_not_i8(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = bor_not.i8 v0, v1
return v2
}
; check: nork %r2, %r2, %r3
; nextln: br %r14
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; BXOR_NOT
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %bxor_not_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bxor_not.i64 v0, v1
return v2
}
; check: nxgrk %r2, %r2, %r3
; nextln: br %r14
function %bxor_not_i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = bxor_not.i32 v0, v1
return v2
}
; check: nxrk %r2, %r2, %r3
; nextln: br %r14
function %bxor_not_i16(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = bxor_not.i16 v0, v1
return v2
}
; check: nxrk %r2, %r2, %r3
; nextln: br %r14
function %bxor_not_i8(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = bxor_not.i8 v0, v1
return v2
}
; check: nxrk %r2, %r2, %r3
; nextln: br %r14
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; BNOT
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %bnot_i64(i64) -> i64 {
block0(v0: i64):
v1 = bnot.i64 v0
return v1
}
; check: nogrk %r2, %r2, %r2
; nextln: br %r14
function %bnot_i32(i32) -> i32 {
block0(v0: i32):
v1 = bnot.i32 v0
return v1
}
; check: nork %r2, %r2, %r2
; nextln: br %r14
function %bnot_i16(i16) -> i16 {
block0(v0: i16):
v1 = bnot.i16 v0
return v1
}
; check: nork %r2, %r2, %r2
; nextln: br %r14
function %bnot_i8(i8) -> i8 {
block0(v0: i8):
v1 = bnot.i8 v0
return v1
}
; check: nork %r2, %r2, %r2
; nextln: br %r14
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; BITSELECT
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %bitselect_i64(i64, i64, i64) -> i64 {
block0(v0: i64, v1: i64, v2: i64):
v3 = bitselect.i64 v0, v1, v2
return v3
}
; check: ngr %r3, %r2
; nextln: nngrk %r2, %r4, %r2
; nextln: ogr %r2, %r3
; nextln: br %r14
function %bitselect_i32(i32, i32, i32) -> i32 {
block0(v0: i32, v1: i32, v2: i32):
v3 = bitselect.i32 v0, v1, v2
return v3
}
; check: nr %r3, %r2
; nextln: nnrk %r2, %r4, %r2
; nextln: or %r2, %r3
; nextln: br %r14
function %bitselect_i16(i16, i16, i16) -> i16 {
block0(v0: i16, v1: i16, v2: i16):
v3 = bitselect.i16 v0, v1, v2
return v3
}
; check: nr %r3, %r2
; nextln: nnrk %r2, %r4, %r2
; nextln: or %r2, %r3
; nextln: br %r14
function %bitselect_i8(i8, i8, i8) -> i8 {
block0(v0: i8, v1: i8, v2: i8):
v3 = bitselect.i8 v0, v1, v2
return v3
}
; check: nr %r3, %r2
; nextln: nnrk %r2, %r4, %r2
; nextln: or %r2, %r3
; nextln: br %r14

View File

@@ -0,0 +1,113 @@
test compile
target s390x
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; CALL
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %call(i64) -> i64 {
fn0 = %g(i64) -> i64
block0(v0: i64):
v1 = call fn0(v0)
return v1
}
; check: stmg %r14, %r15, 112(%r15)
; nextln: aghi %r15, -160
; nextln: virtual_sp_offset_adjust 160
; nextln: bras %r1, 12 ; data %g + 0 ; lg %r3, 0(%r1)
; nextln: basr %r14, %r3
; nextln: lmg %r14, %r15, 272(%r15)
; nextln: br %r14
function %call_uext(i32) -> i64 {
fn0 = %g(i32 uext) -> i64
block0(v0: i32):
v1 = call fn0(v0)
return v1
}
; check: stmg %r14, %r15, 112(%r15)
; nextln: aghi %r15, -160
; nextln: virtual_sp_offset_adjust 160
; nextln: llgfr %r2, %r2
; nextln: bras %r1, 12 ; data %g + 0 ; lg %r3, 0(%r1)
; nextln: basr %r14, %r3
; nextln: lmg %r14, %r15, 272(%r15)
; nextln: br %r14
function %ret_uext(i32) -> i32 uext {
block0(v0: i32):
return v0
}
; check: llgfr %r2, %r2
; nextln: br %r14
function %call_sext(i32) -> i64 {
; The callee takes its i32 argument with the `sext` extension attribute, so
; this is the sign-extending counterpart of the uext call test.
fn0 = %g(i32 sext) -> i64
block0(v0: i32):
v1 = call fn0(v0)
return v1
}
; check: stmg %r14, %r15, 112(%r15)
; nextln: aghi %r15, -160
; nextln: virtual_sp_offset_adjust 160
; nextln: lgfr %r2, %r2
; nextln: bras %r1, 12 ; data %g + 0 ; lg %r3, 0(%r1)
; nextln: basr %r14, %r3
; nextln: lmg %r14, %r15, 272(%r15)
; nextln: br %r14
function %ret_sext(i32) -> i32 sext {
; The i32 return value carries the `sext` extension attribute.
block0(v0: i32):
return v0
}
; check: lgfr %r2, %r2
; nextln: br %r14
function %call_colocated(i64) -> i64 {
fn0 = colocated %g(i64) -> i64
block0(v0: i64):
v1 = call fn0(v0)
return v1
}
; check: stmg %r14, %r15, 112(%r15)
; nextln: aghi %r15, -160
; nextln: virtual_sp_offset_adjust 160
; nextln: brasl %r14, %g
; nextln: lmg %r14, %r15, 272(%r15)
; nextln: br %r14
function %f2(i32) -> i64 {
; NOTE(review): this has the same signature and body as the first
; `%call_uext` function in this file, and no expected-output directives
; follow it before the next section. Presumably a leftover -- confirm whether
; it should be removed or given its own expectations.
fn0 = %g(i32 uext) -> i64
block0(v0: i32):
v1 = call fn0(v0)
return v1
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; CALL_INDIRECT
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %call_indirect(i64, i64) -> i64 {
sig0 = (i64) -> i64
block0(v0: i64, v1: i64):
v2 = call_indirect.i64 sig0, v1(v0)
return v2
}
; check: stmg %r14, %r15, 112(%r15)
; nextln: aghi %r15, -160
; nextln: virtual_sp_offset_adjust 160
; nextln: basr %r14, %r3
; nextln: lmg %r14, %r15, 272(%r15)
; nextln: br %r14

View File

@@ -0,0 +1,62 @@
test compile
target s390x
function %f(i64, i64) -> b1 {
block0(v0: i64, v1: i64):
v2 = icmp eq v0, v1
return v2
}
; check: clgr %r2, %r3
; nextln: lhi %r2, 0
; nextln: lochie %r2, 1
; nextln: br %r14
function %f(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = icmp eq v0, v1
brnz v2, block1
jump block2
block1:
v4 = iconst.i64 1
return v4
block2:
v5 = iconst.i64 2
return v5
}
; check: Block 0:
; check: clgr %r2, %r3
; nextln: jge label1 ; jg label2
; check: Block 1:
; check: lghi %r2, 1
; nextln: br %r14
; check: Block 2:
; check: lghi %r2, 2
; nextln: br %r14
function %f(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = icmp eq v0, v1
brnz v2, block1
jump block1
block1:
v4 = iconst.i64 1
return v4
}
; FIXME: Should optimize away branches
; check: Block 0:
; check: clgr %r2, %r3
; nextln: jge label1 ; jg label2
; check: Block 1:
; check: jg label3
; check: Block 2:
; check: jg label3
; check: Block 3:
; check: lghi %r2, 1
; nextln: br %r14

View File

@@ -0,0 +1,43 @@
test compile
target s390x
function %f(i8, i64, i64) -> i64 {
block0(v0: i8, v1: i64, v2: i64):
v3 = iconst.i8 42
v4 = icmp eq v0, v3
v5 = select.i64 v4, v1, v2
return v5
}
; check: llcr %r2, %r2
; nextln: clfi %r2, 42
; nextln: locgre %r4, %r3
; nextln: lgr %r2, %r4
; nextln: br %r14
function %g(b1, i8, i8) -> i8 {
block0(v0: b1, v1: i8, v2: i8):
v3 = select.i8 v0, v1, v2
return v3
}
; FIXME: optimize i8/i16 compares
; check: llcr %r2, %r2
; nextln: chi %r2, 0
; nextln: locrlh %r4, %r3
; nextln: lr %r2, %r4
; nextln: br %r14
function %i(i32, i8, i8) -> i8 {
block0(v0: i32, v1: i8, v2: i8):
v3 = iconst.i32 42
v4 = icmp.i32 eq v0, v3
v5 = select.i8 v4, v1, v2
return v5
}
; check: clfi %r2, 42
; nextln: locre %r4, %r3
; nextln: lr %r2, %r4
; nextln: br %r14

Some files were not shown because too many files have changed in this diff Show More