From 011e94f3fad1321d77f4f8bb11cf0ae6a7c6ffce Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Mon, 10 May 2021 11:05:07 -0700 Subject: [PATCH] x64: add benchmarks for EVEX encoding This change adds a criterion-enabled benchmark, x64-evex-encoding, to compare the performance of the builder pattern used to encode EVEX instructions in the new x64 backend against the function pattern used to encode EVEX instructions in the legacy x86 backend. At face value, the results imply that the builder pattern is faster, but no efforts were made to analyze and optimize these approaches further. --- Cargo.lock | 268 +++++++++++++++++- cranelift/codegen/Cargo.toml | 7 + .../codegen/benches/x64-evex-encoding.rs | 138 +++++++++ 3 files changed, 402 insertions(+), 11 deletions(-) create mode 100644 cranelift/codegen/benches/x64-evex-encoding.rs diff --git a/Cargo.lock b/Cargo.lock index 6a2815313a..aad4a2bc11 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -250,6 +250,18 @@ dependencies = [ "generic-array", ] +[[package]] +name = "bstr" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a40b47ad93e1a5404e6c18dec46b628214fee441c70f4ab5d6942142cc268a3d" +dependencies = [ + "lazy_static", + "memchr", + "regex-automata", + "serde", +] + [[package]] name = "bumpalo" version = "3.6.1" @@ -276,7 +288,7 @@ checksum = "ff3a1e32332db9ad29d6da34693ce9a7ac26a9edf96abb5c1788d193410031ab" dependencies = [ "cap-primitives", "cap-std", - "rustc_version", + "rustc_version 0.3.3", "unsafe-io", "winapi", ] @@ -294,7 +306,7 @@ dependencies = [ "maybe-owned", "once_cell", "posish", - "rustc_version", + "rustc_version 0.3.3", "unsafe-io", "winapi", "winapi-util", @@ -318,7 +330,7 @@ checksum = "7019d48ea53c5f378e0fdab0fe5f627fc00e76d65e75dffd6fb1cbc0c9b382ee" dependencies = [ "cap-primitives", "posish", - "rustc_version", + "rustc_version 0.3.3", "unsafe-io", ] @@ -365,6 +377,15 @@ dependencies = [ "libc", ] +[[package]] +name = "cast" +version = "0.2.5" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc38c385bfd7e444464011bb24820f40dd1c76bcdfa1b78611cb7c2e5cafab75" +dependencies = [ + "rustc_version 0.2.3", +] + [[package]] name = "cc" version = "1.0.67" @@ -542,6 +563,7 @@ dependencies = [ "cranelift-codegen-meta", "cranelift-codegen-shared", "cranelift-entity", + "criterion", "gimli", "hashbrown", "log", @@ -748,7 +770,7 @@ dependencies = [ "cranelift-entity", "cranelift-frontend", "hashbrown", - "itertools", + "itertools 0.10.0", "log", "serde", "smallvec", @@ -767,6 +789,42 @@ dependencies = [ "cfg-if 1.0.0", ] +[[package]] +name = "criterion" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab327ed7354547cc2ef43cbe20ef68b988e70b4b593cbd66a2a61733123a3d23" +dependencies = [ + "atty", + "cast", + "clap", + "criterion-plot", + "csv", + "itertools 0.10.0", + "lazy_static", + "num-traits", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_cbor", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e022feadec601fba1649cfa83586381a4ad31c6bf3a9ab7d408118b05dd9889d" +dependencies = [ + "cast", + "itertools 0.9.0", +] + [[package]] name = "crossbeam-channel" version = "0.5.0" @@ -822,6 +880,28 @@ dependencies = [ "subtle", ] +[[package]] +name = "csv" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" +dependencies = [ + "bstr", + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + [[package]] name = "ctr" version = "0.6.0" @@ -1252,6 
+1332,12 @@ dependencies = [ "subtle", ] +[[package]] +name = "half" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62aca2aba2d62b4a7f5b33f3712cb1b0692779a56fb510499d5c0aa594daeaf3" + [[package]] name = "hashbrown" version = "0.9.1" @@ -1369,6 +1455,15 @@ dependencies = [ "syn", ] +[[package]] +name = "itertools" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.10.0" @@ -1402,6 +1497,15 @@ dependencies = [ "libc", ] +[[package]] +name = "js-sys" +version = "0.3.51" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83bdfbace3a0e81a4253f73b49e960b053e396a11012cbd49b9b74d6a2b67062" +dependencies = [ + "wasm-bindgen", +] + [[package]] name = "k256" version = "0.7.2" @@ -1478,7 +1582,7 @@ dependencies = [ "dynasm", "dynasmrt", "iter-enum", - "itertools", + "itertools 0.10.0", "lazy_static", "memoffset", "more-asserts", @@ -1747,6 +1851,12 @@ version = "1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af8b08b04175473088b46763e51ee54da5f9a164bc162f615b91bc179dbf15a3" +[[package]] +name = "oorandom" +version = "11.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" + [[package]] name = "opaque-debug" version = "0.3.0" @@ -1986,6 +2096,34 @@ dependencies = [ "zeroize", ] +[[package]] +name = "plotters" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45ca0ae5f169d0917a7c7f5a9c1a3d3d9598f18f529dd2b8373ed988efea307a" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b07fffcddc1cb3a1de753caa4e4df03b79922ba43cf882acc1bdd7e8df9f4590" + +[[package]] +name = "plotters-svg" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b38a02e23bd9604b842a812063aec4ef702b57989c37b655254bb61c471ad211" +dependencies = [ + "plotters-backend", +] + [[package]] name = "poly1305" version = "0.6.2" @@ -2420,13 +2558,22 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name = "rustc_version" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" +dependencies = [ + "semver 0.9.0", +] + [[package]] name = "rustc_version" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0dfe2087c51c460008730de8b57e6a320782fbfb312e1f4d520e6c6fae155ee" dependencies = [ - "semver", + "semver 0.11.0", ] [[package]] @@ -2482,15 +2629,30 @@ dependencies = [ "syn", ] +[[package]] +name = "semver" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" +dependencies = [ + "semver-parser 0.7.0", +] + [[package]] name = "semver" version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f301af10236f6df4160f7c3f04eec6dbc70ace82d23326abad5edee88801c6b6" dependencies = [ - "semver-parser", + "semver-parser 0.10.2", ] +[[package]] +name = "semver-parser" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" + [[package]] name = "semver-parser" version = "0.10.2" @@ -2509,6 +2671,16 @@ dependencies = [ "serde_derive", ] +[[package]] +name = 
"serde_cbor" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e18acfa2f90e8b735b2836ab8d538de304cbb6729a7360729ea5a895d15a622" +dependencies = [ + "half", + "serde", +] + [[package]] name = "serde_derive" version = "1.0.123" @@ -2696,9 +2868,9 @@ dependencies = [ [[package]] name = "syn" -version = "1.0.60" +version = "1.0.67" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c700597eca8a5a762beb35753ef6b94df201c81cca676604f547495a0d7f0081" +checksum = "6498a9efc342871f91cc2d0d694c674368b4ceb40f62b65a7a08c3792935e702" dependencies = [ "proc-macro2", "quote", @@ -2728,7 +2900,7 @@ dependencies = [ "cap-fs-ext", "cap-std", "posish", - "rustc_version", + "rustc_version 0.3.3", "unsafe-io", "winapi", "winx", @@ -2840,6 +3012,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "tokio" version = "1.5.0" @@ -3022,7 +3204,7 @@ version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe39acfe60d3754452ea6881613c3240100b23ffd94a627c138863f8cd314b1b" dependencies = [ - "rustc_version", + "rustc_version 0.3.3", "winapi", ] @@ -3200,6 +3382,60 @@ dependencies = [ "winapi", ] +[[package]] +name = "wasm-bindgen" +version = "0.2.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d54ee1d4ed486f78874278e63e4069fc1ab9f6a18ca492076ffb90c5eb2997fd" +dependencies = [ + "cfg-if 1.0.0", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b33f6a0694ccfea53d94db8b2ed1c3a8a4c86dd936b13b9f0a15ec4a451b900" +dependencies = [ + "bumpalo", + "lazy_static", + "log", + "proc-macro2", + 
"quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "088169ca61430fe1e58b8096c24975251700e7b1f6fd91cc9d59b04fb9b18bd4" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be2241542ff3d9f241f5e2cb6dd09b37efe786df8851c54957683a49f0987a97" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7cff876b8f18eed75a66cf49b65e7f967cb354a7aa16003fb55dbfd25b44b4f" + [[package]] name = "wasm-encoder" version = "0.4.1" @@ -3700,6 +3936,16 @@ dependencies = [ "wast 35.0.2", ] +[[package]] +name = "web-sys" +version = "0.3.51" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e828417b379f3df7111d3a2a9e5753706cae29c41f7c4029ee9fd77f3e09e582" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "wiggle" version = "0.26.0" diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 5e21f11a19..2f35b5fe4b 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -34,6 +34,9 @@ wast = { version = "35.0.0", optional = true } # machine code. Integration tests that need external dependencies can be # accomodated in `tests`. 
+[dev-dependencies] +criterion = "0.3" + [build-dependencies] cranelift-codegen-meta = { path = "meta", version = "0.73.0" } @@ -103,3 +106,7 @@ souper-harvest = ["souper-ir", "souper-ir/stringify"] [badges] maintenance = { status = "experimental" } + +[[bench]] +name = "x64-evex-encoding" +harness = false diff --git a/cranelift/codegen/benches/x64-evex-encoding.rs b/cranelift/codegen/benches/x64-evex-encoding.rs new file mode 100644 index 0000000000..e5c1434181 --- /dev/null +++ b/cranelift/codegen/benches/x64-evex-encoding.rs @@ -0,0 +1,138 @@ +//! Measure instruction encoding latency using various approaches; the +//! benchmarking is feature-gated on `x86` since it only measures the encoding +//! mechanism of that backend. + +#[cfg(feature = "x86")] +mod x86 { + use cranelift_codegen::isa::x64::encoding::{ + evex::{EvexContext, EvexInstruction, EvexMasking, EvexVectorLength, Register}, + rex::OpcodeMap, + rex::{encode_modrm, LegacyPrefixes}, + ByteSink, + }; + use cranelift_codegen_shared::isa::x86::EncodingBits; + use criterion::{criterion_group, Criterion}; + + // Define the benchmarks. 
+ fn x64_evex_encoding_benchmarks(c: &mut Criterion) { + let mut group = c.benchmark_group("x64 EVEX encoding"); + let rax = Register::from(0); + let rdx = Register::from(2); + + group.bench_function("EvexInstruction (builder pattern)", |b| { + let mut sink = vec![]; + b.iter(|| { + sink.clear(); + EvexInstruction::new() + .prefix(LegacyPrefixes::_66) + .map(OpcodeMap::_0F38) + .w(true) + .opcode(0x1F) + .reg(rax) + .rm(rdx) + .length(EvexVectorLength::V128) + .encode(&mut sink); + }); + }); + + group.bench_function("encode_evex (function pattern)", |b| { + let mut sink = vec![]; + let bits = EncodingBits::new(&[0x66, 0x0f, 0x38, 0x1f], 0, 1); + let vvvvv = Register::from(0); + b.iter(|| { + sink.clear(); + encode_evex( + bits, + rax, + vvvvv, + rdx, + EvexContext::Other { + length: EvexVectorLength::V128, + }, + EvexMasking::default(), + &mut sink, + ); + }) + }); + } + criterion_group!(benches, x64_evex_encoding_benchmarks); + + /// Using an inner module to feature-gate the benchmarks means that we must + /// manually specify how to run the benchmarks (see `criterion_main!`). + pub fn run_benchmarks() { + criterion::__warn_about_html_reports_feature(); + criterion::__warn_about_cargo_bench_support_feature(); + benches(); + Criterion::default().configure_from_args().final_summary(); + } + + /// From the legacy x86 backend: a mechanism for encoding an EVEX + /// instruction, including the prefixes, the instruction opcode, and the + /// ModRM byte. This EVEX encoding function only encodes the `reg` (operand + /// 1), `vvvv` (operand 2), `rm` (operand 3) form; other forms are possible + /// (see section 2.6.2, Intel Software Development Manual, volume 2A), + /// requiring refactoring of this function or separate functions for each + /// form (e.g. as for the REX prefix). 
+ #[inline(always)] + pub fn encode_evex<CS: ByteSink + ?Sized>( + enc: EncodingBits, + reg: Register, + vvvvv: Register, + rm: Register, + context: EvexContext, + masking: EvexMasking, + sink: &mut CS, + ) { + let reg: u8 = reg.into(); + let rm: u8 = rm.into(); + let vvvvv: u8 = vvvvv.into(); + + // EVEX prefix. + sink.put1(0x62); + + debug_assert!(enc.mm() < 0b100); + let mut p0 = enc.mm() & 0b11; + p0 |= evex2(rm, reg) << 4; // bits 3:2 are always unset + sink.put1(p0); + + let mut p1 = enc.pp() | 0b100; // bit 2 is always set + p1 |= (!(vvvvv) & 0b1111) << 3; + p1 |= (enc.rex_w() & 0b1) << 7; + sink.put1(p1); + + let mut p2 = masking.aaa_bits(); + p2 |= (!(vvvvv >> 4) & 0b1) << 3; + p2 |= context.bits() << 4; + p2 |= masking.z_bit() << 7; + sink.put1(p2); + + // Opcode. + sink.put1(enc.opcode_byte()); + + // ModR/M byte. + sink.put1(encode_modrm(3, reg & 7, rm & 7)) + } + + /// From the legacy x86 backend: encode the RXBR' bits of the EVEX P0 byte. + /// For an explanation of these bits, see section 2.6.1 in the Intel + /// Software Development Manual, volume 2A. These bits can be used by + /// different addressing modes (see section 2.6.2), requiring different + /// `vex*` functions than this one. + fn evex2(rm: u8, reg: u8) -> u8 { + let b = !(rm >> 3) & 1; + let x = !(rm >> 4) & 1; + let r = !(reg >> 3) & 1; + let r_ = !(reg >> 4) & 1; + 0x00 | r_ | (b << 1) | (x << 2) | (r << 3) + } +} + +fn main() { + #[cfg(feature = "x86")] + x86::run_benchmarks(); + + #[cfg(not(feature = "x86"))] + println!( + "Unable to run the x64-evex-encoding benchmark; the `x86` feature must be enabled in Cargo.", + ); +}