From 22181d0819cd2ef6abd5030f33339c6175273212 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 18 Aug 2020 09:42:46 -0700 Subject: [PATCH 01/34] Use regalloc 0.0.30. This upgrade pulls in one memory-allocation reduction improvement (bytecodealliance/regalloc.rs#95). There should be no change in behavior as a result of this. --- Cargo.lock | 4 ++-- cranelift/codegen/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9e19c67c2e..a31ce0605e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1715,9 +1715,9 @@ dependencies = [ [[package]] name = "regalloc" -version = "0.0.29" +version = "0.0.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c178c51068338acd359c6e1ed356fcffe6b6cb3c162f68f70e251ca29bfe0eba" +checksum = "2041c2d34f6ff346d6f428974f03d8bf12679b0c816bb640dc5eb1d48848d8d1" dependencies = [ "log", "rustc-hash", diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 88ad9ba0c7..cc9e4421ea 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -28,7 +28,7 @@ byteorder = { version = "1.3.2", default-features = false } peepmatic = { path = "../peepmatic", optional = true, version = "0.66.0" } peepmatic-traits = { path = "../peepmatic/crates/traits", optional = true, version = "0.66.0" } peepmatic-runtime = { path = "../peepmatic/crates/runtime", optional = true, version = "0.66.0" } -regalloc = "0.0.29" +regalloc = "0.0.30" wast = { version = "22.0.0", optional = true } # It is a goal of the cranelift-codegen crate to have minimal external dependencies. # Please don't add any unless they are essential to the task of creating binary From 94ee96712ac7e1f64a0e29ee020a7e49ea699db8 Mon Sep 17 00:00:00 2001 From: Pat Hickey Date: Tue, 18 Aug 2020 10:35:05 -0700 Subject: [PATCH 02/34] wasi-common: switch all logs from `log` to `tracing` tracing is already the dep that wiggle uses. 
I used tracing structured arguments wherever I could, but I skipped over it in all of the snapshot_0 code, because I'm going to delete that code and replace it with wiggle-based stuff real soon. --- Cargo.lock | 4 +- crates/wasi-common/Cargo.toml | 4 +- crates/wasi-common/src/ctx.rs | 9 ++-- crates/wasi-common/src/entry.rs | 8 ++-- crates/wasi-common/src/old/snapshot_0/ctx.rs | 6 +-- .../src/old/snapshot_0/hostcalls_impl/fs.rs | 8 ++-- .../snapshot_0/hostcalls_impl/fs_helpers.rs | 8 ++-- .../src/old/snapshot_0/hostcalls_impl/misc.rs | 10 ++--- .../src/old/snapshot_0/sys/unix/entry_impl.rs | 14 +++--- .../src/old/snapshot_0/sys/unix/host_impl.rs | 4 +- .../snapshot_0/sys/unix/hostcalls_impl/fs.rs | 14 +++--- .../sys/unix/hostcalls_impl/fs_helpers.rs | 4 +- .../sys/unix/hostcalls_impl/misc.rs | 8 ++-- .../sys/unix/linux/hostcalls_impl.rs | 4 +- crates/wasi-common/src/path.rs | 10 ++--- .../src/snapshots/wasi_snapshot_preview1.rs | 22 ++++++---- crates/wasi-common/src/sys/mod.rs | 17 ++++++-- crates/wasi-common/src/sys/osdir.rs | 2 +- crates/wasi-common/src/sys/unix/fd.rs | 4 +- crates/wasi-common/src/sys/unix/linux/path.rs | 10 ++--- crates/wasi-common/src/sys/unix/mod.rs | 18 ++++---- crates/wasi-common/src/sys/unix/path.rs | 23 +++++++--- crates/wasi-common/src/sys/unix/poll.rs | 7 ++- crates/wasi-common/src/virtfs.rs | 43 +++++-------------- crates/wasi-common/wig/src/hostcalls.rs | 2 +- crates/wasi-common/wig/src/wasi.rs | 4 +- 26 files changed, 135 insertions(+), 132 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9e19c67c2e..bd3f302bf5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2324,8 +2324,8 @@ dependencies = [ "getrandom", "lazy_static", "libc", - "log", "thiserror", + "tracing", "wig", "wiggle", "winapi", @@ -2629,7 +2629,7 @@ name = "wasmtime-wasi" version = "0.19.1" dependencies = [ "anyhow", - "log", + "tracing", "wasi-common", "wasmtime", "wasmtime-runtime", diff --git a/crates/wasi-common/Cargo.toml b/crates/wasi-common/Cargo.toml index 
c92e98a5e8..6a26b7129c 100644 --- a/crates/wasi-common/Cargo.toml +++ b/crates/wasi-common/Cargo.toml @@ -22,11 +22,11 @@ thiserror = "1.0" libc = "0.2" getrandom = "0.1" cfg-if = "0.1.9" -log = "0.4" filetime = "0.2.7" lazy_static = "1.4.0" wig = { path = "wig", version = "0.19.0" } wiggle = { path = "../wiggle", default-features = false, version = "0.19.0" } +tracing = "0.1.15" [target.'cfg(unix)'.dependencies] yanix = { path = "yanix", version = "0.19.0" } @@ -44,7 +44,7 @@ default = ["trace_log"] # This feature enables the `tracing` logs in the calls to target the `log` # ecosystem of backends (e.g. `env_logger`. Disable this if you want to use # `tracing-subscriber`. -trace_log = [ "wiggle/tracing_log" ] +trace_log = [ "wiggle/tracing_log", "tracing/log" ] # Need to make the wiggle_metadata feature available to consumers of this # crate if they want the snapshots to have metadata available. wiggle_metadata = ["wiggle/wiggle_metadata"] diff --git a/crates/wasi-common/src/ctx.rs b/crates/wasi-common/src/ctx.rs index 05bb0fce94..9d2ffd54f1 100644 --- a/crates/wasi-common/src/ctx.rs +++ b/crates/wasi-common/src/ctx.rs @@ -359,7 +359,10 @@ impl WasiCtxBuilder { self.stdout.take().unwrap(), self.stderr.take().unwrap(), ] { - log::debug!("WasiCtx inserting entry {:?}", pending); + tracing::debug!( + pending = tracing::field::debug(&pending), + "WasiCtx inserting entry" + ); let fd = match pending { PendingEntry::Thunk(f) => { let handle = EntryHandle::from(f()?); @@ -376,7 +379,7 @@ impl WasiCtxBuilder { .ok_or(WasiCtxBuilderError::TooManyFilesOpen)? } }; - log::debug!("WasiCtx inserted at {:?}", fd); + tracing::debug!(fd = tracing::field::debug(fd), "WasiCtx inserted"); } // Then add the preopen entries. 
for (guest_path, preopen) in self.preopens.take().unwrap() { @@ -386,7 +389,7 @@ impl WasiCtxBuilder { let fd = entries .insert(entry) .ok_or(WasiCtxBuilderError::TooManyFilesOpen)?; - log::debug!("WasiCtx inserted at {:?}", fd); + tracing::debug!(fd = tracing::field::debug(fd), "WasiCtx inserted",); } Ok(WasiCtx { diff --git a/crates/wasi-common/src/entry.rs b/crates/wasi-common/src/entry.rs index ba9e90c160..8734c63d3f 100644 --- a/crates/wasi-common/src/entry.rs +++ b/crates/wasi-common/src/entry.rs @@ -87,10 +87,10 @@ impl Entry { if this_rights.contains(rights) { Ok(()) } else { - log::trace!( - " | validate_rights failed: required rights = {}; actual rights = {}", - rights, - this_rights, + tracing::trace!( + required = tracing::field::display(rights), + actual = tracing::field::display(this_rights), + "validate_rights failed", ); Err(Errno::Notcapable) } diff --git a/crates/wasi-common/src/old/snapshot_0/ctx.rs b/crates/wasi-common/src/old/snapshot_0/ctx.rs index 28c1cac673..7ffa536bf3 100644 --- a/crates/wasi-common/src/old/snapshot_0/ctx.rs +++ b/crates/wasi-common/src/old/snapshot_0/ctx.rs @@ -287,7 +287,7 @@ impl WasiCtxBuilder { let fd = fd_pool .allocate() .ok_or(WasiCtxBuilderError::TooManyFilesOpen)?; - log::debug!("WasiCtx inserting ({:?}, {:?})", fd, pending); + tracing::debug!("WasiCtx inserting ({:?}, {:?})", fd, pending); match pending.take().unwrap() { PendingEntry::Thunk(f) => { entries.insert(fd, f()?); @@ -311,9 +311,9 @@ impl WasiCtxBuilder { let mut fe = Entry::from(dir)?; fe.preopen_path = Some(guest_path); - log::debug!("WasiCtx inserting ({:?}, {:?})", preopen_fd, fe); + tracing::debug!("WasiCtx inserting ({:?}, {:?})", preopen_fd, fe); entries.insert(preopen_fd, fe); - log::debug!("WasiCtx entries = {:?}", entries); + tracing::debug!("WasiCtx entries = {:?}", entries); } Ok(WasiCtx { diff --git a/crates/wasi-common/src/old/snapshot_0/hostcalls_impl/fs.rs b/crates/wasi-common/src/old/snapshot_0/hostcalls_impl/fs.rs index 
8d71856682..6ba1c4d99b 100644 --- a/crates/wasi-common/src/old/snapshot_0/hostcalls_impl/fs.rs +++ b/crates/wasi-common/src/old/snapshot_0/hostcalls_impl/fs.rs @@ -11,11 +11,11 @@ use crate::old::snapshot_0::wasi::{self, WasiError, WasiResult}; use crate::old::snapshot_0::{helpers, host, wasi32}; use crate::sandboxed_tty_writer::SandboxedTTYWriter; use filetime::{set_file_handle_times, FileTime}; -use log::trace; use std::fs::File; use std::io::{self, Read, Seek, SeekFrom, Write}; use std::ops::DerefMut; use std::time::{Duration, SystemTime, UNIX_EPOCH}; +use tracing::trace; pub(crate) unsafe fn fd_close( wasi_ctx: &mut WasiCtx, @@ -686,8 +686,8 @@ pub(crate) unsafe fn path_rename( true, )?; - log::debug!("path_rename resolved_old={:?}", resolved_old); - log::debug!("path_rename resolved_new={:?}", resolved_new); + tracing::debug!("path_rename resolved_old={:?}", resolved_old); + tracing::debug!("path_rename resolved_new={:?}", resolved_new); hostcalls_impl::path_rename(resolved_old, resolved_new) } @@ -950,7 +950,7 @@ pub(crate) unsafe fn path_remove_directory( true, )?; - log::debug!("path_remove_directory resolved={:?}", resolved); + tracing::debug!("path_remove_directory resolved={:?}", resolved); hostcalls_impl::path_remove_directory(resolved) } diff --git a/crates/wasi-common/src/old/snapshot_0/hostcalls_impl/fs_helpers.rs b/crates/wasi-common/src/old/snapshot_0/hostcalls_impl/fs_helpers.rs index aabdb29da2..59a94d1bec 100644 --- a/crates/wasi-common/src/old/snapshot_0/hostcalls_impl/fs_helpers.rs +++ b/crates/wasi-common/src/old/snapshot_0/hostcalls_impl/fs_helpers.rs @@ -68,7 +68,7 @@ pub(crate) fn path_get( loop { match path_stack.pop() { Some(cur_path) => { - log::debug!("path_get cur_path = {:?}", cur_path); + tracing::debug!("path_get cur_path = {:?}", cur_path); let ends_with_slash = cur_path.ends_with('/'); let mut components = Path::new(&cur_path).components(); @@ -86,7 +86,7 @@ pub(crate) fn path_get( path_stack.push(tail); } - log::debug!("path_get 
path_stack = {:?}", path_stack); + tracing::debug!("path_get path_stack = {:?}", path_stack); match head { Component::Prefix(_) | Component::RootDir => { @@ -140,7 +140,7 @@ pub(crate) fn path_get( link_path.push('/'); } - log::debug!( + tracing::debug!( "attempted symlink expansion link_path={:?}", link_path ); @@ -172,7 +172,7 @@ pub(crate) fn path_get( link_path.push('/'); } - log::debug!( + tracing::debug!( "attempted symlink expansion link_path={:?}", link_path ); diff --git a/crates/wasi-common/src/old/snapshot_0/hostcalls_impl/misc.rs b/crates/wasi-common/src/old/snapshot_0/hostcalls_impl/misc.rs index d6f4d24684..7016fb2162 100644 --- a/crates/wasi-common/src/old/snapshot_0/hostcalls_impl/misc.rs +++ b/crates/wasi-common/src/old/snapshot_0/hostcalls_impl/misc.rs @@ -5,8 +5,8 @@ use crate::old::snapshot_0::memory::*; use crate::old::snapshot_0::sys::hostcalls_impl; use crate::old::snapshot_0::wasi::{self, WasiError, WasiResult}; use crate::old::snapshot_0::wasi32; -use log::{error, trace}; use std::convert::TryFrom; +use tracing::{error, trace}; pub(crate) fn args_get( wasi_ctx: &WasiCtx, @@ -227,8 +227,8 @@ pub(crate) fn poll_oneoff( let clock = unsafe { subscription.u.u.clock }; let delay = wasi_clock_to_relative_ns_delay(clock)?; - log::debug!("poll_oneoff event.u.clock = {:?}", clock); - log::debug!("poll_oneoff delay = {:?}ns", delay); + tracing::debug!("poll_oneoff event.u.clock = {:?}", clock); + tracing::debug!("poll_oneoff delay = {:?}ns", delay); let current = ClockEventData { delay, @@ -299,8 +299,8 @@ pub(crate) fn poll_oneoff( } } - log::debug!("poll_oneoff timeout = {:?}", timeout); - log::debug!("poll_oneoff fd_events = {:?}", fd_events); + tracing::debug!("poll_oneoff timeout = {:?}", timeout); + tracing::debug!("poll_oneoff fd_events = {:?}", fd_events); hostcalls_impl::poll_oneoff(timeout, fd_events, &mut events)?; diff --git a/crates/wasi-common/src/old/snapshot_0/sys/unix/entry_impl.rs 
b/crates/wasi-common/src/old/snapshot_0/sys/unix/entry_impl.rs index 8f9debabf9..325c627f60 100644 --- a/crates/wasi-common/src/old/snapshot_0/sys/unix/entry_impl.rs +++ b/crates/wasi-common/src/old/snapshot_0/sys/unix/entry_impl.rs @@ -61,14 +61,14 @@ pub(crate) unsafe fn determine_type_rights( let file = std::mem::ManuallyDrop::new(std::fs::File::from_raw_fd(fd.as_raw_fd())); let ft = file.metadata()?.file_type(); if ft.is_block_device() { - log::debug!("Host fd {:?} is a block device", fd.as_raw_fd()); + tracing::debug!("Host fd {:?} is a block device", fd.as_raw_fd()); ( wasi::__WASI_FILETYPE_BLOCK_DEVICE, wasi::RIGHTS_BLOCK_DEVICE_BASE, wasi::RIGHTS_BLOCK_DEVICE_INHERITING, ) } else if ft.is_char_device() { - log::debug!("Host fd {:?} is a char device", fd.as_raw_fd()); + tracing::debug!("Host fd {:?} is a char device", fd.as_raw_fd()); use yanix::file::isatty; if isatty(fd.as_raw_fd())? { ( @@ -84,21 +84,21 @@ pub(crate) unsafe fn determine_type_rights( ) } } else if ft.is_dir() { - log::debug!("Host fd {:?} is a directory", fd.as_raw_fd()); + tracing::debug!("Host fd {:?} is a directory", fd.as_raw_fd()); ( wasi::__WASI_FILETYPE_DIRECTORY, wasi::RIGHTS_DIRECTORY_BASE, wasi::RIGHTS_DIRECTORY_INHERITING, ) } else if ft.is_file() { - log::debug!("Host fd {:?} is a file", fd.as_raw_fd()); + tracing::debug!("Host fd {:?} is a file", fd.as_raw_fd()); ( wasi::__WASI_FILETYPE_REGULAR_FILE, wasi::RIGHTS_REGULAR_FILE_BASE, wasi::RIGHTS_REGULAR_FILE_INHERITING, ) } else if ft.is_socket() { - log::debug!("Host fd {:?} is a socket", fd.as_raw_fd()); + tracing::debug!("Host fd {:?} is a socket", fd.as_raw_fd()); use yanix::socket::{get_socket_type, SockType}; match get_socket_type(fd.as_raw_fd())? 
{ SockType::Datagram => ( @@ -114,14 +114,14 @@ pub(crate) unsafe fn determine_type_rights( _ => return Err(io::Error::from_raw_os_error(libc::EINVAL)), } } else if ft.is_fifo() { - log::debug!("Host fd {:?} is a fifo", fd.as_raw_fd()); + tracing::debug!("Host fd {:?} is a fifo", fd.as_raw_fd()); ( wasi::__WASI_FILETYPE_UNKNOWN, wasi::RIGHTS_REGULAR_FILE_BASE, wasi::RIGHTS_REGULAR_FILE_INHERITING, ) } else { - log::debug!("Host fd {:?} is unknown", fd.as_raw_fd()); + tracing::debug!("Host fd {:?} is unknown", fd.as_raw_fd()); return Err(io::Error::from_raw_os_error(libc::EINVAL)); } }; diff --git a/crates/wasi-common/src/old/snapshot_0/sys/unix/host_impl.rs b/crates/wasi-common/src/old/snapshot_0/sys/unix/host_impl.rs index 50e294dd2a..c6ff0fac3e 100644 --- a/crates/wasi-common/src/old/snapshot_0/sys/unix/host_impl.rs +++ b/crates/wasi-common/src/old/snapshot_0/sys/unix/host_impl.rs @@ -92,12 +92,12 @@ impl From for WasiError { libc::ENOTRECOVERABLE => Self::ENOTRECOVERABLE, libc::ENOTSUP => Self::ENOTSUP, x => { - log::debug!("Unknown errno value: {}", x); + tracing::debug!("Unknown errno value: {}", x); Self::EIO } }, None => { - log::debug!("Other I/O error: {}", err); + tracing::debug!("Other I/O error: {}", err); Self::EIO } } diff --git a/crates/wasi-common/src/old/snapshot_0/sys/unix/hostcalls_impl/fs.rs b/crates/wasi-common/src/old/snapshot_0/sys/unix/hostcalls_impl/fs.rs index cfbbb1ead5..f618dfca8c 100644 --- a/crates/wasi-common/src/old/snapshot_0/sys/unix/hostcalls_impl/fs.rs +++ b/crates/wasi-common/src/old/snapshot_0/sys/unix/hostcalls_impl/fs.rs @@ -115,8 +115,8 @@ pub(crate) fn path_open( // umask is, but don't set the executable flag, because it isn't yet // meaningful for WASI programs to create executable files. 
- log::debug!("path_open resolved = {:?}", resolved); - log::debug!("path_open oflags = {:?}", nix_all_oflags); + tracing::debug!("path_open resolved = {:?}", resolved); + tracing::debug!("path_open oflags = {:?}", nix_all_oflags); let new_fd = match unsafe { openat( @@ -144,7 +144,7 @@ pub(crate) fn path_open( } } Err(err) => { - log::debug!("path_open fstatat error: {:?}", err); + tracing::debug!("path_open fstatat error: {:?}", err); } } } @@ -166,7 +166,7 @@ pub(crate) fn path_open( } } Err(err) => { - log::debug!("path_open fstatat error: {:?}", err); + tracing::debug!("path_open fstatat error: {:?}", err); } } } @@ -182,7 +182,7 @@ pub(crate) fn path_open( } }; - log::debug!("path_open (host) new_fd = {:?}", new_fd); + tracing::debug!("path_open (host) new_fd = {:?}", new_fd); // Determine the type of the new file descriptor and which rights contradict with this type Ok(unsafe { File::from_raw_fd(new_fd) }) @@ -294,10 +294,10 @@ pub(crate) fn fd_readdir<'a>( // Seek if needed. Unless cookie is wasi::__WASI_DIRCOOKIE_START, // new items may not be returned to the caller. if cookie == wasi::__WASI_DIRCOOKIE_START { - log::trace!(" | fd_readdir: doing rewinddir"); + tracing::trace!(" | fd_readdir: doing rewinddir"); dir.rewind(); } else { - log::trace!(" | fd_readdir: doing seekdir to {}", cookie); + tracing::trace!(" | fd_readdir: doing seekdir to {}", cookie); let loc = unsafe { SeekLoc::from_raw(cookie as i64)? 
}; dir.seek(loc); } diff --git a/crates/wasi-common/src/old/snapshot_0/sys/unix/hostcalls_impl/fs_helpers.rs b/crates/wasi-common/src/old/snapshot_0/sys/unix/hostcalls_impl/fs_helpers.rs index 23ad31a578..4d812ebe02 100644 --- a/crates/wasi-common/src/old/snapshot_0/sys/unix/hostcalls_impl/fs_helpers.rs +++ b/crates/wasi-common/src/old/snapshot_0/sys/unix/hostcalls_impl/fs_helpers.rs @@ -40,7 +40,7 @@ pub(crate) fn openat(dirfd: &File, path: &str) -> WasiResult { use std::os::unix::prelude::{AsRawFd, FromRawFd}; use yanix::file::{openat, Mode}; - log::debug!("path_get openat path = {:?}", path); + tracing::debug!("path_get openat path = {:?}", path); unsafe { openat( @@ -58,7 +58,7 @@ pub(crate) fn readlinkat(dirfd: &File, path: &str) -> WasiResult { use std::os::unix::prelude::AsRawFd; use yanix::file::readlinkat; - log::debug!("path_get readlinkat path = {:?}", path); + tracing::debug!("path_get readlinkat path = {:?}", path); unsafe { readlinkat(dirfd.as_raw_fd(), path) } .map_err(Into::into) diff --git a/crates/wasi-common/src/old/snapshot_0/sys/unix/hostcalls_impl/misc.rs b/crates/wasi-common/src/old/snapshot_0/sys/unix/hostcalls_impl/misc.rs index 914a25d94a..015f942efe 100644 --- a/crates/wasi-common/src/old/snapshot_0/sys/unix/hostcalls_impl/misc.rs +++ b/crates/wasi-common/src/old/snapshot_0/sys/unix/hostcalls_impl/misc.rs @@ -85,7 +85,7 @@ pub(crate) fn poll_oneoff( let delay = timeout.delay / 1_000_000; // poll syscall requires delay to expressed in milliseconds delay.try_into().unwrap_or(libc::c_int::max_value()) }); - log::debug!("poll_oneoff poll_timeout = {:?}", poll_timeout); + tracing::debug!("poll_oneoff poll_timeout = {:?}", poll_timeout); let ready = loop { match poll(&mut poll_fds, poll_timeout) { @@ -131,15 +131,15 @@ fn poll_oneoff_handle_fd_event<'a>( use yanix::{file::fionread, poll::PollFlags}; for (fd_event, poll_fd) in ready_events { - log::debug!("poll_oneoff_handle_fd_event fd_event = {:?}", fd_event); - 
log::debug!("poll_oneoff_handle_fd_event poll_fd = {:?}", poll_fd); + tracing::debug!("poll_oneoff_handle_fd_event fd_event = {:?}", fd_event); + tracing::debug!("poll_oneoff_handle_fd_event poll_fd = {:?}", poll_fd); let revents = match poll_fd.revents() { Some(revents) => revents, None => continue, }; - log::debug!("poll_oneoff_handle_fd_event revents = {:?}", revents); + tracing::debug!("poll_oneoff_handle_fd_event revents = {:?}", revents); let nbytes = if fd_event.r#type == wasi::__WASI_EVENTTYPE_FD_READ { unsafe { fionread(fd_event.descriptor.as_raw_fd())? } diff --git a/crates/wasi-common/src/old/snapshot_0/sys/unix/linux/hostcalls_impl.rs b/crates/wasi-common/src/old/snapshot_0/sys/unix/linux/hostcalls_impl.rs index 8380f1742e..6489b5bf30 100644 --- a/crates/wasi-common/src/old/snapshot_0/sys/unix/linux/hostcalls_impl.rs +++ b/crates/wasi-common/src/old/snapshot_0/sys/unix/linux/hostcalls_impl.rs @@ -17,8 +17,8 @@ pub(crate) fn path_unlink_file(resolved: PathGet) -> WasiResult<()> { pub(crate) fn path_symlink(old_path: &str, resolved: PathGet) -> WasiResult<()> { use yanix::file::symlinkat; - log::debug!("path_symlink old_path = {:?}", old_path); - log::debug!("path_symlink resolved = {:?}", resolved); + tracing::debug!("path_symlink old_path = {:?}", old_path); + tracing::debug!("path_symlink resolved = {:?}", resolved); unsafe { symlinkat(old_path, resolved.dirfd().as_raw_fd(), resolved.path()) } .map_err(Into::into) diff --git a/crates/wasi-common/src/path.rs b/crates/wasi-common/src/path.rs index fdce7b481a..99452a7b17 100644 --- a/crates/wasi-common/src/path.rs +++ b/crates/wasi-common/src/path.rs @@ -22,7 +22,7 @@ pub(crate) fn get( // Extract path as &str from guest's memory. 
let path = path_ptr.as_str()?; - log::trace!(" | (path_ptr,path_len)='{}'", &*path); + tracing::trace!(path = &*path); if path.contains('\0') { // if contains NUL, return Ilseq @@ -55,7 +55,7 @@ pub(crate) fn get( loop { match path_stack.pop() { Some(cur_path) => { - log::debug!("path_get cur_path = {:?}", cur_path); + tracing::debug!(cur_path = tracing::field::display(&cur_path), "path get"); let ends_with_slash = cur_path.ends_with('/'); let mut components = Path::new(&cur_path).components(); @@ -73,7 +73,7 @@ pub(crate) fn get( path_stack.push(tail); } - log::debug!("path_get path_stack = {:?}", path_stack); + tracing::debug!(path_stack = tracing::field::debug(&path_stack), "path_get"); match head { Component::Prefix(_) | Component::RootDir => { @@ -129,7 +129,7 @@ pub(crate) fn get( link_path.push('/'); } - log::debug!( + tracing::debug!( "attempted symlink expansion link_path={:?}", link_path ); @@ -161,7 +161,7 @@ pub(crate) fn get( link_path.push('/'); } - log::debug!( + tracing::debug!( "attempted symlink expansion link_path={:?}", link_path ); diff --git a/crates/wasi-common/src/snapshots/wasi_snapshot_preview1.rs b/crates/wasi-common/src/snapshots/wasi_snapshot_preview1.rs index a6d3d97d44..342fbad1d4 100644 --- a/crates/wasi-common/src/snapshots/wasi_snapshot_preview1.rs +++ b/crates/wasi-common/src/snapshots/wasi_snapshot_preview1.rs @@ -5,9 +5,9 @@ use crate::wasi::wasi_snapshot_preview1::WasiSnapshotPreview1; use crate::wasi::{types, AsBytes, Errno, Result}; use crate::WasiCtx; use crate::{path, poll}; -use log::{debug, error, trace}; use std::convert::TryInto; use std::io::{self, SeekFrom}; +use tracing::{debug, error, trace}; use wiggle::{GuestPtr, GuestSlice}; impl<'a> WasiSnapshotPreview1 for WasiCtx { @@ -651,7 +651,7 @@ impl<'a> WasiSnapshotPreview1 for WasiCtx { true, )?; let old_path = old_path.as_str()?; - trace!(" | old_path='{}'", &*old_path); + trace!(old_path = &*old_path); new_fd.symlink(&old_path, &new_path) } @@ -701,8 +701,11 @@ 
impl<'a> WasiSnapshotPreview1 for WasiCtx { match subscription.u { types::SubscriptionU::Clock(clock) => { let delay = clock::to_relative_ns_delay(&clock)?; - debug!("poll_oneoff event.u.clock = {:?}", clock); - debug!("poll_oneoff delay = {:?}ns", delay); + debug!( + clock = tracing::field::debug(&clock), + delay_ns = tracing::field::debug(delay), + "poll_oneoff" + ); let current = poll::ClockEventData { delay, userdata: subscription.userdata, @@ -766,8 +769,11 @@ impl<'a> WasiSnapshotPreview1 for WasiCtx { } } } - debug!("poll_oneoff events = {:?}", events); - debug!("poll_oneoff timeout = {:?}", timeout); + debug!( + events = tracing::field::debug(&events), + timeout = tracing::field::debug(timeout), + "poll_oneoff" + ); // The underlying implementation should successfully and immediately return // if no events have been passed. Such situation may occur if all provided // events have been filtered out as errors in the code above. @@ -780,7 +786,7 @@ impl<'a> WasiSnapshotPreview1 for WasiCtx { event_ptr.write(event)?; } - trace!(" | *nevents={:?}", nevents); + trace!(nevents = nevents); Ok(nevents) } @@ -803,7 +809,7 @@ impl<'a> WasiSnapshotPreview1 for WasiCtx { fn random_get(&self, buf: &GuestPtr, buf_len: types::Size) -> Result<()> { let mut slice = buf.as_array(buf_len).as_slice()?; getrandom::getrandom(&mut *slice).map_err(|err| { - error!("getrandom failure: {:?}", err); + error!(error = tracing::field::display(err), "getrandom failure"); Errno::Io }) } diff --git a/crates/wasi-common/src/sys/mod.rs b/crates/wasi-common/src/sys/mod.rs index bca99f146d..a07603034f 100644 --- a/crates/wasi-common/src/sys/mod.rs +++ b/crates/wasi-common/src/sys/mod.rs @@ -55,7 +55,7 @@ impl AsFile for dyn Handle + 'static { } else if let Some(other) = self.as_any().downcast_ref::() { other.as_file() } else { - log::error!("tried to make std::fs::File from non-OS handle"); + tracing::error!("tried to make std::fs::File from non-OS handle"); 
Err(io::Error::from_raw_os_error(libc::EBADF)) } } @@ -69,17 +69,26 @@ impl TryFrom for Box { match file_type { types::Filetype::RegularFile => { let handle = OsFile::try_from(file)?; - log::debug!("Created new instance of OsFile: {:?}", handle); + tracing::debug!( + handle = tracing::field::debug(&handle), + "Created new instance of OsFile" + ); Ok(Box::new(handle)) } types::Filetype::Directory => { let handle = OsDir::try_from(file)?; - log::debug!("Created new instance of OsDir: {:?}", handle); + tracing::debug!( + handle = tracing::field::debug(&handle), + "Created new instance of OsDir" + ); Ok(Box::new(handle)) } _ => { let handle = OsOther::try_from(file)?; - log::debug!("Created new instance of OsOther: {:?}", handle); + tracing::debug!( + handle = tracing::field::debug(&handle), + "Created new instance of OsOther" + ); Ok(Box::new(handle)) } } diff --git a/crates/wasi-common/src/sys/osdir.rs b/crates/wasi-common/src/sys/osdir.rs index 6ba3cc63a9..bf8bb0c280 100644 --- a/crates/wasi-common/src/sys/osdir.rs +++ b/crates/wasi-common/src/sys/osdir.rs @@ -2,10 +2,10 @@ use super::sys_impl::oshandle::RawOsHandle; use super::{fd, path, AsFile}; use crate::handle::{Handle, HandleRights}; use crate::wasi::{types, Errno, Result}; -use log::{debug, error}; use std::any::Any; use std::io; use std::ops::Deref; +use tracing::{debug, error}; // TODO could this be cleaned up? // The actual `OsDir` struct is OS-dependent, therefore we delegate diff --git a/crates/wasi-common/src/sys/unix/fd.rs b/crates/wasi-common/src/sys/unix/fd.rs index 83d717711c..5a84e51d5e 100644 --- a/crates/wasi-common/src/sys/unix/fd.rs +++ b/crates/wasi-common/src/sys/unix/fd.rs @@ -59,10 +59,10 @@ pub(crate) fn readdir<'a>( // Seek if needed. Unless cookie is wasi::__WASI_DIRCOOKIE_START, // new items may not be returned to the caller. 
if cookie == wasi::DIRCOOKIE_START { - log::trace!(" | fd_readdir: doing rewinddir"); + tracing::trace!("fd_readdir: doing rewinddir"); dir.rewind(); } else { - log::trace!(" | fd_readdir: doing seekdir to {}", cookie); + tracing::trace!("fd_readdir: doing seekdir to {}", cookie); let loc = unsafe { SeekLoc::from_raw(cookie as i64)? }; dir.seek(loc); } diff --git a/crates/wasi-common/src/sys/unix/linux/path.rs b/crates/wasi-common/src/sys/unix/linux/path.rs index 9bbcb34514..1d441a3fa9 100644 --- a/crates/wasi-common/src/sys/unix/linux/path.rs +++ b/crates/wasi-common/src/sys/unix/linux/path.rs @@ -11,11 +11,11 @@ pub(crate) fn unlink_file(dirfd: &OsDir, path: &str) -> Result<()> { pub(crate) fn symlink(old_path: &str, new_dirfd: &OsDir, new_path: &str) -> Result<()> { use yanix::file::symlinkat; - log::debug!("path_symlink old_path = {:?}", old_path); - log::debug!( - "path_symlink (new_dirfd, new_path) = ({:?}, {:?})", - new_dirfd, - new_path + tracing::debug!( + old_path = old_path, + new_dirfd = tracing::field::debug(new_dirfd), + new_path = new_path, + "path symlink" ); unsafe { symlinkat(old_path, new_dirfd.as_raw_fd(), new_path)? 
}; diff --git a/crates/wasi-common/src/sys/unix/mod.rs b/crates/wasi-common/src/sys/unix/mod.rs index 7a28810509..79ab5c9010 100644 --- a/crates/wasi-common/src/sys/unix/mod.rs +++ b/crates/wasi-common/src/sys/unix/mod.rs @@ -50,19 +50,19 @@ impl AsFile for T { pub(super) fn get_file_type(file: &File) -> io::Result { let ft = file.metadata()?.file_type(); let file_type = if ft.is_block_device() { - log::debug!("Host fd {:?} is a block device", file.as_raw_fd()); + tracing::debug!("Host fd {:?} is a block device", file.as_raw_fd()); types::Filetype::BlockDevice } else if ft.is_char_device() { - log::debug!("Host fd {:?} is a char device", file.as_raw_fd()); + tracing::debug!("Host fd {:?} is a char device", file.as_raw_fd()); types::Filetype::CharacterDevice } else if ft.is_dir() { - log::debug!("Host fd {:?} is a directory", file.as_raw_fd()); + tracing::debug!("Host fd {:?} is a directory", file.as_raw_fd()); types::Filetype::Directory } else if ft.is_file() { - log::debug!("Host fd {:?} is a file", file.as_raw_fd()); + tracing::debug!("Host fd {:?} is a file", file.as_raw_fd()); types::Filetype::RegularFile } else if ft.is_socket() { - log::debug!("Host fd {:?} is a socket", file.as_raw_fd()); + tracing::debug!("Host fd {:?} is a socket", file.as_raw_fd()); use yanix::socket::{get_socket_type, SockType}; match unsafe { get_socket_type(file.as_raw_fd())? 
} { SockType::Datagram => types::Filetype::SocketDgram, @@ -70,10 +70,10 @@ pub(super) fn get_file_type(file: &File) -> io::Result { _ => return Err(io::Error::from_raw_os_error(libc::EINVAL)), } } else if ft.is_fifo() { - log::debug!("Host fd {:?} is a fifo", file.as_raw_fd()); + tracing::debug!("Host fd {:?} is a fifo", file.as_raw_fd()); types::Filetype::Unknown } else { - log::debug!("Host fd {:?} is unknown", file.as_raw_fd()); + tracing::debug!("Host fd {:?} is unknown", file.as_raw_fd()); return Err(io::Error::from_raw_os_error(libc::EINVAL)); }; Ok(file_type) @@ -221,12 +221,12 @@ impl From for Errno { libc::ENOTRECOVERABLE => Self::Notrecoverable, libc::ENOTSUP => Self::Notsup, x => { - log::debug!("Unknown errno value: {}", x); + tracing::debug!("Unknown errno value: {}", x); Self::Io } }, None => { - log::debug!("Other I/O error: {}", err); + tracing::debug!("Other I/O error: {}", err); Self::Io } } diff --git a/crates/wasi-common/src/sys/unix/path.rs b/crates/wasi-common/src/sys/unix/path.rs index fcaf98a96b..d109fe96f5 100644 --- a/crates/wasi-common/src/sys/unix/path.rs +++ b/crates/wasi-common/src/sys/unix/path.rs @@ -54,7 +54,7 @@ pub(crate) fn readlinkat(dirfd: &OsDir, path: &str) -> Result { use std::os::unix::prelude::AsRawFd; use yanix::file::readlinkat; - log::debug!("path_get readlinkat path = {:?}", path); + tracing::debug!(path = path, "path_get readlinkat"); let path = unsafe { readlinkat(dirfd.as_raw_fd(), path)? }; let path = from_host(path)?; @@ -123,9 +123,12 @@ pub(crate) fn open( // umask is, but don't set the executable flag, because it isn't yet // meaningful for WASI programs to create executable files. 
- log::debug!("path_open dirfd = {:?}", dirfd); - log::debug!("path_open path = {:?}", path); - log::debug!("path_open oflags = {:?}", nix_all_oflags); + tracing::debug!( + dirfd = tracing::field::debug(dirfd), + path = tracing::field::debug(path), + oflags = tracing::field::debug(nix_all_oflags), + "path_open" + ); let fd_no = unsafe { openat( @@ -148,7 +151,10 @@ pub(crate) fn open( } } Err(err) => { - log::debug!("path_open fstatat error: {:?}", err); + tracing::debug!( + error = tracing::field::debug(&err), + "path_open fstatat error", + ); } } } @@ -164,7 +170,10 @@ pub(crate) fn open( } } Err(err) => { - log::debug!("path_open fstatat error: {:?}", err); + tracing::debug!( + error = tracing::field::debug(&err), + "path_open fstatat error", + ); } } } @@ -180,7 +189,7 @@ pub(crate) fn open( } }; - log::debug!("path_open (host) new_fd = {:?}", new_fd); + tracing::debug!(new_fd = tracing::field::debug(new_fd)); // Determine the type of the new file descriptor and which rights contradict with this type let file = unsafe { File::from_raw_fd(new_fd) }; diff --git a/crates/wasi-common/src/sys/unix/poll.rs b/crates/wasi-common/src/sys/unix/poll.rs index b185bdb93c..8b30861b8d 100644 --- a/crates/wasi-common/src/sys/unix/poll.rs +++ b/crates/wasi-common/src/sys/unix/poll.rs @@ -38,7 +38,7 @@ pub(crate) fn oneoff( let delay = timeout.delay / 1_000_000; // poll syscall requires delay to expressed in milliseconds delay.try_into().unwrap_or(libc::c_int::max_value()) }); - log::debug!("poll_oneoff poll_timeout = {:?}", poll_timeout); + tracing::debug!("poll_oneoff poll_timeout = {:?}", poll_timeout); let ready = loop { match poll(&mut poll_fds, poll_timeout) { @@ -91,15 +91,14 @@ fn handle_fd_event( } for (fd_event, poll_fd) in ready_events { - // log::debug!("poll_oneoff_handle_fd_event fd_event = {:?}", fd_event); - log::debug!("poll_oneoff_handle_fd_event poll_fd = {:?}", poll_fd); + tracing::debug!("poll_oneoff_handle_fd_event poll_fd = {:?}", poll_fd); let revents = 
match poll_fd.revents() { Some(revents) => revents, None => continue, }; - log::debug!("poll_oneoff_handle_fd_event revents = {:?}", revents); + tracing::debug!("poll_oneoff_handle_fd_event revents = {:?}", revents); let nbytes = if fd_event.r#type == types::Eventtype::FdRead { query_nbytes(fd_event.handle)? diff --git a/crates/wasi-common/src/virtfs.rs b/crates/wasi-common/src/virtfs.rs index 0f20ddb509..2b4cd6649f 100644 --- a/crates/wasi-common/src/virtfs.rs +++ b/crates/wasi-common/src/virtfs.rs @@ -1,6 +1,5 @@ use crate::handle::{Handle, HandleRights}; use crate::wasi::{self, types, Errno, Result, RightsExt}; -use log::trace; use std::any::Any; use std::cell::{Cell, RefCell}; use std::collections::hash_map::Entry; @@ -10,6 +9,7 @@ use std::io; use std::io::SeekFrom; use std::path::{Path, PathBuf}; use std::rc::Rc; +use tracing::trace; pub mod pipe; @@ -96,7 +96,7 @@ impl FileContents for VecFileContents { } fn pread(&self, buf: &mut [u8], offset: types::Filesize) -> Result { - trace!(" | pread(buf.len={}, offset={})", buf.len(), offset); + trace!(buffer_length = buf.len(), offset = offset, "pread"); let offset: usize = offset.try_into().map_err(|_| Errno::Inval)?; let data_remaining = self.content.len().saturating_sub(offset); @@ -105,9 +105,7 @@ impl FileContents for VecFileContents { (&mut buf[..read_count]).copy_from_slice(&self.content[offset..][..read_count]); - let res = Ok(read_count); - trace!(" | pread={:?}", res); - res + Ok(read_count) } fn pwrite(&mut self, buf: &[u8], offset: types::Filesize) -> Result { @@ -352,21 +350,11 @@ impl Handle for InMemoryFile { oflags: types::Oflags, fd_flags: types::Fdflags, ) -> Result> { - log::trace!( - "InMemoryFile::openat(path={:?}, read={:?}, write={:?}, oflags={:?}, fd_flags={:?}", - path, - read, - write, - oflags, - fd_flags - ); - if oflags.contains(&types::Oflags::DIRECTORY) { - log::trace!( + tracing::trace!( "InMemoryFile::openat was passed oflags DIRECTORY, but {:?} is a file.", path ); - log::trace!(" 
return Notdir"); return Err(Errno::Notdir); } @@ -526,7 +514,7 @@ impl Handle for VirtualDir { type Item = Result<(types::Dirent, String)>; fn next(&mut self) -> Option { - log::trace!("VirtualDirIter::next continuing from {}", self.start); + tracing::trace!("VirtualDirIter::next continuing from {}", self.start); if self.start == SELF_DIR_COOKIE { self.start += 1; let name = ".".to_owned(); @@ -650,15 +638,6 @@ impl Handle for VirtualDir { oflags: types::Oflags, fd_flags: types::Fdflags, ) -> Result> { - log::trace!( - "VirtualDir::openat(path={:?}, read={:?}, write={:?}, oflags={:?}, fd_flags={:?}", - path, - read, - write, - oflags, - fd_flags - ); - if path == "." { return self.try_clone().map_err(Into::into); } else if path == ".." { @@ -681,19 +660,17 @@ impl Handle for VirtualDir { Entry::Occupied(e) => { let creat_excl_mask = types::Oflags::CREAT | types::Oflags::EXCL; if (oflags & creat_excl_mask) == creat_excl_mask { - log::trace!("VirtualDir::openat was passed oflags CREAT|EXCL, but the file {:?} exists.", file_name); - log::trace!(" return Exist"); + tracing::trace!("VirtualDir::openat was passed oflags CREAT|EXCL, but the file {:?} exists.", file_name); return Err(Errno::Exist); } if oflags.contains(&types::Oflags::DIRECTORY) && e.get().get_file_type() != types::Filetype::Directory { - log::trace!( + tracing::trace!( "VirtualDir::openat was passed oflags DIRECTORY, but {:?} is a file.", file_name ); - log::trace!(" return Notdir"); return Err(Errno::Notdir); } @@ -709,7 +686,7 @@ impl Handle for VirtualDir { return Err(Errno::Nospc); } - log::trace!("VirtualDir::openat creating an InMemoryFile named {}", path); + tracing::trace!("VirtualDir::openat creating an InMemoryFile named {}", path); let file = Box::new(InMemoryFile::memory_backed()); file.fd_flags.set(fd_flags); @@ -757,7 +734,7 @@ impl Handle for VirtualDir { Ok(()) } Entry::Vacant(_) => { - log::trace!( + tracing::trace!( "VirtualDir::remove_directory failed to remove {}, no such entry", 
trimmed_path ); @@ -798,7 +775,7 @@ impl Handle for VirtualDir { Ok(()) } Entry::Vacant(_) => { - log::trace!( + tracing::trace!( "VirtualDir::unlink_file failed to remove {}, no such entry", trimmed_path ); diff --git a/crates/wasi-common/wig/src/hostcalls.rs b/crates/wasi-common/wig/src/hostcalls.rs index 2ea4e6e5eb..a01926743b 100644 --- a/crates/wasi-common/wig/src/hostcalls.rs +++ b/crates/wasi-common/wig/src/hostcalls.rs @@ -96,7 +96,7 @@ fn generate_wrappers(func: &witx::InterfaceFunc, old: bool) -> TokenStream { let ret = #call .err() .unwrap_or(super::wasi::WasiError::ESUCCESS); - log::trace!(" | errno={}", ret); + tracing::trace!(" | errno={}", ret); ret.as_raw_errno() } }; diff --git a/crates/wasi-common/wig/src/wasi.rs b/crates/wasi-common/wig/src/wasi.rs index 9e31879659..60269d28f7 100644 --- a/crates/wasi-common/wig/src/wasi.rs +++ b/crates/wasi-common/wig/src/wasi.rs @@ -201,7 +201,7 @@ pub fn define_struct(args: TokenStream) -> TokenStream { let #name_ident = wasmtime::Func::wrap( store, move |caller: wasmtime::Caller<'_> #(,#shim_arg_decls)*| -> #ret_ty { - log::trace!( + tracing::trace!( #format_str, #(#format_args),* ); @@ -209,7 +209,7 @@ pub fn define_struct(args: TokenStream) -> TokenStream { let memory = match caller.get_export("memory") { Some(wasmtime::Extern::Memory(m)) => m, _ => { - log::warn!("callee does not export a memory as \"memory\""); + tracing::warn!("callee does not export a memory as \"memory\""); let e = wasi_common::old::snapshot_0::wasi::__WASI_ERRNO_INVAL; #handle_early_error } From 8d39d9b1b56f0291e87b344bce04f2d091fe91c9 Mon Sep 17 00:00:00 2001 From: Pat Hickey Date: Tue, 18 Aug 2020 10:42:14 -0700 Subject: [PATCH 03/34] wasmtime-wasi: switch dep from log to tracing --- crates/wasi/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/wasi/Cargo.toml b/crates/wasi/Cargo.toml index 63d14fee81..18a330dbed 100644 --- a/crates/wasi/Cargo.toml +++ b/crates/wasi/Cargo.toml @@ -13,7 +13,7 @@ 
edition = "2018" [dependencies] anyhow = "1.0" -log = { version = "0.4.8", default-features = false } +tracing = "0.1.15" wasi-common = { path = "../wasi-common", version = "0.19.0" } wasmtime = { path = "../wasmtime", version = "0.19.0", default-features = false } wasmtime-runtime = { path = "../runtime", version = "0.19.0" } From 0e1c534e071e4b27063d580700a978840235c5c1 Mon Sep 17 00:00:00 2001 From: Pat Hickey Date: Tue, 18 Aug 2020 10:45:42 -0700 Subject: [PATCH 04/34] wasi-common: fix some warnings --- crates/wasi-common/src/virtfs.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/wasi-common/src/virtfs.rs b/crates/wasi-common/src/virtfs.rs index 2b4cd6649f..9e543a3d24 100644 --- a/crates/wasi-common/src/virtfs.rs +++ b/crates/wasi-common/src/virtfs.rs @@ -345,10 +345,10 @@ impl Handle for InMemoryFile { fn openat( &self, path: &str, - read: bool, - write: bool, + _read: bool, + _write: bool, oflags: types::Oflags, - fd_flags: types::Fdflags, + _fd_flags: types::Fdflags, ) -> Result> { if oflags.contains(&types::Oflags::DIRECTORY) { tracing::trace!( @@ -633,8 +633,8 @@ impl Handle for VirtualDir { fn openat( &self, path: &str, - read: bool, - write: bool, + _read: bool, + _write: bool, oflags: types::Oflags, fd_flags: types::Fdflags, ) -> Result> { From 35c100fdca2e4f25d7af29cee9bce76804cba298 Mon Sep 17 00:00:00 2001 From: Pat Hickey Date: Tue, 18 Aug 2020 11:02:14 -0700 Subject: [PATCH 05/34] delete cranelift-faerie see https://github.com/bytecodealliance/wasmtime/pull/1816 DEPRECATION NOTICE: the Cranelift developer team intends to stop maintaining the `cranelift-faerie` crate and remove it from the `wasmtime` git repository on or after August 3, 2020. We recommend users use its successor, the `cranelift-object` crate. 
--- Cargo.lock | 54 ---- cranelift/Cargo.toml | 1 - cranelift/docs/index.md | 5 - cranelift/faerie/Cargo.toml | 21 -- cranelift/faerie/LICENSE | 220 -------------- cranelift/faerie/README.md | 9 - cranelift/faerie/src/backend.rs | 482 ------------------------------ cranelift/faerie/src/container.rs | 65 ---- cranelift/faerie/src/lib.rs | 35 --- cranelift/module/README.md | 3 - cranelift/module/src/backend.rs | 3 - scripts/publish.rs | 1 - 12 files changed, 899 deletions(-) delete mode 100644 cranelift/faerie/Cargo.toml delete mode 100644 cranelift/faerie/LICENSE delete mode 100644 cranelift/faerie/README.md delete mode 100644 cranelift/faerie/src/backend.rs delete mode 100644 cranelift/faerie/src/container.rs delete mode 100644 cranelift/faerie/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 9e19c67c2e..ec6233a1f5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -414,18 +414,6 @@ dependencies = [ "serde", ] -[[package]] -name = "cranelift-faerie" -version = "0.66.0" -dependencies = [ - "anyhow", - "cranelift-codegen", - "cranelift-module", - "faerie", - "goblin", - "target-lexicon", -] - [[package]] name = "cranelift-filetests" version = "0.66.0" @@ -561,7 +549,6 @@ dependencies = [ "cranelift", "cranelift-codegen", "cranelift-entity", - "cranelift-faerie", "cranelift-filetests", "cranelift-frontend", "cranelift-interpreter", @@ -825,21 +812,6 @@ version = "0.0.0" name = "example-wasi-wasm" version = "0.0.0" -[[package]] -name = "faerie" -version = "0.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfef65b0e94693295c5d2fe2506f0ee6f43465342d4b5331659936aee8b16084" -dependencies = [ - "goblin", - "indexmap", - "log", - "scroll", - "string-interner", - "target-lexicon", - "thiserror", -] - [[package]] name = "fake-simd" version = "0.1.2" @@ -951,17 +923,6 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" -[[package]] 
-name = "goblin" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3081214398d39e4bd7f2c1975f0488ed04614ffdd976c6fc7a0708278552c0da" -dependencies = [ - "log", - "plain", - "scroll", -] - [[package]] name = "hashbrown" version = "0.6.3" @@ -1384,12 +1345,6 @@ dependencies = [ name = "peepmatic-traits" version = "0.66.0" -[[package]] -name = "plain" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" - [[package]] name = "ppv-lite86" version = "0.2.8" @@ -1962,15 +1917,6 @@ version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4fbd8c65873d2e06991c33399d5e4bccba6ba04743fe99e9656afa0c64137d2c" -[[package]] -name = "string-interner" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd710eadff449a1531351b0e43eb81ea404336fa2f56c777427ab0e32a4cf183" -dependencies = [ - "serde", -] - [[package]] name = "strsim" version = "0.8.0" diff --git a/cranelift/Cargo.toml b/cranelift/Cargo.toml index 450e4552f1..edcd9580ca 100644 --- a/cranelift/Cargo.toml +++ b/cranelift/Cargo.toml @@ -25,7 +25,6 @@ cranelift-wasm = { path = "wasm", version = "0.66.0", optional = true } cranelift-native = { path = "native", version = "0.66.0" } cranelift-filetests = { path = "filetests", version = "0.66.0" } cranelift-module = { path = "module", version = "0.66.0" } -cranelift-faerie = { path = "faerie", version = "0.66.0" } cranelift-object = { path = "object", version = "0.66.0" } cranelift-simplejit = { path = "simplejit", version = "0.66.0" } cranelift-preopt = { path = "preopt", version = "0.66.0" } diff --git a/cranelift/docs/index.md b/cranelift/docs/index.md index 2ca568d379..2334325f15 100644 --- a/cranelift/docs/index.md +++ b/cranelift/docs/index.md @@ -52,11 +52,6 @@ emits native object files using the `object `_ library. 
- - [cranelift-faerie](https://docs.rs/cranelift-faerie) - This crate provides a faerie-based backend for `cranelift-module`, which - emits native object files using the - `faerie `_ library. - - [cranelift-simplejit](https://docs.rs/cranelift-simplejit) This crate provides a simple JIT backend for `cranelift-module`, which emits code and data into memory. diff --git a/cranelift/faerie/Cargo.toml b/cranelift/faerie/Cargo.toml deleted file mode 100644 index 6c3e3772b2..0000000000 --- a/cranelift/faerie/Cargo.toml +++ /dev/null @@ -1,21 +0,0 @@ -[package] -name = "cranelift-faerie" -version = "0.66.0" -authors = ["The Cranelift Project Developers"] -description = "Emit Cranelift output to native object files with Faerie" -repository = "https://github.com/bytecodealliance/wasmtime" -documentation = "https://docs.rs/cranelift-faerie" -license = "Apache-2.0 WITH LLVM-exception" -readme = "README.md" -edition = "2018" - -[dependencies] -cranelift-module = { path = "../module", version = "0.66.0" } -cranelift-codegen = { path = "../codegen", version = "0.66.0", default-features = false, features = ["std"] } -faerie = "0.15.0" -goblin = "0.1.0" -anyhow = "1.0" -target-lexicon = "0.10" - -[badges] -maintenance = { status = "experimental" } diff --git a/cranelift/faerie/LICENSE b/cranelift/faerie/LICENSE deleted file mode 100644 index f9d81955f4..0000000000 --- a/cranelift/faerie/LICENSE +++ /dev/null @@ -1,220 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. 
- - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of 
the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - ---- LLVM Exceptions to the Apache 2.0 License ---- - -As an exception, if, as a result of your compiling your source code, portions -of this Software are embedded into an Object form of such source code, you -may redistribute such embedded portions in such Object form without complying -with the conditions of Sections 4(a), 4(b) and 4(d) of the License. 
- -In addition, if you combine or link compiled forms of this Software with -software that is licensed under the GPLv2 ("Combined Software") and if a -court of competent jurisdiction determines that the patent provision (Section -3), the indemnity provision (Section 9) or other Section of the License -conflicts with the conditions of the GPLv2, you may retroactively and -prospectively choose to deem waived or otherwise exclude such Section(s) of -the License, but only in their entirety and only with respect to the Combined -Software. - diff --git a/cranelift/faerie/README.md b/cranelift/faerie/README.md deleted file mode 100644 index 2b7aaeb875..0000000000 --- a/cranelift/faerie/README.md +++ /dev/null @@ -1,9 +0,0 @@ -This crate contains a library that enables -[Cranelift](https://crates.io/crates/cranelift) -to emit native object (".o") files, using the -[Faerie](https://crates.io/crates/faerie) library. - -DEPRECATION NOTICE: the Cranelift developer team intends to stop maintaining -the `cranelift-faerie` crate and remove it from the `wasmtime` git repository -on or after August 3, 2020. We recommend users use its successor, the -`cranelift-object` crate. diff --git a/cranelift/faerie/src/backend.rs b/cranelift/faerie/src/backend.rs deleted file mode 100644 index 8b7d3df5ae..0000000000 --- a/cranelift/faerie/src/backend.rs +++ /dev/null @@ -1,482 +0,0 @@ -//! Defines `FaerieBackend`. - -use crate::container; -use anyhow::anyhow; -use cranelift_codegen::binemit::{ - Addend, CodeOffset, NullStackMapSink, Reloc, RelocSink, StackMap, StackMapSink, TrapSink, -}; -use cranelift_codegen::isa::TargetIsa; -use cranelift_codegen::{self, binemit, ir}; -use cranelift_module::{ - Backend, DataContext, DataDescription, DataId, FuncId, Init, Linkage, ModuleError, - ModuleNamespace, ModuleResult, -}; -use faerie; -use std::convert::TryInto; -use std::fs::File; -use target_lexicon::Triple; - -/// A builder for `FaerieBackend`. 
-pub struct FaerieBuilder { - isa: Box, - name: String, - libcall_names: Box String>, -} - -impl FaerieBuilder { - /// Create a new `FaerieBuilder` using the given Cranelift target, that - /// can be passed to - /// [`Module::new`](cranelift_module::Module::new) - /// - /// Faerie output requires that TargetIsa have PIC (Position Independent Code) enabled. - /// - /// The `libcall_names` function provides a way to translate `cranelift_codegen`'s `ir::LibCall` - /// enum to symbols. LibCalls are inserted in the IR as part of the legalization for certain - /// floating point instructions, and for stack probes. If you don't know what to use for this - /// argument, use `cranelift_module::default_libcall_names()`. - #[deprecated( - since = "0.65.0", - note = "the Cranelift developer team intends to stop maintaining the `cranelift-faerie` - crate and remove it from the `wasmtime` git repository on or after August 3, 2020. We - recommend users use its successor, the `cranelift-object` crate." - )] - pub fn new( - isa: Box, - name: String, - libcall_names: Box String>, - ) -> ModuleResult { - if !isa.flags().is_pic() { - return Err(ModuleError::Backend(anyhow!( - "faerie requires TargetIsa be PIC" - ))); - } - Ok(Self { - isa, - name, - libcall_names, - }) - } -} - -/// A `FaerieBackend` implements `Backend` and emits ".o" files using the `faerie` library. -/// -/// See the `FaerieBuilder` for a convenient way to construct `FaerieBackend` instances. 
-pub struct FaerieBackend { - isa: Box, - artifact: faerie::Artifact, - libcall_names: Box String>, -} - -pub struct FaerieCompiledFunction { - code_length: u32, -} - -impl FaerieCompiledFunction { - pub fn code_length(&self) -> u32 { - self.code_length - } -} - -pub struct FaerieCompiledData {} - -impl Backend for FaerieBackend { - type Builder = FaerieBuilder; - - type CompiledFunction = FaerieCompiledFunction; - type CompiledData = FaerieCompiledData; - - // There's no need to return individual artifacts; we're writing them into - // the output file instead. - type FinalizedFunction = (); - type FinalizedData = (); - - /// The returned value here provides functions for emitting object files - /// to memory and files. - type Product = FaerieProduct; - - /// Create a new `FaerieBackend` using the given Cranelift target. - fn new(builder: FaerieBuilder) -> Self { - Self { - artifact: faerie::Artifact::new(builder.isa.triple().clone(), builder.name), - isa: builder.isa, - libcall_names: builder.libcall_names, - } - } - - fn isa(&self) -> &dyn TargetIsa { - &*self.isa - } - - fn declare_function(&mut self, _id: FuncId, name: &str, linkage: Linkage) { - self.artifact - .declare(name, translate_function_linkage(linkage)) - .expect("inconsistent declarations"); - } - - fn declare_data( - &mut self, - _id: DataId, - name: &str, - linkage: Linkage, - writable: bool, - tls: bool, - align: Option, - ) { - assert!(!tls, "Faerie doesn't yet support TLS"); - self.artifact - .declare(name, translate_data_linkage(linkage, writable, align)) - .expect("inconsistent declarations"); - } - - fn define_function( - &mut self, - _id: FuncId, - name: &str, - ctx: &cranelift_codegen::Context, - namespace: &ModuleNamespace, - total_size: u32, - trap_sink: &mut TS, - ) -> ModuleResult - where - TS: TrapSink, - { - let mut code: Vec = vec![0; total_size as usize]; - // TODO: Replace this with FaerieStackMapSink once it is implemented. 
- let mut stack_map_sink = NullStackMapSink {}; - - // Non-lexical lifetimes would obviate the braces here. - { - let mut reloc_sink = FaerieRelocSink { - triple: self.isa.triple().clone(), - artifact: &mut self.artifact, - name, - namespace, - libcall_names: &*self.libcall_names, - }; - - unsafe { - ctx.emit_to_memory( - &*self.isa, - code.as_mut_ptr(), - &mut reloc_sink, - trap_sink, - &mut stack_map_sink, - ) - }; - } - - // because `define` will take ownership of code, this is our last chance - let code_length = code.len() as u32; - - self.artifact - .define(name, code) - .expect("inconsistent declaration"); - - Ok(FaerieCompiledFunction { code_length }) - } - - fn define_function_bytes( - &mut self, - _id: FuncId, - name: &str, - bytes: &[u8], - _namespace: &ModuleNamespace, - ) -> ModuleResult { - let code_length: u32 = match bytes.len().try_into() { - Ok(code_length) => code_length, - _ => Err(ModuleError::FunctionTooLarge(name.to_string()))?, - }; - - self.artifact - .define(name, bytes.to_vec()) - .expect("inconsistent declaration"); - - Ok(FaerieCompiledFunction { code_length }) - } - - fn define_data( - &mut self, - _id: DataId, - name: &str, - _writable: bool, - tls: bool, - _align: Option, - data_ctx: &DataContext, - namespace: &ModuleNamespace, - ) -> ModuleResult { - assert!(!tls, "Faerie doesn't yet support TLS"); - let &DataDescription { - ref init, - ref function_decls, - ref data_decls, - ref function_relocs, - ref data_relocs, - ref custom_segment_section, - } = data_ctx.description(); - - if let Some((segment, section)) = custom_segment_section { - return Err(cranelift_module::ModuleError::Backend(anyhow::anyhow!( - "Custom section not supported by cranelift-faerie: `{}:{}`", - segment, - section - ))); - } - - for &(offset, id) in function_relocs { - let to = &namespace.get_function_decl(&function_decls[id]).name; - self.artifact - .link(faerie::Link { - from: name, - to, - at: u64::from(offset), - }) - .map_err(|e| 
ModuleError::Backend(e.into()))?; - } - for &(offset, id, addend) in data_relocs { - debug_assert_eq!( - addend, 0, - "faerie doesn't support addends in data section relocations yet" - ); - let to = &namespace.get_data_decl(&data_decls[id]).name; - self.artifact - .link(faerie::Link { - from: name, - to, - at: u64::from(offset), - }) - .map_err(|e| ModuleError::Backend(e.into()))?; - } - - match *init { - Init::Uninitialized => { - panic!("data is not initialized yet"); - } - Init::Zeros { size } => { - self.artifact - .define_zero_init(name, size) - .expect("inconsistent declaration"); - } - Init::Bytes { ref contents } => { - self.artifact - .define(name, contents.to_vec()) - .expect("inconsistent declaration"); - } - } - - Ok(FaerieCompiledData {}) - } - - fn write_data_funcaddr( - &mut self, - _data: &mut FaerieCompiledData, - _offset: usize, - _what: ir::FuncRef, - ) { - unimplemented!() - } - - fn write_data_dataaddr( - &mut self, - _data: &mut FaerieCompiledData, - _offset: usize, - _what: ir::GlobalValue, - _usize: binemit::Addend, - ) { - unimplemented!() - } - - fn finalize_function( - &mut self, - _id: FuncId, - _func: &FaerieCompiledFunction, - _namespace: &ModuleNamespace, - ) { - // Nothing to do. - } - - fn get_finalized_function(&self, _func: &FaerieCompiledFunction) { - // Nothing to do. - } - - fn finalize_data( - &mut self, - _id: DataId, - _data: &FaerieCompiledData, - _namespace: &ModuleNamespace, - ) { - // Nothing to do. - } - - fn get_finalized_data(&self, _data: &FaerieCompiledData) { - // Nothing to do. - } - - fn publish(&mut self) { - // Nothing to do. - } - - fn finish(self, _namespace: &ModuleNamespace) -> FaerieProduct { - FaerieProduct { - artifact: self.artifact, - } - } -} - -/// This is the output of `Module`'s -/// [`finish`](../cranelift_module/struct.Module.html#method.finish) function. -/// It provides functions for writing out the object file to memory or a file. 
-#[derive(Debug)] -pub struct FaerieProduct { - /// Faerie artifact with all functions, data, and links from the module defined - pub artifact: faerie::Artifact, -} - -impl FaerieProduct { - /// Return the name of the output file. This is the name passed into `new`. - pub fn name(&self) -> &str { - &self.artifact.name - } - - /// Call `emit` on the faerie `Artifact`, producing bytes in memory. - pub fn emit(&self) -> Result, faerie::ArtifactError> { - self.artifact.emit() - } - - /// Call `write` on the faerie `Artifact`, writing to a file. - pub fn write(&self, sink: File) -> Result<(), faerie::ArtifactError> { - self.artifact.write(sink) - } -} - -fn translate_function_linkage(linkage: Linkage) -> faerie::Decl { - match linkage { - Linkage::Import => faerie::Decl::function_import().into(), - Linkage::Local => faerie::Decl::function().into(), - Linkage::Preemptible => faerie::Decl::function().weak().into(), - Linkage::Hidden => faerie::Decl::function().global().hidden().into(), - Linkage::Export => faerie::Decl::function().global().into(), - } -} - -fn translate_data_linkage(linkage: Linkage, writable: bool, align: Option) -> faerie::Decl { - let align = align.map(u64::from); - match linkage { - Linkage::Import => faerie::Decl::data_import().into(), - Linkage::Local => faerie::Decl::data() - .with_writable(writable) - .with_align(align) - .into(), - Linkage::Preemptible => faerie::Decl::data() - .weak() - .with_writable(writable) - .with_align(align) - .into(), - Linkage::Hidden => faerie::Decl::data() - .global() - .hidden() - .with_writable(writable) - .with_align(align) - .into(), - Linkage::Export => faerie::Decl::data() - .global() - .with_writable(writable) - .with_align(align) - .into(), - } -} - -struct FaerieRelocSink<'a> { - triple: Triple, - artifact: &'a mut faerie::Artifact, - name: &'a str, - namespace: &'a ModuleNamespace<'a, FaerieBackend>, - libcall_names: &'a dyn Fn(ir::LibCall) -> String, -} - -impl<'a> RelocSink for FaerieRelocSink<'a> { - fn 
reloc_block(&mut self, _offset: CodeOffset, _reloc: Reloc, _block_offset: CodeOffset) { - unimplemented!(); - } - - fn reloc_external( - &mut self, - offset: CodeOffset, - _srcloc: ir::SourceLoc, - reloc: Reloc, - name: &ir::ExternalName, - addend: Addend, - ) { - let ref_name: String = match *name { - ir::ExternalName::User { .. } => { - if self.namespace.is_function(name) { - self.namespace.get_function_decl(name).name.clone() - } else { - self.namespace.get_data_decl(name).name.clone() - } - } - ir::ExternalName::LibCall(ref libcall) => { - let sym = (self.libcall_names)(*libcall); - self.artifact - .declare(sym.clone(), faerie::Decl::function_import()) - .expect("faerie declaration of libcall"); - sym - } - _ => panic!("invalid ExternalName {}", name), - }; - let (raw_reloc, raw_addend) = container::raw_relocation(reloc, &self.triple); - // TODO: Handle overflow. - let final_addend = addend + raw_addend; - let addend_i32 = final_addend as i32; - debug_assert!(i64::from(addend_i32) == final_addend); - self.artifact - .link_with( - faerie::Link { - from: self.name, - to: &ref_name, - at: u64::from(offset), - }, - faerie::Reloc::Raw { - reloc: raw_reloc, - addend: addend_i32, - }, - ) - .expect("faerie relocation error"); - } - - fn reloc_jt(&mut self, _offset: CodeOffset, reloc: Reloc, _jt: ir::JumpTable) { - match reloc { - Reloc::X86PCRelRodata4 => { - // Not necessary to record this unless we are going to split apart code and its - // jumptbl/rodata. - } - _ => { - panic!("Unhandled reloc"); - } - } - } - - fn reloc_constant(&mut self, _offset: CodeOffset, reloc: Reloc, _jt: ir::ConstantOffset) { - match reloc { - Reloc::X86PCRelRodata4 => { - // Not necessary to record this unless we are going to split apart code and its - // jumptbl/rodata. 
- } - _ => { - panic!("Unhandled reloc"); - } - } - } -} - -#[allow(dead_code)] -struct FaerieStackMapSink<'a> { - artifact: &'a mut faerie::Artifact, - namespace: &'a ModuleNamespace<'a, FaerieBackend>, -} - -/// Faerie is currently not used in SpiderMonkey. Methods are unimplemented. -impl<'a> StackMapSink for FaerieStackMapSink<'a> { - fn add_stack_map(&mut self, _: CodeOffset, _: StackMap) { - unimplemented!("faerie support for stack maps"); - } -} diff --git a/cranelift/faerie/src/container.rs b/cranelift/faerie/src/container.rs deleted file mode 100644 index 51355f4435..0000000000 --- a/cranelift/faerie/src/container.rs +++ /dev/null @@ -1,65 +0,0 @@ -//! Utilities for working with Faerie container formats. - -use cranelift_codegen::binemit::Reloc; -use target_lexicon::{Architecture, BinaryFormat, Triple}; - -/// An object file format. -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -pub enum Format { - /// The ELF object file format. - ELF, - /// The Mach-O object file format. - MachO, -} - -/// Translate from a Cranelift `Reloc` to a raw object-file-format-specific -/// relocation code and relocation-implied addend. -pub fn raw_relocation(reloc: Reloc, triple: &Triple) -> (u32, i64) { - match triple.binary_format { - BinaryFormat::Elf => { - use goblin::elf; - ( - match triple.architecture { - Architecture::X86_64 => { - match reloc { - Reloc::Abs4 => elf::reloc::R_X86_64_32, - Reloc::Abs8 => elf::reloc::R_X86_64_64, - Reloc::X86PCRel4 | Reloc::X86CallPCRel4 => elf::reloc::R_X86_64_PC32, - // TODO: Get Cranelift to tell us when we can use - // R_X86_64_GOTPCRELX/R_X86_64_REX_GOTPCRELX. - Reloc::X86CallPLTRel4 => elf::reloc::R_X86_64_PLT32, - Reloc::X86GOTPCRel4 => elf::reloc::R_X86_64_GOTPCREL, - _ => unimplemented!(), - } - } - _ => unimplemented!("unsupported architecture: {}", triple), - }, - // Most ELF relocations do not include an implicit addend. 
- 0, - ) - } - BinaryFormat::Macho => { - use goblin::mach; - match triple.architecture { - Architecture::X86_64 => { - match reloc { - Reloc::Abs8 => (u32::from(mach::relocation::R_ABS), 0), - // Mach-O doesn't need us to distinguish between PC-relative calls - // and PLT calls, but it does need us to distinguish between calls - // and non-calls. And, it includes the 4-byte addend implicitly. - Reloc::X86PCRel4 => (u32::from(mach::relocation::X86_64_RELOC_SIGNED), 4), - Reloc::X86CallPCRel4 | Reloc::X86CallPLTRel4 => { - (u32::from(mach::relocation::X86_64_RELOC_BRANCH), 4) - } - Reloc::X86GOTPCRel4 => { - (u32::from(mach::relocation::X86_64_RELOC_GOT_LOAD), 4) - } - _ => unimplemented!("unsupported mach-o reloc: {}", reloc), - } - } - _ => unimplemented!("unsupported architecture: {}", triple), - } - } - _ => unimplemented!("unsupported format"), - } -} diff --git a/cranelift/faerie/src/lib.rs b/cranelift/faerie/src/lib.rs deleted file mode 100644 index 67da33565b..0000000000 --- a/cranelift/faerie/src/lib.rs +++ /dev/null @@ -1,35 +0,0 @@ -//! Top-level lib.rs for `cranelift_faerie`. -//! -//! Users of this module should not have to depend on faerie directly. - -#![deny( - missing_docs, - trivial_numeric_casts, - unused_extern_crates, - unstable_features -)] -#![warn(unused_import_braces)] -#![cfg_attr(feature = "clippy", plugin(clippy(conf_file = "../../clippy.toml")))] -#![cfg_attr(feature = "cargo-clippy", allow(clippy::new_without_default))] -#![cfg_attr( - feature = "cargo-clippy", - warn( - clippy::float_arithmetic, - clippy::mut_mut, - clippy::nonminimal_bool, - clippy::option_map_unwrap_or, - clippy::option_map_unwrap_or_else, - clippy::print_stdout, - clippy::unicode_not_nfc, - clippy::use_self - ) -)] - -mod backend; -mod container; - -pub use crate::backend::{FaerieBackend, FaerieBuilder, FaerieProduct}; -pub use crate::container::Format; - -/// Version number of this crate. 
-pub const VERSION: &str = env!("CARGO_PKG_VERSION"); diff --git a/cranelift/module/README.md b/cranelift/module/README.md index 07e7bd998d..a357aa637d 100644 --- a/cranelift/module/README.md +++ b/cranelift/module/README.md @@ -15,9 +15,6 @@ following `Backend` implementations: code to memory for direct execution. - `ObjectBackend`, provided by [cranelift-object], which emits native object files. - - `FaerieBackend`, provided by [cranelift-faerie], which emits native - object files. [cranelift-simplejit]: https://crates.io/crates/cranelift-simplejit [cranelift-object]: https://crates.io/crates/cranelift-object -[cranelift-faerie]: https://crates.io/crates/cranelift-faerie diff --git a/cranelift/module/src/backend.rs b/cranelift/module/src/backend.rs index 4743e4fe61..ff1372208b 100644 --- a/cranelift/module/src/backend.rs +++ b/cranelift/module/src/backend.rs @@ -22,12 +22,9 @@ use std::string::String; /// the contents of a `Module` to memory which can be directly executed. /// - `ObjectBackend`, defined in [cranelift-object], which writes the /// contents of a `Module` out as a native object file. -/// - `FaerieBackend`, defined in [cranelift-faerie], which writes the -/// contents of a `Module` out as a native object file. 
/// /// [cranelift-simplejit]: https://docs.rs/cranelift-simplejit/ /// [cranelift-object]: https://docs.rs/cranelift-object/ -/// [cranelift-faerie]: https://docs.rs/cranelift-faerie/ pub trait Backend where Self: marker::Sized, diff --git a/scripts/publish.rs b/scripts/publish.rs index c61870edbe..152f6b960d 100644 --- a/scripts/publish.rs +++ b/scripts/publish.rs @@ -34,7 +34,6 @@ const CRATES_TO_PUBLISH: &[&str] = &[ "cranelift-preopt", "cranelift-frontend", "cranelift-wasm", - "cranelift-faerie", "cranelift-native", "cranelift-object", "cranelift-interpreter", From 73aef4c6ca3038e82a8c35c72d5a6aa1b1fa1a88 Mon Sep 17 00:00:00 2001 From: Pat Hickey Date: Tue, 18 Aug 2020 15:01:04 -0700 Subject: [PATCH 06/34] guess i forgot a lot of these that dont build on linux --- Cargo.lock | 2 +- .../snapshot_0/sys/unix/bsd/hostcalls_impl.rs | 10 ++++---- .../old/snapshot_0/sys/windows/host_impl.rs | 4 ++-- .../sys/windows/hostcalls_impl/fs.rs | 18 +++++++------- .../sys/windows/hostcalls_impl/fs_helpers.rs | 6 ++--- crates/wasi-common/src/sys/unix/bsd/path.rs | 10 ++++---- crates/wasi-common/src/sys/windows/fd.rs | 2 +- crates/wasi-common/src/sys/windows/mod.rs | 4 ++-- crates/wasi-common/src/sys/windows/path.rs | 24 +++++++++---------- crates/wasi-common/src/sys/windows/poll.rs | 4 ++-- crates/wasi-common/yanix/Cargo.toml | 2 +- crates/wasi-common/yanix/src/filetime.rs | 2 +- 12 files changed, 44 insertions(+), 44 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bd3f302bf5..6515517faf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2804,7 +2804,7 @@ dependencies = [ "cfg-if", "filetime", "libc", - "log", + "tracing", ] [[package]] diff --git a/crates/wasi-common/src/old/snapshot_0/sys/unix/bsd/hostcalls_impl.rs b/crates/wasi-common/src/old/snapshot_0/sys/unix/bsd/hostcalls_impl.rs index 3182ec4dc3..ca59b30d49 100644 --- a/crates/wasi-common/src/old/snapshot_0/sys/unix/bsd/hostcalls_impl.rs +++ b/crates/wasi-common/src/old/snapshot_0/sys/unix/bsd/hostcalls_impl.rs @@ 
-36,7 +36,7 @@ pub(crate) fn path_unlink_file(resolved: PathGet) -> WasiResult<()> { } } Err(err) => { - log::debug!("path_unlink_file fstatat error: {:?}", err); + tracing::debug!("path_unlink_file fstatat error: {:?}", err); } } } @@ -50,8 +50,8 @@ pub(crate) fn path_unlink_file(resolved: PathGet) -> WasiResult<()> { pub(crate) fn path_symlink(old_path: &str, resolved: PathGet) -> WasiResult<()> { use yanix::file::{fstatat, symlinkat, AtFlags}; - log::debug!("path_symlink old_path = {:?}", old_path); - log::debug!("path_symlink resolved = {:?}", resolved); + tracing::debug!("path_symlink old_path = {:?}", old_path); + tracing::debug!("path_symlink resolved = {:?}", resolved); match unsafe { symlinkat(old_path, resolved.dirfd().as_raw_fd(), resolved.path()) } { Err(err) => { @@ -71,7 +71,7 @@ pub(crate) fn path_symlink(old_path: &str, resolved: PathGet) -> WasiResult<()> } { Ok(_) => return Err(WasiError::EEXIST), Err(err) => { - log::debug!("path_symlink fstatat error: {:?}", err); + tracing::debug!("path_symlink fstatat error: {:?}", err); } } } @@ -119,7 +119,7 @@ pub(crate) fn path_rename(resolved_old: PathGet, resolved_new: PathGet) -> WasiR } } Err(err) => { - log::debug!("path_rename fstatat error: {:?}", err); + tracing::debug!("path_rename fstatat error: {:?}", err); } } } diff --git a/crates/wasi-common/src/old/snapshot_0/sys/windows/host_impl.rs b/crates/wasi-common/src/old/snapshot_0/sys/windows/host_impl.rs index 53d8073724..1ebdd6bc48 100644 --- a/crates/wasi-common/src/old/snapshot_0/sys/windows/host_impl.rs +++ b/crates/wasi-common/src/old/snapshot_0/sys/windows/host_impl.rs @@ -43,12 +43,12 @@ impl From for WasiError { winerror::ERROR_DIRECTORY => Self::ENOTDIR, winerror::ERROR_ALREADY_EXISTS => Self::EEXIST, x => { - log::debug!("unknown error value: {}", x); + tracing::debug!("unknown error value: {}", x); Self::EIO } }, None => { - log::debug!("Other I/O error: {}", err); + tracing::debug!("Other I/O error: {}", err); Self::EIO } } diff --git 
a/crates/wasi-common/src/old/snapshot_0/sys/windows/hostcalls_impl/fs.rs b/crates/wasi-common/src/old/snapshot_0/sys/windows/hostcalls_impl/fs.rs index 1fb0796b1c..d1830d8273 100644 --- a/crates/wasi-common/src/old/snapshot_0/sys/windows/hostcalls_impl/fs.rs +++ b/crates/wasi-common/src/old/snapshot_0/sys/windows/hostcalls_impl/fs.rs @@ -9,13 +9,13 @@ use crate::old::snapshot_0::sys::entry_impl::determine_type_rights; use crate::old::snapshot_0::sys::host_impl::{self, path_from_host}; use crate::old::snapshot_0::sys::hostcalls_impl::fs_helpers::PathGetExt; use crate::old::snapshot_0::wasi::{self, WasiError, WasiResult}; -use log::{debug, trace}; use std::convert::TryInto; use std::fs::{File, Metadata, OpenOptions}; use std::io::{self, Seek, SeekFrom}; use std::os::windows::fs::{FileExt, OpenOptionsExt}; use std::os::windows::prelude::{AsRawHandle, FromRawHandle}; use std::path::{Path, PathBuf}; +use tracing::{debug, trace}; use winapi::shared::winerror; use winx::file::{AccessMode, CreationDisposition, FileModeInformation, Flags}; @@ -156,7 +156,7 @@ pub(crate) fn path_open( } Err(err) => match err.raw_os_error() { Some(code) => { - log::debug!("path_open at symlink_metadata error code={:?}", code); + tracing::debug!("path_open at symlink_metadata error code={:?}", code); if code as u32 != winerror::ERROR_FILE_NOT_FOUND { return Err(err.into()); @@ -165,7 +165,7 @@ pub(crate) fn path_open( // trying to open it } None => { - log::debug!("Inconvertible OS error: {}", err); + tracing::debug!("Inconvertible OS error: {}", err); return Err(WasiError::EIO); } }, @@ -397,7 +397,7 @@ pub(crate) fn path_rename(resolved_old: PathGet, resolved_new: PathGet) -> WasiR }; match err.raw_os_error() { Some(code) => { - log::debug!("path_rename at rename error code={:?}", code); + tracing::debug!("path_rename at rename error code={:?}", code); match code as u32 { winerror::ERROR_ACCESS_DENIED => { // So most likely dealing with new_path == dir. 
@@ -427,7 +427,7 @@ pub(crate) fn path_rename(resolved_old: PathGet, resolved_new: PathGet) -> WasiR Err(err.into()) } None => { - log::debug!("Inconvertible OS error: {}", err); + tracing::debug!("Inconvertible OS error: {}", err); Err(WasiError::EIO) } } @@ -474,7 +474,7 @@ pub(crate) fn path_symlink(old_path: &str, resolved: PathGet) -> WasiResult<()> }; match err.raw_os_error() { Some(code) => { - log::debug!("path_symlink at symlink_file error code={:?}", code); + tracing::debug!("path_symlink at symlink_file error code={:?}", code); match code as u32 { winerror::ERROR_NOT_A_REPARSE_POINT => { // try creating a dir symlink instead @@ -500,7 +500,7 @@ pub(crate) fn path_symlink(old_path: &str, resolved: PathGet) -> WasiResult<()> Err(err.into()) } None => { - log::debug!("Inconvertible OS error: {}", err); + tracing::debug!("Inconvertible OS error: {}", err); Err(WasiError::EIO) } } @@ -526,7 +526,7 @@ pub(crate) fn path_unlink_file(resolved: PathGet) -> WasiResult<()> { }; match err.raw_os_error() { Some(code) => { - log::debug!("path_unlink_file at symlink_file error code={:?}", code); + tracing::debug!("path_unlink_file at symlink_file error code={:?}", code); if code as u32 == winerror::ERROR_ACCESS_DENIED { // try unlinking a dir symlink instead return fs::remove_dir(path).map_err(Into::into); @@ -535,7 +535,7 @@ pub(crate) fn path_unlink_file(resolved: PathGet) -> WasiResult<()> { Err(err.into()) } None => { - log::debug!("Inconvertible OS error: {}", err); + tracing::debug!("Inconvertible OS error: {}", err); Err(WasiError::EIO) } } diff --git a/crates/wasi-common/src/old/snapshot_0/sys/windows/hostcalls_impl/fs_helpers.rs b/crates/wasi-common/src/old/snapshot_0/sys/windows/hostcalls_impl/fs_helpers.rs index 63efce5a4c..7403a1f428 100644 --- a/crates/wasi-common/src/old/snapshot_0/sys/windows/hostcalls_impl/fs_helpers.rs +++ b/crates/wasi-common/src/old/snapshot_0/sys/windows/hostcalls_impl/fs_helpers.rs @@ -61,7 +61,7 @@ pub(crate) fn openat(dirfd: 
&File, path: &str) -> WasiResult { Err(e) => e, }; if let Some(code) = err.raw_os_error() { - log::debug!("openat error={:?}", code); + tracing::debug!("openat error={:?}", code); if code as u32 == winerror::ERROR_INVALID_NAME { return Err(WasiError::ENOTDIR); } @@ -90,7 +90,7 @@ pub(crate) fn readlinkat(dirfd: &File, s_path: &str) -> WasiResult { Err(e) => e, }; if let Some(code) = err.raw_os_error() { - log::debug!("readlinkat error={:?}", code); + tracing::debug!("readlinkat error={:?}", code); if code as u32 == winerror::ERROR_INVALID_NAME { if s_path.ends_with('/') { // strip "/" and check if exists @@ -130,7 +130,7 @@ pub(crate) fn concatenate>(dirfd: &File, path: P) -> WasiResult

Result<()> { } } Err(err) => { - log::debug!("path_unlink_file fstatat error: {:?}", err); + tracing::debug!("path_unlink_file fstatat error: {:?}", err); } } } @@ -38,8 +38,8 @@ pub(crate) fn unlink_file(dirfd: &OsDir, path: &str) -> Result<()> { pub(crate) fn symlink(old_path: &str, new_dirfd: &OsDir, new_path: &str) -> Result<()> { use yanix::file::{fstatat, symlinkat, AtFlags}; - log::debug!("path_symlink old_path = {:?}", old_path); - log::debug!( + tracing::debug!("path_symlink old_path = {:?}", old_path); + tracing::debug!( "path_symlink (new_dirfd, new_path) = ({:?}, {:?})", new_dirfd, new_path @@ -58,7 +58,7 @@ pub(crate) fn symlink(old_path: &str, new_dirfd: &OsDir, new_path: &str) -> Resu { Ok(_) => return Err(Errno::Exist), Err(err) => { - log::debug!("path_symlink fstatat error: {:?}", err); + tracing::debug!("path_symlink fstatat error: {:?}", err); } } } @@ -106,7 +106,7 @@ pub(crate) fn rename( } } Err(err) => { - log::debug!("path_rename fstatat error: {:?}", err); + tracing::debug!("path_rename fstatat error: {:?}", err); } } } diff --git a/crates/wasi-common/src/sys/windows/fd.rs b/crates/wasi-common/src/sys/windows/fd.rs index 78604a4719..15ccb3f5cd 100644 --- a/crates/wasi-common/src/sys/windows/fd.rs +++ b/crates/wasi-common/src/sys/windows/fd.rs @@ -5,12 +5,12 @@ use crate::sys::osdir::OsDir; use crate::sys::osfile::OsFile; use crate::sys::AsFile; use crate::wasi::{types, Result}; -use log::trace; use std::convert::TryInto; use std::fs::{File, OpenOptions}; use std::os::windows::fs::OpenOptionsExt; use std::os::windows::prelude::{AsRawHandle, FromRawHandle}; use std::path::Path; +use tracing::trace; use winx::file::{AccessMode, FileModeInformation, Flags}; pub(crate) fn fdstat_get(file: &File) -> Result { diff --git a/crates/wasi-common/src/sys/windows/mod.rs b/crates/wasi-common/src/sys/windows/mod.rs index 109385f15a..c3aa913a59 100644 --- a/crates/wasi-common/src/sys/windows/mod.rs +++ b/crates/wasi-common/src/sys/windows/mod.rs @@ -134,12 
+134,12 @@ impl From for Errno { winerror::ERROR_DIRECTORY => Self::Notdir, winerror::ERROR_ALREADY_EXISTS => Self::Exist, x => { - log::debug!("winerror: unknown error value: {}", x); + tracing::debug!("winerror: unknown error value: {}", x); Self::Io } }, None => { - log::debug!("Other I/O error: {}", err); + tracing::debug!("Other I/O error: {}", err); Self::Io } } diff --git a/crates/wasi-common/src/sys/windows/path.rs b/crates/wasi-common/src/sys/windows/path.rs index d2a2d17276..1e92f71afb 100644 --- a/crates/wasi-common/src/sys/windows/path.rs +++ b/crates/wasi-common/src/sys/windows/path.rs @@ -46,7 +46,7 @@ fn concatenate>(file: &OsDir, path: P) -> Result { // components with `out_path` let out_path = PathBuf::from(strip_extended_prefix(out_path)); - log::debug!("out_path={:?}", out_path); + tracing::debug!("out_path={:?}", out_path); Ok(out_path) } @@ -138,7 +138,7 @@ pub(crate) fn readlinkat(dirfd: &OsDir, s_path: &str) -> Result { Err(e) => e, }; if let Some(code) = err.raw_os_error() { - log::debug!("readlinkat error={:?}", code); + tracing::debug!("readlinkat error={:?}", code); if code as u32 == winerror::ERROR_INVALID_NAME { if s_path.ends_with('/') { // strip "/" and check if exists @@ -170,7 +170,7 @@ pub(crate) fn link( let new_path = concatenate(new_dirfd, new_path)?; if follow_symlinks { // in particular, this will return an error if the target path doesn't exist - log::debug!("Following symlinks for path: {:?}", old_path); + tracing::debug!("Following symlinks for path: {:?}", old_path); old_path = fs::canonicalize(&old_path).map_err(|e| match e.raw_os_error() { // fs::canonicalize under Windows will return: // * ERROR_FILE_NOT_FOUND, if it encounters a dangling symlink @@ -184,7 +184,7 @@ pub(crate) fn link( Err(e) => e, }; if let Some(code) = err.raw_os_error() { - log::debug!("path_link at fs::hard_link error code={:?}", code); + tracing::debug!("path_link at fs::hard_link error code={:?}", code); if code as u32 == 
winerror::ERROR_ACCESS_DENIED { // If an attempt is made to create a hard link to a directory, POSIX-compliant // implementations of link return `EPERM`, but `ERROR_ACCESS_DENIED` is converted @@ -248,7 +248,7 @@ pub(crate) fn open( } Err(err) => match err.raw_os_error() { Some(code) => { - log::debug!("path_open at symlink_metadata error code={:?}", code); + tracing::debug!("path_open at symlink_metadata error code={:?}", code); match code as u32 { winerror::ERROR_FILE_NOT_FOUND => { // file not found, let it proceed to actually @@ -263,7 +263,7 @@ pub(crate) fn open( }; } None => { - log::debug!("Inconvertible OS error: {}", err); + tracing::debug!("Inconvertible OS error: {}", err); return Err(Errno::Io); } }, @@ -353,7 +353,7 @@ pub(crate) fn rename( }; match err.raw_os_error() { Some(code) => { - log::debug!("path_rename at rename error code={:?}", code); + tracing::debug!("path_rename at rename error code={:?}", code); match code as u32 { winerror::ERROR_ACCESS_DENIED => { // So most likely dealing with new_path == dir. 
@@ -385,7 +385,7 @@ pub(crate) fn rename( Err(err.into()) } None => { - log::debug!("Inconvertible OS error: {}", err); + tracing::debug!("Inconvertible OS error: {}", err); Err(Errno::Io) } } @@ -417,7 +417,7 @@ pub(crate) fn symlink(old_path: &str, new_dirfd: &OsDir, new_path_: &str) -> Res }; match err.raw_os_error() { Some(code) => { - log::debug!("path_symlink at symlink_file error code={:?}", code); + tracing::debug!("path_symlink at symlink_file error code={:?}", code); match code as u32 { // If the target contains a trailing slash, the Windows API returns // ERROR_INVALID_NAME (which corresponds to ENOENT) instead of @@ -442,7 +442,7 @@ pub(crate) fn symlink(old_path: &str, new_dirfd: &OsDir, new_path_: &str) -> Res Err(err.into()) } None => { - log::debug!("Inconvertible OS error: {}", err); + tracing::debug!("Inconvertible OS error: {}", err); Err(Errno::Io) } } @@ -468,7 +468,7 @@ pub(crate) fn unlink_file(dirfd: &OsDir, path: &str) -> Result<()> { }; match err.raw_os_error() { Some(code) => { - log::debug!("path_unlink_file at symlink_file error code={:?}", code); + tracing::debug!("path_unlink_file at symlink_file error code={:?}", code); if code as u32 == winerror::ERROR_ACCESS_DENIED { // try unlinking a dir symlink instead return fs::remove_dir(path).map_err(Into::into); @@ -477,7 +477,7 @@ pub(crate) fn unlink_file(dirfd: &OsDir, path: &str) -> Result<()> { Err(err.into()) } None => { - log::debug!("Inconvertible OS error: {}", err); + tracing::debug!("Inconvertible OS error: {}", err); Err(Errno::Io) } } diff --git a/crates/wasi-common/src/sys/windows/poll.rs b/crates/wasi-common/src/sys/windows/poll.rs index d49524a5d5..f467059b11 100644 --- a/crates/wasi-common/src/sys/windows/poll.rs +++ b/crates/wasi-common/src/sys/windows/poll.rs @@ -7,12 +7,12 @@ use crate::sys::stdio::{Stderr, Stdin, Stdout}; use crate::sys::AsFile; use crate::wasi::{types, Errno, Result}; use lazy_static::lazy_static; -use log::{debug, error, trace, warn}; use 
std::convert::TryInto; use std::sync::mpsc::{self, Receiver, RecvTimeoutError, Sender, TryRecvError}; use std::sync::Mutex; use std::thread; use std::time::Duration; +use tracing::{debug, error, trace, warn}; struct StdinPoll { request_tx: Sender<()>, @@ -241,7 +241,7 @@ pub(crate) fn oneoff( handle_error_event(event, Errno::Notsup, events); } } else { - log::error!("can poll FdEvent for OS resources only"); + tracing::error!("can poll FdEvent for OS resources only"); return Err(Errno::Badf); } } diff --git a/crates/wasi-common/yanix/Cargo.toml b/crates/wasi-common/yanix/Cargo.toml index 0e0f1c9641..bd6c4e1018 100644 --- a/crates/wasi-common/yanix/Cargo.toml +++ b/crates/wasi-common/yanix/Cargo.toml @@ -9,7 +9,7 @@ repository = "https://github.com/bytecodealliance/wasmtime" edition = "2018" [dependencies] -log = "0.4" +tracing = "0.1.15" libc = { version = "0.2", features = ["extra_traits"] } bitflags = "1.2" cfg-if = "0.1.9" diff --git a/crates/wasi-common/yanix/src/filetime.rs b/crates/wasi-common/yanix/src/filetime.rs index fe95e1d950..cc6b80f3c5 100644 --- a/crates/wasi-common/yanix/src/filetime.rs +++ b/crates/wasi-common/yanix/src/filetime.rs @@ -25,7 +25,7 @@ impl FileTimeExt for filetime::FileTime { let sec = match self.seconds().try_into() { Ok(sec) => sec, Err(_) => { - log::debug!("filetime_to_timespec failed converting seconds to required width"); + tracing::debug!("filetime_to_timespec failed converting seconds to required width"); return Err(Error::from_raw_os_error(libc::EOVERFLOW)); } }; From 94f904449bd2a487ede23dee01db3da8d2624d1e Mon Sep 17 00:00:00 2001 From: Pat Hickey Date: Tue, 18 Aug 2020 16:02:53 -0700 Subject: [PATCH 07/34] fix yanix --- crates/wasi-common/yanix/src/sys/bsd/fadvise.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/wasi-common/yanix/src/sys/bsd/fadvise.rs b/crates/wasi-common/yanix/src/sys/bsd/fadvise.rs index d1fa29d9d9..32d290a973 100644 --- a/crates/wasi-common/yanix/src/sys/bsd/fadvise.rs 
+++ b/crates/wasi-common/yanix/src/sys/bsd/fadvise.rs @@ -51,7 +51,9 @@ pub unsafe fn posix_fadvise( // is providing a dubiously large hint. This is not confirmed (no helpful info in the man // pages), but offhand, a 2+ GiB advisory read async seems unlikely to help with any kind // of performance, so we log and exit early with a no-op. - log::warn!("`len` too big to fit in the host's command. Returning early with no-op!"); + tracing::warn!( + "`len` too big to fit in the host's command. Returning early with no-op!" + ); return Ok(()); } }; From 4a84f3f0732d41e8685d1005fc63b31acad44e1f Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Wed, 19 Aug 2020 18:07:12 +0200 Subject: [PATCH 08/34] Lower fcvt_from_{u,s}int for 8 and 16 bit ints --- .../codegen/src/isa/aarch64/lower_inst.rs | 44 ++++++++++++++----- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 5fe62da697..819071cc96 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -2157,12 +2157,12 @@ pub(crate) fn lower_insn_to_regs>( let out_bits = ty_bits(ctx.output_ty(insn, 0)); let signed = op == Opcode::FcvtToSint; let op = match (signed, in_bits, out_bits) { - (false, 32, 32) => FpuToIntOp::F32ToU32, - (true, 32, 32) => FpuToIntOp::F32ToI32, + (false, 32, 8) | (false, 32, 16) | (false, 32, 32) => FpuToIntOp::F32ToU32, + (true, 32, 8) | (true, 32, 16) | (true, 32, 32) => FpuToIntOp::F32ToI32, (false, 32, 64) => FpuToIntOp::F32ToU64, (true, 32, 64) => FpuToIntOp::F32ToI64, - (false, 64, 32) => FpuToIntOp::F64ToU32, - (true, 64, 32) => FpuToIntOp::F64ToI32, + (false, 64, 8) | (false, 64, 16) | (false, 64, 32) => FpuToIntOp::F64ToU32, + (true, 64, 8) | (true, 64, 16) | (true, 64, 32) => FpuToIntOp::F64ToI32, (false, 64, 64) => FpuToIntOp::F64ToU64, (true, 64, 64) => FpuToIntOp::F64ToI64, _ => panic!("Unknown input/output-bits combination"), 
@@ -2199,6 +2199,16 @@ pub(crate) fn lower_insn_to_regs>( if in_bits == 32 { // From float32. let (low_bound, low_cond, high_bound) = match (signed, out_bits) { + (true, 8) => ( + i8::min_value() as f32 - 1., + FloatCC::GreaterThan, + i8::max_value() as f32 + 1., + ), + (true, 16) => ( + i16::min_value() as f32 - 1., + FloatCC::GreaterThan, + i16::max_value() as f32 + 1., + ), (true, 32) => ( i32::min_value() as f32, // I32_MIN - 1 isn't precisely representable as a f32. FloatCC::GreaterThanOrEqual, @@ -2209,6 +2219,8 @@ pub(crate) fn lower_insn_to_regs>( FloatCC::GreaterThanOrEqual, i64::max_value() as f32 + 1., ), + (false, 8) => (-1., FloatCC::GreaterThan, u8::max_value() as f32 + 1.), + (false, 16) => (-1., FloatCC::GreaterThan, u16::max_value() as f32 + 1.), (false, 32) => (-1., FloatCC::GreaterThan, u32::max_value() as f32 + 1.), (false, 64) => (-1., FloatCC::GreaterThan, u64::max_value() as f32 + 1.), _ => panic!("Unknown input/output-bits combination"), @@ -2240,6 +2252,16 @@ pub(crate) fn lower_insn_to_regs>( } else { // From float64. 
let (low_bound, low_cond, high_bound) = match (signed, out_bits) { + (true, 8) => ( + i8::min_value() as f64 - 1., + FloatCC::GreaterThan, + i8::max_value() as f64 + 1., + ), + (true, 16) => ( + i16::min_value() as f64 - 1., + FloatCC::GreaterThan, + i16::max_value() as f64 + 1., + ), (true, 32) => ( i32::min_value() as f64 - 1., FloatCC::GreaterThan, @@ -2250,6 +2272,8 @@ pub(crate) fn lower_insn_to_regs>( FloatCC::GreaterThanOrEqual, i64::max_value() as f64 + 1., ), + (false, 8) => (-1., FloatCC::GreaterThan, u8::max_value() as f64 + 1.), + (false, 16) => (-1., FloatCC::GreaterThan, u16::max_value() as f64 + 1.), (false, 32) => (-1., FloatCC::GreaterThan, u32::max_value() as f64 + 1.), (false, 64) => (-1., FloatCC::GreaterThan, u64::max_value() as f64 + 1.), _ => panic!("Unknown input/output-bits combination"), @@ -2289,10 +2313,10 @@ pub(crate) fn lower_insn_to_regs>( let out_bits = ty_bits(ctx.output_ty(insn, 0)); let signed = op == Opcode::FcvtFromSint; let op = match (signed, in_bits, out_bits) { - (false, 32, 32) => IntToFpuOp::U32ToF32, - (true, 32, 32) => IntToFpuOp::I32ToF32, - (false, 32, 64) => IntToFpuOp::U32ToF64, - (true, 32, 64) => IntToFpuOp::I32ToF64, + (false, 8, 32) | (false, 16, 32) | (false, 32, 32) => IntToFpuOp::U32ToF32, + (true, 8, 32) | (true, 16, 32) | (true, 32, 32) => IntToFpuOp::I32ToF32, + (false, 8, 64) | (false, 16, 64) | (false, 32, 64) => IntToFpuOp::U32ToF64, + (true, 8, 64) | (true, 16, 64) | (true, 32, 64) => IntToFpuOp::I32ToF64, (false, 64, 32) => IntToFpuOp::U64ToF32, (true, 64, 32) => IntToFpuOp::I64ToF32, (false, 64, 64) => IntToFpuOp::U64ToF64, @@ -2300,8 +2324,8 @@ pub(crate) fn lower_insn_to_regs>( _ => panic!("Unknown input/output-bits combination"), }; let narrow_mode = match (signed, in_bits) { - (false, 32) => NarrowValueMode::ZeroExtend32, - (true, 32) => NarrowValueMode::SignExtend32, + (false, 8) | (false, 16) | (false, 32) => NarrowValueMode::ZeroExtend32, + (true, 8) | (true, 16) | (true, 32) => 
NarrowValueMode::SignExtend32, (false, 64) => NarrowValueMode::ZeroExtend64, (true, 64) => NarrowValueMode::SignExtend64, _ => panic!("Unknown input size"), From 3a16416132ac3ed215179841de664938636d9177 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Wed, 19 Aug 2020 19:17:27 +0200 Subject: [PATCH 09/34] Add tests --- .../filetests/vcode/aarch64/fcvt-small.clif | 134 ++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 cranelift/filetests/filetests/vcode/aarch64/fcvt-small.clif diff --git a/cranelift/filetests/filetests/vcode/aarch64/fcvt-small.clif b/cranelift/filetests/filetests/vcode/aarch64/fcvt-small.clif new file mode 100644 index 0000000000..518c80e17a --- /dev/null +++ b/cranelift/filetests/filetests/vcode/aarch64/fcvt-small.clif @@ -0,0 +1,134 @@ +test compile +target aarch64 + +function u0:0(i8) -> f32 { +block0(v0: i8): + ; check: stp fp, lr, [sp, #-16]! + ; check: mov fp, sp + v1 = fcvt_from_uint.f32 v0 + ; check: uxtb w0, w0 + ; check: ucvtf s0, w0 + return v1 + ; check: mov sp, fp + ; check: ldp fp, lr, [sp], #16 + ; check: ret +} + +function u0:0(i8) -> f64 { +block0(v0: i8): + ; check: stp fp, lr, [sp, #-16]! + ; check: mov fp, sp + v1 = fcvt_from_uint.f64 v0 + ; check: uxtb w0, w0 + ; check: ucvtf d0, w0 + return v1 + ; check: mov sp, fp + ; check: ldp fp, lr, [sp], #16 + ; check: ret +} + +function u0:0(i16) -> f32 { +block0(v0: i16): + ; check: stp fp, lr, [sp, #-16]! + ; check: mov fp, sp + v1 = fcvt_from_uint.f32 v0 + ; check: uxth w0, w0 + ; check: ucvtf s0, w0 + return v1 + ; check: mov sp, fp + ; check: ldp fp, lr, [sp], #16 + ; check: ret +} + +function u0:0(i16) -> f64 { +block0(v0: i16): + ; check: stp fp, lr, [sp, #-16]! + ; check: mov fp, sp + v1 = fcvt_from_uint.f64 v0 + ; check: uxth w0, w0 + ; check: ucvtf d0, w0 + return v1 + ; check: mov sp, fp + ; check: ldp fp, lr, [sp], #16 + ; check: ret +} + +function u0:0(f32) -> i8 { +block0(v0: f32): + ; check: stp fp, lr, [sp, #-16]! 
+ ; check: mov fp, sp + v1 = fcvt_to_uint.i8 v0 + ; check: fcmp s0, s0 + ; check: b.vc 8 ; udf + ; check: ldr s1, pc+8 ; b 8 ; data.f32 -1 + ; check: fcmp s0, s1 + ; check: b.gt 8 ; udf + ; check: ldr s1, pc+8 ; b 8 ; data.f32 256 + ; check: fcmp s0, s1 + ; check: b.mi 8 ; udf + ; check: fcvtzu w0, s0 + return v1 + ; check: mov sp, fp + ; check: ldp fp, lr, [sp], #16 + ; check: ret +} + +function u0:0(f64) -> i8 { +block0(v0: f64): + ; check: stp fp, lr, [sp, #-16]! + ; check: mov fp, sp + v1 = fcvt_to_uint.i8 v0 + ; check: fcmp d0, d0 + ; check: b.vc 8 ; udf + ; check: ldr d1, pc+8 ; b 12 ; data.f64 -1 + ; check: fcmp d0, d1 + ; check: b.gt 8 ; udf + ; check: ldr d1, pc+8 ; b 12 ; data.f64 256 + ; check: fcmp d0, d1 + ; check: b.mi 8 ; udf + ; check: fcvtzu w0, d0 + return v1 + ; check: mov sp, fp + ; check: ldp fp, lr, [sp], #16 + ; check: ret +} + +function u0:0(f32) -> i16 { +block0(v0: f32): + ; check: stp fp, lr, [sp, #-16]! + ; check: mov fp, sp + v1 = fcvt_to_uint.i16 v0 + ; check: fcmp s0, s0 + ; check: b.vc 8 ; udf + ; check: ldr s1, pc+8 ; b 8 ; data.f32 -1 + ; check: fcmp s0, s1 + ; check: b.gt 8 ; udf + ; check: ldr s1, pc+8 ; b 8 ; data.f32 65536 + ; check: fcmp s0, s1 + ; check: b.mi 8 ; udf + ; check: fcvtzu w0, s0 + return v1 + ; check: mov sp, fp + ; check: ldp fp, lr, [sp], #16 + ; check: ret +} + +function u0:0(f64) -> i16 { +block0(v0: f64): + ; check: stp fp, lr, [sp, #-16]! 
+ ; check: mov fp, sp + v1 = fcvt_to_uint.i16 v0 + ; check: fcmp d0, d0 + ; check: b.vc 8 ; udf + ; check: ldr d1, pc+8 ; b 12 ; data.f64 -1 + ; check: fcmp d0, d1 + ; check: b.gt 8 ; udf + ; check: ldr d1, pc+8 ; b 12 ; data.f64 65536 + ; check: fcmp d0, d1 + ; check: b.mi 8 ; udf + ; check: fcvtzu w0, d0 + return v1 + ; check: mov sp, fp + ; check: ldp fp, lr, [sp], #16 + ; check: ret +} From a31336996c184eeccdd7d2e5e120751fc56bf5a7 Mon Sep 17 00:00:00 2001 From: Johnnie Birch <45402135+jlb6740@users.noreply.github.com> Date: Mon, 17 Aug 2020 13:44:10 -0700 Subject: [PATCH 10/34] Add support for some packed multiplication for new x64 backend Adds support for i32x4, and i16x8 and lowering for pmuludq in preparation for i64x2. --- cranelift/codegen/src/isa/x64/inst/args.rs | 12 ++- cranelift/codegen/src/isa/x64/inst/emit.rs | 89 ++++++++++--------- .../codegen/src/isa/x64/inst/emit_tests.rs | 18 ++++ cranelift/codegen/src/isa/x64/lower.rs | 5 ++ 4 files changed, 80 insertions(+), 44 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 9885102b66..600381496f 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -395,6 +395,9 @@ pub enum SseOpcode { Paddd, Paddq, Paddw, + Pmulld, + Pmullw, + Pmuludq, Psllw, Pslld, Psllq, @@ -491,6 +494,8 @@ impl SseOpcode { | SseOpcode::Paddd | SseOpcode::Paddq | SseOpcode::Paddw + | SseOpcode::Pmullw + | SseOpcode::Pmuludq | SseOpcode::Psllw | SseOpcode::Pslld | SseOpcode::Psllq @@ -510,7 +515,9 @@ impl SseOpcode { | SseOpcode::Ucomisd | SseOpcode::Xorpd => SSE2, - SseOpcode::Insertps | SseOpcode::Roundss | SseOpcode::Roundsd => SSE41, + SseOpcode::Insertps | SseOpcode::Pmulld | SseOpcode::Roundss | SseOpcode::Roundsd => { + SSE41 + } } } @@ -579,6 +586,9 @@ impl fmt::Debug for SseOpcode { SseOpcode::Paddd => "paddd", SseOpcode::Paddq => "paddq", SseOpcode::Paddw => "paddw", + SseOpcode::Pmulld => "pmulld", +
SseOpcode::Pmullw => "pmullw", + SseOpcode::Pmuludq => "pmuludq", SseOpcode::Psllw => "psllw", SseOpcode::Pslld => "pslld", SseOpcode::Psllq => "psllq", diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index f0932b73c4..6118284674 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1632,57 +1632,60 @@ pub(crate) fn emit( dst: reg_g, } => { let rex = RexFlags::clear_w(); - let (prefix, opcode) = match op { - SseOpcode::Addps => (LegacyPrefix::None, 0x0F58), - SseOpcode::Addpd => (LegacyPrefix::_66, 0x0F58), - SseOpcode::Addss => (LegacyPrefix::_F3, 0x0F58), - SseOpcode::Addsd => (LegacyPrefix::_F2, 0x0F58), - SseOpcode::Andpd => (LegacyPrefix::_66, 0x0F54), - SseOpcode::Andps => (LegacyPrefix::None, 0x0F54), - SseOpcode::Andnps => (LegacyPrefix::None, 0x0F55), - SseOpcode::Andnpd => (LegacyPrefix::_66, 0x0F55), - SseOpcode::Divps => (LegacyPrefix::None, 0x0F5E), - SseOpcode::Divpd => (LegacyPrefix::_66, 0x0F5E), - SseOpcode::Divss => (LegacyPrefix::_F3, 0x0F5E), - SseOpcode::Divsd => (LegacyPrefix::_F2, 0x0F5E), - SseOpcode::Minps => (LegacyPrefix::None, 0x0F5D), - SseOpcode::Minpd => (LegacyPrefix::_66, 0x0F5D), - SseOpcode::Minss => (LegacyPrefix::_F3, 0x0F5D), - SseOpcode::Minsd => (LegacyPrefix::_F2, 0x0F5D), - SseOpcode::Maxps => (LegacyPrefix::None, 0x0F5F), - SseOpcode::Maxpd => (LegacyPrefix::_66, 0x0F5F), - SseOpcode::Maxss => (LegacyPrefix::_F3, 0x0F5F), - SseOpcode::Maxsd => (LegacyPrefix::_F2, 0x0F5F), - SseOpcode::Mulps => (LegacyPrefix::None, 0x0F59), - SseOpcode::Mulpd => (LegacyPrefix::_66, 0x0F59), - SseOpcode::Mulss => (LegacyPrefix::_F3, 0x0F59), - SseOpcode::Mulsd => (LegacyPrefix::_F2, 0x0F59), - SseOpcode::Orpd => (LegacyPrefix::_66, 0x0F56), - SseOpcode::Orps => (LegacyPrefix::None, 0x0F56), - SseOpcode::Paddb => (LegacyPrefix::_66, 0x0FFC), - SseOpcode::Paddd => (LegacyPrefix::_66, 0x0FFE), - SseOpcode::Paddq => (LegacyPrefix::_66, 
0x0FD4), - SseOpcode::Paddw => (LegacyPrefix::_66, 0x0FFD), - SseOpcode::Psubb => (LegacyPrefix::_66, 0x0FF8), - SseOpcode::Psubd => (LegacyPrefix::_66, 0x0FFA), - SseOpcode::Psubq => (LegacyPrefix::_66, 0x0FFB), - SseOpcode::Psubw => (LegacyPrefix::_66, 0x0FF9), - SseOpcode::Subps => (LegacyPrefix::None, 0x0F5C), - SseOpcode::Subpd => (LegacyPrefix::_66, 0x0F5C), - SseOpcode::Subss => (LegacyPrefix::_F3, 0x0F5C), - SseOpcode::Subsd => (LegacyPrefix::_F2, 0x0F5C), - SseOpcode::Xorps => (LegacyPrefix::None, 0x0F57), - SseOpcode::Xorpd => (LegacyPrefix::_66, 0x0F57), + let (prefix, opcode, length) = match op { + SseOpcode::Addps => (LegacyPrefix::None, 0x0F58, 2), + SseOpcode::Addpd => (LegacyPrefix::_66, 0x0F58, 2), + SseOpcode::Addss => (LegacyPrefix::_F3, 0x0F58, 2), + SseOpcode::Addsd => (LegacyPrefix::_F2, 0x0F58, 2), + SseOpcode::Andpd => (LegacyPrefix::_66, 0x0F54, 2), + SseOpcode::Andps => (LegacyPrefix::None, 0x0F54, 2), + SseOpcode::Andnps => (LegacyPrefix::None, 0x0F55, 2), + SseOpcode::Andnpd => (LegacyPrefix::_66, 0x0F55, 2), + SseOpcode::Divps => (LegacyPrefix::None, 0x0F5E, 2), + SseOpcode::Divpd => (LegacyPrefix::_66, 0x0F5E, 2), + SseOpcode::Divss => (LegacyPrefix::_F3, 0x0F5E, 2), + SseOpcode::Divsd => (LegacyPrefix::_F2, 0x0F5E, 2), + SseOpcode::Minps => (LegacyPrefix::None, 0x0F5D, 2), + SseOpcode::Minpd => (LegacyPrefix::_66, 0x0F5D, 2), + SseOpcode::Minss => (LegacyPrefix::_F3, 0x0F5D, 2), + SseOpcode::Minsd => (LegacyPrefix::_F2, 0x0F5D, 2), + SseOpcode::Maxps => (LegacyPrefix::None, 0x0F5F, 2), + SseOpcode::Maxpd => (LegacyPrefix::_66, 0x0F5F, 2), + SseOpcode::Maxss => (LegacyPrefix::_F3, 0x0F5F, 2), + SseOpcode::Maxsd => (LegacyPrefix::_F2, 0x0F5F, 2), + SseOpcode::Mulps => (LegacyPrefix::None, 0x0F59, 2), + SseOpcode::Mulpd => (LegacyPrefix::_66, 0x0F59, 2), + SseOpcode::Mulss => (LegacyPrefix::_F3, 0x0F59, 2), + SseOpcode::Mulsd => (LegacyPrefix::_F2, 0x0F59, 2), + SseOpcode::Orpd => (LegacyPrefix::_66, 0x0F56, 2), + SseOpcode::Orps => 
(LegacyPrefix::None, 0x0F56, 2), + SseOpcode::Paddb => (LegacyPrefix::_66, 0x0FFC, 2), + SseOpcode::Paddd => (LegacyPrefix::_66, 0x0FFE, 2), + SseOpcode::Paddq => (LegacyPrefix::_66, 0x0FD4, 2), + SseOpcode::Paddw => (LegacyPrefix::_66, 0x0FFD, 2), + SseOpcode::Pmulld => (LegacyPrefix::_66, 0x0F3840, 3), + SseOpcode::Pmullw => (LegacyPrefix::_66, 0x0FD5, 2), + SseOpcode::Pmuludq => (LegacyPrefix::_66, 0x0FF4, 2), + SseOpcode::Psubb => (LegacyPrefix::_66, 0x0FF8, 2), + SseOpcode::Psubd => (LegacyPrefix::_66, 0x0FFA, 2), + SseOpcode::Psubq => (LegacyPrefix::_66, 0x0FFB, 2), + SseOpcode::Psubw => (LegacyPrefix::_66, 0x0FF9, 2), + SseOpcode::Subps => (LegacyPrefix::None, 0x0F5C, 2), + SseOpcode::Subpd => (LegacyPrefix::_66, 0x0F5C, 2), + SseOpcode::Subss => (LegacyPrefix::_F3, 0x0F5C, 2), + SseOpcode::Subsd => (LegacyPrefix::_F2, 0x0F5C, 2), + SseOpcode::Xorps => (LegacyPrefix::None, 0x0F57, 2), + SseOpcode::Xorpd => (LegacyPrefix::_66, 0x0F57, 2), _ => unimplemented!("Opcode {:?} not implemented", op), }; match src_e { RegMem::Reg { reg: reg_e } => { - emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex); + emit_std_reg_reg(sink, prefix, opcode, length, reg_g.to_reg(), *reg_e, rex); } RegMem::Mem { addr } => { let addr = &addr.finalize(state); - emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex); + emit_std_reg_mem(sink, prefix, opcode, length, reg_g.to_reg(), addr, rex); } } } diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 877c2282b4..e0f2ea1acd 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -3062,6 +3062,24 @@ fn test_x64_emit() { "psubq %xmm8, %xmm1", )); + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pmulld, RegMem::reg(xmm15), w_xmm6), + "66410F3840F7", + "pmulld %xmm15, %xmm6", + )); + + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(xmm14), w_xmm1), + "66410FD5CE", + 
"pmullw %xmm14, %xmm1", + )); + + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pmuludq, RegMem::reg(xmm8), w_xmm9), + "66450FF4C8", + "pmuludq %xmm8, %xmm9", + )); + // XMM_Mov_R_M: float stores insns.push(( Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12), None), diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 66d16c894d..f4eb306882 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -357,6 +357,11 @@ fn lower_insn_to_regs>( types::I64X2 => SseOpcode::Psubq, _ => panic!("Unsupported type for packed Isub instruction"), }, + Opcode::Imul => match ty { + types::I16X8 => SseOpcode::Pmullw, + types::I32X4 => SseOpcode::Pmulld, + _ => panic!("Unsupported type for packed Imul instruction"), + }, _ => panic!("Unsupported packed instruction"), }; let lhs = input_to_reg(ctx, inputs[0]); From 4cafb9018653b83df7dce00dfdf377e34bcb5e41 Mon Sep 17 00:00:00 2001 From: Gabor Greif Date: Wed, 19 Aug 2020 19:30:01 +0200 Subject: [PATCH 11/34] use opcode directly, now that we support this (#2145) fixing also two typos with `DW_OP_fbreg` --- crates/debug/src/transform/expression.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crates/debug/src/transform/expression.rs b/crates/debug/src/transform/expression.rs index 8419706760..c5386d11e4 100644 --- a/crates/debug/src/transform/expression.rs +++ b/crates/debug/src/transform/expression.rs @@ -418,7 +418,7 @@ where let op = Operation::parse(&mut pc, encoding)?; match op { Operation::FrameOffset { offset } => { - // Expand DW_OP_fpreg into frame location and DW_OP_plus_uconst. + // Expand DW_OP_fbreg into frame location and DW_OP_plus_uconst. if frame_base.is_some() { // Add frame base expressions. 
flush_code_chunk!(); @@ -680,8 +680,7 @@ mod tests { let e = expression!(DW_OP_WASM_location, 0x0, 3, DW_OP_stack_value); let fe = compile_expression(&e, DWARF_ENCODING, None).expect("non-error"); - // DW_OP_fpreg 0x12 - let e = expression!(0x91, 0x12); + let e = expression!(DW_OP_fbreg, 0x12); let ce = compile_expression(&e, DWARF_ENCODING, fe.as_ref()) .expect("non-error") .expect("expression"); From ba48b9aef1aefb220ce2f93ca58b1b0e2fe81bd3 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Wed, 19 Aug 2020 18:45:46 +0200 Subject: [PATCH 12/34] Fix put_input_in_reg --- cranelift/codegen/src/isa/aarch64/lower.rs | 4 +-- .../filetests/vcode/aarch64/extend-op.clif | 1 + .../filetests/vcode/aarch64/heap_addr.clif | 6 ++-- .../vcode/aarch64/iconst-icmp-small.clif | 32 +++++++++++++++++++ .../vcode/aarch64/narrow-arithmetic.clif | 2 ++ 5 files changed, 41 insertions(+), 4 deletions(-) create mode 100644 cranelift/filetests/filetests/vcode/aarch64/iconst-icmp-small.clif diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 07c6b27281..65576044b7 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -355,7 +355,7 @@ fn put_input_in_rse>( && ((narrow_mode.is_32bit() && out_bits < 32) || (!narrow_mode.is_32bit() && out_bits < 64)) { - let reg = put_input_in_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None); + let reg = put_input_in_reg(ctx, input, NarrowValueMode::None); let extendop = match (narrow_mode, out_bits) { (NarrowValueMode::SignExtend32, 1) | (NarrowValueMode::SignExtend64, 1) => { ExtendOp::SXTB @@ -400,7 +400,7 @@ fn put_input_in_rse>( (false, 32) => ExtendOp::UXTW, _ => unreachable!(), }; - let reg = put_input_in_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None); + let reg = put_input_in_reg(ctx, input, NarrowValueMode::None); return ResultRSE::RegExtend(reg, extendop); } } diff --git 
a/cranelift/filetests/filetests/vcode/aarch64/extend-op.clif b/cranelift/filetests/filetests/vcode/aarch64/extend-op.clif index aa58cb2deb..5237aa6ac4 100644 --- a/cranelift/filetests/filetests/vcode/aarch64/extend-op.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/extend-op.clif @@ -11,6 +11,7 @@ block0(v0: i8): ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp +; nextln: sxtb x0, w0 ; nextln: movz x1, #42 ; nextln: add x0, x1, x0, SXTB ; nextln: mov sp, fp diff --git a/cranelift/filetests/filetests/vcode/aarch64/heap_addr.clif b/cranelift/filetests/filetests/vcode/aarch64/heap_addr.clif index e4ff1471be..a982a68684 100644 --- a/cranelift/filetests/filetests/vcode/aarch64/heap_addr.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/heap_addr.clif @@ -20,7 +20,8 @@ block0(v0: i64, v1: i32): ; nextln: subs wzr, w1, w2 ; nextln: b.ls label1 ; b label2 ; check: Block 1: -; check: add x0, x0, x1, UXTW +; check: mov w3, w1 +; check: add x0, x0, x3, UXTW ; nextln: subs wzr, w1, w2 ; nextln: movz x1, #0 ; nextln: csel x0, x1, x0, hi @@ -45,7 +46,8 @@ block0(v0: i64, v1: i32): ; nextln: subs wzr, w1, #65536 ; nextln: b.ls label1 ; b label2 ; check: Block 1: -; check: add x0, x0, x1, UXTW +; check: mov w2, w1 +; check: add x0, x0, x2, UXTW ; nextln: subs wzr, w1, #65536 ; nextln: movz x1, #0 ; nextln: csel x0, x1, x0, hi diff --git a/cranelift/filetests/filetests/vcode/aarch64/iconst-icmp-small.clif b/cranelift/filetests/filetests/vcode/aarch64/iconst-icmp-small.clif new file mode 100644 index 0000000000..6c0f098ef2 --- /dev/null +++ b/cranelift/filetests/filetests/vcode/aarch64/iconst-icmp-small.clif @@ -0,0 +1,32 @@ +; Test that `put_input_in_rse` doesn't try to put the input of the `iconst` into a register, which +; would result in an out-of-bounds panic. 
(#2147) + +test compile +target aarch64 + +function u0:0() -> i8 system_v { + +block0: + v0 = iconst.i16 0xddcc + v1 = icmp.i16 ne v0, v0 + v2 = bint.i8 v1 + return v2 +} + +; check: VCode_ShowWithRRU {{ +; nextln: Entry block: 0 +; nextln: Block 0: +; nextln: (original IR block: block0) +; nextln: (instruction range: 0 .. 11) +; nextln: Inst 0: stp fp, lr, [sp, #-16]! +; nextln: Inst 1: mov fp, sp +; nextln: Inst 2: movz x0, #56780 +; nextln: Inst 3: uxth w0, w0 +; nextln: Inst 4: movz x1, #56780 +; nextln: Inst 5: subs wzr, w0, w1, UXTH +; nextln: Inst 6: cset x0, ne +; nextln: Inst 7: and w0, w0, #1 +; nextln: Inst 8: mov sp, fp +; nextln: Inst 9: ldp fp, lr, [sp], #16 +; nextln: Inst 10: ret +; nextln: }} diff --git a/cranelift/filetests/filetests/vcode/aarch64/narrow-arithmetic.clif b/cranelift/filetests/filetests/vcode/aarch64/narrow-arithmetic.clif index e68eb28c67..7af0502152 100644 --- a/cranelift/filetests/filetests/vcode/aarch64/narrow-arithmetic.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/narrow-arithmetic.clif @@ -49,6 +49,7 @@ block0(v0: i32, v1: i8): ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp +; nextln: sxtb w1, w1 ; nextln: add w0, w0, w1, SXTB ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 @@ -63,6 +64,7 @@ block0(v0: i64, v1: i32): ; check: stp fp, lr, [sp, #-16]! 
; nextln: mov fp, sp +; nextln: sxtw x1, w1 ; nextln: add x0, x0, x1, SXTW ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 From b5e24c8c67e32c8d1ba976ee9faa9ca96eb2b942 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Wed, 19 Aug 2020 22:14:26 +0200 Subject: [PATCH 13/34] Update object to 0.21.1 (#2144) --- Cargo.lock | 16 +++++++++++----- Cargo.toml | 2 +- cranelift/object/Cargo.toml | 2 +- crates/debug/Cargo.toml | 2 +- crates/jit/Cargo.toml | 2 +- crates/obj/Cargo.toml | 2 +- 6 files changed, 16 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 517dd3b500..77e99deddf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -487,7 +487,7 @@ dependencies = [ "anyhow", "cranelift-codegen", "cranelift-module", - "object 0.20.0", + "object 0.21.1", "target-lexicon", ] @@ -1216,6 +1216,12 @@ name = "object" version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ab52be62400ca80aa00285d25253d7f7c437b7375c4de678f5405d3afe82ca5" + +[[package]] +name = "object" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37fd5004feb2ce328a52b0b3d01dbf4ffff72583493900ed15f22d4111c51693" dependencies = [ "crc32fast", "indexmap", @@ -2387,7 +2393,7 @@ dependencies = [ "libc", "log", "more-asserts", - "object 0.20.0", + "object 0.21.1", "pretty_env_logger", "rayon", "structopt", @@ -2415,7 +2421,7 @@ dependencies = [ "anyhow", "gimli 0.21.0", "more-asserts", - "object 0.20.0", + "object 0.21.1", "target-lexicon", "thiserror", "wasmparser 0.59.0", @@ -2487,7 +2493,7 @@ dependencies = [ "gimli 0.21.0", "log", "more-asserts", - "object 0.20.0", + "object 0.21.1", "rayon", "region", "serde", @@ -2508,7 +2514,7 @@ version = "0.19.0" dependencies = [ "anyhow", "more-asserts", - "object 0.20.0", + "object 0.21.1", "target-lexicon", "wasmtime-debug", "wasmtime-environ", diff --git a/Cargo.toml b/Cargo.toml index a6001bcf35..983811b326 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,7 +32,7 @@ 
wasmtime-wast = { path = "crates/wast", version = "0.19.0" } wasmtime-wasi = { path = "crates/wasi", version = "0.19.0" } wasi-common = { path = "crates/wasi-common", version = "0.19.0" } structopt = { version = "0.3.5", features = ["color", "suggestions"] } -object = { version = "0.20", default-features = false, features = ["write"] } +object = { version = "0.21.1", default-features = false, features = ["write"] } anyhow = "1.0.19" target-lexicon = { version = "0.10.0", default-features = false } pretty_env_logger = "0.4.0" diff --git a/cranelift/object/Cargo.toml b/cranelift/object/Cargo.toml index 6da1c88454..f8dd7adce4 100644 --- a/cranelift/object/Cargo.toml +++ b/cranelift/object/Cargo.toml @@ -12,7 +12,7 @@ edition = "2018" [dependencies] cranelift-module = { path = "../module", version = "0.66.0" } cranelift-codegen = { path = "../codegen", version = "0.66.0", default-features = false, features = ["std"] } -object = { version = "0.20", default-features = false, features = ["write"] } +object = { version = "0.21.1", default-features = false, features = ["write"] } target-lexicon = "0.10" anyhow = "1.0" diff --git a/crates/debug/Cargo.toml b/crates/debug/Cargo.toml index d04fa27d8d..ffabb62f5a 100644 --- a/crates/debug/Cargo.toml +++ b/crates/debug/Cargo.toml @@ -14,7 +14,7 @@ edition = "2018" [dependencies] gimli = "0.21.0" wasmparser = "0.59.0" -object = { version = "0.20", default-features = false, features = ["read", "write"] } +object = { version = "0.21.1", default-features = false, features = ["read", "write"] } wasmtime-environ = { path = "../environ", version = "0.19.0" } target-lexicon = { version = "0.10.0", default-features = false } anyhow = "1.0" diff --git a/crates/jit/Cargo.toml b/crates/jit/Cargo.toml index 8d9fdc0cd3..1896cbc4e2 100644 --- a/crates/jit/Cargo.toml +++ b/crates/jit/Cargo.toml @@ -32,7 +32,7 @@ anyhow = "1.0" cfg-if = "0.1.9" log = "0.4" gimli = { version = "0.21.0", default-features = false, features = ["write"] } -object = { 
version = "0.20", default-features = false, features = ["write"] } +object = { version = "0.21.1", default-features = false, features = ["write"] } serde = { version = "1.0.94", features = ["derive"] } [target.'cfg(target_os = "windows")'.dependencies] diff --git a/crates/obj/Cargo.toml b/crates/obj/Cargo.toml index c5e3cb0dbe..d29a880333 100644 --- a/crates/obj/Cargo.toml +++ b/crates/obj/Cargo.toml @@ -13,7 +13,7 @@ edition = "2018" [dependencies] anyhow = "1.0" wasmtime-environ = { path = "../environ", version = "0.19.0" } -object = { version = "0.20", default-features = false, features = ["write"] } +object = { version = "0.21.1", default-features = false, features = ["write"] } more-asserts = "0.2.1" target-lexicon = { version = "0.10.0", default-features = false } wasmtime-debug = { path = "../debug", version = "0.19.0" } From 957eb9eeba7c2c664b137055caaf20d13c8738b1 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Thu, 20 Aug 2020 10:17:04 +0200 Subject: [PATCH 14/34] Less unnecessary zero and sign extensions --- cranelift/codegen/src/isa/aarch64/lower.rs | 61 ++++++++++++------- .../filetests/vcode/aarch64/extend-op.clif | 1 - .../filetests/vcode/aarch64/heap_addr.clif | 6 +- .../vcode/aarch64/narrow-arithmetic.clif | 2 - 4 files changed, 41 insertions(+), 29 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 65576044b7..d399b90ed0 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -348,6 +348,45 @@ fn put_input_in_rse>( let out_ty = ctx.output_ty(insn, 0); let out_bits = ty_bits(out_ty); + // Is this a zero-extend or sign-extend and can we handle that with a register-mode operator? 
+ if op == Opcode::Uextend || op == Opcode::Sextend { + let sign_extend = op == Opcode::Sextend; + let inner_ty = ctx.input_ty(insn, 0); + let inner_bits = ty_bits(inner_ty); + assert!(inner_bits < out_bits); + if match (sign_extend, narrow_mode) { + // A single zero-extend or sign-extend is equal to itself. + (_, NarrowValueMode::None) => true, + // Two zero-extends or sign-extends in a row is equal to a single zero-extend or sign-extend. + (false, NarrowValueMode::ZeroExtend32) | (false, NarrowValueMode::ZeroExtend64) => { + true + } + (true, NarrowValueMode::SignExtend32) | (true, NarrowValueMode::SignExtend64) => { + true + } + // A zero-extend and a sign-extend in a row is not equal to a single zero-extend or sign-extend + (false, NarrowValueMode::SignExtend32) | (false, NarrowValueMode::SignExtend64) => { + false + } + (true, NarrowValueMode::ZeroExtend32) | (true, NarrowValueMode::ZeroExtend64) => { + false + } + } { + let extendop = match (sign_extend, inner_bits) { + (true, 8) => ExtendOp::SXTB, + (false, 8) => ExtendOp::UXTB, + (true, 16) => ExtendOp::SXTH, + (false, 16) => ExtendOp::UXTH, + (true, 32) => ExtendOp::SXTW, + (false, 32) => ExtendOp::UXTW, + _ => unreachable!(), + }; + let reg = + put_input_in_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None); + return ResultRSE::RegExtend(reg, extendop); + } + } + // If `out_ty` is smaller than 32 bits and we need to zero- or sign-extend, // then get the result into a register and return an Extend-mode operand on // that register. @@ -381,28 +420,6 @@ fn put_input_in_rse>( }; return ResultRSE::RegExtend(reg, extendop); } - - // Is this a zero-extend or sign-extend and can we handle that with a register-mode operator? 
- if op == Opcode::Uextend || op == Opcode::Sextend { - assert!(out_bits == 32 || out_bits == 64); - let sign_extend = op == Opcode::Sextend; - let inner_ty = ctx.input_ty(insn, 0); - let inner_bits = ty_bits(inner_ty); - assert!(inner_bits < out_bits); - let extendop = match (sign_extend, inner_bits) { - (true, 1) => ExtendOp::SXTB, - (false, 1) => ExtendOp::UXTB, - (true, 8) => ExtendOp::SXTB, - (false, 8) => ExtendOp::UXTB, - (true, 16) => ExtendOp::SXTH, - (false, 16) => ExtendOp::UXTH, - (true, 32) => ExtendOp::SXTW, - (false, 32) => ExtendOp::UXTW, - _ => unreachable!(), - }; - let reg = put_input_in_reg(ctx, input, NarrowValueMode::None); - return ResultRSE::RegExtend(reg, extendop); - } } ResultRSE::from_rs(put_input_in_rs(ctx, input, narrow_mode)) diff --git a/cranelift/filetests/filetests/vcode/aarch64/extend-op.clif b/cranelift/filetests/filetests/vcode/aarch64/extend-op.clif index 5237aa6ac4..aa58cb2deb 100644 --- a/cranelift/filetests/filetests/vcode/aarch64/extend-op.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/extend-op.clif @@ -11,7 +11,6 @@ block0(v0: i8): ; check: stp fp, lr, [sp, #-16]! 
; nextln: mov fp, sp -; nextln: sxtb x0, w0 ; nextln: movz x1, #42 ; nextln: add x0, x1, x0, SXTB ; nextln: mov sp, fp diff --git a/cranelift/filetests/filetests/vcode/aarch64/heap_addr.clif b/cranelift/filetests/filetests/vcode/aarch64/heap_addr.clif index a982a68684..e4ff1471be 100644 --- a/cranelift/filetests/filetests/vcode/aarch64/heap_addr.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/heap_addr.clif @@ -20,8 +20,7 @@ block0(v0: i64, v1: i32): ; nextln: subs wzr, w1, w2 ; nextln: b.ls label1 ; b label2 ; check: Block 1: -; check: mov w3, w1 -; check: add x0, x0, x3, UXTW +; check: add x0, x0, x1, UXTW ; nextln: subs wzr, w1, w2 ; nextln: movz x1, #0 ; nextln: csel x0, x1, x0, hi @@ -46,8 +45,7 @@ block0(v0: i64, v1: i32): ; nextln: subs wzr, w1, #65536 ; nextln: b.ls label1 ; b label2 ; check: Block 1: -; check: mov w2, w1 -; check: add x0, x0, x2, UXTW +; check: add x0, x0, x1, UXTW ; nextln: subs wzr, w1, #65536 ; nextln: movz x1, #0 ; nextln: csel x0, x1, x0, hi diff --git a/cranelift/filetests/filetests/vcode/aarch64/narrow-arithmetic.clif b/cranelift/filetests/filetests/vcode/aarch64/narrow-arithmetic.clif index 7af0502152..e68eb28c67 100644 --- a/cranelift/filetests/filetests/vcode/aarch64/narrow-arithmetic.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/narrow-arithmetic.clif @@ -49,7 +49,6 @@ block0(v0: i32, v1: i8): ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp -; nextln: sxtb w1, w1 ; nextln: add w0, w0, w1, SXTB ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 @@ -64,7 +63,6 @@ block0(v0: i64, v1: i32): ; check: stp fp, lr, [sp, #-16]! 
; nextln: mov fp, sp -; nextln: sxtw x1, w1 ; nextln: add x0, x0, x1, SXTW ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 From 693c6ea7715060950702d85f2dcf16627026b0da Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 20 Aug 2020 04:34:31 -0500 Subject: [PATCH 15/34] wasmtime: Extract cranelift/lightbeam compilers to separate crates (#2117) This commit extracts the two implementations of `Compiler` into two separate crates, `wasmtime-cranelift` and `wasmtime-lightbeam`. The `wasmtime-jit` crate then depends on these two and instantiates them appropriately. The goal here is to start reducing the weight of the `wasmtime-environ` crate, which currently serves as a common set of types between all `wasmtime-*` crates. Long-term I'd like to remove the dependency on Cranelift from `wasmtime-environ`, but that's going to take a lot more work. In the meantime I figure it's a good way to get started by separating out the lightbeam/cranelift function compilers from the `wasmtime-environ` crate. We can continue to iterate on moving things out in the future, too.
--- .github/workflows/main.yml | 2 + Cargo.lock | 25 +- Cargo.toml | 7 +- crates/cranelift/Cargo.toml | 19 + crates/cranelift/README.md | 4 + .../src/func_environ.rs | 260 +------------- .../src/cranelift.rs => cranelift/src/lib.rs} | 29 +- crates/environ/Cargo.toml | 2 - crates/environ/src/builtin.rs | 123 +++++++ crates/environ/src/lib.rs | 18 +- crates/environ/src/lightbeam.rs | 71 ---- crates/jit/Cargo.toml | 4 +- crates/jit/src/compiler.rs | 4 +- crates/lightbeam/wasmtime/Cargo.toml | 18 + crates/lightbeam/wasmtime/README.md | 4 + crates/lightbeam/wasmtime/src/lib.rs | 334 ++++++++++++++++++ crates/wast/Cargo.toml | 3 - scripts/publish.rs | 4 +- src/obj.rs | 2 - 19 files changed, 564 insertions(+), 369 deletions(-) create mode 100644 crates/cranelift/Cargo.toml create mode 100644 crates/cranelift/README.md rename crates/{environ => cranelift}/src/func_environ.rs (84%) rename crates/{environ/src/cranelift.rs => cranelift/src/lib.rs} (96%) create mode 100644 crates/environ/src/builtin.rs delete mode 100644 crates/environ/src/lightbeam.rs create mode 100644 crates/lightbeam/wasmtime/Cargo.toml create mode 100644 crates/lightbeam/wasmtime/README.md create mode 100644 crates/lightbeam/wasmtime/src/lib.rs diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 88674b47d3..6f3a79baeb 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -242,6 +242,7 @@ jobs: --features test-programs/test_programs \ --all \ --exclude lightbeam \ + --exclude wasmtime-lightbeam \ --exclude peepmatic \ --exclude peepmatic-automata \ --exclude peepmatic-fuzzing \ @@ -375,6 +376,7 @@ jobs: --release \ --all \ --exclude lightbeam \ + --exclude wasmtime-lightbeam \ --exclude peepmatic \ --exclude peepmatic-automata \ --exclude peepmatic-fuzzing \ diff --git a/Cargo.lock b/Cargo.lock index 77e99deddf..db70f7da2f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2414,6 +2414,17 @@ dependencies = [ "wat", ] +[[package]] +name = "wasmtime-cranelift" +version 
= "0.19.0" +dependencies = [ + "cranelift-codegen", + "cranelift-entity", + "cranelift-frontend", + "cranelift-wasm", + "wasmtime-environ", +] + [[package]] name = "wasmtime-debug" version = "0.19.0" @@ -2436,11 +2447,9 @@ dependencies = [ "cfg-if", "cranelift-codegen", "cranelift-entity", - "cranelift-frontend", "cranelift-wasm", "gimli 0.21.0", "indexmap", - "lightbeam", "log", "more-asserts", "serde", @@ -2500,14 +2509,26 @@ dependencies = [ "target-lexicon", "thiserror", "wasmparser 0.59.0", + "wasmtime-cranelift", "wasmtime-debug", "wasmtime-environ", + "wasmtime-lightbeam", "wasmtime-obj", "wasmtime-profiling", "wasmtime-runtime", "winapi", ] +[[package]] +name = "wasmtime-lightbeam" +version = "0.19.0" +dependencies = [ + "cranelift-codegen", + "lightbeam", + "wasmparser 0.59.0", + "wasmtime-environ", +] + [[package]] name = "wasmtime-obj" version = "0.19.0" diff --git a/Cargo.toml b/Cargo.toml index 983811b326..ba36d92e89 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -74,12 +74,7 @@ members = [ [features] default = ["jitdump", "wasmtime/wat", "wasmtime/parallel-compilation"] -lightbeam = [ - "wasmtime-environ/lightbeam", - "wasmtime-jit/lightbeam", - "wasmtime-wast/lightbeam", - "wasmtime/lightbeam", -] +lightbeam = ["wasmtime/lightbeam"] jitdump = ["wasmtime/jitdump"] vtune = ["wasmtime/vtune"] diff --git a/crates/cranelift/Cargo.toml b/crates/cranelift/Cargo.toml new file mode 100644 index 0000000000..f3a586e9dd --- /dev/null +++ b/crates/cranelift/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "wasmtime-cranelift" +version = "0.19.0" +authors = ["The Wasmtime Project Developers"] +description = "Integration between Cranelift and Wasmtime" +license = "Apache-2.0 WITH LLVM-exception" +repository = "https://github.com/bytecodealliance/wasmtime" +documentation = "https://docs.rs/wasmtime-cranelift/" +categories = ["wasm"] +keywords = ["webassembly", "wasm"] +readme = "README.md" +edition = "2018" + +[dependencies] +wasmtime-environ = { path = "../environ", 
version = "0.19.0" } +cranelift-wasm = { path = "../../cranelift/wasm", version = "0.66.0" } +cranelift-codegen = { path = "../../cranelift/codegen", version = "0.66.0" } +cranelift-frontend = { path = "../../cranelift/frontend", version = "0.66.0" } +cranelift-entity = { path = "../../cranelift/entity", version = "0.66.0" } diff --git a/crates/cranelift/README.md b/crates/cranelift/README.md new file mode 100644 index 0000000000..c5f7934a8e --- /dev/null +++ b/crates/cranelift/README.md @@ -0,0 +1,4 @@ +# `wasmtime-cranelift` + +This crate provides an implementation of the `Compiler` trait which is +connected to Cranelift. diff --git a/crates/environ/src/func_environ.rs b/crates/cranelift/src/func_environ.rs similarity index 84% rename from crates/environ/src/func_environ.rs rename to crates/cranelift/src/func_environ.rs index aeaf5f6e79..a7845361eb 100644 --- a/crates/environ/src/func_environ.rs +++ b/crates/cranelift/src/func_environ.rs @@ -1,6 +1,3 @@ -use crate::module::{MemoryPlan, MemoryStyle, TableStyle}; -use crate::vmoffsets::VMOffsets; -use crate::{Module, Tunables, INTERRUPTED, WASM_PAGE_SIZE}; use cranelift_codegen::cursor::FuncCursor; use cranelift_codegen::ir; use cranelift_codegen::ir::condcodes::*; @@ -14,20 +11,18 @@ use cranelift_wasm::{ self, FuncIndex, GlobalIndex, GlobalVariable, MemoryIndex, SignatureIndex, TableIndex, TargetEnvironment, WasmError, WasmResult, WasmType, }; -#[cfg(feature = "lightbeam")] -use cranelift_wasm::{DefinedFuncIndex, DefinedGlobalIndex, DefinedMemoryIndex, DefinedTableIndex}; use std::convert::TryFrom; +use wasmtime_environ::{ + BuiltinFunctionIndex, MemoryPlan, MemoryStyle, Module, TableStyle, Tunables, VMOffsets, + INTERRUPTED, WASM_PAGE_SIZE, +}; /// Compute an `ir::ExternalName` for a given wasm function index. pub fn get_func_name(func_index: FuncIndex) -> ir::ExternalName { ir::ExternalName::user(0, func_index.as_u32()) } -/// An index type for builtin functions.
-#[derive(Copy, Clone, Debug)] -pub struct BuiltinFunctionIndex(u32); - -macro_rules! declare_builtin_functions { +macro_rules! declare_function_signatures { ( $( $( #[$attr:meta] )* @@ -91,111 +86,10 @@ macro_rules! declare_builtin_functions { } )* } - - impl BuiltinFunctionIndex { - declare_builtin_functions!( - @indices; - 0; - $( $( #[$attr] )* $name; )* - ); - } }; - - // Base case: no more indices to declare, so define the total number of - // function indices. - ( - @indices; - $len:expr; - ) => { - /// Returns the total number of builtin functions. - pub const fn builtin_functions_total_number() -> u32 { - $len - } - }; - - // Recursive case: declare the next index, and then keep declaring the rest of - // the indices. - ( - @indices; - $index:expr; - $( #[$this_attr:meta] )* - $this_name:ident; - $( - $( #[$rest_attr:meta] )* - $rest_name:ident; - )* - ) => { - $( #[$this_attr] )* - pub const fn $this_name() -> Self { - Self($index) - } - - declare_builtin_functions!( - @indices; - ($index + 1); - $( $( #[$rest_attr] )* $rest_name; )* - ); - } } -declare_builtin_functions! { - /// Returns an index for wasm's `memory.grow` builtin function. - memory32_grow(vmctx, i32, i32) -> (i32); - /// Returns an index for wasm's imported `memory.grow` builtin function. - imported_memory32_grow(vmctx, i32, i32) -> (i32); - /// Returns an index for wasm's `memory.size` builtin function. - memory32_size(vmctx, i32) -> (i32); - /// Returns an index for wasm's imported `memory.size` builtin function. - imported_memory32_size(vmctx, i32) -> (i32); - /// Returns an index for wasm's `table.copy` when both tables are locally - /// defined. - table_copy(vmctx, i32, i32, i32, i32, i32) -> (); - /// Returns an index for wasm's `table.init`. - table_init(vmctx, i32, i32, i32, i32, i32) -> (); - /// Returns an index for wasm's `elem.drop`. - elem_drop(vmctx, i32) -> (); - /// Returns an index for wasm's `memory.copy` for locally defined memories. 
- defined_memory_copy(vmctx, i32, i32, i32, i32) -> (); - /// Returns an index for wasm's `memory.copy` for imported memories. - imported_memory_copy(vmctx, i32, i32, i32, i32) -> (); - /// Returns an index for wasm's `memory.fill` for locally defined memories. - memory_fill(vmctx, i32, i32, i32, i32) -> (); - /// Returns an index for wasm's `memory.fill` for imported memories. - imported_memory_fill(vmctx, i32, i32, i32, i32) -> (); - /// Returns an index for wasm's `memory.init` instruction. - memory_init(vmctx, i32, i32, i32, i32, i32) -> (); - /// Returns an index for wasm's `data.drop` instruction. - data_drop(vmctx, i32) -> (); - /// Returns an index for Wasm's `table.grow` instruction for `funcref`s. - table_grow_funcref(vmctx, i32, i32, pointer) -> (i32); - /// Returns an index for Wasm's `table.grow` instruction for `externref`s. - table_grow_externref(vmctx, i32, i32, reference) -> (i32); - /// Returns an index for Wasm's `table.fill` instruction for `externref`s. - table_fill_externref(vmctx, i32, i32, reference, i32) -> (); - /// Returns an index for Wasm's `table.fill` instruction for `funcref`s. - table_fill_funcref(vmctx, i32, i32, pointer, i32) -> (); - /// Returns an index to drop a `VMExternRef`. - drop_externref(pointer) -> (); - /// Returns an index to do a GC and then insert a `VMExternRef` into the - /// `VMExternRefActivationsTable`. - activations_table_insert_with_gc(vmctx, reference) -> (); - /// Returns an index for Wasm's `global.get` instruction for `externref`s. - externref_global_get(vmctx, i32) -> (reference); - /// Returns an index for Wasm's `global.get` instruction for `externref`s. - externref_global_set(vmctx, i32, reference) -> (); -} - -impl BuiltinFunctionIndex { - /// Create a new `BuiltinFunctionIndex` from its index - pub const fn from_u32(i: u32) -> Self { - Self(i) - } - - /// Return the index as an u32 number. 
- pub const fn index(&self) -> u32 { - self.0 - } -} +wasmtime_environ::foreach_builtin_function!(declare_function_signatures); /// The `FuncEnvironment` implementation for use by the `ModuleEnvironment`. pub struct FuncEnvironment<'module_environment> { @@ -464,153 +358,13 @@ impl<'module_environment> FuncEnvironment<'module_environment> { } } -// TODO: This is necessary as if Lightbeam used `FuncEnvironment` directly it would cause -// a circular dependency graph. We should extract common types out into a separate -// crate that Lightbeam can use but until then we need this trait. -#[cfg(feature = "lightbeam")] -impl lightbeam::ModuleContext for FuncEnvironment<'_> { - type Signature = ir::Signature; - type GlobalType = ir::Type; - - fn func_index(&self, defined_func_index: u32) -> u32 { - self.module - .func_index(DefinedFuncIndex::from_u32(defined_func_index)) - .as_u32() - } - - fn defined_func_index(&self, func_index: u32) -> Option { - self.module - .defined_func_index(FuncIndex::from_u32(func_index)) - .map(DefinedFuncIndex::as_u32) - } - - fn defined_global_index(&self, global_index: u32) -> Option { - self.module - .defined_global_index(GlobalIndex::from_u32(global_index)) - .map(DefinedGlobalIndex::as_u32) - } - - fn global_type(&self, global_index: u32) -> &Self::GlobalType { - &self.module.globals[GlobalIndex::from_u32(global_index)].ty - } - - fn func_type_index(&self, func_idx: u32) -> u32 { - self.module.functions[FuncIndex::from_u32(func_idx)].as_u32() - } - - fn signature(&self, index: u32) -> &Self::Signature { - &self.module.signatures[SignatureIndex::from_u32(index)].1 - } - - fn defined_table_index(&self, table_index: u32) -> Option { - self.module - .defined_table_index(TableIndex::from_u32(table_index)) - .map(DefinedTableIndex::as_u32) - } - - fn defined_memory_index(&self, memory_index: u32) -> Option { - self.module - .defined_memory_index(MemoryIndex::from_u32(memory_index)) - .map(DefinedMemoryIndex::as_u32) - } - - fn 
vmctx_builtin_function(&self, func_index: u32) -> u32 { - self.offsets - .vmctx_builtin_function(BuiltinFunctionIndex::from_u32(func_index)) - } - - fn vmctx_vmfunction_import_body(&self, func_index: u32) -> u32 { - self.offsets - .vmctx_vmfunction_import_body(FuncIndex::from_u32(func_index)) - } - fn vmctx_vmfunction_import_vmctx(&self, func_index: u32) -> u32 { - self.offsets - .vmctx_vmfunction_import_vmctx(FuncIndex::from_u32(func_index)) - } - - fn vmctx_vmglobal_import_from(&self, global_index: u32) -> u32 { - self.offsets - .vmctx_vmglobal_import_from(GlobalIndex::from_u32(global_index)) - } - fn vmctx_vmglobal_definition(&self, defined_global_index: u32) -> u32 { - self.offsets - .vmctx_vmglobal_definition(DefinedGlobalIndex::from_u32(defined_global_index)) - } - fn vmctx_vmmemory_import_from(&self, memory_index: u32) -> u32 { - self.offsets - .vmctx_vmmemory_import_from(MemoryIndex::from_u32(memory_index)) - } - fn vmctx_vmmemory_definition(&self, defined_memory_index: u32) -> u32 { - self.offsets - .vmctx_vmmemory_definition(DefinedMemoryIndex::from_u32(defined_memory_index)) - } - fn vmctx_vmmemory_definition_base(&self, defined_memory_index: u32) -> u32 { - self.offsets - .vmctx_vmmemory_definition_base(DefinedMemoryIndex::from_u32(defined_memory_index)) - } - fn vmctx_vmmemory_definition_current_length(&self, defined_memory_index: u32) -> u32 { - self.offsets - .vmctx_vmmemory_definition_current_length(DefinedMemoryIndex::from_u32( - defined_memory_index, - )) - } - fn vmmemory_definition_base(&self) -> u8 { - self.offsets.vmmemory_definition_base() - } - fn vmmemory_definition_current_length(&self) -> u8 { - self.offsets.vmmemory_definition_current_length() - } - fn vmctx_vmtable_import_from(&self, table_index: u32) -> u32 { - self.offsets - .vmctx_vmtable_import_from(TableIndex::from_u32(table_index)) - } - fn vmctx_vmtable_definition(&self, defined_table_index: u32) -> u32 { - self.offsets - 
.vmctx_vmtable_definition(DefinedTableIndex::from_u32(defined_table_index)) - } - fn vmctx_vmtable_definition_base(&self, defined_table_index: u32) -> u32 { - self.offsets - .vmctx_vmtable_definition_base(DefinedTableIndex::from_u32(defined_table_index)) - } - fn vmctx_vmtable_definition_current_elements(&self, defined_table_index: u32) -> u32 { - self.offsets - .vmctx_vmtable_definition_current_elements(DefinedTableIndex::from_u32( - defined_table_index, - )) - } - fn vmtable_definition_base(&self) -> u8 { - self.offsets.vmtable_definition_base() - } - fn vmtable_definition_current_elements(&self) -> u8 { - self.offsets.vmtable_definition_current_elements() - } - fn vmcaller_checked_anyfunc_type_index(&self) -> u8 { - self.offsets.vmcaller_checked_anyfunc_type_index() - } - fn vmcaller_checked_anyfunc_func_ptr(&self) -> u8 { - self.offsets.vmcaller_checked_anyfunc_func_ptr() - } - fn vmcaller_checked_anyfunc_vmctx(&self) -> u8 { - self.offsets.vmcaller_checked_anyfunc_vmctx() - } - fn size_of_vmcaller_checked_anyfunc(&self) -> u8 { - self.offsets.size_of_vmcaller_checked_anyfunc() - } - fn vmctx_vmshared_signature_id(&self, signature_idx: u32) -> u32 { - self.offsets - .vmctx_vmshared_signature_id(SignatureIndex::from_u32(signature_idx)) - } - - // TODO: type of a global -} - impl<'module_environment> TargetEnvironment for FuncEnvironment<'module_environment> { fn target_config(&self) -> TargetFrontendConfig { self.target_config } fn reference_type(&self, ty: WasmType) -> ir::Type { - crate::reference_type(ty, self.pointer_type()) + wasmtime_environ::reference_type(ty, self.pointer_type()) } } diff --git a/crates/environ/src/cranelift.rs b/crates/cranelift/src/lib.rs similarity index 96% rename from crates/environ/src/cranelift.rs rename to crates/cranelift/src/lib.rs index 86b55a253f..c6670659fc 100644 --- a/crates/environ/src/cranelift.rs +++ b/crates/cranelift/src/lib.rs @@ -1,4 +1,7 @@ //! Support for compiling with Cranelift. +//! +//! 
This crate provides an implementation of [`Compiler`] in the form of +//! [`Cranelift`]. // # How does Wasmtime prevent stack overflow? // @@ -86,13 +89,6 @@ // assume no valid stack pointer will ever be `usize::max_value() - 32k`. use crate::func_environ::{get_func_name, FuncEnvironment}; -use crate::Compiler; -use crate::{ - CompileError, CompiledFunction, Relocation, RelocationTarget, StackMapInformation, - TrapInformation, -}; -use crate::{FunctionAddressMap, InstructionAddressMap}; -use crate::{FunctionBodyData, ModuleTranslation}; use cranelift_codegen::ir::{self, ExternalName}; use cranelift_codegen::machinst::buffer::MachSrcLoc; use cranelift_codegen::print_errors::pretty_error; @@ -100,14 +96,21 @@ use cranelift_codegen::{binemit, isa, Context}; use cranelift_wasm::{DefinedFuncIndex, FuncIndex, FuncTranslator}; use std::convert::TryFrom; use std::sync::Mutex; +use wasmtime_environ::{ + CompileError, CompiledFunction, Compiler, FunctionAddressMap, FunctionBodyData, + InstructionAddressMap, ModuleTranslation, Relocation, RelocationTarget, StackMapInformation, + TrapInformation, +}; + +mod func_environ; /// Implementation of a relocation sink that just saves all the information for later -pub struct RelocSink { +struct RelocSink { /// Current function index. func_index: FuncIndex, /// Relocations recorded for the function. - pub func_relocs: Vec, + func_relocs: Vec, } impl binemit::RelocSink for RelocSink { @@ -166,7 +169,7 @@ impl binemit::RelocSink for RelocSink { impl RelocSink { /// Return a new `RelocSink` instance. 
- pub fn new(func_index: FuncIndex) -> Self { + fn new(func_index: FuncIndex) -> Self { Self { func_index, func_relocs: Vec::new(), @@ -176,14 +179,14 @@ impl RelocSink { /// Implementation of a trap sink that simply stores all trap info in-memory #[derive(Default)] -pub struct TrapSink { +struct TrapSink { /// The in-memory vector of trap info - pub traps: Vec, + traps: Vec, } impl TrapSink { /// Create a new `TrapSink` - pub fn new() -> Self { + fn new() -> Self { Self::default() } } diff --git a/crates/environ/Cargo.toml b/crates/environ/Cargo.toml index 0dac39f985..636079815a 100644 --- a/crates/environ/Cargo.toml +++ b/crates/environ/Cargo.toml @@ -15,10 +15,8 @@ edition = "2018" anyhow = "1.0" cranelift-codegen = { path = "../../cranelift/codegen", version = "0.66.0", features = ["enable-serde"] } cranelift-entity = { path = "../../cranelift/entity", version = "0.66.0", features = ["enable-serde"] } -cranelift-frontend = { path = "../../cranelift/frontend", version = "0.66.0" } cranelift-wasm = { path = "../../cranelift/wasm", version = "0.66.0", features = ["enable-serde"] } wasmparser = "0.59.0" -lightbeam = { path = "../lightbeam", optional = true, version = "0.19.0" } indexmap = { version = "1.0.2", features = ["serde-1"] } thiserror = "1.0.4" serde = { version = "1.0.94", features = ["derive"] } diff --git a/crates/environ/src/builtin.rs b/crates/environ/src/builtin.rs new file mode 100644 index 0000000000..1060f7caaa --- /dev/null +++ b/crates/environ/src/builtin.rs @@ -0,0 +1,123 @@ +/// Helper macro to iterate over all builtin functions and their signatures. +#[macro_export] +macro_rules! foreach_builtin_function { + ($mac:ident) => { + $mac! { + /// Returns an index for wasm's `memory.grow` builtin function. + memory32_grow(vmctx, i32, i32) -> (i32); + /// Returns an index for wasm's imported `memory.grow` builtin function. + imported_memory32_grow(vmctx, i32, i32) -> (i32); + /// Returns an index for wasm's `memory.size` builtin function. 
+ memory32_size(vmctx, i32) -> (i32); + /// Returns an index for wasm's imported `memory.size` builtin function. + imported_memory32_size(vmctx, i32) -> (i32); + /// Returns an index for wasm's `table.copy` when both tables are locally + /// defined. + table_copy(vmctx, i32, i32, i32, i32, i32) -> (); + /// Returns an index for wasm's `table.init`. + table_init(vmctx, i32, i32, i32, i32, i32) -> (); + /// Returns an index for wasm's `elem.drop`. + elem_drop(vmctx, i32) -> (); + /// Returns an index for wasm's `memory.copy` for locally defined memories. + defined_memory_copy(vmctx, i32, i32, i32, i32) -> (); + /// Returns an index for wasm's `memory.copy` for imported memories. + imported_memory_copy(vmctx, i32, i32, i32, i32) -> (); + /// Returns an index for wasm's `memory.fill` for locally defined memories. + memory_fill(vmctx, i32, i32, i32, i32) -> (); + /// Returns an index for wasm's `memory.fill` for imported memories. + imported_memory_fill(vmctx, i32, i32, i32, i32) -> (); + /// Returns an index for wasm's `memory.init` instruction. + memory_init(vmctx, i32, i32, i32, i32, i32) -> (); + /// Returns an index for wasm's `data.drop` instruction. + data_drop(vmctx, i32) -> (); + /// Returns an index for Wasm's `table.grow` instruction for `funcref`s. + table_grow_funcref(vmctx, i32, i32, pointer) -> (i32); + /// Returns an index for Wasm's `table.grow` instruction for `externref`s. + table_grow_externref(vmctx, i32, i32, reference) -> (i32); + /// Returns an index for Wasm's `table.fill` instruction for `externref`s. + table_fill_externref(vmctx, i32, i32, reference, i32) -> (); + /// Returns an index for Wasm's `table.fill` instruction for `funcref`s. + table_fill_funcref(vmctx, i32, i32, pointer, i32) -> (); + /// Returns an index to drop a `VMExternRef`. + drop_externref(pointer) -> (); + /// Returns an index to do a GC and then insert a `VMExternRef` into the + /// `VMExternRefActivationsTable`. 
+ activations_table_insert_with_gc(vmctx, reference) -> (); + /// Returns an index for Wasm's `global.get` instruction for `externref`s. + externref_global_get(vmctx, i32) -> (reference); + /// Returns an index for Wasm's `global.set` instruction for `externref`s. + externref_global_set(vmctx, i32, reference) -> (); + } + }; +} + +/// An index type for builtin functions. +#[derive(Copy, Clone, Debug)] +pub struct BuiltinFunctionIndex(u32); + +impl BuiltinFunctionIndex { + /// Create a new `BuiltinFunctionIndex` from its index + pub const fn from_u32(i: u32) -> Self { + Self(i) + } + + /// Return the index as a u32 number. + pub const fn index(&self) -> u32 { + self.0 + } +} + +macro_rules! declare_indexes { + ( + $( + $( #[$attr:meta] )* + $name:ident( $( $param:ident ),* ) -> ( $( $result:ident ),* ); + )* + ) => { + impl BuiltinFunctionIndex { + declare_indexes!( + @indices; + 0; + $( $( #[$attr] )* $name; )* + ); + } + }; + + // Base case: no more indices to declare, so define the total number of + // function indices. + ( + @indices; + $len:expr; + ) => { + /// Returns the total number of builtin functions. + pub const fn builtin_functions_total_number() -> u32 { + $len + } + }; + + // Recursive case: declare the next index, and then keep declaring the rest of + // the indices.
+ ( + @indices; + $index:expr; + $( #[$this_attr:meta] )* + $this_name:ident; + $( + $( #[$rest_attr:meta] )* + $rest_name:ident; + )* + ) => { + $( #[$this_attr] )* + pub const fn $this_name() -> Self { + Self($index) + } + + declare_indexes!( + @indices; + ($index + 1); + $( $( #[$rest_attr] )* $rest_name; )* + ); + } +} + +foreach_builtin_function!(declare_indexes); diff --git a/crates/environ/src/lib.rs b/crates/environ/src/lib.rs index d077926321..f5ab4358c5 100644 --- a/crates/environ/src/lib.rs +++ b/crates/environ/src/lib.rs @@ -25,25 +25,19 @@ )] mod address_map; +mod builtin; mod compilation; mod data_structures; -mod func_environ; mod module; mod module_environ; mod tunables; mod vmoffsets; -pub mod cranelift; -#[cfg(feature = "lightbeam")] -pub mod lightbeam; - pub use crate::address_map::*; +pub use crate::builtin::*; pub use crate::compilation::*; -pub use crate::cranelift::Cranelift; pub use crate::data_structures::*; -pub use crate::func_environ::BuiltinFunctionIndex; -#[cfg(feature = "lightbeam")] -pub use crate::lightbeam::Lightbeam; +// pub use crate::func_environ::BuiltinFunctionIndex; pub use crate::module::{ EntityIndex, MemoryPlan, MemoryStyle, Module, TableElements, TablePlan, TableStyle, }; @@ -60,10 +54,8 @@ pub const WASM_MAX_PAGES: u32 = 0x10000; /// Version number of this crate. pub const VERSION: &str = env!("CARGO_PKG_VERSION"); -pub(crate) fn reference_type( - wasm_ty: cranelift_wasm::WasmType, - pointer_type: ir::Type, -) -> ir::Type { +/// Returns the reference type to use for the provided wasm type. +pub fn reference_type(wasm_ty: cranelift_wasm::WasmType, pointer_type: ir::Type) -> ir::Type { match wasm_ty { cranelift_wasm::WasmType::FuncRef => pointer_type, cranelift_wasm::WasmType::ExternRef => match pointer_type { diff --git a/crates/environ/src/lightbeam.rs b/crates/environ/src/lightbeam.rs deleted file mode 100644 index caaba4b024..0000000000 --- a/crates/environ/src/lightbeam.rs +++ /dev/null @@ -1,71 +0,0 @@ -//! 
Support for compiling with Lightbeam. - -use crate::compilation::{CompileError, CompiledFunction, Compiler}; -use crate::cranelift::{RelocSink, TrapSink}; -use crate::func_environ::FuncEnvironment; -use crate::{FunctionBodyData, ModuleTranslation}; -use cranelift_codegen::isa; -use cranelift_wasm::DefinedFuncIndex; -use lightbeam::{CodeGenSession, NullOffsetSink, Sinks}; - -/// A compiler that compiles a WebAssembly module with Lightbeam, directly translating the Wasm file. -pub struct Lightbeam; - -impl Compiler for Lightbeam { - fn compile_function( - &self, - translation: &ModuleTranslation, - i: DefinedFuncIndex, - function_body: &FunctionBodyData<'_>, - isa: &dyn isa::TargetIsa, - ) -> Result { - if translation.tunables.debug_info { - return Err(CompileError::DebugInfoNotSupported); - } - let func_index = translation.module.func_index(i); - - let env = FuncEnvironment::new( - isa.frontend_config(), - &translation.module, - &translation.tunables, - ); - let mut codegen_session: CodeGenSession<_> = CodeGenSession::new( - translation.function_body_inputs.len() as u32, - &env, - lightbeam::microwasm::I32, - ); - - let mut reloc_sink = RelocSink::new(func_index); - let mut trap_sink = TrapSink::new(); - lightbeam::translate_function( - &mut codegen_session, - Sinks { - relocs: &mut reloc_sink, - traps: &mut trap_sink, - offsets: &mut NullOffsetSink, - }, - i.as_u32(), - wasmparser::FunctionBody::new(0, function_body.data), - ) - .map_err(|e| CompileError::Codegen(format!("Failed to translate function: {}", e)))?; - - let code_section = codegen_session - .into_translated_code_section() - .map_err(|e| CompileError::Codegen(format!("Failed to generate output code: {}", e)))?; - - Ok(CompiledFunction { - // TODO: try to remove copy here (?) 
- body: code_section.buffer().to_vec(), - traps: trap_sink.traps, - relocations: reloc_sink.func_relocs, - - // not implemented for lightbeam currently - unwind_info: None, - stack_maps: Default::default(), - stack_slots: Default::default(), - value_labels_ranges: Default::default(), - address_map: Default::default(), - jt_offsets: Default::default(), - }) - } -} diff --git a/crates/jit/Cargo.toml b/crates/jit/Cargo.toml index 1896cbc4e2..71bb031d3b 100644 --- a/crates/jit/Cargo.toml +++ b/crates/jit/Cargo.toml @@ -19,6 +19,8 @@ cranelift-native = { path = "../../cranelift/native", version = "0.66.0" } cranelift-frontend = { path = "../../cranelift/frontend", version = "0.66.0" } wasmtime-environ = { path = "../environ", version = "0.19.0" } wasmtime-runtime = { path = "../runtime", version = "0.19.0" } +wasmtime-cranelift = { path = "../cranelift", version = "0.19.0" } +wasmtime-lightbeam = { path = "../lightbeam/wasmtime", version = "0.19.0", optional = true } wasmtime-debug = { path = "../debug", version = "0.19.0" } wasmtime-profiling = { path = "../profiling", version = "0.19.0" } wasmtime-obj = { path = "../obj", version = "0.19.0" } @@ -39,7 +41,7 @@ serde = { version = "1.0.94", features = ["derive"] } winapi = { version = "0.3.8", features = ["winnt", "impl-default"] } [features] -lightbeam = ["wasmtime-environ/lightbeam"] +lightbeam = ["wasmtime-lightbeam"] jitdump = ["wasmtime-profiling/jitdump"] vtune = ["wasmtime-profiling/vtune"] parallel-compilation = ["rayon"] diff --git a/crates/jit/src/compiler.rs b/crates/jit/src/compiler.rs index 349135b33e..192b0abcc0 100644 --- a/crates/jit/src/compiler.rs +++ b/crates/jit/src/compiler.rs @@ -50,10 +50,10 @@ impl Compiler { strategy, compiler: match strategy { CompilationStrategy::Auto | CompilationStrategy::Cranelift => { - Box::new(wasmtime_environ::cranelift::Cranelift::default()) + Box::new(wasmtime_cranelift::Cranelift::default()) } #[cfg(feature = "lightbeam")] - CompilationStrategy::Lightbeam => 
Box::new(wasmtime_environ::lightbeam::Lightbeam), + CompilationStrategy::Lightbeam => Box::new(wasmtime_lightbeam::Lightbeam), }, tunables, } diff --git a/crates/lightbeam/wasmtime/Cargo.toml b/crates/lightbeam/wasmtime/Cargo.toml new file mode 100644 index 0000000000..2a5271eb92 --- /dev/null +++ b/crates/lightbeam/wasmtime/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "wasmtime-lightbeam" +version = "0.19.0" +authors = ["The Wasmtime Project Developers"] +description = "Integration between Lightbeam and Wasmtime" +license = "Apache-2.0 WITH LLVM-exception" +repository = "https://github.com/bytecodealliance/wasmtime" +documentation = "https://docs.rs/wasmtime-lightbeam/" +categories = ["wasm"] +keywords = ["webassembly", "wasm"] +readme = "README.md" +edition = "2018" + +[dependencies] +lightbeam = { path = "..", version = "0.19.0" } +wasmparser = "0.59" +cranelift-codegen = { path = "../../../cranelift/codegen", version = "0.66.0" } +wasmtime-environ = { path = "../../environ", version = "0.19.0" } diff --git a/crates/lightbeam/wasmtime/README.md b/crates/lightbeam/wasmtime/README.md new file mode 100644 index 0000000000..e86218faa9 --- /dev/null +++ b/crates/lightbeam/wasmtime/README.md @@ -0,0 +1,4 @@ +# `wasmtime-lightbeam` + +This crate provides an implementation of the `Compiler` trait which is +connected to Lightbeam. diff --git a/crates/lightbeam/wasmtime/src/lib.rs b/crates/lightbeam/wasmtime/src/lib.rs new file mode 100644 index 0000000000..d7337b6a76 --- /dev/null +++ b/crates/lightbeam/wasmtime/src/lib.rs @@ -0,0 +1,334 @@ +//! Support for compiling with Lightbeam. +//! +//! This crate provides an implementation of [`Compiler`] in the form of +//! [`Lightbeam`].
+ +use cranelift_codegen::binemit; +use cranelift_codegen::ir::{self, ExternalName}; +use cranelift_codegen::isa; +use lightbeam::{CodeGenSession, NullOffsetSink, Sinks}; +use wasmtime_environ::wasm::{ + DefinedFuncIndex, DefinedGlobalIndex, DefinedMemoryIndex, DefinedTableIndex, FuncIndex, + GlobalIndex, MemoryIndex, SignatureIndex, TableIndex, +}; +use wasmtime_environ::{ + BuiltinFunctionIndex, CompileError, CompiledFunction, Compiler, FunctionBodyData, Module, + ModuleTranslation, Relocation, RelocationTarget, TrapInformation, VMOffsets, +}; + +/// A compiler that compiles a WebAssembly module with Lightbeam, directly translating the Wasm file. +pub struct Lightbeam; + +impl Compiler for Lightbeam { + fn compile_function( + &self, + translation: &ModuleTranslation, + i: DefinedFuncIndex, + function_body: &FunctionBodyData<'_>, + isa: &dyn isa::TargetIsa, + ) -> Result { + if translation.tunables.debug_info { + return Err(CompileError::DebugInfoNotSupported); + } + let func_index = translation.module.func_index(i); + + let env = FuncEnvironment::new(isa.frontend_config().pointer_bytes(), &translation.module); + let mut codegen_session: CodeGenSession<_> = CodeGenSession::new( + translation.function_body_inputs.len() as u32, + &env, + lightbeam::microwasm::I32, + ); + + let mut reloc_sink = RelocSink::new(func_index); + let mut trap_sink = TrapSink::new(); + lightbeam::translate_function( + &mut codegen_session, + Sinks { + relocs: &mut reloc_sink, + traps: &mut trap_sink, + offsets: &mut NullOffsetSink, + }, + i.as_u32(), + wasmparser::FunctionBody::new(0, function_body.data), + ) + .map_err(|e| CompileError::Codegen(format!("Failed to translate function: {}", e)))?; + + let code_section = codegen_session + .into_translated_code_section() + .map_err(|e| CompileError::Codegen(format!("Failed to generate output code: {}", e)))?; + + Ok(CompiledFunction { + // TODO: try to remove copy here (?) 
+ body: code_section.buffer().to_vec(), + traps: trap_sink.traps, + relocations: reloc_sink.func_relocs, + + // not implemented for lightbeam currently + unwind_info: None, + stack_maps: Default::default(), + stack_slots: Default::default(), + value_labels_ranges: Default::default(), + address_map: Default::default(), + jt_offsets: Default::default(), + }) + } +} + +/// Implementation of a relocation sink that just saves all the information for later +struct RelocSink { + /// Current function index. + func_index: FuncIndex, + + /// Relocations recorded for the function. + func_relocs: Vec, +} + +impl binemit::RelocSink for RelocSink { + fn reloc_block( + &mut self, + _offset: binemit::CodeOffset, + _reloc: binemit::Reloc, + _block_offset: binemit::CodeOffset, + ) { + // This should use the `offsets` field of `ir::Function`. + panic!("block headers not yet implemented"); + } + fn reloc_external( + &mut self, + offset: binemit::CodeOffset, + _srcloc: ir::SourceLoc, + reloc: binemit::Reloc, + name: &ExternalName, + addend: binemit::Addend, + ) { + let reloc_target = if let ExternalName::User { namespace, index } = *name { + debug_assert_eq!(namespace, 0); + RelocationTarget::UserFunc(FuncIndex::from_u32(index)) + } else if let ExternalName::LibCall(libcall) = *name { + RelocationTarget::LibCall(libcall) + } else { + panic!("unrecognized external name") + }; + self.func_relocs.push(Relocation { + reloc, + reloc_target, + offset, + addend, + }); + } + + fn reloc_constant( + &mut self, + _code_offset: binemit::CodeOffset, + _reloc: binemit::Reloc, + _constant_offset: ir::ConstantOffset, + ) { + // Do nothing for now: cranelift emits constant data after the function code and also emits + // function code with correct relative offsets to the constant data. 
+ } + + fn reloc_jt(&mut self, offset: binemit::CodeOffset, reloc: binemit::Reloc, jt: ir::JumpTable) { + self.func_relocs.push(Relocation { + reloc, + reloc_target: RelocationTarget::JumpTable(self.func_index, jt), + offset, + addend: 0, + }); + } +} + +impl RelocSink { + /// Return a new `RelocSink` instance. + fn new(func_index: FuncIndex) -> Self { + Self { + func_index, + func_relocs: Vec::new(), + } + } +} + +/// Implementation of a trap sink that simply stores all trap info in-memory +#[derive(Default)] +struct TrapSink { + /// The in-memory vector of trap info + traps: Vec, +} + +impl TrapSink { + /// Create a new `TrapSink` + fn new() -> Self { + Self::default() + } +} + +impl binemit::TrapSink for TrapSink { + fn trap( + &mut self, + code_offset: binemit::CodeOffset, + source_loc: ir::SourceLoc, + trap_code: ir::TrapCode, + ) { + self.traps.push(TrapInformation { + code_offset, + source_loc, + trap_code, + }); + } +} + +/// The `FuncEnvironment` implementation for use by the `ModuleEnvironment`. +struct FuncEnvironment<'module_environment> { + /// The module-level environment which this function-level environment belongs to. + module: &'module_environment Module, + + /// Offsets to struct fields accessed by JIT code. + offsets: VMOffsets, +} + +impl<'module_environment> FuncEnvironment<'module_environment> { + fn new(pointer_bytes: u8, module: &'module_environment Module) -> Self { + Self { + module, + offsets: VMOffsets::new(pointer_bytes, module), + } + } +} + +// TODO: This is necessary as if Lightbeam used `FuncEnvironment` directly it would cause +// a circular dependency graph. We should extract common types out into a separate +// crate that Lightbeam can use but until then we need this trait. 
+impl lightbeam::ModuleContext for FuncEnvironment<'_> { + type Signature = ir::Signature; + type GlobalType = ir::Type; + + fn func_index(&self, defined_func_index: u32) -> u32 { + self.module + .func_index(DefinedFuncIndex::from_u32(defined_func_index)) + .as_u32() + } + + fn defined_func_index(&self, func_index: u32) -> Option { + self.module + .defined_func_index(FuncIndex::from_u32(func_index)) + .map(DefinedFuncIndex::as_u32) + } + + fn defined_global_index(&self, global_index: u32) -> Option { + self.module + .defined_global_index(GlobalIndex::from_u32(global_index)) + .map(DefinedGlobalIndex::as_u32) + } + + fn global_type(&self, global_index: u32) -> &Self::GlobalType { + &self.module.globals[GlobalIndex::from_u32(global_index)].ty + } + + fn func_type_index(&self, func_idx: u32) -> u32 { + self.module.functions[FuncIndex::from_u32(func_idx)].as_u32() + } + + fn signature(&self, index: u32) -> &Self::Signature { + &self.module.signatures[SignatureIndex::from_u32(index)].1 + } + + fn defined_table_index(&self, table_index: u32) -> Option { + self.module + .defined_table_index(TableIndex::from_u32(table_index)) + .map(DefinedTableIndex::as_u32) + } + + fn defined_memory_index(&self, memory_index: u32) -> Option { + self.module + .defined_memory_index(MemoryIndex::from_u32(memory_index)) + .map(DefinedMemoryIndex::as_u32) + } + + fn vmctx_builtin_function(&self, func_index: u32) -> u32 { + self.offsets + .vmctx_builtin_function(BuiltinFunctionIndex::from_u32(func_index)) + } + + fn vmctx_vmfunction_import_body(&self, func_index: u32) -> u32 { + self.offsets + .vmctx_vmfunction_import_body(FuncIndex::from_u32(func_index)) + } + fn vmctx_vmfunction_import_vmctx(&self, func_index: u32) -> u32 { + self.offsets + .vmctx_vmfunction_import_vmctx(FuncIndex::from_u32(func_index)) + } + + fn vmctx_vmglobal_import_from(&self, global_index: u32) -> u32 { + self.offsets + .vmctx_vmglobal_import_from(GlobalIndex::from_u32(global_index)) + } + fn 
vmctx_vmglobal_definition(&self, defined_global_index: u32) -> u32 { + self.offsets + .vmctx_vmglobal_definition(DefinedGlobalIndex::from_u32(defined_global_index)) + } + fn vmctx_vmmemory_import_from(&self, memory_index: u32) -> u32 { + self.offsets + .vmctx_vmmemory_import_from(MemoryIndex::from_u32(memory_index)) + } + fn vmctx_vmmemory_definition(&self, defined_memory_index: u32) -> u32 { + self.offsets + .vmctx_vmmemory_definition(DefinedMemoryIndex::from_u32(defined_memory_index)) + } + fn vmctx_vmmemory_definition_base(&self, defined_memory_index: u32) -> u32 { + self.offsets + .vmctx_vmmemory_definition_base(DefinedMemoryIndex::from_u32(defined_memory_index)) + } + fn vmctx_vmmemory_definition_current_length(&self, defined_memory_index: u32) -> u32 { + self.offsets + .vmctx_vmmemory_definition_current_length(DefinedMemoryIndex::from_u32( + defined_memory_index, + )) + } + fn vmmemory_definition_base(&self) -> u8 { + self.offsets.vmmemory_definition_base() + } + fn vmmemory_definition_current_length(&self) -> u8 { + self.offsets.vmmemory_definition_current_length() + } + fn vmctx_vmtable_import_from(&self, table_index: u32) -> u32 { + self.offsets + .vmctx_vmtable_import_from(TableIndex::from_u32(table_index)) + } + fn vmctx_vmtable_definition(&self, defined_table_index: u32) -> u32 { + self.offsets + .vmctx_vmtable_definition(DefinedTableIndex::from_u32(defined_table_index)) + } + fn vmctx_vmtable_definition_base(&self, defined_table_index: u32) -> u32 { + self.offsets + .vmctx_vmtable_definition_base(DefinedTableIndex::from_u32(defined_table_index)) + } + fn vmctx_vmtable_definition_current_elements(&self, defined_table_index: u32) -> u32 { + self.offsets + .vmctx_vmtable_definition_current_elements(DefinedTableIndex::from_u32( + defined_table_index, + )) + } + fn vmtable_definition_base(&self) -> u8 { + self.offsets.vmtable_definition_base() + } + fn vmtable_definition_current_elements(&self) -> u8 { + self.offsets.vmtable_definition_current_elements() + 
} + fn vmcaller_checked_anyfunc_type_index(&self) -> u8 { + self.offsets.vmcaller_checked_anyfunc_type_index() + } + fn vmcaller_checked_anyfunc_func_ptr(&self) -> u8 { + self.offsets.vmcaller_checked_anyfunc_func_ptr() + } + fn vmcaller_checked_anyfunc_vmctx(&self) -> u8 { + self.offsets.vmcaller_checked_anyfunc_vmctx() + } + fn size_of_vmcaller_checked_anyfunc(&self) -> u8 { + self.offsets.size_of_vmcaller_checked_anyfunc() + } + fn vmctx_vmshared_signature_id(&self, signature_idx: u32) -> u32 { + self.offsets + .vmctx_vmshared_signature_id(SignatureIndex::from_u32(signature_idx)) + } + + // TODO: type of a global +} diff --git a/crates/wast/Cargo.toml b/crates/wast/Cargo.toml index 5c405d5628..5686413114 100644 --- a/crates/wast/Cargo.toml +++ b/crates/wast/Cargo.toml @@ -17,6 +17,3 @@ wast = "22.0.0" [badges] maintenance = { status = "actively-developed" } - -[features] -lightbeam = ["wasmtime/lightbeam"] diff --git a/scripts/publish.rs b/scripts/publish.rs index 152f6b960d..1cc5b9105b 100644 --- a/scripts/publish.rs +++ b/scripts/publish.rs @@ -56,6 +56,8 @@ const CRATES_TO_PUBLISH: &[&str] = &[ "wasmtime-debug", "wasmtime-profiling", "wasmtime-obj", + "wasmtime-cranelift", + "wasmtime-lightbeam", "wasmtime-jit", "wasmtime-cache", "wasmtime", @@ -298,7 +300,7 @@ fn verify(crates: &[Crate]) { .arg("--manifest-path") .arg(&krate.manifest) .env("CARGO_TARGET_DIR", "./target"); - if krate.name == "lightbeam" || krate.name == "witx" { + if krate.name.contains("lightbeam") || krate.name == "witx" { cmd.arg("--no-verify"); } let status = cmd.status().unwrap(); diff --git a/src/obj.rs b/src/obj.rs index a9b43fc7ab..573e0b68c3 100644 --- a/src/obj.rs +++ b/src/obj.rs @@ -2,8 +2,6 @@ use anyhow::{bail, Context as _, Result}; use object::write::Object; use target_lexicon::Triple; use wasmtime::Strategy; -#[cfg(feature = "lightbeam")] -use wasmtime_environ::Lightbeam; use wasmtime_environ::{settings, settings::Configurable, ModuleEnvironment, Tunables}; use 
wasmtime_jit::{native, Compiler}; From a518c101416cd62202d5a5baab4a6d3f160f0e97 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 20 Aug 2020 13:26:03 +0100 Subject: [PATCH 16/34] arm64: Implement SIMD i64x2 multiply Copyright (c) 2020, Arm Limited. --- build.rs | 2 + .../codegen/src/isa/aarch64/inst/args.rs | 24 ++++ .../codegen/src/isa/aarch64/inst/emit.rs | 60 ++++++--- .../src/isa/aarch64/inst/emit_tests.rs | 124 ++++++++++++++++++ cranelift/codegen/src/isa/aarch64/inst/mod.rs | 81 +++++++++++- .../codegen/src/isa/aarch64/lower_inst.rs | 119 ++++++++++++++++- 6 files changed, 380 insertions(+), 30 deletions(-) diff --git a/build.rs b/build.rs index 04dd042853..cd9ebdc610 100644 --- a/build.rs +++ b/build.rs @@ -211,6 +211,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { ("simd", "simd_bitwise") => return false, ("simd", "simd_bit_shift") => return false, ("simd", "simd_boolean") => return false, + ("simd", "simd_const") => return false, ("simd", "simd_f32x4") => return false, ("simd", "simd_f32x4_arith") => return false, ("simd", "simd_f32x4_cmp") => return false, @@ -228,6 +229,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { ("simd", "simd_i32x4_arith") => return false, ("simd", "simd_i32x4_arith2") => return false, ("simd", "simd_i32x4_cmp") => return false, + ("simd", "simd_i64x2_arith") => return false, ("simd", "simd_lane") => return false, ("simd", "simd_load_extend") => return false, ("simd", "simd_load_splat") => return false, diff --git a/cranelift/codegen/src/isa/aarch64/inst/args.rs b/cranelift/codegen/src/isa/aarch64/inst/args.rs index 060660fbd9..0045e5b088 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/args.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs @@ -647,6 +647,30 @@ impl VectorSize { VectorSize::Size64x2 => ScalarSize::Size64, } } + + pub fn is_128bits(&self) -> bool { + match self { + VectorSize::Size8x8 => false, + VectorSize::Size8x16 => true, + VectorSize::Size16x4 => 
false, + VectorSize::Size16x8 => true, + VectorSize::Size32x2 => false, + VectorSize::Size32x4 => true, + VectorSize::Size64x2 => true, + } + } + + pub fn widen(&self) -> VectorSize { + match self { + VectorSize::Size8x8 => VectorSize::Size16x8, + VectorSize::Size8x16 => VectorSize::Size16x8, + VectorSize::Size16x4 => VectorSize::Size32x4, + VectorSize::Size16x8 => VectorSize::Size32x4, + VectorSize::Size32x2 => VectorSize::Size64x2, + VectorSize::Size32x4 => VectorSize::Size64x2, + VectorSize::Size64x2 => unreachable!(), + } + } } //============================================================================= diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 6dcfb56249..fb69790981 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -352,12 +352,12 @@ fn enc_fround(top22: u32, rd: Writable, rn: Reg) -> u32 { (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg()) } -fn enc_vec_rr_misc(u: u32, size: u32, bits_12_16: u32, rd: Writable, rn: Reg) -> u32 { - debug_assert_eq!(u & 0b1, u); +fn enc_vec_rr_misc(qu: u32, size: u32, bits_12_16: u32, rd: Writable, rn: Reg) -> u32 { + debug_assert_eq!(qu & 0b11, qu); debug_assert_eq!(size & 0b11, size); debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16); - let bits = 0b0_1_0_01110_00_10000_00000_10_00000_00000; - bits | u << 29 + let bits = 0b0_00_01110_00_10000_00000_10_00000_00000; + bits | qu << 29 | size << 22 | bits_12_16 << 12 | machreg_to_vec(rn) << 5 @@ -1367,13 +1367,14 @@ impl MachInstEmit for Inst { sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra)); } &Inst::VecMisc { op, rd, rn, size } => { - let enc_size = match size { - VectorSize::Size8x16 => 0b00, - VectorSize::Size16x8 => 0b01, - VectorSize::Size32x4 => 0b10, - VectorSize::Size64x2 => 0b11, - _ => unimplemented!(), + let enc_size = match size.lane_size() { + ScalarSize::Size8 => 0b00, + ScalarSize::Size16 => 0b01, + 
ScalarSize::Size32 => 0b10, + ScalarSize::Size64 => 0b11, + _ => unreachable!(), }; + let q = if size.is_128bits() { 1 } else { 0 }; let (u, bits_12_16, size) = match op { VecMisc2::Not => (0b1, 0b00101, 0b00), VecMisc2::Neg => (0b1, 0b01011, enc_size), @@ -1390,8 +1391,17 @@ impl MachInstEmit for Inst { debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2); (0b1, 0b11111, enc_size) } + VecMisc2::Rev64 => { + debug_assert_ne!(VectorSize::Size64x2, size); + (0b0, 0b00000, enc_size) + } + VecMisc2::Shll => { + debug_assert_ne!(VectorSize::Size64x2, size); + debug_assert!(!size.is_128bits()); + (0b1, 0b10011, enc_size) + } }; - sink.put4(enc_vec_rr_misc(u, size, bits_12_16, rd, rn)); + sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn)); } &Inst::VecLanes { op, rd, rn, size } => { let (q, size) = match size { @@ -1651,6 +1661,17 @@ impl MachInstEmit for Inst { | machreg_to_vec(rd.to_reg()), ); } + &Inst::VecMiscNarrow { op, rd, rn, size } => { + debug_assert!(!size.is_128bits()); + let size = match size.widen() { + VectorSize::Size64x2 => 0b10, + _ => unimplemented!(), + }; + let (u, bits_12_16) = match op { + VecMiscNarrowOp::Xtn => (0b0, 0b10010), + }; + sink.put4(enc_vec_rr_misc(u, size, bits_12_16, rd, rn)); + } &Inst::VecMovElement { rd, rn, @@ -1685,12 +1706,12 @@ impl MachInstEmit for Inst { alu_op, size, } => { - let enc_size = match size { - VectorSize::Size8x16 => 0b00, - VectorSize::Size16x8 => 0b01, - VectorSize::Size32x4 => 0b10, - VectorSize::Size64x2 => 0b11, - _ => 0, + let enc_size = match size.lane_size() { + ScalarSize::Size8 => 0b00, + ScalarSize::Size16 => 0b01, + ScalarSize::Size32 => 0b10, + ScalarSize::Size64 => 0b11, + _ => unreachable!(), }; let is_float = match alu_op { VecALUOp::Fcmeq @@ -1751,6 +1772,11 @@ impl MachInstEmit for Inst { VecALUOp::Fmax => (0b010_01110_00_1, 0b111101), VecALUOp::Fmin => (0b010_01110_10_1, 0b111101), VecALUOp::Fmul => (0b011_01110_00_1, 0b110111), + VecALUOp::Addp => 
(0b010_01110_00_1 | enc_size << 1, 0b101111), + VecALUOp::Umlal => { + debug_assert!(!size.is_128bits()); + (0b001_01110_00_1 | enc_size << 1, 0b100000) + } }; let top11 = if is_float { top11 | enc_float_size << 1 diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index 2b2f48f802..7fba35f2bc 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -2082,6 +2082,17 @@ fn test_aarch64_binemit() { "mov v31.s[1], v16.s[0]", )); + insns.push(( + Inst::VecMiscNarrow { + op: VecMiscNarrowOp::Xtn, + rd: writable_vreg(22), + rn: vreg(8), + size: VectorSize::Size32x2, + }, + "1629A10E", + "xtn v22.2s, v8.2d", + )); + insns.push(( Inst::VecRRR { alu_op: VecALUOp::Sqadd, @@ -3066,6 +3077,53 @@ fn test_aarch64_binemit() { "fmul v2.2d, v0.2d, v5.2d", )); + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Addp, + rd: writable_vreg(16), + rn: vreg(12), + rm: vreg(1), + size: VectorSize::Size8x16, + }, + "90BD214E", + "addp v16.16b, v12.16b, v1.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Addp, + rd: writable_vreg(8), + rn: vreg(12), + rm: vreg(14), + size: VectorSize::Size32x4, + }, + "88BDAE4E", + "addp v8.4s, v12.4s, v14.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Umlal, + rd: writable_vreg(9), + rn: vreg(20), + rm: vreg(17), + size: VectorSize::Size32x2, + }, + "8982B12E", + "umlal v9.2d, v20.2s, v17.2s", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Not, + rd: writable_vreg(20), + rn: vreg(17), + size: VectorSize::Size8x8, + }, + "345A202E", + "mvn v20.8b, v17.8b", + )); + insns.push(( Inst::VecMisc { op: VecMisc2::Not, @@ -3077,6 +3135,17 @@ fn test_aarch64_binemit() { "mvn v2.16b, v1.16b", )); + insns.push(( + Inst::VecMisc { + op: VecMisc2::Neg, + rd: writable_vreg(3), + rn: vreg(7), + size: VectorSize::Size8x8, + }, + "E3B8202E", + "neg v3.8b, v7.8b", + )); + insns.push(( 
Inst::VecMisc { op: VecMisc2::Neg, @@ -3121,6 +3190,17 @@ fn test_aarch64_binemit() { "neg v10.2d, v8.2d", )); + insns.push(( + Inst::VecMisc { + op: VecMisc2::Abs, + rd: writable_vreg(3), + rn: vreg(1), + size: VectorSize::Size8x8, + }, + "23B8200E", + "abs v3.8b, v1.8b", + )); + insns.push(( Inst::VecMisc { op: VecMisc2::Abs, @@ -3198,6 +3278,50 @@ fn test_aarch64_binemit() { "fsqrt v7.2d, v18.2d", )); + insns.push(( + Inst::VecMisc { + op: VecMisc2::Rev64, + rd: writable_vreg(1), + rn: vreg(10), + size: VectorSize::Size32x4, + }, + "4109A04E", + "rev64 v1.4s, v10.4s", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Shll, + rd: writable_vreg(12), + rn: vreg(5), + size: VectorSize::Size8x8, + }, + "AC38212E", + "shll v12.8h, v5.8b, #8", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Shll, + rd: writable_vreg(9), + rn: vreg(1), + size: VectorSize::Size16x4, + }, + "2938612E", + "shll v9.4s, v1.4h, #16", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Shll, + rd: writable_vreg(1), + rn: vreg(10), + size: VectorSize::Size32x2, + }, + "4139A12E", + "shll v1.2d, v10.2s, #32", + )); + insns.push(( Inst::VecLanes { op: VecLanesOp::Uminv, diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 27868f96dc..46f6edc2e8 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -283,6 +283,10 @@ pub enum VecALUOp { Fmin, /// Floating-point multiply Fmul, + /// Add pairwise + Addp, + /// Unsigned multiply add long + Umlal, } /// A Vector miscellaneous operation with two registers. @@ -300,6 +304,17 @@ pub enum VecMisc2 { Fneg, /// Floating-point square root Fsqrt, + /// Reverse elements in 64-bit doublewords + Rev64, + /// Shift left long (by element size) + Shll, +} + +/// A Vector narrowing operation with two registers. 
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum VecMiscNarrowOp { + /// Extract Narrow + Xtn, } /// An operation across the lanes of vectors. @@ -880,6 +895,14 @@ pub enum Inst { size: VectorSize, }, + /// Vector narrowing operation. + VecMiscNarrow { + op: VecMiscNarrowOp, + rd: Writable, + rn: Reg, + size: VectorSize, + }, + /// A vector ALU op. VecRRR { alu_op: VecALUOp, @@ -1605,10 +1628,14 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { collector.add_mod(rd); collector.add_use(rn); } + &Inst::VecMiscNarrow { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } &Inst::VecRRR { alu_op, rd, rn, rm, .. } => { - if alu_op == VecALUOp::Bsl { + if alu_op == VecALUOp::Bsl || alu_op == VecALUOp::Umlal { collector.add_mod(rd); } else { collector.add_def(rd); @@ -2270,6 +2297,14 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RUM) { map_mod(mapper, rd); map_use(mapper, rn); } + &mut Inst::VecMiscNarrow { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } &mut Inst::VecRRR { alu_op, ref mut rd, @@ -2277,7 +2312,7 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RUM) { ref mut rm, .. 
} => { - if alu_op == VecALUOp::Bsl { + if alu_op == VecALUOp::Bsl || alu_op == VecALUOp::Umlal { map_mod(mapper, rd); } else { map_def(mapper, rd); @@ -3144,6 +3179,14 @@ impl Inst { let rn = show_vreg_element(rn, mb_rru, idx2, size); format!("mov {}, {}", rd, rn) } + &Inst::VecMiscNarrow { op, rd, rn, size } => { + let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); + let rn = show_vreg_vector(rn, mb_rru, size.widen()); + let op = match op { + VecMiscNarrowOp::Xtn => "xtn", + }; + format!("{} {}, {}", op, rd, rn) + } &Inst::VecRRR { rd, rn, @@ -3186,25 +3229,51 @@ impl Inst { VecALUOp::Fmax => ("fmax", size), VecALUOp::Fmin => ("fmin", size), VecALUOp::Fmul => ("fmul", size), + VecALUOp::Addp => ("addp", size), + VecALUOp::Umlal => ("umlal", size), }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); + let rd_size = if alu_op == VecALUOp::Umlal { + size.widen() + } else { + size + }; + let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size); let rn = show_vreg_vector(rn, mb_rru, size); let rm = show_vreg_vector(rm, mb_rru, size); format!("{} {}, {}, {}", op, rd, rn, rm) } &Inst::VecMisc { op, rd, rn, size } => { + let is_shll = op == VecMisc2::Shll; + let suffix = match (is_shll, size) { + (true, VectorSize::Size8x8) => ", #8", + (true, VectorSize::Size16x4) => ", #16", + (true, VectorSize::Size32x2) => ", #32", + _ => "", + }; + let (op, size) = match op { - VecMisc2::Not => ("mvn", VectorSize::Size8x16), + VecMisc2::Not => ( + "mvn", + if size.is_128bits() { + VectorSize::Size8x16 + } else { + VectorSize::Size8x8 + }, + ), VecMisc2::Neg => ("neg", size), VecMisc2::Abs => ("abs", size), VecMisc2::Fabs => ("fabs", size), VecMisc2::Fneg => ("fneg", size), VecMisc2::Fsqrt => ("fsqrt", size), + VecMisc2::Rev64 => ("rev64", size), + VecMisc2::Shll => ("shll", size), }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); + let rd_size = if is_shll { size.widen() } else { size }; + + let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size); let rn = 
show_vreg_vector(rn, mb_rru, size); - format!("{} {}, {}", op, rd, rn) + format!("{} {}, {}{}", op, rd, rn, suffix) } &Inst::VecLanes { op, rd, rn, size } => { let op = match op { diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 5fe62da697..5f8823a3d5 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -211,13 +211,118 @@ pub(crate) fn lower_insn_to_regs>( ra: zero_reg(), }); } else { - ctx.emit(Inst::VecRRR { - alu_op: VecALUOp::Mul, - rd, - rn, - rm, - size: VectorSize::from_ty(ty), - }); + if ty == I64X2 { + let tmp1 = ctx.alloc_tmp(RegClass::V128, I64X2); + let tmp2 = ctx.alloc_tmp(RegClass::V128, I64X2); + + // This I64X2 multiplication is performed with several 32-bit + // operations. + + // 64-bit numbers x and y, can be represented as: + // x = a + 2^32(b) + // y = c + 2^32(d) + + // A 64-bit multiplication is: + // x * y = ac + 2^32(ad + bc) + 2^64(bd) + // note: `2^64(bd)` can be ignored, the value is too large to fit in + // 64 bits. + + // This sequence implements a I64X2 multiply, where the registers + // `rn` and `rm` are split up into 32-bit components: + // rn = |d|c|b|a| + // rm = |h|g|f|e| + // + // rn * rm = |cg + 2^32(ch + dg)|ae + 2^32(af + be)| + // + // The sequence is: + // rev64 rd.4s, rm.4s + // mul rd.4s, rd.4s, rn.4s + // xtn tmp1.2s, rn.2d + // addp rd.4s, rd.4s, rd.4s + // xtn tmp2.2s, rm.2d + // shll rd.2d, rd.2s, #32 + // umlal rd.2d, tmp2.2s, tmp1.2s + + // Reverse the 32-bit elements in the 64-bit words. + // rd = |g|h|e|f| + ctx.emit(Inst::VecMisc { + op: VecMisc2::Rev64, + rd, + rn: rm, + size: VectorSize::Size32x4, + }); + + // Calculate the high half components. + // rd = |dg|ch|be|af| + // + // Note that this 32-bit multiply of the high half + // discards the bits that would overflow, same as + // if 64-bit operations were used. Also the Shll + // below would shift out the overflow bits anyway. 
+ ctx.emit(Inst::VecRRR { + alu_op: VecALUOp::Mul, + rd, + rn: rd.to_reg(), + rm: rn, + size: VectorSize::Size32x4, + }); + + // Extract the low half components of rn. + // tmp1 = |c|a| + ctx.emit(Inst::VecMiscNarrow { + op: VecMiscNarrowOp::Xtn, + rd: tmp1, + rn, + size: VectorSize::Size32x2, + }); + + // Sum the respective high half components. + // rd = |dg+ch|be+af||dg+ch|be+af| + ctx.emit(Inst::VecRRR { + alu_op: VecALUOp::Addp, + rd: rd, + rn: rd.to_reg(), + rm: rd.to_reg(), + size: VectorSize::Size32x4, + }); + + // Extract the low half components of rm. + // tmp2 = |g|e| + ctx.emit(Inst::VecMiscNarrow { + op: VecMiscNarrowOp::Xtn, + rd: tmp2, + rn: rm, + size: VectorSize::Size32x2, + }); + + // Shift the high half components, into the high half. + // rd = |dg+ch << 32|be+af << 32| + ctx.emit(Inst::VecMisc { + op: VecMisc2::Shll, + rd, + rn: rd.to_reg(), + size: VectorSize::Size32x2, + }); + + // Multiply the low components together, and accumulate with the high + // half. + // rd = |rd[1] + cg|rd[0] + ae| + ctx.emit(Inst::VecRRR { + alu_op: VecALUOp::Umlal, + rd, + rn: tmp2.to_reg(), + rm: tmp1.to_reg(), + size: VectorSize::Size32x2, + }); + } else { + ctx.emit(Inst::VecRRR { + alu_op: VecALUOp::Mul, + rd, + rn, + rm, + size: VectorSize::from_ty(ty), + }); + } } } From cf598dc35befed88e72d3a0d0d180a90820f304a Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Tue, 4 Aug 2020 14:19:45 -0700 Subject: [PATCH 17/34] machinst x64: add packed moves for different vector types --- cranelift/codegen/src/isa/x64/inst/args.rs | 6 ++++++ cranelift/codegen/src/isa/x64/inst/emit.rs | 12 +++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 600381496f..6b13b5283e 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -380,6 +380,8 @@ pub enum SseOpcode { Movaps, Movapd, Movd, + Movdqa, + Movdqu, Movq, 
Movss, Movsd, @@ -487,6 +489,8 @@ impl SseOpcode { | SseOpcode::Movq | SseOpcode::Movsd | SseOpcode::Movupd + | SseOpcode::Movdqa + | SseOpcode::Movdqu | SseOpcode::Mulpd | SseOpcode::Mulsd | SseOpcode::Orpd @@ -571,6 +575,8 @@ impl fmt::Debug for SseOpcode { SseOpcode::Movaps => "movaps", SseOpcode::Movapd => "movapd", SseOpcode::Movd => "movd", + SseOpcode::Movdqa => "movdqa", + SseOpcode::Movdqu => "movdqu", SseOpcode::Movq => "movq", SseOpcode::Movss => "movss", SseOpcode::Movsd => "movsd", diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 6118284674..f2633bb1db 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1596,8 +1596,12 @@ pub(crate) fn emit( let rex = RexFlags::clear_w(); let (prefix, opcode) = match op { + SseOpcode::Cvtss2sd => (LegacyPrefix::_F3, 0x0F5A), + SseOpcode::Cvtsd2ss => (LegacyPrefix::_F2, 0x0F5A), SseOpcode::Movaps => (LegacyPrefix::None, 0x0F28), SseOpcode::Movapd => (LegacyPrefix::_66, 0x0F28), + SseOpcode::Movdqa => (LegacyPrefix::_66, 0x0F6F), + SseOpcode::Movdqu => (LegacyPrefix::_F3, 0x0F6F), SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F10), SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F10), SseOpcode::Movups => (LegacyPrefix::None, 0x0F10), @@ -1606,8 +1610,6 @@ pub(crate) fn emit( SseOpcode::Sqrtpd => (LegacyPrefix::_66, 0x0F51), SseOpcode::Sqrtss => (LegacyPrefix::_F3, 0x0F51), SseOpcode::Sqrtsd => (LegacyPrefix::_F2, 0x0F51), - SseOpcode::Cvtss2sd => (LegacyPrefix::_F3, 0x0F5A), - SseOpcode::Cvtsd2ss => (LegacyPrefix::_F2, 0x0F5A), _ => unimplemented!("Opcode {:?} not implemented", op), }; @@ -1839,10 +1841,14 @@ pub(crate) fn emit( srcloc, } => { let (prefix, opcode) = match op { + SseOpcode::Movaps => (LegacyPrefix::None, 0x0F29), + SseOpcode::Movapd => (LegacyPrefix::_66, 0x0F29), + SseOpcode::Movdqa => (LegacyPrefix::_66, 0x0F7F), + SseOpcode::Movdqu => (LegacyPrefix::_F3, 0x0F7F), SseOpcode::Movss => 
(LegacyPrefix::_F3, 0x0F11), SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F11), - SseOpcode::Movaps => (LegacyPrefix::None, 0x0F29), SseOpcode::Movups => (LegacyPrefix::None, 0x0F11), + SseOpcode::Movupd => (LegacyPrefix::_66, 0x0F11), _ => unimplemented!("Opcode {:?} not implemented", op), }; let dst = &dst.finalize(state); From 2767b2efc6c067f9db4d153bef5244043a228716 Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Tue, 4 Aug 2020 14:29:37 -0700 Subject: [PATCH 18/34] machinst x64: add `Inst::[move|load|store]` for choosing the correct x86 instruction This change primarily adds the ability to lower packed `[move|load|store]` instructions (the vector types were previously unimplemented), but with the addition of the utility `Inst::[move|load|store]` functions it became possible to remove duplicated code (e.g. `stack_load` and `stack_store`) and use these utility functions elsewhere (though not exhaustively). --- cranelift/codegen/src/isa/x64/abi.rs | 117 ++++++--------------- cranelift/codegen/src/isa/x64/inst/args.rs | 10 ++ cranelift/codegen/src/isa/x64/inst/emit.rs | 14 +-- cranelift/codegen/src/isa/x64/inst/mod.rs | 102 ++++++++++++++++-- 4 files changed, 137 insertions(+), 106 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 4bc22357fd..b74cb39cfc 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -325,10 +325,13 @@ impl ABIBody for X64ABIBody { self.fp_to_arg_offset() + off <= u32::max_value() as i64, "large offset nyi" ); - load_stack( - Amode::imm_reg((self.fp_to_arg_offset() + off) as u32, regs::rbp()), - to_reg, + let from_addr = Amode::imm_reg((self.fp_to_arg_offset() + off) as u32, regs::rbp()); + Inst::load( ty, + from_addr, + to_reg, + ExtKind::ZeroExtend, + /* infallible load */ None, ) } } @@ -420,8 +423,10 @@ impl ABIBody for X64ABIBody { "large stack return offset nyi" ); - let mem = Amode::imm_reg(off as u32, 
self.ret_area_ptr.unwrap().to_reg()); - ret.push(store_stack(mem, from_reg.to_reg(), ty)) + let from_reg = from_reg.to_reg(); + let to_mem = Amode::imm_reg(off as u32, self.ret_area_ptr.unwrap().to_reg()); + let store = Inst::store(ty, from_reg, to_mem, /* infallible store */ None); + ret.push(store) } } @@ -464,17 +469,20 @@ impl ABIBody for X64ABIBody { unimplemented!("store_stackslot") } - fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable) -> Inst { + fn load_spillslot(&self, slot: SpillSlot, ty: Type, to_reg: Writable) -> Inst { // Offset from beginning of spillslot area, which is at nominal-SP + stackslots_size. let islot = slot.get() as i64; let spill_off = islot * 8; let sp_off = self.stack_slots_size as i64 + spill_off; debug_assert!(sp_off <= u32::max_value() as i64, "large spill offsets NYI"); trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off); - load_stack( - SyntheticAmode::nominal_sp_offset(sp_off as u32), - into_reg, + let from_addr = SyntheticAmode::nominal_sp_offset(sp_off as u32); + Inst::load( ty, + from_addr, + to_reg, + ExtKind::ZeroExtend, + /* infallible load */ None, ) } @@ -485,11 +493,8 @@ impl ABIBody for X64ABIBody { let sp_off = self.stack_slots_size as i64 + spill_off; debug_assert!(sp_off <= u32::max_value() as i64, "large spill offsets NYI"); trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off); - store_stack( - SyntheticAmode::nominal_sp_offset(sp_off as u32), - from_reg, - ty, - ) + let to_mem = SyntheticAmode::nominal_sp_offset(sp_off as u32); + Inst::store(ty, from_reg, to_mem, /* infallible store */ None) } fn spillslots_to_stack_map(&self, slots: &[SpillSlot], state: &EmitState) -> StackMap { @@ -1003,66 +1008,6 @@ fn adjust_stack>(ctx: &mut C, amount: u64, is_sub: bool) { } } -fn load_stack(mem: impl Into, into_reg: Writable, ty: Type) -> Inst { - let (is_int, ext_mode) = match ty { - types::B1 | types::B8 | types::I8 => (true, Some(ExtMode::BQ)), - types::B16 | types::I16 => (true, 
Some(ExtMode::WQ)), - types::B32 | types::I32 => (true, Some(ExtMode::LQ)), - types::B64 | types::I64 | types::R64 => (true, None), - types::F32 | types::F64 => (false, None), - _ => panic!("load_stack({})", ty), - }; - - let mem = mem.into(); - - if is_int { - match ext_mode { - Some(ext_mode) => Inst::movsx_rm_r( - ext_mode, - RegMem::mem(mem), - into_reg, - /* infallible load */ None, - ), - None => Inst::mov64_m_r(mem, into_reg, None /* infallible */), - } - } else { - let sse_op = match ty { - types::F32 => SseOpcode::Movss, - types::F64 => SseOpcode::Movsd, - _ => unreachable!(), - }; - Inst::xmm_mov( - sse_op, - RegMem::mem(mem), - into_reg, - None, /* infallible */ - ) - } -} - -fn store_stack(mem: impl Into, from_reg: Reg, ty: Type) -> Inst { - let (is_int, size) = match ty { - types::B1 | types::B8 | types::I8 => (true, 1), - types::B16 | types::I16 => (true, 2), - types::B32 | types::I32 => (true, 4), - types::B64 | types::I64 | types::R64 => (true, 8), - types::F32 => (false, 4), - types::F64 => (false, 8), - _ => unimplemented!("store_stack({})", ty), - }; - let mem = mem.into(); - if is_int { - Inst::mov_r_m(size, from_reg, mem, /* infallible store */ None) - } else { - let sse_op = match size { - 4 => SseOpcode::Movss, - 8 => SseOpcode::Movsd, - _ => unreachable!(), - }; - Inst::xmm_mov_r_m(sse_op, from_reg, mem, /* infallible store */ None) - } -} - /// X64 ABI object for a function call. 
pub struct X64ABICall { sig: ABISig, @@ -1212,11 +1157,9 @@ impl ABICall for X64ABICall { debug_assert!(off <= u32::max_value() as i64); debug_assert!(off >= 0); - ctx.emit(store_stack( - Amode::imm_reg(off as u32, regs::rsp()), - from_reg, - ty, - )) + let to_mem = Amode::imm_reg(off as u32, regs::rsp()); + let store = Inst::store(ty, from_reg, to_mem, /* infallible store */ None); + ctx.emit(store) } } } @@ -1225,21 +1168,25 @@ impl ABICall for X64ABICall { &self, ctx: &mut C, idx: usize, - into_reg: Writable, + to_reg: Writable, ) { match &self.sig.rets[idx] { - &ABIArg::Reg(reg, ty, _) => ctx.emit(Inst::gen_move(into_reg, reg.to_reg(), ty)), + &ABIArg::Reg(reg, ty, _) => ctx.emit(Inst::gen_move(to_reg, reg.to_reg(), ty)), &ABIArg::Stack(off, ty, _) => { let ret_area_base = self.sig.stack_arg_space; let sp_offset = off + ret_area_base; // TODO handle offsets bigger than u32::max debug_assert!(sp_offset >= 0); debug_assert!(sp_offset <= u32::max_value() as i64); - ctx.emit(load_stack( - Amode::imm_reg(sp_offset as u32, regs::rsp()), - into_reg, + let from_addr = Amode::imm_reg(sp_offset as u32, regs::rsp()); + let load = Inst::load( ty, - )); + from_addr, + to_reg, + ExtKind::ZeroExtend, + /* infallible load */ None, + ); + ctx.emit(load); } } } diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 6b13b5283e..343f3322d0 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -634,6 +634,16 @@ impl fmt::Display for SseOpcode { } } +/// This defines the ways a value can be extended: either signed- or zero-extension, or none for +/// types that are not extended. Contrast with [ExtMode], which defines the widths from and to which +/// values can be extended. 
+#[derive(Clone, PartialEq)] +pub enum ExtKind { + None, + SignExtend, + ZeroExtend, +} + /// These indicate ways of extending (widening) a value, using the Intel /// naming: B(yte) = u8, W(ord) = u16, L(ong)word = u32, Q(uad)word = u64 #[derive(Clone, PartialEq)] diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index f2633bb1db..9bae562c5c 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1,6 +1,6 @@ use crate::binemit::Reloc; use crate::ir::immediates::{Ieee32, Ieee64}; -use crate::ir::{types, TrapCode}; +use crate::ir::TrapCode; use crate::isa::x64::inst::args::*; use crate::isa::x64::inst::*; use crate::machinst::{MachBuffer, MachInstEmit, MachLabel}; @@ -1807,17 +1807,9 @@ pub(crate) fn emit( // "constant inline" code should be replaced by constant pool integration. // Load the inline constant. - let opcode = match *ty { - types::F32X4 => SseOpcode::Movups, - types::F64X2 => SseOpcode::Movupd, - types::I8X16 => SseOpcode::Movupd, // TODO replace with MOVDQU - _ => unimplemented!("cannot yet load constants for type: {}", ty), - }; let constant_start_label = sink.get_label(); - let load_offset = RegMem::mem(Amode::rip_relative(BranchTarget::Label( - constant_start_label, - ))); - let load = Inst::xmm_unary_rm_r(opcode, load_offset, *dst); + let load_offset = Amode::rip_relative(BranchTarget::Label(constant_start_label)); + let load = Inst::load(*ty, load_offset, *dst, ExtKind::None, None); load.emit(sink, flags, state); // Jump over the constant. 
diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 145831c01e..712a9b508e 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -526,6 +526,7 @@ impl Inst { Inst::Mov_R_R { is_64, src, dst } } + // TODO Can be replaced by `Inst::move` (high-level) and `Inst::unary_rm_r` (low-level) pub(crate) fn xmm_mov( op: SseOpcode, src: RegMem, @@ -935,6 +936,85 @@ impl Inst { srcloc, } } + + /// Choose which instruction to use for loading a register value from memory. For loads smaller + /// than 64 bits, this method expects a way to extend the value (i.e. [ExtKind::SignExtend], + /// [ExtKind::ZeroExtend]); loads with no extension necessary will ignore this. + pub(crate) fn load( + ty: Type, + from_addr: impl Into, + to_reg: Writable, + ext_kind: ExtKind, + srcloc: Option, + ) -> Inst { + let rc = to_reg.to_reg().get_class(); + match rc { + RegClass::I64 => { + let ext_mode = match ty.bytes() { + 1 => Some(ExtMode::BQ), + 2 => Some(ExtMode::WQ), + 4 => Some(ExtMode::LQ), + 8 => None, + _ => unreachable!("the type should never use a scalar load: {}", ty), + }; + if let Some(ext_mode) = ext_mode { + // Values smaller than 64 bits must be extended in some way. + match ext_kind { + ExtKind::SignExtend => { + Inst::movsx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg, srcloc) + } + ExtKind::ZeroExtend => { + Inst::movzx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg, srcloc) + } + ExtKind::None => panic!( + "expected an extension kind for extension mode: {:?}", + ext_mode + ), + } + } else { + // 64-bit values can be moved directly. 
+ Inst::mov64_m_r(from_addr, to_reg, srcloc) + } + } + RegClass::V128 => { + let opcode = match ty { + types::F32 => SseOpcode::Movss, + types::F64 => SseOpcode::Movsd, + types::F32X4 => SseOpcode::Movups, + types::F64X2 => SseOpcode::Movupd, + _ if ty.is_vector() && ty.bits() == 128 => SseOpcode::Movdqu, + _ => unimplemented!("unable to load type: {}", ty), + }; + Inst::xmm_unary_rm_r(opcode, RegMem::mem(from_addr), to_reg) + } + _ => panic!("unable to generate load for register class: {:?}", rc), + } + } + + /// Choose which instruction to use for storing a register value to memory. + pub(crate) fn store( + ty: Type, + from_reg: Reg, + to_addr: impl Into, + srcloc: Option, + ) -> Inst { + let rc = from_reg.get_class(); + match rc { + RegClass::I64 => Inst::mov_r_m(ty.bytes() as u8, from_reg, to_addr, srcloc), + RegClass::V128 => { + let opcode = match ty { + types::F32 => SseOpcode::Movss, + types::F64 => SseOpcode::Movsd, + types::F32X4 => SseOpcode::Movups, + types::F64X2 => SseOpcode::Movupd, + _ if ty.is_vector() && ty.bits() == 128 => SseOpcode::Movdqu, + _ => unimplemented!("unable to store type: {}", ty), + }; + Inst::xmm_mov_r_m(opcode, from_reg, to_addr, srcloc) + } + _ => panic!("unable to generate store for register class: {:?}", rc), + } + } } // Inst helpers. @@ -2093,16 +2173,18 @@ impl MachInst for Inst { debug_assert!(rc_dst == rc_src); match rc_dst { RegClass::I64 => Inst::mov_r_r(true, src_reg, dst_reg), - RegClass::V128 => match ty { - types::F32 => Inst::xmm_mov(SseOpcode::Movss, RegMem::reg(src_reg), dst_reg, None), - types::F64 => Inst::xmm_mov(SseOpcode::Movsd, RegMem::reg(src_reg), dst_reg, None), - _ if ty.is_vector() && ty.bits() == 128 => { - // TODO Specialize this move for different types: MOVUPD, MOVDQU, etc. 
- Inst::xmm_mov(SseOpcode::Movups, RegMem::reg(src_reg), dst_reg, None) - } - _ => panic!("unexpected type {:?} in gen_move of regclass V128", ty), - }, - _ => panic!("gen_move(x64): unhandled regclass"), + RegClass::V128 => { + let opcode = match ty { + types::F32 => SseOpcode::Movss, + types::F64 => SseOpcode::Movsd, + types::F32X4 => SseOpcode::Movaps, + types::F64X2 => SseOpcode::Movapd, + _ if ty.is_vector() && ty.bits() == 128 => SseOpcode::Movdqa, + _ => unimplemented!("unable to move type: {}", ty), + }; + Inst::xmm_unary_rm_r(opcode, RegMem::reg(src_reg), dst_reg) + } + _ => panic!("gen_move(x64): unhandled regclass {:?}", rc_dst), } } From 2154b76fa82044bacdd7320aa0b7f4967c5f8a25 Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Tue, 4 Aug 2020 14:33:04 -0700 Subject: [PATCH 19/34] machinst x64: enable simd_const.wast spec test --- build.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/build.rs b/build.rs index cd9ebdc610..a0e374082b 100644 --- a/build.rs +++ b/build.rs @@ -181,6 +181,7 @@ fn experimental_x64_should_panic(testsuite: &str, testname: &str, strategy: &str match (testsuite, testname) { ("simd", "simd_address") => return false, + ("simd", "simd_const") => return false, ("simd", "simd_f32x4_arith") => return false, ("simd", "simd_f32x4_cmp") => return false, ("simd", "simd_f64x2_arith") => return false, From 0b67b22eda66560d137de5374f15182d5983d35d Mon Sep 17 00:00:00 2001 From: Johnnie Birch <45402135+jlb6740@users.noreply.github.com> Date: Thu, 20 Aug 2020 12:41:06 -0700 Subject: [PATCH 20/34] Update renamed document lint for broken links Renames intra_doc_link_resolution_failure to broken_intra_doc_links --- crates/wasmtime/src/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/wasmtime/src/lib.rs b/crates/wasmtime/src/lib.rs index 987b8db6a3..e914cb724c 100644 --- a/crates/wasmtime/src/lib.rs +++ b/crates/wasmtime/src/lib.rs @@ -229,7 +229,8 @@ //! # } //! 
``` -#![deny(missing_docs, intra_doc_link_resolution_failure)] +#![allow(unknown_lints)] +#![deny(missing_docs, broken_intra_doc_links)] #![doc(test(attr(deny(warnings))))] #![doc(test(attr(allow(dead_code, unused_variables, unused_mut))))] From 6513e90914a6d01386e7ef20acaa0100d8cd0592 Mon Sep 17 00:00:00 2001 From: Gabor Greif Date: Fri, 21 Aug 2020 15:16:09 +0200 Subject: [PATCH 21/34] use the obvious DW_OP_const4u for 0xffff_ffff (#2154) No point in resorting to LEB128 encoding for such constants, using the native `u32` is faster and more compact. Adds `write_u32` method to facilitate this. --- crates/debug/src/transform/expression.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/crates/debug/src/transform/expression.rs b/crates/debug/src/transform/expression.rs index c5386d11e4..601be39bf6 100644 --- a/crates/debug/src/transform/expression.rs +++ b/crates/debug/src/transform/expression.rs @@ -64,6 +64,10 @@ impl ExpressionWriter { write::Writer::write_u8(&mut self.0, b) } + pub fn write_u32(&mut self, b: u32) -> write::Result<()> { + write::Writer::write_u32(&mut self.0, b) + } + pub fn write_uleb128(&mut self, i: u64) -> write::Result<()> { write::Writer::write_uleb128(&mut self.0, i) } @@ -196,8 +200,8 @@ fn append_memory_deref( } writer.write_op(gimli::constants::DW_OP_deref)?; writer.write_op(gimli::constants::DW_OP_swap)?; - writer.write_op(gimli::constants::DW_OP_constu)?; - writer.write_uleb128(0xffff_ffff)?; + writer.write_op(gimli::constants::DW_OP_const4u)?; + writer.write_u32(0xffff_ffff)?; writer.write_op(gimli::constants::DW_OP_and)?; writer.write_op(gimli::constants::DW_OP_plus)?; buf.extend(writer.into_vec()); From b895ac0e407632928ad5907f7297cb693145f9ed Mon Sep 17 00:00:00 2001 From: Anton Kirilov Date: Wed, 19 Aug 2020 20:46:08 +0100 Subject: [PATCH 22/34] AArch64: Implement SIMD conversions Copyright (c) 2020, Arm Limited. 
--- build.rs | 33 -- .../codegen/src/isa/aarch64/inst/args.rs | 9 + .../codegen/src/isa/aarch64/inst/emit.rs | 51 ++- .../src/isa/aarch64/inst/emit_tests.rs | 87 ++++- cranelift/codegen/src/isa/aarch64/inst/mod.rs | 78 +++- .../codegen/src/isa/aarch64/lower_inst.rs | 368 +++++++++++------- 6 files changed, 424 insertions(+), 202 deletions(-) diff --git a/build.rs b/build.rs index a0e374082b..e23e8c2180 100644 --- a/build.rs +++ b/build.rs @@ -196,7 +196,6 @@ fn experimental_x64_should_panic(testsuite: &str, testname: &str, strategy: &str /// Ignore tests that aren't supported yet. fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { - let target = env::var("TARGET").unwrap(); match strategy { #[cfg(feature = "lightbeam")] "Lightbeam" => match (testsuite, testname) { @@ -207,38 +206,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { _ => (), }, "Cranelift" => match (testsuite, testname) { - ("simd", "simd_address") => return false, - ("simd", "simd_align") => return false, - ("simd", "simd_bitwise") => return false, - ("simd", "simd_bit_shift") => return false, - ("simd", "simd_boolean") => return false, - ("simd", "simd_const") => return false, - ("simd", "simd_f32x4") => return false, - ("simd", "simd_f32x4_arith") => return false, - ("simd", "simd_f32x4_cmp") => return false, - ("simd", "simd_f64x2") => return false, - ("simd", "simd_f64x2_arith") => return false, - ("simd", "simd_f64x2_cmp") => return false, - ("simd", "simd_i8x16_arith") => return false, - ("simd", "simd_i8x16_arith2") => return false, - ("simd", "simd_i8x16_cmp") => return false, - ("simd", "simd_i8x16_sat_arith") => return false, - ("simd", "simd_i16x8_arith") => return false, - ("simd", "simd_i16x8_arith2") => return false, - ("simd", "simd_i16x8_cmp") => return false, - ("simd", "simd_i16x8_sat_arith") => return false, - ("simd", "simd_i32x4_arith") => return false, - ("simd", "simd_i32x4_arith2") => return false, - ("simd", "simd_i32x4_cmp") => return 
false, - ("simd", "simd_i64x2_arith") => return false, - ("simd", "simd_lane") => return false, - ("simd", "simd_load_extend") => return false, - ("simd", "simd_load_splat") => return false, - ("simd", "simd_store") => return false, - // Most simd tests are known to fail on aarch64 for now, it's going - // to be a big chunk of work to implement them all there! - ("simd", _) if target.contains("aarch64") => return true, - // TODO(#1886): Ignore reference types tests if this isn't x64, // because Cranelift only supports reference types on x64. ("reference_types", _) => { diff --git a/cranelift/codegen/src/isa/aarch64/inst/args.rs b/cranelift/codegen/src/isa/aarch64/inst/args.rs index 0045e5b088..729d21d121 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/args.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs @@ -671,6 +671,15 @@ impl VectorSize { VectorSize::Size64x2 => unreachable!(), } } + + pub fn halve(&self) -> VectorSize { + match self { + VectorSize::Size8x16 => VectorSize::Size8x8, + VectorSize::Size16x8 => VectorSize::Size16x4, + VectorSize::Size32x4 => VectorSize::Size32x2, + _ => *self, + } + } } //============================================================================= diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index fb69790981..32fe3aa6cf 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -1400,6 +1400,22 @@ impl MachInstEmit for Inst { debug_assert!(!size.is_128bits()); (0b1, 0b10011, enc_size) } + VecMisc2::Fcvtzs => { + debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2); + (0b0, 0b11011, enc_size) + } + VecMisc2::Fcvtzu => { + debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2); + (0b1, 0b11011, enc_size) + } + VecMisc2::Scvtf => { + debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2); + (0b0, 0b11101, enc_size & 0b1) + } + 
VecMisc2::Ucvtf => { + debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2); + (0b1, 0b11101, enc_size & 0b1) + } }; sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn)); } @@ -1644,7 +1660,12 @@ impl MachInstEmit for Inst { | machreg_to_vec(rd.to_reg()), ); } - &Inst::VecExtend { t, rd, rn } => { + &Inst::VecExtend { + t, + rd, + rn, + high_half, + } => { let (u, immh) = match t { VecExtendOp::Sxtl8 => (0b0, 0b001), VecExtendOp::Sxtl16 => (0b0, 0b010), @@ -1655,22 +1676,38 @@ impl MachInstEmit for Inst { }; sink.put4( 0b000_011110_0000_000_101001_00000_00000 + | ((high_half as u32) << 30) | (u << 29) | (immh << 19) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg()), ); } - &Inst::VecMiscNarrow { op, rd, rn, size } => { - debug_assert!(!size.is_128bits()); - let size = match size.widen() { - VectorSize::Size64x2 => 0b10, - _ => unimplemented!(), + &Inst::VecMiscNarrow { + op, + rd, + rn, + size, + high_half, + } => { + let size = match size.lane_size() { + ScalarSize::Size8 => 0b00, + ScalarSize::Size16 => 0b01, + ScalarSize::Size32 => 0b10, + _ => panic!("Unexpected vector operand lane size!"), }; let (u, bits_12_16) = match op { VecMiscNarrowOp::Xtn => (0b0, 0b10010), + VecMiscNarrowOp::Sqxtn => (0b0, 0b10100), + VecMiscNarrowOp::Sqxtun => (0b1, 0b10010), }; - sink.put4(enc_vec_rr_misc(u, size, bits_12_16, rd, rn)); + sink.put4(enc_vec_rr_misc( + ((high_half as u32) << 1) | u, + size, + bits_12_16, + rd, + rn, + )); } &Inst::VecMovElement { rd, diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index 7fba35f2bc..e2f08abb21 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -2008,6 +2008,7 @@ fn test_aarch64_binemit() { t: VecExtendOp::Sxtl8, rd: writable_vreg(4), rn: vreg(27), + high_half: false, }, "64A7080F", "sxtl v4.8h, v27.8b", @@ -2017,15 +2018,17 @@ fn 
test_aarch64_binemit() { t: VecExtendOp::Sxtl16, rd: writable_vreg(17), rn: vreg(19), + high_half: true, }, - "71A6100F", - "sxtl v17.4s, v19.4h", + "71A6104F", + "sxtl2 v17.4s, v19.8h", )); insns.push(( Inst::VecExtend { t: VecExtendOp::Sxtl32, rd: writable_vreg(30), rn: vreg(6), + high_half: false, }, "DEA4200F", "sxtl v30.2d, v6.2s", @@ -2035,15 +2038,17 @@ fn test_aarch64_binemit() { t: VecExtendOp::Uxtl8, rd: writable_vreg(3), rn: vreg(29), + high_half: true, }, - "A3A7082F", - "uxtl v3.8h, v29.8b", + "A3A7086F", + "uxtl2 v3.8h, v29.16b", )); insns.push(( Inst::VecExtend { t: VecExtendOp::Uxtl16, rd: writable_vreg(15), rn: vreg(12), + high_half: false, }, "8FA5102F", "uxtl v15.4s, v12.4h", @@ -2053,9 +2058,10 @@ fn test_aarch64_binemit() { t: VecExtendOp::Uxtl32, rd: writable_vreg(28), rn: vreg(2), + high_half: true, }, - "5CA4202F", - "uxtl v28.2d, v2.2s", + "5CA4206F", + "uxtl2 v28.2d, v2.4s", )); insns.push(( @@ -2088,11 +2094,36 @@ fn test_aarch64_binemit() { rd: writable_vreg(22), rn: vreg(8), size: VectorSize::Size32x2, + high_half: false, }, "1629A10E", "xtn v22.2s, v8.2d", )); + insns.push(( + Inst::VecMiscNarrow { + op: VecMiscNarrowOp::Sqxtn, + rd: writable_vreg(31), + rn: vreg(0), + size: VectorSize::Size16x8, + high_half: true, + }, + "1F48614E", + "sqxtn2 v31.8h, v0.4s", + )); + + insns.push(( + Inst::VecMiscNarrow { + op: VecMiscNarrowOp::Sqxtun, + rd: writable_vreg(16), + rn: vreg(23), + size: VectorSize::Size8x16, + high_half: false, + }, + "F02A212E", + "sqxtun v16.8b, v23.8h", + )); + insns.push(( Inst::VecRRR { alu_op: VecALUOp::Sqadd, @@ -3322,6 +3353,50 @@ fn test_aarch64_binemit() { "shll v1.2d, v10.2s, #32", )); + insns.push(( + Inst::VecMisc { + op: VecMisc2::Fcvtzs, + rd: writable_vreg(4), + rn: vreg(22), + size: VectorSize::Size32x4, + }, + "C4BAA14E", + "fcvtzs v4.4s, v22.4s", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Fcvtzu, + rd: writable_vreg(29), + rn: vreg(15), + size: VectorSize::Size64x2, + }, + "FDB9E16E", + 
"fcvtzu v29.2d, v15.2d", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Scvtf, + rd: writable_vreg(20), + rn: vreg(8), + size: VectorSize::Size32x4, + }, + "14D9214E", + "scvtf v20.4s, v8.4s", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Ucvtf, + rd: writable_vreg(10), + rn: vreg(19), + size: VectorSize::Size64x2, + }, + "6ADA616E", + "ucvtf v10.2d, v19.2d", + )); + insns.push(( Inst::VecLanes { op: VecLanesOp::Uminv, diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 46f6edc2e8..b90dccd41a 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -308,6 +308,14 @@ pub enum VecMisc2 { Rev64, /// Shift left long (by element size) Shll, + /// Floating-point convert to signed integer, rounding toward zero + Fcvtzs, + /// Floating-point convert to unsigned integer, rounding toward zero + Fcvtzu, + /// Signed integer convert to floating-point + Scvtf, + /// Unsigned integer convert to floating-point + Ucvtf, } /// A Vector narrowing operation with two registers. @@ -315,6 +323,10 @@ pub enum VecMisc2 { pub enum VecMiscNarrowOp { /// Extract Narrow Xtn, + /// Signed saturating extract narrow + Sqxtn, + /// Signed saturating extract unsigned narrow + Sqxtun, } /// An operation across the lanes of vectors. @@ -884,6 +896,7 @@ pub enum Inst { t: VecExtendOp, rd: Writable, rn: Reg, + high_half: bool, }, /// Move vector element to another vector element. @@ -901,6 +914,7 @@ pub enum Inst { rd: Writable, rn: Reg, size: VectorSize, + high_half: bool, }, /// A vector ALU op. @@ -1628,9 +1642,16 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { collector.add_mod(rd); collector.add_use(rn); } - &Inst::VecMiscNarrow { rd, rn, .. } => { - collector.add_def(rd); + &Inst::VecMiscNarrow { + rd, rn, high_half, .. 
+ } => { collector.add_use(rn); + + if high_half { + collector.add_mod(rd); + } else { + collector.add_def(rd); + } } &Inst::VecRRR { alu_op, rd, rn, rm, .. @@ -2300,10 +2321,16 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RUM) { &mut Inst::VecMiscNarrow { ref mut rd, ref mut rn, + high_half, .. } => { - map_def(mapper, rd); map_use(mapper, rn); + + if high_half { + map_mod(mapper, rd); + } else { + map_def(mapper, rd); + } } &mut Inst::VecRRR { alu_op, @@ -3155,14 +3182,20 @@ impl Inst { let rn = show_vreg_element(rn, mb_rru, 0, size); format!("dup {}, {}", rd, rn) } - &Inst::VecExtend { t, rd, rn } => { - let (op, dest, src) = match t { - VecExtendOp::Sxtl8 => ("sxtl", VectorSize::Size16x8, VectorSize::Size8x8), - VecExtendOp::Sxtl16 => ("sxtl", VectorSize::Size32x4, VectorSize::Size16x4), - VecExtendOp::Sxtl32 => ("sxtl", VectorSize::Size64x2, VectorSize::Size32x2), - VecExtendOp::Uxtl8 => ("uxtl", VectorSize::Size16x8, VectorSize::Size8x8), - VecExtendOp::Uxtl16 => ("uxtl", VectorSize::Size32x4, VectorSize::Size16x4), - VecExtendOp::Uxtl32 => ("uxtl", VectorSize::Size64x2, VectorSize::Size32x2), + &Inst::VecExtend { t, rd, rn, high_half } => { + let (op, dest, src) = match (t, high_half) { + (VecExtendOp::Sxtl8, false) => ("sxtl", VectorSize::Size16x8, VectorSize::Size8x8), + (VecExtendOp::Sxtl8, true) => ("sxtl2", VectorSize::Size16x8, VectorSize::Size8x16), + (VecExtendOp::Sxtl16, false) => ("sxtl", VectorSize::Size32x4, VectorSize::Size16x4), + (VecExtendOp::Sxtl16, true) => ("sxtl2", VectorSize::Size32x4, VectorSize::Size16x8), + (VecExtendOp::Sxtl32, false) => ("sxtl", VectorSize::Size64x2, VectorSize::Size32x2), + (VecExtendOp::Sxtl32, true) => ("sxtl2", VectorSize::Size64x2, VectorSize::Size32x4), + (VecExtendOp::Uxtl8, false) => ("uxtl", VectorSize::Size16x8, VectorSize::Size8x8), + (VecExtendOp::Uxtl8, true) => ("uxtl2", VectorSize::Size16x8, VectorSize::Size8x16), + (VecExtendOp::Uxtl16, false) => ("uxtl", VectorSize::Size32x4, 
VectorSize::Size16x4), + (VecExtendOp::Uxtl16, true) => ("uxtl2", VectorSize::Size32x4, VectorSize::Size16x8), + (VecExtendOp::Uxtl32, false) => ("uxtl", VectorSize::Size64x2, VectorSize::Size32x2), + (VecExtendOp::Uxtl32, true) => ("uxtl2", VectorSize::Size64x2, VectorSize::Size32x4), }; let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest); let rn = show_vreg_vector(rn, mb_rru, src); @@ -3179,11 +3212,22 @@ impl Inst { let rn = show_vreg_element(rn, mb_rru, idx2, size); format!("mov {}, {}", rd, rn) } - &Inst::VecMiscNarrow { op, rd, rn, size } => { - let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); + &Inst::VecMiscNarrow { op, rd, rn, size, high_half } => { + let dest_size = if high_half { + assert!(size.is_128bits()); + size + } else { + size.halve() + }; + let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest_size); let rn = show_vreg_vector(rn, mb_rru, size.widen()); - let op = match op { - VecMiscNarrowOp::Xtn => "xtn", + let op = match (op, high_half) { + (VecMiscNarrowOp::Xtn, false) => "xtn", + (VecMiscNarrowOp::Xtn, true) => "xtn2", + (VecMiscNarrowOp::Sqxtn, false) => "sqxtn", + (VecMiscNarrowOp::Sqxtn, true) => "sqxtn2", + (VecMiscNarrowOp::Sqxtun, false) => "sqxtun", + (VecMiscNarrowOp::Sqxtun, true) => "sqxtun2", }; format!("{} {}, {}", op, rd, rn) } @@ -3267,6 +3311,10 @@ impl Inst { VecMisc2::Fsqrt => ("fsqrt", size), VecMisc2::Rev64 => ("rev64", size), VecMisc2::Shll => ("shll", size), + VecMisc2::Fcvtzs => ("fcvtzs", size), + VecMisc2::Fcvtzu => ("fcvtzu", size), + VecMisc2::Scvtf => ("scvtf", size), + VecMisc2::Ucvtf => ("ucvtf", size), }; let rd_size = if is_shll { size.widen() } else { size }; diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index ecc4c1ca67..b2915d024e 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -7,7 +7,7 @@ use crate::ir::Inst as IRInst; use crate::ir::{InstructionData, Opcode, TrapCode}; 
use crate::machinst::lower::*; use crate::machinst::*; -use crate::CodegenResult; +use crate::{CodegenError, CodegenResult}; use crate::isa::aarch64::abi::*; use crate::isa::aarch64::inst::*; @@ -66,7 +66,7 @@ pub(crate) fn lower_insn_to_regs>( let rd = get_output_reg(ctx, outputs[0]); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let ty = ty.unwrap(); - if ty_bits(ty) < 128 { + if !ty.is_vector() { let (rm, negated) = put_input_in_rse_imm12_maybe_negated( ctx, inputs[1], @@ -94,7 +94,7 @@ pub(crate) fn lower_insn_to_regs>( let rd = get_output_reg(ctx, outputs[0]); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let ty = ty.unwrap(); - if ty_bits(ty) < 128 { + if !ty.is_vector() { let (rm, negated) = put_input_in_rse_imm12_maybe_negated( ctx, inputs[1], @@ -124,7 +124,7 @@ pub(crate) fn lower_insn_to_regs>( let is_signed = op == Opcode::SaddSat || op == Opcode::SsubSat; let ty = ty.unwrap(); let rd = get_output_reg(ctx, outputs[0]); - if ty_bits(ty) < 128 { + if !ty.is_vector() { let narrow_mode = if is_signed { NarrowValueMode::SignExtend64 } else { @@ -180,7 +180,7 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Ineg => { let rd = get_output_reg(ctx, outputs[0]); let ty = ty.unwrap(); - if ty_bits(ty) < 128 { + if !ty.is_vector() { let rn = zero_reg(); let rm = put_input_in_rse_imm12(ctx, inputs[0], NarrowValueMode::None); let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64); @@ -201,7 +201,7 @@ pub(crate) fn lower_insn_to_regs>( let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let ty = ty.unwrap(); - if ty_bits(ty) < 128 { + if !ty.is_vector() { let alu_op = choose_32_64(ty, ALUOp::MAdd32, ALUOp::MAdd64); ctx.emit(Inst::AluRRRR { alu_op, @@ -274,6 +274,7 @@ pub(crate) fn lower_insn_to_regs>( rd: tmp1, rn, size: VectorSize::Size32x2, + high_half: false, }); // Sum the respective high half components. 
@@ -293,6 +294,7 @@ pub(crate) fn lower_insn_to_regs>( rd: tmp2, rn: rm, size: VectorSize::Size32x2, + high_half: false, }); // Shift the high half components, into the high half. @@ -570,7 +572,7 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Bnot => { let rd = get_output_reg(ctx, outputs[0]); let ty = ty.unwrap(); - if ty_bits(ty) < 128 { + if !ty.is_vector() { let rm = put_input_in_rs_immlogic(ctx, inputs[0], NarrowValueMode::None); let alu_op = choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64); // NOT rd, rm ==> ORR_NOT rd, zero, rm @@ -594,7 +596,7 @@ pub(crate) fn lower_insn_to_regs>( | Opcode::BxorNot => { let rd = get_output_reg(ctx, outputs[0]); let ty = ty.unwrap(); - if ty_bits(ty) < 128 { + if !ty.is_vector() { let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rm = put_input_in_rs_immlogic(ctx, inputs[1], NarrowValueMode::None); let alu_op = match op { @@ -633,7 +635,7 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => { let ty = ty.unwrap(); let rd = get_output_reg(ctx, outputs[0]); - if ty_bits(ty) < 128 { + if !ty.is_vector() { let size = OperandSize::from_bits(ty_bits(ty)); let narrow_mode = match (op, size) { (Opcode::Ishl, _) => NarrowValueMode::None, @@ -1159,6 +1161,7 @@ pub(crate) fn lower_insn_to_regs>( t, rd, rn: rd.to_reg(), + high_half: false, }); } } @@ -1433,7 +1436,7 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Bitselect | Opcode::Vselect => { let ty = ty.unwrap(); - if ty_bits(ty) < 128 { + if !ty.is_vector() { debug_assert_ne!(Opcode::Vselect, op); let tmp = ctx.alloc_tmp(RegClass::I64, I64); let rd = get_output_reg(ctx, outputs[0]); @@ -1696,7 +1699,7 @@ pub(crate) fn lower_insn_to_regs>( }; let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); - if ty_bits(ty) < 128 { + if !ty.is_vector() { let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64); let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode); ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm)); @@ 
-1716,7 +1719,7 @@ pub(crate) fn lower_insn_to_regs>( let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let rd = get_output_reg(ctx, outputs[0]); - if ty_bits(ty) < 128 { + if !ty.is_vector() { match ty_bits(ty) { 32 => { ctx.emit(Inst::FpuCmp32 { rn, rm }); @@ -2106,7 +2109,7 @@ pub(crate) fn lower_insn_to_regs>( let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let rd = get_output_reg(ctx, outputs[0]); - if bits < 128 { + if !ty.is_vector() { let fpu_op = match (op, bits) { (Opcode::Fadd, 32) => FPUOp2::Add32, (Opcode::Fadd, 64) => FPUOp2::Add64, @@ -2149,7 +2152,7 @@ pub(crate) fn lower_insn_to_regs>( let bits = ty_bits(ty); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rd = get_output_reg(ctx, outputs[0]); - if bits < 128 { + if !ty.is_vector() { let fpu_op = match (op, bits) { (Opcode::Sqrt, 32) => FPUOp1::Sqrt32, (Opcode::Sqrt, 64) => FPUOp1::Sqrt64, @@ -2414,153 +2417,186 @@ pub(crate) fn lower_insn_to_regs>( } Opcode::FcvtFromUint | Opcode::FcvtFromSint => { - let in_bits = ty_bits(ctx.input_ty(insn, 0)); - let out_bits = ty_bits(ctx.output_ty(insn, 0)); + let ty = ty.unwrap(); let signed = op == Opcode::FcvtFromSint; - let op = match (signed, in_bits, out_bits) { - (false, 8, 32) | (false, 16, 32) | (false, 32, 32) => IntToFpuOp::U32ToF32, - (true, 8, 32) | (true, 16, 32) | (true, 32, 32) => IntToFpuOp::I32ToF32, - (false, 8, 64) | (false, 16, 64) | (false, 32, 64) => IntToFpuOp::U32ToF64, - (true, 8, 64) | (true, 16, 64) | (true, 32, 64) => IntToFpuOp::I32ToF64, - (false, 64, 32) => IntToFpuOp::U64ToF32, - (true, 64, 32) => IntToFpuOp::I64ToF32, - (false, 64, 64) => IntToFpuOp::U64ToF64, - (true, 64, 64) => IntToFpuOp::I64ToF64, - _ => panic!("Unknown input/output-bits combination"), - }; - let narrow_mode = match (signed, in_bits) { - (false, 8) | (false, 16) | (false, 32) => NarrowValueMode::ZeroExtend32, - (true, 8) | (true, 16) | 
(true, 32) => NarrowValueMode::SignExtend32, - (false, 64) => NarrowValueMode::ZeroExtend64, - (true, 64) => NarrowValueMode::SignExtend64, - _ => panic!("Unknown input size"), - }; - let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); let rd = get_output_reg(ctx, outputs[0]); - ctx.emit(Inst::IntToFpu { op, rd, rn }); + + if ty.is_vector() { + let op = if signed { + VecMisc2::Scvtf + } else { + VecMisc2::Ucvtf + }; + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + + ctx.emit(Inst::VecMisc { + op, + rd, + rn, + size: VectorSize::from_ty(ty), + }); + } else { + let in_bits = ty_bits(ctx.input_ty(insn, 0)); + let out_bits = ty_bits(ty); + let op = match (signed, in_bits, out_bits) { + (false, 8, 32) | (false, 16, 32) | (false, 32, 32) => IntToFpuOp::U32ToF32, + (true, 8, 32) | (true, 16, 32) | (true, 32, 32) => IntToFpuOp::I32ToF32, + (false, 8, 64) | (false, 16, 64) | (false, 32, 64) => IntToFpuOp::U32ToF64, + (true, 8, 64) | (true, 16, 64) | (true, 32, 64) => IntToFpuOp::I32ToF64, + (false, 64, 32) => IntToFpuOp::U64ToF32, + (true, 64, 32) => IntToFpuOp::I64ToF32, + (false, 64, 64) => IntToFpuOp::U64ToF64, + (true, 64, 64) => IntToFpuOp::I64ToF64, + _ => panic!("Unknown input/output-bits combination"), + }; + let narrow_mode = match (signed, in_bits) { + (false, 8) | (false, 16) | (false, 32) => NarrowValueMode::ZeroExtend32, + (true, 8) | (true, 16) | (true, 32) => NarrowValueMode::SignExtend32, + (false, 64) => NarrowValueMode::ZeroExtend64, + (true, 64) => NarrowValueMode::SignExtend64, + _ => panic!("Unknown input size"), + }; + let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); + ctx.emit(Inst::IntToFpu { op, rd, rn }); + } } Opcode::FcvtToUintSat | Opcode::FcvtToSintSat => { - let in_ty = ctx.input_ty(insn, 0); - let in_bits = ty_bits(in_ty); - let out_ty = ctx.output_ty(insn, 0); - let out_bits = ty_bits(out_ty); + let ty = ty.unwrap(); let out_signed = op == Opcode::FcvtToSintSat; let rn = put_input_in_reg(ctx, inputs[0], 
NarrowValueMode::None); let rd = get_output_reg(ctx, outputs[0]); - // FIMM Vtmp1, u32::MAX or u64::MAX or i32::MAX or i64::MAX - // FMIN Vtmp2, Vin, Vtmp1 - // FIMM Vtmp1, 0 or 0 or i32::MIN or i64::MIN - // FMAX Vtmp2, Vtmp2, Vtmp1 - // (if signed) FIMM Vtmp1, 0 - // FCMP Vin, Vin - // FCSEL Vtmp2, Vtmp1, Vtmp2, NE // on NaN, select 0 - // convert Rout, Vtmp2 + if ty.is_vector() { + let op = if out_signed { + VecMisc2::Fcvtzs + } else { + VecMisc2::Fcvtzu + }; - assert!(in_bits == 32 || in_bits == 64); - assert!(out_bits == 32 || out_bits == 64); - - let min: f64 = match (out_bits, out_signed) { - (32, true) => std::i32::MIN as f64, - (32, false) => 0.0, - (64, true) => std::i64::MIN as f64, - (64, false) => 0.0, - _ => unreachable!(), - }; - - let max = match (out_bits, out_signed) { - (32, true) => std::i32::MAX as f64, - (32, false) => std::u32::MAX as f64, - (64, true) => std::i64::MAX as f64, - (64, false) => std::u64::MAX as f64, - _ => unreachable!(), - }; - - let rtmp1 = ctx.alloc_tmp(RegClass::V128, in_ty); - let rtmp2 = ctx.alloc_tmp(RegClass::V128, in_ty); - - if in_bits == 32 { - ctx.emit(Inst::LoadFpuConst32 { - rd: rtmp1, - const_data: max as f32, + ctx.emit(Inst::VecMisc { + op, + rd, + rn, + size: VectorSize::from_ty(ty), }); } else { - ctx.emit(Inst::LoadFpuConst64 { - rd: rtmp1, - const_data: max, - }); - } - ctx.emit(Inst::FpuRRR { - fpu_op: choose_32_64(in_ty, FPUOp2::Min32, FPUOp2::Min64), - rd: rtmp2, - rn: rn, - rm: rtmp1.to_reg(), - }); - if in_bits == 32 { - ctx.emit(Inst::LoadFpuConst32 { - rd: rtmp1, - const_data: min as f32, - }); - } else { - ctx.emit(Inst::LoadFpuConst64 { - rd: rtmp1, - const_data: min, - }); - } - ctx.emit(Inst::FpuRRR { - fpu_op: choose_32_64(in_ty, FPUOp2::Max32, FPUOp2::Max64), - rd: rtmp2, - rn: rtmp2.to_reg(), - rm: rtmp1.to_reg(), - }); - if out_signed { + let in_ty = ctx.input_ty(insn, 0); + let in_bits = ty_bits(in_ty); + let out_bits = ty_bits(ty); + // FIMM Vtmp1, u32::MAX or u64::MAX or i32::MAX or 
i64::MAX + // FMIN Vtmp2, Vin, Vtmp1 + // FIMM Vtmp1, 0 or 0 or i32::MIN or i64::MIN + // FMAX Vtmp2, Vtmp2, Vtmp1 + // (if signed) FIMM Vtmp1, 0 + // FCMP Vin, Vin + // FCSEL Vtmp2, Vtmp1, Vtmp2, NE // on NaN, select 0 + // convert Rout, Vtmp2 + + assert!(in_bits == 32 || in_bits == 64); + assert!(out_bits == 32 || out_bits == 64); + + let min: f64 = match (out_bits, out_signed) { + (32, true) => std::i32::MIN as f64, + (32, false) => 0.0, + (64, true) => std::i64::MIN as f64, + (64, false) => 0.0, + _ => unreachable!(), + }; + + let max = match (out_bits, out_signed) { + (32, true) => std::i32::MAX as f64, + (32, false) => std::u32::MAX as f64, + (64, true) => std::i64::MAX as f64, + (64, false) => std::u64::MAX as f64, + _ => unreachable!(), + }; + + let rtmp1 = ctx.alloc_tmp(RegClass::V128, in_ty); + let rtmp2 = ctx.alloc_tmp(RegClass::V128, in_ty); + if in_bits == 32 { ctx.emit(Inst::LoadFpuConst32 { rd: rtmp1, - const_data: 0.0, + const_data: max as f32, }); } else { ctx.emit(Inst::LoadFpuConst64 { rd: rtmp1, - const_data: 0.0, + const_data: max, }); } - } - if in_bits == 32 { - ctx.emit(Inst::FpuCmp32 { rn: rn, rm: rn }); - ctx.emit(Inst::FpuCSel32 { + ctx.emit(Inst::FpuRRR { + fpu_op: choose_32_64(in_ty, FPUOp2::Min32, FPUOp2::Min64), rd: rtmp2, - rn: rtmp1.to_reg(), - rm: rtmp2.to_reg(), - cond: Cond::Ne, + rn: rn, + rm: rtmp1.to_reg(), }); - } else { - ctx.emit(Inst::FpuCmp64 { rn: rn, rm: rn }); - ctx.emit(Inst::FpuCSel64 { + if in_bits == 32 { + ctx.emit(Inst::LoadFpuConst32 { + rd: rtmp1, + const_data: min as f32, + }); + } else { + ctx.emit(Inst::LoadFpuConst64 { + rd: rtmp1, + const_data: min, + }); + } + ctx.emit(Inst::FpuRRR { + fpu_op: choose_32_64(in_ty, FPUOp2::Max32, FPUOp2::Max64), rd: rtmp2, - rn: rtmp1.to_reg(), - rm: rtmp2.to_reg(), - cond: Cond::Ne, + rn: rtmp2.to_reg(), + rm: rtmp1.to_reg(), }); - } + if out_signed { + if in_bits == 32 { + ctx.emit(Inst::LoadFpuConst32 { + rd: rtmp1, + const_data: 0.0, + }); + } else { + 
ctx.emit(Inst::LoadFpuConst64 { + rd: rtmp1, + const_data: 0.0, + }); + } + } + if in_bits == 32 { + ctx.emit(Inst::FpuCmp32 { rn: rn, rm: rn }); + ctx.emit(Inst::FpuCSel32 { + rd: rtmp2, + rn: rtmp1.to_reg(), + rm: rtmp2.to_reg(), + cond: Cond::Ne, + }); + } else { + ctx.emit(Inst::FpuCmp64 { rn: rn, rm: rn }); + ctx.emit(Inst::FpuCSel64 { + rd: rtmp2, + rn: rtmp1.to_reg(), + rm: rtmp2.to_reg(), + cond: Cond::Ne, + }); + } - let cvt = match (in_bits, out_bits, out_signed) { - (32, 32, false) => FpuToIntOp::F32ToU32, - (32, 32, true) => FpuToIntOp::F32ToI32, - (32, 64, false) => FpuToIntOp::F32ToU64, - (32, 64, true) => FpuToIntOp::F32ToI64, - (64, 32, false) => FpuToIntOp::F64ToU32, - (64, 32, true) => FpuToIntOp::F64ToI32, - (64, 64, false) => FpuToIntOp::F64ToU64, - (64, 64, true) => FpuToIntOp::F64ToI64, - _ => unreachable!(), - }; - ctx.emit(Inst::FpuToInt { - op: cvt, - rd, - rn: rtmp2.to_reg(), - }); + let cvt = match (in_bits, out_bits, out_signed) { + (32, 32, false) => FpuToIntOp::F32ToU32, + (32, 32, true) => FpuToIntOp::F32ToI32, + (32, 64, false) => FpuToIntOp::F32ToU64, + (32, 64, true) => FpuToIntOp::F32ToI64, + (64, 32, false) => FpuToIntOp::F64ToU32, + (64, 32, true) => FpuToIntOp::F64ToI32, + (64, 64, false) => FpuToIntOp::F64ToU64, + (64, 64, true) => FpuToIntOp::F64ToI64, + _ => unreachable!(), + }; + ctx.emit(Inst::FpuToInt { + op: cvt, + rd, + rn: rtmp2.to_reg(), + }); + } } Opcode::IaddIfcout => { @@ -2689,12 +2725,62 @@ pub(crate) fn lower_insn_to_regs>( }); } - Opcode::Snarrow - | Opcode::Unarrow - | Opcode::SwidenLow - | Opcode::SwidenHigh - | Opcode::UwidenLow - | Opcode::UwidenHigh => unimplemented!(), + Opcode::Snarrow | Opcode::Unarrow => { + let op = if op == Opcode::Snarrow { + VecMiscNarrowOp::Sqxtn + } else { + VecMiscNarrowOp::Sqxtun + }; + let rd = get_output_reg(ctx, outputs[0]); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let rn2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + let ty = 
ty.unwrap(); + + ctx.emit(Inst::VecMiscNarrow { + op, + rd, + rn, + size: VectorSize::from_ty(ty), + high_half: false, + }); + ctx.emit(Inst::VecMiscNarrow { + op, + rd, + rn: rn2, + size: VectorSize::from_ty(ty), + high_half: true, + }); + } + + Opcode::SwidenLow | Opcode::SwidenHigh | Opcode::UwidenLow | Opcode::UwidenHigh => { + let lane_type = ty.unwrap().lane_type(); + let rd = get_output_reg(ctx, outputs[0]); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let (t, high_half) = match (lane_type, op) { + (I16, Opcode::SwidenLow) => (VecExtendOp::Sxtl8, false), + (I16, Opcode::SwidenHigh) => (VecExtendOp::Sxtl8, true), + (I16, Opcode::UwidenLow) => (VecExtendOp::Uxtl8, false), + (I16, Opcode::UwidenHigh) => (VecExtendOp::Uxtl8, true), + (I32, Opcode::SwidenLow) => (VecExtendOp::Sxtl16, false), + (I32, Opcode::SwidenHigh) => (VecExtendOp::Sxtl16, true), + (I32, Opcode::UwidenLow) => (VecExtendOp::Uxtl16, false), + (I32, Opcode::UwidenHigh) => (VecExtendOp::Uxtl16, true), + _ => { + return Err(CodegenError::Unsupported(format!( + "Unsupported SIMD vector lane type: {:?}", + lane_type + ))); + } + }; + + ctx.emit(Inst::VecExtend { + t, + rd, + rn, + high_half, + }); + } + Opcode::TlsValue => unimplemented!(), } From ec87aee147cb0c8a8ae7b5db192daa2163207c8a Mon Sep 17 00:00:00 2001 From: Gabor Greif Date: Fri, 21 Aug 2020 20:12:30 +0200 Subject: [PATCH 23/34] Revert #2137, the extra (seemingly leftover) DW_OP_deref is in fact essential (#2156) * Revert "don't leave Deref bytecode for Code chunk (#2137)" This reverts commit 30b9e691367724d37094703ef54a63f87c892056. 
* add explanation that DW_OP_deref is needed --- crates/debug/src/transform/expression.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/debug/src/transform/expression.rs b/crates/debug/src/transform/expression.rs index 601be39bf6..d21a00d4fe 100644 --- a/crates/debug/src/transform/expression.rs +++ b/crates/debug/src/transform/expression.rs @@ -458,7 +458,8 @@ where Operation::Deref { .. } => { flush_code_chunk!(); parts.push(CompiledExpressionPart::Deref); - continue; + // Don't re-enter the loop here (i.e. continue), because the + // DW_OP_deref still needs to be kept. } _ => { return Ok(None); @@ -724,7 +725,7 @@ mod tests { }, CompiledExpressionPart::Code(vec![35, 5]), CompiledExpressionPart::Deref, - CompiledExpressionPart::Code(vec![159]) + CompiledExpressionPart::Code(vec![6, 159]) ], need_deref: false } From 620e4b4e823f1942e5ab9dba7a6e1053d26b6835 Mon Sep 17 00:00:00 2001 From: Julian Seward Date: Thu, 20 Aug 2020 07:36:19 +0200 Subject: [PATCH 24/34] This patch fills in the missing pieces needed to support wasm atomics on newBE/x64. It does this by providing an implementation of the CLIF instructions `AtomicRmw`, `AtomicCas`, `AtomicLoad`, `AtomicStore` and `Fence`. The translation is straightforward. `AtomicCas` is translated into x64 `cmpxchg`, `AtomicLoad` becomes a normal load because x64-TSO provides adequate sequencing, `AtomicStore` becomes a normal store followed by `mfence`, and `Fence` becomes `mfence`. `AtomicRmw` is the only complex case: it becomes a normal load, followed by a loop which computes an updated value, tries to `cmpxchg` it back to memory, and repeats if necessary. This is a minimum-effort initial implementation. `AtomicRmw` could be implemented more efficiently using LOCK-prefixed integer read-modify-write instructions in the case where the old value in memory is not required. Subsequent work could add that, if required. The x64 emitter has been updated to emit the new instructions, obviously. 
The `LegacyPrefix` mechanism has been revised to handle multiple prefix bytes, not just one, since it is now sometimes necessary to emit both 0x66 (Operand Size Override) and F0 (Lock). In the aarch64 implementation of atomics, there has been some minor renaming for the sake of clarity, and for consistency with this x64 implementation. --- .../codegen/src/isa/aarch64/inst/args.rs | 28 -- .../codegen/src/isa/aarch64/inst/emit.rs | 14 +- .../src/isa/aarch64/inst/emit_tests.rs | 4 +- cranelift/codegen/src/isa/aarch64/inst/mod.rs | 2 +- cranelift/codegen/src/isa/aarch64/lower.rs | 5 +- .../codegen/src/isa/aarch64/lower_inst.rs | 13 +- cranelift/codegen/src/isa/x64/inst/args.rs | 11 + cranelift/codegen/src/isa/x64/inst/emit.rs | 401 ++++++++++++------ .../codegen/src/isa/x64/inst/emit_tests.rs | 177 +++++++- cranelift/codegen/src/isa/x64/inst/mod.rs | 102 ++++- cranelift/codegen/src/isa/x64/lower.rs | 158 +++++++ cranelift/codegen/src/machinst/inst_common.rs | 36 ++ cranelift/codegen/src/machinst/mod.rs | 2 + 13 files changed, 761 insertions(+), 192 deletions(-) create mode 100644 cranelift/codegen/src/machinst/inst_common.rs diff --git a/cranelift/codegen/src/isa/aarch64/inst/args.rs b/cranelift/codegen/src/isa/aarch64/inst/args.rs index 729d21d121..fe8660bbaf 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/args.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs @@ -3,7 +3,6 @@ // Some variants are never constructed, but we still want them as options in the future. 
#![allow(dead_code)] -use crate::ir; use crate::ir::types::{F32X2, F32X4, F64X2, I16X4, I16X8, I32X2, I32X4, I64X2, I8X16, I8X8}; use crate::ir::Type; use crate::isa::aarch64::inst::*; @@ -681,30 +680,3 @@ impl VectorSize { } } } - -//============================================================================= -// Instruction sub-components: atomic memory update operations - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -#[repr(u8)] -pub enum AtomicRMWOp { - Add, - Sub, - And, - Or, - Xor, - Xchg, -} - -impl AtomicRMWOp { - pub fn from(ir_op: ir::AtomicRmwOp) -> Self { - match ir_op { - ir::AtomicRmwOp::Add => AtomicRMWOp::Add, - ir::AtomicRmwOp::Sub => AtomicRMWOp::Sub, - ir::AtomicRmwOp::And => AtomicRMWOp::And, - ir::AtomicRmwOp::Or => AtomicRMWOp::Or, - ir::AtomicRmwOp::Xor => AtomicRMWOp::Xor, - ir::AtomicRmwOp::Xchg => AtomicRMWOp::Xchg, - } - } -} diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 32fe3aa6cf..60a81eb005 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -1090,18 +1090,18 @@ impl MachInstEmit for Inst { } sink.put4(enc_ldxr(ty, x27wr, x25)); // ldxr x27, [x25] - if op == AtomicRMWOp::Xchg { + if op == inst_common::AtomicRmwOp::Xchg { // mov x28, x26 sink.put4(enc_arith_rrr(0b101_01010_00_0, 0b000000, x28wr, xzr, x26)) } else { // add/sub/and/orr/eor x28, x27, x26 let bits_31_21 = match op { - AtomicRMWOp::Add => 0b100_01011_00_0, - AtomicRMWOp::Sub => 0b110_01011_00_0, - AtomicRMWOp::And => 0b100_01010_00_0, - AtomicRMWOp::Or => 0b101_01010_00_0, - AtomicRMWOp::Xor => 0b110_01010_00_0, - AtomicRMWOp::Xchg => unreachable!(), + inst_common::AtomicRmwOp::Add => 0b100_01011_00_0, + inst_common::AtomicRmwOp::Sub => 0b110_01011_00_0, + inst_common::AtomicRmwOp::And => 0b100_01010_00_0, + inst_common::AtomicRmwOp::Or => 0b101_01010_00_0, + inst_common::AtomicRmwOp::Xor => 0b110_01010_00_0, + inst_common::AtomicRmwOp::Xchg 
=> unreachable!(), }; sink.put4(enc_arith_rrr(bits_31_21, 0b000000, x28wr, x27, x26)); } diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index e2f08abb21..f8b446de31 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -4551,7 +4551,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::AtomicRMW { ty: I16, - op: AtomicRMWOp::Xor, + op: inst_common::AtomicRmwOp::Xor, srcloc: None, }, "BF3B03D53B7F5F487C031ACA3C7F1848B8FFFFB5BF3B03D5", @@ -4561,7 +4561,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::AtomicRMW { ty: I32, - op: AtomicRMWOp::Xchg, + op: inst_common::AtomicRmwOp::Xchg, srcloc: None, }, "BF3B03D53B7F5F88FC031AAA3C7F1888B8FFFFB5BF3B03D5", diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index b90dccd41a..b527b7dc19 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -649,7 +649,7 @@ pub enum Inst { /// x28 (wr) scratch reg; value afterwards has no meaning AtomicRMW { ty: Type, // I8, I16, I32 or I64 - op: AtomicRMWOp, + op: inst_common::AtomicRmwOp, srcloc: Option, }, diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index d399b90ed0..55b675a714 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -7,10 +7,11 @@ //! //! - Floating-point immediates (FIMM instruction). 
+use crate::ir; use crate::ir::condcodes::{FloatCC, IntCC}; use crate::ir::types::*; use crate::ir::Inst as IRInst; -use crate::ir::{AtomicRmwOp, InstructionData, Opcode, TrapCode, Type}; +use crate::ir::{InstructionData, Opcode, TrapCode, Type}; use crate::machinst::lower::*; use crate::machinst::*; use crate::CodegenResult; @@ -1067,7 +1068,7 @@ pub(crate) fn inst_trapcode(data: &InstructionData) -> Option { } } -pub(crate) fn inst_atomic_rmw_op(data: &InstructionData) -> Option { +pub(crate) fn inst_atomic_rmw_op(data: &InstructionData) -> Option { match data { &InstructionData::AtomicRmw { op, .. } => Some(op), _ => None, diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index b2915d024e..b52f01364d 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -21,7 +21,8 @@ use smallvec::SmallVec; use super::lower::*; -fn is_single_word_int_ty(ty: Type) -> bool { +/// This is target-word-size dependent. And it excludes booleans and reftypes. 
+fn is_valid_atomic_transaction_ty(ty: Type) -> bool { match ty { I8 | I16 | I32 | I64 => true, _ => false, @@ -1228,7 +1229,7 @@ pub(crate) fn lower_insn_to_regs>( let mut r_addr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let mut r_arg2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let ty_access = ty.unwrap(); - assert!(is_single_word_int_ty(ty_access)); + assert!(is_valid_atomic_transaction_ty(ty_access)); let memflags = ctx.memflags(insn).expect("memory flags"); let srcloc = if !memflags.notrap() { Some(ctx.srcloc(insn)) @@ -1244,7 +1245,7 @@ pub(crate) fn lower_insn_to_regs>( ctx.emit(Inst::gen_move(Writable::from_reg(xreg(25)), r_addr, I64)); ctx.emit(Inst::gen_move(Writable::from_reg(xreg(26)), r_arg2, I64)); // Now the AtomicRMW insn itself - let op = AtomicRMWOp::from(inst_atomic_rmw_op(ctx.data(insn)).unwrap()); + let op = inst_common::AtomicRmwOp::from(inst_atomic_rmw_op(ctx.data(insn)).unwrap()); ctx.emit(Inst::AtomicRMW { ty: ty_access, op, @@ -1264,7 +1265,7 @@ pub(crate) fn lower_insn_to_regs>( let mut r_expected = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let mut r_replacement = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None); let ty_access = ty.unwrap(); - assert!(is_single_word_int_ty(ty_access)); + assert!(is_valid_atomic_transaction_ty(ty_access)); let memflags = ctx.memflags(insn).expect("memory flags"); let srcloc = if !memflags.notrap() { Some(ctx.srcloc(insn)) @@ -1302,7 +1303,7 @@ pub(crate) fn lower_insn_to_regs>( let r_data = get_output_reg(ctx, outputs[0]); let r_addr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let ty_access = ty.unwrap(); - assert!(is_single_word_int_ty(ty_access)); + assert!(is_valid_atomic_transaction_ty(ty_access)); let memflags = ctx.memflags(insn).expect("memory flags"); let srcloc = if !memflags.notrap() { Some(ctx.srcloc(insn)) @@ -1321,7 +1322,7 @@ pub(crate) fn lower_insn_to_regs>( let r_data = put_input_in_reg(ctx, inputs[0], 
NarrowValueMode::None); let r_addr = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let ty_access = ctx.input_ty(insn, 0); - assert!(is_single_word_int_ty(ty_access)); + assert!(is_valid_atomic_transaction_ty(ty_access)); let memflags = ctx.memflags(insn).expect("memory flags"); let srcloc = if !memflags.notrap() { Some(ctx.srcloc(insn)) diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 343f3322d0..8690c57a4c 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -1010,3 +1010,14 @@ impl OperandSize { } } } + +/// An x64 memory fence kind. +#[derive(Clone)] +pub enum FenceKind { + /// `mfence` instruction ("Memory Fence") + MFence, + /// `lfence` instruction ("Load Fence") + LFence, + /// `sfence` instruction ("Store Fence") + SFence, +} diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 9bae562c5c..b54de499c9 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -3,7 +3,7 @@ use crate::ir::immediates::{Ieee32, Ieee64}; use crate::ir::TrapCode; use crate::isa::x64::inst::args::*; use crate::isa::x64::inst::*; -use crate::machinst::{MachBuffer, MachInstEmit, MachLabel}; +use crate::machinst::{inst_common, MachBuffer, MachInstEmit, MachLabel}; use core::convert::TryInto; use log::debug; use regalloc::{Reg, RegClass, Writable}; @@ -118,25 +118,38 @@ impl RexFlags { } } -/// For specifying the legacy prefixes (or `None` if no prefix required) to -/// be used at the start an instruction. A given prefix may be required for -/// various operations, including instructions that operate on GPR, SSE, and Vex -/// registers. -enum LegacyPrefix { +/// We may need to include one or more legacy prefix bytes before the REX prefix. This enum +/// covers only the small set of possibilities that we actually need. 
+enum LegacyPrefixes { + /// No prefix bytes None, + /// Operand Size Override -- here, denoting "16-bit operation" _66, + /// The Lock prefix + _F0, + /// Operand size override and Lock + _66F0, + /// REPNE, but no specific meaning here -- is just an opcode extension _F2, + /// REP/REPE, but no specific meaning here -- is just an opcode extension _F3, } -impl LegacyPrefix { +impl LegacyPrefixes { #[inline(always)] fn emit(&self, sink: &mut MachBuffer) { match self { - LegacyPrefix::_66 => sink.put1(0x66), - LegacyPrefix::_F2 => sink.put1(0xF2), - LegacyPrefix::_F3 => sink.put1(0xF3), - LegacyPrefix::None => (), + LegacyPrefixes::_66 => sink.put1(0x66), + LegacyPrefixes::_F0 => sink.put1(0xF0), + LegacyPrefixes::_66F0 => { + // I don't think the order matters, but in any case, this is the same order that + // the GNU assembler uses. + sink.put1(0x66); + sink.put1(0xF0); + } + LegacyPrefixes::_F2 => sink.put1(0xF2), + LegacyPrefixes::_F3 => sink.put1(0xF3), + LegacyPrefixes::None => (), } } } @@ -145,15 +158,16 @@ impl LegacyPrefix { /// /// For an instruction that has as operands a reg encoding `enc_g` and a memory address `mem_e`, /// create and emit: -/// - first the REX prefix, +/// - first the legacy prefixes, if any +/// - then the REX prefix, if needed /// - then caller-supplied opcode byte(s) (`opcodes` and `num_opcodes`), /// - then the MOD/RM byte, /// - then optionally, a SIB byte, /// - and finally optionally an immediate that will be derived from the `mem_e` operand. /// /// For most instructions up to and including SSE4.2, that will be the whole instruction: this is -/// what we call "standard" instructions, and abbreviate "std" in the name here. VEX instructions -/// will require their own emitter functions. +/// what we call "standard" instructions, and abbreviate "std" in the name here. VEX-prefixed +/// instructions will require their own emitter functions. 
/// /// This will also work for 32-bits x86 instructions, assuming no REX prefix is provided. /// @@ -168,7 +182,7 @@ impl LegacyPrefix { /// indicate a 64-bit operation. fn emit_std_enc_mem( sink: &mut MachBuffer, - prefix: LegacyPrefix, + prefixes: LegacyPrefixes, opcodes: u32, mut num_opcodes: usize, enc_g: u8, @@ -179,7 +193,7 @@ fn emit_std_enc_mem( // 64-bit integer registers, because they are part of an address // expression. But `enc_g` can be derived from a register of any class. - prefix.emit(sink); + prefixes.emit(sink); match mem_e { Amode::ImmReg { simm32, base } => { @@ -304,7 +318,7 @@ fn emit_std_enc_mem( /// operand is a register rather than memory. Hence it is much simpler. fn emit_std_enc_enc( sink: &mut MachBuffer, - prefix: LegacyPrefix, + prefixes: LegacyPrefixes, opcodes: u32, mut num_opcodes: usize, enc_g: u8, @@ -316,8 +330,8 @@ fn emit_std_enc_enc( // integer-to-FP conversion insn, one might be RegClass::I64 and the other // RegClass::V128. - // The operand-size override. - prefix.emit(sink); + // The legacy prefixes. + prefixes.emit(sink); // The rex byte. 
rex.emit_two_op(sink, enc_g, enc_e); @@ -338,7 +352,7 @@ fn emit_std_enc_enc( fn emit_std_reg_mem( sink: &mut MachBuffer, - prefix: LegacyPrefix, + prefixes: LegacyPrefixes, opcodes: u32, num_opcodes: usize, reg_g: Reg, @@ -346,12 +360,12 @@ fn emit_std_reg_mem( rex: RexFlags, ) { let enc_g = reg_enc(reg_g); - emit_std_enc_mem(sink, prefix, opcodes, num_opcodes, enc_g, mem_e, rex); + emit_std_enc_mem(sink, prefixes, opcodes, num_opcodes, enc_g, mem_e, rex); } fn emit_std_reg_reg( sink: &mut MachBuffer, - prefix: LegacyPrefix, + prefixes: LegacyPrefixes, opcodes: u32, num_opcodes: usize, reg_g: Reg, @@ -360,7 +374,7 @@ fn emit_std_reg_reg( ) { let enc_g = reg_enc(reg_g); let enc_e = reg_enc(reg_e); - emit_std_enc_enc(sink, prefix, opcodes, num_opcodes, enc_g, enc_e, rex); + emit_std_enc_enc(sink, prefixes, opcodes, num_opcodes, enc_g, enc_e, rex); } /// Write a suitable number of bits from an imm64 to the sink. @@ -481,7 +495,7 @@ pub(crate) fn emit( RegMemImm::Reg { reg: reg_e } => { emit_std_reg_reg( sink, - LegacyPrefix::None, + LegacyPrefixes::None, 0x0FAF, 2, reg_g.to_reg(), @@ -493,7 +507,7 @@ pub(crate) fn emit( RegMemImm::Mem { addr } => { emit_std_reg_mem( sink, - LegacyPrefix::None, + LegacyPrefixes::None, 0x0FAF, 2, reg_g.to_reg(), @@ -508,7 +522,7 @@ pub(crate) fn emit( // Yes, really, reg_g twice. emit_std_reg_reg( sink, - LegacyPrefix::None, + LegacyPrefixes::None, opcode, 1, reg_g.to_reg(), @@ -535,7 +549,7 @@ pub(crate) fn emit( // code easily. emit_std_reg_reg( sink, - LegacyPrefix::None, + LegacyPrefixes::None, opcode_r, 1, *reg_e, @@ -550,7 +564,7 @@ pub(crate) fn emit( // Here we revert to the "normal" G-E ordering. 
emit_std_reg_mem( sink, - LegacyPrefix::None, + LegacyPrefixes::None, opcode_m, 1, reg_g.to_reg(), @@ -566,7 +580,7 @@ pub(crate) fn emit( let enc_g = int_reg_enc(reg_g.to_reg()); emit_std_enc_enc( sink, - LegacyPrefix::None, + LegacyPrefixes::None, opcode, 1, subopcode_i, @@ -581,9 +595,9 @@ pub(crate) fn emit( Inst::UnaryRmR { size, op, src, dst } => { let (prefix, rex_flags) = match size { - 2 => (LegacyPrefix::_66, RexFlags::clear_w()), - 4 => (LegacyPrefix::None, RexFlags::clear_w()), - 8 => (LegacyPrefix::None, RexFlags::set_w()), + 2 => (LegacyPrefixes::_66, RexFlags::clear_w()), + 4 => (LegacyPrefixes::None, RexFlags::clear_w()), + 8 => (LegacyPrefixes::None, RexFlags::set_w()), _ => unreachable!(), }; @@ -621,9 +635,9 @@ pub(crate) fn emit( loc, } => { let (prefix, rex_flags) = match size { - 2 => (LegacyPrefix::_66, RexFlags::clear_w()), - 4 => (LegacyPrefix::None, RexFlags::clear_w()), - 8 => (LegacyPrefix::None, RexFlags::set_w()), + 2 => (LegacyPrefixes::_66, RexFlags::clear_w()), + 4 => (LegacyPrefixes::None, RexFlags::clear_w()), + 8 => (LegacyPrefixes::None, RexFlags::set_w()), _ => unreachable!(), }; @@ -649,9 +663,9 @@ pub(crate) fn emit( Inst::MulHi { size, signed, rhs } => { let (prefix, rex_flags) = match size { - 2 => (LegacyPrefix::_66, RexFlags::clear_w()), - 4 => (LegacyPrefix::None, RexFlags::clear_w()), - 8 => (LegacyPrefix::None, RexFlags::set_w()), + 2 => (LegacyPrefixes::_66, RexFlags::clear_w()), + 4 => (LegacyPrefixes::None, RexFlags::clear_w()), + 8 => (LegacyPrefixes::None, RexFlags::set_w()), _ => unreachable!(), }; @@ -826,7 +840,7 @@ pub(crate) fn emit( } else { RexFlags::clear_w() }; - emit_std_reg_reg(sink, LegacyPrefix::None, 0x89, 1, *src, dst.to_reg(), rex); + emit_std_reg_reg(sink, LegacyPrefixes::None, 0x89, 1, *src, dst.to_reg(), rex); } Inst::MovZX_RM_R { @@ -880,7 +894,7 @@ pub(crate) fn emit( } emit_std_reg_reg( sink, - LegacyPrefix::None, + LegacyPrefixes::None, opcodes, num_opcodes, dst.to_reg(), @@ -899,7 +913,7 @@ 
pub(crate) fn emit( emit_std_reg_mem( sink, - LegacyPrefix::None, + LegacyPrefixes::None, opcodes, num_opcodes, dst.to_reg(), @@ -920,7 +934,7 @@ pub(crate) fn emit( emit_std_reg_mem( sink, - LegacyPrefix::None, + LegacyPrefixes::None, 0x8B, 1, dst.to_reg(), @@ -931,7 +945,7 @@ pub(crate) fn emit( Inst::LoadEffectiveAddress { addr, dst } => emit_std_reg_mem( sink, - LegacyPrefix::None, + LegacyPrefixes::None, 0x8D, 1, dst.to_reg(), @@ -982,7 +996,7 @@ pub(crate) fn emit( } emit_std_reg_reg( sink, - LegacyPrefix::None, + LegacyPrefixes::None, opcodes, num_opcodes, dst.to_reg(), @@ -1001,7 +1015,7 @@ pub(crate) fn emit( emit_std_reg_mem( sink, - LegacyPrefix::None, + LegacyPrefixes::None, opcodes, num_opcodes, dst.to_reg(), @@ -1038,14 +1052,14 @@ pub(crate) fn emit( }; // MOV r8, r/m8 is (REX.W==0) 88 /r - emit_std_reg_mem(sink, LegacyPrefix::None, 0x88, 1, *src, dst, rex) + emit_std_reg_mem(sink, LegacyPrefixes::None, 0x88, 1, *src, dst, rex) } 2 => { // MOV r16, r/m16 is 66 (REX.W==0) 89 /r emit_std_reg_mem( sink, - LegacyPrefix::_66, + LegacyPrefixes::_66, 0x89, 1, *src, @@ -1058,7 +1072,7 @@ pub(crate) fn emit( // MOV r32, r/m32 is (REX.W==0) 89 /r emit_std_reg_mem( sink, - LegacyPrefix::None, + LegacyPrefixes::None, 0x89, 1, *src, @@ -1071,7 +1085,7 @@ pub(crate) fn emit( // MOV r64, r/m64 is (REX.W==1) 89 /r emit_std_reg_mem( sink, - LegacyPrefix::None, + LegacyPrefixes::None, 0x89, 1, *src, @@ -1109,7 +1123,7 @@ pub(crate) fn emit( None => { // SHL/SHR/SAR %cl, reg32 is (REX.W==0) D3 /subopcode // SHL/SHR/SAR %cl, reg64 is (REX.W==1) D3 /subopcode - emit_std_enc_enc(sink, LegacyPrefix::None, 0xD3, 1, subopcode, enc_dst, rex); + emit_std_enc_enc(sink, LegacyPrefixes::None, 0xD3, 1, subopcode, enc_dst, rex); } Some(num_bits) => { @@ -1117,7 +1131,7 @@ pub(crate) fn emit( // SHL/SHR/SAR $ib, reg64 is (REX.W==1) C1 /subopcode ib // When the shift amount is 1, there's an even shorter encoding, but we don't // bother with that nicety here. 
- emit_std_enc_enc(sink, LegacyPrefix::None, 0xC1, 1, subopcode, enc_dst, rex); + emit_std_enc_enc(sink, LegacyPrefixes::None, 0xC1, 1, subopcode, enc_dst, rex); sink.put1(*num_bits); } } @@ -1125,7 +1139,7 @@ pub(crate) fn emit( Inst::XmmRmiReg { opcode, src, dst } => { let rex = RexFlags::clear_w(); - let prefix = LegacyPrefix::_66; + let prefix = LegacyPrefixes::_66; if let RegMemImm::Imm { simm32 } = src { let (opcode_bytes, reg_digit) = match opcode { SseOpcode::Psllw => (0x0F71, 6), @@ -1175,9 +1189,9 @@ pub(crate) fn emit( src: src_e, dst: reg_g, } => { - let mut prefix = LegacyPrefix::None; + let mut prefix = LegacyPrefixes::None; if *size == 2 { - prefix = LegacyPrefix::_66; + prefix = LegacyPrefixes::_66; } let mut rex = match size { @@ -1245,7 +1259,7 @@ pub(crate) fn emit( rex_flags.always_emit(); emit_std_enc_enc( sink, - LegacyPrefix::None, + LegacyPrefixes::None, opcode, 2, 0, @@ -1261,9 +1275,9 @@ pub(crate) fn emit( dst: reg_g, } => { let (prefix, rex_flags) = match size { - 2 => (LegacyPrefix::_66, RexFlags::clear_w()), - 4 => (LegacyPrefix::None, RexFlags::clear_w()), - 8 => (LegacyPrefix::None, RexFlags::set_w()), + 2 => (LegacyPrefixes::_66, RexFlags::clear_w()), + 4 => (LegacyPrefixes::None, RexFlags::clear_w()), + 8 => (LegacyPrefixes::None, RexFlags::set_w()), _ => unreachable!("invalid size spec for cmove"), }; let opcode = 0x0F40 + cc.get_enc() as u32; @@ -1315,7 +1329,7 @@ pub(crate) fn emit( let addr = &addr.finalize(state); emit_std_enc_mem( sink, - LegacyPrefix::None, + LegacyPrefixes::None, 0xFF, 1, 6, /*subopcode*/ @@ -1371,7 +1385,7 @@ pub(crate) fn emit( let reg_enc = int_reg_enc(*reg); emit_std_enc_enc( sink, - LegacyPrefix::None, + LegacyPrefixes::None, 0xFF, 1, 2, /*subopcode*/ @@ -1384,7 +1398,7 @@ pub(crate) fn emit( let addr = &addr.finalize(state); emit_std_enc_mem( sink, - LegacyPrefix::None, + LegacyPrefixes::None, 0xFF, 1, 2, /*subopcode*/ @@ -1461,7 +1475,7 @@ pub(crate) fn emit( let reg_enc = int_reg_enc(*reg); 
emit_std_enc_enc( sink, - LegacyPrefix::None, + LegacyPrefixes::None, 0xFF, 1, 4, /*subopcode*/ @@ -1474,7 +1488,7 @@ pub(crate) fn emit( let addr = &addr.finalize(state); emit_std_enc_mem( sink, - LegacyPrefix::None, + LegacyPrefixes::None, 0xFF, 1, 4, /*subopcode*/ @@ -1596,20 +1610,20 @@ pub(crate) fn emit( let rex = RexFlags::clear_w(); let (prefix, opcode) = match op { - SseOpcode::Cvtss2sd => (LegacyPrefix::_F3, 0x0F5A), - SseOpcode::Cvtsd2ss => (LegacyPrefix::_F2, 0x0F5A), - SseOpcode::Movaps => (LegacyPrefix::None, 0x0F28), - SseOpcode::Movapd => (LegacyPrefix::_66, 0x0F28), - SseOpcode::Movdqa => (LegacyPrefix::_66, 0x0F6F), - SseOpcode::Movdqu => (LegacyPrefix::_F3, 0x0F6F), - SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F10), - SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F10), - SseOpcode::Movups => (LegacyPrefix::None, 0x0F10), - SseOpcode::Movupd => (LegacyPrefix::_66, 0x0F10), - SseOpcode::Sqrtps => (LegacyPrefix::None, 0x0F51), - SseOpcode::Sqrtpd => (LegacyPrefix::_66, 0x0F51), - SseOpcode::Sqrtss => (LegacyPrefix::_F3, 0x0F51), - SseOpcode::Sqrtsd => (LegacyPrefix::_F2, 0x0F51), + SseOpcode::Cvtss2sd => (LegacyPrefixes::_F3, 0x0F5A), + SseOpcode::Cvtsd2ss => (LegacyPrefixes::_F2, 0x0F5A), + SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F28), + SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F28), + SseOpcode::Movdqa => (LegacyPrefixes::_66, 0x0F6F), + SseOpcode::Movdqu => (LegacyPrefixes::_F3, 0x0F6F), + SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F10), + SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F10), + SseOpcode::Movups => (LegacyPrefixes::None, 0x0F10), + SseOpcode::Movupd => (LegacyPrefixes::_66, 0x0F10), + SseOpcode::Sqrtps => (LegacyPrefixes::None, 0x0F51), + SseOpcode::Sqrtpd => (LegacyPrefixes::_66, 0x0F51), + SseOpcode::Sqrtss => (LegacyPrefixes::_F3, 0x0F51), + SseOpcode::Sqrtsd => (LegacyPrefixes::_F2, 0x0F51), _ => unimplemented!("Opcode {:?} not implemented", op), }; @@ -1635,49 +1649,49 @@ pub(crate) fn emit( } => { let rex = 
RexFlags::clear_w(); let (prefix, opcode, length) = match op { - SseOpcode::Addps => (LegacyPrefix::None, 0x0F58, 2), - SseOpcode::Addpd => (LegacyPrefix::_66, 0x0F58, 2), - SseOpcode::Addss => (LegacyPrefix::_F3, 0x0F58, 2), - SseOpcode::Addsd => (LegacyPrefix::_F2, 0x0F58, 2), - SseOpcode::Andpd => (LegacyPrefix::_66, 0x0F54, 2), - SseOpcode::Andps => (LegacyPrefix::None, 0x0F54, 2), - SseOpcode::Andnps => (LegacyPrefix::None, 0x0F55, 2), - SseOpcode::Andnpd => (LegacyPrefix::_66, 0x0F55, 2), - SseOpcode::Divps => (LegacyPrefix::None, 0x0F5E, 2), - SseOpcode::Divpd => (LegacyPrefix::_66, 0x0F5E, 2), - SseOpcode::Divss => (LegacyPrefix::_F3, 0x0F5E, 2), - SseOpcode::Divsd => (LegacyPrefix::_F2, 0x0F5E, 2), - SseOpcode::Minps => (LegacyPrefix::None, 0x0F5D, 2), - SseOpcode::Minpd => (LegacyPrefix::_66, 0x0F5D, 2), - SseOpcode::Minss => (LegacyPrefix::_F3, 0x0F5D, 2), - SseOpcode::Minsd => (LegacyPrefix::_F2, 0x0F5D, 2), - SseOpcode::Maxps => (LegacyPrefix::None, 0x0F5F, 2), - SseOpcode::Maxpd => (LegacyPrefix::_66, 0x0F5F, 2), - SseOpcode::Maxss => (LegacyPrefix::_F3, 0x0F5F, 2), - SseOpcode::Maxsd => (LegacyPrefix::_F2, 0x0F5F, 2), - SseOpcode::Mulps => (LegacyPrefix::None, 0x0F59, 2), - SseOpcode::Mulpd => (LegacyPrefix::_66, 0x0F59, 2), - SseOpcode::Mulss => (LegacyPrefix::_F3, 0x0F59, 2), - SseOpcode::Mulsd => (LegacyPrefix::_F2, 0x0F59, 2), - SseOpcode::Orpd => (LegacyPrefix::_66, 0x0F56, 2), - SseOpcode::Orps => (LegacyPrefix::None, 0x0F56, 2), - SseOpcode::Paddb => (LegacyPrefix::_66, 0x0FFC, 2), - SseOpcode::Paddd => (LegacyPrefix::_66, 0x0FFE, 2), - SseOpcode::Paddq => (LegacyPrefix::_66, 0x0FD4, 2), - SseOpcode::Paddw => (LegacyPrefix::_66, 0x0FFD, 2), - SseOpcode::Pmulld => (LegacyPrefix::_66, 0x0F3840, 3), - SseOpcode::Pmullw => (LegacyPrefix::_66, 0x0FD5, 2), - SseOpcode::Pmuludq => (LegacyPrefix::_66, 0x0FF4, 2), - SseOpcode::Psubb => (LegacyPrefix::_66, 0x0FF8, 2), - SseOpcode::Psubd => (LegacyPrefix::_66, 0x0FFA, 2), - SseOpcode::Psubq => 
(LegacyPrefix::_66, 0x0FFB, 2), - SseOpcode::Psubw => (LegacyPrefix::_66, 0x0FF9, 2), - SseOpcode::Subps => (LegacyPrefix::None, 0x0F5C, 2), - SseOpcode::Subpd => (LegacyPrefix::_66, 0x0F5C, 2), - SseOpcode::Subss => (LegacyPrefix::_F3, 0x0F5C, 2), - SseOpcode::Subsd => (LegacyPrefix::_F2, 0x0F5C, 2), - SseOpcode::Xorps => (LegacyPrefix::None, 0x0F57, 2), - SseOpcode::Xorpd => (LegacyPrefix::_66, 0x0F57, 2), + SseOpcode::Addps => (LegacyPrefixes::None, 0x0F58, 2), + SseOpcode::Addpd => (LegacyPrefixes::_66, 0x0F58, 2), + SseOpcode::Addss => (LegacyPrefixes::_F3, 0x0F58, 2), + SseOpcode::Addsd => (LegacyPrefixes::_F2, 0x0F58, 2), + SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2), + SseOpcode::Andps => (LegacyPrefixes::None, 0x0F54, 2), + SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2), + SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2), + SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2), + SseOpcode::Divpd => (LegacyPrefixes::_66, 0x0F5E, 2), + SseOpcode::Divss => (LegacyPrefixes::_F3, 0x0F5E, 2), + SseOpcode::Divsd => (LegacyPrefixes::_F2, 0x0F5E, 2), + SseOpcode::Minps => (LegacyPrefixes::None, 0x0F5D, 2), + SseOpcode::Minpd => (LegacyPrefixes::_66, 0x0F5D, 2), + SseOpcode::Minss => (LegacyPrefixes::_F3, 0x0F5D, 2), + SseOpcode::Minsd => (LegacyPrefixes::_F2, 0x0F5D, 2), + SseOpcode::Maxps => (LegacyPrefixes::None, 0x0F5F, 2), + SseOpcode::Maxpd => (LegacyPrefixes::_66, 0x0F5F, 2), + SseOpcode::Maxss => (LegacyPrefixes::_F3, 0x0F5F, 2), + SseOpcode::Maxsd => (LegacyPrefixes::_F2, 0x0F5F, 2), + SseOpcode::Mulps => (LegacyPrefixes::None, 0x0F59, 2), + SseOpcode::Mulpd => (LegacyPrefixes::_66, 0x0F59, 2), + SseOpcode::Mulss => (LegacyPrefixes::_F3, 0x0F59, 2), + SseOpcode::Mulsd => (LegacyPrefixes::_F2, 0x0F59, 2), + SseOpcode::Orpd => (LegacyPrefixes::_66, 0x0F56, 2), + SseOpcode::Orps => (LegacyPrefixes::None, 0x0F56, 2), + SseOpcode::Paddb => (LegacyPrefixes::_66, 0x0FFC, 2), + SseOpcode::Paddd => (LegacyPrefixes::_66, 0x0FFE, 2), + 
SseOpcode::Paddq => (LegacyPrefixes::_66, 0x0FD4, 2), + SseOpcode::Paddw => (LegacyPrefixes::_66, 0x0FFD, 2), + SseOpcode::Pmulld => (LegacyPrefixes::_66, 0x0F3840, 3), + SseOpcode::Pmullw => (LegacyPrefixes::_66, 0x0FD5, 2), + SseOpcode::Pmuludq => (LegacyPrefixes::_66, 0x0FF4, 2), + SseOpcode::Psubb => (LegacyPrefixes::_66, 0x0FF8, 2), + SseOpcode::Psubd => (LegacyPrefixes::_66, 0x0FFA, 2), + SseOpcode::Psubq => (LegacyPrefixes::_66, 0x0FFB, 2), + SseOpcode::Psubw => (LegacyPrefixes::_66, 0x0FF9, 2), + SseOpcode::Subps => (LegacyPrefixes::None, 0x0F5C, 2), + SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2), + SseOpcode::Subss => (LegacyPrefixes::_F3, 0x0F5C, 2), + SseOpcode::Subsd => (LegacyPrefixes::_F2, 0x0F5C, 2), + SseOpcode::Xorps => (LegacyPrefixes::None, 0x0F57, 2), + SseOpcode::Xorpd => (LegacyPrefixes::_66, 0x0F57, 2), _ => unimplemented!("Opcode {:?} not implemented", op), }; @@ -1780,10 +1794,10 @@ pub(crate) fn emit( Inst::XmmRmRImm { op, src, dst, imm } => { let prefix = match op { - SseOpcode::Cmpps => LegacyPrefix::None, - SseOpcode::Cmppd => LegacyPrefix::_66, - SseOpcode::Cmpss => LegacyPrefix::_F3, - SseOpcode::Cmpsd => LegacyPrefix::_F2, + SseOpcode::Cmpps => LegacyPrefixes::None, + SseOpcode::Cmppd => LegacyPrefixes::_66, + SseOpcode::Cmpss => LegacyPrefixes::_F3, + SseOpcode::Cmpsd => LegacyPrefixes::_F2, _ => unimplemented!("Opcode {:?} not implemented", op), }; let opcode = 0x0FC2; @@ -1833,14 +1847,14 @@ pub(crate) fn emit( srcloc, } => { let (prefix, opcode) = match op { - SseOpcode::Movaps => (LegacyPrefix::None, 0x0F29), - SseOpcode::Movapd => (LegacyPrefix::_66, 0x0F29), - SseOpcode::Movdqa => (LegacyPrefix::_66, 0x0F7F), - SseOpcode::Movdqu => (LegacyPrefix::_F3, 0x0F7F), - SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F11), - SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F11), - SseOpcode::Movups => (LegacyPrefix::None, 0x0F11), - SseOpcode::Movupd => (LegacyPrefix::_66, 0x0F11), + SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F29), + 
SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F29), + SseOpcode::Movdqa => (LegacyPrefixes::_66, 0x0F7F), + SseOpcode::Movdqu => (LegacyPrefixes::_F3, 0x0F7F), + SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F11), + SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F11), + SseOpcode::Movups => (LegacyPrefixes::None, 0x0F11), + SseOpcode::Movupd => (LegacyPrefixes::_66, 0x0F11), _ => unimplemented!("Opcode {:?} not implemented", op), }; let dst = &dst.finalize(state); @@ -1860,9 +1874,9 @@ pub(crate) fn emit( let (prefix, opcode, dst_first) = match op { // Movd and movq use the same opcode; the presence of the REX prefix (set below) // actually determines which is used. - SseOpcode::Movd | SseOpcode::Movq => (LegacyPrefix::_66, 0x0F7E, false), - SseOpcode::Cvttss2si => (LegacyPrefix::_F3, 0x0F2C, true), - SseOpcode::Cvttsd2si => (LegacyPrefix::_F2, 0x0F2C, true), + SseOpcode::Movd | SseOpcode::Movq => (LegacyPrefixes::_66, 0x0F7E, false), + SseOpcode::Cvttss2si => (LegacyPrefixes::_F3, 0x0F2C, true), + SseOpcode::Cvttsd2si => (LegacyPrefixes::_F2, 0x0F2C, true), _ => panic!("unexpected opcode {:?}", op), }; let rex = match dst_size { @@ -1888,9 +1902,9 @@ pub(crate) fn emit( let (prefix, opcode) = match op { // Movd and movq use the same opcode; the presence of the REX prefix (set below) // actually determines which is used. 
- SseOpcode::Movd | SseOpcode::Movq => (LegacyPrefix::_66, 0x0F6E), - SseOpcode::Cvtsi2ss => (LegacyPrefix::_F3, 0x0F2A), - SseOpcode::Cvtsi2sd => (LegacyPrefix::_F2, 0x0F2A), + SseOpcode::Movd | SseOpcode::Movq => (LegacyPrefixes::_66, 0x0F6E), + SseOpcode::Cvtsi2ss => (LegacyPrefixes::_F3, 0x0F2A), + SseOpcode::Cvtsi2sd => (LegacyPrefixes::_F2, 0x0F2A), _ => panic!("unexpected opcode {:?}", op), }; let rex = match *src_size { @@ -1911,8 +1925,8 @@ pub(crate) fn emit( Inst::XMM_Cmp_RM_R { op, src, dst } => { let rex = RexFlags::clear_w(); let (prefix, opcode) = match op { - SseOpcode::Ucomisd => (LegacyPrefix::_66, 0x0F2E), - SseOpcode::Ucomiss => (LegacyPrefix::None, 0x0F2E), + SseOpcode::Ucomisd => (LegacyPrefixes::_66, 0x0F2E), + SseOpcode::Ucomiss => (LegacyPrefixes::None, 0x0F2E), _ => unimplemented!("Emit xmm cmp rm r"), }; @@ -2431,6 +2445,113 @@ pub(crate) fn emit( } } + Inst::LockCmpxchg { + ty, + src, + dst, + srcloc, + } => { + if let Some(srcloc) = srcloc { + sink.add_trap(*srcloc, TrapCode::HeapOutOfBounds); + } + // lock cmpxchg{b,w,l,q} %src, (dst) + // Note that 0xF0 is the Lock prefix. 
+ let (prefix, rex, opcodes) = match *ty { + types::I8 => { + let mut rex_flags = RexFlags::clear_w(); + let enc_src = int_reg_enc(*src); + if enc_src >= 4 && enc_src <= 7 { + rex_flags.always_emit(); + }; + (LegacyPrefixes::_F0, rex_flags, 0x0FB0) + } + types::I16 => (LegacyPrefixes::_66F0, RexFlags::clear_w(), 0x0FB1), + types::I32 => (LegacyPrefixes::_F0, RexFlags::clear_w(), 0x0FB1), + types::I64 => (LegacyPrefixes::_F0, RexFlags::set_w(), 0x0FB1), + _ => unreachable!(), + }; + emit_std_reg_mem(sink, prefix, opcodes, 2, *src, &dst.finalize(state), rex); + } + + Inst::AtomicRmwSeq { ty, op, srcloc } => { + // Emit this: + // + // mov{zbq,zwq,zlq,q} (%r9), %rax // rax = old value + // again: + // movq %rax, %r11 // rax = old value, r11 = old value + // `op`q %r10, %r11 // rax = old value, r11 = new value + // lock cmpxchg{b,w,l,q} %r11, (%r9) // try to store new value + // jnz again // If this is taken, rax will have a "revised" old value + // + // Operand conventions: + // IN: %r9 (addr), %r10 (2nd arg for `op`) + // OUT: %rax (old value), %r11 (trashed), %rflags (trashed) + // + // In the case where the operation is 'xchg', the "`op`q" instruction is instead + // movq %r10, %r11 + // so that we simply write in the destination, the "2nd arg for `op`". + let rax = regs::rax(); + let r9 = regs::r9(); + let r10 = regs::r10(); + let r11 = regs::r11(); + let rax_w = Writable::from_reg(rax); + let r11_w = Writable::from_reg(r11); + let amode = Amode::imm_reg(0, r9); + let again_label = sink.get_label(); + + // mov{zbq,zwq,zlq,q} (%r9), %rax + // No need to call `add_trap` here, since the `i1` emit will do that. 
+ let i1 = Inst::load(*ty, amode.clone(), rax_w, ExtKind::ZeroExtend, *srcloc); + i1.emit(sink, flags, state); + + // again: + sink.bind_label(again_label); + + // movq %rax, %r11 + let i2 = Inst::mov_r_r(true, rax, r11_w); + i2.emit(sink, flags, state); + + // opq %r10, %r11 + let r10_rmi = RegMemImm::reg(r10); + let i3 = if *op == inst_common::AtomicRmwOp::Xchg { + Inst::mov_r_r(true, r10, r11_w) + } else { + let alu_op = match op { + inst_common::AtomicRmwOp::Add => AluRmiROpcode::Add, + inst_common::AtomicRmwOp::Sub => AluRmiROpcode::Sub, + inst_common::AtomicRmwOp::And => AluRmiROpcode::And, + inst_common::AtomicRmwOp::Or => AluRmiROpcode::Or, + inst_common::AtomicRmwOp::Xor => AluRmiROpcode::Xor, + inst_common::AtomicRmwOp::Xchg => unreachable!(), + }; + Inst::alu_rmi_r(true, alu_op, r10_rmi, r11_w) + }; + i3.emit(sink, flags, state); + + // lock cmpxchg{b,w,l,q} %r11, (%r9) + // No need to call `add_trap` here, since the `i4` emit will do that. + let i4 = Inst::LockCmpxchg { + ty: *ty, + src: r11, + dst: amode.into(), + srcloc: *srcloc, + }; + i4.emit(sink, flags, state); + + // jnz again + one_way_jmp(sink, CC::NZ, again_label); + } + + Inst::Fence { kind } => { + sink.put1(0x0F); + sink.put1(0xAE); + match kind { + FenceKind::MFence => sink.put1(0xF0), // mfence = 0F AE F0 + FenceKind::LFence => sink.put1(0xE8), // lfence = 0F AE E8 + FenceKind::SFence => sink.put1(0xF8), // sfence = 0F AE F8 + } + } + Inst::Hlt => { sink.put1(0xcc); } diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index e0f2ea1acd..cb1a6b855a 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -4,10 +4,13 @@ //! //! to see stdout: cargo test -- --nocapture //! -//! for this specific case: +//! for this specific case, as of 24 Aug 2020: //! -//! (cd cranelift/codegen && \ -//! 
RUST_BACKTRACE=1 cargo test isa::x64::inst::test_x64_insn_encoding_and_printing -- --nocapture) +//! cd to the top of your wasmtime tree, then: +//! RUST_BACKTRACE=1 cargo test --features test-programs/test_programs \ +//! --features experimental_x64 --all --exclude peepmatic --exclude lightbeam \ +//! --exclude wasmtime-lightbeam --exclude peepmatic-automata --exclude peepmatic-fuzzing \ +//! --exclude peepmatic-macro -- isa::x64::inst::emit_tests::test_x64_emit use super::*; use crate::isa::test_utils; @@ -3272,6 +3275,174 @@ fn test_x64_emit() { "cmpps $0, %xmm15, %xmm7", )); + // ======================================================== + // Pertaining to atomics. + let am1: SyntheticAmode = Amode::imm_reg_reg_shift(321, r10, rdx, 2).into(); + // `am2` doesn't contribute any 1 bits to the rex prefix, so we must use it when testing + // for retention of the apparently-redundant rex prefix in the 8-bit case. + let am2: SyntheticAmode = Amode::imm_reg_reg_shift(-12345i32 as u32, rcx, rsi, 3).into(); + + // A general 8-bit case. + insns.push(( + Inst::LockCmpxchg { + ty: types::I8, + src: rbx, + dst: am1, + srcloc: None, + }, + "F0410FB09C9241010000", + "lock cmpxchgb %bl, 321(%r10,%rdx,4)", + )); + // Check redundant rex retention in 8-bit cases. 
+ insns.push(( + Inst::LockCmpxchg { + ty: types::I8, + src: rdx, + dst: am2.clone(), + srcloc: None, + }, + "F00FB094F1C7CFFFFF", + "lock cmpxchgb %dl, -12345(%rcx,%rsi,8)", + )); + insns.push(( + Inst::LockCmpxchg { + ty: types::I8, + src: rsi, + dst: am2.clone(), + srcloc: None, + }, + "F0400FB0B4F1C7CFFFFF", + "lock cmpxchgb %sil, -12345(%rcx,%rsi,8)", + )); + insns.push(( + Inst::LockCmpxchg { + ty: types::I8, + src: r10, + dst: am2.clone(), + srcloc: None, + }, + "F0440FB094F1C7CFFFFF", + "lock cmpxchgb %r10b, -12345(%rcx,%rsi,8)", + )); + insns.push(( + Inst::LockCmpxchg { + ty: types::I8, + src: r15, + dst: am2.clone(), + srcloc: None, + }, + "F0440FB0BCF1C7CFFFFF", + "lock cmpxchgb %r15b, -12345(%rcx,%rsi,8)", + )); + // 16 bit cases + insns.push(( + Inst::LockCmpxchg { + ty: types::I16, + src: rsi, + dst: am2.clone(), + srcloc: None, + }, + "66F00FB1B4F1C7CFFFFF", + "lock cmpxchgw %si, -12345(%rcx,%rsi,8)", + )); + insns.push(( + Inst::LockCmpxchg { + ty: types::I16, + src: r10, + dst: am2.clone(), + srcloc: None, + }, + "66F0440FB194F1C7CFFFFF", + "lock cmpxchgw %r10w, -12345(%rcx,%rsi,8)", + )); + // 32 bit cases + insns.push(( + Inst::LockCmpxchg { + ty: types::I32, + src: rsi, + dst: am2.clone(), + srcloc: None, + }, + "F00FB1B4F1C7CFFFFF", + "lock cmpxchgl %esi, -12345(%rcx,%rsi,8)", + )); + insns.push(( + Inst::LockCmpxchg { + ty: types::I32, + src: r10, + dst: am2.clone(), + srcloc: None, + }, + "F0440FB194F1C7CFFFFF", + "lock cmpxchgl %r10d, -12345(%rcx,%rsi,8)", + )); + // 64 bit cases + insns.push(( + Inst::LockCmpxchg { + ty: types::I64, + src: rsi, + dst: am2.clone(), + srcloc: None, + }, + "F0480FB1B4F1C7CFFFFF", + "lock cmpxchgq %rsi, -12345(%rcx,%rsi,8)", + )); + insns.push(( + Inst::LockCmpxchg { + ty: types::I64, + src: r10, + dst: am2.clone(), + srcloc: None, + }, + "F04C0FB194F1C7CFFFFF", + "lock cmpxchgq %r10, -12345(%rcx,%rsi,8)", + )); + + // AtomicRmwSeq + insns.push(( + Inst::AtomicRmwSeq { ty: types::I8, op: 
inst_common::AtomicRmwOp::Or, srcloc: None }, + "490FB6014989C34D09D3F0450FB0190F85EFFFFFFF", + "atomically { 8_bits_at_[%r9]) Or= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }" + )); + insns.push(( + Inst::AtomicRmwSeq { ty: types::I16, op: inst_common::AtomicRmwOp::And, srcloc: None }, + "490FB7014989C34D21D366F0450FB1190F85EEFFFFFF", + "atomically { 16_bits_at_[%r9]) And= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }" + )); + insns.push(( + Inst::AtomicRmwSeq { ty: types::I32, op: inst_common::AtomicRmwOp::Xchg, srcloc: None }, + "418B014989C34D89D3F0450FB1190F85EFFFFFFF", + "atomically { 32_bits_at_[%r9]) Xchg= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }" + )); + insns.push(( + Inst::AtomicRmwSeq { ty: types::I64, op: inst_common::AtomicRmwOp::Add, srcloc: None }, + "498B014989C34D01D3F04D0FB1190F85EFFFFFFF", + "atomically { 64_bits_at_[%r9]) Add= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }" + )); + + // Fence + insns.push(( + Inst::Fence { + kind: FenceKind::MFence, + }, + "0FAEF0", + "mfence", + )); + insns.push(( + Inst::Fence { + kind: FenceKind::LFence, + }, + "0FAEE8", + "lfence", + )); + insns.push(( + Inst::Fence { + kind: FenceKind::SFence, + }, + "0FAEF8", + "sfence", + )); + // ======================================================== // Misc instructions. diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 712a9b508e..da2dca2060 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -404,6 +404,56 @@ pub enum Inst { offset: i64, }, + // ===================================== + // Instructions pertaining to atomic memory accesses. + /// A standard (native) `lock cmpxchg src, (amode)`, with register conventions: + /// + /// `dst` (read) address + /// `src` (read) replacement value + /// %rax (modified) in: expected value, out: value that was actually at `dst` + /// %rflags is written. 
Do not assume anything about it after the instruction. + /// + /// The instruction "succeeded" iff the lowest `ty` bits of %rax afterwards are the same as + /// they were before. + LockCmpxchg { + ty: Type, // I8, I16, I32 or I64 + src: Reg, + dst: SyntheticAmode, + srcloc: Option, + }, + + /// A synthetic instruction, based on a loop around a native `lock cmpxchg` instruction. + /// This atomically modifies a value in memory and returns the old value. The sequence + /// consists of an initial "normal" load from `dst`, followed by a loop which computes the + /// new value and tries to compare-and-swap ("CAS") it into `dst`, using the native + /// instruction `lock cmpxchg{b,w,l,q}` . The loop iterates until the CAS is successful. + /// If there is no contention, there will be only one pass through the loop body. The + /// sequence does *not* perform any explicit memory fence instructions + /// (mfence/sfence/lfence). + /// + /// Note that the transaction is atomic in the sense that, as observed by some other thread, + /// `dst` either has the initial or final value, but no other. It isn't atomic in the sense + /// of guaranteeing that no other thread writes to `dst` in between the initial load and the + /// CAS -- but that would cause the CAS to fail unless the other thread's last write before + /// the CAS wrote the same value that was already there. In other words, this + /// implementation suffers (unavoidably) from the A-B-A problem. + /// + /// This instruction sequence has fixed register uses as follows: + /// + /// %r9 (read) address + /// %r10 (read) second operand for `op` + /// %r11 (written) scratch reg; value afterwards has no meaning + /// %rax (written) the old value at %r9 + /// %rflags is written. Do not assume anything about it after the instruction. + AtomicRmwSeq { + ty: Type, // I8, I16, I32 or I64 + op: inst_common::AtomicRmwOp, + srcloc: Option, + }, + + /// A memory fence (mfence, lfence or sfence). 
+ Fence { kind: FenceKind }, + // ===================================== // Meta-instructions generating no code. /// Marker, no-op in generated code: SP "virtual offset" is adjusted. This @@ -1521,6 +1571,26 @@ impl ShowWithRRU for Inst { show_ireg_sized(dst.to_reg(), mb_rru, 8), ), + Inst::LockCmpxchg { ty, src, dst, .. } => { + let size = ty.bytes() as u8; + format!("lock cmpxchg{} {}, {}", + suffixBWLQ(size), show_ireg_sized(*src, mb_rru, size), dst.show_rru(mb_rru)) + } + + Inst::AtomicRmwSeq { ty, op, .. } => { + format!( + "atomically {{ {}_bits_at_[%r9]) {:?}= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }}", + ty.bits(), op) + }, + + Inst::Fence { kind } => { + match kind { + FenceKind::MFence => "mfence".to_string(), + FenceKind::LFence => "lfence".to_string(), + FenceKind::SFence => "sfence".to_string(), + } + } + Inst::VirtualSPOffsetAdj { offset } => format!("virtual_sp_offset_adjust {}", offset), Inst::Hlt => "hlt".into(), @@ -1737,6 +1807,19 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { collector.add_def(*dst); } + Inst::LockCmpxchg { src, dst, .. } => { + dst.get_regs_as_uses(collector); + collector.add_use(*src); + collector.add_mod(Writable::from_reg(regs::rax())); + } + + Inst::AtomicRmwSeq { .. } => { + collector.add_use(regs::r9()); + collector.add_use(regs::r10()); + collector.add_def(Writable::from_reg(regs::r11())); + collector.add_def(Writable::from_reg(regs::rax())); + } + Inst::Ret | Inst::EpiloguePlaceholder | Inst::JmpKnown { .. } @@ -1745,7 +1828,8 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { | Inst::TrapIf { .. } | Inst::VirtualSPOffsetAdj { .. } | Inst::Hlt - | Inst::Ud2 { .. } => { + | Inst::Ud2 { .. } + | Inst::Fence { .. } => { // No registers are used. } } @@ -2091,6 +2175,15 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { Inst::LoadExtName { ref mut dst, .. } => map_def(mapper, dst), + Inst::LockCmpxchg { + ref mut src, + ref mut dst, + .. 
+ } => { + map_use(mapper, src); + dst.map_uses(mapper); + } + Inst::Ret | Inst::EpiloguePlaceholder | Inst::JmpKnown { .. } @@ -2099,8 +2192,11 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { | Inst::TrapIf { .. } | Inst::VirtualSPOffsetAdj { .. } | Inst::Ud2 { .. } - | Inst::Hlt => { - // No registers are used. + | Inst::Hlt + | Inst::AtomicRmwSeq { .. } + | Inst::Fence { .. } => { + // Instruction doesn't explicitly mention any regs, so it can't have any virtual + // regs that we'd need to remap. Hence no action required. } } } diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index f4eb306882..1b494db706 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -2,6 +2,7 @@ #![allow(non_snake_case)] +use crate::ir; use crate::ir::{ condcodes::FloatCC, condcodes::IntCC, types, AbiParam, ArgumentPurpose, ExternalName, Inst as IRInst, InstructionData, LibCall, Opcode, Signature, TrapCode, Type, @@ -45,6 +46,14 @@ fn is_bool_ty(ty: Type) -> bool { } } +/// This is target-word-size dependent. And it excludes booleans and reftypes. +fn is_valid_atomic_transaction_ty(ty: Type) -> bool { + match ty { + types::I8 | types::I16 | types::I32 | types::I64 => true, + _ => false, + } +} + fn iri_to_u64_imm(ctx: Ctx, inst: IRInst) -> Option { ctx.get_constant(inst) } @@ -82,6 +91,13 @@ fn inst_fp_condcode(data: &InstructionData) -> Option { } } +fn inst_atomic_rmw_op(data: &InstructionData) -> Option { + match data { + &InstructionData::AtomicRmw { op, .. } => Some(op), + _ => None, + } +} + fn ldst_offset(data: &InstructionData) -> Option { match data { &InstructionData::Load { offset, .. } @@ -1732,6 +1748,148 @@ fn lower_insn_to_regs>( }); } + Opcode::AtomicRmw => { + // This is a simple, general-case atomic update, based on a loop involving + // `cmpxchg`. 
Note that we could do much better than this in the case where the old + // value at the location (that is to say, the SSA `Value` computed by this CLIF + // instruction) is not required. In that case, we could instead implement this + // using a single `lock`-prefixed x64 read-modify-write instruction. Also, even in + // the case where the old value is required, for the `add` and `sub` cases, we can + // use the single instruction `lock xadd`. However, those improvements have been + // left for another day. + // TODO: filed as https://github.com/bytecodealliance/wasmtime/issues/2153 + let dst = output_to_reg(ctx, outputs[0]); + let mut addr = input_to_reg(ctx, inputs[0]); + let mut arg2 = input_to_reg(ctx, inputs[1]); + let ty_access = ty.unwrap(); + assert!(is_valid_atomic_transaction_ty(ty_access)); + let memflags = ctx.memflags(insn).expect("memory flags"); + let srcloc = if !memflags.notrap() { + Some(ctx.srcloc(insn)) + } else { + None + }; + // Make sure that both args are in virtual regs, since in effect we have to do a + // parallel copy to get them safely to the AtomicRmwSeq input regs, and that's not + // guaranteed safe if either is in a real reg. + addr = ctx.ensure_in_vreg(addr, types::I64); + arg2 = ctx.ensure_in_vreg(arg2, types::I64); + // Move the args to the preordained AtomicRMW input regs. Note that `AtomicRmwSeq` + // operates at whatever width is specified by `ty`, so there's no need to + // zero-extend `arg2` in the case of `ty` being I8/I16/I32. + ctx.emit(Inst::gen_move( + Writable::from_reg(regs::r9()), + addr, + types::I64, + )); + ctx.emit(Inst::gen_move( + Writable::from_reg(regs::r10()), + arg2, + types::I64, + )); + // Now the AtomicRmwSeq (pseudo-) instruction itself + let op = inst_common::AtomicRmwOp::from(inst_atomic_rmw_op(ctx.data(insn)).unwrap()); + ctx.emit(Inst::AtomicRmwSeq { + ty: ty_access, + op, + srcloc, + }); + // And finally, copy the preordained AtomicRmwSeq output reg to its destination. 
+ ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64)); + } + + Opcode::AtomicCas => { + // This is very similar to, but not identical to, the `AtomicRmw` case. As with + // `AtomicRmw`, there's no need to zero-extend narrow values here. + let dst = output_to_reg(ctx, outputs[0]); + let addr = input_to_reg(ctx, inputs[0]); + let expected = input_to_reg(ctx, inputs[1]); + let replacement = input_to_reg(ctx, inputs[2]); + let ty_access = ty.unwrap(); + assert!(is_valid_atomic_transaction_ty(ty_access)); + let memflags = ctx.memflags(insn).expect("memory flags"); + let srcloc = if !memflags.notrap() { + Some(ctx.srcloc(insn)) + } else { + None + }; + // Move the expected value into %rax. Because there's only one fixed register on + // the input side, we don't have to use `ensure_in_vreg`, as is necessary in the + // `AtomicRmw` case. + ctx.emit(Inst::gen_move( + Writable::from_reg(regs::rax()), + expected, + types::I64, + )); + ctx.emit(Inst::LockCmpxchg { + ty: ty_access, + src: replacement, + dst: Amode::imm_reg(0, addr).into(), + srcloc, + }); + // And finally, copy the old value at the location to its destination reg. + ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64)); + } + + Opcode::AtomicLoad => { + // This is a normal load. The x86-TSO memory model provides sufficient sequencing + // to satisfy the CLIF synchronisation requirements for `AtomicLoad` without the + // need for any fence instructions. + let data = output_to_reg(ctx, outputs[0]); + let addr = input_to_reg(ctx, inputs[0]); + let ty_access = ty.unwrap(); + assert!(is_valid_atomic_transaction_ty(ty_access)); + let memflags = ctx.memflags(insn).expect("memory flags"); + let srcloc = if !memflags.notrap() { + Some(ctx.srcloc(insn)) + } else { + None + }; + // For the amode, we could do better, but for now just use `0(addr)`. 
+ let rm = RegMem::mem(Amode::imm_reg(0, addr)); + if ty_access == types::I64 { + ctx.emit(Inst::mov64_rm_r(rm, data, srcloc)); + } else { + let ext_mode = match ty_access { + types::I8 => ExtMode::BQ, + types::I16 => ExtMode::WQ, + types::I32 => ExtMode::LQ, + _ => panic!("lowering AtomicLoad: invalid type"), + }; + ctx.emit(Inst::movzx_rm_r(ext_mode, rm, data, srcloc)); + } + } + + Opcode::AtomicStore => { + // This is a normal store, followed by an `mfence` instruction. + let data = input_to_reg(ctx, inputs[0]); + let addr = input_to_reg(ctx, inputs[1]); + let ty_access = ctx.input_ty(insn, 0); + assert!(is_valid_atomic_transaction_ty(ty_access)); + let memflags = ctx.memflags(insn).expect("memory flags"); + let srcloc = if !memflags.notrap() { + Some(ctx.srcloc(insn)) + } else { + None + }; + // For the amode, we could do better, but for now just use `0(addr)`. + ctx.emit(Inst::mov_r_m( + ty_access.bytes() as u8, + data, + Amode::imm_reg(0, addr), + srcloc, + )); + ctx.emit(Inst::Fence { + kind: FenceKind::MFence, + }); + } + + Opcode::Fence => { + ctx.emit(Inst::Fence { + kind: FenceKind::MFence, + }); + } + Opcode::FuncAddr => { let dst = output_to_reg(ctx, outputs[0]); let (extname, _) = ctx.call_target(insn).unwrap(); diff --git a/cranelift/codegen/src/machinst/inst_common.rs b/cranelift/codegen/src/machinst/inst_common.rs new file mode 100644 index 0000000000..9566c56e53 --- /dev/null +++ b/cranelift/codegen/src/machinst/inst_common.rs @@ -0,0 +1,36 @@ +//! A place to park MachInst::Inst fragments which are common across multiple architectures. + +use crate::ir; + +/// Atomic memory update operations. As of 21 Aug 2020 these are used for the aarch64 and x64 +/// targets. 
+#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[repr(u8)] +pub enum AtomicRmwOp { + /// Add + Add, + /// Sub + Sub, + /// And + And, + /// Or + Or, + /// Exclusive Or + Xor, + /// Exchange (swap operands) + Xchg, +} + +impl AtomicRmwOp { + /// Converts an `ir::AtomicRmwOp` to the corresponding `inst_common::AtomicRmwOp`. + pub fn from(ir_op: ir::AtomicRmwOp) -> Self { + match ir_op { + ir::AtomicRmwOp::Add => AtomicRmwOp::Add, + ir::AtomicRmwOp::Sub => AtomicRmwOp::Sub, + ir::AtomicRmwOp::And => AtomicRmwOp::And, + ir::AtomicRmwOp::Or => AtomicRmwOp::Or, + ir::AtomicRmwOp::Xor => AtomicRmwOp::Xor, + ir::AtomicRmwOp::Xchg => AtomicRmwOp::Xchg, + } + } +} diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index b8ec275133..915764436e 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -133,6 +133,8 @@ pub mod adapter; pub use adapter::*; pub mod helpers; pub use helpers::*; +pub mod inst_common; +pub use inst_common::*; /// A machine instruction. 
pub trait MachInst: Clone + Debug { From cca10b87cb09ca98c0441bb4ee867a6ae2a96ea6 Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Tue, 18 Aug 2020 16:30:47 +0200 Subject: [PATCH 25/34] machinst x64: optimize select/brz/brnz when the input is a comparison; --- cranelift/codegen/src/isa/x64/inst/args.rs | 10 +- cranelift/codegen/src/isa/x64/inst/emit.rs | 17 ++ cranelift/codegen/src/isa/x64/inst/mod.rs | 17 +- cranelift/codegen/src/isa/x64/lower.rs | 247 ++++++++++++++++++--- 4 files changed, 257 insertions(+), 34 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 8690c57a4c..51e3f03c89 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -849,7 +849,7 @@ impl CC { FloatCC::Ordered => CC::NP, FloatCC::Unordered => CC::P, // Alias for NE - FloatCC::NotEqual | FloatCC::OrderedNotEqual => CC::NZ, + FloatCC::OrderedNotEqual => CC::NZ, // Alias for E FloatCC::UnorderedOrEqual => CC::Z, // Alias for A @@ -859,12 +859,14 @@ impl CC { FloatCC::UnorderedOrLessThan => CC::B, FloatCC::UnorderedOrLessThanOrEqual => CC::BE, FloatCC::Equal + | FloatCC::NotEqual | FloatCC::LessThan | FloatCC::LessThanOrEqual | FloatCC::UnorderedOrGreaterThan - | FloatCC::UnorderedOrGreaterThanOrEqual => { - panic!("No single condition code to guarantee ordered. Treat as special case.") - } + | FloatCC::UnorderedOrGreaterThanOrEqual => panic!( + "{:?} can't be lowered to a CC code; treat as special case.", + floatcc + ), } } diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index b54de499c9..b091a21eff 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1298,6 +1298,8 @@ pub(crate) fn emit( src, dst, } => { + // Lowering of the Select IR opcode when the input is an fcmp relies on the fact that + // this doesn't clobber flags. Make sure to not do so here. 
let next = sink.get_label(); // Jump if cc is *not* set. @@ -1432,6 +1434,21 @@ pub(crate) fn emit( sink.put4(disp); } + Inst::JmpIf { cc, taken } => { + let cond_start = sink.cur_offset(); + let cond_disp_off = cond_start + 2; + if let Some(l) = taken.as_label() { + sink.use_label_at_offset(cond_disp_off, l, LabelUse::JmpRel32); + // Since this is not a terminator, don't enroll in the branch inversion mechanism. + } + + let taken_disp = taken.as_offset32_or_zero(); + let taken_disp = taken_disp as u32; + sink.put1(0x0F); + sink.put1(0x80 + cc.get_enc()); + sink.put4(taken_disp); + } + Inst::JmpCond { cc, taken, diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index da2dca2060..ec52f79d5c 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -359,6 +359,9 @@ pub enum Inst { /// Jump to a known target: jmp simm32. JmpKnown { dst: BranchTarget }, + /// One-way conditional branch: jcond cond target. + JmpIf { cc: CC, taken: BranchTarget }, + /// Two-way conditional branch: jcond cond target target. /// Emitted as a compound sequence; the MachBuffer will shrink it as appropriate. 
JmpCond { @@ -966,6 +969,10 @@ impl Inst { Inst::JmpKnown { dst } } + pub(crate) fn jmp_if(cc: CC, taken: BranchTarget) -> Inst { + Inst::JmpIf { cc, taken } + } + pub(crate) fn jmp_cond(cc: CC, taken: BranchTarget, not_taken: BranchTarget) -> Inst { Inst::JmpCond { cc, @@ -1536,12 +1543,18 @@ impl ShowWithRRU for Inst { format!("{} {}", ljustify("jmp".to_string()), dst.show_rru(mb_rru)) } + Inst::JmpIf { cc, taken } => format!( + "{} {}", + ljustify2("j".to_string(), cc.to_string()), + taken.show_rru(mb_rru), + ), + Inst::JmpCond { cc, taken, not_taken, } => format!( - "{} taken={} not_taken={}", + "{} {}; j {}", ljustify2("j".to_string(), cc.to_string()), taken.show_rru(mb_rru), not_taken.show_rru(mb_rru) @@ -1823,6 +1836,7 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { Inst::Ret | Inst::EpiloguePlaceholder | Inst::JmpKnown { .. } + | Inst::JmpIf { .. } | Inst::JmpCond { .. } | Inst::Nop { .. } | Inst::TrapIf { .. } @@ -2188,6 +2202,7 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { | Inst::EpiloguePlaceholder | Inst::JmpKnown { .. } | Inst::JmpCond { .. } + | Inst::JmpIf { .. } | Inst::Nop { .. } | Inst::TrapIf { .. } | Inst::VirtualSPOffsetAdj { .. } diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 1b494db706..1433912444 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -81,13 +81,13 @@ fn inst_condcode(data: &InstructionData) -> IntCC { } } -fn inst_fp_condcode(data: &InstructionData) -> Option { +fn inst_fp_condcode(data: &InstructionData) -> FloatCC { match data { &InstructionData::BranchFloat { cond, .. } | &InstructionData::FloatCompare { cond, .. } | &InstructionData::FloatCond { cond, .. } - | &InstructionData::FloatCondTrap { cond, .. } => Some(cond), - _ => None, + | &InstructionData::FloatCondTrap { cond, .. 
} => cond, + _ => panic!("inst_fp_condcode(x64): unhandled: {:?}", data), } } @@ -230,7 +230,13 @@ fn emit_cmp(ctx: Ctx, insn: IRInst) { ctx.emit(Inst::cmp_rmi_r(ty.bytes() as u8, rhs, lhs)); } -fn emit_fcmp(ctx: Ctx, insn: IRInst) { +#[derive(PartialEq)] +enum FcmpOperands { + Swap, + DontSwap, +} + +fn emit_fcmp(ctx: Ctx, insn: IRInst, swap_operands: FcmpOperands) { // The only valid CC constructed with `from_floatcc` can be put in the flag // register with a direct float comparison; do this here. let input_ty = ctx.input_ty(insn, 0); @@ -240,8 +246,17 @@ fn emit_fcmp(ctx: Ctx, insn: IRInst) { _ => panic!("Bad input type to Fcmp"), }; let inputs = &[InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }]; - let lhs = input_to_reg(ctx, inputs[0]); - let rhs = input_to_reg_mem(ctx, inputs[1]); + let (lhs, rhs) = if swap_operands == FcmpOperands::Swap { + ( + input_to_reg(ctx, inputs[1]), + input_to_reg_mem(ctx, inputs[0]), + ) + } else { + ( + input_to_reg(ctx, inputs[0]), + input_to_reg_mem(ctx, inputs[1]), + ) + }; ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs)); } @@ -894,7 +909,7 @@ fn lower_insn_to_regs>( } Opcode::Fcmp => { - let condcode = inst_fp_condcode(ctx.data(insn)).unwrap(); + let condcode = inst_fp_condcode(ctx.data(insn)); let input_ty = ctx.input_ty(insn, 0); if !input_ty.is_vector() { let op = match input_ty { @@ -1107,12 +1122,12 @@ fn lower_insn_to_regs>( emit_cmp(ctx, ifcmp_insn); cc } else { - let condcode = inst_fp_condcode(ctx.data(insn)).unwrap(); + let condcode = inst_fp_condcode(ctx.data(insn)); let cc = CC::from_floatcc(condcode); // Verification ensures that the input is always a single-def ffcmp. 
let ffcmp_insn = matches_input(ctx, inputs[0], Opcode::Ffcmp).unwrap(); - emit_fcmp(ctx, ffcmp_insn); + emit_fcmp(ctx, ffcmp_insn, FcmpOperands::DontSwap); cc }; @@ -1933,26 +1948,144 @@ fn lower_insn_to_regs>( ctx.emit(inst); } - Opcode::Select | Opcode::Selectif | Opcode::SelectifSpectreGuard => { - let cc = if op == Opcode::Select { - // The input is a boolean value, compare it against zero. - let size = ctx.input_ty(insn, 0).bytes() as u8; - let test = input_to_reg(ctx, inputs[0]); - ctx.emit(Inst::cmp_rmi_r(size, RegMemImm::imm(0), test)); + Opcode::Select => { + let flag_input = inputs[0]; + if let Some(fcmp) = matches_input(ctx, flag_input, Opcode::Fcmp) { + let cond_code = inst_fp_condcode(ctx.data(fcmp)); - CC::NZ + // See comments in the lowering of Fcmp. + let (cond_code, swap_op, was_equal) = match cond_code { + FloatCC::LessThan + | FloatCC::LessThanOrEqual + | FloatCC::UnorderedOrGreaterThan + | FloatCC::UnorderedOrGreaterThanOrEqual => { + (cond_code.reverse(), FcmpOperands::Swap, false) + } + FloatCC::Equal => { + // Additionally, we invert Equal to NotEqual too: taking LHS if equal would + // mean take it if both CC::NP and CC::Z are set, the conjunction of which + // can't be modeled with a single cmov instruction. Instead, we'll swap LHS + // and RHS in the select operation, and invert the equal to a not-equal + // here. + (FloatCC::NotEqual, FcmpOperands::DontSwap, true) + } + _ => (cond_code, FcmpOperands::DontSwap, false), + }; + emit_fcmp(ctx, fcmp, swap_op); + + let (lhs, rhs) = if was_equal { + // See comment above about inverting conditional code. + ( + input_to_reg_mem(ctx, inputs[2]), + input_to_reg(ctx, inputs[1]), + ) + } else { + ( + input_to_reg_mem(ctx, inputs[1]), + input_to_reg(ctx, inputs[2]), + ) + }; + + let dst = output_to_reg(ctx, outputs[0]); + + let ty = ctx.output_ty(insn, 0); + + let lhs = if is_int_ty(ty) { + let size = ty.bytes() as u8; + if size == 1 { + // Sign-extend operands to 32, then do a cmove of size 4. 
+ let lhs_se = ctx.alloc_tmp(RegClass::I64, types::I32); + ctx.emit(Inst::movsx_rm_r(ExtMode::BL, lhs, lhs_se, None)); + ctx.emit(Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(rhs), dst, None)); + RegMem::reg(lhs_se.to_reg()) + } else { + ctx.emit(Inst::gen_move(dst, rhs, ty)); + lhs + } + } else { + debug_assert!(ty == types::F32 || ty == types::F64); + ctx.emit(Inst::gen_move(dst, rhs, ty)); + lhs + }; + + match cond_code { + FloatCC::Equal => { + // See comment above about inverting conditional code. + panic!("can't happen because of above guard"); + } + + FloatCC::NotEqual => { + // Take lhs if not-equal, that is CC::P or CC:NZ. + if is_int_ty(ty) { + let size = u8::max(ty.bytes() as u8, 4); + ctx.emit(Inst::cmove(size, CC::P, lhs.clone(), dst)); + ctx.emit(Inst::cmove(size, CC::NZ, lhs, dst)); + } else { + ctx.emit(Inst::xmm_cmove(ty == types::F64, CC::P, lhs.clone(), dst)); + ctx.emit(Inst::xmm_cmove(ty == types::F64, CC::NZ, lhs, dst)); + } + } + + _ => { + let cc = CC::from_floatcc(cond_code); + if is_int_ty(ty) { + let size = u8::max(ty.bytes() as u8, 4); + ctx.emit(Inst::cmove(size, cc, lhs, dst)); + } else { + ctx.emit(Inst::xmm_cmove(ty == types::F64, cc, lhs, dst)); + } + } + } } else { - // Verification ensures that the input is always a single-def ifcmp. - let cmp_insn = ctx - .get_input(inputs[0].insn, inputs[0].input) - .inst - .unwrap() - .0; - debug_assert_eq!(ctx.data(cmp_insn).opcode(), Opcode::Ifcmp); - emit_cmp(ctx, cmp_insn); + let cc = if let Some(icmp) = matches_input(ctx, flag_input, Opcode::Icmp) { + emit_cmp(ctx, icmp); + let cond_code = inst_condcode(ctx.data(icmp)); + CC::from_intcc(cond_code) + } else { + // The input is a boolean value, compare it against zero. 
+ let size = ctx.input_ty(insn, 0).bytes() as u8; + let test = input_to_reg(ctx, inputs[0]); + ctx.emit(Inst::cmp_rmi_r(size, RegMemImm::imm(0), test)); + CC::NZ + }; - CC::from_intcc(inst_condcode(ctx.data(insn))) - }; + let lhs = input_to_reg_mem(ctx, inputs[1]); + let rhs = input_to_reg(ctx, inputs[2]); + let dst = output_to_reg(ctx, outputs[0]); + + let ty = ctx.output_ty(insn, 0); + + if is_int_ty(ty) { + let size = ty.bytes() as u8; + if size == 1 { + // Sign-extend operands to 32, then do a cmove of size 4. + let lhs_se = ctx.alloc_tmp(RegClass::I64, types::I32); + ctx.emit(Inst::movsx_rm_r(ExtMode::BL, lhs, lhs_se, None)); + ctx.emit(Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(rhs), dst, None)); + ctx.emit(Inst::cmove(4, cc, RegMem::reg(lhs_se.to_reg()), dst)); + } else { + ctx.emit(Inst::gen_move(dst, rhs, ty)); + ctx.emit(Inst::cmove(size, cc, lhs, dst)); + } + } else { + debug_assert!(ty == types::F32 || ty == types::F64); + ctx.emit(Inst::gen_move(dst, rhs, ty)); + ctx.emit(Inst::xmm_cmove(ty == types::F64, cc, lhs, dst)); + } + } + } + + Opcode::Selectif | Opcode::SelectifSpectreGuard => { + // Verification ensures that the input is always a single-def ifcmp. 
+ let cmp_insn = ctx + .get_input(inputs[0].insn, inputs[0].input) + .inst + .unwrap() + .0; + debug_assert_eq!(ctx.data(cmp_insn).opcode(), Opcode::Ifcmp); + emit_cmp(ctx, cmp_insn); + + let cc = CC::from_intcc(inst_condcode(ctx.data(insn))); let lhs = input_to_reg_mem(ctx, inputs[1]); let rhs = input_to_reg(ctx, inputs[2]); @@ -2200,8 +2333,65 @@ impl LowerBackend for X64Backend { match op0 { Opcode::Brz | Opcode::Brnz => { + let flag_input = InsnInput { + insn: branches[0], + input: 0, + }; + let src_ty = ctx.input_ty(branches[0], 0); - if is_int_ty(src_ty) || is_bool_ty(src_ty) { + + if let Some(icmp) = matches_input(ctx, flag_input, Opcode::Icmp) { + emit_cmp(ctx, icmp); + + let cond_code = inst_condcode(ctx.data(icmp)); + let cond_code = if op0 == Opcode::Brz { + cond_code.inverse() + } else { + cond_code + }; + let cc = CC::from_intcc(cond_code); + ctx.emit(Inst::jmp_cond(cc, taken, not_taken)); + } else if let Some(fcmp) = matches_input(ctx, flag_input, Opcode::Fcmp) { + let cond_code = inst_fp_condcode(ctx.data(fcmp)); + + let cond_code = if op0 == Opcode::Brz { + cond_code.inverse() + } else { + cond_code + }; + + // See comments in the lowering of Fcmp. + let (cond_code, swap_op) = match cond_code { + FloatCC::LessThan + | FloatCC::LessThanOrEqual + | FloatCC::UnorderedOrGreaterThan + | FloatCC::UnorderedOrGreaterThanOrEqual => { + (cond_code.reverse(), FcmpOperands::Swap) + } + _ => (cond_code, FcmpOperands::DontSwap), + }; + emit_fcmp(ctx, fcmp, swap_op); + + match cond_code { + FloatCC::Equal => { + // Jump to taken if CC::NP and CC::Z, that is, jump to not-taken if + // CC::P or CC::NZ. + ctx.emit(Inst::jmp_if(CC::P, not_taken)); + ctx.emit(Inst::jmp_cond(CC::NZ, not_taken, taken)); + } + + FloatCC::NotEqual => { + // Jump to taken if CC::P or CC::NZ. 
+ ctx.emit(Inst::jmp_if(CC::P, taken)); + ctx.emit(Inst::jmp_cond(CC::NZ, taken, not_taken)); + } + + _ => { + let cc = CC::from_floatcc(cond_code); + ctx.emit(Inst::jmp_cond(cc, taken, not_taken)); + } + } + } else if is_int_ty(src_ty) || is_bool_ty(src_ty) { let src = input_to_reg( ctx, InsnInput { @@ -2250,8 +2440,7 @@ impl LowerBackend for X64Backend { } } - // TODO: Brif/icmp, Brff/icmp, jump tables - _ => unimplemented!("branch opcode"), + _ => panic!("unexpected branch opcode: {:?}", op0), } } else { assert_eq!(branches.len(), 1); From b830ee79dea73fa66b08a315373b045ba7fbf632 Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Tue, 18 Aug 2020 17:13:31 +0200 Subject: [PATCH 26/34] machinst x64: commute operands of integer operations if one input is an immediate; --- cranelift/codegen/src/isa/x64/lower.rs | 58 +++++++++++++++++++++----- 1 file changed, 47 insertions(+), 11 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 1433912444..7aec77e5dc 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -135,12 +135,14 @@ fn matches_input>(c: &mut C, input: InsnInput, op: Opcode) None } +/// Put the given input into a register, and mark it as used (side-effect). fn input_to_reg(ctx: Ctx, spec: InsnInput) -> Reg { let inputs = ctx.get_input(spec.insn, spec.input); ctx.use_input_reg(inputs); inputs.reg } +/// An extension specification for `extend_input_to_reg`. enum ExtSpec { ZeroExtendTo32, ZeroExtendTo64, @@ -148,6 +150,8 @@ enum ExtSpec { SignExtendTo64, } +/// Put the given input into a register, marking it as used, and do a zero- or signed- extension if +/// required. (This obviously causes side-effects.) 
fn extend_input_to_reg(ctx: Ctx, spec: InsnInput, ext_spec: ExtSpec) -> Reg { let requested_size = match ext_spec { ExtSpec::ZeroExtendTo32 | ExtSpec::SignExtendTo32 => 32, @@ -188,15 +192,17 @@ fn extend_input_to_reg(ctx: Ctx, spec: InsnInput, ext_spec: ExtSpec) -> Reg { dst.to_reg() } +/// Put the given input into a register or a memory operand. +/// Effectful: may mark the given input as used, when returning the register form. fn input_to_reg_mem(ctx: Ctx, spec: InsnInput) -> RegMem { // TODO handle memory. RegMem::reg(input_to_reg(ctx, spec)) } -/// Try to use an immediate for constant inputs, and a register otherwise. -/// TODO: handle memory as well! -fn input_to_reg_mem_imm(ctx: Ctx, spec: InsnInput) -> RegMemImm { - let imm = ctx.get_input(spec.insn, spec.input).constant.and_then(|x| { +/// Returns whether the given input is an immediate that can be properly sign-extended, without any +/// possible side-effect. +fn input_to_sext_imm(ctx: Ctx, spec: InsnInput) -> Option { + ctx.get_input(spec.insn, spec.input).constant.and_then(|x| { // For i64 instructions (prefixed with REX.W), require that the immediate will sign-extend // to 64 bits. For other sizes, it doesn't matter and we can just use the plain // constant. @@ -205,10 +211,18 @@ fn input_to_reg_mem_imm(ctx: Ctx, spec: InsnInput) -> RegMemImm { } else { None } - }); - match imm { + }) +} + +/// Put the given input into an immediate, a register or a memory operand. +/// Effectful: may mark the given input as used, when returning the register form. 
+fn input_to_reg_mem_imm(ctx: Ctx, spec: InsnInput) -> RegMemImm { + match input_to_sext_imm(ctx, spec) { Some(x) => RegMemImm::imm(x), - None => RegMemImm::reg(input_to_reg(ctx, spec)), + None => match input_to_reg_mem(ctx, spec) { + RegMem::Reg { reg } => RegMemImm::reg(reg), + RegMem::Mem { addr } => RegMemImm::mem(addr), + }, } } @@ -369,8 +383,6 @@ fn lower_insn_to_regs>( | Opcode::Band | Opcode::Bor | Opcode::Bxor => { - // TODO For commutative operations (add, mul, and, or, xor), try to commute the - // operands if one is an immediate. let ty = ty.unwrap(); if ty.lane_count() > 1 { let sse_op = match op { @@ -413,8 +425,32 @@ fn lower_insn_to_regs>( Opcode::Bxor => AluRmiROpcode::Xor, _ => unreachable!(), }; - let lhs = input_to_reg(ctx, inputs[0]); - let rhs = input_to_reg_mem_imm(ctx, inputs[1]); + + let (lhs, rhs) = match op { + Opcode::Iadd + | Opcode::IaddIfcout + | Opcode::Imul + | Opcode::Band + | Opcode::Bor + | Opcode::Bxor => { + // For commutative operations, try to commute operands if one is an + // immediate. 
+ if let Some(imm) = input_to_sext_imm(ctx, inputs[0]) { + (input_to_reg(ctx, inputs[1]), RegMemImm::imm(imm)) + } else { + ( + input_to_reg(ctx, inputs[0]), + input_to_reg_mem_imm(ctx, inputs[1]), + ) + } + } + Opcode::Isub => ( + input_to_reg(ctx, inputs[0]), + input_to_reg_mem_imm(ctx, inputs[1]), + ), + _ => unreachable!(), + }; + let dst = output_to_reg(ctx, outputs[0]); ctx.emit(Inst::mov_r_r(true, lhs, dst)); ctx.emit(Inst::alu_rmi_r(is_64, alu_op, rhs, dst)); From efff43e7697ad6716e532e47dadd264c89c683f7 Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Tue, 18 Aug 2020 19:31:20 +0200 Subject: [PATCH 27/34] machinst x64: fold address modes on loads/stores; --- cranelift/codegen/src/isa/x64/lower.rs | 109 +++++++++++++++++++++---- 1 file changed, 94 insertions(+), 15 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 7aec77e5dc..047eac99d8 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -124,10 +124,14 @@ struct InsnOutput { output: usize, } -fn matches_input>(c: &mut C, input: InsnInput, op: Opcode) -> Option { - let inputs = c.get_input(input.insn, input.input); +fn matches_input>( + ctx: &mut C, + input: InsnInput, + op: Opcode, +) -> Option { + let inputs = ctx.get_input(input.insn, input.input); if let Some((src_inst, _)) = inputs.inst { - let data = c.data(src_inst); + let data = ctx.data(src_inst); if data.opcode() == op { return Some(src_inst); } @@ -214,6 +218,10 @@ fn input_to_sext_imm(ctx: Ctx, spec: InsnInput) -> Option { }) } +fn input_to_imm(ctx: Ctx, spec: InsnInput) -> Option { + ctx.get_input(spec.insn, spec.input).constant +} + /// Put the given input into an immediate, a register or a memory operand. /// Effectful: may mark the given input as used, when returning the register form. 
fn input_to_reg_mem_imm(ctx: Ctx, spec: InsnInput) -> RegMemImm { @@ -340,6 +348,80 @@ fn emit_vm_call>( Ok(()) } +/// Returns whether the given input is a shift by a constant value less or equal than 3. +/// The goal is to embed it within an address mode. +fn matches_small_cst_shift>( + ctx: &mut C, + spec: InsnInput, +) -> Option<(InsnInput, u8)> { + if let Some(shift) = matches_input(ctx, spec, Opcode::Ishl) { + if let Some(shift_amt) = input_to_imm( + ctx, + InsnInput { + insn: shift, + input: 1, + }, + ) { + if shift_amt <= 3 { + return Some(( + InsnInput { + insn: shift, + input: 0, + }, + shift_amt as u8, + )); + } + } + } + None +} + +fn lower_amode>(ctx: &mut C, spec: InsnInput, offset: u32) -> Amode { + // We now either have an add that we must materialize, or some other input; as well as the + // final offset. + if let Some(add) = matches_input(ctx, spec, Opcode::Iadd) { + let add_inputs = &[ + InsnInput { + insn: add, + input: 0, + }, + InsnInput { + insn: add, + input: 1, + }, + ]; + + // TODO heap_addr legalization generates a uext64 *after* the shift, so these optimizations + // aren't happening in the wasm case. We could do better, given some range analysis. + let (base, index, shift) = if let Some((shift_input, shift_amt)) = + matches_small_cst_shift(ctx, add_inputs[0]) + { + ( + input_to_reg(ctx, add_inputs[1]), + input_to_reg(ctx, shift_input), + shift_amt, + ) + } else if let Some((shift_input, shift_amt)) = matches_small_cst_shift(ctx, add_inputs[1]) { + ( + input_to_reg(ctx, add_inputs[0]), + input_to_reg(ctx, shift_input), + shift_amt, + ) + } else { + ( + input_to_reg(ctx, add_inputs[0]), + input_to_reg(ctx, add_inputs[1]), + 0, + ) + }; + + return Amode::imm_reg_reg_shift(offset, base, index, shift); + } + + let input = input_to_reg(ctx, spec); + Amode::imm_reg(offset, input) +} + //============================================================================= // Top-level instruction lowering entry point, for one instruction. 
@@ -1660,7 +1742,7 @@ fn lower_insn_to_regs>( _ => false, }; - let addr = match op { + let amode = match op { Opcode::Load | Opcode::Uload8 | Opcode::Sload8 @@ -1669,8 +1751,7 @@ fn lower_insn_to_regs>( | Opcode::Uload32 | Opcode::Sload32 => { assert_eq!(inputs.len(), 1, "only one input for load operands"); - let base = input_to_reg(ctx, inputs[0]); - Amode::imm_reg(offset as u32, base) + lower_amode(ctx, inputs[0], offset as u32) } Opcode::LoadComplex @@ -1704,7 +1785,7 @@ fn lower_insn_to_regs>( // so ext-mode is defined in this case. ctx.emit(Inst::movsx_rm_r( ext_mode.unwrap(), - RegMem::mem(addr), + RegMem::mem(amode), dst, srcloc, )); @@ -1712,12 +1793,12 @@ fn lower_insn_to_regs>( (false, false) => { if elem_ty.bytes() == 8 { // Use a plain load. - ctx.emit(Inst::mov64_m_r(addr, dst, srcloc)) + ctx.emit(Inst::mov64_m_r(amode, dst, srcloc)) } else { // Use a zero-extended load. ctx.emit(Inst::movzx_rm_r( ext_mode.unwrap(), - RegMem::mem(addr), + RegMem::mem(amode), dst, srcloc, )) @@ -1726,13 +1807,13 @@ fn lower_insn_to_regs>( (_, true) => { ctx.emit(match elem_ty { types::F32 => { - Inst::xmm_mov(SseOpcode::Movss, RegMem::mem(addr), dst, srcloc) + Inst::xmm_mov(SseOpcode::Movss, RegMem::mem(amode), dst, srcloc) } types::F64 => { - Inst::xmm_mov(SseOpcode::Movsd, RegMem::mem(addr), dst, srcloc) + Inst::xmm_mov(SseOpcode::Movsd, RegMem::mem(amode), dst, srcloc) } _ if elem_ty.is_vector() && elem_ty.bits() == 128 => { - Inst::xmm_mov(SseOpcode::Movups, RegMem::mem(addr), dst, srcloc) + Inst::xmm_mov(SseOpcode::Movups, RegMem::mem(amode), dst, srcloc) } // TODO Specialize for different types: MOVUPD, MOVDQU _ => unreachable!("unexpected type for load: {:?}", elem_ty), }); @@ -1761,9 +1842,7 @@ fn lower_insn_to_regs>( let addr = match op { Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => { assert_eq!(inputs.len(), 2, "only one input for store memory operands"); - let base = input_to_reg(ctx, inputs[1]); - // TODO sign? 
- Amode::imm_reg(offset as u32, base) + lower_amode(ctx, inputs[1], offset as u32) } Opcode::StoreComplex From ee76e01efc88db29e1eefdeed9c29cd441c92493 Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Wed, 19 Aug 2020 14:52:42 +0200 Subject: [PATCH 28/34] machinst: fix the pinned reg hack; The pinned register hack didn't work because the GetPinnedReg is marked as having side-effects, so that GVN wouldn't try to common it out. This commit tweaks the function used during lowering to vcode, so that the GetPinnedReg opcode is specially handled. It's a bit lame, but it makes the hack work again. Also, use_input needs to be a no-op for real registers. --- cranelift/codegen/src/inst_predicates.rs | 8 +++++--- cranelift/codegen/src/machinst/lower.rs | 12 +++++++----- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/cranelift/codegen/src/inst_predicates.rs b/cranelift/codegen/src/inst_predicates.rs index 9afe2ff6e1..6830fcda1c 100644 --- a/cranelift/codegen/src/inst_predicates.rs +++ b/cranelift/codegen/src/inst_predicates.rs @@ -41,9 +41,11 @@ pub fn has_side_effect(func: &Function, inst: Inst) -> bool { trivially_has_side_effects(opcode) || is_load_with_defined_trapping(opcode, data) } -/// Does the given instruction have any side-effect as per [has_side_effect], or else is a load? -pub fn has_side_effect_or_load(func: &Function, inst: Inst) -> bool { - has_side_effect(func, inst) || func.dfg[inst].opcode().can_load() +/// Does the given instruction have any side-effect as per [has_side_effect], or else is a load, +/// but not the get_pinned_reg opcode? 
+pub fn has_side_effect_or_load_not_get_pinned_reg(func: &Function, inst: Inst) -> bool { + let op = func.dfg[inst].opcode(); + op != Opcode::GetPinnedReg && (has_side_effect(func, inst) || op.can_load()) } /// Is the given instruction a constant value (`iconst`, `fconst`, `bconst`) that can be diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index c4cbbd820d..b421f79254 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -4,7 +4,7 @@ use crate::entity::SecondaryMap; use crate::fx::{FxHashMap, FxHashSet}; -use crate::inst_predicates::{has_side_effect_or_load, is_constant_64bit}; +use crate::inst_predicates::{has_side_effect_or_load_not_get_pinned_reg, is_constant_64bit}; use crate::ir::instructions::BranchInfo; use crate::ir::types::I64; use crate::ir::{ @@ -372,7 +372,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> { for bb in f.layout.blocks() { cur_color += 1; for inst in f.layout.block_insts(bb) { - let side_effect = has_side_effect_or_load(f, inst); + let side_effect = has_side_effect_or_load_not_get_pinned_reg(f, inst); // Assign colors. A new color is chosen *after* any side-effecting instruction. 
inst_colors[inst] = InstColor::new(cur_color); @@ -800,14 +800,14 @@ impl<'func, I: VCodeInst> Lower<'func, I> { debug!(" -> src inst {}", src_inst); debug!( " -> has side effect: {}", - has_side_effect_or_load(self.f, src_inst) + has_side_effect_or_load_not_get_pinned_reg(self.f, src_inst) ); debug!( " -> our color is {:?}, src inst is {:?}", self.inst_color(at_inst), self.inst_color(src_inst) ); - if !has_side_effect_or_load(self.f, src_inst) + if !has_side_effect_or_load_not_get_pinned_reg(self.f, src_inst) || self.inst_color(at_inst) == self.inst_color(src_inst) { Some((src_inst, result_idx)) @@ -989,7 +989,9 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> { fn use_input_reg(&mut self, input: LowerInput) { debug!("use_input_reg: vreg {:?} is needed", input.reg); - self.vreg_needed[input.reg.get_index()] = true; + if input.reg.is_virtual() { + self.vreg_needed[input.reg.get_index()] = true; + } } fn is_reg_needed(&self, ir_inst: Inst, reg: Reg) -> bool { From 7c856542854bc8c5da9d5fb1a0b41f3c660d8484 Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Fri, 21 Aug 2020 12:40:47 +0200 Subject: [PATCH 29/34] Address review comments. --- cranelift/codegen/src/inst_predicates.rs | 2 +- cranelift/codegen/src/isa/x64/inst/mod.rs | 7 + cranelift/codegen/src/isa/x64/lower.rs | 542 ++++++++++++---------- cranelift/codegen/src/machinst/lower.rs | 12 +- 4 files changed, 300 insertions(+), 263 deletions(-) diff --git a/cranelift/codegen/src/inst_predicates.rs b/cranelift/codegen/src/inst_predicates.rs index 6830fcda1c..1aac4be2fd 100644 --- a/cranelift/codegen/src/inst_predicates.rs +++ b/cranelift/codegen/src/inst_predicates.rs @@ -43,7 +43,7 @@ pub fn has_side_effect(func: &Function, inst: Inst) -> bool { /// Does the given instruction have any side-effect as per [has_side_effect], or else is a load, /// but not the get_pinned_reg opcode? 
-pub fn has_side_effect_or_load_not_get_pinned_reg(func: &Function, inst: Inst) -> bool { +pub fn has_lowering_side_effect(func: &Function, inst: Inst) -> bool { let op = func.dfg[inst].opcode(); op != Opcode::GetPinnedReg && (has_side_effect(func, inst) || op.can_load()) } diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index ec52f79d5c..a39b0e6857 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -360,6 +360,13 @@ pub enum Inst { JmpKnown { dst: BranchTarget }, /// One-way conditional branch: jcond cond target. + /// + /// This instruction is useful when we have conditional jumps depending on more than two + /// conditions, see for instance the lowering of Brz/brnz with Fcmp inputs. + /// + /// A note of caution: in contexts where the branch target is another block, this has to be the + /// same successor as the one specified in the terminator branch of the current block. + /// Otherwise, this might confuse register allocation by creating new invisible edges. JmpIf { cc: CC, taken: BranchTarget }, /// Two-way conditional branch: jcond cond target target. diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 047eac99d8..c57cf40057 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -124,29 +124,37 @@ struct InsnOutput { output: usize, } +/// Returns whether the given specified `input` is a result produced by an instruction with Opcode +/// `op`. +// TODO investigate failures with checking against the result index. 
fn matches_input>( ctx: &mut C, input: InsnInput, op: Opcode, ) -> Option { let inputs = ctx.get_input(input.insn, input.input); - if let Some((src_inst, _)) = inputs.inst { + inputs.inst.and_then(|(src_inst, _)| { let data = ctx.data(src_inst); if data.opcode() == op { return Some(src_inst); } - } - None + None + }) +} + +fn lowerinput_to_reg(ctx: Ctx, input: LowerInput) -> Reg { + ctx.use_input_reg(input); + input.reg } /// Put the given input into a register, and mark it as used (side-effect). fn input_to_reg(ctx: Ctx, spec: InsnInput) -> Reg { - let inputs = ctx.get_input(spec.insn, spec.input); - ctx.use_input_reg(inputs); - inputs.reg + let input = ctx.get_input(spec.insn, spec.input); + lowerinput_to_reg(ctx, input) } /// An extension specification for `extend_input_to_reg`. +#[derive(Clone, Copy)] enum ExtSpec { ZeroExtendTo32, ZeroExtendTo64, @@ -163,6 +171,12 @@ fn extend_input_to_reg(ctx: Ctx, spec: InsnInput, ext_spec: ExtSpec) -> Reg { }; let input_size = ctx.input_ty(spec.insn, spec.input).bits(); + let requested_ty = if requested_size == 32 { + types::I32 + } else { + types::I64 + }; + let ext_mode = match (input_size, requested_size) { (a, b) if a == b => return input_to_reg(ctx, spec), (a, 32) if a == 1 || a == 8 => ExtMode::BL, @@ -173,12 +187,6 @@ fn extend_input_to_reg(ctx: Ctx, spec: InsnInput, ext_spec: ExtSpec) -> Reg { _ => unreachable!(), }; - let requested_ty = if requested_size == 32 { - types::I32 - } else { - types::I64 - }; - let src = input_to_reg_mem(ctx, spec); let dst = ctx.alloc_tmp(RegClass::I64, requested_ty); match ext_spec { @@ -196,21 +204,26 @@ fn extend_input_to_reg(ctx: Ctx, spec: InsnInput, ext_spec: ExtSpec) -> Reg { dst.to_reg() } +fn lowerinput_to_reg_mem(ctx: Ctx, input: LowerInput) -> RegMem { + // TODO handle memory. + RegMem::reg(lowerinput_to_reg(ctx, input)) +} + /// Put the given input into a register or a memory operand. /// Effectful: may mark the given input as used, when returning the register form. 
fn input_to_reg_mem(ctx: Ctx, spec: InsnInput) -> RegMem { - // TODO handle memory. - RegMem::reg(input_to_reg(ctx, spec)) + let input = ctx.get_input(spec.insn, spec.input); + lowerinput_to_reg_mem(ctx, input) } /// Returns whether the given input is an immediate that can be properly sign-extended, without any /// possible side-effect. -fn input_to_sext_imm(ctx: Ctx, spec: InsnInput) -> Option { - ctx.get_input(spec.insn, spec.input).constant.and_then(|x| { +fn lowerinput_to_sext_imm(input: LowerInput, input_ty: Type) -> Option { + input.constant.and_then(|x| { // For i64 instructions (prefixed with REX.W), require that the immediate will sign-extend // to 64 bits. For other sizes, it doesn't matter and we can just use the plain // constant. - if ctx.input_ty(spec.insn, spec.input).bytes() != 8 || low32_will_sign_extend_to_64(x) { + if input_ty.bytes() != 8 || low32_will_sign_extend_to_64(x) { Some(x as u32) } else { None @@ -218,6 +231,12 @@ fn input_to_sext_imm(ctx: Ctx, spec: InsnInput) -> Option { }) } +fn input_to_sext_imm(ctx: Ctx, spec: InsnInput) -> Option { + let input = ctx.get_input(spec.insn, spec.input); + let input_ty = ctx.input_ty(spec.insn, spec.input); + lowerinput_to_sext_imm(input, input_ty) +} + fn input_to_imm(ctx: Ctx, spec: InsnInput) -> Option { ctx.get_input(spec.insn, spec.input).constant } @@ -225,9 +244,11 @@ fn input_to_imm(ctx: Ctx, spec: InsnInput) -> Option { /// Put the given input into an immediate, a register or a memory operand. /// Effectful: may mark the given input as used, when returning the register form. 
fn input_to_reg_mem_imm(ctx: Ctx, spec: InsnInput) -> RegMemImm { - match input_to_sext_imm(ctx, spec) { + let input = ctx.get_input(spec.insn, spec.input); + let input_ty = ctx.input_ty(spec.insn, spec.input); + match lowerinput_to_sext_imm(input, input_ty) { Some(x) => RegMemImm::imm(x), - None => match input_to_reg_mem(ctx, spec) { + None => match lowerinput_to_reg_mem(ctx, input) { RegMem::Reg { reg } => RegMemImm::reg(reg), RegMem::Mem { addr } => RegMemImm::mem(addr), }, @@ -252,34 +273,88 @@ fn emit_cmp(ctx: Ctx, insn: IRInst) { ctx.emit(Inst::cmp_rmi_r(ty.bytes() as u8, rhs, lhs)); } -#[derive(PartialEq)] -enum FcmpOperands { - Swap, - DontSwap, +/// A specification for a fcmp emission. +enum FcmpSpec { + /// Normal flow. + Normal, + + /// Avoid emitting Equal at all costs by inverting it to NotEqual, and indicate when that + /// happens with `InvertedEqualOrConditions`. + /// + /// This is useful in contexts where it is hard/inefficient to produce a single instruction (or + /// sequence of instructions) that check for an "AND" combination of condition codes; see for + /// instance lowering of Select. + InvertEqual, } -fn emit_fcmp(ctx: Ctx, insn: IRInst, swap_operands: FcmpOperands) { +/// This explains how to interpret the results of an fcmp instruction. +enum FcmpCondResult { + /// The given condition code must be set. + Condition(CC), + + /// Both condition codes must be set. + AndConditions(CC, CC), + + /// Either of the conditions codes must be set. + OrConditions(CC, CC), + + /// The associated spec was set to `FcmpSpec::InvertEqual` and Equal has been inverted. Either + /// of the condition codes must be set, and the user must invert meaning of analyzing the + /// condition code results. When the spec is set to `FcmpSpec::Normal`, then this case can't be + /// reached. 
+ InvertedEqualOrConditions(CC, CC), +} + +fn emit_fcmp(ctx: Ctx, insn: IRInst, mut cond_code: FloatCC, spec: FcmpSpec) -> FcmpCondResult { + let (flip_operands, inverted_equal) = match cond_code { + FloatCC::LessThan + | FloatCC::LessThanOrEqual + | FloatCC::UnorderedOrGreaterThan + | FloatCC::UnorderedOrGreaterThanOrEqual => { + cond_code = cond_code.reverse(); + (true, false) + } + FloatCC::Equal => { + let inverted_equal = match spec { + FcmpSpec::Normal => false, + FcmpSpec::InvertEqual => { + cond_code = FloatCC::NotEqual; // same as .inverse() + true + } + }; + (false, inverted_equal) + } + _ => (false, false), + }; + // The only valid CC constructed with `from_floatcc` can be put in the flag // register with a direct float comparison; do this here. - let input_ty = ctx.input_ty(insn, 0); - let op = match input_ty { + let op = match ctx.input_ty(insn, 0) { types::F32 => SseOpcode::Ucomiss, types::F64 => SseOpcode::Ucomisd, _ => panic!("Bad input type to Fcmp"), }; + let inputs = &[InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }]; - let (lhs, rhs) = if swap_operands == FcmpOperands::Swap { - ( - input_to_reg(ctx, inputs[1]), - input_to_reg_mem(ctx, inputs[0]), - ) + let (lhs_input, rhs_input) = if flip_operands { + (inputs[1], inputs[0]) } else { - ( - input_to_reg(ctx, inputs[0]), - input_to_reg_mem(ctx, inputs[1]), - ) + (inputs[0], inputs[1]) }; + let lhs = input_to_reg(ctx, lhs_input); + let rhs = input_to_reg_mem(ctx, rhs_input); ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs)); + + let cond_result = match cond_code { + FloatCC::Equal => FcmpCondResult::AndConditions(CC::NP, CC::Z), + FloatCC::NotEqual if inverted_equal => { + FcmpCondResult::InvertedEqualOrConditions(CC::P, CC::NZ) + } + FloatCC::NotEqual if !inverted_equal => FcmpCondResult::OrConditions(CC::P, CC::NZ), + _ => FcmpCondResult::Condition(CC::from_floatcc(cond_code)), + }; + + cond_result } fn make_libcall_sig(ctx: Ctx, insn: IRInst, call_conv: CallConv, ptr_ty: Type) -> Signature { 
@@ -350,33 +425,31 @@ fn emit_vm_call>( /// Returns whether the given input is a shift by a constant value less or equal than 3. /// The goal is to embed it within an address mode. -fn matches_small_cst_shift>( +fn matches_small_constant_shift>( ctx: &mut C, spec: InsnInput, ) -> Option<(InsnInput, u8)> { - if let Some(shift) = matches_input(ctx, spec, Opcode::Ishl) { - if let Some(shift_amt) = input_to_imm( + matches_input(ctx, spec, Opcode::Ishl).and_then(|shift| { + match input_to_imm( ctx, InsnInput { insn: shift, input: 1, }, ) { - if shift_amt <= 3 { - return Some(( - InsnInput { - insn: shift, - input: 0, - }, - shift_amt as u8, - )); - } + Some(shift_amt) if shift_amt <= 3 => Some(( + InsnInput { + insn: shift, + input: 0, + }, + shift_amt as u8, + )), + _ => None, } - } - None + }) } -fn lower_amode>(ctx: &mut C, spec: InsnInput, offset: u32) -> Amode { +fn lower_to_amode>(ctx: &mut C, spec: InsnInput, offset: u32) -> Amode { // We now either have an add that we must materialize, or some other input; as well as the // final offset. if let Some(add) = matches_input(ctx, spec, Opcode::Iadd) { @@ -394,14 +467,16 @@ fn lower_amode>(ctx: &mut C, spec: InsnInput, offset: u32) // TODO heap_addr legalization generates a uext64 *after* the shift, so these optimizations // aren't happening in the wasm case. We could do better, given some range analysis. 
let (base, index, shift) = if let Some((shift_input, shift_amt)) = - matches_small_cst_shift(ctx, add_inputs[0]) + matches_small_constant_shift(ctx, add_inputs[0]) { ( input_to_reg(ctx, add_inputs[1]), input_to_reg(ctx, shift_input), shift_amt, ) - } else if let Some((shift_input, shift_amt)) = matches_small_cst_shift(ctx, add_inputs[1]) { + } else if let Some((shift_input, shift_amt)) = + matches_small_constant_shift(ctx, add_inputs[1]) + { ( input_to_reg(ctx, add_inputs[0]), input_to_reg(ctx, shift_input), @@ -1027,15 +1102,9 @@ fn lower_insn_to_regs>( } Opcode::Fcmp => { - let condcode = inst_fp_condcode(ctx.data(insn)); + let cond_code = inst_fp_condcode(ctx.data(insn)); let input_ty = ctx.input_ty(insn, 0); if !input_ty.is_vector() { - let op = match input_ty { - types::F32 => SseOpcode::Ucomiss, - types::F64 => SseOpcode::Ucomisd, - _ => panic!("Bad input type to fcmp: {}", input_ty), - }; - // Unordered is returned by setting ZF, PF, CF <- 111 // Greater than by ZF, PF, CF <- 000 // Less than by ZF, PF, CF <- 001 @@ -1051,71 +1120,35 @@ fn lower_insn_to_regs>( // set, then both the ZF and CF flag bits must also be set we can get away with using // one setcc for most condition codes. - match condcode { - FloatCC::LessThan - | FloatCC::LessThanOrEqual - | FloatCC::UnorderedOrGreaterThan - | FloatCC::UnorderedOrGreaterThanOrEqual => { - // setb and setbe for ordered LessThan and LessThanOrEqual check if CF = 1 - // which doesn't exclude unorderdness. To get around this we can reverse the - // operands and the cc test to instead check if CF and ZF are 0 which would - // also excludes unorderedness. Using similiar logic we also reverse - // UnorderedOrGreaterThan and UnorderedOrGreaterThanOrEqual and assure that ZF - // or CF is 1 to exclude orderedness. 
- let lhs = input_to_reg_mem(ctx, inputs[0]); - let rhs = input_to_reg(ctx, inputs[1]); - let dst = output_to_reg(ctx, outputs[0]); - ctx.emit(Inst::xmm_cmp_rm_r(op, lhs, rhs)); - let condcode = condcode.reverse(); - let cc = CC::from_floatcc(condcode); + let dst = output_to_reg(ctx, outputs[0]); + + match emit_fcmp(ctx, insn, cond_code, FcmpSpec::Normal) { + FcmpCondResult::Condition(cc) => { ctx.emit(Inst::setcc(cc, dst)); } - - FloatCC::Equal => { - // Outlier case: equal means both the operands are ordered and equal; we cannot - // get around checking the parity bit to determine if the result was ordered. - let lhs = input_to_reg(ctx, inputs[0]); - let rhs = input_to_reg_mem(ctx, inputs[1]); - let dst = output_to_reg(ctx, outputs[0]); - let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, types::I32); - ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs)); - ctx.emit(Inst::setcc(CC::NP, tmp_gpr1)); - ctx.emit(Inst::setcc(CC::Z, dst)); + FcmpCondResult::AndConditions(cc1, cc2) => { + let tmp = ctx.alloc_tmp(RegClass::I64, types::I32); + ctx.emit(Inst::setcc(cc1, tmp)); + ctx.emit(Inst::setcc(cc2, dst)); ctx.emit(Inst::alu_rmi_r( false, AluRmiROpcode::And, - RegMemImm::reg(tmp_gpr1.to_reg()), + RegMemImm::reg(tmp.to_reg()), dst, )); } - - FloatCC::NotEqual => { - // Outlier case: not equal means either the operands are unordered, or they're - // not the same value. 
- let lhs = input_to_reg(ctx, inputs[0]); - let rhs = input_to_reg_mem(ctx, inputs[1]); - let dst = output_to_reg(ctx, outputs[0]); - let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, types::I32); - ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs)); - ctx.emit(Inst::setcc(CC::P, tmp_gpr1)); - ctx.emit(Inst::setcc(CC::NZ, dst)); + FcmpCondResult::OrConditions(cc1, cc2) => { + let tmp = ctx.alloc_tmp(RegClass::I64, types::I32); + ctx.emit(Inst::setcc(cc1, tmp)); + ctx.emit(Inst::setcc(cc2, dst)); ctx.emit(Inst::alu_rmi_r( false, AluRmiROpcode::Or, - RegMemImm::reg(tmp_gpr1.to_reg()), + RegMemImm::reg(tmp.to_reg()), dst, )); } - - _ => { - // For all remaining condition codes we can handle things with one check. - let lhs = input_to_reg(ctx, inputs[0]); - let rhs = input_to_reg_mem(ctx, inputs[1]); - let dst = output_to_reg(ctx, outputs[0]); - let cc = CC::from_floatcc(condcode); - ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs)); - ctx.emit(Inst::setcc(cc, dst)); - } + FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(), } } else { let op = match input_ty { @@ -1126,7 +1159,7 @@ fn lower_insn_to_regs>( // Since some packed comparisons are not available, some of the condition codes // must be inverted, with a corresponding `flip` of the operands. - let (imm, flip) = match condcode { + let (imm, flip) = match cond_code { FloatCC::GreaterThan => (FcmpImm::LessThan, true), FloatCC::GreaterThanOrEqual => (FcmpImm::LessThanOrEqual, true), FloatCC::UnorderedOrLessThan => (FcmpImm::UnorderedOrGreaterThan, true), @@ -1134,9 +1167,9 @@ fn lower_insn_to_regs>( (FcmpImm::UnorderedOrGreaterThanOrEqual, true) } FloatCC::OrderedNotEqual | FloatCC::UnorderedOrEqual => { - panic!("unsupported float condition code: {}", condcode) + panic!("unsupported float condition code: {}", cond_code) } - _ => (FcmpImm::from(condcode), false), + _ => (FcmpImm::from(cond_code), false), }; // Determine the operands of the comparison, possibly by flipping them. 
@@ -1225,35 +1258,77 @@ fn lower_insn_to_regs>( let srcloc = ctx.srcloc(insn); let trap_code = inst_trapcode(ctx.data(insn)).unwrap(); - let cc = if matches_input(ctx, inputs[0], Opcode::IaddIfcout).is_some() { - let condcode = inst_condcode(ctx.data(insn)); + if matches_input(ctx, inputs[0], Opcode::IaddIfcout).is_some() { + let cond_code = inst_condcode(ctx.data(insn)); // The flags must not have been clobbered by any other instruction between the // iadd_ifcout and this instruction, as verified by the CLIF validator; so we can // simply use the flags here. - CC::from_intcc(condcode) + let cc = CC::from_intcc(cond_code); + + ctx.emit_safepoint(Inst::TrapIf { + trap_code, + srcloc, + cc, + }); } else if op == Opcode::Trapif { - let condcode = inst_condcode(ctx.data(insn)); - let cc = CC::from_intcc(condcode); + let cond_code = inst_condcode(ctx.data(insn)); + let cc = CC::from_intcc(cond_code); // Verification ensures that the input is always a single-def ifcmp. - let ifcmp_insn = matches_input(ctx, inputs[0], Opcode::Ifcmp).unwrap(); - emit_cmp(ctx, ifcmp_insn); - cc + let ifcmp = matches_input(ctx, inputs[0], Opcode::Ifcmp).unwrap(); + emit_cmp(ctx, ifcmp); + + ctx.emit_safepoint(Inst::TrapIf { + trap_code, + srcloc, + cc, + }); } else { - let condcode = inst_fp_condcode(ctx.data(insn)); - let cc = CC::from_floatcc(condcode); + let cond_code = inst_fp_condcode(ctx.data(insn)); // Verification ensures that the input is always a single-def ffcmp. 
- let ffcmp_insn = matches_input(ctx, inputs[0], Opcode::Ffcmp).unwrap(); - emit_fcmp(ctx, ffcmp_insn, FcmpOperands::DontSwap); - cc - }; + let ffcmp = matches_input(ctx, inputs[0], Opcode::Ffcmp).unwrap(); - ctx.emit_safepoint(Inst::TrapIf { - trap_code, - srcloc, - cc, - }); + match emit_fcmp(ctx, ffcmp, cond_code, FcmpSpec::Normal) { + FcmpCondResult::Condition(cc) => ctx.emit_safepoint(Inst::TrapIf { + trap_code, + srcloc, + cc, + }), + FcmpCondResult::AndConditions(cc1, cc2) => { + // A bit unfortunate, but materialize the flags in their own register, and + // check against this. + let tmp = ctx.alloc_tmp(RegClass::I64, types::I32); + let tmp2 = ctx.alloc_tmp(RegClass::I64, types::I32); + ctx.emit(Inst::setcc(cc1, tmp)); + ctx.emit(Inst::setcc(cc2, tmp2)); + ctx.emit(Inst::alu_rmi_r( + false, /* is_64 */ + AluRmiROpcode::And, + RegMemImm::reg(tmp.to_reg()), + tmp2, + )); + ctx.emit_safepoint(Inst::TrapIf { + trap_code, + srcloc, + cc: CC::NZ, + }); + } + FcmpCondResult::OrConditions(cc1, cc2) => { + ctx.emit_safepoint(Inst::TrapIf { + trap_code, + srcloc, + cc: cc1, + }); + ctx.emit_safepoint(Inst::TrapIf { + trap_code, + srcloc, + cc: cc2, + }); + } + FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(), + }; + }; } Opcode::F64const => { @@ -1751,7 +1826,7 @@ fn lower_insn_to_regs>( | Opcode::Uload32 | Opcode::Sload32 => { assert_eq!(inputs.len(), 1, "only one input for load operands"); - lower_amode(ctx, inputs[0], offset as u32) + lower_to_amode(ctx, inputs[0], offset as u32) } Opcode::LoadComplex @@ -1842,7 +1917,7 @@ fn lower_insn_to_regs>( let addr = match op { Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => { assert_eq!(inputs.len(), 2, "only one input for store memory operands"); - lower_amode(ctx, inputs[1], offset as u32) + lower_to_amode(ctx, inputs[1], offset as u32) } Opcode::StoreComplex @@ -1899,11 +1974,13 @@ fn lower_insn_to_regs>( } else { None }; + // Make sure that both args are in virtual regs, since 
in effect we have to do a // parallel copy to get them safely to the AtomicRmwSeq input regs, and that's not // guaranteed safe if either is in a real reg. addr = ctx.ensure_in_vreg(addr, types::I64); arg2 = ctx.ensure_in_vreg(arg2, types::I64); + // Move the args to the preordained AtomicRMW input regs. Note that `AtomicRmwSeq` // operates at whatever width is specified by `ty`, so there's no need to // zero-extend `arg2` in the case of `ty` being I8/I16/I32. @@ -1917,6 +1994,7 @@ fn lower_insn_to_regs>( arg2, types::I64, )); + // Now the AtomicRmwSeq (pseudo-) instruction itself let op = inst_common::AtomicRmwOp::from(inst_atomic_rmw_op(ctx.data(insn)).unwrap()); ctx.emit(Inst::AtomicRmwSeq { @@ -1924,6 +2002,7 @@ fn lower_insn_to_regs>( op, srcloc, }); + // And finally, copy the preordained AtomicRmwSeq output reg to its destination. ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64)); } @@ -1932,7 +2011,7 @@ fn lower_insn_to_regs>( // This is very similar to, but not identical to, the `AtomicRmw` case. As with // `AtomicRmw`, there's no need to zero-extend narrow values here. let dst = output_to_reg(ctx, outputs[0]); - let addr = input_to_reg(ctx, inputs[0]); + let addr = lower_to_amode(ctx, inputs[0], 0); let expected = input_to_reg(ctx, inputs[1]); let replacement = input_to_reg(ctx, inputs[2]); let ty_access = ty.unwrap(); @@ -1943,6 +2022,7 @@ fn lower_insn_to_regs>( } else { None }; + // Move the expected value into %rax. Because there's only one fixed register on // the input side, we don't have to use `ensure_in_vreg`, as is necessary in the // `AtomicRmw` case. @@ -1954,7 +2034,7 @@ fn lower_insn_to_regs>( ctx.emit(Inst::LockCmpxchg { ty: ty_access, src: replacement, - dst: Amode::imm_reg(0, addr).into(), + dst: addr.into(), srcloc, }); // And finally, copy the old value at the location to its destination reg. 
@@ -1966,7 +2046,7 @@ fn lower_insn_to_regs>( // to satisfy the CLIF synchronisation requirements for `AtomicLoad` without the // need for any fence instructions. let data = output_to_reg(ctx, outputs[0]); - let addr = input_to_reg(ctx, inputs[0]); + let addr = lower_to_amode(ctx, inputs[0], 0); let ty_access = ty.unwrap(); assert!(is_valid_atomic_transaction_ty(ty_access)); let memflags = ctx.memflags(insn).expect("memory flags"); @@ -1975,8 +2055,8 @@ fn lower_insn_to_regs>( } else { None }; - // For the amode, we could do better, but for now just use `0(addr)`. - let rm = RegMem::mem(Amode::imm_reg(0, addr)); + + let rm = RegMem::mem(addr); if ty_access == types::I64 { ctx.emit(Inst::mov64_rm_r(rm, data, srcloc)); } else { @@ -1993,7 +2073,7 @@ fn lower_insn_to_regs>( Opcode::AtomicStore => { // This is a normal store, followed by an `mfence` instruction. let data = input_to_reg(ctx, inputs[0]); - let addr = input_to_reg(ctx, inputs[1]); + let addr = lower_to_amode(ctx, inputs[1], 0); let ty_access = ctx.input_ty(insn, 0); assert!(is_valid_atomic_transaction_ty(ty_access)); let memflags = ctx.memflags(insn).expect("memory flags"); @@ -2002,13 +2082,8 @@ fn lower_insn_to_regs>( } else { None }; - // For the amode, we could do better, but for now just use `0(addr)`. - ctx.emit(Inst::mov_r_m( - ty_access.bytes() as u8, - data, - Amode::imm_reg(0, addr), - srcloc, - )); + + ctx.emit(Inst::mov_r_m(ty_access.bytes() as u8, data, addr, srcloc)); ctx.emit(Inst::Fence { kind: FenceKind::MFence, }); @@ -2068,81 +2143,36 @@ fn lower_insn_to_regs>( if let Some(fcmp) = matches_input(ctx, flag_input, Opcode::Fcmp) { let cond_code = inst_fp_condcode(ctx.data(fcmp)); - // See comments in the lowering of Fcmp. 
- let (cond_code, swap_op, was_equal) = match cond_code { - FloatCC::LessThan - | FloatCC::LessThanOrEqual - | FloatCC::UnorderedOrGreaterThan - | FloatCC::UnorderedOrGreaterThanOrEqual => { - (cond_code.reverse(), FcmpOperands::Swap, false) - } - FloatCC::Equal => { - // Additionally, we invert Equal to NotEqual too: taking LHS if equal would - // mean take it if both CC::NP and CC::Z are set, the conjunction of which - // can't be modeled with a single cmov instruction. Instead, we'll swap LHS - // and RHS in the select operation, and invert the equal to a not-equal - // here. - (FloatCC::NotEqual, FcmpOperands::DontSwap, true) - } - _ => (cond_code, FcmpOperands::DontSwap, false), - }; - emit_fcmp(ctx, fcmp, swap_op); + // we request inversion of Equal to NotEqual here: taking LHS if equal would mean + // take it if both CC::NP and CC::Z are set, the conjunction of which can't be + // modeled with a single cmov instruction. Instead, we'll swap LHS and RHS in the + // select operation, and invert the equal to a not-equal here. + let fcmp_results = emit_fcmp(ctx, fcmp, cond_code, FcmpSpec::InvertEqual); - let (lhs, rhs) = if was_equal { - // See comment above about inverting conditional code. - ( - input_to_reg_mem(ctx, inputs[2]), - input_to_reg(ctx, inputs[1]), - ) - } else { - ( - input_to_reg_mem(ctx, inputs[1]), - input_to_reg(ctx, inputs[2]), - ) + let (lhs_input, rhs_input) = match fcmp_results { + FcmpCondResult::InvertedEqualOrConditions(_, _) => (inputs[2], inputs[1]), + FcmpCondResult::Condition(_) + | FcmpCondResult::AndConditions(_, _) + | FcmpCondResult::OrConditions(_, _) => (inputs[1], inputs[2]), }; - let dst = output_to_reg(ctx, outputs[0]); - let ty = ctx.output_ty(insn, 0); - - let lhs = if is_int_ty(ty) { - let size = ty.bytes() as u8; - if size == 1 { - // Sign-extend operands to 32, then do a cmove of size 4. 
- let lhs_se = ctx.alloc_tmp(RegClass::I64, types::I32); - ctx.emit(Inst::movsx_rm_r(ExtMode::BL, lhs, lhs_se, None)); - ctx.emit(Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(rhs), dst, None)); - RegMem::reg(lhs_se.to_reg()) - } else { - ctx.emit(Inst::gen_move(dst, rhs, ty)); - lhs - } + let rhs = input_to_reg(ctx, rhs_input); + let dst = output_to_reg(ctx, outputs[0]); + let lhs = if is_int_ty(ty) && ty.bytes() < 4 { + // Special case: since the higher bits are undefined per CLIF semantics, we + // can just apply a 32-bit cmove here. Force inputs into registers, to + // avoid partial spilling out-of-bounds with memory accesses, though. + // Sign-extend operands to 32, then do a cmove of size 4. + RegMem::reg(input_to_reg(ctx, lhs_input)) } else { - debug_assert!(ty == types::F32 || ty == types::F64); - ctx.emit(Inst::gen_move(dst, rhs, ty)); - lhs + input_to_reg_mem(ctx, lhs_input) }; - match cond_code { - FloatCC::Equal => { - // See comment above about inverting conditional code. - panic!("can't happen because of above guard"); - } + ctx.emit(Inst::gen_move(dst, rhs, ty)); - FloatCC::NotEqual => { - // Take lhs if not-equal, that is CC::P or CC:NZ. 
- if is_int_ty(ty) { - let size = u8::max(ty.bytes() as u8, 4); - ctx.emit(Inst::cmove(size, CC::P, lhs.clone(), dst)); - ctx.emit(Inst::cmove(size, CC::NZ, lhs, dst)); - } else { - ctx.emit(Inst::xmm_cmove(ty == types::F64, CC::P, lhs.clone(), dst)); - ctx.emit(Inst::xmm_cmove(ty == types::F64, CC::NZ, lhs, dst)); - } - } - - _ => { - let cc = CC::from_floatcc(cond_code); + match fcmp_results { + FcmpCondResult::Condition(cc) => { if is_int_ty(ty) { let size = u8::max(ty.bytes() as u8, 4); ctx.emit(Inst::cmove(size, cc, lhs, dst)); @@ -2150,6 +2180,22 @@ fn lower_insn_to_regs>( ctx.emit(Inst::xmm_cmove(ty == types::F64, cc, lhs, dst)); } } + FcmpCondResult::AndConditions(_, _) => { + unreachable!( + "can't AND with select; see above comment about inverting equal" + ); + } + FcmpCondResult::InvertedEqualOrConditions(cc1, cc2) + | FcmpCondResult::OrConditions(cc1, cc2) => { + if is_int_ty(ty) { + let size = u8::max(ty.bytes() as u8, 4); + ctx.emit(Inst::cmove(size, cc1, lhs.clone(), dst)); + ctx.emit(Inst::cmove(size, cc2, lhs, dst)); + } else { + ctx.emit(Inst::xmm_cmove(ty == types::F64, cc1, lhs.clone(), dst)); + ctx.emit(Inst::xmm_cmove(ty == types::F64, cc2, lhs, dst)); + } + } } } else { let cc = if let Some(icmp) = matches_input(ctx, flag_input, Opcode::Icmp) { @@ -2164,27 +2210,27 @@ fn lower_insn_to_regs>( CC::NZ }; - let lhs = input_to_reg_mem(ctx, inputs[1]); let rhs = input_to_reg(ctx, inputs[2]); let dst = output_to_reg(ctx, outputs[0]); - let ty = ctx.output_ty(insn, 0); + ctx.emit(Inst::gen_move(dst, rhs, ty)); + if is_int_ty(ty) { - let size = ty.bytes() as u8; - if size == 1 { - // Sign-extend operands to 32, then do a cmove of size 4. 
- let lhs_se = ctx.alloc_tmp(RegClass::I64, types::I32); - ctx.emit(Inst::movsx_rm_r(ExtMode::BL, lhs, lhs_se, None)); - ctx.emit(Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(rhs), dst, None)); - ctx.emit(Inst::cmove(4, cc, RegMem::reg(lhs_se.to_reg()), dst)); + let mut size = ty.bytes() as u8; + let lhs = if size < 4 { + // Special case: since the higher bits are undefined per CLIF semantics, we + // can just apply a 32-bit cmove here. Force inputs into registers, to + // avoid partial spilling out-of-bounds with memory accesses, though. + size = 4; + RegMem::reg(input_to_reg(ctx, inputs[1])) } else { - ctx.emit(Inst::gen_move(dst, rhs, ty)); - ctx.emit(Inst::cmove(size, cc, lhs, dst)); - } + input_to_reg_mem(ctx, inputs[1]) + }; + ctx.emit(Inst::cmove(size, cc, lhs, dst)); } else { debug_assert!(ty == types::F32 || ty == types::F64); - ctx.emit(Inst::gen_move(dst, rhs, ty)); + let lhs = input_to_reg_mem(ctx, inputs[1]); ctx.emit(Inst::xmm_cmove(ty == types::F64, cc, lhs, dst)); } } @@ -2464,47 +2510,29 @@ impl LowerBackend for X64Backend { } else { cond_code }; + let cc = CC::from_intcc(cond_code); ctx.emit(Inst::jmp_cond(cc, taken, not_taken)); } else if let Some(fcmp) = matches_input(ctx, flag_input, Opcode::Fcmp) { let cond_code = inst_fp_condcode(ctx.data(fcmp)); - let cond_code = if op0 == Opcode::Brz { cond_code.inverse() } else { cond_code }; - - // See comments in the lowering of Fcmp. - let (cond_code, swap_op) = match cond_code { - FloatCC::LessThan - | FloatCC::LessThanOrEqual - | FloatCC::UnorderedOrGreaterThan - | FloatCC::UnorderedOrGreaterThanOrEqual => { - (cond_code.reverse(), FcmpOperands::Swap) - } - _ => (cond_code, FcmpOperands::DontSwap), - }; - emit_fcmp(ctx, fcmp, swap_op); - - match cond_code { - FloatCC::Equal => { - // Jump to taken if CC::NP and CC::Z, that is, jump to not-taken if - // CC::P or CC::NZ. 
- ctx.emit(Inst::jmp_if(CC::P, not_taken)); - ctx.emit(Inst::jmp_cond(CC::NZ, not_taken, taken)); - } - - FloatCC::NotEqual => { - // Jump to taken if CC::P or CC::NZ. - ctx.emit(Inst::jmp_if(CC::P, taken)); - ctx.emit(Inst::jmp_cond(CC::NZ, taken, not_taken)); - } - - _ => { - let cc = CC::from_floatcc(cond_code); + match emit_fcmp(ctx, fcmp, cond_code, FcmpSpec::Normal) { + FcmpCondResult::Condition(cc) => { ctx.emit(Inst::jmp_cond(cc, taken, not_taken)); } + FcmpCondResult::AndConditions(cc1, cc2) => { + ctx.emit(Inst::jmp_if(cc1.invert(), not_taken)); + ctx.emit(Inst::jmp_cond(cc2.invert(), not_taken, taken)); + } + FcmpCondResult::OrConditions(cc1, cc2) => { + ctx.emit(Inst::jmp_if(cc1, taken)); + ctx.emit(Inst::jmp_cond(cc2, taken, not_taken)); + } + FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(), } } else if is_int_ty(src_ty) || is_bool_ty(src_ty) { let src = input_to_reg( diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index b421f79254..9ec313916e 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -4,7 +4,7 @@ use crate::entity::SecondaryMap; use crate::fx::{FxHashMap, FxHashSet}; -use crate::inst_predicates::{has_side_effect_or_load_not_get_pinned_reg, is_constant_64bit}; +use crate::inst_predicates::{has_lowering_side_effect, is_constant_64bit}; use crate::ir::instructions::BranchInfo; use crate::ir::types::I64; use crate::ir::{ @@ -372,7 +372,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> { for bb in f.layout.blocks() { cur_color += 1; for inst in f.layout.block_insts(bb) { - let side_effect = has_side_effect_or_load_not_get_pinned_reg(f, inst); + let side_effect = has_lowering_side_effect(f, inst); // Assign colors. A new color is chosen *after* any side-effecting instruction. 
inst_colors[inst] = InstColor::new(cur_color); @@ -799,15 +799,15 @@ impl<'func, I: VCodeInst> Lower<'func, I> { ValueDef::Result(src_inst, result_idx) => { debug!(" -> src inst {}", src_inst); debug!( - " -> has side effect: {}", - has_side_effect_or_load_not_get_pinned_reg(self.f, src_inst) + " -> has lowering side effect: {}", + has_lowering_side_effect(self.f, src_inst) ); debug!( " -> our color is {:?}, src inst is {:?}", self.inst_color(at_inst), self.inst_color(src_inst) ); - if !has_side_effect_or_load_not_get_pinned_reg(self.f, src_inst) + if !has_lowering_side_effect(self.f, src_inst) || self.inst_color(at_inst) == self.inst_color(src_inst) { Some((src_inst, result_idx)) @@ -989,6 +989,8 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> { fn use_input_reg(&mut self, input: LowerInput) { debug!("use_input_reg: vreg {:?} is needed", input.reg); + // We may directly return a real (machine) register when we know that register holds the + // result of an opcode (e.g. GetPinnedReg). 
if input.reg.is_virtual() { self.vreg_needed[input.reg.get_index()] = true; } From d6884586194f1562b3b19f721c7800084b2635de Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Tue, 25 Aug 2020 09:02:08 -0700 Subject: [PATCH 30/34] Add a fuzz target for instantiating `wasm-smith` modules --- Cargo.lock | 19 +++++++++++++++---- fuzz/Cargo.toml | 7 +++++++ fuzz/fuzz_targets/instantiate-wasm-smith.rs | 13 +++++++++++++ 3 files changed, 35 insertions(+), 4 deletions(-) create mode 100755 fuzz/fuzz_targets/instantiate-wasm-smith.rs diff --git a/Cargo.lock b/Cargo.lock index db70f7da2f..d09e8fed93 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -56,9 +56,9 @@ checksum = "85bb70cc08ec97ca5450e6eba421deeea5f172c0fc61f78b5357b2a8e8be195f" [[package]] name = "arbitrary" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cb544f1057eaaff4b34f8c4dcf56fc3cd04debd291998405d135017a7c3c0f4" +checksum = "0922a3e746b5a44e111e5603feb6704e5cc959116f66737f50bb5cbd264e9d87" dependencies = [ "derive_arbitrary", ] @@ -656,9 +656,9 @@ dependencies = [ [[package]] name = "derive_arbitrary" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b43185d3e7ce7dcd44a23ca761ec026359753ebf480283a571e6463853d2ef" +checksum = "d0f7c6c81276b6b8702074defbdb1938933ddf98c7f7e0dca8d9e9214dd6c730" dependencies = [ "proc-macro2", "quote", @@ -2285,6 +2285,16 @@ dependencies = [ "yanix", ] +[[package]] +name = "wasm-smith" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ec52a2524c15abcc827e59c1eef9cdb742b4ba57a6db6a1ccd914e357326edd" +dependencies = [ + "arbitrary", + "leb128", +] + [[package]] name = "wasmparser" version = "0.57.0" @@ -2467,6 +2477,7 @@ dependencies = [ "libfuzzer-sys", "peepmatic-fuzzing", "target-lexicon", + "wasm-smith", "wasmtime", "wasmtime-fuzzing", ] diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 
78733f5862..e1103478f0 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -17,6 +17,7 @@ target-lexicon = "0.10" peepmatic-fuzzing = { path = "../cranelift/peepmatic/crates/fuzzing", optional = true } wasmtime = { path = "../crates/wasmtime" } wasmtime-fuzzing = { path = "../crates/fuzzing" } +wasm-smith = "0.1.1" [[bin]] name = "compile" @@ -100,3 +101,9 @@ required-features = ["peepmatic-fuzzing"] [features] binaryen = ["wasmtime-fuzzing/binaryen"] + +[[bin]] +name = "instantiate-wasm-smith" +path = "fuzz_targets/instantiate-wasm-smith.rs" +test = false +doc = false diff --git a/fuzz/fuzz_targets/instantiate-wasm-smith.rs b/fuzz/fuzz_targets/instantiate-wasm-smith.rs new file mode 100755 index 0000000000..9c081eff88 --- /dev/null +++ b/fuzz/fuzz_targets/instantiate-wasm-smith.rs @@ -0,0 +1,13 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; +use wasm_smith::Module; +use wasmtime::Strategy; +use wasmtime_fuzzing::oracles; + +fuzz_target!(|module: Module| { + let mut module = module; + module.ensure_termination(1000); + let wasm_bytes = module.to_bytes(); + oracles::instantiate(&wasm_bytes, Strategy::Auto); +}); From fceea4e7d2a0150c4a7adee3f10b8232e8c2c872 Mon Sep 17 00:00:00 2001 From: Pat Hickey Date: Tue, 25 Aug 2020 11:16:48 -0700 Subject: [PATCH 31/34] Update crates/wasi-common/src/sys/unix/mod.rs Co-authored-by: iximeow --- crates/wasi-common/src/sys/unix/mod.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crates/wasi-common/src/sys/unix/mod.rs b/crates/wasi-common/src/sys/unix/mod.rs index 79ab5c9010..9cf2129a23 100644 --- a/crates/wasi-common/src/sys/unix/mod.rs +++ b/crates/wasi-common/src/sys/unix/mod.rs @@ -50,7 +50,10 @@ impl AsFile for T { pub(super) fn get_file_type(file: &File) -> io::Result { let ft = file.metadata()?.file_type(); let file_type = if ft.is_block_device() { - tracing::debug!("Host fd {:?} is a block device", file.as_raw_fd()); + tracing::debug!( + host_fd = tracing::field::display(file.as_raw_fd()), + 
"Host fd is a block device" + ); types::Filetype::BlockDevice } else if ft.is_char_device() { tracing::debug!("Host fd {:?} is a char device", file.as_raw_fd()); From 963fe37eeaf1e5c2941dd7e55336ed979567ddb8 Mon Sep 17 00:00:00 2001 From: Pat Hickey Date: Tue, 25 Aug 2020 11:17:08 -0700 Subject: [PATCH 32/34] Update crates/wasi-common/src/sys/windows/path.rs Co-authored-by: iximeow --- crates/wasi-common/src/sys/windows/path.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crates/wasi-common/src/sys/windows/path.rs b/crates/wasi-common/src/sys/windows/path.rs index 1e92f71afb..2651d54c1e 100644 --- a/crates/wasi-common/src/sys/windows/path.rs +++ b/crates/wasi-common/src/sys/windows/path.rs @@ -170,7 +170,10 @@ pub(crate) fn link( let new_path = concatenate(new_dirfd, new_path)?; if follow_symlinks { // in particular, this will return an error if the target path doesn't exist - tracing::debug!("Following symlinks for path: {:?}", old_path); + tracing::debug!( + old_path = tracing::field::display(&old_path), + "Following symlinks" + ); old_path = fs::canonicalize(&old_path).map_err(|e| match e.raw_os_error() { // fs::canonicalize under Windows will return: // * ERROR_FILE_NOT_FOUND, if it encounters a dangling symlink From 930912f783b2d00c09eb3cd2c4a48d21f9ed42f2 Mon Sep 17 00:00:00 2001 From: Pat Hickey Date: Tue, 25 Aug 2020 11:29:34 -0700 Subject: [PATCH 33/34] tracing fixes in unix poll; add some missing debug impls --- crates/wasi-common/src/entry.rs | 6 ++++++ crates/wasi-common/src/poll.rs | 1 + crates/wasi-common/src/sys/unix/poll.rs | 13 +++++++++---- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/crates/wasi-common/src/entry.rs b/crates/wasi-common/src/entry.rs index 8734c63d3f..73923e6902 100644 --- a/crates/wasi-common/src/entry.rs +++ b/crates/wasi-common/src/entry.rs @@ -18,6 +18,12 @@ impl EntryHandle { } } +impl std::fmt::Debug for EntryHandle { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 
+ f.debug_struct("EntryHandle").field("opaque", &()).finish() + } +} + impl From> for EntryHandle { fn from(handle: Box) -> Self { Self(handle.into()) diff --git a/crates/wasi-common/src/poll.rs b/crates/wasi-common/src/poll.rs index a0be6b17b5..97f3f9ea38 100644 --- a/crates/wasi-common/src/poll.rs +++ b/crates/wasi-common/src/poll.rs @@ -9,6 +9,7 @@ pub(crate) struct ClockEventData { pub(crate) userdata: types::Userdata, } +#[derive(Debug)] pub(crate) struct FdEventData { pub(crate) handle: EntryHandle, pub(crate) r#type: types::Eventtype, diff --git a/crates/wasi-common/src/sys/unix/poll.rs b/crates/wasi-common/src/sys/unix/poll.rs index 8b30861b8d..022a2ba644 100644 --- a/crates/wasi-common/src/sys/unix/poll.rs +++ b/crates/wasi-common/src/sys/unix/poll.rs @@ -38,7 +38,10 @@ pub(crate) fn oneoff( let delay = timeout.delay / 1_000_000; // poll syscall requires delay to expressed in milliseconds delay.try_into().unwrap_or(libc::c_int::max_value()) }); - tracing::debug!("poll_oneoff poll_timeout = {:?}", poll_timeout); + tracing::debug!( + poll_timeout = tracing::field::debug(poll_timeout), + "poll_oneoff" + ); let ready = loop { match poll(&mut poll_fds, poll_timeout) { @@ -91,15 +94,17 @@ fn handle_fd_event( } for (fd_event, poll_fd) in ready_events { - tracing::debug!("poll_oneoff_handle_fd_event poll_fd = {:?}", poll_fd); + tracing::debug!( + poll_fd = tracing::field::debug(poll_fd), + poll_event = tracing::field::debug(&fd_event), + "poll_oneoff handle_fd_event" + ); let revents = match poll_fd.revents() { Some(revents) => revents, None => continue, }; - tracing::debug!("poll_oneoff_handle_fd_event revents = {:?}", revents); - let nbytes = if fd_event.r#type == types::Eventtype::FdRead { query_nbytes(fd_event.handle)? 
} else { From 5e0ca3c13bff47cc991c40bdcf3bd7709143bd37 Mon Sep 17 00:00:00 2001 From: Pat Hickey Date: Tue, 25 Aug 2020 11:32:24 -0700 Subject: [PATCH 34/34] tracing: some windows fixes --- crates/wasi-common/src/sys/windows/path.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/wasi-common/src/sys/windows/path.rs b/crates/wasi-common/src/sys/windows/path.rs index 2651d54c1e..89252d3d00 100644 --- a/crates/wasi-common/src/sys/windows/path.rs +++ b/crates/wasi-common/src/sys/windows/path.rs @@ -46,7 +46,7 @@ fn concatenate>(file: &OsDir, path: P) -> Result { // components with `out_path` let out_path = PathBuf::from(strip_extended_prefix(out_path)); - tracing::debug!("out_path={:?}", out_path); + tracing::debug!(out_path = tracing::field::debug(&out_path)); Ok(out_path) } @@ -171,7 +171,7 @@ pub(crate) fn link( if follow_symlinks { // in particular, this will return an error if the target path doesn't exist tracing::debug!( - old_path = tracing::field::display(&old_path), + old_path = tracing::field::display(old_path.display()), "Following symlinks" ); old_path = fs::canonicalize(&old_path).map_err(|e| match e.raw_os_error() {