Merge pull request #3752 from fitzgen/newtypes-for-register-classes
cranelift: Add newtype wrappers for x64 register classes
This commit is contained in:
4
Cargo.lock
generated
4
Cargo.lock
generated
@@ -2381,9 +2381,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "regalloc"
|
||||
version = "0.0.33"
|
||||
version = "0.0.34"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7d808cff91dfca7b239d40b972ba628add94892b1d9e19a842aedc5cfae8ab1a"
|
||||
checksum = "62446b1d3ebf980bdc68837700af1d77b37bc430e524bf95319c6eada2a4cc02"
|
||||
dependencies = [
|
||||
"log",
|
||||
"rustc-hash",
|
||||
|
||||
@@ -23,7 +23,7 @@ serde = { version = "1.0.94", features = ["derive"], optional = true }
|
||||
bincode = { version = "1.2.1", optional = true }
|
||||
gimli = { version = "0.26.0", default-features = false, features = ["write"], optional = true }
|
||||
smallvec = { version = "1.6.1" }
|
||||
regalloc = { version = "0.0.33" }
|
||||
regalloc = "0.0.34"
|
||||
souper-ir = { version = "2.1.0", optional = true }
|
||||
# It is a goal of the cranelift-codegen crate to have minimal external dependencies.
|
||||
# Please don't add any unless they are essential to the task of creating binary
|
||||
|
||||
@@ -406,6 +406,8 @@ fn rebuild_isle(
|
||||
) -> Result<(), Box<dyn std::error::Error + 'static>> {
|
||||
use cranelift_isle as isle;
|
||||
|
||||
println!("Rebuilding {}", compilation.output.display());
|
||||
|
||||
// First, remove the manifest, if any; we will recreate it
|
||||
// below if the compilation is successful. Ignore error if no
|
||||
// manifest was present.
|
||||
|
||||
@@ -871,15 +871,10 @@
|
||||
|
||||
(type BoxCallInfo (primitive BoxCallInfo))
|
||||
(type BoxCallIndInfo (primitive BoxCallIndInfo))
|
||||
(type VecMachLabel (primitive VecMachLabel))
|
||||
(type CondBrKind (primitive CondBrKind))
|
||||
(type BranchTarget (primitive BranchTarget))
|
||||
(type BoxJTSequenceInfo (primitive BoxJTSequenceInfo))
|
||||
(type BoxExternalName (primitive BoxExternalName))
|
||||
(type CodeOffset (primitive CodeOffset))
|
||||
(type ExternalName (primitive ExternalName))
|
||||
(type ValueLabel (primitive ValueLabel))
|
||||
(type UnwindInst (primitive UnwindInst))
|
||||
|
||||
(type ExtendOp extern
|
||||
(enum
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
src/clif.isle 9ea75a6f790b5c03
|
||||
src/prelude.isle 2bfcafbef6b29358
|
||||
src/isa/aarch64/inst.isle 944323ff7d6db098
|
||||
src/prelude.isle 6aaf8ce0f5a5c2ec
|
||||
src/isa/aarch64/inst.isle dafd813ba278ce19
|
||||
src/isa/aarch64/lower.isle 2d2e1e076a0c8a23
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -678,11 +678,7 @@
|
||||
|
||||
(type BoxCallInfo (primitive BoxCallInfo))
|
||||
(type BoxCallIndInfo (primitive BoxCallIndInfo))
|
||||
(type MachLabel (primitive MachLabel))
|
||||
(type BoxJTSequenceInfo (primitive BoxJTSequenceInfo))
|
||||
(type BoxExternalName (primitive BoxExternalName))
|
||||
(type ValueLabel (primitive ValueLabel))
|
||||
(type UnwindInst (primitive UnwindInst))
|
||||
|
||||
;; An ALU operation.
|
||||
(type ALUOp
|
||||
@@ -1041,10 +1037,6 @@
|
||||
|
||||
;; Helpers for machine label vectors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; VecMachLabel needs to be passed by reference, so it cannot be
|
||||
;; declared as primitive type. Declare as extern enum instead.
|
||||
(type VecMachLabel extern (enum))
|
||||
|
||||
(decl vec_length_minus1 (VecMachLabel) u32)
|
||||
(extern constructor vec_length_minus1 vec_length_minus1)
|
||||
|
||||
@@ -2963,5 +2955,3 @@
|
||||
(decl fcmp_reg (Type Reg Reg) ProducesFlags)
|
||||
(rule (fcmp_reg $F32 src1 src2) (fpu_cmp32 src1 src2))
|
||||
(rule (fcmp_reg $F64 src1 src2) (fpu_cmp64 src1 src2))
|
||||
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
src/clif.isle 9ea75a6f790b5c03
|
||||
src/prelude.isle 2bfcafbef6b29358
|
||||
src/isa/s390x/inst.isle 1d525c87f7c77c26
|
||||
src/prelude.isle 6aaf8ce0f5a5c2ec
|
||||
src/isa/s390x/inst.isle f5af3708848ef1aa
|
||||
src/isa/s390x/lower.isle 57dcc39cbab2d1c6
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -47,7 +47,8 @@ pub(crate) fn encode_sib(shift: u8, enc_index: u8, enc_base: u8) -> u8 {
|
||||
|
||||
/// Get the encoding number of a GPR.
|
||||
#[inline(always)]
|
||||
pub(crate) fn int_reg_enc(reg: Reg) -> u8 {
|
||||
pub(crate) fn int_reg_enc(reg: impl Into<Reg>) -> u8 {
|
||||
let reg = reg.into();
|
||||
debug_assert!(reg.is_real());
|
||||
debug_assert_eq!(reg.get_class(), RegClass::I64);
|
||||
reg.get_hw_encoding()
|
||||
@@ -55,7 +56,8 @@ pub(crate) fn int_reg_enc(reg: Reg) -> u8 {
|
||||
|
||||
/// Get the encoding number of any register.
|
||||
#[inline(always)]
|
||||
pub(crate) fn reg_enc(reg: Reg) -> u8 {
|
||||
pub(crate) fn reg_enc(reg: impl Into<Reg>) -> u8 {
|
||||
let reg = reg.into();
|
||||
debug_assert!(reg.is_real());
|
||||
reg.get_hw_encoding()
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -13,6 +13,309 @@ use smallvec::{smallvec, SmallVec};
|
||||
use std::fmt;
|
||||
use std::string::String;
|
||||
|
||||
/// An extension trait for converting `Writable{Xmm,Gpr}` to `Writable<Reg>`.
|
||||
pub trait ToWritableReg {
|
||||
fn to_writable_reg(&self) -> Writable<Reg>;
|
||||
}
|
||||
|
||||
/// An extension trait for converting `Writable<Reg>` to `Writable{Xmm,Gpr}`.
|
||||
pub trait FromWritableReg: Sized {
|
||||
fn from_writable_reg(w: Writable<Reg>) -> Option<Self>;
|
||||
}
|
||||
|
||||
/// An extension trait for mapping register uses on `{Xmm,Gpr}`.
|
||||
pub trait MapUseExt {
|
||||
fn map_use<RM>(&mut self, mapper: &RM)
|
||||
where
|
||||
RM: RegMapper;
|
||||
}
|
||||
|
||||
/// An extension trait for mapping register mods and defs on
|
||||
/// `Writable{Xmm,Gpr}`.
|
||||
pub trait MapDefModExt {
|
||||
fn map_def<RM>(&mut self, mapper: &RM)
|
||||
where
|
||||
RM: RegMapper;
|
||||
|
||||
fn map_mod<RM>(&mut self, mapper: &RM)
|
||||
where
|
||||
RM: RegMapper;
|
||||
}
|
||||
|
||||
/// A macro for defining a newtype of `Reg` that enforces some invariant about
|
||||
/// the wrapped `Reg` (such as that it is of a particular register class).
|
||||
macro_rules! newtype_of_reg {
|
||||
(
|
||||
$newtype_reg:ident,
|
||||
$newtype_writable_reg:ident,
|
||||
$newtype_reg_mem:ident,
|
||||
$newtype_reg_mem_imm:ident,
|
||||
|$check_reg:ident| $check:expr
|
||||
) => {
|
||||
/// A newtype wrapper around `Reg`.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||
pub struct $newtype_reg(Reg);
|
||||
|
||||
impl PartialEq<Reg> for $newtype_reg {
|
||||
fn eq(&self, other: &Reg) -> bool {
|
||||
self.0 == *other
|
||||
}
|
||||
}
|
||||
|
||||
impl From<$newtype_reg> for Reg {
|
||||
fn from(r: $newtype_reg) -> Self {
|
||||
r.0
|
||||
}
|
||||
}
|
||||
|
||||
impl PrettyPrint for $newtype_reg {
|
||||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
self.0.show_rru(mb_rru)
|
||||
}
|
||||
}
|
||||
|
||||
impl $newtype_reg {
|
||||
/// Create this newtype from the given register, or return `None` if the register
|
||||
/// is not a valid instance of this newtype.
|
||||
pub fn new($check_reg: Reg) -> Option<Self> {
|
||||
if $check {
|
||||
Some(Self($check_reg))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Get this newtype's underlying `Reg`.
|
||||
pub fn to_reg(self) -> Reg {
|
||||
self.0
|
||||
}
|
||||
}
|
||||
|
||||
// Convenience impl so that people working with this newtype can use it
|
||||
// "just like" a plain `Reg`.
|
||||
//
|
||||
// NB: We cannot implement `DerefMut` because that would let people do
|
||||
// nasty stuff like `*my_gpr.deref_mut() = some_xmm_reg`, breaking the
|
||||
// invariants that `Gpr` provides.
|
||||
impl std::ops::Deref for $newtype_reg {
|
||||
type Target = Reg;
|
||||
|
||||
fn deref(&self) -> &Reg {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl MapUseExt for $newtype_reg {
|
||||
fn map_use<RM>(&mut self, mapper: &RM)
|
||||
where
|
||||
RM: RegMapper,
|
||||
{
|
||||
let mut reg = self.0;
|
||||
mapper.map_use(&mut reg);
|
||||
debug_assert!({
|
||||
let $check_reg = reg;
|
||||
$check
|
||||
});
|
||||
*self = $newtype_reg(reg);
|
||||
}
|
||||
}
|
||||
|
||||
pub type $newtype_writable_reg = Writable<$newtype_reg>;
|
||||
|
||||
impl ToWritableReg for $newtype_writable_reg {
|
||||
fn to_writable_reg(&self) -> Writable<Reg> {
|
||||
Writable::from_reg(self.to_reg().to_reg())
|
||||
}
|
||||
}
|
||||
|
||||
impl FromWritableReg for $newtype_writable_reg {
|
||||
fn from_writable_reg(w: Writable<Reg>) -> Option<Self> {
|
||||
Some(Writable::from_reg($newtype_reg::new(w.to_reg())?))
|
||||
}
|
||||
}
|
||||
|
||||
impl MapDefModExt for $newtype_writable_reg {
|
||||
fn map_def<RM>(&mut self, mapper: &RM)
|
||||
where
|
||||
RM: RegMapper,
|
||||
{
|
||||
let mut reg = self.to_writable_reg();
|
||||
mapper.map_def(&mut reg);
|
||||
debug_assert!({
|
||||
let $check_reg = reg.to_reg();
|
||||
$check
|
||||
});
|
||||
*self = Writable::from_reg($newtype_reg(reg.to_reg()));
|
||||
}
|
||||
|
||||
fn map_mod<RM>(&mut self, mapper: &RM)
|
||||
where
|
||||
RM: RegMapper,
|
||||
{
|
||||
let mut reg = self.to_writable_reg();
|
||||
mapper.map_mod(&mut reg);
|
||||
debug_assert!({
|
||||
let $check_reg = reg.to_reg();
|
||||
$check
|
||||
});
|
||||
*self = Writable::from_reg($newtype_reg(reg.to_reg()));
|
||||
}
|
||||
}
|
||||
|
||||
/// A newtype wrapper around `RegMem`.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct $newtype_reg_mem(RegMem);
|
||||
|
||||
impl From<$newtype_reg_mem> for RegMem {
|
||||
fn from(rm: $newtype_reg_mem) -> Self {
|
||||
rm.0
|
||||
}
|
||||
}
|
||||
|
||||
impl From<$newtype_reg> for $newtype_reg_mem {
|
||||
fn from(r: $newtype_reg) -> Self {
|
||||
$newtype_reg_mem(RegMem::reg(r.into()))
|
||||
}
|
||||
}
|
||||
|
||||
impl $newtype_reg_mem {
|
||||
/// Construct a `RegMem` newtype from the given `RegMem`, or return
|
||||
/// `None` if the `RegMem` is not a valid instance of this `RegMem`
|
||||
/// newtype.
|
||||
pub fn new(rm: RegMem) -> Option<Self> {
|
||||
match rm {
|
||||
RegMem::Mem { addr: _ } => Some(Self(rm)),
|
||||
RegMem::Reg { reg: $check_reg } if $check => Some(Self(rm)),
|
||||
RegMem::Reg { reg: _ } => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert this newtype into its underlying `RegMem`.
|
||||
pub fn to_reg_mem(self) -> RegMem {
|
||||
self.0
|
||||
}
|
||||
|
||||
#[allow(dead_code)] // Used by some newtypes and not others.
|
||||
pub fn map_uses<RM>(&mut self, mapper: &RM)
|
||||
where
|
||||
RM: RegMapper,
|
||||
{
|
||||
self.0.map_uses(mapper);
|
||||
debug_assert!(match self.0 {
|
||||
RegMem::Reg { reg: $check_reg } => $check,
|
||||
_ => true,
|
||||
});
|
||||
}
|
||||
|
||||
#[allow(dead_code)] // Used by some newtypes and not others.
|
||||
pub fn map_as_def<RM>(&mut self, mapper: &RM)
|
||||
where
|
||||
RM: RegMapper,
|
||||
{
|
||||
self.0.map_as_def(mapper);
|
||||
debug_assert!(match self.0 {
|
||||
RegMem::Reg { reg: $check_reg } => $check,
|
||||
_ => true,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
impl PrettyPrint for $newtype_reg_mem {
|
||||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
self.0.show_rru(mb_rru)
|
||||
}
|
||||
}
|
||||
|
||||
impl PrettyPrintSized for $newtype_reg_mem {
|
||||
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
|
||||
self.0.show_rru_sized(mb_rru, size)
|
||||
}
|
||||
}
|
||||
|
||||
/// A newtype wrapper around `RegMemImm`.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct $newtype_reg_mem_imm(RegMemImm);
|
||||
|
||||
impl From<$newtype_reg_mem_imm> for RegMemImm {
|
||||
fn from(rmi: $newtype_reg_mem_imm) -> RegMemImm {
|
||||
rmi.0
|
||||
}
|
||||
}
|
||||
|
||||
impl From<$newtype_reg> for $newtype_reg_mem_imm {
|
||||
fn from(r: $newtype_reg) -> Self {
|
||||
$newtype_reg_mem_imm(RegMemImm::reg(r.into()))
|
||||
}
|
||||
}
|
||||
|
||||
impl $newtype_reg_mem_imm {
|
||||
/// Construct this newtype from the given `RegMemImm`, or return
|
||||
/// `None` if the `RegMemImm` is not a valid instance of this
|
||||
/// newtype.
|
||||
pub fn new(rmi: RegMemImm) -> Option<Self> {
|
||||
match rmi {
|
||||
RegMemImm::Imm { .. } => Some(Self(rmi)),
|
||||
RegMemImm::Mem { addr: _ } => Some(Self(rmi)),
|
||||
RegMemImm::Reg { reg: $check_reg } if $check => Some(Self(rmi)),
|
||||
RegMemImm::Reg { reg: _ } => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert this newtype into its underlying `RegMemImm`.
|
||||
#[allow(dead_code)] // Used by some newtypes and not others.
|
||||
pub fn to_reg_mem_imm(self) -> RegMemImm {
|
||||
self.0
|
||||
}
|
||||
|
||||
#[allow(dead_code)] // Used by some newtypes and not others.
|
||||
pub fn map_uses<RM>(&mut self, mapper: &RM)
|
||||
where
|
||||
RM: RegMapper,
|
||||
{
|
||||
self.0.map_uses(mapper);
|
||||
debug_assert!(match self.0 {
|
||||
RegMemImm::Reg { reg: $check_reg } => $check,
|
||||
_ => true,
|
||||
});
|
||||
}
|
||||
|
||||
#[allow(dead_code)] // Used by some newtypes and not others.
|
||||
pub fn map_as_def<RM>(&mut self, mapper: &RM)
|
||||
where
|
||||
RM: RegMapper,
|
||||
{
|
||||
self.0.map_as_def(mapper);
|
||||
debug_assert!(match self.0 {
|
||||
RegMemImm::Reg { reg: $check_reg } => $check,
|
||||
_ => true,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
impl PrettyPrint for $newtype_reg_mem_imm {
|
||||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||
self.0.show_rru(mb_rru)
|
||||
}
|
||||
}
|
||||
|
||||
impl PrettyPrintSized for $newtype_reg_mem_imm {
|
||||
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
|
||||
self.0.show_rru_sized(mb_rru, size)
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// Define a newtype of `Reg` for general-purpose registers.
|
||||
newtype_of_reg!(Gpr, WritableGpr, GprMem, GprMemImm, |reg| {
|
||||
reg.get_class() == RegClass::I64
|
||||
});
|
||||
|
||||
// Define a newtype of `Reg` for XMM registers.
|
||||
newtype_of_reg!(Xmm, WritableXmm, XmmMem, XmmMemImm, |reg| {
|
||||
reg.get_class() == RegClass::V128
|
||||
});
|
||||
|
||||
/// A possible addressing mode (amode) that can be used in instructions.
|
||||
/// These denote a 64-bit value only.
|
||||
#[derive(Clone, Debug)]
|
||||
@@ -27,8 +330,8 @@ pub enum Amode {
|
||||
/// sign-extend-32-to-64(Immediate) + Register1 + (Register2 << Shift)
|
||||
ImmRegRegShift {
|
||||
simm32: u32,
|
||||
base: Reg,
|
||||
index: Reg,
|
||||
base: Gpr,
|
||||
index: Gpr,
|
||||
shift: u8, /* 0 .. 3 only */
|
||||
flags: MemFlags,
|
||||
},
|
||||
@@ -48,7 +351,7 @@ impl Amode {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn imm_reg_reg_shift(simm32: u32, base: Reg, index: Reg, shift: u8) -> Self {
|
||||
pub(crate) fn imm_reg_reg_shift(simm32: u32, base: Gpr, index: Gpr, shift: u8) -> Self {
|
||||
debug_assert!(base.get_class() == RegClass::I64);
|
||||
debug_assert!(index.get_class() == RegClass::I64);
|
||||
debug_assert!(shift <= 3);
|
||||
@@ -96,8 +399,8 @@ impl Amode {
|
||||
collector.add_use(*base);
|
||||
}
|
||||
Amode::ImmRegRegShift { base, index, .. } => {
|
||||
collector.add_use(*base);
|
||||
collector.add_use(*index);
|
||||
collector.add_use(base.to_reg());
|
||||
collector.add_use(index.to_reg());
|
||||
}
|
||||
Amode::RipRelative { .. } => {
|
||||
// RIP isn't involved in regalloc.
|
||||
@@ -225,7 +528,7 @@ impl PrettyPrint for SyntheticAmode {
|
||||
/// denote an 8, 16, 32 or 64 bit value. For the Immediate form, in the 8- and 16-bit case, only
|
||||
/// the lower 8 or 16 bits of `simm32` are relevant. In the 64-bit case, the value denoted by
|
||||
/// `simm32` is its sign-extension out to 64 bits.
|
||||
#[derive(Clone)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum RegMemImm {
|
||||
Reg { reg: Reg },
|
||||
Mem { addr: SyntheticAmode },
|
||||
|
||||
@@ -298,14 +298,14 @@ pub(crate) fn emit(
|
||||
Popcnt => (0x0fb8, 2),
|
||||
};
|
||||
|
||||
match src {
|
||||
match src.clone().into() {
|
||||
RegMem::Reg { reg: src } => emit_std_reg_reg(
|
||||
sink,
|
||||
prefix,
|
||||
opcode,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
*src,
|
||||
dst.to_reg().to_reg(),
|
||||
src,
|
||||
rex_flags,
|
||||
),
|
||||
RegMem::Mem { addr: src } => {
|
||||
@@ -317,7 +317,7 @@ pub(crate) fn emit(
|
||||
prefix,
|
||||
opcode,
|
||||
num_opcodes,
|
||||
dst.to_reg(),
|
||||
dst.to_reg().to_reg(),
|
||||
&amode,
|
||||
rex_flags,
|
||||
);
|
||||
@@ -327,7 +327,7 @@ pub(crate) fn emit(
|
||||
|
||||
Inst::Not { size, src, dst } => {
|
||||
debug_assert_eq!(*src, dst.to_reg());
|
||||
let rex_flags = RexFlags::from((*size, dst.to_reg()));
|
||||
let rex_flags = RexFlags::from((*size, dst.to_writable_reg().to_reg()));
|
||||
let (opcode, prefix) = match size {
|
||||
OperandSize::Size8 => (0xF6, LegacyPrefixes::None),
|
||||
OperandSize::Size16 => (0xF7, LegacyPrefixes::_66),
|
||||
@@ -342,7 +342,7 @@ pub(crate) fn emit(
|
||||
|
||||
Inst::Neg { size, src, dst } => {
|
||||
debug_assert_eq!(*src, dst.to_reg());
|
||||
let rex_flags = RexFlags::from((*size, dst.to_reg()));
|
||||
let rex_flags = RexFlags::from((*size, dst.to_writable_reg().to_reg()));
|
||||
let (opcode, prefix) = match size {
|
||||
OperandSize::Size8 => (0xF6, LegacyPrefixes::None),
|
||||
OperandSize::Size16 => (0xF7, LegacyPrefixes::_66),
|
||||
@@ -728,7 +728,7 @@ pub(crate) fn emit(
|
||||
LegacyPrefixes::None,
|
||||
0x8D,
|
||||
1,
|
||||
dst.to_reg(),
|
||||
dst.to_reg().to_reg(),
|
||||
&amode,
|
||||
RexFlags::set_w(),
|
||||
);
|
||||
@@ -884,6 +884,7 @@ pub(crate) fn emit(
|
||||
debug_assert_eq!(*src1, dst.to_reg());
|
||||
let rex = RexFlags::clear_w();
|
||||
let prefix = LegacyPrefixes::_66;
|
||||
let src2 = src2.clone().to_reg_mem_imm();
|
||||
if let RegMemImm::Imm { simm32 } = src2 {
|
||||
let (opcode_bytes, reg_digit) = match opcode {
|
||||
SseOpcode::Psllw => (0x0F71, 6),
|
||||
@@ -898,7 +899,7 @@ pub(crate) fn emit(
|
||||
};
|
||||
let dst_enc = reg_enc(dst.to_reg());
|
||||
emit_std_enc_enc(sink, prefix, opcode_bytes, 2, reg_digit, dst_enc, rex);
|
||||
let imm = (*simm32)
|
||||
let imm = (simm32)
|
||||
.try_into()
|
||||
.expect("the immediate must be convertible to a u8");
|
||||
sink.put1(imm);
|
||||
@@ -917,7 +918,15 @@ pub(crate) fn emit(
|
||||
|
||||
match src2 {
|
||||
RegMemImm::Reg { reg } => {
|
||||
emit_std_reg_reg(sink, prefix, opcode_bytes, 2, dst.to_reg(), *reg, rex);
|
||||
emit_std_reg_reg(
|
||||
sink,
|
||||
prefix,
|
||||
opcode_bytes,
|
||||
2,
|
||||
dst.to_reg().to_reg(),
|
||||
reg,
|
||||
rex,
|
||||
);
|
||||
}
|
||||
RegMemImm::Mem { addr } => {
|
||||
let addr = &addr.finalize(state, sink);
|
||||
@@ -928,7 +937,7 @@ pub(crate) fn emit(
|
||||
prefix,
|
||||
opcode_bytes,
|
||||
2,
|
||||
dst.to_reg(),
|
||||
dst.to_reg().to_reg(),
|
||||
addr,
|
||||
rex,
|
||||
);
|
||||
@@ -1335,7 +1344,12 @@ pub(crate) fn emit(
|
||||
// might be negative; use a sign-extension.
|
||||
let inst = Inst::movsx_rm_r(
|
||||
ExtMode::LQ,
|
||||
RegMem::mem(Amode::imm_reg_reg_shift(0, tmp1.to_reg(), tmp2.to_reg(), 2)),
|
||||
RegMem::mem(Amode::imm_reg_reg_shift(
|
||||
0,
|
||||
Gpr::new(tmp1.to_reg()).unwrap(),
|
||||
Gpr::new(tmp2.to_reg()).unwrap(),
|
||||
2,
|
||||
)),
|
||||
*tmp2,
|
||||
);
|
||||
inst.emit(sink, info, state);
|
||||
@@ -1424,15 +1438,15 @@ pub(crate) fn emit(
|
||||
_ => unimplemented!("Opcode {:?} not implemented", op),
|
||||
};
|
||||
|
||||
match src_e {
|
||||
match src_e.clone().to_reg_mem() {
|
||||
RegMem::Reg { reg: reg_e } => {
|
||||
emit_std_reg_reg(
|
||||
sink,
|
||||
prefix,
|
||||
opcode,
|
||||
num_opcodes,
|
||||
reg_g.to_reg(),
|
||||
*reg_e,
|
||||
reg_g.to_reg().to_reg(),
|
||||
reg_e,
|
||||
rex,
|
||||
);
|
||||
}
|
||||
@@ -1445,7 +1459,7 @@ pub(crate) fn emit(
|
||||
prefix,
|
||||
opcode,
|
||||
num_opcodes,
|
||||
reg_g.to_reg(),
|
||||
reg_g.to_reg().to_reg(),
|
||||
addr,
|
||||
rex,
|
||||
);
|
||||
@@ -1460,7 +1474,7 @@ pub(crate) fn emit(
|
||||
Avx512Opcode::Vpopcntb => (LegacyPrefixes::_66, OpcodeMap::_0F38, false, 0x54),
|
||||
_ => unimplemented!("Opcode {:?} not implemented", op),
|
||||
};
|
||||
match src {
|
||||
match src.clone().to_reg_mem() {
|
||||
RegMem::Reg { reg: src } => EvexInstruction::new()
|
||||
.length(EvexVectorLength::V128)
|
||||
.prefix(prefix)
|
||||
@@ -1587,9 +1601,17 @@ pub(crate) fn emit(
|
||||
_ => unimplemented!("Opcode {:?} not implemented", op),
|
||||
};
|
||||
|
||||
match src_e {
|
||||
match src_e.clone().to_reg_mem() {
|
||||
RegMem::Reg { reg: reg_e } => {
|
||||
emit_std_reg_reg(sink, prefix, opcode, length, reg_g.to_reg(), *reg_e, rex);
|
||||
emit_std_reg_reg(
|
||||
sink,
|
||||
prefix,
|
||||
opcode,
|
||||
length,
|
||||
reg_g.to_reg().to_reg(),
|
||||
reg_e,
|
||||
rex,
|
||||
);
|
||||
}
|
||||
RegMem::Mem { addr } => {
|
||||
let addr = &addr.finalize(state, sink);
|
||||
@@ -1600,7 +1622,7 @@ pub(crate) fn emit(
|
||||
prefix,
|
||||
opcode,
|
||||
length,
|
||||
reg_g.to_reg(),
|
||||
reg_g.to_reg().to_reg(),
|
||||
addr,
|
||||
rex,
|
||||
);
|
||||
@@ -1619,7 +1641,7 @@ pub(crate) fn emit(
|
||||
Avx512Opcode::Vpmullq => (true, 0x40),
|
||||
_ => unimplemented!("Opcode {:?} not implemented", op),
|
||||
};
|
||||
match src1 {
|
||||
match src1.clone().to_reg_mem() {
|
||||
RegMem::Reg { reg: src } => EvexInstruction::new()
|
||||
.length(EvexVectorLength::V128)
|
||||
.prefix(LegacyPrefixes::_66)
|
||||
@@ -1845,9 +1867,9 @@ pub(crate) fn emit(
|
||||
};
|
||||
let rex = RexFlags::from(*dst_size);
|
||||
let (src, dst) = if dst_first {
|
||||
(dst.to_reg(), *src)
|
||||
(dst.to_reg().to_reg(), src.to_reg())
|
||||
} else {
|
||||
(*src, dst.to_reg())
|
||||
(src.to_reg(), dst.to_reg().to_reg())
|
||||
};
|
||||
|
||||
emit_std_reg_reg(sink, prefix, opcode, 2, src, dst, rex);
|
||||
@@ -1870,7 +1892,15 @@ pub(crate) fn emit(
|
||||
let rex = RexFlags::from(*src_size);
|
||||
match src_e {
|
||||
RegMem::Reg { reg: reg_e } => {
|
||||
emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex);
|
||||
emit_std_reg_reg(
|
||||
sink,
|
||||
prefix,
|
||||
opcode,
|
||||
2,
|
||||
reg_g.to_reg().to_reg(),
|
||||
*reg_e,
|
||||
rex,
|
||||
);
|
||||
}
|
||||
RegMem::Mem { addr } => {
|
||||
let addr = &addr.finalize(state, sink);
|
||||
@@ -1881,7 +1911,7 @@ pub(crate) fn emit(
|
||||
prefix,
|
||||
opcode,
|
||||
2,
|
||||
reg_g.to_reg(),
|
||||
reg_g.to_reg().to_reg(),
|
||||
addr,
|
||||
rex,
|
||||
);
|
||||
@@ -1950,7 +1980,11 @@ pub(crate) fn emit(
|
||||
// If x seen as a signed int64 is not negative, a signed-conversion will do the right
|
||||
// thing.
|
||||
// TODO use tst src, src here.
|
||||
let inst = Inst::cmp_rmi_r(OperandSize::Size64, RegMemImm::imm(0), src.to_reg());
|
||||
let inst = Inst::cmp_rmi_r(
|
||||
OperandSize::Size64,
|
||||
RegMemImm::imm(0),
|
||||
src.to_reg().to_reg(),
|
||||
);
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
one_way_jmp(sink, CC::L, handle_negative);
|
||||
@@ -1961,8 +1995,8 @@ pub(crate) fn emit(
|
||||
sink,
|
||||
info,
|
||||
state,
|
||||
src.to_reg(),
|
||||
*dst,
|
||||
src.to_reg().to_reg(),
|
||||
dst.to_writable_reg(),
|
||||
*dst_size == OperandSize::Size64,
|
||||
);
|
||||
|
||||
@@ -1973,7 +2007,11 @@ pub(crate) fn emit(
|
||||
|
||||
// Divide x by two to get it in range for the signed conversion, keep the LSB, and
|
||||
// scale it back up on the FP side.
|
||||
let inst = Inst::gen_move(*tmp_gpr1, src.to_reg(), types::I64);
|
||||
let inst = Inst::gen_move(
|
||||
tmp_gpr1.to_writable_reg(),
|
||||
src.to_reg().to_reg(),
|
||||
types::I64,
|
||||
);
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
// tmp_gpr1 := src >> 1
|
||||
@@ -1981,26 +2019,30 @@ pub(crate) fn emit(
|
||||
OperandSize::Size64,
|
||||
ShiftKind::ShiftRightLogical,
|
||||
Some(1),
|
||||
*tmp_gpr1,
|
||||
tmp_gpr1.to_writable_reg(),
|
||||
);
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
let inst = Inst::gen_move(*tmp_gpr2, src.to_reg(), types::I64);
|
||||
let inst = Inst::gen_move(
|
||||
tmp_gpr2.to_writable_reg(),
|
||||
src.to_reg().to_reg(),
|
||||
types::I64,
|
||||
);
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
let inst = Inst::alu_rmi_r(
|
||||
OperandSize::Size64,
|
||||
AluRmiROpcode::And,
|
||||
RegMemImm::imm(1),
|
||||
*tmp_gpr2,
|
||||
tmp_gpr2.to_writable_reg(),
|
||||
);
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
let inst = Inst::alu_rmi_r(
|
||||
OperandSize::Size64,
|
||||
AluRmiROpcode::Or,
|
||||
RegMemImm::reg(tmp_gpr1.to_reg()),
|
||||
*tmp_gpr2,
|
||||
RegMemImm::reg(tmp_gpr1.to_reg().to_reg()),
|
||||
tmp_gpr2.to_writable_reg(),
|
||||
);
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
@@ -2008,8 +2050,8 @@ pub(crate) fn emit(
|
||||
sink,
|
||||
info,
|
||||
state,
|
||||
tmp_gpr2.to_reg(),
|
||||
*dst,
|
||||
tmp_gpr2.to_reg().to_reg(),
|
||||
dst.to_writable_reg(),
|
||||
*dst_size == OperandSize::Size64,
|
||||
);
|
||||
|
||||
@@ -2018,7 +2060,11 @@ pub(crate) fn emit(
|
||||
} else {
|
||||
SseOpcode::Addss
|
||||
};
|
||||
let inst = Inst::xmm_rm_r(add_op, RegMem::reg(dst.to_reg()), *dst);
|
||||
let inst = Inst::xmm_rm_r(
|
||||
add_op,
|
||||
RegMem::reg(dst.to_reg().to_reg()),
|
||||
dst.to_writable_reg(),
|
||||
);
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
sink.bind_label(done);
|
||||
@@ -2091,18 +2137,18 @@ pub(crate) fn emit(
|
||||
let not_nan = sink.get_label();
|
||||
|
||||
// The truncation.
|
||||
let inst = Inst::xmm_to_gpr(trunc_op, src, *dst, *dst_size);
|
||||
let inst = Inst::xmm_to_gpr(trunc_op, src.to_reg(), dst.to_writable_reg(), *dst_size);
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
// Compare against 1, in case of overflow the dst operand was INT_MIN.
|
||||
let inst = Inst::cmp_rmi_r(*dst_size, RegMemImm::imm(1), dst.to_reg());
|
||||
let inst = Inst::cmp_rmi_r(*dst_size, RegMemImm::imm(1), dst.to_reg().to_reg());
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
one_way_jmp(sink, CC::NO, done); // no overflow => done
|
||||
|
||||
// Check for NaN.
|
||||
|
||||
let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), src);
|
||||
let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src.to_reg()), src.to_reg());
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
one_way_jmp(sink, CC::NP, not_nan); // go to not_nan if not a NaN
|
||||
@@ -2112,8 +2158,8 @@ pub(crate) fn emit(
|
||||
let inst = Inst::alu_rmi_r(
|
||||
*dst_size,
|
||||
AluRmiROpcode::Xor,
|
||||
RegMemImm::reg(dst.to_reg()),
|
||||
*dst,
|
||||
RegMemImm::reg(dst.to_reg().to_reg()),
|
||||
dst.to_writable_reg(),
|
||||
);
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
@@ -2125,11 +2171,18 @@ pub(crate) fn emit(
|
||||
// If the input was positive, saturate to INT_MAX.
|
||||
|
||||
// Zero out tmp_xmm.
|
||||
let inst =
|
||||
Inst::xmm_rm_r(SseOpcode::Xorpd, RegMem::reg(tmp_xmm.to_reg()), *tmp_xmm);
|
||||
let inst = Inst::xmm_rm_r(
|
||||
SseOpcode::Xorpd,
|
||||
RegMem::reg(tmp_xmm.to_reg().to_reg()),
|
||||
tmp_xmm.to_writable_reg(),
|
||||
);
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm.to_reg());
|
||||
let inst = Inst::xmm_cmp_rm_r(
|
||||
cmp_op,
|
||||
RegMem::reg(src.to_reg()),
|
||||
tmp_xmm.to_reg().to_reg(),
|
||||
);
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
// Jump if >= to done.
|
||||
@@ -2137,10 +2190,14 @@ pub(crate) fn emit(
|
||||
|
||||
// Otherwise, put INT_MAX.
|
||||
if *dst_size == OperandSize::Size64 {
|
||||
let inst = Inst::imm(OperandSize::Size64, 0x7fffffffffffffff, *dst);
|
||||
let inst = Inst::imm(
|
||||
OperandSize::Size64,
|
||||
0x7fffffffffffffff,
|
||||
dst.to_writable_reg(),
|
||||
);
|
||||
inst.emit(sink, info, state);
|
||||
} else {
|
||||
let inst = Inst::imm(OperandSize::Size32, 0x7fffffff, *dst);
|
||||
let inst = Inst::imm(OperandSize::Size32, 0x7fffffff, dst.to_writable_reg());
|
||||
inst.emit(sink, info, state);
|
||||
}
|
||||
} else {
|
||||
@@ -2162,7 +2219,8 @@ pub(crate) fn emit(
|
||||
match *src_size {
|
||||
OperandSize::Size32 => {
|
||||
let cst = Ieee32::pow2(output_bits - 1).neg().bits();
|
||||
let inst = Inst::imm(OperandSize::Size32, cst as u64, *tmp_gpr);
|
||||
let inst =
|
||||
Inst::imm(OperandSize::Size32, cst as u64, tmp_gpr.to_writable_reg());
|
||||
inst.emit(sink, info, state);
|
||||
}
|
||||
OperandSize::Size64 => {
|
||||
@@ -2174,17 +2232,26 @@ pub(crate) fn emit(
|
||||
} else {
|
||||
Ieee64::pow2(output_bits - 1).neg()
|
||||
};
|
||||
let inst = Inst::imm(OperandSize::Size64, cst.bits(), *tmp_gpr);
|
||||
let inst =
|
||||
Inst::imm(OperandSize::Size64, cst.bits(), tmp_gpr.to_writable_reg());
|
||||
inst.emit(sink, info, state);
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
|
||||
let inst =
|
||||
Inst::gpr_to_xmm(cast_op, RegMem::reg(tmp_gpr.to_reg()), *src_size, *tmp_xmm);
|
||||
let inst = Inst::gpr_to_xmm(
|
||||
cast_op,
|
||||
RegMem::reg(tmp_gpr.to_reg().to_reg()),
|
||||
*src_size,
|
||||
tmp_xmm.to_writable_reg(),
|
||||
);
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(tmp_xmm.to_reg()), src);
|
||||
let inst = Inst::xmm_cmp_rm_r(
|
||||
cmp_op,
|
||||
RegMem::reg(tmp_xmm.to_reg().to_reg()),
|
||||
src.to_reg(),
|
||||
);
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
// jump over trap if src >= or > threshold
|
||||
@@ -2198,11 +2265,18 @@ pub(crate) fn emit(
|
||||
sink.bind_label(check_positive);
|
||||
|
||||
// Zero out the tmp_xmm register.
|
||||
let inst =
|
||||
Inst::xmm_rm_r(SseOpcode::Xorpd, RegMem::reg(tmp_xmm.to_reg()), *tmp_xmm);
|
||||
let inst = Inst::xmm_rm_r(
|
||||
SseOpcode::Xorpd,
|
||||
RegMem::reg(tmp_xmm.to_reg().to_reg()),
|
||||
tmp_xmm.to_writable_reg(),
|
||||
);
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm.to_reg());
|
||||
let inst = Inst::xmm_cmp_rm_r(
|
||||
cmp_op,
|
||||
RegMem::reg(src.to_reg()),
|
||||
tmp_xmm.to_reg().to_reg(),
|
||||
);
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
one_way_jmp(sink, CC::NB, done); // jump over trap if 0 >= src
|
||||
@@ -2282,14 +2356,22 @@ pub(crate) fn emit(
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let inst = Inst::imm(*src_size, cst, *tmp_gpr);
|
||||
let inst = Inst::imm(*src_size, cst, tmp_gpr.to_writable_reg());
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
let inst =
|
||||
Inst::gpr_to_xmm(cast_op, RegMem::reg(tmp_gpr.to_reg()), *src_size, *tmp_xmm);
|
||||
let inst = Inst::gpr_to_xmm(
|
||||
cast_op,
|
||||
RegMem::reg(tmp_gpr.to_reg().to_reg()),
|
||||
*src_size,
|
||||
tmp_xmm.to_writable_reg(),
|
||||
);
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(tmp_xmm.to_reg()), src.to_reg());
|
||||
let inst = Inst::xmm_cmp_rm_r(
|
||||
cmp_op,
|
||||
RegMem::reg(tmp_xmm.to_reg().to_reg()),
|
||||
src.to_reg().to_reg(),
|
||||
);
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
let handle_large = sink.get_label();
|
||||
@@ -2303,8 +2385,8 @@ pub(crate) fn emit(
|
||||
let inst = Inst::alu_rmi_r(
|
||||
*dst_size,
|
||||
AluRmiROpcode::Xor,
|
||||
RegMemImm::reg(dst.to_reg()),
|
||||
*dst,
|
||||
RegMemImm::reg(dst.to_reg().to_reg()),
|
||||
dst.to_writable_reg(),
|
||||
);
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
@@ -2321,10 +2403,15 @@ pub(crate) fn emit(
|
||||
// Actual truncation for small inputs: if the result is not positive, then we had an
|
||||
// overflow.
|
||||
|
||||
let inst = Inst::xmm_to_gpr(trunc_op, src.to_reg(), *dst, *dst_size);
|
||||
let inst = Inst::xmm_to_gpr(
|
||||
trunc_op,
|
||||
src.to_reg().to_reg(),
|
||||
dst.to_writable_reg(),
|
||||
*dst_size,
|
||||
);
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
let inst = Inst::cmp_rmi_r(*dst_size, RegMemImm::imm(0), dst.to_reg());
|
||||
let inst = Inst::cmp_rmi_r(*dst_size, RegMemImm::imm(0), dst.to_reg().to_reg());
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
one_way_jmp(sink, CC::NL, done); // if dst >= 0, jump to done
|
||||
@@ -2335,8 +2422,8 @@ pub(crate) fn emit(
|
||||
let inst = Inst::alu_rmi_r(
|
||||
*dst_size,
|
||||
AluRmiROpcode::Xor,
|
||||
RegMemImm::reg(dst.to_reg()),
|
||||
*dst,
|
||||
RegMemImm::reg(dst.to_reg().to_reg()),
|
||||
dst.to_writable_reg(),
|
||||
);
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
@@ -2352,13 +2439,22 @@ pub(crate) fn emit(
|
||||
|
||||
sink.bind_label(handle_large);
|
||||
|
||||
let inst = Inst::xmm_rm_r(sub_op, RegMem::reg(tmp_xmm.to_reg()), *src);
|
||||
let inst = Inst::xmm_rm_r(
|
||||
sub_op,
|
||||
RegMem::reg(tmp_xmm.to_reg().to_reg()),
|
||||
src.to_writable_reg(),
|
||||
);
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
let inst = Inst::xmm_to_gpr(trunc_op, src.to_reg(), *dst, *dst_size);
|
||||
let inst = Inst::xmm_to_gpr(
|
||||
trunc_op,
|
||||
src.to_reg().to_reg(),
|
||||
dst.to_writable_reg(),
|
||||
*dst_size,
|
||||
);
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
let inst = Inst::cmp_rmi_r(*dst_size, RegMemImm::imm(0), dst.to_reg());
|
||||
let inst = Inst::cmp_rmi_r(*dst_size, RegMemImm::imm(0), dst.to_reg().to_reg());
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
let next_is_large = sink.get_label();
|
||||
@@ -2374,7 +2470,7 @@ pub(crate) fn emit(
|
||||
} else {
|
||||
u32::max_value() as u64
|
||||
},
|
||||
*dst,
|
||||
dst.to_writable_reg(),
|
||||
);
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
@@ -2388,14 +2484,14 @@ pub(crate) fn emit(
|
||||
sink.bind_label(next_is_large);
|
||||
|
||||
if *dst_size == OperandSize::Size64 {
|
||||
let inst = Inst::imm(OperandSize::Size64, 1 << 63, *tmp_gpr);
|
||||
let inst = Inst::imm(OperandSize::Size64, 1 << 63, tmp_gpr.to_writable_reg());
|
||||
inst.emit(sink, info, state);
|
||||
|
||||
let inst = Inst::alu_rmi_r(
|
||||
OperandSize::Size64,
|
||||
AluRmiROpcode::Add,
|
||||
RegMemImm::reg(tmp_gpr.to_reg()),
|
||||
*dst,
|
||||
RegMemImm::reg(tmp_gpr.to_reg().to_reg()),
|
||||
dst.to_writable_reg(),
|
||||
);
|
||||
inst.emit(sink, info, state);
|
||||
} else {
|
||||
@@ -2403,7 +2499,7 @@ pub(crate) fn emit(
|
||||
OperandSize::Size32,
|
||||
AluRmiROpcode::Add,
|
||||
RegMemImm::imm(1 << 31),
|
||||
*dst,
|
||||
dst.to_writable_reg(),
|
||||
);
|
||||
inst.emit(sink, info, state);
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
use super::*;
|
||||
use crate::isa::x64;
|
||||
use alloc::boxed::Box;
|
||||
use alloc::vec::Vec;
|
||||
|
||||
impl Inst {
|
||||
@@ -21,8 +22,8 @@ impl Inst {
|
||||
debug_assert_eq!(src.to_reg().get_class(), RegClass::I64);
|
||||
Inst::Neg {
|
||||
size,
|
||||
src: src.to_reg(),
|
||||
dst: src,
|
||||
src: Gpr::new(src.to_reg()).unwrap(),
|
||||
dst: WritableGpr::from_writable_reg(src).unwrap(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -693,42 +694,66 @@ fn test_x64_emit() {
|
||||
//
|
||||
// Addr_IRRS, offset max simm8
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rax, rax, 0), w_r11),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(127, Gpr::new(rax).unwrap(), Gpr::new(rax).unwrap(), 0),
|
||||
w_r11,
|
||||
),
|
||||
"4C8B5C007F",
|
||||
"movq 127(%rax,%rax,1), %r11",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rdi, rax, 1), w_r11),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(127, Gpr::new(rdi).unwrap(), Gpr::new(rax).unwrap(), 1),
|
||||
w_r11,
|
||||
),
|
||||
"4C8B5C477F",
|
||||
"movq 127(%rdi,%rax,2), %r11",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r8, rax, 2), w_r11),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(127, Gpr::new(r8).unwrap(), Gpr::new(rax).unwrap(), 2),
|
||||
w_r11,
|
||||
),
|
||||
"4D8B5C807F",
|
||||
"movq 127(%r8,%rax,4), %r11",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r15, rax, 3), w_r11),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(127, Gpr::new(r15).unwrap(), Gpr::new(rax).unwrap(), 3),
|
||||
w_r11,
|
||||
),
|
||||
"4D8B5CC77F",
|
||||
"movq 127(%r15,%rax,8), %r11",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rax, rdi, 3), w_r11),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(127, Gpr::new(rax).unwrap(), Gpr::new(rdi).unwrap(), 3),
|
||||
w_r11,
|
||||
),
|
||||
"4C8B5CF87F",
|
||||
"movq 127(%rax,%rdi,8), %r11",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rdi, rdi, 2), w_r11),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(127, Gpr::new(rdi).unwrap(), Gpr::new(rdi).unwrap(), 2),
|
||||
w_r11,
|
||||
),
|
||||
"4C8B5CBF7F",
|
||||
"movq 127(%rdi,%rdi,4), %r11",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r8, rdi, 1), w_r11),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(127, Gpr::new(r8).unwrap(), Gpr::new(rdi).unwrap(), 1),
|
||||
w_r11,
|
||||
),
|
||||
"4D8B5C787F",
|
||||
"movq 127(%r8,%rdi,2), %r11",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r15, rdi, 0), w_r11),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(127, Gpr::new(r15).unwrap(), Gpr::new(rdi).unwrap(), 0),
|
||||
w_r11,
|
||||
),
|
||||
"4D8B5C3F7F",
|
||||
"movq 127(%r15,%rdi,1), %r11",
|
||||
));
|
||||
@@ -736,42 +761,106 @@ fn test_x64_emit() {
|
||||
// ========================================================
|
||||
// Addr_IRRS, offset min simm8
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, rax, r8, 2), w_r11),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(
|
||||
-128i32 as u32,
|
||||
Gpr::new(rax).unwrap(),
|
||||
Gpr::new(r8).unwrap(),
|
||||
2,
|
||||
),
|
||||
w_r11,
|
||||
),
|
||||
"4E8B5C8080",
|
||||
"movq -128(%rax,%r8,4), %r11",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, rdi, r8, 3), w_r11),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(
|
||||
-128i32 as u32,
|
||||
Gpr::new(rdi).unwrap(),
|
||||
Gpr::new(r8).unwrap(),
|
||||
3,
|
||||
),
|
||||
w_r11,
|
||||
),
|
||||
"4E8B5CC780",
|
||||
"movq -128(%rdi,%r8,8), %r11",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, r8, r8, 0), w_r11),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(
|
||||
-128i32 as u32,
|
||||
Gpr::new(r8).unwrap(),
|
||||
Gpr::new(r8).unwrap(),
|
||||
0,
|
||||
),
|
||||
w_r11,
|
||||
),
|
||||
"4F8B5C0080",
|
||||
"movq -128(%r8,%r8,1), %r11",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, r15, r8, 1), w_r11),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(
|
||||
-128i32 as u32,
|
||||
Gpr::new(r15).unwrap(),
|
||||
Gpr::new(r8).unwrap(),
|
||||
1,
|
||||
),
|
||||
w_r11,
|
||||
),
|
||||
"4F8B5C4780",
|
||||
"movq -128(%r15,%r8,2), %r11",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, rax, r15, 1), w_r11),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(
|
||||
-128i32 as u32,
|
||||
Gpr::new(rax).unwrap(),
|
||||
Gpr::new(r15).unwrap(),
|
||||
1,
|
||||
),
|
||||
w_r11,
|
||||
),
|
||||
"4E8B5C7880",
|
||||
"movq -128(%rax,%r15,2), %r11",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, rdi, r15, 0), w_r11),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(
|
||||
-128i32 as u32,
|
||||
Gpr::new(rdi).unwrap(),
|
||||
Gpr::new(r15).unwrap(),
|
||||
0,
|
||||
),
|
||||
w_r11,
|
||||
),
|
||||
"4E8B5C3F80",
|
||||
"movq -128(%rdi,%r15,1), %r11",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, r8, r15, 3), w_r11),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(
|
||||
-128i32 as u32,
|
||||
Gpr::new(r8).unwrap(),
|
||||
Gpr::new(r15).unwrap(),
|
||||
3,
|
||||
),
|
||||
w_r11,
|
||||
),
|
||||
"4F8B5CF880",
|
||||
"movq -128(%r8,%r15,8), %r11",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, r15, r15, 2), w_r11),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(
|
||||
-128i32 as u32,
|
||||
Gpr::new(r15).unwrap(),
|
||||
Gpr::new(r15).unwrap(),
|
||||
2,
|
||||
),
|
||||
w_r11,
|
||||
),
|
||||
"4F8B5CBF80",
|
||||
"movq -128(%r15,%r15,4), %r11",
|
||||
));
|
||||
@@ -779,42 +868,96 @@ fn test_x64_emit() {
|
||||
// ========================================================
|
||||
// Addr_IRRS, offset large positive simm32
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, rax, rax, 0), w_r11),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(
|
||||
0x4f6625be,
|
||||
Gpr::new(rax).unwrap(),
|
||||
Gpr::new(rax).unwrap(),
|
||||
0,
|
||||
),
|
||||
w_r11,
|
||||
),
|
||||
"4C8B9C00BE25664F",
|
||||
"movq 1332094398(%rax,%rax,1), %r11",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, rdi, rax, 1), w_r11),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(
|
||||
0x4f6625be,
|
||||
Gpr::new(rdi).unwrap(),
|
||||
Gpr::new(rax).unwrap(),
|
||||
1,
|
||||
),
|
||||
w_r11,
|
||||
),
|
||||
"4C8B9C47BE25664F",
|
||||
"movq 1332094398(%rdi,%rax,2), %r11",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, r8, rax, 2), w_r11),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(0x4f6625be, Gpr::new(r8).unwrap(), Gpr::new(rax).unwrap(), 2),
|
||||
w_r11,
|
||||
),
|
||||
"4D8B9C80BE25664F",
|
||||
"movq 1332094398(%r8,%rax,4), %r11",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, r15, rax, 3), w_r11),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(
|
||||
0x4f6625be,
|
||||
Gpr::new(r15).unwrap(),
|
||||
Gpr::new(rax).unwrap(),
|
||||
3,
|
||||
),
|
||||
w_r11,
|
||||
),
|
||||
"4D8B9CC7BE25664F",
|
||||
"movq 1332094398(%r15,%rax,8), %r11",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, rax, rdi, 3), w_r11),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(
|
||||
0x4f6625be,
|
||||
Gpr::new(rax).unwrap(),
|
||||
Gpr::new(rdi).unwrap(),
|
||||
3,
|
||||
),
|
||||
w_r11,
|
||||
),
|
||||
"4C8B9CF8BE25664F",
|
||||
"movq 1332094398(%rax,%rdi,8), %r11",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, rdi, rdi, 2), w_r11),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(
|
||||
0x4f6625be,
|
||||
Gpr::new(rdi).unwrap(),
|
||||
Gpr::new(rdi).unwrap(),
|
||||
2,
|
||||
),
|
||||
w_r11,
|
||||
),
|
||||
"4C8B9CBFBE25664F",
|
||||
"movq 1332094398(%rdi,%rdi,4), %r11",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, r8, rdi, 1), w_r11),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(0x4f6625be, Gpr::new(r8).unwrap(), Gpr::new(rdi).unwrap(), 1),
|
||||
w_r11,
|
||||
),
|
||||
"4D8B9C78BE25664F",
|
||||
"movq 1332094398(%r8,%rdi,2), %r11",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, r15, rdi, 0), w_r11),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(
|
||||
0x4f6625be,
|
||||
Gpr::new(r15).unwrap(),
|
||||
Gpr::new(rdi).unwrap(),
|
||||
0,
|
||||
),
|
||||
w_r11,
|
||||
),
|
||||
"4D8B9C3FBE25664F",
|
||||
"movq 1332094398(%r15,%rdi,1), %r11",
|
||||
));
|
||||
@@ -823,7 +966,12 @@ fn test_x64_emit() {
|
||||
// Addr_IRRS, offset large negative simm32
|
||||
insns.push((
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, rax, r8, 2),
|
||||
Amode::imm_reg_reg_shift(
|
||||
-0x264d1690i32 as u32,
|
||||
Gpr::new(rax).unwrap(),
|
||||
Gpr::new(r8).unwrap(),
|
||||
2,
|
||||
),
|
||||
w_r11,
|
||||
),
|
||||
"4E8B9C8070E9B2D9",
|
||||
@@ -831,7 +979,12 @@ fn test_x64_emit() {
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, rdi, r8, 3),
|
||||
Amode::imm_reg_reg_shift(
|
||||
-0x264d1690i32 as u32,
|
||||
Gpr::new(rdi).unwrap(),
|
||||
Gpr::new(r8).unwrap(),
|
||||
3,
|
||||
),
|
||||
w_r11,
|
||||
),
|
||||
"4E8B9CC770E9B2D9",
|
||||
@@ -839,7 +992,12 @@ fn test_x64_emit() {
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, r8, r8, 0),
|
||||
Amode::imm_reg_reg_shift(
|
||||
-0x264d1690i32 as u32,
|
||||
Gpr::new(r8).unwrap(),
|
||||
Gpr::new(r8).unwrap(),
|
||||
0,
|
||||
),
|
||||
w_r11,
|
||||
),
|
||||
"4F8B9C0070E9B2D9",
|
||||
@@ -847,7 +1005,12 @@ fn test_x64_emit() {
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, r15, r8, 1),
|
||||
Amode::imm_reg_reg_shift(
|
||||
-0x264d1690i32 as u32,
|
||||
Gpr::new(r15).unwrap(),
|
||||
Gpr::new(r8).unwrap(),
|
||||
1,
|
||||
),
|
||||
w_r11,
|
||||
),
|
||||
"4F8B9C4770E9B2D9",
|
||||
@@ -855,7 +1018,12 @@ fn test_x64_emit() {
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, rax, r15, 1),
|
||||
Amode::imm_reg_reg_shift(
|
||||
-0x264d1690i32 as u32,
|
||||
Gpr::new(rax).unwrap(),
|
||||
Gpr::new(r15).unwrap(),
|
||||
1,
|
||||
),
|
||||
w_r11,
|
||||
),
|
||||
"4E8B9C7870E9B2D9",
|
||||
@@ -863,7 +1031,12 @@ fn test_x64_emit() {
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, rdi, r15, 0),
|
||||
Amode::imm_reg_reg_shift(
|
||||
-0x264d1690i32 as u32,
|
||||
Gpr::new(rdi).unwrap(),
|
||||
Gpr::new(r15).unwrap(),
|
||||
0,
|
||||
),
|
||||
w_r11,
|
||||
),
|
||||
"4E8B9C3F70E9B2D9",
|
||||
@@ -871,7 +1044,12 @@ fn test_x64_emit() {
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, r8, r15, 3),
|
||||
Amode::imm_reg_reg_shift(
|
||||
-0x264d1690i32 as u32,
|
||||
Gpr::new(r8).unwrap(),
|
||||
Gpr::new(r15).unwrap(),
|
||||
3,
|
||||
),
|
||||
w_r11,
|
||||
),
|
||||
"4F8B9CF870E9B2D9",
|
||||
@@ -879,7 +1057,12 @@ fn test_x64_emit() {
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, r15, r15, 2),
|
||||
Amode::imm_reg_reg_shift(
|
||||
-0x264d1690i32 as u32,
|
||||
Gpr::new(r15).unwrap(),
|
||||
Gpr::new(r15).unwrap(),
|
||||
2,
|
||||
),
|
||||
w_r11,
|
||||
),
|
||||
"4F8B9CBF70E9B2D9",
|
||||
@@ -1828,42 +2011,66 @@ fn test_x64_emit() {
|
||||
// ========================================================
|
||||
// Mov64_M_R
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, rbx, 0), w_rcx),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(179, Gpr::new(rax).unwrap(), Gpr::new(rbx).unwrap(), 0),
|
||||
w_rcx,
|
||||
),
|
||||
"488B8C18B3000000",
|
||||
"movq 179(%rax,%rbx,1), %rcx",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, rbx, 0), w_r8),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(179, Gpr::new(rax).unwrap(), Gpr::new(rbx).unwrap(), 0),
|
||||
w_r8,
|
||||
),
|
||||
"4C8B8418B3000000",
|
||||
"movq 179(%rax,%rbx,1), %r8",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, r9, 0), w_rcx),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(179, Gpr::new(rax).unwrap(), Gpr::new(r9).unwrap(), 0),
|
||||
w_rcx,
|
||||
),
|
||||
"4A8B8C08B3000000",
|
||||
"movq 179(%rax,%r9,1), %rcx",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, r9, 0), w_r8),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(179, Gpr::new(rax).unwrap(), Gpr::new(r9).unwrap(), 0),
|
||||
w_r8,
|
||||
),
|
||||
"4E8B8408B3000000",
|
||||
"movq 179(%rax,%r9,1), %r8",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, rbx, 0), w_rcx),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(179, Gpr::new(r10).unwrap(), Gpr::new(rbx).unwrap(), 0),
|
||||
w_rcx,
|
||||
),
|
||||
"498B8C1AB3000000",
|
||||
"movq 179(%r10,%rbx,1), %rcx",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, rbx, 0), w_r8),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(179, Gpr::new(r10).unwrap(), Gpr::new(rbx).unwrap(), 0),
|
||||
w_r8,
|
||||
),
|
||||
"4D8B841AB3000000",
|
||||
"movq 179(%r10,%rbx,1), %r8",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, r9, 0), w_rcx),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(179, Gpr::new(r10).unwrap(), Gpr::new(r9).unwrap(), 0),
|
||||
w_rcx,
|
||||
),
|
||||
"4B8B8C0AB3000000",
|
||||
"movq 179(%r10,%r9,1), %rcx",
|
||||
));
|
||||
insns.push((
|
||||
Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, r9, 0), w_r8),
|
||||
Inst::mov64_m_r(
|
||||
Amode::imm_reg_reg_shift(179, Gpr::new(r10).unwrap(), Gpr::new(r9).unwrap(), 0),
|
||||
w_r8,
|
||||
),
|
||||
"4F8B840AB3000000",
|
||||
"movq 179(%r10,%r9,1), %r8",
|
||||
));
|
||||
@@ -1881,7 +2088,10 @@ fn test_x64_emit() {
|
||||
"lea 42(%r10), %r15",
|
||||
));
|
||||
insns.push((
|
||||
Inst::lea(Amode::imm_reg_reg_shift(179, r10, r9, 0), w_r8),
|
||||
Inst::lea(
|
||||
Amode::imm_reg_reg_shift(179, Gpr::new(r10).unwrap(), Gpr::new(r9).unwrap(), 0),
|
||||
w_r8,
|
||||
),
|
||||
"4F8D840AB3000000",
|
||||
"lea 179(%r10,%r9,1), %r8",
|
||||
));
|
||||
@@ -3115,7 +3325,12 @@ fn test_x64_emit() {
|
||||
Inst::cmove(
|
||||
OperandSize::Size16,
|
||||
CC::NO,
|
||||
RegMem::mem(Amode::imm_reg_reg_shift(37, rdi, rsi, 2)),
|
||||
RegMem::mem(Amode::imm_reg_reg_shift(
|
||||
37,
|
||||
Gpr::new(rdi).unwrap(),
|
||||
Gpr::new(rsi).unwrap(),
|
||||
2,
|
||||
)),
|
||||
w_r15,
|
||||
),
|
||||
"66440F417CB725",
|
||||
@@ -3157,12 +3372,22 @@ fn test_x64_emit() {
|
||||
insns.push((Inst::push64(RegMemImm::reg(rdi)), "57", "pushq %rdi"));
|
||||
insns.push((Inst::push64(RegMemImm::reg(r8)), "4150", "pushq %r8"));
|
||||
insns.push((
|
||||
Inst::push64(RegMemImm::mem(Amode::imm_reg_reg_shift(321, rsi, rcx, 3))),
|
||||
Inst::push64(RegMemImm::mem(Amode::imm_reg_reg_shift(
|
||||
321,
|
||||
Gpr::new(rsi).unwrap(),
|
||||
Gpr::new(rcx).unwrap(),
|
||||
3,
|
||||
))),
|
||||
"FFB4CE41010000",
|
||||
"pushq 321(%rsi,%rcx,8)",
|
||||
));
|
||||
insns.push((
|
||||
Inst::push64(RegMemImm::mem(Amode::imm_reg_reg_shift(321, r9, rbx, 2))),
|
||||
Inst::push64(RegMemImm::mem(Amode::imm_reg_reg_shift(
|
||||
321,
|
||||
Gpr::new(r9).unwrap(),
|
||||
Gpr::new(rbx).unwrap(),
|
||||
2,
|
||||
))),
|
||||
"41FFB49941010000",
|
||||
"pushq 321(%r9,%rbx,4)",
|
||||
));
|
||||
@@ -3226,12 +3451,22 @@ fn test_x64_emit() {
|
||||
insns.push((call_unknown(RegMem::reg(rbp)), "FFD5", "call *%rbp"));
|
||||
insns.push((call_unknown(RegMem::reg(r11)), "41FFD3", "call *%r11"));
|
||||
insns.push((
|
||||
call_unknown(RegMem::mem(Amode::imm_reg_reg_shift(321, rsi, rcx, 3))),
|
||||
call_unknown(RegMem::mem(Amode::imm_reg_reg_shift(
|
||||
321,
|
||||
Gpr::new(rsi).unwrap(),
|
||||
Gpr::new(rcx).unwrap(),
|
||||
3,
|
||||
))),
|
||||
"FF94CE41010000",
|
||||
"call *321(%rsi,%rcx,8)",
|
||||
));
|
||||
insns.push((
|
||||
call_unknown(RegMem::mem(Amode::imm_reg_reg_shift(321, r10, rdx, 2))),
|
||||
call_unknown(RegMem::mem(Amode::imm_reg_reg_shift(
|
||||
321,
|
||||
Gpr::new(r10).unwrap(),
|
||||
Gpr::new(rdx).unwrap(),
|
||||
2,
|
||||
))),
|
||||
"41FF949241010000",
|
||||
"call *321(%r10,%rdx,4)",
|
||||
));
|
||||
@@ -3301,12 +3536,22 @@ fn test_x64_emit() {
|
||||
"jmp *%r11",
|
||||
));
|
||||
insns.push((
|
||||
Inst::jmp_unknown(RegMem::mem(Amode::imm_reg_reg_shift(321, rsi, rcx, 3))),
|
||||
Inst::jmp_unknown(RegMem::mem(Amode::imm_reg_reg_shift(
|
||||
321,
|
||||
Gpr::new(rsi).unwrap(),
|
||||
Gpr::new(rcx).unwrap(),
|
||||
3,
|
||||
))),
|
||||
"FFA4CE41010000",
|
||||
"jmp *321(%rsi,%rcx,8)",
|
||||
));
|
||||
insns.push((
|
||||
Inst::jmp_unknown(RegMem::mem(Amode::imm_reg_reg_shift(321, r10, rdx, 2))),
|
||||
Inst::jmp_unknown(RegMem::mem(Amode::imm_reg_reg_shift(
|
||||
321,
|
||||
Gpr::new(r10).unwrap(),
|
||||
Gpr::new(rdx).unwrap(),
|
||||
2,
|
||||
))),
|
||||
"41FFA49241010000",
|
||||
"jmp *321(%r10,%rdx,4)",
|
||||
));
|
||||
@@ -3354,7 +3599,12 @@ fn test_x64_emit() {
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(
|
||||
SseOpcode::Addss,
|
||||
RegMem::mem(Amode::imm_reg_reg_shift(123, r10, rdx, 2)),
|
||||
RegMem::mem(Amode::imm_reg_reg_shift(
|
||||
123,
|
||||
Gpr::new(r10).unwrap(),
|
||||
Gpr::new(rdx).unwrap(),
|
||||
2,
|
||||
)),
|
||||
w_xmm0,
|
||||
),
|
||||
"F3410F5844927B",
|
||||
@@ -3379,7 +3629,12 @@ fn test_x64_emit() {
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(
|
||||
SseOpcode::Subss,
|
||||
RegMem::mem(Amode::imm_reg_reg_shift(321, r10, rax, 3)),
|
||||
RegMem::mem(Amode::imm_reg_reg_shift(
|
||||
321,
|
||||
Gpr::new(r10).unwrap(),
|
||||
Gpr::new(rax).unwrap(),
|
||||
3,
|
||||
)),
|
||||
w_xmm10,
|
||||
),
|
||||
"F3450F5C94C241010000",
|
||||
@@ -4200,10 +4455,17 @@ fn test_x64_emit() {
|
||||
|
||||
// ========================================================
|
||||
// Pertaining to atomics.
|
||||
let am1: SyntheticAmode = Amode::imm_reg_reg_shift(321, r10, rdx, 2).into();
|
||||
let am1: SyntheticAmode =
|
||||
Amode::imm_reg_reg_shift(321, Gpr::new(r10).unwrap(), Gpr::new(rdx).unwrap(), 2).into();
|
||||
// `am2` doesn't contribute any 1 bits to the rex prefix, so we must use it when testing
|
||||
// for retention of the apparently-redundant rex prefix in the 8-bit case.
|
||||
let am2: SyntheticAmode = Amode::imm_reg_reg_shift(-12345i32 as u32, rcx, rsi, 3).into();
|
||||
let am2: SyntheticAmode = Amode::imm_reg_reg_shift(
|
||||
-12345i32 as u32,
|
||||
Gpr::new(rcx).unwrap(),
|
||||
Gpr::new(rsi).unwrap(),
|
||||
3,
|
||||
)
|
||||
.into();
|
||||
|
||||
// A general 8-bit case.
|
||||
insns.push((
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -95,23 +95,23 @@
|
||||
|
||||
(rule (lower (has_type (multi_lane 8 16)
|
||||
(iadd x y)))
|
||||
(value_reg (paddb (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
(value_xmm (paddb (put_in_xmm x)
|
||||
(put_in_xmm_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
(iadd x y)))
|
||||
(value_reg (paddw (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
(value_xmm (paddw (put_in_xmm x)
|
||||
(put_in_xmm_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 32 4)
|
||||
(iadd x y)))
|
||||
(value_reg (paddd (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
(value_xmm (paddd (put_in_xmm x)
|
||||
(put_in_xmm_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 64 2)
|
||||
(iadd x y)))
|
||||
(value_reg (paddq (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
(value_xmm (paddq (put_in_xmm x)
|
||||
(put_in_xmm_mem y))))
|
||||
|
||||
;; `i128`
|
||||
(rule (lower (has_type $I128 (iadd x y)))
|
||||
@@ -131,25 +131,25 @@
|
||||
|
||||
(rule (lower (has_type (multi_lane 8 16)
|
||||
(sadd_sat x y)))
|
||||
(value_reg (paddsb (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
(value_xmm (paddsb (put_in_xmm x)
|
||||
(put_in_xmm_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
(sadd_sat x y)))
|
||||
(value_reg (paddsw (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
(value_xmm (paddsw (put_in_xmm x)
|
||||
(put_in_xmm_mem y))))
|
||||
|
||||
;;;; Rules for `uadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type (multi_lane 8 16)
|
||||
(uadd_sat x y)))
|
||||
(value_reg (paddusb (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
(value_xmm (paddusb (put_in_xmm x)
|
||||
(put_in_xmm_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
(uadd_sat x y)))
|
||||
(value_reg (paddusw (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
(value_xmm (paddusw (put_in_xmm x)
|
||||
(put_in_xmm_mem y))))
|
||||
|
||||
;;;; Rules for `iadd_ifcout` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
@@ -264,23 +264,23 @@
|
||||
|
||||
(rule (lower (has_type (multi_lane 8 16)
|
||||
(isub x y)))
|
||||
(value_reg (psubb (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
(value_xmm (psubb (put_in_xmm x)
|
||||
(put_in_xmm_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
(isub x y)))
|
||||
(value_reg (psubw (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
(value_xmm (psubw (put_in_xmm x)
|
||||
(put_in_xmm_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 32 4)
|
||||
(isub x y)))
|
||||
(value_reg (psubd (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
(value_xmm (psubd (put_in_xmm x)
|
||||
(put_in_xmm_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 64 2)
|
||||
(isub x y)))
|
||||
(value_reg (psubq (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
(value_xmm (psubq (put_in_xmm x)
|
||||
(put_in_xmm_mem y))))
|
||||
|
||||
;; `i128`
|
||||
(rule (lower (has_type $I128 (isub x y)))
|
||||
@@ -300,25 +300,25 @@
|
||||
|
||||
(rule (lower (has_type (multi_lane 8 16)
|
||||
(ssub_sat x y)))
|
||||
(value_reg (psubsb (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
(value_xmm (psubsb (put_in_xmm x)
|
||||
(put_in_xmm_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
(ssub_sat x y)))
|
||||
(value_reg (psubsw (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
(value_xmm (psubsw (put_in_xmm x)
|
||||
(put_in_xmm_mem y))))
|
||||
|
||||
;;;; Rules for `usub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type (multi_lane 8 16)
|
||||
(usub_sat x y)))
|
||||
(value_reg (psubusb (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
(value_xmm (psubusb (put_in_xmm x)
|
||||
(put_in_xmm_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
(usub_sat x y)))
|
||||
(value_reg (psubusw (put_in_reg x)
|
||||
(put_in_reg_mem y))))
|
||||
(value_xmm (psubusw (put_in_xmm x)
|
||||
(put_in_xmm_mem y))))
|
||||
|
||||
;;;; Rules for `band` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
@@ -360,14 +360,16 @@
|
||||
|
||||
;; SSE.
|
||||
|
||||
(decl sse_and (Type Reg RegMem) Reg)
|
||||
(decl sse_and (Type Xmm XmmMem) Xmm)
|
||||
(rule (sse_and $F32X4 x y) (andps x y))
|
||||
(rule (sse_and $F64X2 x y) (andpd x y))
|
||||
(rule (sse_and (multi_lane _bits _lanes) x y) (pand x y))
|
||||
|
||||
(rule (lower (has_type ty @ (multi_lane _bits _lanes)
|
||||
(band x y)))
|
||||
(value_reg (sse_and ty (put_in_reg x) (put_in_reg_mem y))))
|
||||
(value_xmm (sse_and ty
|
||||
(put_in_xmm x)
|
||||
(put_in_xmm_mem y))))
|
||||
|
||||
;; `{i,b}128`.
|
||||
|
||||
@@ -432,14 +434,16 @@
|
||||
|
||||
;; SSE.
|
||||
|
||||
(decl sse_or (Type Reg RegMem) Reg)
|
||||
(decl sse_or (Type Xmm XmmMem) Xmm)
|
||||
(rule (sse_or $F32X4 x y) (orps x y))
|
||||
(rule (sse_or $F64X2 x y) (orpd x y))
|
||||
(rule (sse_or (multi_lane _bits _lanes) x y) (por x y))
|
||||
|
||||
(rule (lower (has_type ty @ (multi_lane _bits _lanes)
|
||||
(bor x y)))
|
||||
(value_reg (sse_or ty (put_in_reg x) (put_in_reg_mem y))))
|
||||
(value_xmm (sse_or ty
|
||||
(put_in_xmm x)
|
||||
(put_in_xmm_mem y))))
|
||||
|
||||
;; `{i,b}128`.
|
||||
|
||||
@@ -507,7 +511,7 @@
|
||||
;; SSE.
|
||||
|
||||
(rule (lower (has_type ty @ (multi_lane _bits _lanes) (bxor x y)))
|
||||
(value_reg (sse_xor ty (put_in_reg x) (put_in_reg_mem y))))
|
||||
(value_xmm (sse_xor ty (put_in_xmm x) (put_in_xmm_mem y))))
|
||||
|
||||
;; `{i,b}128`.
|
||||
|
||||
@@ -578,16 +582,16 @@
|
||||
;; instructions. The basic idea, whether the amount to shift by is an immediate
|
||||
;; or not, is to use a 16x8 shift and then mask off the incorrect bits to 0s.
|
||||
(rule (lower (has_type $I8X16 (ishl src amt)))
|
||||
(let ((src_ Reg (put_in_reg src))
|
||||
(let ((src_ Xmm (put_in_xmm src))
|
||||
(amt_gpr RegMemImm (put_in_reg_mem_imm amt))
|
||||
(amt_xmm RegMemImm (reg_mem_imm_to_xmm amt_gpr))
|
||||
(amt_xmm XmmMemImm (mov_rmi_to_xmm amt_gpr))
|
||||
;; Shift `src` using 16x8. Unfortunately, a 16x8 shift will only be
|
||||
;; correct for half of the lanes; the others must be fixed up with
|
||||
;; the mask below.
|
||||
(unmasked Reg (psllw src_ amt_xmm))
|
||||
(unmasked Xmm (psllw src_ amt_xmm))
|
||||
(mask_addr SyntheticAmode (ishl_i8x16_mask amt_gpr))
|
||||
(mask Reg (x64_load $I8X16 mask_addr (ExtKind.None))))
|
||||
(value_reg (sse_and $I8X16 unmasked (RegMem.Reg mask)))))
|
||||
(value_xmm (sse_and $I8X16 unmasked (reg_mem_to_xmm_mem (RegMem.Reg mask))))))
|
||||
|
||||
;; Get the address of the mask to use when fixing up the lanes that weren't
|
||||
;; correctly generated by the 16x8 shift.
|
||||
@@ -608,25 +612,28 @@
|
||||
(extern constructor ishl_i8x16_mask_table ishl_i8x16_mask_table)
|
||||
(rule (ishl_i8x16_mask (RegMemImm.Reg amt))
|
||||
(let ((mask_table SyntheticAmode (ishl_i8x16_mask_table))
|
||||
(base_mask_addr Reg (lea mask_table))
|
||||
(base_mask_addr Gpr (lea mask_table))
|
||||
(mask_offset Reg (shl $I64 amt (Imm8Reg.Imm8 4))))
|
||||
(amode_to_synthetic_amode (amode_imm_reg_reg_shift 0
|
||||
base_mask_addr
|
||||
mask_offset
|
||||
(gpr_new mask_offset)
|
||||
0))))
|
||||
(rule (ishl_i8x16_mask (RegMemImm.Mem amt))
|
||||
(ishl_i8x16_mask (RegMemImm.Reg (x64_load $I64 amt (ExtKind.None)))))
|
||||
|
||||
;; 16x8, 32x4, and 64x2 shifts can each use a single instruction.
|
||||
|
||||
(rule (lower (has_type $I16X8 (ishl src amt)))
|
||||
(value_reg (psllw (put_in_reg src)
|
||||
(reg_mem_imm_to_xmm (put_in_reg_mem_imm amt)))))
|
||||
(value_xmm (psllw (put_in_xmm src)
|
||||
(mov_rmi_to_xmm (put_in_reg_mem_imm amt)))))
|
||||
|
||||
(rule (lower (has_type $I32X4 (ishl src amt)))
|
||||
(value_reg (pslld (put_in_reg src)
|
||||
(reg_mem_imm_to_xmm (put_in_reg_mem_imm amt)))))
|
||||
(value_xmm (pslld (put_in_xmm src)
|
||||
(mov_rmi_to_xmm (put_in_reg_mem_imm amt)))))
|
||||
|
||||
(rule (lower (has_type $I64X2 (ishl src amt)))
|
||||
(value_reg (psllq (put_in_reg src)
|
||||
(reg_mem_imm_to_xmm (put_in_reg_mem_imm amt)))))
|
||||
(value_xmm (psllq (put_in_xmm src)
|
||||
(mov_rmi_to_xmm (put_in_reg_mem_imm amt)))))
|
||||
|
||||
;;;; Rules for `ushr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
@@ -672,16 +679,18 @@
|
||||
;; There are no 8x16 shifts in x64. Do the same 16x8-shift-and-mask thing we do
|
||||
;; with 8x16 `ishl`.
|
||||
(rule (lower (has_type $I8X16 (ushr src amt)))
|
||||
(let ((src_ Reg (put_in_reg src))
|
||||
(let ((src_ Xmm (put_in_xmm src))
|
||||
(amt_gpr RegMemImm (put_in_reg_mem_imm amt))
|
||||
(amt_xmm RegMemImm (reg_mem_imm_to_xmm amt_gpr))
|
||||
(amt_xmm XmmMemImm (mov_rmi_to_xmm amt_gpr))
|
||||
;; Shift `src` using 16x8. Unfortunately, a 16x8 shift will only be
|
||||
;; correct for half of the lanes; the others must be fixed up with
|
||||
;; the mask below.
|
||||
(unmasked Reg (psrlw src_ amt_xmm))
|
||||
(unmasked Xmm (psrlw src_ amt_xmm))
|
||||
(mask_addr SyntheticAmode (ushr_i8x16_mask amt_gpr))
|
||||
(mask Reg (x64_load $I8X16 mask_addr (ExtKind.None))))
|
||||
(value_reg (sse_and $I8X16 unmasked (RegMem.Reg mask)))))
|
||||
(value_xmm (sse_and $I8X16
|
||||
unmasked
|
||||
(reg_mem_to_xmm_mem (RegMem.Reg mask))))))
|
||||
|
||||
;; Get the address of the mask to use when fixing up the lanes that weren't
|
||||
;; correctly generated by the 16x8 shift.
|
||||
@@ -702,25 +711,28 @@
|
||||
(extern constructor ushr_i8x16_mask_table ushr_i8x16_mask_table)
|
||||
(rule (ushr_i8x16_mask (RegMemImm.Reg amt))
|
||||
(let ((mask_table SyntheticAmode (ushr_i8x16_mask_table))
|
||||
(base_mask_addr Reg (lea mask_table))
|
||||
(base_mask_addr Gpr (lea mask_table))
|
||||
(mask_offset Reg (shl $I64 amt (Imm8Reg.Imm8 4))))
|
||||
(amode_to_synthetic_amode (amode_imm_reg_reg_shift 0
|
||||
base_mask_addr
|
||||
mask_offset
|
||||
(gpr_new mask_offset)
|
||||
0))))
|
||||
(rule (ushr_i8x16_mask (RegMemImm.Mem amt))
|
||||
(ushr_i8x16_mask (RegMemImm.Reg (x64_load $I64 amt (ExtKind.None)))))
|
||||
|
||||
;; 16x8, 32x4, and 64x2 shifts can each use a single instruction.
|
||||
|
||||
(rule (lower (has_type $I16X8 (ushr src amt)))
|
||||
(value_reg (psrlw (put_in_reg src)
|
||||
(reg_mem_imm_to_xmm (put_in_reg_mem_imm amt)))))
|
||||
(value_xmm (psrlw (put_in_xmm src)
|
||||
(mov_rmi_to_xmm (put_in_reg_mem_imm amt)))))
|
||||
|
||||
(rule (lower (has_type $I32X4 (ushr src amt)))
|
||||
(value_reg (psrld (put_in_reg src)
|
||||
(reg_mem_imm_to_xmm (put_in_reg_mem_imm amt)))))
|
||||
(value_xmm (psrld (put_in_xmm src)
|
||||
(mov_rmi_to_xmm (put_in_reg_mem_imm amt)))))
|
||||
|
||||
(rule (lower (has_type $I64X2 (ushr src amt)))
|
||||
(value_reg (psrlq (put_in_reg src)
|
||||
(reg_mem_imm_to_xmm (put_in_reg_mem_imm amt)))))
|
||||
(value_xmm (psrlq (put_in_xmm src)
|
||||
(mov_rmi_to_xmm (put_in_reg_mem_imm amt)))))
|
||||
|
||||
;;;; Rules for `sshr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
@@ -780,33 +792,35 @@
|
||||
;; shifted_hi.i16x8 = shift each lane of `high`
|
||||
;; result = [s0'', s1'', ..., s15'']
|
||||
(rule (lower (has_type $I8X16 (sshr src amt @ (value_type amt_ty))))
|
||||
(let ((src_ Reg (put_in_reg src))
|
||||
(let ((src_ Xmm (put_in_xmm src))
|
||||
;; In order for `packsswb` later to only use the high byte of each
|
||||
;; 16x8 lane, we shift right an extra 8 bits, relying on `psraw` to
|
||||
;; fill in the upper bits appropriately.
|
||||
(lo Reg (punpcklbw src_ (RegMem.Reg src_)))
|
||||
(hi Reg (punpckhbw src_ (RegMem.Reg src_)))
|
||||
(amt_ RegMemImm (sshr_i8x16_bigger_shift amt_ty (put_in_reg_mem_imm amt)))
|
||||
(shifted_lo Reg (psraw lo amt_))
|
||||
(shifted_hi Reg (psraw hi amt_)))
|
||||
(value_reg (packsswb shifted_lo (RegMem.Reg shifted_hi)))))
|
||||
(lo Xmm (punpcklbw src_ (xmm_to_xmm_mem src_)))
|
||||
(hi Xmm (punpckhbw src_ (xmm_to_xmm_mem src_)))
|
||||
(amt_ XmmMemImm (sshr_i8x16_bigger_shift amt_ty (put_in_reg_mem_imm amt)))
|
||||
(shifted_lo Xmm (psraw lo amt_))
|
||||
(shifted_hi Xmm (psraw hi amt_)))
|
||||
(value_xmm (packsswb shifted_lo (xmm_to_xmm_mem shifted_hi)))))
|
||||
|
||||
(decl sshr_i8x16_bigger_shift (Type RegMemImm) RegMemImm)
|
||||
(decl sshr_i8x16_bigger_shift (Type RegMemImm) XmmMemImm)
|
||||
(rule (sshr_i8x16_bigger_shift _ty (RegMemImm.Imm i))
|
||||
(RegMemImm.Imm (u32_add i 8)))
|
||||
(xmm_mem_imm_new (RegMemImm.Imm (u32_add i 8))))
|
||||
(rule (sshr_i8x16_bigger_shift ty (RegMemImm.Reg r))
|
||||
(reg_mem_imm_to_xmm (RegMemImm.Reg (add ty r (RegMemImm.Imm 8)))))
|
||||
(mov_rmi_to_xmm (RegMemImm.Reg (add ty r (RegMemImm.Imm 8)))))
|
||||
(rule (sshr_i8x16_bigger_shift ty rmi @ (RegMemImm.Mem _m))
|
||||
(reg_mem_imm_to_xmm (RegMemImm.Reg (add ty (imm ty 8) rmi))))
|
||||
(mov_rmi_to_xmm (RegMemImm.Reg (add ty (imm ty 8) rmi))))
|
||||
|
||||
;; `sshr.{i16x8,i32x4}` can be a simple `psra{w,d}`, we just have to make sure
|
||||
;; that if the shift amount is in a register, it is in an XMM register.
|
||||
|
||||
(rule (lower (has_type $I16X8 (sshr src amt)))
|
||||
(value_reg (psraw (put_in_reg src)
|
||||
(reg_mem_imm_to_xmm (put_in_reg_mem_imm amt)))))
|
||||
(value_xmm (psraw (put_in_xmm src)
|
||||
(mov_rmi_to_xmm (put_in_reg_mem_imm amt)))))
|
||||
|
||||
(rule (lower (has_type $I32X4 (sshr src amt)))
|
||||
(value_reg (psrad (put_in_reg src)
|
||||
(reg_mem_imm_to_xmm (put_in_reg_mem_imm amt)))))
|
||||
(value_xmm (psrad (put_in_xmm src)
|
||||
(mov_rmi_to_xmm (put_in_reg_mem_imm amt)))))
|
||||
|
||||
;; The `sshr.i64x2` CLIF instruction has no single x86 instruction in the older
|
||||
;; feature sets. Newer ones like AVX512VL + AVX512F include `vpsraq`, a 128-bit
|
||||
@@ -817,14 +831,15 @@
|
||||
;;
|
||||
;; (TODO: when EVEX support is available, add an alternate lowering here).
|
||||
(rule (lower (has_type $I64X2 (sshr src amt)))
|
||||
(let ((src_ Reg (put_in_reg src))
|
||||
(lo Reg (pextrd $I64 src_ 0))
|
||||
(hi Reg (pextrd $I64 src_ 1))
|
||||
(let ((src_ Xmm (put_in_xmm src))
|
||||
(lo Gpr (pextrd $I64 src_ 0))
|
||||
(hi Gpr (pextrd $I64 src_ 1))
|
||||
(amt_ Imm8Reg (put_masked_in_imm8_reg amt $I64))
|
||||
(shifted_lo Reg (sar $I64 lo amt_))
|
||||
(shifted_hi Reg (sar $I64 hi amt_)))
|
||||
(value_reg (make_i64x2_from_lanes (RegMem.Reg shifted_lo)
|
||||
(RegMem.Reg shifted_hi)))))
|
||||
(shifted_lo Reg (sar $I64 (gpr_to_reg lo) amt_))
|
||||
(shifted_hi Reg (sar $I64 (gpr_to_reg hi) amt_)))
|
||||
(value_xmm (make_i64x2_from_lanes (reg_mem_to_gpr_mem (RegMem.Reg shifted_lo))
|
||||
(reg_mem_to_gpr_mem (RegMem.Reg shifted_hi))))))
|
||||
|
||||
;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `i16` and `i8`: we need to extend the shift amount, or mask the
|
||||
@@ -910,35 +925,35 @@
|
||||
;; `i64` and smaller.
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty) (ineg x)))
|
||||
(value_reg (neg ty (put_in_reg x))))
|
||||
(value_gpr (neg ty (put_in_gpr x))))
|
||||
|
||||
;; SSE.
|
||||
|
||||
(rule (lower (has_type $I8X16 (ineg x)))
|
||||
(value_reg (psubb (imm $I8X16 0)
|
||||
(put_in_reg_mem x))))
|
||||
(value_xmm (psubb (xmm_new (imm $I8X16 0))
|
||||
(put_in_xmm_mem x))))
|
||||
|
||||
(rule (lower (has_type $I16X8 (ineg x)))
|
||||
(value_reg (psubw (imm $I16X8 0)
|
||||
(put_in_reg_mem x))))
|
||||
(value_xmm (psubw (xmm_new (imm $I16X8 0))
|
||||
(put_in_xmm_mem x))))
|
||||
|
||||
(rule (lower (has_type $I32X4 (ineg x)))
|
||||
(value_reg (psubd (imm $I32X4 0)
|
||||
(put_in_reg_mem x))))
|
||||
(value_xmm (psubd (xmm_new (imm $I32X4 0))
|
||||
(put_in_xmm_mem x))))
|
||||
|
||||
(rule (lower (has_type $I64X2 (ineg x)))
|
||||
(value_reg (psubq (imm $I64X2 0)
|
||||
(put_in_reg_mem x))))
|
||||
(value_xmm (psubq (xmm_new (imm $I64X2 0))
|
||||
(put_in_xmm_mem x))))
|
||||
|
||||
;;;; Rules for `avg_round` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type (multi_lane 8 16)
|
||||
(avg_round x y)))
|
||||
(value_reg (pavgb (put_in_reg x) (put_in_reg_mem y))))
|
||||
(value_xmm (pavgb (put_in_xmm x) (put_in_xmm_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
(avg_round x y)))
|
||||
(value_reg (pavgw (put_in_reg x) (put_in_reg_mem y))))
|
||||
(value_xmm (pavgw (put_in_xmm x) (put_in_xmm_mem y))))
|
||||
|
||||
;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
@@ -1017,10 +1032,10 @@
|
||||
;; (No i8x16 multiply.)
|
||||
|
||||
(rule (lower (has_type (multi_lane 16 8) (imul x y)))
|
||||
(value_reg (pmullw (put_in_reg x) (put_in_reg_mem y))))
|
||||
(value_xmm (pmullw (put_in_xmm x) (put_in_xmm_mem y))))
|
||||
|
||||
(rule (lower (has_type (multi_lane 32 4) (imul x y)))
|
||||
(value_reg (pmulld (put_in_reg x) (put_in_reg_mem y))))
|
||||
(value_xmm (pmulld (put_in_xmm x) (put_in_xmm_mem y))))
|
||||
|
||||
;; With AVX-512 we can implement `i64x2` multiplication with a single
|
||||
;; instruction.
|
||||
@@ -1028,7 +1043,7 @@
|
||||
(avx512dq_enabled)
|
||||
(multi_lane 64 2))
|
||||
(imul x y)))
|
||||
(value_reg (vpmullq (put_in_reg_mem x) (put_in_reg y))))
|
||||
(value_xmm (vpmullq (put_in_xmm_mem x) (put_in_xmm y))))
|
||||
|
||||
;; Otherwise, for i64x2 multiplication we describe a lane A as being composed of
|
||||
;; a 32-bit upper half "Ah" and a 32-bit lower half "Al". The 32-bit long hand
|
||||
@@ -1052,24 +1067,24 @@
|
||||
;; 32-bits when doing calculations, i.e., `Ah == A >> 32`.
|
||||
(rule (lower (has_type (multi_lane 64 2)
|
||||
(imul a b)))
|
||||
(let ((a0 Reg (put_in_reg a))
|
||||
(b0 Reg (put_in_reg b))
|
||||
(let ((a0 Xmm (put_in_xmm a))
|
||||
(b0 Xmm (put_in_xmm b))
|
||||
;; a_hi = A >> 32
|
||||
(a_hi Reg (psrlq a0 (RegMemImm.Imm 32)))
|
||||
(a_hi Xmm (psrlq a0 (xmm_mem_imm_new (RegMemImm.Imm 32))))
|
||||
;; ah_bl = Ah * Bl
|
||||
(ah_bl Reg (pmuludq a_hi (RegMem.Reg b0)))
|
||||
(ah_bl Xmm (pmuludq a_hi (xmm_to_xmm_mem b0)))
|
||||
;; b_hi = B >> 32
|
||||
(b_hi Reg (psrlq b0 (RegMemImm.Imm 32)))
|
||||
(b_hi Xmm (psrlq b0 (xmm_mem_imm_new (RegMemImm.Imm 32))))
|
||||
;; al_bh = Al * Bh
|
||||
(al_bh Reg (pmuludq a0 (RegMem.Reg b_hi)))
|
||||
(al_bh Xmm (pmuludq a0 (xmm_to_xmm_mem b_hi)))
|
||||
;; aa_bb = ah_bl + al_bh
|
||||
(aa_bb Reg (paddq ah_bl (RegMem.Reg al_bh)))
|
||||
(aa_bb Xmm (paddq ah_bl (xmm_to_xmm_mem al_bh)))
|
||||
;; aa_bb_shifted = aa_bb << 32
|
||||
(aa_bb_shifted Reg (psllq aa_bb (RegMemImm.Imm 32)))
|
||||
(aa_bb_shifted Xmm (psllq aa_bb (xmm_mem_imm_new (RegMemImm.Imm 32))))
|
||||
;; al_bl = Al * Bl
|
||||
(al_bl Reg (pmuludq a0 (RegMem.Reg b0))))
|
||||
(al_bl Xmm (pmuludq a0 (xmm_to_xmm_mem b0))))
|
||||
;; al_bl + aa_bb_shifted
|
||||
(value_reg (paddq al_bl (RegMem.Reg aa_bb_shifted)))))
|
||||
(value_xmm (paddq al_bl (xmm_to_xmm_mem aa_bb_shifted)))))
|
||||
|
||||
;; Special case for `i16x8.extmul_high_i8x16_s`.
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
@@ -1077,13 +1092,13 @@
|
||||
x)))
|
||||
(def_inst (swiden_high (and (value_type (multi_lane 8 16))
|
||||
y))))))
|
||||
(let ((x1 Reg (put_in_reg x))
|
||||
(x2 Reg (palignr x1 (RegMem.Reg x1) 8 (OperandSize.Size32)))
|
||||
(x3 Reg (pmovsxbw (RegMem.Reg x2)))
|
||||
(y1 Reg (put_in_reg y))
|
||||
(y2 Reg (palignr y1 (RegMem.Reg y1) 8 (OperandSize.Size32)))
|
||||
(y3 Reg (pmovsxbw (RegMem.Reg y2))))
|
||||
(value_reg (pmullw x3 (RegMem.Reg y3)))))
|
||||
(let ((x1 Xmm (put_in_xmm x))
|
||||
(x2 Xmm (palignr x1 (xmm_to_xmm_mem x1) 8 (OperandSize.Size32)))
|
||||
(x3 Xmm (pmovsxbw (xmm_to_xmm_mem x2)))
|
||||
(y1 Xmm (put_in_xmm y))
|
||||
(y2 Xmm (palignr y1 (xmm_to_xmm_mem y1) 8 (OperandSize.Size32)))
|
||||
(y3 Xmm (pmovsxbw (xmm_to_xmm_mem y2))))
|
||||
(value_xmm (pmullw x3 (xmm_to_xmm_mem y3)))))
|
||||
|
||||
;; Special case for `i32x4.extmul_high_i16x8_s`.
|
||||
(rule (lower (has_type (multi_lane 32 4)
|
||||
@@ -1091,11 +1106,11 @@
|
||||
x)))
|
||||
(def_inst (swiden_high (and (value_type (multi_lane 16 8))
|
||||
y))))))
|
||||
(let ((x2 Reg (put_in_reg x))
|
||||
(y2 Reg (put_in_reg y))
|
||||
(lo Reg (pmullw x2 (RegMem.Reg y2)))
|
||||
(hi Reg (pmulhw x2 (RegMem.Reg y2))))
|
||||
(value_reg (punpckhwd lo (RegMem.Reg hi)))))
|
||||
(let ((x2 Xmm (put_in_xmm x))
|
||||
(y2 Xmm (put_in_xmm y))
|
||||
(lo Xmm (pmullw x2 (xmm_to_xmm_mem y2)))
|
||||
(hi Xmm (pmulhw x2 (xmm_to_xmm_mem y2))))
|
||||
(value_xmm (punpckhwd lo (xmm_to_xmm_mem hi)))))
|
||||
|
||||
;; Special case for `i64x2.extmul_high_i32x4_s`.
|
||||
(rule (lower (has_type (multi_lane 64 2)
|
||||
@@ -1103,13 +1118,13 @@
|
||||
x)))
|
||||
(def_inst (swiden_high (and (value_type (multi_lane 32 4))
|
||||
y))))))
|
||||
(let ((x2 Reg (pshufd (put_in_reg_mem x)
|
||||
(let ((x2 Xmm (pshufd (put_in_xmm_mem x)
|
||||
0xFA
|
||||
(OperandSize.Size32)))
|
||||
(y2 Reg (pshufd (put_in_reg_mem y)
|
||||
(y2 Xmm (pshufd (put_in_xmm_mem y)
|
||||
0xFA
|
||||
(OperandSize.Size32))))
|
||||
(value_reg (pmuldq x2 (RegMem.Reg y2)))))
|
||||
(value_xmm (pmuldq x2 (xmm_to_xmm_mem y2)))))
|
||||
|
||||
;; Special case for `i16x8.extmul_low_i8x16_s`.
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
@@ -1117,9 +1132,9 @@
|
||||
x)))
|
||||
(def_inst (swiden_low (and (value_type (multi_lane 8 16))
|
||||
y))))))
|
||||
(let ((x2 Reg (pmovsxbw (put_in_reg_mem x)))
|
||||
(y2 Reg (pmovsxbw (put_in_reg_mem y))))
|
||||
(value_reg (pmullw x2 (RegMem.Reg y2)))))
|
||||
(let ((x2 Xmm (pmovsxbw (put_in_xmm_mem x)))
|
||||
(y2 Xmm (pmovsxbw (put_in_xmm_mem y))))
|
||||
(value_xmm (pmullw x2 (xmm_to_xmm_mem y2)))))
|
||||
|
||||
;; Special case for `i32x4.extmul_low_i16x8_s`.
|
||||
(rule (lower (has_type (multi_lane 32 4)
|
||||
@@ -1127,11 +1142,11 @@
|
||||
x)))
|
||||
(def_inst (swiden_low (and (value_type (multi_lane 16 8))
|
||||
y))))))
|
||||
(let ((x2 Reg (put_in_reg x))
|
||||
(y2 Reg (put_in_reg y))
|
||||
(lo Reg (pmullw x2 (RegMem.Reg y2)))
|
||||
(hi Reg (pmulhw x2 (RegMem.Reg y2))))
|
||||
(value_reg (punpcklwd lo (RegMem.Reg hi)))))
|
||||
(let ((x2 Xmm (put_in_xmm x))
|
||||
(y2 Xmm (put_in_xmm y))
|
||||
(lo Xmm (pmullw x2 (xmm_to_xmm_mem y2)))
|
||||
(hi Xmm (pmulhw x2 (xmm_to_xmm_mem y2))))
|
||||
(value_xmm (punpcklwd lo (xmm_to_xmm_mem hi)))))
|
||||
|
||||
;; Special case for `i64x2.extmul_low_i32x4_s`.
|
||||
(rule (lower (has_type (multi_lane 64 2)
|
||||
@@ -1139,13 +1154,13 @@
|
||||
x)))
|
||||
(def_inst (swiden_low (and (value_type (multi_lane 32 4))
|
||||
y))))))
|
||||
(let ((x2 Reg (pshufd (put_in_reg_mem x)
|
||||
(let ((x2 Xmm (pshufd (put_in_xmm_mem x)
|
||||
0x50
|
||||
(OperandSize.Size32)))
|
||||
(y2 Reg (pshufd (put_in_reg_mem y)
|
||||
(y2 Xmm (pshufd (put_in_xmm_mem y)
|
||||
0x50
|
||||
(OperandSize.Size32))))
|
||||
(value_reg (pmuldq x2 (RegMem.Reg y2)))))
|
||||
(value_xmm (pmuldq x2 (xmm_to_xmm_mem y2)))))
|
||||
|
||||
;; Special case for `i16x8.extmul_high_i8x16_u`.
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
@@ -1153,13 +1168,13 @@
|
||||
x)))
|
||||
(def_inst (uwiden_high (and (value_type (multi_lane 8 16))
|
||||
y))))))
|
||||
(let ((x1 Reg (put_in_reg x))
|
||||
(x2 Reg (palignr x1 (RegMem.Reg x1) 8 (OperandSize.Size32)))
|
||||
(x3 Reg (pmovzxbw (RegMem.Reg x2)))
|
||||
(y1 Reg (put_in_reg y))
|
||||
(y2 Reg (palignr y1 (RegMem.Reg y1) 8 (OperandSize.Size32)))
|
||||
(y3 Reg (pmovzxbw (RegMem.Reg y2))))
|
||||
(value_reg (pmullw x3 (RegMem.Reg y3)))))
|
||||
(let ((x1 Xmm (put_in_xmm x))
|
||||
(x2 Xmm (palignr x1 (xmm_to_xmm_mem x1) 8 (OperandSize.Size32)))
|
||||
(x3 Xmm (pmovzxbw (xmm_to_xmm_mem x2)))
|
||||
(y1 Xmm (put_in_xmm y))
|
||||
(y2 Xmm (palignr y1 (xmm_to_xmm_mem y1) 8 (OperandSize.Size32)))
|
||||
(y3 Xmm (pmovzxbw (xmm_to_xmm_mem y2))))
|
||||
(value_xmm (pmullw x3 (xmm_to_xmm_mem y3)))))
|
||||
|
||||
;; Special case for `i32x4.extmul_high_i16x8_u`.
|
||||
(rule (lower (has_type (multi_lane 32 4)
|
||||
@@ -1167,11 +1182,11 @@
|
||||
x)))
|
||||
(def_inst (uwiden_high (and (value_type (multi_lane 16 8))
|
||||
y))))))
|
||||
(let ((x2 Reg (put_in_reg x))
|
||||
(y2 Reg (put_in_reg y))
|
||||
(lo Reg (pmullw x2 (RegMem.Reg y2)))
|
||||
(hi Reg (pmulhuw x2 (RegMem.Reg y2))))
|
||||
(value_reg (punpckhwd lo (RegMem.Reg hi)))))
|
||||
(let ((x2 Xmm (put_in_xmm x))
|
||||
(y2 Xmm (put_in_xmm y))
|
||||
(lo Xmm (pmullw x2 (xmm_to_xmm_mem y2)))
|
||||
(hi Xmm (pmulhuw x2 (xmm_to_xmm_mem y2))))
|
||||
(value_xmm (punpckhwd lo (xmm_to_xmm_mem hi)))))
|
||||
|
||||
;; Special case for `i64x2.extmul_high_i32x4_u`.
|
||||
(rule (lower (has_type (multi_lane 64 2)
|
||||
@@ -1179,13 +1194,13 @@
|
||||
x)))
|
||||
(def_inst (uwiden_high (and (value_type (multi_lane 32 4))
|
||||
y))))))
|
||||
(let ((x2 Reg (pshufd (put_in_reg_mem x)
|
||||
(let ((x2 Xmm (pshufd (put_in_xmm_mem x)
|
||||
0xFA
|
||||
(OperandSize.Size32)))
|
||||
(y2 Reg (pshufd (put_in_reg_mem y)
|
||||
(y2 Xmm (pshufd (put_in_xmm_mem y)
|
||||
0xFA
|
||||
(OperandSize.Size32))))
|
||||
(value_reg (pmuludq x2 (RegMem.Reg y2)))))
|
||||
(value_xmm (pmuludq x2 (xmm_to_xmm_mem y2)))))
|
||||
|
||||
;; Special case for `i16x8.extmul_low_i8x16_u`.
|
||||
(rule (lower (has_type (multi_lane 16 8)
|
||||
@@ -1193,9 +1208,9 @@
|
||||
x)))
|
||||
(def_inst (uwiden_low (and (value_type (multi_lane 8 16))
|
||||
y))))))
|
||||
(let ((x2 Reg (pmovzxbw (put_in_reg_mem x)))
|
||||
(y2 Reg (pmovzxbw (put_in_reg_mem y))))
|
||||
(value_reg (pmullw x2 (RegMem.Reg y2)))))
|
||||
(let ((x2 Xmm (pmovzxbw (put_in_xmm_mem x)))
|
||||
(y2 Xmm (pmovzxbw (put_in_xmm_mem y))))
|
||||
(value_xmm (pmullw x2 (xmm_to_xmm_mem y2)))))
|
||||
|
||||
;; Special case for `i32x4.extmul_low_i16x8_u`.
|
||||
(rule (lower (has_type (multi_lane 32 4)
|
||||
@@ -1203,11 +1218,11 @@
|
||||
x)))
|
||||
(def_inst (uwiden_low (and (value_type (multi_lane 16 8))
|
||||
y))))))
|
||||
(let ((x2 Reg (put_in_reg x))
|
||||
(y2 Reg (put_in_reg y))
|
||||
(lo Reg (pmullw x2 (RegMem.Reg y2)))
|
||||
(hi Reg (pmulhuw x2 (RegMem.Reg y2))))
|
||||
(value_reg (punpcklwd lo (RegMem.Reg hi)))))
|
||||
(let ((x2 Xmm (put_in_xmm x))
|
||||
(y2 Xmm (put_in_xmm y))
|
||||
(lo Xmm (pmullw x2 (xmm_to_xmm_mem y2)))
|
||||
(hi Xmm (pmulhuw x2 (xmm_to_xmm_mem y2))))
|
||||
(value_xmm (punpcklwd lo (xmm_to_xmm_mem hi)))))
|
||||
|
||||
;; Special case for `i64x2.extmul_low_i32x4_u`.
|
||||
(rule (lower (has_type (multi_lane 64 2)
|
||||
@@ -1215,17 +1230,17 @@
|
||||
x)))
|
||||
(def_inst (uwiden_low (and (value_type (multi_lane 32 4))
|
||||
y))))))
|
||||
(let ((x2 Reg (pshufd (put_in_reg_mem x)
|
||||
(let ((x2 Xmm (pshufd (put_in_xmm_mem x)
|
||||
0x50
|
||||
(OperandSize.Size32)))
|
||||
(y2 Reg (pshufd (put_in_reg_mem y)
|
||||
(y2 Xmm (pshufd (put_in_xmm_mem y)
|
||||
0x50
|
||||
(OperandSize.Size32))))
|
||||
(value_reg (pmuludq x2 (RegMem.Reg y2)))))
|
||||
(value_xmm (pmuludq x2 (xmm_to_xmm_mem y2)))))
|
||||
|
||||
;;;; Rules for `band_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(decl sse_and_not (Type Reg RegMem) Reg)
|
||||
(decl sse_and_not (Type Xmm XmmMem) Xmm)
|
||||
(rule (sse_and_not $F32X4 x y) (andnps x y))
|
||||
(rule (sse_and_not $F64X2 x y) (andnpd x y))
|
||||
(rule (sse_and_not (multi_lane _bits _lanes) x y) (pandn x y))
|
||||
@@ -1238,64 +1253,66 @@
|
||||
;;
|
||||
;; pandn(x, y) = and(not(x), y)
|
||||
(rule (lower (has_type ty (band_not x y)))
|
||||
(value_reg (sse_and_not ty
|
||||
(put_in_reg y)
|
||||
(put_in_reg_mem x))))
|
||||
(value_xmm (sse_and_not ty
|
||||
(put_in_xmm y)
|
||||
(put_in_xmm_mem x))))
|
||||
|
||||
;;;; Rules for `iabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type $I8X16 (iabs x)))
|
||||
(value_reg (pabsb (put_in_reg_mem x))))
|
||||
(value_xmm (pabsb (put_in_xmm_mem x))))
|
||||
|
||||
(rule (lower (has_type $I16X8 (iabs x)))
|
||||
(value_reg (pabsw (put_in_reg_mem x))))
|
||||
(value_xmm (pabsw (put_in_xmm_mem x))))
|
||||
|
||||
(rule (lower (has_type $I32X4 (iabs x)))
|
||||
(value_reg (pabsd (put_in_reg_mem x))))
|
||||
(value_xmm (pabsd (put_in_xmm_mem x))))
|
||||
|
||||
;; When AVX512 is available, we can use a single `vpabsq` instruction.
|
||||
(rule (lower (has_type (and (avx512vl_enabled)
|
||||
(avx512f_enabled)
|
||||
$I64X2)
|
||||
(iabs x)))
|
||||
(value_reg (vpabsq (put_in_reg_mem x))))
|
||||
(value_xmm (vpabsq (put_in_xmm_mem x))))
|
||||
|
||||
;; Otherwise, we use a separate register, `neg`, to contain the results of `0 -
|
||||
;; Otherwise, we use a separate register, `neg`, to contain the results of `0 -
|
||||
;; x` and then blend in those results with `blendvpd` if the MSB of `neg` was
|
||||
;; set to 1 (i.e. if `neg` was negative or, conversely, if `x` was originally
|
||||
;; positive).
|
||||
(rule (lower (has_type $I64X2 (iabs x)))
|
||||
(let ((rx Reg (put_in_reg x))
|
||||
(neg Reg (psubq (imm $I64X2 0) (RegMem.Reg rx))))
|
||||
(value_reg (blendvpd neg (RegMem.Reg rx) neg))))
|
||||
(let ((rx Xmm (put_in_xmm x))
|
||||
(neg Xmm (psubq (xmm_new (imm $I64X2 0)) (xmm_to_xmm_mem rx))))
|
||||
(value_xmm (blendvpd neg (xmm_to_xmm_mem rx) neg))))
|
||||
|
||||
;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Special case for `f32x4.abs`.
|
||||
(rule (lower (has_type $F32X4 (fabs x)))
|
||||
(value_reg (andps (put_in_reg x)
|
||||
(RegMem.Reg (psrld (vector_all_ones $F32X4) (RegMemImm.Imm 1))))))
|
||||
(value_xmm (andps (put_in_xmm x)
|
||||
(xmm_to_xmm_mem (psrld (vector_all_ones $F32X4)
|
||||
(xmm_mem_imm_new (RegMemImm.Imm 1)))))))
|
||||
|
||||
;; Special case for `f64x2.abs`.
|
||||
(rule (lower (has_type $F64X2 (fabs x)))
|
||||
(value_reg (andpd (put_in_reg x)
|
||||
(RegMem.Reg (psrlq (vector_all_ones $F64X2) (RegMemImm.Imm 1))))))
|
||||
(value_xmm (andpd (put_in_xmm x)
|
||||
(xmm_to_xmm_mem (psrlq (vector_all_ones $F64X2)
|
||||
(xmm_mem_imm_new (RegMemImm.Imm 1)))))))
|
||||
|
||||
;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `i64` and smaller.
|
||||
|
||||
(rule (lower (has_type (fits_in_64 ty) (bnot x)))
|
||||
(value_reg (not ty (put_in_reg x))))
|
||||
(value_gpr (not ty (put_in_gpr x))))
|
||||
|
||||
;; `i128`.
|
||||
|
||||
(decl i128_not (Value) ValueRegs)
|
||||
(rule (i128_not x)
|
||||
(let ((x_regs ValueRegs (put_in_regs x))
|
||||
(x_lo Reg (value_regs_get x_regs 0))
|
||||
(x_hi Reg (value_regs_get x_regs 1)))
|
||||
(value_regs (not $I64 x_lo)
|
||||
(x_lo Gpr (gpr_new (value_regs_get x_regs 0)))
|
||||
(x_hi Gpr (gpr_new (value_regs_get x_regs 1))))
|
||||
(value_gprs (not $I64 x_lo)
|
||||
(not $I64 x_hi))))
|
||||
|
||||
(rule (lower (has_type $I128 (bnot x)))
|
||||
@@ -1307,7 +1324,7 @@
|
||||
;; Special case for vector-types where bit-negation is an xor against an
|
||||
;; all-one value
|
||||
(rule (lower (has_type ty @ (multi_lane _bits _lanes) (bnot x)))
|
||||
(value_reg (sse_xor ty (put_in_reg x) (RegMem.Reg (vector_all_ones ty)))))
|
||||
(value_xmm (sse_xor ty (put_in_xmm x) (xmm_to_xmm_mem (vector_all_ones ty)))))
|
||||
|
||||
;;;; Rules for `bitselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
@@ -1318,48 +1335,53 @@
|
||||
;; a = and if_true, condition
|
||||
;; b = and_not condition, if_false
|
||||
;; or b, a
|
||||
(let ((cond_reg Reg (put_in_reg condition))
|
||||
(a Reg (sse_and ty (put_in_reg if_true) (RegMem.Reg cond_reg)))
|
||||
(b Reg (sse_and_not ty cond_reg (put_in_reg_mem if_false))))
|
||||
(value_reg (sse_or ty b (RegMem.Reg a)))))
|
||||
(let ((cond_xmm Xmm (put_in_xmm condition))
|
||||
(a Xmm (sse_and ty (put_in_xmm if_true) (xmm_to_xmm_mem cond_xmm)))
|
||||
(b Xmm (sse_and_not ty cond_xmm (put_in_xmm_mem if_false))))
|
||||
(value_xmm (sse_or ty b (xmm_to_xmm_mem a)))))
|
||||
|
||||
;;;; Rules for `vselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type ty @ (multi_lane _bits _lanes)
|
||||
(vselect condition if_true if_false)))
|
||||
(value_reg (sse_blend ty
|
||||
(put_in_reg_mem condition)
|
||||
(put_in_reg_mem if_true)
|
||||
(put_in_reg if_false))))
|
||||
(value_xmm (sse_blend ty
|
||||
(put_in_xmm_mem condition)
|
||||
(put_in_xmm_mem if_true)
|
||||
(put_in_xmm if_false))))
|
||||
|
||||
;;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (insertlane vec @ (value_type ty) val (u8_from_uimm8 idx)))
|
||||
(value_reg (vec_insert_lane ty (put_in_reg vec) (put_in_reg_mem val) idx)))
|
||||
(value_xmm (vec_insert_lane ty (put_in_xmm vec) (put_in_reg_mem val) idx)))
|
||||
|
||||
;; Helper function used below for `insertlane` but also here for other
|
||||
;; lowerings.
|
||||
;;
|
||||
;; Note that the `Type` used here is the type of vector the insertion is
|
||||
;; happening into, i.e. the type of the `vec` register argument.
|
||||
(decl vec_insert_lane (Type Reg RegMem u8) Reg)
|
||||
(decl vec_insert_lane (Type Xmm RegMem u8) Xmm)
|
||||
|
||||
;; i8x16.replace_lane
|
||||
(rule (vec_insert_lane $I8X16 vec val idx) (pinsrb vec val idx))
|
||||
(rule (vec_insert_lane $I8X16 vec val idx)
|
||||
(pinsrb vec (reg_mem_to_gpr_mem val) idx))
|
||||
|
||||
;; i16x8.replace_lane
|
||||
(rule (vec_insert_lane $I16X8 vec val idx) (pinsrw vec val idx))
|
||||
(rule (vec_insert_lane $I16X8 vec val idx)
|
||||
(pinsrw vec (reg_mem_to_gpr_mem val) idx))
|
||||
|
||||
;; i32x4.replace_lane
|
||||
(rule (vec_insert_lane $I32X4 vec val idx) (pinsrd vec val idx (OperandSize.Size32)))
|
||||
(rule (vec_insert_lane $I32X4 vec val idx)
|
||||
(pinsrd vec (reg_mem_to_gpr_mem val) idx (OperandSize.Size32)))
|
||||
|
||||
;; i64x2.replace_lane
|
||||
(rule (vec_insert_lane $I64X2 vec val idx) (pinsrd vec val idx (OperandSize.Size64)))
|
||||
(rule (vec_insert_lane $I64X2 vec val idx)
|
||||
(pinsrd vec (reg_mem_to_gpr_mem val) idx (OperandSize.Size64)))
|
||||
|
||||
;; f32x4.replace_lane
|
||||
(rule (vec_insert_lane $F32X4 vec val idx) (insertps vec val (sse_insertps_lane_imm idx)))
|
||||
(rule (vec_insert_lane $F32X4 vec val idx)
|
||||
(insertps vec (reg_mem_to_xmm_mem val) (sse_insertps_lane_imm idx)))
|
||||
|
||||
;; external rust code used to calculate the immediate value to `insertps`
|
||||
;; External Rust code used to calculate the immediate value to `insertps`.
|
||||
(decl sse_insertps_lane_imm (u8) u8)
|
||||
(extern constructor sse_insertps_lane_imm sse_insertps_lane_imm)
|
||||
|
||||
@@ -1378,60 +1400,63 @@
|
||||
;; load from memory into a temp register and then the second `movsd` (modeled
|
||||
;; internally as `xmm_rm_r`) will merge the temp register into our `vec`
|
||||
;; register.
|
||||
(rule (vec_insert_lane $F64X2 vec (RegMem.Reg val) 0) (movsd vec (RegMem.Reg val)))
|
||||
(rule (vec_insert_lane $F64X2 vec (RegMem.Reg val) 0)
|
||||
(movsd vec (reg_mem_to_xmm_mem (RegMem.Reg val))))
|
||||
(rule (vec_insert_lane $F64X2 vec mem 0)
|
||||
(movsd vec (RegMem.Reg (xmm_unary_rm_r (SseOpcode.Movsd) mem))))
|
||||
(movsd vec (xmm_to_xmm_mem (xmm_unary_rm_r (SseOpcode.Movsd)
|
||||
(reg_mem_to_xmm_mem mem)))))
|
||||
|
||||
;; f64x2.replace_lane 1
|
||||
;;
|
||||
;; Here the `movlhps` instruction is used specifically to specialize moving
|
||||
;; into the second lane where unlike above cases we're not using the lane
|
||||
;; immediate as an immediate to the instruction itself.
|
||||
(rule (vec_insert_lane $F64X2 vec val 1) (movlhps vec val))
|
||||
(rule (vec_insert_lane $F64X2 vec val 1)
|
||||
(movlhps vec (reg_mem_to_xmm_mem val)))
|
||||
|
||||
;;;; Rules for `imax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type $I8X16 (imax x y)))
|
||||
(value_reg (pmaxsb (put_in_reg x) (put_in_reg_mem y))))
|
||||
(value_xmm (pmaxsb (put_in_xmm x) (put_in_xmm_mem y))))
|
||||
|
||||
(rule (lower (has_type $I16X8 (imax x y)))
|
||||
(value_reg (pmaxsw (put_in_reg x) (put_in_reg_mem y))))
|
||||
(value_xmm (pmaxsw (put_in_xmm x) (put_in_xmm_mem y))))
|
||||
|
||||
(rule (lower (has_type $I32X4 (imax x y)))
|
||||
(value_reg (pmaxsd (put_in_reg x) (put_in_reg_mem y))))
|
||||
(value_xmm (pmaxsd (put_in_xmm x) (put_in_xmm_mem y))))
|
||||
|
||||
;;;; Rules for `imin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type $I8X16 (imin x y)))
|
||||
(value_reg (pminsb (put_in_reg x) (put_in_reg_mem y))))
|
||||
(value_xmm (pminsb (put_in_xmm x) (put_in_xmm_mem y))))
|
||||
|
||||
(rule (lower (has_type $I16X8 (imin x y)))
|
||||
(value_reg (pminsw (put_in_reg x) (put_in_reg_mem y))))
|
||||
(value_xmm (pminsw (put_in_xmm x) (put_in_xmm_mem y))))
|
||||
|
||||
(rule (lower (has_type $I32X4 (imin x y)))
|
||||
(value_reg (pminsd (put_in_reg x) (put_in_reg_mem y))))
|
||||
(value_xmm (pminsd (put_in_xmm x) (put_in_xmm_mem y))))
|
||||
|
||||
;;;; Rules for `umax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type $I8X16 (umax x y)))
|
||||
(value_reg (pmaxub (put_in_reg x) (put_in_reg_mem y))))
|
||||
(value_xmm (pmaxub (put_in_xmm x) (put_in_xmm_mem y))))
|
||||
|
||||
(rule (lower (has_type $I16X8 (umax x y)))
|
||||
(value_reg (pmaxuw (put_in_reg x) (put_in_reg_mem y))))
|
||||
(value_xmm (pmaxuw (put_in_xmm x) (put_in_xmm_mem y))))
|
||||
|
||||
(rule (lower (has_type $I32X4 (umax x y)))
|
||||
(value_reg (pmaxud (put_in_reg x) (put_in_reg_mem y))))
|
||||
(value_xmm (pmaxud (put_in_xmm x) (put_in_xmm_mem y))))
|
||||
|
||||
;;;; Rules for `umin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type $I8X16 (umin x y)))
|
||||
(value_reg (pminub (put_in_reg x) (put_in_reg_mem y))))
|
||||
(value_xmm (pminub (put_in_xmm x) (put_in_xmm_mem y))))
|
||||
|
||||
(rule (lower (has_type $I16X8 (umin x y)))
|
||||
(value_reg (pminuw (put_in_reg x) (put_in_reg_mem y))))
|
||||
(value_xmm (pminuw (put_in_xmm x) (put_in_xmm_mem y))))
|
||||
|
||||
(rule (lower (has_type $I32X4 (umin x y)))
|
||||
(value_reg (pminud (put_in_reg x) (put_in_reg_mem y))))
|
||||
(value_xmm (pminud (put_in_xmm x) (put_in_xmm_mem y))))
|
||||
|
||||
;;;; Rules for `trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
//! Lowering rules for X64.
|
||||
|
||||
// ISLE integration glue.
|
||||
mod isle;
|
||||
pub(super) mod isle;
|
||||
|
||||
use crate::data_value::DataValue;
|
||||
use crate::ir::{
|
||||
@@ -1057,7 +1057,13 @@ fn lower_to_amode<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput, offset: i
|
||||
)
|
||||
};
|
||||
|
||||
return Amode::imm_reg_reg_shift(offset as u32, base, index, shift).with_flags(flags);
|
||||
return Amode::imm_reg_reg_shift(
|
||||
offset as u32,
|
||||
Gpr::new(base).unwrap(),
|
||||
Gpr::new(index).unwrap(),
|
||||
shift,
|
||||
)
|
||||
.with_flags(flags);
|
||||
}
|
||||
|
||||
let input = put_input_in_reg(ctx, spec);
|
||||
@@ -3950,7 +3956,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
let index = put_input_in_reg(ctx, inputs[1]);
|
||||
let shift = 0;
|
||||
let flags = ctx.memflags(insn).expect("load should have memflags");
|
||||
Amode::imm_reg_reg_shift(offset as u32, base, index, shift).with_flags(flags)
|
||||
Amode::imm_reg_reg_shift(
|
||||
offset as u32,
|
||||
Gpr::new(base).unwrap(),
|
||||
Gpr::new(index).unwrap(),
|
||||
shift,
|
||||
)
|
||||
.with_flags(flags)
|
||||
}
|
||||
_ => unreachable!(),
|
||||
};
|
||||
@@ -4054,7 +4066,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
let index = put_input_in_reg(ctx, inputs[2]);
|
||||
let shift = 0;
|
||||
let flags = ctx.memflags(insn).expect("store should have memflags");
|
||||
Amode::imm_reg_reg_shift(offset as u32, base, index, shift).with_flags(flags)
|
||||
Amode::imm_reg_reg_shift(
|
||||
offset as u32,
|
||||
Gpr::new(base).unwrap(),
|
||||
Gpr::new(index).unwrap(),
|
||||
shift,
|
||||
)
|
||||
.with_flags(flags)
|
||||
}
|
||||
|
||||
_ => unreachable!(),
|
||||
|
||||
@@ -1,26 +1,28 @@
|
||||
//! ISLE integration glue code for x64 lowering.
|
||||
|
||||
// Pull in the ISLE generated code.
|
||||
mod generated_code;
|
||||
pub(crate) mod generated_code;
|
||||
use generated_code::MInst;
|
||||
use regalloc::Writable;
|
||||
|
||||
// Types that the generated ISLE code uses via `use super::*`.
|
||||
use super::{
|
||||
is_mergeable_load, lower_to_amode, AluRmiROpcode, Inst as MInst, OperandSize, Reg, RegMemImm,
|
||||
};
|
||||
use super::{is_mergeable_load, lower_to_amode, Reg};
|
||||
use crate::{
|
||||
ir::{immediates::*, types::*, Inst, InstructionData, Opcode, TrapCode, Value, ValueList},
|
||||
isa::x64::{
|
||||
inst::{
|
||||
args::{
|
||||
Amode, Avx512Opcode, CmpOpcode, ExtKind, ExtMode, FcmpImm, Imm8Reg, RegMem,
|
||||
ShiftKind, SseOpcode, SyntheticAmode, CC,
|
||||
},
|
||||
regs, x64_map_regs,
|
||||
},
|
||||
settings::Flags as IsaFlags,
|
||||
ir::{
|
||||
immediates::*, types::*, Inst, InstructionData, Opcode, TrapCode, Value, ValueLabel,
|
||||
ValueList,
|
||||
},
|
||||
isa::{
|
||||
settings::Flags,
|
||||
unwind::UnwindInst,
|
||||
x64::{
|
||||
inst::{args::*, regs, x64_map_regs},
|
||||
settings::Flags as IsaFlags,
|
||||
},
|
||||
},
|
||||
machinst::{
|
||||
isle::*, AtomicRmwOp, InsnInput, InsnOutput, LowerCtx, VCodeConstant, VCodeConstantData,
|
||||
},
|
||||
machinst::{isle::*, InsnInput, InsnOutput, LowerCtx, VCodeConstantData},
|
||||
settings::Flags,
|
||||
};
|
||||
use std::convert::TryFrom;
|
||||
|
||||
@@ -252,8 +254,8 @@ where
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn xmm0(&mut self) -> WritableReg {
|
||||
WritableReg::from_reg(regs::xmm0())
|
||||
fn xmm0(&mut self) -> WritableXmm {
|
||||
WritableXmm::from_reg(Xmm::new(regs::xmm0()).unwrap())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
@@ -262,7 +264,7 @@ where
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn amode_imm_reg_reg_shift(&mut self, simm32: u32, base: Reg, index: Reg, shift: u8) -> Amode {
|
||||
fn amode_imm_reg_reg_shift(&mut self, simm32: u32, base: Gpr, index: Gpr, shift: u8) -> Amode {
|
||||
Amode::imm_reg_reg_shift(simm32, base, index, shift)
|
||||
}
|
||||
|
||||
@@ -271,6 +273,16 @@ where
|
||||
amode.clone().into()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn writable_gpr_to_reg(&mut self, r: WritableGpr) -> WritableReg {
|
||||
r.to_writable_reg()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn writable_xmm_to_reg(&mut self, r: WritableXmm) -> WritableReg {
|
||||
r.to_writable_reg()
|
||||
}
|
||||
|
||||
fn ishl_i8x16_mask_for_const(&mut self, amt: u32) -> SyntheticAmode {
|
||||
// When the shift amount is known, we can statically (i.e. at compile
|
||||
// time) determine the mask to use and only emit that.
|
||||
@@ -306,6 +318,96 @@ where
|
||||
.use_constant(VCodeConstantData::WellKnown(&I8X16_USHR_MASKS));
|
||||
SyntheticAmode::ConstantOffset(mask_table)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn writable_reg_to_xmm(&mut self, r: WritableReg) -> WritableXmm {
|
||||
Writable::from_reg(Xmm::new(r.to_reg()).unwrap())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn writable_xmm_to_xmm(&mut self, r: WritableXmm) -> Xmm {
|
||||
r.to_reg()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn writable_gpr_to_gpr(&mut self, r: WritableGpr) -> Gpr {
|
||||
r.to_reg()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn gpr_to_reg(&mut self, r: Gpr) -> Reg {
|
||||
r.into()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn xmm_to_reg(&mut self, r: Xmm) -> Reg {
|
||||
r.into()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn xmm_to_xmm_mem_imm(&mut self, r: Xmm) -> XmmMemImm {
|
||||
r.into()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn temp_writable_gpr(&mut self) -> WritableGpr {
|
||||
Writable::from_reg(Gpr::new(self.temp_writable_reg(I64).to_reg()).unwrap())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn temp_writable_xmm(&mut self) -> WritableXmm {
|
||||
Writable::from_reg(Xmm::new(self.temp_writable_reg(I8X16).to_reg()).unwrap())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn reg_mem_to_xmm_mem(&mut self, rm: &RegMem) -> XmmMem {
|
||||
XmmMem::new(rm.clone()).unwrap()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn gpr_mem_imm_new(&mut self, rmi: &RegMemImm) -> GprMemImm {
|
||||
GprMemImm::new(rmi.clone()).unwrap()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn xmm_mem_imm_new(&mut self, rmi: &RegMemImm) -> XmmMemImm {
|
||||
XmmMemImm::new(rmi.clone()).unwrap()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn xmm_to_xmm_mem(&mut self, r: Xmm) -> XmmMem {
|
||||
r.into()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn xmm_mem_to_reg_mem(&mut self, xm: &XmmMem) -> RegMem {
|
||||
xm.clone().into()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn gpr_mem_to_reg_mem(&mut self, gm: &GprMem) -> RegMem {
|
||||
gm.clone().into()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn xmm_new(&mut self, r: Reg) -> Xmm {
|
||||
Xmm::new(r).unwrap()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn gpr_new(&mut self, r: Reg) -> Gpr {
|
||||
Gpr::new(r).unwrap()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn reg_mem_to_gpr_mem(&mut self, rm: &RegMem) -> GprMem {
|
||||
GprMem::new(rm.clone()).unwrap()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn reg_to_gpr_mem(&mut self, r: Reg) -> GprMem {
|
||||
GprMem::new(RegMem::reg(r)).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
// Since x64 doesn't have 8x16 shifts and we must use a 16x8 shift instead, we
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
src/clif.isle 9ea75a6f790b5c03
|
||||
src/prelude.isle 2bfcafbef6b29358
|
||||
src/isa/x64/inst.isle bbb6a3d201200cc8
|
||||
src/isa/x64/lower.isle 82db7f7d47ac7809
|
||||
src/prelude.isle 6aaf8ce0f5a5c2ec
|
||||
src/isa/x64/inst.isle 7513533d16948249
|
||||
src/isa/x64/lower.isle ccda13e9fe83c89a
|
||||
|
||||
2807
cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs
generated
2807
cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs
generated
File diff suppressed because it is too large
Load Diff
@@ -1,14 +1,25 @@
|
||||
use crate::ir::{Inst, Value};
|
||||
use crate::machinst::{get_output_reg, InsnOutput, LowerCtx, MachInst, RegRenamer};
|
||||
use alloc::boxed::Box;
|
||||
use alloc::vec::Vec;
|
||||
use regalloc::{Reg, Writable};
|
||||
use smallvec::SmallVec;
|
||||
|
||||
pub use super::MachLabel;
|
||||
pub use crate::ir::ExternalName;
|
||||
pub use crate::isa::unwind::UnwindInst;
|
||||
|
||||
pub type Unit = ();
|
||||
pub type ValueSlice<'a> = &'a [Value];
|
||||
pub type ValueArray2 = [Value; 2];
|
||||
pub type ValueArray3 = [Value; 3];
|
||||
pub type WritableReg = Writable<Reg>;
|
||||
pub type OptionWritableReg = Option<WritableReg>;
|
||||
pub type VecReg = Vec<Reg>;
|
||||
pub type VecWritableReg = Vec<WritableReg>;
|
||||
pub type ValueRegs = crate::machinst::ValueRegs<Reg>;
|
||||
pub type VecMachLabel = Vec<MachLabel>;
|
||||
pub type BoxExternalName = Box<ExternalName>;
|
||||
|
||||
/// Helper macro to define methods in `prelude.isle` within `impl Context for
|
||||
/// ...` for each backend. These methods are shared amongst all backends.
|
||||
|
||||
@@ -48,6 +48,9 @@
|
||||
|
||||
(type Reg (primitive Reg))
|
||||
(type WritableReg (primitive WritableReg))
|
||||
(type OptionWritableReg (primitive OptionWritableReg))
|
||||
(type VecReg extern (enum))
|
||||
(type VecWritableReg extern (enum))
|
||||
|
||||
;; Construct a `ValueRegs` of one register.
|
||||
(decl value_reg (Reg) ValueRegs)
|
||||
@@ -106,6 +109,15 @@
|
||||
(let ((regs ValueRegs (put_in_regs val)))
|
||||
(value_regs_get regs 0)))
|
||||
|
||||
;;;; Common Mach Types ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(type MachLabel (primitive MachLabel))
|
||||
(type VecMachLabel extern (enum))
|
||||
(type ValueLabel (primitive ValueLabel))
|
||||
(type UnwindInst (primitive UnwindInst))
|
||||
(type ExternalName (primitive ExternalName))
|
||||
(type BoxExternalName (primitive BoxExternalName))
|
||||
|
||||
;;;; Primitive Type Conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(decl u8_as_u64 (u8) u64)
|
||||
@@ -368,4 +380,3 @@
|
||||
|
||||
(decl avoid_div_traps () Type)
|
||||
(extern extractor avoid_div_traps avoid_div_traps)
|
||||
|
||||
|
||||
@@ -32,6 +32,7 @@ pub struct Ident(pub String, pub Pos);
|
||||
pub struct Type {
|
||||
/// The identifier naming this type.
pub name: Ident,
|
||||
/// Set when the declaration carries the `extern` modifier.
pub is_extern: bool,
|
||||
/// Set when the declaration carries the `nodebug` modifier.
pub is_nodebug: bool,
|
||||
/// The parsed body of the type declaration.
pub ty: TypeValue,
|
||||
/// Source position recorded for this declaration.
pub pos: Pos,
|
||||
}
|
||||
|
||||
@@ -138,6 +138,7 @@ impl<'a> Codegen<'a> {
|
||||
&Type::Enum {
|
||||
name,
|
||||
is_extern,
|
||||
is_nodebug,
|
||||
ref variants,
|
||||
pos,
|
||||
..
|
||||
@@ -150,11 +151,20 @@ impl<'a> Codegen<'a> {
|
||||
pos.pretty_print_line(&self.typeenv.filenames[..])
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// Generate the `derive`s.
|
||||
let debug_derive = if is_nodebug { "" } else { ", Debug" };
|
||||
if variants.iter().all(|v| v.fields.is_empty()) {
|
||||
writeln!(code, "#[derive(Copy, Clone, Debug, PartialEq, Eq)]").unwrap();
|
||||
writeln!(
|
||||
code,
|
||||
"#[derive(Copy, Clone, PartialEq, Eq{})]",
|
||||
debug_derive
|
||||
)
|
||||
.unwrap();
|
||||
} else {
|
||||
writeln!(code, "#[derive(Clone, Debug)]").unwrap();
|
||||
writeln!(code, "#[derive(Clone{})]", debug_derive).unwrap();
|
||||
}
|
||||
|
||||
writeln!(code, "pub enum {} {{", name).unwrap();
|
||||
for variant in variants {
|
||||
let name = &self.typeenv.syms[variant.name.index()];
|
||||
|
||||
@@ -197,15 +197,29 @@ impl<'a> Parser<'a> {
|
||||
fn parse_type(&mut self) -> Result<Type> {
|
||||
let pos = self.pos();
|
||||
let name = self.parse_ident()?;
|
||||
|
||||
let mut is_extern = false;
|
||||
if self.is_sym_str("extern") {
|
||||
self.symbol()?;
|
||||
is_extern = true;
|
||||
let mut is_nodebug = false;
|
||||
|
||||
while self.lexer.peek().map_or(false, |(_pos, tok)| tok.is_sym()) {
|
||||
let sym = self.symbol()?;
|
||||
if sym == "extern" {
|
||||
is_extern = true;
|
||||
} else if sym == "nodebug" {
|
||||
is_nodebug = true;
|
||||
} else {
|
||||
return Err(self.error(
|
||||
self.pos(),
|
||||
format!("unknown type declaration modifier: {}", sym),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
let ty = self.parse_typevalue()?;
|
||||
Ok(Type {
|
||||
name,
|
||||
is_extern,
|
||||
is_nodebug,
|
||||
ty,
|
||||
pos,
|
||||
})
|
||||
|
||||
@@ -109,6 +109,10 @@ pub enum Type {
|
||||
/// If so, ISLE will not emit a definition for it. If not, then it will
|
||||
/// emit a Rust definition for it.
|
||||
is_extern: bool,
|
||||
/// Whether this type should *not* derive `Debug`.
|
||||
///
|
||||
/// Incompatible with `is_extern`.
|
||||
is_nodebug: bool,
|
||||
/// The different variants for this enum.
|
||||
variants: Vec<Variant>,
|
||||
/// The ISLE source position where this `enum` is defined.
|
||||
@@ -607,7 +611,7 @@ impl TypeEnv {
|
||||
let ty = match tyenv.type_map.get(&ty) {
|
||||
Some(ty) => *ty,
|
||||
None => {
|
||||
tyenv.report_error(pos, "Unknown type for constant".into());
|
||||
tyenv.report_error(pos, "Unknown type for constant");
|
||||
continue;
|
||||
}
|
||||
};
|
||||
@@ -635,9 +639,22 @@ impl TypeEnv {
|
||||
let name = self.intern(&ty.name).unwrap();
|
||||
match &ty.ty {
|
||||
&ast::TypeValue::Primitive(ref id, ..) => {
|
||||
if ty.is_nodebug {
|
||||
self.report_error(ty.pos, "primitive types cannot be marked `nodebug`");
|
||||
return None;
|
||||
}
|
||||
if ty.is_extern {
|
||||
self.report_error(ty.pos, "primitive types cannot be marked `extern`");
|
||||
return None;
|
||||
}
|
||||
Some(Type::Primitive(tid, self.intern_mut(id), ty.pos))
|
||||
}
|
||||
&ast::TypeValue::Enum(ref ty_variants, ..) => {
|
||||
if ty.is_extern && ty.is_nodebug {
|
||||
self.report_error(ty.pos, "external types cannot be marked `nodebug`");
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut variants = vec![];
|
||||
for variant in ty_variants {
|
||||
let combined_ident =
|
||||
@@ -696,6 +713,7 @@ impl TypeEnv {
|
||||
name,
|
||||
id: tid,
|
||||
is_extern: ty.is_extern,
|
||||
is_nodebug: ty.is_nodebug,
|
||||
variants,
|
||||
pos: ty.pos,
|
||||
})
|
||||
@@ -703,9 +721,9 @@ impl TypeEnv {
|
||||
}
|
||||
}
|
||||
|
||||
fn error(&self, pos: Pos, msg: String) -> Error {
|
||||
fn error(&self, pos: Pos, msg: impl Into<String>) -> Error {
|
||||
let e = Error::TypeError {
|
||||
msg,
|
||||
msg: msg.into(),
|
||||
src: Source::new(
|
||||
self.filenames[pos.file].clone(),
|
||||
self.file_texts[pos.file].clone(),
|
||||
@@ -716,7 +734,7 @@ impl TypeEnv {
|
||||
e
|
||||
}
|
||||
|
||||
fn report_error(&mut self, pos: Pos, msg: String) {
|
||||
fn report_error(&mut self, pos: Pos, msg: impl Into<String>) {
|
||||
let err = self.error(pos, msg);
|
||||
self.errors.push(err);
|
||||
}
|
||||
@@ -987,8 +1005,7 @@ impl TermEnv {
|
||||
tyenv.report_error(
|
||||
ext.pos,
|
||||
"Extractor macro body defined on term of incorrect kind; cannot be an \
|
||||
enum variant"
|
||||
.into(),
|
||||
enum variant",
|
||||
);
|
||||
continue;
|
||||
}
|
||||
@@ -1329,10 +1346,7 @@ impl TermEnv {
|
||||
let ty = match expected_ty {
|
||||
Some(t) => t,
|
||||
None => {
|
||||
tyenv.report_error(
|
||||
pos,
|
||||
"Need an implied type for an integer constant".into(),
|
||||
);
|
||||
tyenv.report_error(pos, "Need an implied type for an integer constant");
|
||||
return None;
|
||||
}
|
||||
};
|
||||
@@ -1353,12 +1367,12 @@ impl TermEnv {
|
||||
let const_ty = match tyenv.const_types.get(&val) {
|
||||
Some(ty) => *ty,
|
||||
None => {
|
||||
tyenv.report_error(pos, "Unknown constant".into());
|
||||
tyenv.report_error(pos, "Unknown constant");
|
||||
return None;
|
||||
}
|
||||
};
|
||||
if expected_ty.is_some() && expected_ty != Some(const_ty) {
|
||||
tyenv.report_error(pos, "Type mismatch for constant".into());
|
||||
tyenv.report_error(pos, "Type mismatch for constant");
|
||||
}
|
||||
Some((Pattern::ConstPrim(const_ty, val), const_ty))
|
||||
}
|
||||
@@ -1366,7 +1380,7 @@ impl TermEnv {
|
||||
let ty = match expected_ty {
|
||||
Some(t) => t,
|
||||
None => {
|
||||
tyenv.report_error(pos, "Need an implied type for a wildcard".into());
|
||||
tyenv.report_error(pos, "Need an implied type for a wildcard");
|
||||
return None;
|
||||
}
|
||||
};
|
||||
@@ -1775,7 +1789,7 @@ impl TermEnv {
|
||||
let const_ty = match tyenv.const_types.get(&val) {
|
||||
Some(ty) => *ty,
|
||||
None => {
|
||||
tyenv.report_error(pos, "Unknown constant".into());
|
||||
tyenv.report_error(pos, "Unknown constant");
|
||||
return None;
|
||||
}
|
||||
};
|
||||
@@ -1920,6 +1934,7 @@ mod test {
|
||||
name: sym_a,
|
||||
id: TypeId(1),
|
||||
is_extern: true,
|
||||
is_nodebug: false,
|
||||
variants: vec![
|
||||
Variant {
|
||||
name: sym_b,
|
||||
|
||||
4
cranelift/isle/isle_examples/nodebug.isle
Normal file
4
cranelift/isle/isle_examples/nodebug.isle
Normal file
@@ -0,0 +1,4 @@
|
||||
;; Example enum declared with the `nodebug` modifier, so the Rust enum
;; generated for it is emitted without `Debug` in its `derive` list.
(type DoesNotDeriveDebug nodebug
|
||||
(enum A
|
||||
B
|
||||
C))
|
||||
Reference in New Issue
Block a user