cranelift: Add newtype wrappers for x64 register classes

The primary motivation of this large commit (apologies for its size!) is to
introduce `Gpr` and `Xmm` newtypes over `Reg`. This should help catch
difficult-to-diagnose register class mixup bugs in x64 lowerings.

But having a newtype for `Gpr` and `Xmm` themselves isn't enough to catch all of
our operand-with-wrong-register-class bugs, because about 50% of operands on x64
aren't just a register, but a register or memory address or even an
immediate! So we have `{Gpr,Xmm}Mem[Imm]` newtypes as well.

Unfortunately, `GprMem` et al can't be `enum`s and are therefore a little bit
noisier to work with from ISLE. They need to maintain the invariant that their
registers really are of the claimed register class, so they need to encapsulate
the inner data. If they exposed the underlying `enum` variants, then anyone
could just change register classes or construct a `GprMem` that holds an XMM
register, defeating the whole point of these newtypes. So when working with
these newtypes from ISLE, we rely on external constructors like `(gpr_to_gpr_mem
my_gpr)` instead of `(GprMem.Gpr my_gpr)`.

A few extra lines of code are included to add support for register mapping
for all of these newtypes as well. Ultimately this is all a bit wordier than I'd
hoped it would be when I first started authoring this commit, but I think it is
all worth it nonetheless!

In the process of adding these newtypes, I didn't want to have to update both
the ISLE `extern` type definition of `MInst` and the Rust definition, so I move
the definition fully into ISLE, similar to aarch64.

Finally, this process isn't complete. I've introduced the newtypes here, and
I've made most XMM-using instructions switch from `Reg` to `Xmm`, as well as
register class-converting instructions, but I haven't moved all of the GPR-using
instructions over to the newtypes yet. I figured this commit was big enough as
it was, and I can continue the adoption of these newtypes in follow up commits.

Part of #3685.
This commit is contained in:
Nick Fitzgerald
2022-01-21 14:10:40 -08:00
parent e1f4e29efe
commit 795b0aaf9a
22 changed files with 4595 additions and 3212 deletions

View File

@@ -23,7 +23,7 @@ serde = { version = "1.0.94", features = ["derive"], optional = true }
bincode = { version = "1.2.1", optional = true }
gimli = { version = "0.26.0", default-features = false, features = ["write"], optional = true }
smallvec = { version = "1.6.1" }
regalloc = { version = "0.0.33" }
regalloc = "0.0.34"
souper-ir = { version = "2.1.0", optional = true }
# It is a goal of the cranelift-codegen crate to have minimal external dependencies.
# Please don't add any unless they are essential to the task of creating binary

View File

@@ -406,6 +406,8 @@ fn rebuild_isle(
) -> Result<(), Box<dyn std::error::Error + 'static>> {
use cranelift_isle as isle;
println!("Rebuilding {}", compilation.output.display());
// First, remove the manifest, if any; we will recreate it
// below if the compilation is successful. Ignore error if no
// manifest was present.

View File

@@ -871,15 +871,10 @@
(type BoxCallInfo (primitive BoxCallInfo))
(type BoxCallIndInfo (primitive BoxCallIndInfo))
(type VecMachLabel (primitive VecMachLabel))
(type CondBrKind (primitive CondBrKind))
(type BranchTarget (primitive BranchTarget))
(type BoxJTSequenceInfo (primitive BoxJTSequenceInfo))
(type BoxExternalName (primitive BoxExternalName))
(type CodeOffset (primitive CodeOffset))
(type ExternalName (primitive ExternalName))
(type ValueLabel (primitive ValueLabel))
(type UnwindInst (primitive UnwindInst))
(type ExtendOp extern
(enum

View File

@@ -1,4 +1,4 @@
src/clif.isle 9ea75a6f790b5c03
src/prelude.isle 2bfcafbef6b29358
src/isa/aarch64/inst.isle 944323ff7d6db098
src/prelude.isle 6aaf8ce0f5a5c2ec
src/isa/aarch64/inst.isle dafd813ba278ce19
src/isa/aarch64/lower.isle 2d2e1e076a0c8a23

File diff suppressed because it is too large Load Diff

View File

@@ -678,11 +678,7 @@
(type BoxCallInfo (primitive BoxCallInfo))
(type BoxCallIndInfo (primitive BoxCallIndInfo))
(type MachLabel (primitive MachLabel))
(type BoxJTSequenceInfo (primitive BoxJTSequenceInfo))
(type BoxExternalName (primitive BoxExternalName))
(type ValueLabel (primitive ValueLabel))
(type UnwindInst (primitive UnwindInst))
;; An ALU operation.
(type ALUOp
@@ -1041,10 +1037,6 @@
;; Helpers for machine label vectors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VecMachLabel needs to be passed by reference, so it cannot be
;; declared as primitive type. Declare as extern enum instead.
(type VecMachLabel extern (enum))
(decl vec_length_minus1 (VecMachLabel) u32)
(extern constructor vec_length_minus1 vec_length_minus1)
@@ -2963,5 +2955,3 @@
(decl fcmp_reg (Type Reg Reg) ProducesFlags)
(rule (fcmp_reg $F32 src1 src2) (fpu_cmp32 src1 src2))
(rule (fcmp_reg $F64 src1 src2) (fpu_cmp64 src1 src2))

View File

@@ -1,4 +1,4 @@
src/clif.isle 9ea75a6f790b5c03
src/prelude.isle 2bfcafbef6b29358
src/isa/s390x/inst.isle 1d525c87f7c77c26
src/prelude.isle 6aaf8ce0f5a5c2ec
src/isa/s390x/inst.isle f5af3708848ef1aa
src/isa/s390x/lower.isle 57dcc39cbab2d1c6

File diff suppressed because it is too large Load Diff

View File

@@ -47,7 +47,8 @@ pub(crate) fn encode_sib(shift: u8, enc_index: u8, enc_base: u8) -> u8 {
/// Get the encoding number of a GPR.
#[inline(always)]
pub(crate) fn int_reg_enc(reg: Reg) -> u8 {
pub(crate) fn int_reg_enc(reg: impl Into<Reg>) -> u8 {
let reg = reg.into();
debug_assert!(reg.is_real());
debug_assert_eq!(reg.get_class(), RegClass::I64);
reg.get_hw_encoding()
@@ -55,7 +56,8 @@ pub(crate) fn int_reg_enc(reg: Reg) -> u8 {
/// Get the encoding number of any register.
#[inline(always)]
pub(crate) fn reg_enc(reg: Reg) -> u8 {
pub(crate) fn reg_enc(reg: impl Into<Reg>) -> u8 {
let reg = reg.into();
debug_assert!(reg.is_real());
reg.get_hw_encoding()
}

File diff suppressed because it is too large Load Diff

View File

@@ -13,6 +13,309 @@ use smallvec::{smallvec, SmallVec};
use std::fmt;
use std::string::String;
/// An extension trait for converting `Writable{Xmm,Gpr}` to `Writable<Reg>`.
pub trait ToWritableReg {
/// Widen this writable newtype register back into a plain writable `Reg`.
fn to_writable_reg(&self) -> Writable<Reg>;
}
/// An extension trait for converting `Writable<Reg>` to `Writable{Xmm,Gpr}`.
pub trait FromWritableReg: Sized {
/// Narrow a writable `Reg` into this writable newtype, returning `None`
/// if the wrapped register does not satisfy the newtype's invariant
/// (e.g. it is of the wrong register class).
fn from_writable_reg(w: Writable<Reg>) -> Option<Self>;
}
/// An extension trait for mapping register uses on `{Xmm,Gpr}`.
pub trait MapUseExt {
/// Remap this register as a regalloc "use" via the given mapper,
/// preserving the newtype's register-class invariant.
fn map_use<RM>(&mut self, mapper: &RM)
where
RM: RegMapper;
}
/// An extension trait for mapping register mods and defs on
/// `Writable{Xmm,Gpr}`.
pub trait MapDefModExt {
/// Remap this register as a regalloc "def" via the given mapper.
fn map_def<RM>(&mut self, mapper: &RM)
where
RM: RegMapper;
/// Remap this register as a regalloc "mod" (read-modify-write) via the
/// given mapper.
fn map_mod<RM>(&mut self, mapper: &RM)
where
RM: RegMapper;
}
/// A macro for defining a newtype of `Reg` that enforces some invariant about
/// the wrapped `Reg` (such as that it is of a particular register class).
///
/// Expands to four related newtypes: the plain register wrapper
/// (`$newtype_reg`), a `Writable` alias for it (`$newtype_writable_reg`), and
/// wrappers around `RegMem` / `RegMemImm` that uphold the same invariant for
/// their register variants. `$check` is an expression evaluated with the
/// candidate register bound to `$check_reg`; it must yield `true` for a
/// register to be accepted by the newtype's constructors.
macro_rules! newtype_of_reg {
(
$newtype_reg:ident,
$newtype_writable_reg:ident,
$newtype_reg_mem:ident,
$newtype_reg_mem_imm:ident,
|$check_reg:ident| $check:expr
) => {
/// A newtype wrapper around `Reg`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct $newtype_reg(Reg);
// Allow comparing the newtype directly against a plain `Reg` without
// unwrapping it first.
impl PartialEq<Reg> for $newtype_reg {
fn eq(&self, other: &Reg) -> bool {
self.0 == *other
}
}
// Converting the newtype back into a plain `Reg` is always safe; only
// the other direction is checked.
impl From<$newtype_reg> for Reg {
fn from(r: $newtype_reg) -> Self {
r.0
}
}
// Pretty-printing delegates to the wrapped `Reg`.
impl PrettyPrint for $newtype_reg {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
self.0.show_rru(mb_rru)
}
}
impl $newtype_reg {
/// Create this newtype from the given register, or return `None` if the register
/// is not a valid instance of this newtype.
pub fn new($check_reg: Reg) -> Option<Self> {
if $check {
Some(Self($check_reg))
} else {
None
}
}
/// Get this newtype's underlying `Reg`.
pub fn to_reg(self) -> Reg {
self.0
}
}
// Convenience impl so that people working with this newtype can use it
// "just like" a plain `Reg`.
//
// NB: We cannot implement `DerefMut` because that would let people do
// nasty stuff like `*my_gpr.deref_mut() = some_xmm_reg`, breaking the
// invariants that `Gpr` provides.
impl std::ops::Deref for $newtype_reg {
type Target = Reg;
fn deref(&self) -> &Reg {
&self.0
}
}
impl MapUseExt for $newtype_reg {
fn map_use<RM>(&mut self, mapper: &RM)
where
RM: RegMapper,
{
let mut reg = self.0;
mapper.map_use(&mut reg);
// The mapper must have handed back a register that still
// satisfies this newtype's invariant (debug builds only).
debug_assert!({
let $check_reg = reg;
$check
});
*self = $newtype_reg(reg);
}
}
/// Writable variant of this register newtype.
pub type $newtype_writable_reg = Writable<$newtype_reg>;
impl ToWritableReg for $newtype_writable_reg {
fn to_writable_reg(&self) -> Writable<Reg> {
Writable::from_reg(self.to_reg().to_reg())
}
}
impl FromWritableReg for $newtype_writable_reg {
fn from_writable_reg(w: Writable<Reg>) -> Option<Self> {
Some(Writable::from_reg($newtype_reg::new(w.to_reg())?))
}
}
impl MapDefModExt for $newtype_writable_reg {
fn map_def<RM>(&mut self, mapper: &RM)
where
RM: RegMapper,
{
let mut reg = self.to_writable_reg();
mapper.map_def(&mut reg);
// Check the invariant still holds after mapping (debug only).
debug_assert!({
let $check_reg = reg.to_reg();
$check
});
*self = Writable::from_reg($newtype_reg(reg.to_reg()));
}
fn map_mod<RM>(&mut self, mapper: &RM)
where
RM: RegMapper,
{
let mut reg = self.to_writable_reg();
mapper.map_mod(&mut reg);
// Check the invariant still holds after mapping (debug only).
debug_assert!({
let $check_reg = reg.to_reg();
$check
});
*self = Writable::from_reg($newtype_reg(reg.to_reg()));
}
}
/// A newtype wrapper around `RegMem` whose register variant is
/// guaranteed to satisfy this newtype's invariant.
#[derive(Clone, Debug)]
pub struct $newtype_reg_mem(RegMem);
impl From<$newtype_reg_mem> for RegMem {
fn from(rm: $newtype_reg_mem) -> Self {
rm.0
}
}
// A valid register of this class is trivially a valid reg-or-mem.
impl From<$newtype_reg> for $newtype_reg_mem {
fn from(r: $newtype_reg) -> Self {
$newtype_reg_mem(RegMem::reg(r.into()))
}
}
impl $newtype_reg_mem {
/// Construct a `RegMem` newtype from the given `RegMem`, or return
/// `None` if the `RegMem` is not a valid instance of this `RegMem`
/// newtype.
///
/// Memory operands are accepted unconditionally; only the register
/// variant is checked against the invariant.
pub fn new(rm: RegMem) -> Option<Self> {
match rm {
RegMem::Mem { addr: _ } => Some(Self(rm)),
RegMem::Reg { reg: $check_reg } if $check => Some(Self(rm)),
RegMem::Reg { reg: _ } => None,
}
}
/// Convert this newtype into its underlying `RegMem`.
pub fn to_reg_mem(self) -> RegMem {
self.0
}
/// Remap any register use inside this operand via the given mapper.
#[allow(dead_code)] // Used by some newtypes and not others.
pub fn map_uses<RM>(&mut self, mapper: &RM)
where
RM: RegMapper,
{
self.0.map_uses(mapper);
// Invariant must survive the mapping (debug builds only).
debug_assert!(match self.0 {
RegMem::Reg { reg: $check_reg } => $check,
_ => true,
});
}
/// Remap any register def inside this operand via the given mapper.
#[allow(dead_code)] // Used by some newtypes and not others.
pub fn map_as_def<RM>(&mut self, mapper: &RM)
where
RM: RegMapper,
{
self.0.map_as_def(mapper);
// Invariant must survive the mapping (debug builds only).
debug_assert!(match self.0 {
RegMem::Reg { reg: $check_reg } => $check,
_ => true,
});
}
}
impl PrettyPrint for $newtype_reg_mem {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
self.0.show_rru(mb_rru)
}
}
impl PrettyPrintSized for $newtype_reg_mem {
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
self.0.show_rru_sized(mb_rru, size)
}
}
/// A newtype wrapper around `RegMemImm` whose register variant is
/// guaranteed to satisfy this newtype's invariant.
#[derive(Clone, Debug)]
pub struct $newtype_reg_mem_imm(RegMemImm);
impl From<$newtype_reg_mem_imm> for RegMemImm {
fn from(rmi: $newtype_reg_mem_imm) -> RegMemImm {
rmi.0
}
}
// A valid register of this class is trivially a valid reg/mem/imm.
impl From<$newtype_reg> for $newtype_reg_mem_imm {
fn from(r: $newtype_reg) -> Self {
$newtype_reg_mem_imm(RegMemImm::reg(r.into()))
}
}
impl $newtype_reg_mem_imm {
/// Construct this newtype from the given `RegMemImm`, or return
/// `None` if the `RegMemImm` is not a valid instance of this
/// newtype.
///
/// Immediate and memory operands are accepted unconditionally;
/// only the register variant is checked against the invariant.
pub fn new(rmi: RegMemImm) -> Option<Self> {
match rmi {
RegMemImm::Imm { .. } => Some(Self(rmi)),
RegMemImm::Mem { addr: _ } => Some(Self(rmi)),
RegMemImm::Reg { reg: $check_reg } if $check => Some(Self(rmi)),
RegMemImm::Reg { reg: _ } => None,
}
}
/// Convert this newtype into its underlying `RegMemImm`.
#[allow(dead_code)] // Used by some newtypes and not others.
pub fn to_reg_mem_imm(self) -> RegMemImm {
self.0
}
/// Remap any register use inside this operand via the given mapper.
#[allow(dead_code)] // Used by some newtypes and not others.
pub fn map_uses<RM>(&mut self, mapper: &RM)
where
RM: RegMapper,
{
self.0.map_uses(mapper);
// Invariant must survive the mapping (debug builds only).
debug_assert!(match self.0 {
RegMemImm::Reg { reg: $check_reg } => $check,
_ => true,
});
}
/// Remap any register def inside this operand via the given mapper.
#[allow(dead_code)] // Used by some newtypes and not others.
pub fn map_as_def<RM>(&mut self, mapper: &RM)
where
RM: RegMapper,
{
self.0.map_as_def(mapper);
// Invariant must survive the mapping (debug builds only).
debug_assert!(match self.0 {
RegMemImm::Reg { reg: $check_reg } => $check,
_ => true,
});
}
}
impl PrettyPrint for $newtype_reg_mem_imm {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
self.0.show_rru(mb_rru)
}
}
impl PrettyPrintSized for $newtype_reg_mem_imm {
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
self.0.show_rru_sized(mb_rru, size)
}
}
};
}
// Define a newtype of `Reg` for general-purpose registers: `Gpr`,
// `WritableGpr`, `GprMem`, and `GprMemImm`, all constrained to
// `RegClass::I64`.
newtype_of_reg!(Gpr, WritableGpr, GprMem, GprMemImm, |reg| {
reg.get_class() == RegClass::I64
});
// Define a newtype of `Reg` for XMM registers: `Xmm`, `WritableXmm`,
// `XmmMem`, and `XmmMemImm`, all constrained to `RegClass::V128`.
newtype_of_reg!(Xmm, WritableXmm, XmmMem, XmmMemImm, |reg| {
reg.get_class() == RegClass::V128
});
/// A possible addressing mode (amode) that can be used in instructions.
/// These denote a 64-bit value only.
#[derive(Clone, Debug)]
@@ -27,8 +330,8 @@ pub enum Amode {
/// sign-extend-32-to-64(Immediate) + Register1 + (Register2 << Shift)
ImmRegRegShift {
simm32: u32,
base: Reg,
index: Reg,
base: Gpr,
index: Gpr,
shift: u8, /* 0 .. 3 only */
flags: MemFlags,
},
@@ -48,7 +351,7 @@ impl Amode {
}
}
pub(crate) fn imm_reg_reg_shift(simm32: u32, base: Reg, index: Reg, shift: u8) -> Self {
pub(crate) fn imm_reg_reg_shift(simm32: u32, base: Gpr, index: Gpr, shift: u8) -> Self {
debug_assert!(base.get_class() == RegClass::I64);
debug_assert!(index.get_class() == RegClass::I64);
debug_assert!(shift <= 3);
@@ -96,8 +399,8 @@ impl Amode {
collector.add_use(*base);
}
Amode::ImmRegRegShift { base, index, .. } => {
collector.add_use(*base);
collector.add_use(*index);
collector.add_use(base.to_reg());
collector.add_use(index.to_reg());
}
Amode::RipRelative { .. } => {
// RIP isn't involved in regalloc.
@@ -225,7 +528,7 @@ impl PrettyPrint for SyntheticAmode {
/// denote an 8, 16, 32 or 64 bit value. For the Immediate form, in the 8- and 16-bit case, only
/// the lower 8 or 16 bits of `simm32` is relevant. In the 64-bit case, the value denoted by
/// `simm32` is its sign-extension out to 64 bits.
#[derive(Clone)]
#[derive(Clone, Debug)]
pub enum RegMemImm {
Reg { reg: Reg },
Mem { addr: SyntheticAmode },

View File

@@ -298,14 +298,14 @@ pub(crate) fn emit(
Popcnt => (0x0fb8, 2),
};
match src {
match src.clone().into() {
RegMem::Reg { reg: src } => emit_std_reg_reg(
sink,
prefix,
opcode,
num_opcodes,
dst.to_reg(),
*src,
dst.to_reg().to_reg(),
src,
rex_flags,
),
RegMem::Mem { addr: src } => {
@@ -317,7 +317,7 @@ pub(crate) fn emit(
prefix,
opcode,
num_opcodes,
dst.to_reg(),
dst.to_reg().to_reg(),
&amode,
rex_flags,
);
@@ -327,7 +327,7 @@ pub(crate) fn emit(
Inst::Not { size, src, dst } => {
debug_assert_eq!(*src, dst.to_reg());
let rex_flags = RexFlags::from((*size, dst.to_reg()));
let rex_flags = RexFlags::from((*size, dst.to_writable_reg().to_reg()));
let (opcode, prefix) = match size {
OperandSize::Size8 => (0xF6, LegacyPrefixes::None),
OperandSize::Size16 => (0xF7, LegacyPrefixes::_66),
@@ -342,7 +342,7 @@ pub(crate) fn emit(
Inst::Neg { size, src, dst } => {
debug_assert_eq!(*src, dst.to_reg());
let rex_flags = RexFlags::from((*size, dst.to_reg()));
let rex_flags = RexFlags::from((*size, dst.to_writable_reg().to_reg()));
let (opcode, prefix) = match size {
OperandSize::Size8 => (0xF6, LegacyPrefixes::None),
OperandSize::Size16 => (0xF7, LegacyPrefixes::_66),
@@ -728,7 +728,7 @@ pub(crate) fn emit(
LegacyPrefixes::None,
0x8D,
1,
dst.to_reg(),
dst.to_reg().to_reg(),
&amode,
RexFlags::set_w(),
);
@@ -884,6 +884,7 @@ pub(crate) fn emit(
debug_assert_eq!(*src1, dst.to_reg());
let rex = RexFlags::clear_w();
let prefix = LegacyPrefixes::_66;
let src2 = src2.clone().to_reg_mem_imm();
if let RegMemImm::Imm { simm32 } = src2 {
let (opcode_bytes, reg_digit) = match opcode {
SseOpcode::Psllw => (0x0F71, 6),
@@ -898,7 +899,7 @@ pub(crate) fn emit(
};
let dst_enc = reg_enc(dst.to_reg());
emit_std_enc_enc(sink, prefix, opcode_bytes, 2, reg_digit, dst_enc, rex);
let imm = (*simm32)
let imm = (simm32)
.try_into()
.expect("the immediate must be convertible to a u8");
sink.put1(imm);
@@ -917,7 +918,15 @@ pub(crate) fn emit(
match src2 {
RegMemImm::Reg { reg } => {
emit_std_reg_reg(sink, prefix, opcode_bytes, 2, dst.to_reg(), *reg, rex);
emit_std_reg_reg(
sink,
prefix,
opcode_bytes,
2,
dst.to_reg().to_reg(),
reg,
rex,
);
}
RegMemImm::Mem { addr } => {
let addr = &addr.finalize(state, sink);
@@ -928,7 +937,7 @@ pub(crate) fn emit(
prefix,
opcode_bytes,
2,
dst.to_reg(),
dst.to_reg().to_reg(),
addr,
rex,
);
@@ -1335,7 +1344,12 @@ pub(crate) fn emit(
// might be negative; use a sign-extension.
let inst = Inst::movsx_rm_r(
ExtMode::LQ,
RegMem::mem(Amode::imm_reg_reg_shift(0, tmp1.to_reg(), tmp2.to_reg(), 2)),
RegMem::mem(Amode::imm_reg_reg_shift(
0,
Gpr::new(tmp1.to_reg()).unwrap(),
Gpr::new(tmp2.to_reg()).unwrap(),
2,
)),
*tmp2,
);
inst.emit(sink, info, state);
@@ -1424,15 +1438,15 @@ pub(crate) fn emit(
_ => unimplemented!("Opcode {:?} not implemented", op),
};
match src_e {
match src_e.clone().to_reg_mem() {
RegMem::Reg { reg: reg_e } => {
emit_std_reg_reg(
sink,
prefix,
opcode,
num_opcodes,
reg_g.to_reg(),
*reg_e,
reg_g.to_reg().to_reg(),
reg_e,
rex,
);
}
@@ -1445,7 +1459,7 @@ pub(crate) fn emit(
prefix,
opcode,
num_opcodes,
reg_g.to_reg(),
reg_g.to_reg().to_reg(),
addr,
rex,
);
@@ -1460,7 +1474,7 @@ pub(crate) fn emit(
Avx512Opcode::Vpopcntb => (LegacyPrefixes::_66, OpcodeMap::_0F38, false, 0x54),
_ => unimplemented!("Opcode {:?} not implemented", op),
};
match src {
match src.clone().to_reg_mem() {
RegMem::Reg { reg: src } => EvexInstruction::new()
.length(EvexVectorLength::V128)
.prefix(prefix)
@@ -1587,9 +1601,17 @@ pub(crate) fn emit(
_ => unimplemented!("Opcode {:?} not implemented", op),
};
match src_e {
match src_e.clone().to_reg_mem() {
RegMem::Reg { reg: reg_e } => {
emit_std_reg_reg(sink, prefix, opcode, length, reg_g.to_reg(), *reg_e, rex);
emit_std_reg_reg(
sink,
prefix,
opcode,
length,
reg_g.to_reg().to_reg(),
reg_e,
rex,
);
}
RegMem::Mem { addr } => {
let addr = &addr.finalize(state, sink);
@@ -1600,7 +1622,7 @@ pub(crate) fn emit(
prefix,
opcode,
length,
reg_g.to_reg(),
reg_g.to_reg().to_reg(),
addr,
rex,
);
@@ -1619,7 +1641,7 @@ pub(crate) fn emit(
Avx512Opcode::Vpmullq => (true, 0x40),
_ => unimplemented!("Opcode {:?} not implemented", op),
};
match src1 {
match src1.clone().to_reg_mem() {
RegMem::Reg { reg: src } => EvexInstruction::new()
.length(EvexVectorLength::V128)
.prefix(LegacyPrefixes::_66)
@@ -1845,9 +1867,9 @@ pub(crate) fn emit(
};
let rex = RexFlags::from(*dst_size);
let (src, dst) = if dst_first {
(dst.to_reg(), *src)
(dst.to_reg().to_reg(), src.to_reg())
} else {
(*src, dst.to_reg())
(src.to_reg(), dst.to_reg().to_reg())
};
emit_std_reg_reg(sink, prefix, opcode, 2, src, dst, rex);
@@ -1870,7 +1892,15 @@ pub(crate) fn emit(
let rex = RexFlags::from(*src_size);
match src_e {
RegMem::Reg { reg: reg_e } => {
emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex);
emit_std_reg_reg(
sink,
prefix,
opcode,
2,
reg_g.to_reg().to_reg(),
*reg_e,
rex,
);
}
RegMem::Mem { addr } => {
let addr = &addr.finalize(state, sink);
@@ -1881,7 +1911,7 @@ pub(crate) fn emit(
prefix,
opcode,
2,
reg_g.to_reg(),
reg_g.to_reg().to_reg(),
addr,
rex,
);
@@ -1950,7 +1980,11 @@ pub(crate) fn emit(
// If x seen as a signed int64 is not negative, a signed-conversion will do the right
// thing.
// TODO use tst src, src here.
let inst = Inst::cmp_rmi_r(OperandSize::Size64, RegMemImm::imm(0), src.to_reg());
let inst = Inst::cmp_rmi_r(
OperandSize::Size64,
RegMemImm::imm(0),
src.to_reg().to_reg(),
);
inst.emit(sink, info, state);
one_way_jmp(sink, CC::L, handle_negative);
@@ -1961,8 +1995,8 @@ pub(crate) fn emit(
sink,
info,
state,
src.to_reg(),
*dst,
src.to_reg().to_reg(),
dst.to_writable_reg(),
*dst_size == OperandSize::Size64,
);
@@ -1973,7 +2007,11 @@ pub(crate) fn emit(
// Divide x by two to get it in range for the signed conversion, keep the LSB, and
// scale it back up on the FP side.
let inst = Inst::gen_move(*tmp_gpr1, src.to_reg(), types::I64);
let inst = Inst::gen_move(
tmp_gpr1.to_writable_reg(),
src.to_reg().to_reg(),
types::I64,
);
inst.emit(sink, info, state);
// tmp_gpr1 := src >> 1
@@ -1981,26 +2019,30 @@ pub(crate) fn emit(
OperandSize::Size64,
ShiftKind::ShiftRightLogical,
Some(1),
*tmp_gpr1,
tmp_gpr1.to_writable_reg(),
);
inst.emit(sink, info, state);
let inst = Inst::gen_move(*tmp_gpr2, src.to_reg(), types::I64);
let inst = Inst::gen_move(
tmp_gpr2.to_writable_reg(),
src.to_reg().to_reg(),
types::I64,
);
inst.emit(sink, info, state);
let inst = Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::And,
RegMemImm::imm(1),
*tmp_gpr2,
tmp_gpr2.to_writable_reg(),
);
inst.emit(sink, info, state);
let inst = Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Or,
RegMemImm::reg(tmp_gpr1.to_reg()),
*tmp_gpr2,
RegMemImm::reg(tmp_gpr1.to_reg().to_reg()),
tmp_gpr2.to_writable_reg(),
);
inst.emit(sink, info, state);
@@ -2008,8 +2050,8 @@ pub(crate) fn emit(
sink,
info,
state,
tmp_gpr2.to_reg(),
*dst,
tmp_gpr2.to_reg().to_reg(),
dst.to_writable_reg(),
*dst_size == OperandSize::Size64,
);
@@ -2018,7 +2060,11 @@ pub(crate) fn emit(
} else {
SseOpcode::Addss
};
let inst = Inst::xmm_rm_r(add_op, RegMem::reg(dst.to_reg()), *dst);
let inst = Inst::xmm_rm_r(
add_op,
RegMem::reg(dst.to_reg().to_reg()),
dst.to_writable_reg(),
);
inst.emit(sink, info, state);
sink.bind_label(done);
@@ -2091,18 +2137,18 @@ pub(crate) fn emit(
let not_nan = sink.get_label();
// The truncation.
let inst = Inst::xmm_to_gpr(trunc_op, src, *dst, *dst_size);
let inst = Inst::xmm_to_gpr(trunc_op, src.to_reg(), dst.to_writable_reg(), *dst_size);
inst.emit(sink, info, state);
// Compare against 1, in case of overflow the dst operand was INT_MIN.
let inst = Inst::cmp_rmi_r(*dst_size, RegMemImm::imm(1), dst.to_reg());
let inst = Inst::cmp_rmi_r(*dst_size, RegMemImm::imm(1), dst.to_reg().to_reg());
inst.emit(sink, info, state);
one_way_jmp(sink, CC::NO, done); // no overflow => done
// Check for NaN.
let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), src);
let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src.to_reg()), src.to_reg());
inst.emit(sink, info, state);
one_way_jmp(sink, CC::NP, not_nan); // go to not_nan if not a NaN
@@ -2112,8 +2158,8 @@ pub(crate) fn emit(
let inst = Inst::alu_rmi_r(
*dst_size,
AluRmiROpcode::Xor,
RegMemImm::reg(dst.to_reg()),
*dst,
RegMemImm::reg(dst.to_reg().to_reg()),
dst.to_writable_reg(),
);
inst.emit(sink, info, state);
@@ -2125,11 +2171,18 @@ pub(crate) fn emit(
// If the input was positive, saturate to INT_MAX.
// Zero out tmp_xmm.
let inst =
Inst::xmm_rm_r(SseOpcode::Xorpd, RegMem::reg(tmp_xmm.to_reg()), *tmp_xmm);
let inst = Inst::xmm_rm_r(
SseOpcode::Xorpd,
RegMem::reg(tmp_xmm.to_reg().to_reg()),
tmp_xmm.to_writable_reg(),
);
inst.emit(sink, info, state);
let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm.to_reg());
let inst = Inst::xmm_cmp_rm_r(
cmp_op,
RegMem::reg(src.to_reg()),
tmp_xmm.to_reg().to_reg(),
);
inst.emit(sink, info, state);
// Jump if >= to done.
@@ -2137,10 +2190,14 @@ pub(crate) fn emit(
// Otherwise, put INT_MAX.
if *dst_size == OperandSize::Size64 {
let inst = Inst::imm(OperandSize::Size64, 0x7fffffffffffffff, *dst);
let inst = Inst::imm(
OperandSize::Size64,
0x7fffffffffffffff,
dst.to_writable_reg(),
);
inst.emit(sink, info, state);
} else {
let inst = Inst::imm(OperandSize::Size32, 0x7fffffff, *dst);
let inst = Inst::imm(OperandSize::Size32, 0x7fffffff, dst.to_writable_reg());
inst.emit(sink, info, state);
}
} else {
@@ -2162,7 +2219,8 @@ pub(crate) fn emit(
match *src_size {
OperandSize::Size32 => {
let cst = Ieee32::pow2(output_bits - 1).neg().bits();
let inst = Inst::imm(OperandSize::Size32, cst as u64, *tmp_gpr);
let inst =
Inst::imm(OperandSize::Size32, cst as u64, tmp_gpr.to_writable_reg());
inst.emit(sink, info, state);
}
OperandSize::Size64 => {
@@ -2174,17 +2232,26 @@ pub(crate) fn emit(
} else {
Ieee64::pow2(output_bits - 1).neg()
};
let inst = Inst::imm(OperandSize::Size64, cst.bits(), *tmp_gpr);
let inst =
Inst::imm(OperandSize::Size64, cst.bits(), tmp_gpr.to_writable_reg());
inst.emit(sink, info, state);
}
_ => unreachable!(),
}
let inst =
Inst::gpr_to_xmm(cast_op, RegMem::reg(tmp_gpr.to_reg()), *src_size, *tmp_xmm);
let inst = Inst::gpr_to_xmm(
cast_op,
RegMem::reg(tmp_gpr.to_reg().to_reg()),
*src_size,
tmp_xmm.to_writable_reg(),
);
inst.emit(sink, info, state);
let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(tmp_xmm.to_reg()), src);
let inst = Inst::xmm_cmp_rm_r(
cmp_op,
RegMem::reg(tmp_xmm.to_reg().to_reg()),
src.to_reg(),
);
inst.emit(sink, info, state);
// jump over trap if src >= or > threshold
@@ -2198,11 +2265,18 @@ pub(crate) fn emit(
sink.bind_label(check_positive);
// Zero out the tmp_xmm register.
let inst =
Inst::xmm_rm_r(SseOpcode::Xorpd, RegMem::reg(tmp_xmm.to_reg()), *tmp_xmm);
let inst = Inst::xmm_rm_r(
SseOpcode::Xorpd,
RegMem::reg(tmp_xmm.to_reg().to_reg()),
tmp_xmm.to_writable_reg(),
);
inst.emit(sink, info, state);
let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm.to_reg());
let inst = Inst::xmm_cmp_rm_r(
cmp_op,
RegMem::reg(src.to_reg()),
tmp_xmm.to_reg().to_reg(),
);
inst.emit(sink, info, state);
one_way_jmp(sink, CC::NB, done); // jump over trap if 0 >= src
@@ -2282,14 +2356,22 @@ pub(crate) fn emit(
_ => unreachable!(),
};
let inst = Inst::imm(*src_size, cst, *tmp_gpr);
let inst = Inst::imm(*src_size, cst, tmp_gpr.to_writable_reg());
inst.emit(sink, info, state);
let inst =
Inst::gpr_to_xmm(cast_op, RegMem::reg(tmp_gpr.to_reg()), *src_size, *tmp_xmm);
let inst = Inst::gpr_to_xmm(
cast_op,
RegMem::reg(tmp_gpr.to_reg().to_reg()),
*src_size,
tmp_xmm.to_writable_reg(),
);
inst.emit(sink, info, state);
let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(tmp_xmm.to_reg()), src.to_reg());
let inst = Inst::xmm_cmp_rm_r(
cmp_op,
RegMem::reg(tmp_xmm.to_reg().to_reg()),
src.to_reg().to_reg(),
);
inst.emit(sink, info, state);
let handle_large = sink.get_label();
@@ -2303,8 +2385,8 @@ pub(crate) fn emit(
let inst = Inst::alu_rmi_r(
*dst_size,
AluRmiROpcode::Xor,
RegMemImm::reg(dst.to_reg()),
*dst,
RegMemImm::reg(dst.to_reg().to_reg()),
dst.to_writable_reg(),
);
inst.emit(sink, info, state);
@@ -2321,10 +2403,15 @@ pub(crate) fn emit(
// Actual truncation for small inputs: if the result is not positive, then we had an
// overflow.
let inst = Inst::xmm_to_gpr(trunc_op, src.to_reg(), *dst, *dst_size);
let inst = Inst::xmm_to_gpr(
trunc_op,
src.to_reg().to_reg(),
dst.to_writable_reg(),
*dst_size,
);
inst.emit(sink, info, state);
let inst = Inst::cmp_rmi_r(*dst_size, RegMemImm::imm(0), dst.to_reg());
let inst = Inst::cmp_rmi_r(*dst_size, RegMemImm::imm(0), dst.to_reg().to_reg());
inst.emit(sink, info, state);
one_way_jmp(sink, CC::NL, done); // if dst >= 0, jump to done
@@ -2335,8 +2422,8 @@ pub(crate) fn emit(
let inst = Inst::alu_rmi_r(
*dst_size,
AluRmiROpcode::Xor,
RegMemImm::reg(dst.to_reg()),
*dst,
RegMemImm::reg(dst.to_reg().to_reg()),
dst.to_writable_reg(),
);
inst.emit(sink, info, state);
@@ -2352,13 +2439,22 @@ pub(crate) fn emit(
sink.bind_label(handle_large);
let inst = Inst::xmm_rm_r(sub_op, RegMem::reg(tmp_xmm.to_reg()), *src);
let inst = Inst::xmm_rm_r(
sub_op,
RegMem::reg(tmp_xmm.to_reg().to_reg()),
src.to_writable_reg(),
);
inst.emit(sink, info, state);
let inst = Inst::xmm_to_gpr(trunc_op, src.to_reg(), *dst, *dst_size);
let inst = Inst::xmm_to_gpr(
trunc_op,
src.to_reg().to_reg(),
dst.to_writable_reg(),
*dst_size,
);
inst.emit(sink, info, state);
let inst = Inst::cmp_rmi_r(*dst_size, RegMemImm::imm(0), dst.to_reg());
let inst = Inst::cmp_rmi_r(*dst_size, RegMemImm::imm(0), dst.to_reg().to_reg());
inst.emit(sink, info, state);
let next_is_large = sink.get_label();
@@ -2374,7 +2470,7 @@ pub(crate) fn emit(
} else {
u32::max_value() as u64
},
*dst,
dst.to_writable_reg(),
);
inst.emit(sink, info, state);
@@ -2388,14 +2484,14 @@ pub(crate) fn emit(
sink.bind_label(next_is_large);
if *dst_size == OperandSize::Size64 {
let inst = Inst::imm(OperandSize::Size64, 1 << 63, *tmp_gpr);
let inst = Inst::imm(OperandSize::Size64, 1 << 63, tmp_gpr.to_writable_reg());
inst.emit(sink, info, state);
let inst = Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Add,
RegMemImm::reg(tmp_gpr.to_reg()),
*dst,
RegMemImm::reg(tmp_gpr.to_reg().to_reg()),
dst.to_writable_reg(),
);
inst.emit(sink, info, state);
} else {
@@ -2403,7 +2499,7 @@ pub(crate) fn emit(
OperandSize::Size32,
AluRmiROpcode::Add,
RegMemImm::imm(1 << 31),
*dst,
dst.to_writable_reg(),
);
inst.emit(sink, info, state);
}

View File

@@ -14,6 +14,7 @@
use super::*;
use crate::isa::x64;
use alloc::boxed::Box;
use alloc::vec::Vec;
impl Inst {
@@ -21,8 +22,8 @@ impl Inst {
debug_assert_eq!(src.to_reg().get_class(), RegClass::I64);
Inst::Neg {
size,
src: src.to_reg(),
dst: src,
src: Gpr::new(src.to_reg()).unwrap(),
dst: WritableGpr::from_writable_reg(src).unwrap(),
}
}
}
@@ -693,42 +694,66 @@ fn test_x64_emit() {
//
// Addr_IRRS, offset max simm8
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rax, rax, 0), w_r11),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(127, Gpr::new(rax).unwrap(), Gpr::new(rax).unwrap(), 0),
w_r11,
),
"4C8B5C007F",
"movq 127(%rax,%rax,1), %r11",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rdi, rax, 1), w_r11),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(127, Gpr::new(rdi).unwrap(), Gpr::new(rax).unwrap(), 1),
w_r11,
),
"4C8B5C477F",
"movq 127(%rdi,%rax,2), %r11",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r8, rax, 2), w_r11),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(127, Gpr::new(r8).unwrap(), Gpr::new(rax).unwrap(), 2),
w_r11,
),
"4D8B5C807F",
"movq 127(%r8,%rax,4), %r11",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r15, rax, 3), w_r11),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(127, Gpr::new(r15).unwrap(), Gpr::new(rax).unwrap(), 3),
w_r11,
),
"4D8B5CC77F",
"movq 127(%r15,%rax,8), %r11",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rax, rdi, 3), w_r11),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(127, Gpr::new(rax).unwrap(), Gpr::new(rdi).unwrap(), 3),
w_r11,
),
"4C8B5CF87F",
"movq 127(%rax,%rdi,8), %r11",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rdi, rdi, 2), w_r11),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(127, Gpr::new(rdi).unwrap(), Gpr::new(rdi).unwrap(), 2),
w_r11,
),
"4C8B5CBF7F",
"movq 127(%rdi,%rdi,4), %r11",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r8, rdi, 1), w_r11),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(127, Gpr::new(r8).unwrap(), Gpr::new(rdi).unwrap(), 1),
w_r11,
),
"4D8B5C787F",
"movq 127(%r8,%rdi,2), %r11",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r15, rdi, 0), w_r11),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(127, Gpr::new(r15).unwrap(), Gpr::new(rdi).unwrap(), 0),
w_r11,
),
"4D8B5C3F7F",
"movq 127(%r15,%rdi,1), %r11",
));
@@ -736,42 +761,106 @@ fn test_x64_emit() {
// ========================================================
// Addr_IRRS, offset min simm8
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, rax, r8, 2), w_r11),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(
-128i32 as u32,
Gpr::new(rax).unwrap(),
Gpr::new(r8).unwrap(),
2,
),
w_r11,
),
"4E8B5C8080",
"movq -128(%rax,%r8,4), %r11",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, rdi, r8, 3), w_r11),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(
-128i32 as u32,
Gpr::new(rdi).unwrap(),
Gpr::new(r8).unwrap(),
3,
),
w_r11,
),
"4E8B5CC780",
"movq -128(%rdi,%r8,8), %r11",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, r8, r8, 0), w_r11),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(
-128i32 as u32,
Gpr::new(r8).unwrap(),
Gpr::new(r8).unwrap(),
0,
),
w_r11,
),
"4F8B5C0080",
"movq -128(%r8,%r8,1), %r11",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, r15, r8, 1), w_r11),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(
-128i32 as u32,
Gpr::new(r15).unwrap(),
Gpr::new(r8).unwrap(),
1,
),
w_r11,
),
"4F8B5C4780",
"movq -128(%r15,%r8,2), %r11",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, rax, r15, 1), w_r11),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(
-128i32 as u32,
Gpr::new(rax).unwrap(),
Gpr::new(r15).unwrap(),
1,
),
w_r11,
),
"4E8B5C7880",
"movq -128(%rax,%r15,2), %r11",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, rdi, r15, 0), w_r11),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(
-128i32 as u32,
Gpr::new(rdi).unwrap(),
Gpr::new(r15).unwrap(),
0,
),
w_r11,
),
"4E8B5C3F80",
"movq -128(%rdi,%r15,1), %r11",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, r8, r15, 3), w_r11),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(
-128i32 as u32,
Gpr::new(r8).unwrap(),
Gpr::new(r15).unwrap(),
3,
),
w_r11,
),
"4F8B5CF880",
"movq -128(%r8,%r15,8), %r11",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, r15, r15, 2), w_r11),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(
-128i32 as u32,
Gpr::new(r15).unwrap(),
Gpr::new(r15).unwrap(),
2,
),
w_r11,
),
"4F8B5CBF80",
"movq -128(%r15,%r15,4), %r11",
));
@@ -779,42 +868,96 @@ fn test_x64_emit() {
// ========================================================
// Addr_IRRS, offset large positive simm32
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, rax, rax, 0), w_r11),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(
0x4f6625be,
Gpr::new(rax).unwrap(),
Gpr::new(rax).unwrap(),
0,
),
w_r11,
),
"4C8B9C00BE25664F",
"movq 1332094398(%rax,%rax,1), %r11",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, rdi, rax, 1), w_r11),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(
0x4f6625be,
Gpr::new(rdi).unwrap(),
Gpr::new(rax).unwrap(),
1,
),
w_r11,
),
"4C8B9C47BE25664F",
"movq 1332094398(%rdi,%rax,2), %r11",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, r8, rax, 2), w_r11),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(0x4f6625be, Gpr::new(r8).unwrap(), Gpr::new(rax).unwrap(), 2),
w_r11,
),
"4D8B9C80BE25664F",
"movq 1332094398(%r8,%rax,4), %r11",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, r15, rax, 3), w_r11),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(
0x4f6625be,
Gpr::new(r15).unwrap(),
Gpr::new(rax).unwrap(),
3,
),
w_r11,
),
"4D8B9CC7BE25664F",
"movq 1332094398(%r15,%rax,8), %r11",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, rax, rdi, 3), w_r11),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(
0x4f6625be,
Gpr::new(rax).unwrap(),
Gpr::new(rdi).unwrap(),
3,
),
w_r11,
),
"4C8B9CF8BE25664F",
"movq 1332094398(%rax,%rdi,8), %r11",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, rdi, rdi, 2), w_r11),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(
0x4f6625be,
Gpr::new(rdi).unwrap(),
Gpr::new(rdi).unwrap(),
2,
),
w_r11,
),
"4C8B9CBFBE25664F",
"movq 1332094398(%rdi,%rdi,4), %r11",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, r8, rdi, 1), w_r11),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(0x4f6625be, Gpr::new(r8).unwrap(), Gpr::new(rdi).unwrap(), 1),
w_r11,
),
"4D8B9C78BE25664F",
"movq 1332094398(%r8,%rdi,2), %r11",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, r15, rdi, 0), w_r11),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(
0x4f6625be,
Gpr::new(r15).unwrap(),
Gpr::new(rdi).unwrap(),
0,
),
w_r11,
),
"4D8B9C3FBE25664F",
"movq 1332094398(%r15,%rdi,1), %r11",
));
@@ -823,7 +966,12 @@ fn test_x64_emit() {
// Addr_IRRS, offset large negative simm32
insns.push((
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, rax, r8, 2),
Amode::imm_reg_reg_shift(
-0x264d1690i32 as u32,
Gpr::new(rax).unwrap(),
Gpr::new(r8).unwrap(),
2,
),
w_r11,
),
"4E8B9C8070E9B2D9",
@@ -831,7 +979,12 @@ fn test_x64_emit() {
));
insns.push((
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, rdi, r8, 3),
Amode::imm_reg_reg_shift(
-0x264d1690i32 as u32,
Gpr::new(rdi).unwrap(),
Gpr::new(r8).unwrap(),
3,
),
w_r11,
),
"4E8B9CC770E9B2D9",
@@ -839,7 +992,12 @@ fn test_x64_emit() {
));
insns.push((
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, r8, r8, 0),
Amode::imm_reg_reg_shift(
-0x264d1690i32 as u32,
Gpr::new(r8).unwrap(),
Gpr::new(r8).unwrap(),
0,
),
w_r11,
),
"4F8B9C0070E9B2D9",
@@ -847,7 +1005,12 @@ fn test_x64_emit() {
));
insns.push((
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, r15, r8, 1),
Amode::imm_reg_reg_shift(
-0x264d1690i32 as u32,
Gpr::new(r15).unwrap(),
Gpr::new(r8).unwrap(),
1,
),
w_r11,
),
"4F8B9C4770E9B2D9",
@@ -855,7 +1018,12 @@ fn test_x64_emit() {
));
insns.push((
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, rax, r15, 1),
Amode::imm_reg_reg_shift(
-0x264d1690i32 as u32,
Gpr::new(rax).unwrap(),
Gpr::new(r15).unwrap(),
1,
),
w_r11,
),
"4E8B9C7870E9B2D9",
@@ -863,7 +1031,12 @@ fn test_x64_emit() {
));
insns.push((
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, rdi, r15, 0),
Amode::imm_reg_reg_shift(
-0x264d1690i32 as u32,
Gpr::new(rdi).unwrap(),
Gpr::new(r15).unwrap(),
0,
),
w_r11,
),
"4E8B9C3F70E9B2D9",
@@ -871,7 +1044,12 @@ fn test_x64_emit() {
));
insns.push((
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, r8, r15, 3),
Amode::imm_reg_reg_shift(
-0x264d1690i32 as u32,
Gpr::new(r8).unwrap(),
Gpr::new(r15).unwrap(),
3,
),
w_r11,
),
"4F8B9CF870E9B2D9",
@@ -879,7 +1057,12 @@ fn test_x64_emit() {
));
insns.push((
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, r15, r15, 2),
Amode::imm_reg_reg_shift(
-0x264d1690i32 as u32,
Gpr::new(r15).unwrap(),
Gpr::new(r15).unwrap(),
2,
),
w_r11,
),
"4F8B9CBF70E9B2D9",
@@ -1828,42 +2011,66 @@ fn test_x64_emit() {
// ========================================================
// Mov64_M_R
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, rbx, 0), w_rcx),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(179, Gpr::new(rax).unwrap(), Gpr::new(rbx).unwrap(), 0),
w_rcx,
),
"488B8C18B3000000",
"movq 179(%rax,%rbx,1), %rcx",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, rbx, 0), w_r8),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(179, Gpr::new(rax).unwrap(), Gpr::new(rbx).unwrap(), 0),
w_r8,
),
"4C8B8418B3000000",
"movq 179(%rax,%rbx,1), %r8",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, r9, 0), w_rcx),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(179, Gpr::new(rax).unwrap(), Gpr::new(r9).unwrap(), 0),
w_rcx,
),
"4A8B8C08B3000000",
"movq 179(%rax,%r9,1), %rcx",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, r9, 0), w_r8),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(179, Gpr::new(rax).unwrap(), Gpr::new(r9).unwrap(), 0),
w_r8,
),
"4E8B8408B3000000",
"movq 179(%rax,%r9,1), %r8",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, rbx, 0), w_rcx),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(179, Gpr::new(r10).unwrap(), Gpr::new(rbx).unwrap(), 0),
w_rcx,
),
"498B8C1AB3000000",
"movq 179(%r10,%rbx,1), %rcx",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, rbx, 0), w_r8),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(179, Gpr::new(r10).unwrap(), Gpr::new(rbx).unwrap(), 0),
w_r8,
),
"4D8B841AB3000000",
"movq 179(%r10,%rbx,1), %r8",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, r9, 0), w_rcx),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(179, Gpr::new(r10).unwrap(), Gpr::new(r9).unwrap(), 0),
w_rcx,
),
"4B8B8C0AB3000000",
"movq 179(%r10,%r9,1), %rcx",
));
insns.push((
Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, r9, 0), w_r8),
Inst::mov64_m_r(
Amode::imm_reg_reg_shift(179, Gpr::new(r10).unwrap(), Gpr::new(r9).unwrap(), 0),
w_r8,
),
"4F8B840AB3000000",
"movq 179(%r10,%r9,1), %r8",
));
@@ -1881,7 +2088,10 @@ fn test_x64_emit() {
"lea 42(%r10), %r15",
));
insns.push((
Inst::lea(Amode::imm_reg_reg_shift(179, r10, r9, 0), w_r8),
Inst::lea(
Amode::imm_reg_reg_shift(179, Gpr::new(r10).unwrap(), Gpr::new(r9).unwrap(), 0),
w_r8,
),
"4F8D840AB3000000",
"lea 179(%r10,%r9,1), %r8",
));
@@ -3115,7 +3325,12 @@ fn test_x64_emit() {
Inst::cmove(
OperandSize::Size16,
CC::NO,
RegMem::mem(Amode::imm_reg_reg_shift(37, rdi, rsi, 2)),
RegMem::mem(Amode::imm_reg_reg_shift(
37,
Gpr::new(rdi).unwrap(),
Gpr::new(rsi).unwrap(),
2,
)),
w_r15,
),
"66440F417CB725",
@@ -3157,12 +3372,22 @@ fn test_x64_emit() {
insns.push((Inst::push64(RegMemImm::reg(rdi)), "57", "pushq %rdi"));
insns.push((Inst::push64(RegMemImm::reg(r8)), "4150", "pushq %r8"));
insns.push((
Inst::push64(RegMemImm::mem(Amode::imm_reg_reg_shift(321, rsi, rcx, 3))),
Inst::push64(RegMemImm::mem(Amode::imm_reg_reg_shift(
321,
Gpr::new(rsi).unwrap(),
Gpr::new(rcx).unwrap(),
3,
))),
"FFB4CE41010000",
"pushq 321(%rsi,%rcx,8)",
));
insns.push((
Inst::push64(RegMemImm::mem(Amode::imm_reg_reg_shift(321, r9, rbx, 2))),
Inst::push64(RegMemImm::mem(Amode::imm_reg_reg_shift(
321,
Gpr::new(r9).unwrap(),
Gpr::new(rbx).unwrap(),
2,
))),
"41FFB49941010000",
"pushq 321(%r9,%rbx,4)",
));
@@ -3226,12 +3451,22 @@ fn test_x64_emit() {
insns.push((call_unknown(RegMem::reg(rbp)), "FFD5", "call *%rbp"));
insns.push((call_unknown(RegMem::reg(r11)), "41FFD3", "call *%r11"));
insns.push((
call_unknown(RegMem::mem(Amode::imm_reg_reg_shift(321, rsi, rcx, 3))),
call_unknown(RegMem::mem(Amode::imm_reg_reg_shift(
321,
Gpr::new(rsi).unwrap(),
Gpr::new(rcx).unwrap(),
3,
))),
"FF94CE41010000",
"call *321(%rsi,%rcx,8)",
));
insns.push((
call_unknown(RegMem::mem(Amode::imm_reg_reg_shift(321, r10, rdx, 2))),
call_unknown(RegMem::mem(Amode::imm_reg_reg_shift(
321,
Gpr::new(r10).unwrap(),
Gpr::new(rdx).unwrap(),
2,
))),
"41FF949241010000",
"call *321(%r10,%rdx,4)",
));
@@ -3301,12 +3536,22 @@ fn test_x64_emit() {
"jmp *%r11",
));
insns.push((
Inst::jmp_unknown(RegMem::mem(Amode::imm_reg_reg_shift(321, rsi, rcx, 3))),
Inst::jmp_unknown(RegMem::mem(Amode::imm_reg_reg_shift(
321,
Gpr::new(rsi).unwrap(),
Gpr::new(rcx).unwrap(),
3,
))),
"FFA4CE41010000",
"jmp *321(%rsi,%rcx,8)",
));
insns.push((
Inst::jmp_unknown(RegMem::mem(Amode::imm_reg_reg_shift(321, r10, rdx, 2))),
Inst::jmp_unknown(RegMem::mem(Amode::imm_reg_reg_shift(
321,
Gpr::new(r10).unwrap(),
Gpr::new(rdx).unwrap(),
2,
))),
"41FFA49241010000",
"jmp *321(%r10,%rdx,4)",
));
@@ -3354,7 +3599,12 @@ fn test_x64_emit() {
insns.push((
Inst::xmm_rm_r(
SseOpcode::Addss,
RegMem::mem(Amode::imm_reg_reg_shift(123, r10, rdx, 2)),
RegMem::mem(Amode::imm_reg_reg_shift(
123,
Gpr::new(r10).unwrap(),
Gpr::new(rdx).unwrap(),
2,
)),
w_xmm0,
),
"F3410F5844927B",
@@ -3379,7 +3629,12 @@ fn test_x64_emit() {
insns.push((
Inst::xmm_rm_r(
SseOpcode::Subss,
RegMem::mem(Amode::imm_reg_reg_shift(321, r10, rax, 3)),
RegMem::mem(Amode::imm_reg_reg_shift(
321,
Gpr::new(r10).unwrap(),
Gpr::new(rax).unwrap(),
3,
)),
w_xmm10,
),
"F3450F5C94C241010000",
@@ -4200,10 +4455,17 @@ fn test_x64_emit() {
// ========================================================
// Pertaining to atomics.
let am1: SyntheticAmode = Amode::imm_reg_reg_shift(321, r10, rdx, 2).into();
let am1: SyntheticAmode =
Amode::imm_reg_reg_shift(321, Gpr::new(r10).unwrap(), Gpr::new(rdx).unwrap(), 2).into();
// `am2` doesn't contribute any 1 bits to the rex prefix, so we must use it when testing
// for retention of the apparently-redundant rex prefix in the 8-bit case.
let am2: SyntheticAmode = Amode::imm_reg_reg_shift(-12345i32 as u32, rcx, rsi, 3).into();
let am2: SyntheticAmode = Amode::imm_reg_reg_shift(
-12345i32 as u32,
Gpr::new(rcx).unwrap(),
Gpr::new(rsi).unwrap(),
3,
)
.into();
// A general 8-bit case.
insns.push((

File diff suppressed because it is too large Load Diff

View File

@@ -95,23 +95,23 @@
(rule (lower (has_type (multi_lane 8 16)
(iadd x y)))
(value_reg (paddb (put_in_reg x)
(put_in_reg_mem y))))
(value_xmm (paddb (put_in_xmm x)
(put_in_xmm_mem y))))
(rule (lower (has_type (multi_lane 16 8)
(iadd x y)))
(value_reg (paddw (put_in_reg x)
(put_in_reg_mem y))))
(value_xmm (paddw (put_in_xmm x)
(put_in_xmm_mem y))))
(rule (lower (has_type (multi_lane 32 4)
(iadd x y)))
(value_reg (paddd (put_in_reg x)
(put_in_reg_mem y))))
(value_xmm (paddd (put_in_xmm x)
(put_in_xmm_mem y))))
(rule (lower (has_type (multi_lane 64 2)
(iadd x y)))
(value_reg (paddq (put_in_reg x)
(put_in_reg_mem y))))
(value_xmm (paddq (put_in_xmm x)
(put_in_xmm_mem y))))
;; `i128`
(rule (lower (has_type $I128 (iadd x y)))
@@ -131,25 +131,25 @@
(rule (lower (has_type (multi_lane 8 16)
(sadd_sat x y)))
(value_reg (paddsb (put_in_reg x)
(put_in_reg_mem y))))
(value_xmm (paddsb (put_in_xmm x)
(put_in_xmm_mem y))))
(rule (lower (has_type (multi_lane 16 8)
(sadd_sat x y)))
(value_reg (paddsw (put_in_reg x)
(put_in_reg_mem y))))
(value_xmm (paddsw (put_in_xmm x)
(put_in_xmm_mem y))))
;;;; Rules for `uadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (multi_lane 8 16)
(uadd_sat x y)))
(value_reg (paddusb (put_in_reg x)
(put_in_reg_mem y))))
(value_xmm (paddusb (put_in_xmm x)
(put_in_xmm_mem y))))
(rule (lower (has_type (multi_lane 16 8)
(uadd_sat x y)))
(value_reg (paddusw (put_in_reg x)
(put_in_reg_mem y))))
(value_xmm (paddusw (put_in_xmm x)
(put_in_xmm_mem y))))
;;;; Rules for `iadd_ifcout` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -264,23 +264,23 @@
(rule (lower (has_type (multi_lane 8 16)
(isub x y)))
(value_reg (psubb (put_in_reg x)
(put_in_reg_mem y))))
(value_xmm (psubb (put_in_xmm x)
(put_in_xmm_mem y))))
(rule (lower (has_type (multi_lane 16 8)
(isub x y)))
(value_reg (psubw (put_in_reg x)
(put_in_reg_mem y))))
(value_xmm (psubw (put_in_xmm x)
(put_in_xmm_mem y))))
(rule (lower (has_type (multi_lane 32 4)
(isub x y)))
(value_reg (psubd (put_in_reg x)
(put_in_reg_mem y))))
(value_xmm (psubd (put_in_xmm x)
(put_in_xmm_mem y))))
(rule (lower (has_type (multi_lane 64 2)
(isub x y)))
(value_reg (psubq (put_in_reg x)
(put_in_reg_mem y))))
(value_xmm (psubq (put_in_xmm x)
(put_in_xmm_mem y))))
;; `i128`
(rule (lower (has_type $I128 (isub x y)))
@@ -300,25 +300,25 @@
(rule (lower (has_type (multi_lane 8 16)
(ssub_sat x y)))
(value_reg (psubsb (put_in_reg x)
(put_in_reg_mem y))))
(value_xmm (psubsb (put_in_xmm x)
(put_in_xmm_mem y))))
(rule (lower (has_type (multi_lane 16 8)
(ssub_sat x y)))
(value_reg (psubsw (put_in_reg x)
(put_in_reg_mem y))))
(value_xmm (psubsw (put_in_xmm x)
(put_in_xmm_mem y))))
;;;; Rules for `usub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (multi_lane 8 16)
(usub_sat x y)))
(value_reg (psubusb (put_in_reg x)
(put_in_reg_mem y))))
(value_xmm (psubusb (put_in_xmm x)
(put_in_xmm_mem y))))
(rule (lower (has_type (multi_lane 16 8)
(usub_sat x y)))
(value_reg (psubusw (put_in_reg x)
(put_in_reg_mem y))))
(value_xmm (psubusw (put_in_xmm x)
(put_in_xmm_mem y))))
;;;; Rules for `band` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -360,14 +360,16 @@
;; SSE.
(decl sse_and (Type Reg RegMem) Reg)
(decl sse_and (Type Xmm XmmMem) Xmm)
(rule (sse_and $F32X4 x y) (andps x y))
(rule (sse_and $F64X2 x y) (andpd x y))
(rule (sse_and (multi_lane _bits _lanes) x y) (pand x y))
(rule (lower (has_type ty @ (multi_lane _bits _lanes)
(band x y)))
(value_reg (sse_and ty (put_in_reg x) (put_in_reg_mem y))))
(value_xmm (sse_and ty
(put_in_xmm x)
(put_in_xmm_mem y))))
;; `{i,b}128`.
@@ -432,14 +434,16 @@
;; SSE.
(decl sse_or (Type Reg RegMem) Reg)
(decl sse_or (Type Xmm XmmMem) Xmm)
(rule (sse_or $F32X4 x y) (orps x y))
(rule (sse_or $F64X2 x y) (orpd x y))
(rule (sse_or (multi_lane _bits _lanes) x y) (por x y))
(rule (lower (has_type ty @ (multi_lane _bits _lanes)
(bor x y)))
(value_reg (sse_or ty (put_in_reg x) (put_in_reg_mem y))))
(value_xmm (sse_or ty
(put_in_xmm x)
(put_in_xmm_mem y))))
;; `{i,b}128`.
@@ -507,7 +511,7 @@
;; SSE.
(rule (lower (has_type ty @ (multi_lane _bits _lanes) (bxor x y)))
(value_reg (sse_xor ty (put_in_reg x) (put_in_reg_mem y))))
(value_xmm (sse_xor ty (put_in_xmm x) (put_in_xmm_mem y))))
;; `{i,b}128`.
@@ -578,16 +582,16 @@
;; instructions. The basic idea, whether the amount to shift by is an immediate
;; or not, is to use a 16x8 shift and then mask off the incorrect bits to 0s.
(rule (lower (has_type $I8X16 (ishl src amt)))
(let ((src_ Reg (put_in_reg src))
(let ((src_ Xmm (put_in_xmm src))
(amt_gpr RegMemImm (put_in_reg_mem_imm amt))
(amt_xmm RegMemImm (reg_mem_imm_to_xmm amt_gpr))
(amt_xmm XmmMemImm (mov_rmi_to_xmm amt_gpr))
;; Shift `src` using 16x8. Unfortunately, a 16x8 shift will only be
;; correct for half of the lanes; the others must be fixed up with
;; the mask below.
(unmasked Reg (psllw src_ amt_xmm))
(unmasked Xmm (psllw src_ amt_xmm))
(mask_addr SyntheticAmode (ishl_i8x16_mask amt_gpr))
(mask Reg (x64_load $I8X16 mask_addr (ExtKind.None))))
(value_reg (sse_and $I8X16 unmasked (RegMem.Reg mask)))))
(value_xmm (sse_and $I8X16 unmasked (xmm_mem_new (RegMem.Reg mask))))))
;; Get the address of the mask to use when fixing up the lanes that weren't
;; correctly generated by the 16x8 shift.
@@ -608,25 +612,28 @@
(extern constructor ishl_i8x16_mask_table ishl_i8x16_mask_table)
(rule (ishl_i8x16_mask (RegMemImm.Reg amt))
(let ((mask_table SyntheticAmode (ishl_i8x16_mask_table))
(base_mask_addr Reg (lea mask_table))
(base_mask_addr Gpr (lea mask_table))
(mask_offset Reg (shl $I64 amt (Imm8Reg.Imm8 4))))
(amode_to_synthetic_amode (amode_imm_reg_reg_shift 0
base_mask_addr
mask_offset
(gpr_new mask_offset)
0))))
(rule (ishl_i8x16_mask (RegMemImm.Mem amt))
(ishl_i8x16_mask (RegMemImm.Reg (x64_load $I64 amt (ExtKind.None)))))
;; 16x8, 32x4, and 64x2 shifts can each use a single instruction.
(rule (lower (has_type $I16X8 (ishl src amt)))
(value_reg (psllw (put_in_reg src)
(reg_mem_imm_to_xmm (put_in_reg_mem_imm amt)))))
(value_xmm (psllw (put_in_xmm src)
(mov_rmi_to_xmm (put_in_reg_mem_imm amt)))))
(rule (lower (has_type $I32X4 (ishl src amt)))
(value_reg (pslld (put_in_reg src)
(reg_mem_imm_to_xmm (put_in_reg_mem_imm amt)))))
(value_xmm (pslld (put_in_xmm src)
(mov_rmi_to_xmm (put_in_reg_mem_imm amt)))))
(rule (lower (has_type $I64X2 (ishl src amt)))
(value_reg (psllq (put_in_reg src)
(reg_mem_imm_to_xmm (put_in_reg_mem_imm amt)))))
(value_xmm (psllq (put_in_xmm src)
(mov_rmi_to_xmm (put_in_reg_mem_imm amt)))))
;;;; Rules for `ushr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -672,16 +679,18 @@
;; There are no 8x16 shifts in x64. Do the same 16x8-shift-and-mask thing we do
;; with 8x16 `ishl`.
(rule (lower (has_type $I8X16 (ushr src amt)))
(let ((src_ Reg (put_in_reg src))
(let ((src_ Xmm (put_in_xmm src))
(amt_gpr RegMemImm (put_in_reg_mem_imm amt))
(amt_xmm RegMemImm (reg_mem_imm_to_xmm amt_gpr))
(amt_xmm XmmMemImm (mov_rmi_to_xmm amt_gpr))
;; Shift `src` using 16x8. Unfortunately, a 16x8 shift will only be
;; correct for half of the lanes; the others must be fixed up with
;; the mask below.
(unmasked Reg (psrlw src_ amt_xmm))
(unmasked Xmm (psrlw src_ amt_xmm))
(mask_addr SyntheticAmode (ushr_i8x16_mask amt_gpr))
(mask Reg (x64_load $I8X16 mask_addr (ExtKind.None))))
(value_reg (sse_and $I8X16 unmasked (RegMem.Reg mask)))))
(value_xmm (sse_and $I8X16
unmasked
(xmm_mem_new (RegMem.Reg mask))))))
;; Get the address of the mask to use when fixing up the lanes that weren't
;; correctly generated by the 16x8 shift.
@@ -702,25 +711,28 @@
(extern constructor ushr_i8x16_mask_table ushr_i8x16_mask_table)
(rule (ushr_i8x16_mask (RegMemImm.Reg amt))
(let ((mask_table SyntheticAmode (ushr_i8x16_mask_table))
(base_mask_addr Reg (lea mask_table))
(base_mask_addr Gpr (lea mask_table))
(mask_offset Reg (shl $I64 amt (Imm8Reg.Imm8 4))))
(amode_to_synthetic_amode (amode_imm_reg_reg_shift 0
base_mask_addr
mask_offset
(gpr_new mask_offset)
0))))
(rule (ushr_i8x16_mask (RegMemImm.Mem amt))
(ushr_i8x16_mask (RegMemImm.Reg (x64_load $I64 amt (ExtKind.None)))))
;; 16x8, 32x4, and 64x2 shifts can each use a single instruction.
(rule (lower (has_type $I16X8 (ushr src amt)))
(value_reg (psrlw (put_in_reg src)
(reg_mem_imm_to_xmm (put_in_reg_mem_imm amt)))))
(value_xmm (psrlw (put_in_xmm src)
(mov_rmi_to_xmm (put_in_reg_mem_imm amt)))))
(rule (lower (has_type $I32X4 (ushr src amt)))
(value_reg (psrld (put_in_reg src)
(reg_mem_imm_to_xmm (put_in_reg_mem_imm amt)))))
(value_xmm (psrld (put_in_xmm src)
(mov_rmi_to_xmm (put_in_reg_mem_imm amt)))))
(rule (lower (has_type $I64X2 (ushr src amt)))
(value_reg (psrlq (put_in_reg src)
(reg_mem_imm_to_xmm (put_in_reg_mem_imm amt)))))
(value_xmm (psrlq (put_in_xmm src)
(mov_rmi_to_xmm (put_in_reg_mem_imm amt)))))
;;;; Rules for `sshr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -780,33 +792,35 @@
;; shifted_hi.i16x8 = shift each lane of `high`
;; result = [s0'', s1'', ..., s15'']
(rule (lower (has_type $I8X16 (sshr src amt @ (value_type amt_ty))))
(let ((src_ Reg (put_in_reg src))
(let ((src_ Xmm (put_in_xmm src))
;; In order for `packsswb` later to only use the high byte of each
;; 16x8 lane, we shift right an extra 8 bits, relying on `psraw` to
;; fill in the upper bits appropriately.
(lo Reg (punpcklbw src_ (RegMem.Reg src_)))
(hi Reg (punpckhbw src_ (RegMem.Reg src_)))
(amt_ RegMemImm (sshr_i8x16_bigger_shift amt_ty (put_in_reg_mem_imm amt)))
(shifted_lo Reg (psraw lo amt_))
(shifted_hi Reg (psraw hi amt_)))
(value_reg (packsswb shifted_lo (RegMem.Reg shifted_hi)))))
(lo Xmm (punpcklbw src_ (xmm_to_xmm_mem src_)))
(hi Xmm (punpckhbw src_ (xmm_to_xmm_mem src_)))
(amt_ XmmMemImm (sshr_i8x16_bigger_shift amt_ty (put_in_reg_mem_imm amt)))
(shifted_lo Xmm (psraw lo amt_))
(shifted_hi Xmm (psraw hi amt_)))
(value_xmm (packsswb shifted_lo (xmm_to_xmm_mem shifted_hi)))))
(decl sshr_i8x16_bigger_shift (Type RegMemImm) RegMemImm)
(decl sshr_i8x16_bigger_shift (Type RegMemImm) XmmMemImm)
(rule (sshr_i8x16_bigger_shift _ty (RegMemImm.Imm i))
(RegMemImm.Imm (u32_add i 8)))
(xmm_mem_imm_new (RegMemImm.Imm (u32_add i 8))))
(rule (sshr_i8x16_bigger_shift ty (RegMemImm.Reg r))
(reg_mem_imm_to_xmm (RegMemImm.Reg (add ty r (RegMemImm.Imm 8)))))
(mov_rmi_to_xmm (RegMemImm.Reg (add ty r (RegMemImm.Imm 8)))))
(rule (sshr_i8x16_bigger_shift ty rmi @ (RegMemImm.Mem _m))
(reg_mem_imm_to_xmm (RegMemImm.Reg (add ty (imm ty 8) rmi))))
(mov_rmi_to_xmm (RegMemImm.Reg (add ty (imm ty 8) rmi))))
;; `sshr.{i16x8,i32x4}` can be a simple `psra{w,d}`, we just have to make sure
;; that if the shift amount is in a register, it is in an XMM register.
(rule (lower (has_type $I16X8 (sshr src amt)))
(value_reg (psraw (put_in_reg src)
(reg_mem_imm_to_xmm (put_in_reg_mem_imm amt)))))
(value_xmm (psraw (put_in_xmm src)
(mov_rmi_to_xmm (put_in_reg_mem_imm amt)))))
(rule (lower (has_type $I32X4 (sshr src amt)))
(value_reg (psrad (put_in_reg src)
(reg_mem_imm_to_xmm (put_in_reg_mem_imm amt)))))
(value_xmm (psrad (put_in_xmm src)
(mov_rmi_to_xmm (put_in_reg_mem_imm amt)))))
;; The `sshr.i64x2` CLIF instruction has no single x86 instruction in the older
;; feature sets. Newer ones like AVX512VL + AVX512F include `vpsraq`, a 128-bit
@@ -817,14 +831,15 @@
;;
;; (TODO: when EVEX support is available, add an alternate lowering here).
(rule (lower (has_type $I64X2 (sshr src amt)))
(let ((src_ Reg (put_in_reg src))
(lo Reg (pextrd $I64 src_ 0))
(hi Reg (pextrd $I64 src_ 1))
(let ((src_ Xmm (put_in_xmm src))
(lo Gpr (pextrd $I64 src_ 0))
(hi Gpr (pextrd $I64 src_ 1))
(amt_ Imm8Reg (put_masked_in_imm8_reg amt $I64))
(shifted_lo Reg (sar $I64 lo amt_))
(shifted_hi Reg (sar $I64 hi amt_)))
(value_reg (make_i64x2_from_lanes (RegMem.Reg shifted_lo)
(RegMem.Reg shifted_hi)))))
(shifted_lo Reg (sar $I64 (gpr_to_reg lo) amt_))
(shifted_hi Reg (sar $I64 (gpr_to_reg hi) amt_)))
(value_xmm (make_i64x2_from_lanes (gpr_mem_new (RegMem.Reg shifted_lo))
(gpr_mem_new (RegMem.Reg shifted_hi))))))
;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `i16` and `i8`: we need to extend the shift amount, or mask the
@@ -910,35 +925,35 @@
;; `i64` and smaller.
(rule (lower (has_type (fits_in_64 ty) (ineg x)))
(value_reg (neg ty (put_in_reg x))))
(value_gpr (neg ty (put_in_gpr x))))
;; SSE.
(rule (lower (has_type $I8X16 (ineg x)))
(value_reg (psubb (imm $I8X16 0)
(put_in_reg_mem x))))
(value_xmm (psubb (xmm_new (imm $I8X16 0))
(put_in_xmm_mem x))))
(rule (lower (has_type $I16X8 (ineg x)))
(value_reg (psubw (imm $I16X8 0)
(put_in_reg_mem x))))
(value_xmm (psubw (xmm_new (imm $I16X8 0))
(put_in_xmm_mem x))))
(rule (lower (has_type $I32X4 (ineg x)))
(value_reg (psubd (imm $I32X4 0)
(put_in_reg_mem x))))
(value_xmm (psubd (xmm_new (imm $I32X4 0))
(put_in_xmm_mem x))))
(rule (lower (has_type $I64X2 (ineg x)))
(value_reg (psubq (imm $I64X2 0)
(put_in_reg_mem x))))
(value_xmm (psubq (xmm_new (imm $I64X2 0))
(put_in_xmm_mem x))))
;;;; Rules for `avg_round` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (multi_lane 8 16)
(avg_round x y)))
(value_reg (pavgb (put_in_reg x) (put_in_reg_mem y))))
(value_xmm (pavgb (put_in_xmm x) (put_in_xmm_mem y))))
(rule (lower (has_type (multi_lane 16 8)
(avg_round x y)))
(value_reg (pavgw (put_in_reg x) (put_in_reg_mem y))))
(value_xmm (pavgw (put_in_xmm x) (put_in_xmm_mem y))))
;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -1017,10 +1032,10 @@
;; (No i8x16 multiply.)
(rule (lower (has_type (multi_lane 16 8) (imul x y)))
(value_reg (pmullw (put_in_reg x) (put_in_reg_mem y))))
(value_xmm (pmullw (put_in_xmm x) (put_in_xmm_mem y))))
(rule (lower (has_type (multi_lane 32 4) (imul x y)))
(value_reg (pmulld (put_in_reg x) (put_in_reg_mem y))))
(value_xmm (pmulld (put_in_xmm x) (put_in_xmm_mem y))))
;; With AVX-512 we can implement `i64x2` multiplication with a single
;; instruction.
@@ -1028,7 +1043,7 @@
(avx512dq_enabled)
(multi_lane 64 2))
(imul x y)))
(value_reg (vpmullq (put_in_reg_mem x) (put_in_reg y))))
(value_xmm (vpmullq (put_in_xmm_mem x) (put_in_xmm y))))
;; Otherwise, for i64x2 multiplication we describe a lane A as being composed of
;; a 32-bit upper half "Ah" and a 32-bit lower half "Al". The 32-bit long hand
@@ -1052,24 +1067,24 @@
;; 32-bits when doing calculations, i.e., `Ah == A >> 32`.
(rule (lower (has_type (multi_lane 64 2)
(imul a b)))
(let ((a0 Reg (put_in_reg a))
(b0 Reg (put_in_reg b))
(let ((a0 Xmm (put_in_xmm a))
(b0 Xmm (put_in_xmm b))
;; a_hi = A >> 32
(a_hi Reg (psrlq a0 (RegMemImm.Imm 32)))
(a_hi Xmm (psrlq a0 (xmm_mem_imm_new (RegMemImm.Imm 32))))
;; ah_bl = Ah * Bl
(ah_bl Reg (pmuludq a_hi (RegMem.Reg b0)))
(ah_bl Xmm (pmuludq a_hi (xmm_to_xmm_mem b0)))
;; b_hi = B >> 32
(b_hi Reg (psrlq b0 (RegMemImm.Imm 32)))
(b_hi Xmm (psrlq b0 (xmm_mem_imm_new (RegMemImm.Imm 32))))
;; al_bh = Al * Bh
(al_bh Reg (pmuludq a0 (RegMem.Reg b_hi)))
(al_bh Xmm (pmuludq a0 (xmm_to_xmm_mem b_hi)))
;; aa_bb = ah_bl + al_bh
(aa_bb Reg (paddq ah_bl (RegMem.Reg al_bh)))
(aa_bb Xmm (paddq ah_bl (xmm_to_xmm_mem al_bh)))
;; aa_bb_shifted = aa_bb << 32
(aa_bb_shifted Reg (psllq aa_bb (RegMemImm.Imm 32)))
(aa_bb_shifted Xmm (psllq aa_bb (xmm_mem_imm_new (RegMemImm.Imm 32))))
;; al_bl = Al * Bl
(al_bl Reg (pmuludq a0 (RegMem.Reg b0))))
(al_bl Xmm (pmuludq a0 (xmm_to_xmm_mem b0))))
;; al_bl + aa_bb_shifted
(value_reg (paddq al_bl (RegMem.Reg aa_bb_shifted)))))
(value_xmm (paddq al_bl (xmm_to_xmm_mem aa_bb_shifted)))))
;; Special case for `i16x8.extmul_high_i8x16_s`.
(rule (lower (has_type (multi_lane 16 8)
@@ -1077,13 +1092,13 @@
x)))
(def_inst (swiden_high (and (value_type (multi_lane 8 16))
y))))))
(let ((x1 Reg (put_in_reg x))
(x2 Reg (palignr x1 (RegMem.Reg x1) 8 (OperandSize.Size32)))
(x3 Reg (pmovsxbw (RegMem.Reg x2)))
(y1 Reg (put_in_reg y))
(y2 Reg (palignr y1 (RegMem.Reg y1) 8 (OperandSize.Size32)))
(y3 Reg (pmovsxbw (RegMem.Reg y2))))
(value_reg (pmullw x3 (RegMem.Reg y3)))))
(let ((x1 Xmm (put_in_xmm x))
(x2 Xmm (palignr x1 (xmm_to_xmm_mem x1) 8 (OperandSize.Size32)))
(x3 Xmm (pmovsxbw (xmm_to_xmm_mem x2)))
(y1 Xmm (put_in_xmm y))
(y2 Xmm (palignr y1 (xmm_to_xmm_mem y1) 8 (OperandSize.Size32)))
(y3 Xmm (pmovsxbw (xmm_to_xmm_mem y2))))
(value_xmm (pmullw x3 (xmm_to_xmm_mem y3)))))
;; Special case for `i32x4.extmul_high_i16x8_s`.
(rule (lower (has_type (multi_lane 32 4)
@@ -1091,11 +1106,11 @@
x)))
(def_inst (swiden_high (and (value_type (multi_lane 16 8))
y))))))
(let ((x2 Reg (put_in_reg x))
(y2 Reg (put_in_reg y))
(lo Reg (pmullw x2 (RegMem.Reg y2)))
(hi Reg (pmulhw x2 (RegMem.Reg y2))))
(value_reg (punpckhwd lo (RegMem.Reg hi)))))
(let ((x2 Xmm (put_in_xmm x))
(y2 Xmm (put_in_xmm y))
(lo Xmm (pmullw x2 (xmm_to_xmm_mem y2)))
(hi Xmm (pmulhw x2 (xmm_to_xmm_mem y2))))
(value_xmm (punpckhwd lo (xmm_to_xmm_mem hi)))))
;; Special case for `i64x2.extmul_high_i32x4_s`.
(rule (lower (has_type (multi_lane 64 2)
@@ -1103,13 +1118,13 @@
x)))
(def_inst (swiden_high (and (value_type (multi_lane 32 4))
y))))))
(let ((x2 Reg (pshufd (put_in_reg_mem x)
(let ((x2 Xmm (pshufd (put_in_xmm_mem x)
0xFA
(OperandSize.Size32)))
(y2 Reg (pshufd (put_in_reg_mem y)
(y2 Xmm (pshufd (put_in_xmm_mem y)
0xFA
(OperandSize.Size32))))
(value_reg (pmuldq x2 (RegMem.Reg y2)))))
(value_xmm (pmuldq x2 (xmm_to_xmm_mem y2)))))
;; Special case for `i16x8.extmul_low_i8x16_s`.
(rule (lower (has_type (multi_lane 16 8)
@@ -1117,9 +1132,9 @@
x)))
(def_inst (swiden_low (and (value_type (multi_lane 8 16))
y))))))
(let ((x2 Reg (pmovsxbw (put_in_reg_mem x)))
(y2 Reg (pmovsxbw (put_in_reg_mem y))))
(value_reg (pmullw x2 (RegMem.Reg y2)))))
(let ((x2 Xmm (pmovsxbw (put_in_xmm_mem x)))
(y2 Xmm (pmovsxbw (put_in_xmm_mem y))))
(value_xmm (pmullw x2 (xmm_to_xmm_mem y2)))))
;; Special case for `i32x4.extmul_low_i16x8_s`.
(rule (lower (has_type (multi_lane 32 4)
@@ -1127,11 +1142,11 @@
x)))
(def_inst (swiden_low (and (value_type (multi_lane 16 8))
y))))))
(let ((x2 Reg (put_in_reg x))
(y2 Reg (put_in_reg y))
(lo Reg (pmullw x2 (RegMem.Reg y2)))
(hi Reg (pmulhw x2 (RegMem.Reg y2))))
(value_reg (punpcklwd lo (RegMem.Reg hi)))))
(let ((x2 Xmm (put_in_xmm x))
(y2 Xmm (put_in_xmm y))
(lo Xmm (pmullw x2 (xmm_to_xmm_mem y2)))
(hi Xmm (pmulhw x2 (xmm_to_xmm_mem y2))))
(value_xmm (punpcklwd lo (xmm_to_xmm_mem hi)))))
;; Special case for `i64x2.extmul_low_i32x4_s`.
(rule (lower (has_type (multi_lane 64 2)
@@ -1139,13 +1154,13 @@
x)))
(def_inst (swiden_low (and (value_type (multi_lane 32 4))
y))))))
(let ((x2 Reg (pshufd (put_in_reg_mem x)
(let ((x2 Xmm (pshufd (put_in_xmm_mem x)
0x50
(OperandSize.Size32)))
(y2 Reg (pshufd (put_in_reg_mem y)
(y2 Xmm (pshufd (put_in_xmm_mem y)
0x50
(OperandSize.Size32))))
(value_reg (pmuldq x2 (RegMem.Reg y2)))))
(value_xmm (pmuldq x2 (xmm_to_xmm_mem y2)))))
;; Special case for `i16x8.extmul_high_i8x16_u`.
(rule (lower (has_type (multi_lane 16 8)
@@ -1153,13 +1168,13 @@
x)))
(def_inst (uwiden_high (and (value_type (multi_lane 8 16))
y))))))
(let ((x1 Reg (put_in_reg x))
(x2 Reg (palignr x1 (RegMem.Reg x1) 8 (OperandSize.Size32)))
(x3 Reg (pmovzxbw (RegMem.Reg x2)))
(y1 Reg (put_in_reg y))
(y2 Reg (palignr y1 (RegMem.Reg y1) 8 (OperandSize.Size32)))
(y3 Reg (pmovzxbw (RegMem.Reg y2))))
(value_reg (pmullw x3 (RegMem.Reg y3)))))
(let ((x1 Xmm (put_in_xmm x))
(x2 Xmm (palignr x1 (xmm_to_xmm_mem x1) 8 (OperandSize.Size32)))
(x3 Xmm (pmovzxbw (xmm_to_xmm_mem x2)))
(y1 Xmm (put_in_xmm y))
(y2 Xmm (palignr y1 (xmm_to_xmm_mem y1) 8 (OperandSize.Size32)))
(y3 Xmm (pmovzxbw (xmm_to_xmm_mem y2))))
(value_xmm (pmullw x3 (xmm_to_xmm_mem y3)))))
;; Special case for `i32x4.extmul_high_i16x8_u`.
(rule (lower (has_type (multi_lane 32 4)
@@ -1167,11 +1182,11 @@
x)))
(def_inst (uwiden_high (and (value_type (multi_lane 16 8))
y))))))
(let ((x2 Reg (put_in_reg x))
(y2 Reg (put_in_reg y))
(lo Reg (pmullw x2 (RegMem.Reg y2)))
(hi Reg (pmulhuw x2 (RegMem.Reg y2))))
(value_reg (punpckhwd lo (RegMem.Reg hi)))))
(let ((x2 Xmm (put_in_xmm x))
(y2 Xmm (put_in_xmm y))
(lo Xmm (pmullw x2 (xmm_to_xmm_mem y2)))
(hi Xmm (pmulhuw x2 (xmm_to_xmm_mem y2))))
(value_xmm (punpckhwd lo (xmm_to_xmm_mem hi)))))
;; Special case for `i64x2.extmul_high_i32x4_u`.
(rule (lower (has_type (multi_lane 64 2)
@@ -1179,13 +1194,13 @@
x)))
(def_inst (uwiden_high (and (value_type (multi_lane 32 4))
y))))))
(let ((x2 Reg (pshufd (put_in_reg_mem x)
(let ((x2 Xmm (pshufd (put_in_xmm_mem x)
0xFA
(OperandSize.Size32)))
(y2 Reg (pshufd (put_in_reg_mem y)
(y2 Xmm (pshufd (put_in_xmm_mem y)
0xFA
(OperandSize.Size32))))
(value_reg (pmuludq x2 (RegMem.Reg y2)))))
(value_xmm (pmuludq x2 (xmm_to_xmm_mem y2)))))
;; Special case for `i16x8.extmul_low_i8x16_u`.
(rule (lower (has_type (multi_lane 16 8)
@@ -1193,9 +1208,9 @@
x)))
(def_inst (uwiden_low (and (value_type (multi_lane 8 16))
y))))))
(let ((x2 Reg (pmovzxbw (put_in_reg_mem x)))
(y2 Reg (pmovzxbw (put_in_reg_mem y))))
(value_reg (pmullw x2 (RegMem.Reg y2)))))
(let ((x2 Xmm (pmovzxbw (put_in_xmm_mem x)))
(y2 Xmm (pmovzxbw (put_in_xmm_mem y))))
(value_xmm (pmullw x2 (xmm_to_xmm_mem y2)))))
;; Special case for `i32x4.extmul_low_i16x8_u`.
(rule (lower (has_type (multi_lane 32 4)
@@ -1203,11 +1218,11 @@
x)))
(def_inst (uwiden_low (and (value_type (multi_lane 16 8))
y))))))
(let ((x2 Reg (put_in_reg x))
(y2 Reg (put_in_reg y))
(lo Reg (pmullw x2 (RegMem.Reg y2)))
(hi Reg (pmulhuw x2 (RegMem.Reg y2))))
(value_reg (punpcklwd lo (RegMem.Reg hi)))))
(let ((x2 Xmm (put_in_xmm x))
(y2 Xmm (put_in_xmm y))
(lo Xmm (pmullw x2 (xmm_to_xmm_mem y2)))
(hi Xmm (pmulhuw x2 (xmm_to_xmm_mem y2))))
(value_xmm (punpcklwd lo (xmm_to_xmm_mem hi)))))
;; Special case for `i64x2.extmul_low_i32x4_u`.
(rule (lower (has_type (multi_lane 64 2)
@@ -1215,17 +1230,17 @@
x)))
(def_inst (uwiden_low (and (value_type (multi_lane 32 4))
y))))))
(let ((x2 Reg (pshufd (put_in_reg_mem x)
(let ((x2 Xmm (pshufd (put_in_xmm_mem x)
0x50
(OperandSize.Size32)))
(y2 Reg (pshufd (put_in_reg_mem y)
(y2 Xmm (pshufd (put_in_xmm_mem y)
0x50
(OperandSize.Size32))))
(value_reg (pmuludq x2 (RegMem.Reg y2)))))
(value_xmm (pmuludq x2 (xmm_to_xmm_mem y2)))))
;;;; Rules for `band_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl sse_and_not (Type Reg RegMem) Reg)
(decl sse_and_not (Type Xmm XmmMem) Xmm)
(rule (sse_and_not $F32X4 x y) (andnps x y))
(rule (sse_and_not $F64X2 x y) (andnpd x y))
(rule (sse_and_not (multi_lane _bits _lanes) x y) (pandn x y))
@@ -1238,64 +1253,66 @@
;;
;; pandn(x, y) = and(not(x), y)
(rule (lower (has_type ty (band_not x y)))
(value_reg (sse_and_not ty
(put_in_reg y)
(put_in_reg_mem x))))
(value_xmm (sse_and_not ty
(put_in_xmm y)
(put_in_xmm_mem x))))
;;;; Rules for `iabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I8X16 (iabs x)))
(value_reg (pabsb (put_in_reg_mem x))))
(value_xmm (pabsb (put_in_xmm_mem x))))
(rule (lower (has_type $I16X8 (iabs x)))
(value_reg (pabsw (put_in_reg_mem x))))
(value_xmm (pabsw (put_in_xmm_mem x))))
(rule (lower (has_type $I32X4 (iabs x)))
(value_reg (pabsd (put_in_reg_mem x))))
(value_xmm (pabsd (put_in_xmm_mem x))))
;; When AVX512 is available, we can use a single `vpabsq` instruction.
(rule (lower (has_type (and (avx512vl_enabled)
(avx512f_enabled)
$I64X2)
(iabs x)))
(value_reg (vpabsq (put_in_reg_mem x))))
(value_xmm (vpabsq (put_in_xmm_mem x))))
;; Otherwise, we use a separate register, `neg`, to contain the results of `0 -
;; Otherwise, we use a separate register, `neg`, to contain the results of `0 -
;; x` and then blend in those results with `blendvpd` if the MSB of `neg` was
;; set to 1 (i.e. if `neg` was negative or, conversely, if `x` was originally
;; positive).
(rule (lower (has_type $I64X2 (iabs x)))
(let ((rx Reg (put_in_reg x))
(neg Reg (psubq (imm $I64X2 0) (RegMem.Reg rx))))
(value_reg (blendvpd neg (RegMem.Reg rx) neg))))
(let ((rx Xmm (put_in_xmm x))
(neg Xmm (psubq (xmm_new (imm $I64X2 0)) (xmm_to_xmm_mem rx))))
(value_xmm (blendvpd neg (xmm_to_xmm_mem rx) neg))))
;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Special case for `f32x4.abs`.
(rule (lower (has_type $F32X4 (fabs x)))
(value_reg (andps (put_in_reg x)
(RegMem.Reg (psrld (vector_all_ones $F32X4) (RegMemImm.Imm 1))))))
(value_xmm (andps (put_in_xmm x)
(xmm_to_xmm_mem (psrld (vector_all_ones $F32X4)
(xmm_mem_imm_new (RegMemImm.Imm 1)))))))
;; Special case for `f64x2.abs`.
(rule (lower (has_type $F64X2 (fabs x)))
(value_reg (andpd (put_in_reg x)
(RegMem.Reg (psrlq (vector_all_ones $F64X2) (RegMemImm.Imm 1))))))
(value_xmm (andpd (put_in_xmm x)
(xmm_to_xmm_mem (psrlq (vector_all_ones $F64X2)
(xmm_mem_imm_new (RegMemImm.Imm 1)))))))
;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `i64` and smaller.
(rule (lower (has_type (fits_in_64 ty) (bnot x)))
(value_reg (not ty (put_in_reg x))))
(value_gpr (not ty (put_in_gpr x))))
;; `i128`.
(decl i128_not (Value) ValueRegs)
(rule (i128_not x)
(let ((x_regs ValueRegs (put_in_regs x))
(x_lo Reg (value_regs_get x_regs 0))
(x_hi Reg (value_regs_get x_regs 1)))
(value_regs (not $I64 x_lo)
(x_lo Gpr (gpr_new (value_regs_get x_regs 0)))
(x_hi Gpr (gpr_new (value_regs_get x_regs 1))))
(value_gprs (not $I64 x_lo)
(not $I64 x_hi))))
(rule (lower (has_type $I128 (bnot x)))
@@ -1307,7 +1324,7 @@
;; Special case for vector-types where bit-negation is an xor against an
;; all-one value
(rule (lower (has_type ty @ (multi_lane _bits _lanes) (bnot x)))
(value_reg (sse_xor ty (put_in_reg x) (RegMem.Reg (vector_all_ones ty)))))
(value_xmm (sse_xor ty (put_in_xmm x) (xmm_to_xmm_mem (vector_all_ones ty)))))
;;;; Rules for `bitselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -1318,48 +1335,53 @@
;; a = and if_true, condition
;; b = and_not condition, if_false
;; or b, a
(let ((cond_reg Reg (put_in_reg condition))
(a Reg (sse_and ty (put_in_reg if_true) (RegMem.Reg cond_reg)))
(b Reg (sse_and_not ty cond_reg (put_in_reg_mem if_false))))
(value_reg (sse_or ty b (RegMem.Reg a)))))
(let ((cond_xmm Xmm (put_in_xmm condition))
(a Xmm (sse_and ty (put_in_xmm if_true) (xmm_to_xmm_mem cond_xmm)))
(b Xmm (sse_and_not ty cond_xmm (put_in_xmm_mem if_false))))
(value_xmm (sse_or ty b (xmm_to_xmm_mem a)))))
;;;; Rules for `vselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty @ (multi_lane _bits _lanes)
(vselect condition if_true if_false)))
(value_reg (sse_blend ty
(put_in_reg_mem condition)
(put_in_reg_mem if_true)
(put_in_reg if_false))))
(value_xmm (sse_blend ty
(put_in_xmm_mem condition)
(put_in_xmm_mem if_true)
(put_in_xmm if_false))))
;;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (insertlane vec @ (value_type ty) val (u8_from_uimm8 idx)))
(value_reg (vec_insert_lane ty (put_in_reg vec) (put_in_reg_mem val) idx)))
(value_xmm (vec_insert_lane ty (put_in_xmm vec) (put_in_reg_mem val) idx)))
;; Helper function used below for `insertlane` but also here for other
;; lowerings.
;;
;; Note that the `Type` used here is the type of vector the insertion is
;; happening into, or the type of the first `Xmm` argument.
(decl vec_insert_lane (Type Reg RegMem u8) Reg)
(decl vec_insert_lane (Type Xmm RegMem u8) Xmm)
;; i8x16.replace_lane
(rule (vec_insert_lane $I8X16 vec val idx) (pinsrb vec val idx))
(rule (vec_insert_lane $I8X16 vec val idx)
(pinsrb vec (gpr_mem_new val) idx))
;; i16x8.replace_lane
(rule (vec_insert_lane $I16X8 vec val idx) (pinsrw vec val idx))
(rule (vec_insert_lane $I16X8 vec val idx)
(pinsrw vec (gpr_mem_new val) idx))
;; i32x4.replace_lane
(rule (vec_insert_lane $I32X4 vec val idx) (pinsrd vec val idx (OperandSize.Size32)))
(rule (vec_insert_lane $I32X4 vec val idx)
(pinsrd vec (gpr_mem_new val) idx (OperandSize.Size32)))
;; i64x2.replace_lane
(rule (vec_insert_lane $I64X2 vec val idx) (pinsrd vec val idx (OperandSize.Size64)))
(rule (vec_insert_lane $I64X2 vec val idx)
(pinsrd vec (gpr_mem_new val) idx (OperandSize.Size64)))
;; f32x4.replace_lane
(rule (vec_insert_lane $F32X4 vec val idx) (insertps vec val (sse_insertps_lane_imm idx)))
(rule (vec_insert_lane $F32X4 vec val idx)
(insertps vec (xmm_mem_new val) (sse_insertps_lane_imm idx)))
;; external rust code used to calculate the immediate value to `insertps`
;; External rust code used to calculate the immediate value to `insertps`.
(decl sse_insertps_lane_imm (u8) u8)
(extern constructor sse_insertps_lane_imm sse_insertps_lane_imm)
@@ -1378,60 +1400,63 @@
;; load from memory into a temp register and then the second `movsd` (modeled
;; internally as `xmm_rm_r` will merge the temp register into our `vec`
;; register.
(rule (vec_insert_lane $F64X2 vec (RegMem.Reg val) 0) (movsd vec (RegMem.Reg val)))
(rule (vec_insert_lane $F64X2 vec (RegMem.Reg val) 0)
(movsd vec (xmm_mem_new (RegMem.Reg val))))
(rule (vec_insert_lane $F64X2 vec mem 0)
(movsd vec (RegMem.Reg (xmm_unary_rm_r (SseOpcode.Movsd) mem))))
(movsd vec (xmm_to_xmm_mem (xmm_unary_rm_r (SseOpcode.Movsd)
(xmm_mem_new mem)))))
;; f64x2.replace_lane 1
;;
;; Here the `movlhps` instruction is used specifically to specialize moving
;; into the second lane where unlike above cases we're not using the lane
;; immediate as an immediate to the instruction itself.
(rule (vec_insert_lane $F64X2 vec val 1) (movlhps vec val))
(rule (vec_insert_lane $F64X2 vec val 1)
(movlhps vec (xmm_mem_new val)))
;;;; Rules for `imax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I8X16 (imax x y)))
(value_reg (pmaxsb (put_in_reg x) (put_in_reg_mem y))))
(value_xmm (pmaxsb (put_in_xmm x) (put_in_xmm_mem y))))
(rule (lower (has_type $I16X8 (imax x y)))
(value_reg (pmaxsw (put_in_reg x) (put_in_reg_mem y))))
(value_xmm (pmaxsw (put_in_xmm x) (put_in_xmm_mem y))))
(rule (lower (has_type $I32X4 (imax x y)))
(value_reg (pmaxsd (put_in_reg x) (put_in_reg_mem y))))
(value_xmm (pmaxsd (put_in_xmm x) (put_in_xmm_mem y))))
;;;; Rules for `imin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I8X16 (imin x y)))
(value_reg (pminsb (put_in_reg x) (put_in_reg_mem y))))
(value_xmm (pminsb (put_in_xmm x) (put_in_xmm_mem y))))
(rule (lower (has_type $I16X8 (imin x y)))
(value_reg (pminsw (put_in_reg x) (put_in_reg_mem y))))
(value_xmm (pminsw (put_in_xmm x) (put_in_xmm_mem y))))
(rule (lower (has_type $I32X4 (imin x y)))
(value_reg (pminsd (put_in_reg x) (put_in_reg_mem y))))
(value_xmm (pminsd (put_in_xmm x) (put_in_xmm_mem y))))
;;;; Rules for `umax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I8X16 (umax x y)))
(value_reg (pmaxub (put_in_reg x) (put_in_reg_mem y))))
(value_xmm (pmaxub (put_in_xmm x) (put_in_xmm_mem y))))
(rule (lower (has_type $I16X8 (umax x y)))
(value_reg (pmaxuw (put_in_reg x) (put_in_reg_mem y))))
(value_xmm (pmaxuw (put_in_xmm x) (put_in_xmm_mem y))))
(rule (lower (has_type $I32X4 (umax x y)))
(value_reg (pmaxud (put_in_reg x) (put_in_reg_mem y))))
(value_xmm (pmaxud (put_in_xmm x) (put_in_xmm_mem y))))
;;;; Rules for `umin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I8X16 (umin x y)))
(value_reg (pminub (put_in_reg x) (put_in_reg_mem y))))
(value_xmm (pminub (put_in_xmm x) (put_in_xmm_mem y))))
(rule (lower (has_type $I16X8 (umin x y)))
(value_reg (pminuw (put_in_reg x) (put_in_reg_mem y))))
(value_xmm (pminuw (put_in_xmm x) (put_in_xmm_mem y))))
(rule (lower (has_type $I32X4 (umin x y)))
(value_reg (pminud (put_in_reg x) (put_in_reg_mem y))))
(value_xmm (pminud (put_in_xmm x) (put_in_xmm_mem y))))
;;;; Rules for `trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

View File

@@ -1,7 +1,7 @@
//! Lowering rules for X64.
// ISLE integration glue.
mod isle;
pub(super) mod isle;
use crate::data_value::DataValue;
use crate::ir::{
@@ -1057,7 +1057,13 @@ fn lower_to_amode<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput, offset: i
)
};
return Amode::imm_reg_reg_shift(offset as u32, base, index, shift).with_flags(flags);
return Amode::imm_reg_reg_shift(
offset as u32,
Gpr::new(base).unwrap(),
Gpr::new(index).unwrap(),
shift,
)
.with_flags(flags);
}
let input = put_input_in_reg(ctx, spec);
@@ -3950,7 +3956,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let index = put_input_in_reg(ctx, inputs[1]);
let shift = 0;
let flags = ctx.memflags(insn).expect("load should have memflags");
Amode::imm_reg_reg_shift(offset as u32, base, index, shift).with_flags(flags)
Amode::imm_reg_reg_shift(
offset as u32,
Gpr::new(base).unwrap(),
Gpr::new(index).unwrap(),
shift,
)
.with_flags(flags)
}
_ => unreachable!(),
};
@@ -4054,7 +4066,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let index = put_input_in_reg(ctx, inputs[2]);
let shift = 0;
let flags = ctx.memflags(insn).expect("store should have memflags");
Amode::imm_reg_reg_shift(offset as u32, base, index, shift).with_flags(flags)
Amode::imm_reg_reg_shift(
offset as u32,
Gpr::new(base).unwrap(),
Gpr::new(index).unwrap(),
shift,
)
.with_flags(flags)
}
_ => unreachable!(),

View File

@@ -1,26 +1,28 @@
//! ISLE integration glue code for x64 lowering.
// Pull in the ISLE generated code.
mod generated_code;
pub(crate) mod generated_code;
use generated_code::MInst;
use regalloc::Writable;
// Types that the generated ISLE code uses via `use super::*`.
use super::{
is_mergeable_load, lower_to_amode, AluRmiROpcode, Inst as MInst, OperandSize, Reg, RegMemImm,
};
use super::{is_mergeable_load, lower_to_amode, Reg};
use crate::{
ir::{immediates::*, types::*, Inst, InstructionData, Opcode, TrapCode, Value, ValueList},
isa::x64::{
inst::{
args::{
Amode, Avx512Opcode, CmpOpcode, ExtKind, ExtMode, FcmpImm, Imm8Reg, RegMem,
ShiftKind, SseOpcode, SyntheticAmode, CC,
},
regs, x64_map_regs,
},
settings::Flags as IsaFlags,
ir::{
immediates::*, types::*, Inst, InstructionData, Opcode, TrapCode, Value, ValueLabel,
ValueList,
},
isa::{
settings::Flags,
unwind::UnwindInst,
x64::{
inst::{args::*, regs, x64_map_regs},
settings::Flags as IsaFlags,
},
},
machinst::{
isle::*, AtomicRmwOp, InsnInput, InsnOutput, LowerCtx, VCodeConstant, VCodeConstantData,
},
machinst::{isle::*, InsnInput, InsnOutput, LowerCtx, VCodeConstantData},
settings::Flags,
};
use std::convert::TryFrom;
@@ -252,8 +254,8 @@ where
}
#[inline]
fn xmm0(&mut self) -> WritableReg {
WritableReg::from_reg(regs::xmm0())
fn xmm0(&mut self) -> WritableXmm {
WritableXmm::from_reg(Xmm::new(regs::xmm0()).unwrap())
}
#[inline]
@@ -262,7 +264,7 @@ where
}
#[inline]
fn amode_imm_reg_reg_shift(&mut self, simm32: u32, base: Reg, index: Reg, shift: u8) -> Amode {
fn amode_imm_reg_reg_shift(&mut self, simm32: u32, base: Gpr, index: Gpr, shift: u8) -> Amode {
Amode::imm_reg_reg_shift(simm32, base, index, shift)
}
@@ -271,6 +273,16 @@ where
amode.clone().into()
}
#[inline]
fn writable_gpr_to_reg(&mut self, r: WritableGpr) -> WritableReg {
r.to_writable_reg()
}
#[inline]
fn writable_xmm_to_reg(&mut self, r: WritableXmm) -> WritableReg {
r.to_writable_reg()
}
fn ishl_i8x16_mask_for_const(&mut self, amt: u32) -> SyntheticAmode {
// When the shift amount is known, we can statically (i.e. at compile
// time) determine the mask to use and only emit that.
@@ -306,6 +318,96 @@ where
.use_constant(VCodeConstantData::WellKnown(&I8X16_USHR_MASKS));
SyntheticAmode::ConstantOffset(mask_table)
}
#[inline]
fn writable_reg_to_xmm(&mut self, r: WritableReg) -> WritableXmm {
Writable::from_reg(Xmm::new(r.to_reg()).unwrap())
}
#[inline]
fn writable_xmm_to_xmm(&mut self, r: WritableXmm) -> Xmm {
r.to_reg()
}
#[inline]
fn writable_gpr_to_gpr(&mut self, r: WritableGpr) -> Gpr {
r.to_reg()
}
#[inline]
fn gpr_to_reg(&mut self, r: Gpr) -> Reg {
r.into()
}
#[inline]
fn xmm_to_reg(&mut self, r: Xmm) -> Reg {
r.into()
}
#[inline]
fn xmm_to_xmm_mem_imm(&mut self, r: Xmm) -> XmmMemImm {
r.into()
}
#[inline]
fn temp_writable_gpr(&mut self) -> WritableGpr {
Writable::from_reg(Gpr::new(self.temp_writable_reg(I64).to_reg()).unwrap())
}
#[inline]
fn temp_writable_xmm(&mut self) -> WritableXmm {
Writable::from_reg(Xmm::new(self.temp_writable_reg(I8X16).to_reg()).unwrap())
}
#[inline]
fn xmm_mem_new(&mut self, rm: &RegMem) -> XmmMem {
XmmMem::new(rm.clone()).unwrap()
}
#[inline]
fn gpr_mem_imm_new(&mut self, rmi: &RegMemImm) -> GprMemImm {
GprMemImm::new(rmi.clone()).unwrap()
}
#[inline]
fn xmm_mem_imm_new(&mut self, rmi: &RegMemImm) -> XmmMemImm {
XmmMemImm::new(rmi.clone()).unwrap()
}
#[inline]
fn xmm_to_xmm_mem(&mut self, r: Xmm) -> XmmMem {
r.into()
}
#[inline]
fn xmm_mem_to_reg_mem(&mut self, xm: &XmmMem) -> RegMem {
xm.clone().into()
}
#[inline]
fn gpr_mem_to_reg_mem(&mut self, gm: &GprMem) -> RegMem {
gm.clone().into()
}
#[inline]
fn xmm_new(&mut self, r: Reg) -> Xmm {
Xmm::new(r).unwrap()
}
#[inline]
fn gpr_new(&mut self, r: Reg) -> Gpr {
Gpr::new(r).unwrap()
}
#[inline]
fn gpr_mem_new(&mut self, rm: &RegMem) -> GprMem {
GprMem::new(rm.clone()).unwrap()
}
#[inline]
fn reg_to_gpr_mem(&mut self, r: Reg) -> GprMem {
GprMem::new(RegMem::reg(r)).unwrap()
}
}
// Since x64 doesn't have 8x16 shifts and we must use a 16x8 shift instead, we

View File

@@ -1,4 +1,4 @@
src/clif.isle 9ea75a6f790b5c03
src/prelude.isle 2bfcafbef6b29358
src/isa/x64/inst.isle bbb6a3d201200cc8
src/isa/x64/lower.isle 82db7f7d47ac7809
src/prelude.isle 6aaf8ce0f5a5c2ec
src/isa/x64/inst.isle 2f76eb1f9ecf0c5e
src/isa/x64/lower.isle 144c33c4e64a17a7

File diff suppressed because it is too large Load Diff

View File

@@ -1,14 +1,25 @@
use crate::ir::{Inst, Value};
use crate::machinst::{get_output_reg, InsnOutput, LowerCtx, MachInst, RegRenamer};
use alloc::boxed::Box;
use alloc::vec::Vec;
use regalloc::{Reg, Writable};
use smallvec::SmallVec;
pub use super::MachLabel;
pub use crate::ir::ExternalName;
pub use crate::isa::unwind::UnwindInst;
pub type Unit = ();
pub type ValueSlice<'a> = &'a [Value];
pub type ValueArray2 = [Value; 2];
pub type ValueArray3 = [Value; 3];
pub type WritableReg = Writable<Reg>;
pub type OptionWritableReg = Option<WritableReg>;
pub type VecReg = Vec<Reg>;
pub type VecWritableReg = Vec<WritableReg>;
pub type ValueRegs = crate::machinst::ValueRegs<Reg>;
pub type VecMachLabel = Vec<MachLabel>;
pub type BoxExternalName = Box<ExternalName>;
/// Helper macro to define methods in `prelude.isle` within `impl Context for
/// ...` for each backend. These methods are shared amongst all backends.

View File

@@ -48,6 +48,9 @@
(type Reg (primitive Reg))
(type WritableReg (primitive WritableReg))
(type OptionWritableReg (primitive OptionWritableReg))
(type VecReg extern (enum))
(type VecWritableReg extern (enum))
;; Construct a `ValueRegs` of one register.
(decl value_reg (Reg) ValueRegs)
@@ -106,6 +109,15 @@
(let ((regs ValueRegs (put_in_regs val)))
(value_regs_get regs 0)))
;;;; Common Mach Types ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(type MachLabel (primitive MachLabel))
(type VecMachLabel extern (enum))
(type ValueLabel (primitive ValueLabel))
(type UnwindInst (primitive UnwindInst))
(type ExternalName (primitive ExternalName))
(type BoxExternalName (primitive BoxExternalName))
;;;; Primitive Type Conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl u8_as_u64 (u8) u64)
@@ -368,4 +380,3 @@
(decl avoid_div_traps () Type)
(extern extractor avoid_div_traps avoid_div_traps)