x64: Implement SIMD fma (#4474)
* x64: Add VEX Instruction Encoder This uses a similar builder pattern to the EVEX Encoder. Does not yet support memory accesses. * x64: Add FMA Flag * x64: Implement SIMD `fma` * x64: Use 4 register Vex Inst * x64: Reorder VEX pretty print args
This commit is contained in:
@@ -159,6 +159,7 @@ impl From<(OperandSize, Reg)> for RexFlags {
|
||||
/// Allows using the same opcode byte in different "opcode maps" to allow for more instruction
|
||||
/// encodings. See appendix A in the Intel Software Developer's Manual, volume 2A, for more details.
|
||||
#[allow(missing_docs)]
|
||||
#[derive(PartialEq)]
|
||||
pub enum OpcodeMap {
|
||||
None,
|
||||
_0F,
|
||||
@@ -168,7 +169,7 @@ pub enum OpcodeMap {
|
||||
|
||||
impl OpcodeMap {
|
||||
/// Normally the opcode map is specified as bytes in the instruction, but some x64 encoding
|
||||
/// formats pack this information as bits in a prefix (e.g. EVEX).
|
||||
/// formats pack this information as bits in a prefix (e.g. VEX / EVEX).
|
||||
pub(crate) fn bits(&self) -> u8 {
|
||||
match self {
|
||||
OpcodeMap::None => 0b00,
|
||||
@@ -187,6 +188,7 @@ impl Default for OpcodeMap {
|
||||
|
||||
/// We may need to include one or more legacy prefix bytes before the REX prefix. This enum
|
||||
/// covers only the small set of possibilities that we actually need.
|
||||
#[derive(PartialEq)]
|
||||
pub enum LegacyPrefixes {
|
||||
/// No prefix bytes.
|
||||
None,
|
||||
|
||||
@@ -1,2 +1,357 @@
|
||||
//! Encodes VEX instructions. These instructions are those added by the Advanced Vector Extensions
|
||||
//! (AVX).
|
||||
|
||||
use super::evex::Register;
|
||||
use super::rex::{LegacyPrefixes, OpcodeMap};
|
||||
use super::ByteSink;
|
||||
use crate::isa::x64::encoding::rex::encode_modrm;
|
||||
|
||||
/// Constructs a VEX-encoded instruction using a builder pattern. This approach makes it visually
|
||||
/// easier to transform something the manual's syntax, `VEX.128.66.0F 73 /7 ib` to code:
|
||||
/// `VexInstruction::new().length(...).prefix(...).map(...).w(true).opcode(0x1F).reg(...).rm(...)`.
|
||||
pub struct VexInstruction {
|
||||
length: VexVectorLength,
|
||||
prefix: LegacyPrefixes,
|
||||
map: OpcodeMap,
|
||||
opcode: u8,
|
||||
w: bool,
|
||||
reg: u8,
|
||||
rm: Register,
|
||||
vvvv: Option<Register>,
|
||||
imm: Option<u8>,
|
||||
}
|
||||
|
||||
impl Default for VexInstruction {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
length: VexVectorLength::default(),
|
||||
prefix: LegacyPrefixes::None,
|
||||
map: OpcodeMap::None,
|
||||
opcode: 0x00,
|
||||
w: false,
|
||||
reg: 0x00,
|
||||
rm: Register::default(),
|
||||
vvvv: None,
|
||||
imm: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl VexInstruction {
|
||||
/// Construct a default VEX instruction.
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Set the length of the instruction.
|
||||
#[inline(always)]
|
||||
pub fn length(mut self, length: VexVectorLength) -> Self {
|
||||
self.length = length;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the legacy prefix byte of the instruction: None | 66 | F2 | F3. VEX instructions
|
||||
/// pack these into the prefix, not as separate bytes.
|
||||
#[inline(always)]
|
||||
pub fn prefix(mut self, prefix: LegacyPrefixes) -> Self {
|
||||
debug_assert!(
|
||||
prefix == LegacyPrefixes::None
|
||||
|| prefix == LegacyPrefixes::_66
|
||||
|| prefix == LegacyPrefixes::_F2
|
||||
|| prefix == LegacyPrefixes::_F3
|
||||
);
|
||||
|
||||
self.prefix = prefix;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the opcode map byte of the instruction: None | 0F | 0F38 | 0F3A. VEX instructions pack
|
||||
/// these into the prefix, not as separate bytes.
|
||||
#[inline(always)]
|
||||
pub fn map(mut self, map: OpcodeMap) -> Self {
|
||||
self.map = map;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the W bit, denoted by `.W1` or `.W0` in the instruction string.
|
||||
/// Typically used to indicate an instruction using 64 bits of an operand (e.g.
|
||||
/// 64 bit lanes). EVEX packs this bit in the EVEX prefix; previous encodings used the REX
|
||||
/// prefix.
|
||||
#[inline(always)]
|
||||
pub fn w(mut self, w: bool) -> Self {
|
||||
self.w = w;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the instruction opcode byte.
|
||||
#[inline(always)]
|
||||
pub fn opcode(mut self, opcode: u8) -> Self {
|
||||
self.opcode = opcode;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the register to use for the `reg` bits; many instructions use this as the write operand.
|
||||
#[inline(always)]
|
||||
pub fn reg(mut self, reg: impl Into<Register>) -> Self {
|
||||
self.reg = reg.into().into();
|
||||
self
|
||||
}
|
||||
|
||||
/// Some instructions use the ModRM.reg field as an opcode extension. This is usually denoted by
|
||||
/// a `/n` field in the manual.
|
||||
#[inline(always)]
|
||||
pub fn opcode_ext(mut self, n: u8) -> Self {
|
||||
self.reg = n;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the register to use for the `rm` bits; many instructions use this as the "read from
|
||||
/// register/memory" operand. Currently this does not support memory addressing (TODO).Setting
|
||||
/// this affects both the ModRM byte (`rm` section) and the VEX prefix (the extension bits for
|
||||
/// register encodings > 8).
|
||||
#[inline(always)]
|
||||
pub fn rm(mut self, reg: impl Into<Register>) -> Self {
|
||||
self.rm = reg.into();
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the `vvvv` register; some instructions allow using this as a second, non-destructive
|
||||
/// source register in 3-operand instructions (e.g. 2 read, 1 write).
|
||||
#[allow(dead_code)]
|
||||
#[inline(always)]
|
||||
pub fn vvvv(mut self, reg: impl Into<Register>) -> Self {
|
||||
self.vvvv = Some(reg.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the imm byte when used for a register. The reg bits are stored in `imm8[7:4]` with
|
||||
/// the lower bits unused. Overrides a previously set [Self::imm] field.
|
||||
#[inline(always)]
|
||||
pub fn imm_reg(mut self, reg: impl Into<Register>) -> Self {
|
||||
let reg: u8 = reg.into().into();
|
||||
self.imm = Some((reg & 0xf) << 4);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the imm byte.
|
||||
/// Overrides a previously set [Self::imm_reg] field.
|
||||
#[inline(always)]
|
||||
pub fn imm(mut self, imm: u8) -> Self {
|
||||
self.imm = Some(imm);
|
||||
self
|
||||
}
|
||||
|
||||
/// The R bit in encoded format (inverted).
|
||||
#[inline(always)]
|
||||
fn r_bit(&self) -> u8 {
|
||||
(!(self.reg >> 3)) & 1
|
||||
}
|
||||
|
||||
/// The X bit in encoded format (inverted).
|
||||
#[inline(always)]
|
||||
fn x_bit(&self) -> u8 {
|
||||
// TODO
|
||||
(!0) & 1
|
||||
}
|
||||
|
||||
/// The B bit in encoded format (inverted).
|
||||
#[inline(always)]
|
||||
fn b_bit(&self) -> u8 {
|
||||
let rm: u8 = self.rm.into();
|
||||
(!(rm >> 3)) & 1
|
||||
}
|
||||
|
||||
/// Is the 2 byte prefix available for this instruction?
|
||||
/// We essentially just check if we need any of the bits that are only available
|
||||
/// in the 3 byte instruction
|
||||
#[inline(always)]
|
||||
fn use_2byte_prefix(&self) -> bool {
|
||||
// These bits are only represented on the 3 byte prefix, so their presence
|
||||
// implies the use of the 3 byte prefix
|
||||
self.b_bit() == 1 && self.x_bit() == 1 &&
|
||||
// The presence of W1 in the opcode column implies the opcode must be encoded using the
|
||||
// 3-byte form of the VEX prefix.
|
||||
self.w == false &&
|
||||
// The presence of 0F3A and 0F38 in the opcode column implies that opcode can only be
|
||||
// encoded by the three-byte form of VEX
|
||||
!(self.map == OpcodeMap::_0F3A || self.map == OpcodeMap::_0F38)
|
||||
}
|
||||
/// The last byte of the 2byte and 3byte prefixes is mostly the same, share the common
|
||||
/// encoding logic here.
|
||||
#[inline(always)]
|
||||
fn prefix_last_byte(&self) -> u8 {
|
||||
let vvvv = self.vvvv.map(|r| r.into()).unwrap_or(0x00);
|
||||
|
||||
let mut byte = 0x00;
|
||||
byte |= self.prefix.bits();
|
||||
byte |= self.length.bits() << 2;
|
||||
byte |= ((!vvvv) & 0xF) << 3;
|
||||
byte
|
||||
}
|
||||
|
||||
/// Encode the 2 byte prefix
|
||||
#[inline(always)]
|
||||
fn encode_2byte_prefix<CS: ByteSink + ?Sized>(&self, sink: &mut CS) {
|
||||
// 2 bytes:
|
||||
// +-----+ +-------------------+
|
||||
// | C5h | | R | vvvv | L | pp |
|
||||
// +-----+ +-------------------+
|
||||
|
||||
let last_byte = self.prefix_last_byte() | (self.r_bit() << 7);
|
||||
|
||||
sink.put1(0xC5);
|
||||
sink.put1(last_byte);
|
||||
}
|
||||
|
||||
/// Encode the 3 byte prefix
|
||||
#[inline(always)]
|
||||
fn encode_3byte_prefix<CS: ByteSink + ?Sized>(&self, sink: &mut CS) {
|
||||
// 3 bytes:
|
||||
// +-----+ +--------------+ +-------------------+
|
||||
// | C4h | | RXB | m-mmmm | | W | vvvv | L | pp |
|
||||
// +-----+ +--------------+ +-------------------+
|
||||
|
||||
let mut second_byte = 0x00;
|
||||
second_byte |= self.map.bits(); // m-mmmm field
|
||||
second_byte |= self.b_bit() << 5;
|
||||
second_byte |= self.x_bit() << 6;
|
||||
second_byte |= self.r_bit() << 7;
|
||||
|
||||
let w_bit = self.w as u8;
|
||||
let last_byte = self.prefix_last_byte() | (w_bit << 7);
|
||||
|
||||
sink.put1(0xC4);
|
||||
sink.put1(second_byte);
|
||||
sink.put1(last_byte);
|
||||
}
|
||||
|
||||
/// Emit the VEX-encoded instruction to the code sink:
|
||||
pub fn encode<CS: ByteSink + ?Sized>(&self, sink: &mut CS) {
|
||||
// 2/3 byte prefix
|
||||
if self.use_2byte_prefix() {
|
||||
self.encode_2byte_prefix(sink);
|
||||
} else {
|
||||
self.encode_3byte_prefix(sink);
|
||||
}
|
||||
|
||||
// 1 Byte Opcode
|
||||
sink.put1(self.opcode);
|
||||
|
||||
// 1 ModRM Byte
|
||||
// Not all instructions use Reg as a reg, some use it as an extension of the opcode.
|
||||
let rm: u8 = self.rm.into();
|
||||
sink.put1(encode_modrm(3, self.reg & 7, rm & 7));
|
||||
|
||||
// TODO: 0/1 byte SIB
|
||||
// TODO: 0/1/2/4 bytes DISP
|
||||
|
||||
// Optional 1 Byte imm
|
||||
if let Some(imm) = self.imm {
|
||||
sink.put1(imm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Vector length selectable through the `L` bit of a VEX prefix.
#[allow(dead_code, missing_docs)] // Wider-length vectors are not yet used.
pub enum VexVectorLength {
    V128,
    V256,
}

impl VexVectorLength {
    /// Value of the `L` bit for this length: 0 for `V128`, 1 for `V256`.
    fn bits(&self) -> u8 {
        u8::from(matches!(self, Self::V256))
    }
}

impl Default for VexVectorLength {
    /// 128-bit vectors are the default length.
    fn default() -> Self {
        VexVectorLength::V128
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::isa::x64::inst::regs;
    use std::vec::Vec;

    /// Opcode-extension form with an immediate; everything fits the 2-byte (`C5`) prefix.
    #[test]
    fn vpslldq() {
        // VEX.128.66.0F 73 /7 ib
        // VPSLLDQ xmm1, xmm2, imm8

        let dst = regs::xmm1().to_real_reg().unwrap().hw_enc();
        let src = regs::xmm2().to_real_reg().unwrap().hw_enc();
        let mut sink0 = Vec::new();

        VexInstruction::new()
            .length(VexVectorLength::V128)
            .prefix(LegacyPrefixes::_66)
            .map(OpcodeMap::_0F)
            .opcode(0x73)
            .opcode_ext(7)
            .vvvv(dst)
            .rm(src)
            .imm(0x17)
            .encode(&mut sink0);

        assert_eq!(sink0, vec![0xc5, 0xf1, 0x73, 0xfa, 0x17]);
    }

    /// Four-operand form: the fourth register travels in `imm8[7:4]`, and the 0F3A map forces
    /// the 3-byte (`C4`) prefix.
    #[test]
    fn vblendvpd() {
        // A four operand instruction
        // VEX.128.66.0F3A.W0 4B /r /is4
        // VBLENDVPD xmm1, xmm2, xmm3, xmm4

        let dst = regs::xmm1().to_real_reg().unwrap().hw_enc();
        let a = regs::xmm2().to_real_reg().unwrap().hw_enc();
        let b = regs::xmm3().to_real_reg().unwrap().hw_enc();
        let c = regs::xmm4().to_real_reg().unwrap().hw_enc();
        let mut sink0 = Vec::new();

        VexInstruction::new()
            .length(VexVectorLength::V128)
            .prefix(LegacyPrefixes::_66)
            .map(OpcodeMap::_0F3A)
            .w(false)
            .opcode(0x4B)
            .reg(dst)
            .vvvv(a)
            .rm(b)
            .imm_reg(c)
            .encode(&mut sink0);

        assert_eq!(sink0, vec![0xc4, 0xe3, 0x69, 0x4b, 0xcb, 0x40]);
    }

    /// 256-bit form using registers 10..12; the `rm` register needs the B extension bit, which
    /// forces the 3-byte (`C4`) prefix even though the map is 0F.
    #[test]
    fn vcmpps() {
        // VEX.256.0F.WIG C2 /r ib
        // VCMPPS ymm10, ymm11, ymm12, 4 // neq

        let dst = regs::xmm10().to_real_reg().unwrap().hw_enc();
        let a = regs::xmm11().to_real_reg().unwrap().hw_enc();
        let b = regs::xmm12().to_real_reg().unwrap().hw_enc();
        let mut sink0 = Vec::new();

        VexInstruction::new()
            .length(VexVectorLength::V256)
            .prefix(LegacyPrefixes::None)
            .map(OpcodeMap::_0F)
            .opcode(0xC2)
            .reg(dst)
            .vvvv(a)
            .rm(b)
            .imm(4)
            .encode(&mut sink0);

        assert_eq!(sink0, vec![0xc4, 0x41, 0x24, 0xc2, 0xd4, 0x04]);
    }
}
|
||||
|
||||
@@ -193,6 +193,13 @@
|
||||
(src2 XmmMem)
|
||||
(dst WritableXmm))
|
||||
|
||||
;; XMM (scalar or vector) three-source op (e.g. fused multiply-add) that relies on the VEX prefix.
|
||||
(XmmRmRVex (op AvxOpcode)
|
||||
(src1 Xmm)
|
||||
(src2 Xmm)
|
||||
(src3 XmmMem)
|
||||
(dst WritableXmm))
|
||||
|
||||
;; XMM (scalar or vector) binary op that relies on the EVEX prefix.
|
||||
(XmmRmREvex (op Avx512Opcode)
|
||||
(src1 XmmMem)
|
||||
@@ -1042,6 +1049,10 @@
|
||||
(decl intcc_to_cc (IntCC) CC)
|
||||
(extern constructor intcc_to_cc intcc_to_cc)
|
||||
|
||||
;; Opcodes for instructions encoded with the VEX prefix; the enum itself is
;; declared externally (in Rust).
(type AvxOpcode extern
      (enum Vfmadd213ps Vfmadd213pd))
|
||||
|
||||
(type Avx512Opcode extern
|
||||
(enum Vcvtudq2ps
|
||||
Vpabsq
|
||||
@@ -2839,6 +2850,28 @@
|
||||
dst))
|
||||
|
||||
|
||||
;; Helper for creating `MInst.XmmRmRVex` instructions: allocates a fresh
;; temporary XMM destination, emits the instruction with the three given
;; sources, and returns the destination.
(decl xmm_rmr_vex (AvxOpcode Xmm Xmm XmmMem) Xmm)
(rule (xmm_rmr_vex op src1 src2 src3)
      (let ((dst WritableXmm (temp_writable_xmm))
            (_ Unit (emit (MInst.XmmRmRVex op src1 src2 src3 dst))))
        dst))
|
||||
|
||||
;; Helper for creating `vfmadd213ps` instructions (the `Vfmadd213ps` opcode
;; applied to `x`, `y` and `z` through `xmm_rmr_vex`).
(decl x64_vfmadd213ps (Xmm Xmm XmmMem) Xmm)
(rule (x64_vfmadd213ps x y z)
      (xmm_rmr_vex (AvxOpcode.Vfmadd213ps) x y z))
|
||||
|
||||
;; Helper for creating `vfmadd213pd` instructions (the `Vfmadd213pd` opcode
;; applied to `x`, `y` and `z` through `xmm_rmr_vex`).
(decl x64_vfmadd213pd (Xmm Xmm XmmMem) Xmm)
(rule (x64_vfmadd213pd x y z)
      (xmm_rmr_vex (AvxOpcode.Vfmadd213pd) x y z))
|
||||
|
||||
|
||||
;; Helper for creating `sqrtss` instructions.
|
||||
(decl x64_sqrtss (Xmm) Xmm)
|
||||
(rule (x64_sqrtss x)
|
||||
|
||||
@@ -794,6 +794,7 @@ pub(crate) enum InstructionSet {
|
||||
BMI1,
|
||||
#[allow(dead_code)] // never constructed (yet).
|
||||
BMI2,
|
||||
FMA,
|
||||
AVX512BITALG,
|
||||
AVX512DQ,
|
||||
AVX512F,
|
||||
@@ -1386,6 +1387,38 @@ impl fmt::Display for SseOpcode {
|
||||
}
|
||||
}
|
||||
|
||||
/// Opcodes of the VEX-encoded instructions this backend can emit.
#[derive(Clone, PartialEq)]
pub enum AvxOpcode {
    /// `vfmadd213ps`
    Vfmadd213ps,
    /// `vfmadd213pd`
    Vfmadd213pd,
}
|
||||
|
||||
impl AvxOpcode {
|
||||
/// Which `InstructionSet`s support the opcode?
|
||||
pub(crate) fn available_from(&self) -> SmallVec<[InstructionSet; 2]> {
|
||||
match self {
|
||||
AvxOpcode::Vfmadd213ps => smallvec![InstructionSet::FMA],
|
||||
AvxOpcode::Vfmadd213pd => smallvec![InstructionSet::FMA],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for AvxOpcode {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||
let name = match self {
|
||||
AvxOpcode::Vfmadd213ps => "vfmadd213ps",
|
||||
AvxOpcode::Vfmadd213pd => "vfmadd213pd",
|
||||
};
|
||||
write!(fmt, "{}", name)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for AvxOpcode {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fmt::Debug::fmt(self, f)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, PartialEq)]
|
||||
pub enum Avx512Opcode {
|
||||
Vcvtudq2ps,
|
||||
|
||||
@@ -8,6 +8,7 @@ use crate::isa::x64::encoding::rex::{
|
||||
low8_will_sign_extend_to_32, low8_will_sign_extend_to_64, reg_enc, LegacyPrefixes, OpcodeMap,
|
||||
RexFlags,
|
||||
};
|
||||
use crate::isa::x64::encoding::vex::{VexInstruction, VexVectorLength};
|
||||
use crate::isa::x64::inst::args::*;
|
||||
use crate::isa::x64::inst::*;
|
||||
use crate::machinst::{inst_common, MachBuffer, MachInstEmit, MachLabel, Reg, Writable};
|
||||
@@ -119,6 +120,7 @@ pub(crate) fn emit(
|
||||
InstructionSet::Lzcnt => info.isa_flags.use_lzcnt(),
|
||||
InstructionSet::BMI1 => info.isa_flags.use_bmi1(),
|
||||
InstructionSet::BMI2 => info.isa_flags.has_bmi2(),
|
||||
InstructionSet::FMA => info.isa_flags.has_fma(),
|
||||
InstructionSet::AVX512BITALG => info.isa_flags.has_avx512bitalg(),
|
||||
InstructionSet::AVX512DQ => info.isa_flags.has_avx512dq(),
|
||||
InstructionSet::AVX512F => info.isa_flags.has_avx512f(),
|
||||
@@ -1689,6 +1691,39 @@ pub(crate) fn emit(
|
||||
}
|
||||
}
|
||||
|
||||
Inst::XmmRmRVex {
|
||||
op,
|
||||
src1,
|
||||
src2,
|
||||
src3,
|
||||
dst,
|
||||
} => {
|
||||
let src1 = allocs.next(src1.to_reg());
|
||||
let dst = allocs.next(dst.to_reg().to_reg());
|
||||
debug_assert_eq!(src1, dst);
|
||||
let src2 = allocs.next(src2.to_reg());
|
||||
let src3 = src3.clone().to_reg_mem().with_allocs(allocs);
|
||||
|
||||
let (w, opcode) = match op {
|
||||
AvxOpcode::Vfmadd213ps => (false, 0xA8),
|
||||
AvxOpcode::Vfmadd213pd => (true, 0xA8),
|
||||
};
|
||||
|
||||
match src3 {
|
||||
RegMem::Reg { reg: src } => VexInstruction::new()
|
||||
.length(VexVectorLength::V128)
|
||||
.prefix(LegacyPrefixes::_66)
|
||||
.map(OpcodeMap::_0F38)
|
||||
.w(w)
|
||||
.opcode(opcode)
|
||||
.reg(dst.to_real_reg().unwrap().hw_enc())
|
||||
.rm(src.to_real_reg().unwrap().hw_enc())
|
||||
.vvvv(src2.to_real_reg().unwrap().hw_enc())
|
||||
.encode(sink),
|
||||
_ => todo!(),
|
||||
};
|
||||
}
|
||||
|
||||
Inst::XmmRmREvex {
|
||||
op,
|
||||
src1,
|
||||
|
||||
@@ -3701,6 +3701,21 @@ fn test_x64_emit() {
|
||||
"jmp *321(%r10,%rdx,4)",
|
||||
));
|
||||
|
||||
// ========================================================
|
||||
// XMM FMA
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r_vex(AvxOpcode::Vfmadd213ps, RegMem::reg(xmm2), xmm1, w_xmm0),
|
||||
"C4E271A8C2",
|
||||
"vfmadd213ps %xmm0, %xmm1, %xmm2, %xmm0",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r_vex(AvxOpcode::Vfmadd213pd, RegMem::reg(xmm5), xmm4, w_xmm3),
|
||||
"C4E2D9A8DD",
|
||||
"vfmadd213pd %xmm3, %xmm4, %xmm5, %xmm3",
|
||||
));
|
||||
|
||||
// ========================================================
|
||||
// XMM_CMP_RM_R
|
||||
|
||||
@@ -4866,6 +4881,7 @@ fn test_x64_emit() {
|
||||
let mut isa_flag_builder = x64::settings::builder();
|
||||
isa_flag_builder.enable("has_ssse3").unwrap();
|
||||
isa_flag_builder.enable("has_sse41").unwrap();
|
||||
isa_flag_builder.enable("has_fma").unwrap();
|
||||
isa_flag_builder.enable("has_avx512bitalg").unwrap();
|
||||
isa_flag_builder.enable("has_avx512dq").unwrap();
|
||||
isa_flag_builder.enable("has_avx512f").unwrap();
|
||||
|
||||
@@ -129,6 +129,8 @@ impl Inst {
|
||||
| Inst::XmmUnaryRmR { op, .. } => smallvec![op.available_from()],
|
||||
|
||||
Inst::XmmUnaryRmREvex { op, .. } | Inst::XmmRmREvex { op, .. } => op.available_from(),
|
||||
|
||||
Inst::XmmRmRVex { op, .. } => op.available_from(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -324,6 +326,20 @@ impl Inst {
|
||||
}
|
||||
}
|
||||
|
||||
/// Test-only constructor for `Inst::XmmRmRVex`.
///
/// `dst` doubles as the first source (`src1`), `src2` is the second source and `src3` the
/// third. All operands are expected to be in the float register class: `src3` is checked by
/// `assert_regclass_is`, the other two by debug assertions.
#[cfg(test)]
pub(crate) fn xmm_rm_r_vex(op: AvxOpcode, src3: RegMem, src2: Reg, dst: Writable<Reg>) -> Self {
    src3.assert_regclass_is(RegClass::Float);
    debug_assert!(src2.class() == RegClass::Float);
    debug_assert!(dst.to_reg().class() == RegClass::Float);

    // Wrap the operands in the same order as the fields were originally initialised.
    let src3 = XmmMem::new(src3).unwrap();
    let src2 = Xmm::new(src2).unwrap();
    let src1 = Xmm::new(dst.to_reg()).unwrap();
    let dst = WritableXmm::from_writable_reg(dst).unwrap();

    Inst::XmmRmRVex {
        op,
        src1,
        src2,
        src3,
        dst,
    }
}
|
||||
|
||||
pub(crate) fn xmm_rm_r_evex(
|
||||
op: Avx512Opcode,
|
||||
src1: RegMem,
|
||||
@@ -1136,6 +1152,29 @@ impl PrettyPrint for Inst {
|
||||
format!("{} {}, {}, {}", ljustify(op.to_string()), src1, src2, dst)
|
||||
}
|
||||
|
||||
Inst::XmmRmRVex {
|
||||
op,
|
||||
src1,
|
||||
src2,
|
||||
src3,
|
||||
dst,
|
||||
..
|
||||
} => {
|
||||
let src1 = pretty_print_reg(src1.to_reg(), 8, allocs);
|
||||
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
|
||||
let src2 = pretty_print_reg(src2.to_reg(), 8, allocs);
|
||||
let src3 = src3.pretty_print(8, allocs);
|
||||
|
||||
format!(
|
||||
"{} {}, {}, {}, {}",
|
||||
ljustify(op.to_string()),
|
||||
src1,
|
||||
src2,
|
||||
src3,
|
||||
dst
|
||||
)
|
||||
}
|
||||
|
||||
Inst::XmmRmREvex {
|
||||
op,
|
||||
src1,
|
||||
@@ -1840,6 +1879,24 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
|
||||
}
|
||||
}
|
||||
}
|
||||
Inst::XmmRmRVex {
|
||||
op,
|
||||
src1,
|
||||
src2,
|
||||
src3,
|
||||
dst,
|
||||
..
|
||||
} => {
|
||||
// Vfmadd uses and defs the dst reg, that is not the case with all
|
||||
// AVX's ops, if you're adding a new op, make sure to correctly define
|
||||
// register uses.
|
||||
assert!(*op == AvxOpcode::Vfmadd213ps || *op == AvxOpcode::Vfmadd213pd);
|
||||
|
||||
collector.reg_use(src1.to_reg());
|
||||
collector.reg_reuse_def(dst.to_writable_reg(), 0);
|
||||
collector.reg_use(src2.to_reg());
|
||||
src3.get_operands(collector);
|
||||
}
|
||||
Inst::XmmRmREvex {
|
||||
op,
|
||||
src1,
|
||||
|
||||
@@ -2566,6 +2566,13 @@
|
||||
(rule (lower (has_type $F64X2 (fmax_pseudo x y)))
|
||||
(x64_maxpd y x))
|
||||
|
||||
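;; Rules for `fma` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; NOTE(review): neither rule below is guarded by an FMA availability check,
;; while the `vfmadd213ps`/`vfmadd213pd` opcodes they emit are only listed as
;; available from `InstructionSet::FMA`. Confirm these rules cannot fire when
;; the `has_fma` flag is disabled.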
|
||||
|
||||
(rule (lower (has_type $F32X4 (fma x y z)))
|
||||
(x64_vfmadd213ps x y z))
|
||||
(rule (lower (has_type $F64X2 (fma x y z)))
|
||||
(x64_vfmadd213pd x y z))
|
||||
|
||||
;; Rules for `load*` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; In order to load a value from memory to a GPR register, we may need to extend
|
||||
|
||||
@@ -2832,7 +2832,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
|
||||
Opcode::Cls => unimplemented!("Cls not supported"),
|
||||
|
||||
Opcode::Fma => unimplemented!("Fma not supported"),
|
||||
Opcode::Fma => implemented_in_isle(ctx),
|
||||
|
||||
Opcode::BorNot | Opcode::BxorNot => {
|
||||
unimplemented!("or-not / xor-not opcodes not implemented");
|
||||
|
||||
Reference in New Issue
Block a user