Add encoding functions for emitting EVEX formats

Only the `reg, vvvv, rm` form is currently supported but it should not be difficult to add more forms.
2020-02-14 13:24:13 -08:00
parent 079fcafcb1
commit 965714d675
1 changed files with 156 additions and 1 deletions
--- a/cranelift/codegen/src/isa/x86/binemit.rs
+++ b/cranelift/codegen/src/isa/x86/binemit.rs
@@ -10,7 +10,6 @@ use crate::ir::{
 };
 use crate::isa::{RegUnit, StackBase, StackBaseMask, StackRef, TargetIsa};
 use crate::regalloc::RegDiversions;
-
 use cranelift_codegen_shared::isa::x86::EncodingBits;

 include!(concat!(env!("OUT_DIR"), "/binemit-x86.rs"));
@@ -64,6 +63,17 @@ fn rex3(rm: RegUnit, reg: RegUnit, index: RegUnit) -> u8 {
    BASE_REX | b | (x << 1) | (r << 2)
 }

+/// Encode the RXBR' bits of the EVEX P0 byte. For an explanation of these bits, see section 2.6.1
+/// in the Intel Software Development Manual, volume 2A. These bits can be used by different
+/// addressing modes (see section 2.6.2), requiring different `vex*` functions than this one.
+fn evex2(rm: RegUnit, reg: RegUnit) -> u8 {
+    let b = (!(rm >> 3) & 1) as u8;
+    let x = (!(rm >> 4) & 1) as u8;
+    let r = (!(reg >> 3) & 1) as u8;
+    let r_ = (!(reg >> 4) & 1) as u8;
+    0x00 | r_ | (b << 1) | (x << 2) | (r << 3)
+}
+
 /// Determines whether a REX prefix should be emitted.
 #[inline]
 fn needs_rex(bits: u16, rex: u8) -> bool {
@@ -209,6 +219,151 @@ fn put_rexmp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
    sink.put1(bits as u8);
 }

+/// Defines the EVEX context for the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte). Table 2-36 in
+/// section 2.6.10 (Intel Software Development Manual, volume 2A) describes how these bits can be
+/// used together for certain classes of instructions; i.e., special care should be taken to ensure
+/// that instructions use an applicable correct `EvexContext`. Table 2-39 contains cases where
+/// opcodes can result in an #UD.
+#[allow(dead_code)]
+enum EvexContext {
+    RoundingRegToRegFP {
+        rc: EvexRoundingControl,
+    },
+    NoRoundingFP {
+        sae: bool,
+        length: EvexVectorLength,
+    },
+    MemoryOp {
+        broadcast: bool,
+        length: EvexVectorLength,
+    },
+    Other {
+        length: EvexVectorLength,
+    },
+}
+
+impl EvexContext {
+    /// Encode the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte) for merging with the P2 byte.
+    fn bits(&self) -> u8 {
+        match self {
+            Self::RoundingRegToRegFP { rc } => 0b001 | rc.bits() << 1,
+            Self::NoRoundingFP { sae, length } => (*sae as u8) | length.bits() << 1,
+            Self::MemoryOp { broadcast, length } => (*broadcast as u8) | length.bits() << 1,
+            Self::Other { length } => length.bits() << 1,
+        }
+    }
+}
+
+/// The EVEX format allows choosing a vector length in the `L'` and `L` bits; see `EvexContext`.
+enum EvexVectorLength {
+    V128,
+    V256,
+    V512,
+}
+
+impl EvexVectorLength {
+    /// Encode the `L'` and `L` bits for merging with the P2 byte.
+    fn bits(&self) -> u8 {
+        match self {
+            Self::V128 => 0b00,
+            Self::V256 => 0b01,
+            Self::V512 => 0b10,
+            // 0b11 is reserved (#UD).
+        }
+    }
+}
+
+/// The EVEX format allows defining rounding control in the `L'` and `L` bits; see `EvexContext`.
+enum EvexRoundingControl {
+    RNE,
+    RD,
+    RU,
+    RZ,
+}
+
+impl EvexRoundingControl {
+    /// Encode the `L'` and `L` bits for merging with the P2 byte.
+    fn bits(&self) -> u8 {
+        match self {
+            Self::RNE => 0b00,
+            Self::RD => 0b01,
+            Self::RU => 0b10,
+            Self::RZ => 0b11,
+        }
+    }
+}
+
+/// Defines the EVEX masking behavior; masking support is described in section 2.6.4 of the Intel
+/// Software Development Manual, volume 2A.
+#[allow(dead_code)]
+enum EvexMasking {
+    None,
+    Merging { k: u8 },
+    Zeroing { k: u8 },
+}
+
+impl EvexMasking {
+    /// Encode the `z` bit for merging with the P2 byte.
+    fn z_bit(&self) -> u8 {
+        match self {
+            Self::None | Self::Merging { .. } => 0,
+            Self::Zeroing { .. } => 1,
+        }
+    }
+
+    /// Encode the `aaa` bits for merging with the P2 byte.
+    fn aaa_bits(&self) -> u8 {
+        match self {
+            Self::None => 0b000,
+            Self::Merging { k } | Self::Zeroing { k } => {
+                debug_assert!(*k <= 7);
+                *k
+            }
+        }
+    }
+}
+
+/// Encode an EVEX prefix, including the instruction opcode. To match the current recipe
+/// convention, the ModR/M byte is written separately in the recipe. This EVEX encoding function
+/// only encodes the `reg` (operand 1), `vvvv` (operand 2), `rm` (operand 3) form; other forms are
+/// possible (see section 2.6.2, Intel Software Development Manual, volume 2A), requiring
+/// refactoring of this function or separate functions for each form (e.g. as for the REX prefix).
+fn put_evex<CS: CodeSink + ?Sized>(
+    bits: u16,
+    reg: RegUnit,
+    vvvvv: RegUnit,
+    rm: RegUnit,
+    context: EvexContext,
+    masking: EvexMasking,
+    sink: &mut CS,
+) {
+    let enc = EncodingBits::from(bits);
+
+    // EVEX prefix.
+    sink.put1(0x62);
+
+    debug_assert!(enc.mm() < 0b100);
+    let mut p0 = enc.mm() & 0b11;
+    p0 |= evex2(rm, reg) << 4; // bits 3:2 are always unset
+    sink.put1(p0);
+
+    let mut p1 = enc.pp() | 0b100; // bit 2 is always set
+    p1 |= (!(vvvvv as u8) & 0b1111) << 3;
+    p1 |= (enc.rex_w() & 0b1) << 7;
+    sink.put1(p1);
+
+    let mut p2 = masking.aaa_bits();
+    p2 |= (!(vvvvv as u8 >> 4) & 0b1) << 3;
+    p2 |= context.bits() << 4;
+    p2 |= masking.z_bit() << 7;
+    sink.put1(p2);
+
+    // Opcode
+    sink.put1(enc.opcode_byte());
+
+    // ModR/M byte placed in recipe
+}
+
 /// Emit a ModR/M byte for reg-reg operands.
 fn modrm_rr<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
    let reg = reg as u8 & 7;