x64: move encodings to a separate module
In order to benchmark the encoding code with criterion, the functions and structures must be public. Moving this code to its own module (instead of keeping as a submodule to `inst`), allows `inst` to remain private. This avoids having to expose and document (or ignore documenting) the numerous instruction variants in `inst` while allowing access to the encoding code. This commit changes no functionality.
This commit is contained in:
403
cranelift/codegen/src/isa/x64/encoding/evex.rs
Normal file
403
cranelift/codegen/src/isa/x64/encoding/evex.rs
Normal file
@@ -0,0 +1,403 @@
|
||||
//! Encodes EVEX instructions. These instructions are those added by the AVX-512 extensions. The
|
||||
//! EVEX encoding requires a 4-byte prefix:
|
||||
//!
|
||||
//! Byte 0: 0x62
|
||||
//! ┌───┬───┬───┬───┬───┬───┬───┬───┐
|
||||
//! Byte 1: │ R │ X │ B │ R'│ 0 │ 0 │ m │ m │
|
||||
//! ├───┼───┼───┼───┼───┼───┼───┼───┤
|
||||
//! Byte 2: │ W │ v │ v │ v │ v │ 1 │ p │ p │
|
||||
//! ├───┼───┼───┼───┼───┼───┼───┼───┤
|
||||
//! Byte 3: │ z │ L'│ L │ b │ V'│ a │ a │ a │
|
||||
//! └───┴───┴───┴───┴───┴───┴───┴───┘
|
||||
//!
|
||||
//! The prefix is then followed by the opcode byte, the ModR/M byte, and other optional suffixes
|
||||
//! (e.g. SIB byte, displacements, immediates) based on the instruction (see section 2.6, Intel
|
||||
//! Software Development Manual, volume 2A).
|
||||
use super::rex::{encode_modrm, LegacyPrefixes, OpcodeMap};
|
||||
use super::ByteSink;
|
||||
use core::ops::RangeInclusive;
|
||||
|
||||
/// Constructs an EVEX-encoded instruction using a builder pattern. This approach makes it visually
|
||||
/// easier to transform something the manual's syntax, `EVEX.256.66.0F38.W1 1F /r` to code:
|
||||
/// `EvexInstruction::new().length(...).prefix(...).map(...).w(true).opcode(0x1F).reg(...).rm(...)`.
|
||||
pub struct EvexInstruction {
|
||||
bits: u32,
|
||||
opcode: u8,
|
||||
reg: Register,
|
||||
rm: Register,
|
||||
}
|
||||
|
||||
/// Because some of the bit flags in the EVEX prefix are reversed and users of `EvexInstruction` may
|
||||
/// choose to skip setting fields, here we set some sane defaults. Note that:
|
||||
/// - the first byte is always `0x62` but you will notice it at the end of the default `bits` value
|
||||
/// implemented--remember the little-endian order
|
||||
/// - some bits are always set to certain values: bits 10-11 to 0, bit 18 to 1
|
||||
/// - the other bits set correspond to reversed bits: R, X, B, R' (byte 1), vvvv (byte 2), V' (byte
|
||||
/// 3).
|
||||
///
|
||||
/// See the `default_emission` test for what these defaults are equivalent to (e.g. using RAX,
|
||||
/// unsetting the W bit, etc.)
|
||||
impl Default for EvexInstruction {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
bits: 0x08_7C_F0_62,
|
||||
opcode: 0,
|
||||
reg: Register::default(),
|
||||
rm: Register::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(non_upper_case_globals)] // This makes it easier to match the bit range names to the manual's names.
|
||||
impl EvexInstruction {
|
||||
/// Construct a default EVEX instruction.
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Set the length of the instruction . Note that there are sets of instructions (i.e. rounding,
|
||||
/// memory broadcast) that modify the same underlying bits--at some point (TODO) we can add a
|
||||
/// way to set those context bits and verify that both are not used (e.g. rounding AND length).
|
||||
/// For now, this method is very convenient.
|
||||
#[inline(always)]
|
||||
pub fn length(mut self, length: EvexVectorLength) -> Self {
|
||||
self.write(Self::LL, EvexContext::Other { length }.bits() as u32);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the legacy prefix byte of the instruction: None | 66 | F0 | F2 | F3. EVEX instructions
|
||||
/// pack these into the prefix, not as separate bytes.
|
||||
#[inline(always)]
|
||||
pub fn prefix(mut self, prefix: LegacyPrefixes) -> Self {
|
||||
self.write(Self::pp, prefix.bits() as u32);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the opcode map byte of the instruction: None | 0F | 0F38 | 0F3A. EVEX instructions pack
|
||||
/// these into the prefix, not as separate bytes.
|
||||
#[inline(always)]
|
||||
pub fn map(mut self, map: OpcodeMap) -> Self {
|
||||
self.write(Self::mm, map.bits() as u32);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the W bit, typically used to indicate an instruction using 64 bits of an operand (e.g.
|
||||
/// 64 bit lanes). EVEX packs this bit in the EVEX prefix; previous encodings used the REX
|
||||
/// prefix.
|
||||
#[inline(always)]
|
||||
pub fn w(mut self, w: bool) -> Self {
|
||||
self.write(Self::W, w as u32);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the instruction opcode byte.
|
||||
#[inline(always)]
|
||||
pub fn opcode(mut self, opcode: u8) -> Self {
|
||||
self.opcode = opcode;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the register to use for the `reg` bits; many instructions use this as the write operand.
|
||||
/// Setting this affects both the ModRM byte (`reg` section) and the EVEX prefix (the extension
|
||||
/// bits for register encodings > 8).
|
||||
#[inline(always)]
|
||||
pub fn reg(mut self, reg: impl Into<Register>) -> Self {
|
||||
self.reg = reg.into();
|
||||
let r = !(self.reg.0 >> 3) & 1;
|
||||
let r_ = !(self.reg.0 >> 4) & 1;
|
||||
self.write(Self::R, r as u32);
|
||||
self.write(Self::R_, r_ as u32);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the mask to use. See section 2.6 in the Intel Software Developer's Manual, volume 2A for
|
||||
/// more details.
|
||||
#[allow(dead_code)]
|
||||
#[inline(always)]
|
||||
pub fn mask(mut self, mask: EvexMasking) -> Self {
|
||||
self.write(Self::aaa, mask.aaa_bits() as u32);
|
||||
self.write(Self::z, mask.z_bit() as u32);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the `vvvvv` register; some instructions allow using this as a second, non-destructive
|
||||
/// source register in 3-operand instructions (e.g. 2 read, 1 write).
|
||||
#[allow(dead_code)]
|
||||
#[inline(always)]
|
||||
pub fn vvvvv(mut self, reg: impl Into<Register>) -> Self {
|
||||
let reg = reg.into();
|
||||
self.write(Self::vvvv, !(reg.0 as u32) & 0b1111);
|
||||
self.write(Self::V_, !(reg.0 as u32 >> 4) & 0b1);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the register to use for the `rm` bits; many instructions use this as the "read from
|
||||
/// register/memory" operand. Currently this does not support memory addressing (TODO).Setting
|
||||
/// this affects both the ModRM byte (`rm` section) and the EVEX prefix (the extension bits for
|
||||
/// register encodings > 8).
|
||||
#[inline(always)]
|
||||
pub fn rm(mut self, reg: impl Into<Register>) -> Self {
|
||||
self.rm = reg.into();
|
||||
let b = !(self.rm.0 >> 3) & 1;
|
||||
let x = !(self.rm.0 >> 4) & 1;
|
||||
self.write(Self::X, x as u32);
|
||||
self.write(Self::B, b as u32);
|
||||
self
|
||||
}
|
||||
|
||||
/// Emit the EVEX-encoded instruction to the code sink:
|
||||
/// - first, the 4-byte EVEX prefix;
|
||||
/// - then, the opcode byte;
|
||||
/// - finally, the ModR/M byte.
|
||||
///
|
||||
/// Eventually this method should support encodings of more than just the reg-reg addressing mode (TODO).
|
||||
pub fn encode<CS: ByteSink + ?Sized>(&self, sink: &mut CS) {
|
||||
sink.put4(self.bits);
|
||||
sink.put1(self.opcode);
|
||||
sink.put1(encode_modrm(3, self.reg.0 & 7, self.rm.0 & 7));
|
||||
}
|
||||
|
||||
// In order to simplify the encoding of the various bit ranges in the prefix, we specify those
|
||||
// ranges according to the table below (extracted from the Intel Software Development Manual,
|
||||
// volume 2A). Remember that, because we pack the 4-byte prefix into a little-endian `u32`, this
|
||||
// chart should be read from right-to-left, top-to-bottom. Note also that we start ranges at bit
|
||||
// 8, leaving bits 0-7 for the mandatory `0x62`.
|
||||
// ┌───┬───┬───┬───┬───┬───┬───┬───┐
|
||||
// Byte 1: │ R │ X │ B │ R'│ 0 │ 0 │ m │ m │
|
||||
// ├───┼───┼───┼───┼───┼───┼───┼───┤
|
||||
// Byte 2: │ W │ v │ v │ v │ v │ 1 │ p │ p │
|
||||
// ├───┼───┼───┼───┼───┼───┼───┼───┤
|
||||
// Byte 3: │ z │ L'│ L │ b │ V'│ a │ a │ a │
|
||||
// └───┴───┴───┴───┴───┴───┴───┴───┘
|
||||
|
||||
// Byte 1:
|
||||
const mm: RangeInclusive<u8> = 8..=9;
|
||||
const R_: RangeInclusive<u8> = 12..=12;
|
||||
const B: RangeInclusive<u8> = 13..=13;
|
||||
const X: RangeInclusive<u8> = 14..=14;
|
||||
const R: RangeInclusive<u8> = 15..=15;
|
||||
|
||||
// Byte 2:
|
||||
const pp: RangeInclusive<u8> = 16..=17;
|
||||
const vvvv: RangeInclusive<u8> = 19..=22;
|
||||
const W: RangeInclusive<u8> = 23..=23;
|
||||
|
||||
// Byte 3:
|
||||
const aaa: RangeInclusive<u8> = 24..=26;
|
||||
const V_: RangeInclusive<u8> = 27..=27;
|
||||
#[allow(dead_code)] // Will be used once broadcast and rounding controls are exposed.
|
||||
const b: RangeInclusive<u8> = 28..=28;
|
||||
const LL: RangeInclusive<u8> = 29..=30;
|
||||
const z: RangeInclusive<u8> = 31..=31;
|
||||
|
||||
// A convenience method for writing the `value` bits to the given range in `self.bits`.
|
||||
#[inline]
|
||||
fn write(&mut self, range: RangeInclusive<u8>, value: u32) {
|
||||
assert!(ExactSizeIterator::len(&range) > 0);
|
||||
let size = range.end() - range.start() + 1; // Calculate the number of bits in the range.
|
||||
let mask: u32 = (1 << size) - 1; // Generate a bit mask.
|
||||
debug_assert!(
|
||||
value <= mask,
|
||||
"The written value should have fewer than {} bits.",
|
||||
size
|
||||
);
|
||||
let mask_complement = !(mask << *range.start()); // Create the bitwise complement for the clear mask.
|
||||
self.bits &= mask_complement; // Clear the bits in `range`; otherwise the OR below may allow previously-set bits to slip through.
|
||||
let value = value << *range.start(); // Place the value in the correct location (assumes `value <= mask`).
|
||||
self.bits |= value; // Modify the bits in `range`.
|
||||
}
|
||||
}
|
||||
|
||||
/// Describe the register index to use. This wrapper is a type-safe way to pass
/// around the registers defined in `inst/regs.rs`.
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq)]
pub struct Register(u8);

impl From<u8> for Register {
    /// Wrap a raw hardware register index. Debug builds assert that the index is below 16.
    fn from(reg: u8) -> Self {
        debug_assert!(reg < 16);
        Self(reg)
    }
}

// Implementing `From` (rather than `Into`) also provides `Into<u8> for Register` through the
// standard blanket impl, so existing `.into()` callers keep working.
impl From<Register> for u8 {
    /// Recover the raw hardware register index.
    fn from(reg: Register) -> Self {
        reg.0
    }
}
|
||||
|
||||
/// Defines the EVEX context for the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte). Table 2-36 in
|
||||
/// section 2.6.10 (Intel Software Development Manual, volume 2A) describes how these bits can be
|
||||
/// used together for certain classes of instructions; i.e., special care should be taken to ensure
|
||||
/// that instructions use an applicable correct `EvexContext`. Table 2-39 contains cases where
|
||||
/// opcodes can result in an #UD.
|
||||
#[allow(dead_code, missing_docs)] // Rounding and broadcast modes are not yet used.
|
||||
pub enum EvexContext {
|
||||
RoundingRegToRegFP {
|
||||
rc: EvexRoundingControl,
|
||||
},
|
||||
NoRoundingFP {
|
||||
sae: bool,
|
||||
length: EvexVectorLength,
|
||||
},
|
||||
MemoryOp {
|
||||
broadcast: bool,
|
||||
length: EvexVectorLength,
|
||||
},
|
||||
Other {
|
||||
length: EvexVectorLength,
|
||||
},
|
||||
}
|
||||
|
||||
impl Default for EvexContext {
|
||||
fn default() -> Self {
|
||||
Self::Other {
|
||||
length: EvexVectorLength::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl EvexContext {
|
||||
/// Encode the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte) for merging with the P2 byte.
|
||||
pub fn bits(&self) -> u8 {
|
||||
match self {
|
||||
Self::RoundingRegToRegFP { rc } => 0b001 | rc.bits() << 1,
|
||||
Self::NoRoundingFP { sae, length } => (*sae as u8) | length.bits() << 1,
|
||||
Self::MemoryOp { broadcast, length } => (*broadcast as u8) | length.bits() << 1,
|
||||
Self::Other { length } => length.bits() << 1,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The EVEX format allows choosing a vector length in the `L'` and `L` bits; see `EvexContext`.
#[allow(dead_code, missing_docs)] // Wider-length vectors are not yet used.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum EvexVectorLength {
    /// 128-bit vectors; encoded as `0b00`.
    V128,
    /// 256-bit vectors; encoded as `0b01`.
    V256,
    /// 512-bit vectors; encoded as `0b10`.
    V512,
}

impl EvexVectorLength {
    /// Encode the `L'` and `L` bits for merging with the P2 byte.
    fn bits(&self) -> u8 {
        match self {
            Self::V128 => 0b00,
            Self::V256 => 0b01,
            Self::V512 => 0b10,
            // 0b11 is reserved (#UD).
        }
    }
}

impl Default for EvexVectorLength {
    /// The default vector length is 128 bits.
    fn default() -> Self {
        Self::V128
    }
}
|
||||
|
||||
/// The EVEX format allows defining rounding control in the `L'` and `L` bits; see `EvexContext`.
#[allow(dead_code, missing_docs)] // Rounding controls are not yet used.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum EvexRoundingControl {
    /// Encoded as `0b00`.
    RNE,
    /// Encoded as `0b01`.
    RD,
    /// Encoded as `0b10`.
    RU,
    /// Encoded as `0b11`.
    RZ,
}

impl EvexRoundingControl {
    /// Encode the `L'` and `L` bits for merging with the P2 byte.
    fn bits(&self) -> u8 {
        match self {
            Self::RNE => 0b00,
            Self::RD => 0b01,
            Self::RU => 0b10,
            Self::RZ => 0b11,
        }
    }
}
|
||||
|
||||
/// Defines the EVEX masking behavior; masking support is described in section 2.6.4 of the Intel
/// Software Development Manual, volume 2A.
#[allow(dead_code, missing_docs)] // Masking is not yet used.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum EvexMasking {
    /// No masking: `aaa` is 0 and `z` is 0.
    None,
    /// Masking with mask register index `k` and the `z` bit cleared.
    Merging { k: u8 },
    /// Masking with mask register index `k` and the `z` bit set.
    Zeroing { k: u8 },
}

impl Default for EvexMasking {
    /// The default is to apply no masking.
    fn default() -> Self {
        EvexMasking::None
    }
}

impl EvexMasking {
    /// Encode the `z` bit for merging with the P2 byte.
    pub fn z_bit(&self) -> u8 {
        match self {
            Self::None | Self::Merging { .. } => 0,
            Self::Zeroing { .. } => 1,
        }
    }

    /// Encode the `aaa` bits for merging with the P2 byte. Debug builds assert that the mask
    /// register index fits in the three available bits.
    pub fn aaa_bits(&self) -> u8 {
        match self {
            Self::None => 0b000,
            Self::Merging { k } | Self::Zeroing { k } => {
                debug_assert!(*k <= 7);
                *k
            }
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::isa::x64::inst::regs;
    use std::vec::Vec;

    // Sanity check: the bytes produced here should match the output of
    // `xed-asmparse-main 'vpabsq xmm0{k0}, xmm1'`.
    #[test]
    fn vpabsq() {
        let (dst, src) = (regs::xmm0(), regs::xmm1());
        let mut emitted = Vec::new();

        let inst = EvexInstruction::new()
            .prefix(LegacyPrefixes::_66)
            .map(OpcodeMap::_0F38)
            .w(true)
            .opcode(0x1F)
            .reg(dst.get_hw_encoding())
            .rm(src.get_hw_encoding())
            .length(EvexVectorLength::V128);
        inst.encode(&mut emitted);

        assert_eq!(emitted, vec![0x62, 0xf2, 0xfd, 0x08, 0x1f, 0xc1]);
    }

    /// Check that a default-constructed instruction encodes the same bytes as one where every
    /// setter is called explicitly with its "zero" choice: opcode `0x00`, register 0 (i.e. `rax`)
    /// for both operands, no prefix, no map, W unset, 128-bit length and no masking. This is less
    /// trivial than it looks because some fields are stored with flipped bits (e.g. `vvvvv`), so
    /// an all-zero prefix would not be the right default.
    #[test]
    fn default_emission() {
        let mut from_default = Vec::new();
        EvexInstruction::new().encode(&mut from_default);

        let rax = regs::rax().get_hw_encoding();
        let mut from_explicit = Vec::new();
        let explicit = EvexInstruction::new()
            .length(EvexVectorLength::V128)
            .prefix(LegacyPrefixes::None)
            .map(OpcodeMap::None)
            .w(false)
            .opcode(0x00)
            .reg(rax)
            .rm(rax)
            .mask(EvexMasking::None);
        explicit.encode(&mut from_explicit);

        assert_eq!(from_default, from_explicit);
    }
}
|
||||
60
cranelift/codegen/src/isa/x64/encoding/mod.rs
Normal file
60
cranelift/codegen/src/isa/x64/encoding/mod.rs
Normal file
@@ -0,0 +1,60 @@
|
||||
//! Contains the encoding machinery for the various x64 instruction formats.
|
||||
use crate::{isa::x64, machinst::MachBuffer};
|
||||
use std::vec::Vec;
|
||||
|
||||
pub mod evex;
|
||||
pub mod rex;
|
||||
pub mod vex;
|
||||
|
||||
/// A destination for encoded bytes. Every encoding format in this module writes its output
/// through this trait.
pub trait ByteSink {
    /// Add 1 byte to the code section.
    fn put1(&mut self, value: u8);

    /// Add 2 bytes to the code section.
    fn put2(&mut self, value: u16);

    /// Add 4 bytes to the code section.
    fn put4(&mut self, value: u32);

    /// Add 8 bytes to the code section.
    fn put8(&mut self, value: u64);
}
|
||||
|
||||
impl ByteSink for MachBuffer<x64::inst::Inst> {
|
||||
fn put1(&mut self, value: u8) {
|
||||
self.put1(value)
|
||||
}
|
||||
|
||||
fn put2(&mut self, value: u16) {
|
||||
self.put2(value)
|
||||
}
|
||||
|
||||
fn put4(&mut self, value: u32) {
|
||||
self.put4(value)
|
||||
}
|
||||
|
||||
fn put8(&mut self, value: u64) {
|
||||
self.put8(value)
|
||||
}
|
||||
}
|
||||
|
||||
/// Provide a convenient implementation for testing.
|
||||
impl ByteSink for Vec<u8> {
|
||||
fn put1(&mut self, v: u8) {
|
||||
self.extend_from_slice(&[v])
|
||||
}
|
||||
|
||||
fn put2(&mut self, v: u16) {
|
||||
self.extend_from_slice(&v.to_le_bytes())
|
||||
}
|
||||
|
||||
fn put4(&mut self, v: u32) {
|
||||
self.extend_from_slice(&v.to_le_bytes())
|
||||
}
|
||||
|
||||
fn put8(&mut self, v: u64) {
|
||||
self.extend_from_slice(&v.to_le_bytes())
|
||||
}
|
||||
}
|
||||
504
cranelift/codegen/src/isa/x64/encoding/rex.rs
Normal file
504
cranelift/codegen/src/isa/x64/encoding/rex.rs
Normal file
@@ -0,0 +1,504 @@
|
||||
//! Encodes instructions in the standard x86 encoding mode. This is called IA-32E mode in the Intel
|
||||
//! manuals but corresponds to the addition of the REX-prefix format (hence the name of this module)
|
||||
//! that allowed encoding instructions in both compatibility mode (32-bit instructions running on a
|
||||
//! 64-bit OS) and in 64-bit mode (using the full 64-bit address space).
|
||||
//!
|
||||
//! For all of the routines that take both a memory-or-reg operand (sometimes called "E" in the
|
||||
//! Intel documentation, see the Intel Developer's manual, vol. 2, section A.2) and a reg-only
|
||||
//! operand ("G" in Intelese), the order is always G first, then E. The term "enc" in the following
|
||||
//! means "hardware register encoding number".
|
||||
|
||||
use crate::{
|
||||
ir::TrapCode,
|
||||
isa::x64::inst::{
|
||||
args::{Amode, OperandSize},
|
||||
regs, EmitInfo, EmitState, Inst, LabelUse,
|
||||
},
|
||||
machinst::{MachBuffer, MachInstEmitInfo},
|
||||
};
|
||||
use regalloc::{Reg, RegClass};
|
||||
|
||||
/// Returns true when sign-extending the low 8 bits of `x` to 64 bits gives the same value as
/// sign-extending all 32 bits of `x` to 64 bits.
pub(crate) fn low8_will_sign_extend_to_64(x: u32) -> bool {
    let full = i64::from(x as i32);
    // Truncating to `i8` keeps the low byte; widening again sign-extends it.
    let from_low_byte = i64::from(full as i8);
    full == from_low_byte
}
|
||||
|
||||
/// Returns true when sign-extending the low 8 bits of `x` to 32 bits reproduces `x` (viewed as a
/// signed 32-bit value).
pub(crate) fn low8_will_sign_extend_to_32(x: u32) -> bool {
    let full = x as i32;
    // Truncating to `i8` keeps the low byte; widening again sign-extends it.
    let from_low_byte = i32::from(full as i8);
    full == from_low_byte
}
|
||||
|
||||
/// Encode the ModR/M byte: `m0d` in bits 7:6, `enc_reg_g` in bits 5:3 and `rm_e` in bits 2:0.
/// Debug builds assert that every argument fits in its field.
#[inline(always)]
pub fn encode_modrm(m0d: u8, enc_reg_g: u8, rm_e: u8) -> u8 {
    debug_assert!(m0d < 4);
    debug_assert!(enc_reg_g < 8);
    debug_assert!(rm_e < 8);
    let mod_field = (m0d & 0b11) << 6;
    let reg_field = (enc_reg_g & 0b111) << 3;
    let rm_field = rm_e & 0b111;
    mod_field | reg_field | rm_field
}
|
||||
|
||||
/// Encode the SIB byte: `shift` in bits 7:6, `enc_index` in bits 5:3 and `enc_base` in bits 2:0.
/// Debug builds assert that every argument fits in its field.
#[inline(always)]
pub(crate) fn encode_sib(shift: u8, enc_index: u8, enc_base: u8) -> u8 {
    debug_assert!(shift < 4);
    debug_assert!(enc_index < 8);
    debug_assert!(enc_base < 8);
    let scale_field = (shift & 0b11) << 6;
    let index_field = (enc_index & 0b111) << 3;
    let base_field = enc_base & 0b111;
    scale_field | index_field | base_field
}
|
||||
|
||||
/// Get the encoding number of a GPR.
///
/// Debug builds assert that `reg` is a real register and that its class is `RegClass::I64`; the
/// returned value is whatever `reg.get_hw_encoding()` reports.
|
||||
#[inline(always)]
|
||||
pub(crate) fn int_reg_enc(reg: Reg) -> u8 {
|
||||
debug_assert!(reg.is_real());
|
||||
debug_assert_eq!(reg.get_class(), RegClass::I64);
|
||||
reg.get_hw_encoding()
|
||||
}
|
||||
|
||||
/// Get the encoding number of any register.
///
/// Unlike `int_reg_enc`, no register class is checked; debug builds only assert that `reg` is a
/// real register. The returned value is whatever `reg.get_hw_encoding()` reports.
|
||||
#[inline(always)]
|
||||
pub(crate) fn reg_enc(reg: Reg) -> u8 {
|
||||
debug_assert!(reg.is_real());
|
||||
reg.get_hw_encoding()
|
||||
}
|
||||
|
||||
/// A small bit field to record a REX prefix specification:
|
||||
/// - bit 0 set to 1 indicates REX.W must be 0 (cleared).
|
||||
/// - bit 1 set to 1 indicates the REX prefix must always be emitted.
|
||||
#[repr(transparent)]
|
||||
#[derive(Clone, Copy)]
|
||||
pub(crate) struct RexFlags(u8);
|
||||
|
||||
impl RexFlags {
|
||||
/// By default, set the W field, and don't always emit.
|
||||
#[inline(always)]
|
||||
pub(crate) fn set_w() -> Self {
|
||||
Self(0)
|
||||
}
|
||||
/// Creates a new RexPrefix for which the REX.W bit will be cleared.
|
||||
#[inline(always)]
|
||||
pub(crate) fn clear_w() -> Self {
|
||||
Self(1)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub(crate) fn always_emit(&mut self) -> &mut Self {
|
||||
self.0 = self.0 | 2;
|
||||
self
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub(crate) fn always_emit_if_8bit_needed(&mut self, reg: Reg) -> &mut Self {
|
||||
let enc_reg = int_reg_enc(reg);
|
||||
if enc_reg >= 4 && enc_reg <= 7 {
|
||||
self.always_emit();
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub(crate) fn must_clear_w(&self) -> bool {
|
||||
(self.0 & 1) != 0
|
||||
}
|
||||
#[inline(always)]
|
||||
pub(crate) fn must_always_emit(&self) -> bool {
|
||||
(self.0 & 2) != 0
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub(crate) fn emit_two_op(&self, sink: &mut MachBuffer<Inst>, enc_g: u8, enc_e: u8) {
|
||||
let w = if self.must_clear_w() { 0 } else { 1 };
|
||||
let r = (enc_g >> 3) & 1;
|
||||
let x = 0;
|
||||
let b = (enc_e >> 3) & 1;
|
||||
let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
|
||||
if rex != 0x40 || self.must_always_emit() {
|
||||
sink.put1(rex);
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn emit_three_op(
|
||||
&self,
|
||||
sink: &mut MachBuffer<Inst>,
|
||||
enc_g: u8,
|
||||
enc_index: u8,
|
||||
enc_base: u8,
|
||||
) {
|
||||
let w = if self.must_clear_w() { 0 } else { 1 };
|
||||
let r = (enc_g >> 3) & 1;
|
||||
let x = (enc_index >> 3) & 1;
|
||||
let b = (enc_base >> 3) & 1;
|
||||
let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
|
||||
if rex != 0x40 || self.must_always_emit() {
|
||||
sink.put1(rex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate the proper Rex flags for the given operand size.
|
||||
impl From<OperandSize> for RexFlags {
|
||||
fn from(size: OperandSize) -> Self {
|
||||
match size {
|
||||
OperandSize::Size64 => RexFlags::set_w(),
|
||||
_ => RexFlags::clear_w(),
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Generate Rex flags for an OperandSize/register tuple.
|
||||
impl From<(OperandSize, Reg)> for RexFlags {
|
||||
fn from((size, reg): (OperandSize, Reg)) -> Self {
|
||||
let mut rex = RexFlags::from(size);
|
||||
if size == OperandSize::Size8 {
|
||||
rex.always_emit_if_8bit_needed(reg);
|
||||
}
|
||||
rex
|
||||
}
|
||||
}
|
||||
|
||||
/// Allows using the same opcode byte in different "opcode maps" to allow for more instruction
/// encodings. See appendix A in the Intel Software Developer's Manual, volume 2A, for more details.
#[allow(missing_docs)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum OpcodeMap {
    /// No opcode map; packed as `0b00`.
    None,
    /// The `0F` opcode map; packed as `0b01`.
    _0F,
    /// The `0F 38` opcode map; packed as `0b10`.
    _0F38,
    /// The `0F 3A` opcode map; packed as `0b11`.
    _0F3A,
}

impl OpcodeMap {
    /// Normally the opcode map is specified as bytes in the instruction, but some x64 encoding
    /// formats pack this information as bits in a prefix (e.g. EVEX).
    pub(crate) fn bits(&self) -> u8 {
        match self {
            OpcodeMap::None => 0b00,
            OpcodeMap::_0F => 0b01,
            OpcodeMap::_0F38 => 0b10,
            OpcodeMap::_0F3A => 0b11,
        }
    }
}

impl Default for OpcodeMap {
    /// The default is to use no opcode map.
    fn default() -> Self {
        Self::None
    }
}
|
||||
|
||||
/// We may need to include one or more legacy prefix bytes before the REX prefix. This enum
|
||||
/// covers only the small set of possibilities that we actually need.
|
||||
pub enum LegacyPrefixes {
|
||||
/// No prefix bytes.
|
||||
None,
|
||||
/// Operand Size Override -- here, denoting "16-bit operation".
|
||||
_66,
|
||||
/// The Lock prefix.
|
||||
_F0,
|
||||
/// Operand size override and Lock.
|
||||
_66F0,
|
||||
/// REPNE, but no specific meaning here -- is just an opcode extension.
|
||||
_F2,
|
||||
/// REP/REPE, but no specific meaning here -- is just an opcode extension.
|
||||
_F3,
|
||||
/// Operand size override and same effect as F3.
|
||||
_66F3,
|
||||
}
|
||||
|
||||
impl LegacyPrefixes {
|
||||
/// Emit the legacy prefix as bytes (e.g. in REX instructions).
|
||||
#[inline(always)]
|
||||
pub(crate) fn emit(&self, sink: &mut MachBuffer<Inst>) {
|
||||
match self {
|
||||
Self::_66 => sink.put1(0x66),
|
||||
Self::_F0 => sink.put1(0xF0),
|
||||
Self::_66F0 => {
|
||||
// I don't think the order matters, but in any case, this is the same order that
|
||||
// the GNU assembler uses.
|
||||
sink.put1(0x66);
|
||||
sink.put1(0xF0);
|
||||
}
|
||||
Self::_F2 => sink.put1(0xF2),
|
||||
Self::_F3 => sink.put1(0xF3),
|
||||
Self::_66F3 => {
|
||||
sink.put1(0x66);
|
||||
sink.put1(0xF3);
|
||||
}
|
||||
Self::None => (),
|
||||
}
|
||||
}
|
||||
|
||||
/// Emit the legacy prefix as bits (e.g. for EVEX instructions).
|
||||
#[inline(always)]
|
||||
pub(crate) fn bits(&self) -> u8 {
|
||||
match self {
|
||||
Self::None => 0b00,
|
||||
Self::_66 => 0b01,
|
||||
Self::_F3 => 0b10,
|
||||
Self::_F2 => 0b11,
|
||||
_ => panic!(
|
||||
"VEX and EVEX bits can only be extracted from single prefixes: None, 66, F3, F2"
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for LegacyPrefixes {
|
||||
fn default() -> Self {
|
||||
Self::None
|
||||
}
|
||||
}
|
||||
|
||||
/// This is the core 'emit' function for instructions that reference memory.
|
||||
///
|
||||
/// For an instruction that has as operands a reg encoding `enc_g` and a memory address `mem_e`,
|
||||
/// create and emit:
|
||||
/// - first the legacy prefixes, if any
|
||||
/// - then the REX prefix, if needed
|
||||
/// - then caller-supplied opcode byte(s) (`opcodes` and `num_opcodes`),
|
||||
/// - then the MOD/RM byte,
|
||||
/// - then optionally, a SIB byte,
|
||||
/// - and finally optionally an immediate that will be derived from the `mem_e` operand.
|
||||
///
|
||||
/// For most instructions up to and including SSE4.2, that will be the whole instruction: this is
|
||||
/// what we call "standard" instructions, and abbreviate "std" in the name here. VEX-prefixed
|
||||
/// instructions will require their own emitter functions.
|
||||
///
|
||||
/// This will also work for 32-bits x86 instructions, assuming no REX prefix is provided.
|
||||
///
|
||||
/// The opcodes are written bigendianly for the convenience of callers. For example, if the opcode
|
||||
/// bytes to be emitted are, in this order, F3 0F 27, then the caller should pass `opcodes` ==
|
||||
/// 0xF3_0F_27 and `num_opcodes` == 3.
|
||||
///
|
||||
/// The register operand is represented here not as a `Reg` but as its hardware encoding, `enc_g`.
|
||||
/// `rex` can specify special handling for the REX prefix. By default, the REX prefix will
|
||||
/// indicate a 64-bit operation and will be deleted if it is redundant (0x40). Note that for a
|
||||
/// 64-bit operation, the REX prefix will normally never be redundant, since REX.W must be 1 to
|
||||
/// indicate a 64-bit operation.
|
||||
pub(crate) fn emit_std_enc_mem(
|
||||
sink: &mut MachBuffer<Inst>,
|
||||
state: &EmitState,
|
||||
info: &EmitInfo,
|
||||
prefixes: LegacyPrefixes,
|
||||
opcodes: u32,
|
||||
mut num_opcodes: usize,
|
||||
enc_g: u8,
|
||||
mem_e: &Amode,
|
||||
rex: RexFlags,
|
||||
) {
|
||||
// General comment for this function: the registers in `mem_e` must be
|
||||
// 64-bit integer registers, because they are part of an address
|
||||
// expression. But `enc_g` can be derived from a register of any class.
|
||||
|
||||
let srcloc = state.cur_srcloc();
|
||||
let can_trap = mem_e.can_trap();
|
||||
if can_trap {
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
|
||||
prefixes.emit(sink);
|
||||
|
||||
match mem_e {
|
||||
Amode::ImmReg { simm32, base, .. } => {
|
||||
// If this is an access based off of RSP, it may trap with a stack overflow if it's the
|
||||
// first touch of a new stack page.
|
||||
if *base == regs::rsp() && !can_trap && info.flags().enable_probestack() {
|
||||
sink.add_trap(srcloc, TrapCode::StackOverflow);
|
||||
}
|
||||
|
||||
// First, the REX byte.
|
||||
let enc_e = int_reg_enc(*base);
|
||||
rex.emit_two_op(sink, enc_g, enc_e);
|
||||
|
||||
// Now the opcode(s). These include any other prefixes the caller
|
||||
// hands to us.
|
||||
while num_opcodes > 0 {
|
||||
num_opcodes -= 1;
|
||||
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
|
||||
}
|
||||
|
||||
// Now the mod/rm and associated immediates. This is
|
||||
// significantly complicated due to the multiple special cases.
|
||||
if *simm32 == 0
|
||||
&& enc_e != regs::ENC_RSP
|
||||
&& enc_e != regs::ENC_RBP
|
||||
&& enc_e != regs::ENC_R12
|
||||
&& enc_e != regs::ENC_R13
|
||||
{
|
||||
// FIXME JRS 2020Feb11: those four tests can surely be
|
||||
// replaced by a single mask-and-compare check. We should do
|
||||
// that because this routine is likely to be hot.
|
||||
sink.put1(encode_modrm(0, enc_g & 7, enc_e & 7));
|
||||
} else if *simm32 == 0 && (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12) {
|
||||
sink.put1(encode_modrm(0, enc_g & 7, 4));
|
||||
sink.put1(0x24);
|
||||
} else if low8_will_sign_extend_to_32(*simm32)
|
||||
&& enc_e != regs::ENC_RSP
|
||||
&& enc_e != regs::ENC_R12
|
||||
{
|
||||
sink.put1(encode_modrm(1, enc_g & 7, enc_e & 7));
|
||||
sink.put1((simm32 & 0xFF) as u8);
|
||||
} else if enc_e != regs::ENC_RSP && enc_e != regs::ENC_R12 {
|
||||
sink.put1(encode_modrm(2, enc_g & 7, enc_e & 7));
|
||||
sink.put4(*simm32);
|
||||
} else if (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12)
|
||||
&& low8_will_sign_extend_to_32(*simm32)
|
||||
{
|
||||
// REX.B distinguishes RSP from R12
|
||||
sink.put1(encode_modrm(1, enc_g & 7, 4));
|
||||
sink.put1(0x24);
|
||||
sink.put1((simm32 & 0xFF) as u8);
|
||||
} else if enc_e == regs::ENC_R12 || enc_e == regs::ENC_RSP {
|
||||
//.. wait for test case for RSP case
|
||||
// REX.B distinguishes RSP from R12
|
||||
sink.put1(encode_modrm(2, enc_g & 7, 4));
|
||||
sink.put1(0x24);
|
||||
sink.put4(*simm32);
|
||||
} else {
|
||||
unreachable!("ImmReg");
|
||||
}
|
||||
}
|
||||
|
||||
Amode::ImmRegRegShift {
|
||||
simm32,
|
||||
base: reg_base,
|
||||
index: reg_index,
|
||||
shift,
|
||||
..
|
||||
} => {
|
||||
// If this is an access based off of RSP, it may trap with a stack overflow if it's the
|
||||
// first touch of a new stack page.
|
||||
if *reg_base == regs::rsp() && !can_trap && info.flags().enable_probestack() {
|
||||
sink.add_trap(srcloc, TrapCode::StackOverflow);
|
||||
}
|
||||
|
||||
let enc_base = int_reg_enc(*reg_base);
|
||||
let enc_index = int_reg_enc(*reg_index);
|
||||
|
||||
// The rex byte.
|
||||
rex.emit_three_op(sink, enc_g, enc_index, enc_base);
|
||||
|
||||
// All other prefixes and opcodes.
|
||||
while num_opcodes > 0 {
|
||||
num_opcodes -= 1;
|
||||
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
|
||||
}
|
||||
|
||||
// modrm, SIB, immediates.
|
||||
if low8_will_sign_extend_to_32(*simm32) && enc_index != regs::ENC_RSP {
|
||||
sink.put1(encode_modrm(1, enc_g & 7, 4));
|
||||
sink.put1(encode_sib(*shift, enc_index & 7, enc_base & 7));
|
||||
sink.put1(*simm32 as u8);
|
||||
} else if enc_index != regs::ENC_RSP {
|
||||
sink.put1(encode_modrm(2, enc_g & 7, 4));
|
||||
sink.put1(encode_sib(*shift, enc_index & 7, enc_base & 7));
|
||||
sink.put4(*simm32);
|
||||
} else {
|
||||
panic!("ImmRegRegShift");
|
||||
}
|
||||
}
|
||||
|
||||
Amode::RipRelative { ref target } => {
|
||||
// First, the REX byte, with REX.B = 0.
|
||||
rex.emit_two_op(sink, enc_g, 0);
|
||||
|
||||
// Now the opcode(s). These include any other prefixes the caller
|
||||
// hands to us.
|
||||
while num_opcodes > 0 {
|
||||
num_opcodes -= 1;
|
||||
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
|
||||
}
|
||||
|
||||
// RIP-relative is mod=00, rm=101.
|
||||
sink.put1(encode_modrm(0, enc_g & 7, 0b101));
|
||||
|
||||
let offset = sink.cur_offset();
|
||||
sink.use_label_at_offset(offset, *target, LabelUse::JmpRel32);
|
||||
sink.put4(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// This is the core 'emit' function for instructions that do not reference memory.
|
||||
///
|
||||
/// This is conceptually the same as emit_modrm_sib_enc_ge, except it is for the case where the E
|
||||
/// operand is a register rather than memory. Hence it is much simpler.
|
||||
pub(crate) fn emit_std_enc_enc(
|
||||
sink: &mut MachBuffer<Inst>,
|
||||
prefixes: LegacyPrefixes,
|
||||
opcodes: u32,
|
||||
mut num_opcodes: usize,
|
||||
enc_g: u8,
|
||||
enc_e: u8,
|
||||
rex: RexFlags,
|
||||
) {
|
||||
// EncG and EncE can be derived from registers of any class, and they
|
||||
// don't even have to be from the same class. For example, for an
|
||||
// integer-to-FP conversion insn, one might be RegClass::I64 and the other
|
||||
// RegClass::V128.
|
||||
|
||||
// The legacy prefixes.
|
||||
prefixes.emit(sink);
|
||||
|
||||
// The rex byte.
|
||||
rex.emit_two_op(sink, enc_g, enc_e);
|
||||
|
||||
// All other prefixes and opcodes.
|
||||
while num_opcodes > 0 {
|
||||
num_opcodes -= 1;
|
||||
sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
|
||||
}
|
||||
|
||||
// Now the mod/rm byte. The instruction we're generating doesn't access
|
||||
// memory, so there is no SIB byte or immediate -- we're done.
|
||||
sink.put1(encode_modrm(3, enc_g & 7, enc_e & 7));
|
||||
}
|
||||
|
||||
// These are merely wrappers for the above two functions that facilitate passing
|
||||
// actual `Reg`s rather than their encodings.
|
||||
|
||||
pub(crate) fn emit_std_reg_mem(
|
||||
sink: &mut MachBuffer<Inst>,
|
||||
state: &EmitState,
|
||||
info: &EmitInfo,
|
||||
prefixes: LegacyPrefixes,
|
||||
opcodes: u32,
|
||||
num_opcodes: usize,
|
||||
reg_g: Reg,
|
||||
mem_e: &Amode,
|
||||
rex: RexFlags,
|
||||
) {
|
||||
let enc_g = reg_enc(reg_g);
|
||||
emit_std_enc_mem(
|
||||
sink,
|
||||
state,
|
||||
info,
|
||||
prefixes,
|
||||
opcodes,
|
||||
num_opcodes,
|
||||
enc_g,
|
||||
mem_e,
|
||||
rex,
|
||||
);
|
||||
}
|
||||
|
||||
pub(crate) fn emit_std_reg_reg(
|
||||
sink: &mut MachBuffer<Inst>,
|
||||
prefixes: LegacyPrefixes,
|
||||
opcodes: u32,
|
||||
num_opcodes: usize,
|
||||
reg_g: Reg,
|
||||
reg_e: Reg,
|
||||
rex: RexFlags,
|
||||
) {
|
||||
let enc_g = reg_enc(reg_g);
|
||||
let enc_e = reg_enc(reg_e);
|
||||
emit_std_enc_enc(sink, prefixes, opcodes, num_opcodes, enc_g, enc_e, rex);
|
||||
}
|
||||
|
||||
/// Write a suitable number of bits from an imm64 to the sink.
|
||||
pub(crate) fn emit_simm(sink: &mut MachBuffer<Inst>, size: u8, simm32: u32) {
|
||||
match size {
|
||||
8 | 4 => sink.put4(simm32),
|
||||
2 => sink.put2(simm32 as u16),
|
||||
1 => sink.put1(simm32 as u8),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
2
cranelift/codegen/src/isa/x64/encoding/vex.rs
Normal file
2
cranelift/codegen/src/isa/x64/encoding/vex.rs
Normal file
@@ -0,0 +1,2 @@
|
||||
//! Encodes VEX instructions. These instructions are those added by the Advanced Vector Extensions
|
||||
//! (AVX).
|
||||
Reference in New Issue
Block a user