Break entity references and instruction info out into new modules.

Avoid gathering too much code in repr.rs.

The `entities` module contains entity reference types, and the `instructions`
module contains instruction opcodes and formats.
This commit is contained in:
Jakob Stoklund Olesen
2016-05-13 13:32:20 -07:00
parent e3927e205e
commit 9c9be1cb58
6 changed files with 424 additions and 353 deletions

185
src/libcretonne/entities.rs Normal file
View File

@@ -0,0 +1,185 @@
//! IL entity references.
//!
//! Instructions in Cretonne IL need to reference other entities in the function. This can be other
//! parts of the function like extended basic blocks or stack slots, or it can be external entities
//! that are declared in the function preamble in the text format.
//!
//! These entity references in instruction operands are not implemented as Rust references both
//! because Rust's ownership and mutability rules make it difficult, and because 64-bit pointers
//! take up a lot of space, and we want a compact in-memory representation. Instead, entity
//! references are structs wrapping a `u32` index into a table in the `Function` main data
//! structure. There is a separate index type for each entity type, so we don't lose type safety.
//!
//! The `entities` module defines public types for the entity references along with constants
//! representing an invalid reference. We prefer to use `Option<EntityRef>` whenever possible, but
//! unfortunately that type is twice as large as the 32-bit index type on its own. Thus, compact
//! data structures use the sentinel constant, while function arguments and return values prefer
//! the more Rust-like `Option<EntityRef>` variant.
//!
//! The entity references all implement the `Display` trait in a way that matches the textual IL
//! format.
use std::default::Default;
use std::fmt::{self, Display, Formatter, Write};
use std::u32;
/// Compact, opaque handle naming one extended basic block of a function.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub struct Ebb(u32);

/// A guaranteed invalid EBB reference.
pub const NO_EBB: Ebb = Ebb(u32::MAX);

impl Ebb {
    /// Wrap `index` in an `Ebb` reference.
    ///
    /// # Panics
    ///
    /// Panics unless `index` is strictly below `u32::MAX`; that number is taken by `NO_EBB`.
    pub fn new(index: usize) -> Ebb {
        assert!(index < (u32::MAX as usize));
        let raw = index as u32;
        Ebb(raw)
    }

    /// The wrapped number, widened to `usize`.
    pub fn index(&self) -> usize {
        let Ebb(raw) = *self;
        raw as usize
    }
}

/// The default `Ebb` is the invalid reference `NO_EBB`.
impl Default for Ebb {
    fn default() -> Ebb {
        NO_EBB
    }
}

/// Display an `Ebb` reference as "ebb12".
impl Display for Ebb {
    fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
        let Ebb(raw) = *self;
        write!(fmt, "ebb{}", raw)
    }
}
/// Compact, opaque handle naming one instruction of a function.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub struct Inst(u32);

/// A guaranteed invalid instruction reference.
pub const NO_INST: Inst = Inst(u32::MAX);

impl Inst {
    /// Wrap `index` in an `Inst` reference.
    ///
    /// # Panics
    ///
    /// Panics unless `index` is strictly below `u32::MAX`; that number is taken by `NO_INST`.
    pub fn new(index: usize) -> Inst {
        assert!(index < (u32::MAX as usize));
        let raw = index as u32;
        Inst(raw)
    }

    /// The wrapped number, widened to `usize`.
    pub fn index(&self) -> usize {
        let Inst(raw) = *self;
        raw as usize
    }
}

/// The default `Inst` is the invalid reference `NO_INST`.
impl Default for Inst {
    fn default() -> Inst {
        NO_INST
    }
}

/// Display an `Inst` reference as "inst7".
impl Display for Inst {
    fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
        let Inst(raw) = *self;
        write!(fmt, "inst{}", raw)
    }
}
/// An opaque reference to an SSA value.
///
/// The wrapped `u32` is an encoding rather than a plain index: `new_direct` stores twice an
/// instruction index, `new_table` stores twice a table index plus one, and `u32::MAX` is
/// `NO_VALUE`. Use `expand()` to decode it.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub struct Value(u32);
// Value references can either reference an instruction directly, or they can refer to the extended
// value table.
pub enum ExpandedValue {
// This is the first value produced by the referenced instruction.
Direct(Inst),
// This value is described in the extended value table.
Table(usize),
// This is NO_VALUE.
None,
}
impl Value {
pub fn new_direct(i: Inst) -> Value {
let encoding = i.index() * 2;
assert!(encoding < u32::MAX as usize);
Value(encoding as u32)
}
pub fn new_table(index: usize) -> Value {
let encoding = index * 2 + 1;
assert!(encoding < u32::MAX as usize);
Value(encoding as u32)
}
// Expand the internal representation into something useful.
pub fn expand(&self) -> ExpandedValue {
use self::ExpandedValue::*;
if *self == NO_VALUE {
return None;
}
let index = (self.0 / 2) as usize;
if self.0 % 2 == 0 {
Direct(Inst::new(index))
} else {
Table(index)
}
}
}
/// Display a `Value` reference as "v7" or "v2x".
impl Display for Value {
fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
use self::ExpandedValue::*;
match self.expand() {
Direct(i) => write!(fmt, "v{}", i.0),
Table(i) => write!(fmt, "vx{}", i),
None => write!(fmt, "NO_VALUE"),
}
}
}
/// A guaranteed invalid value reference.
///
/// `Value::expand()` maps this constant to `ExpandedValue::None`.
pub const NO_VALUE: Value = Value(u32::MAX);
/// The default `Value` is the invalid reference `NO_VALUE`.
impl Default for Value {
fn default() -> Value {
NO_VALUE
}
}
/// Compact, opaque handle naming one stack slot.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub struct StackSlot(u32);

/// A guaranteed invalid stack slot reference.
pub const NO_STACK_SLOT: StackSlot = StackSlot(u32::MAX);

impl StackSlot {
    /// Wrap `index` in a `StackSlot` reference.
    ///
    /// # Panics
    ///
    /// Panics unless `index` is strictly below `u32::MAX`; that number is taken by
    /// `NO_STACK_SLOT`.
    pub fn new(index: usize) -> StackSlot {
        assert!(index < (u32::MAX as usize));
        let raw = index as u32;
        StackSlot(raw)
    }

    /// The wrapped number, widened to `usize`.
    pub fn index(&self) -> usize {
        let StackSlot(raw) = *self;
        raw as usize
    }
}

/// The default `StackSlot` is the invalid reference `NO_STACK_SLOT`.
impl Default for StackSlot {
    fn default() -> StackSlot {
        NO_STACK_SLOT
    }
}

/// Display a `StackSlot` reference as "ss12".
impl Display for StackSlot {
    fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
        let StackSlot(raw) = *self;
        write!(fmt, "ss{}", raw)
    }
}

View File

@@ -9,73 +9,6 @@ use std::fmt::{self, Display, Formatter};
use std::mem;
use std::str::FromStr;
// Include code generated by `meta/gen_instr.py`. This file contains:
//
// - The `pub enum Opcode` definition with all known opcodes,
// - The private `fn opcode_name(Opcode) -> &'static str` function, and
// - The hash table `const OPCODE_HASH_TABLE: [Opcode; N]`.
//
include!(concat!(env!("OUT_DIR"), "/opcodes.rs"));
/// Display an `Opcode` using the name returned by `opcode_name()`.
impl Display for Opcode {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let name = opcode_name(*self);
        f.write_str(name)
    }
}
impl Opcode {
    /// Get the instruction format for this opcode.
    ///
    /// `NotAnOpcode` has no format and yields `None`. Every other opcode is looked up in
    /// `OPCODE_FORMAT`, whose entries start at the opcode numbered 1.
    pub fn format(self) -> Option<InstructionFormat> {
        match self {
            Opcode::NotAnOpcode => None,
            opcode => Some(OPCODE_FORMAT[opcode as usize - 1]),
        }
    }
}
// A primitive hash function for matching opcodes.
// Must match `meta/constant_hash.py`.
//
// Starting from 5381, every character is xor-ed into the running hash, and the previous hash
// rotated right by six bits is added with wrap-around.
fn simple_hash(s: &str) -> u32 {
    s.chars().fold(5381u32, |acc, ch| {
        (acc ^ ch as u32).wrapping_add(acc.rotate_right(6))
    })
}
impl FromStr for Opcode {
    type Err = &'static str;

    /// Parse an Opcode name from a string.
    ///
    /// The name is looked up in `OPCODE_HASH_TABLE`, starting at the slot selected by
    /// `simple_hash()`. Reaching a `NotAnOpcode` slot means the name is unknown.
    fn from_str(s: &str) -> Result<Opcode, &'static str> {
        let tlen = OPCODE_HASH_TABLE.len();
        assert!(tlen.is_power_of_two());
        let mut step: usize = 0;
        let mut slot = (simple_hash(s) as usize) % tlen;
        loop {
            let entry = OPCODE_HASH_TABLE[slot];
            if entry == Opcode::NotAnOpcode {
                return Err("Unknown opcode");
            }
            if opcode_name(entry) == s {
                return Ok(entry);
            }
            // Quadratic probing: the distance to the next slot grows by one on every miss.
            step += 1;
            // When `tlen` is a power of two, it can be proven that the probe sequence will visit
            // all entries. This means that this loop will always terminate if the hash table has
            // even one unused entry.
            assert!(step < tlen);
            slot = (slot + step) % tlen;
        }
    }
}
/// 64-bit immediate integer operand.
///
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
@@ -494,27 +427,6 @@ mod tests {
use std::str::FromStr;
use std::fmt::Display;
#[test]
fn opcodes() {
    // Opcodes compare by identity.
    let add = Opcode::Iadd;
    assert!(add != Opcode::Isub);
    assert_eq!(add, Opcode::Iadd);
    assert_eq!(add.format(), Some(InstructionFormat::Binary));

    // `Debug` prints the variant name, `Display` the textual opcode name.
    assert_eq!(format!("{:?}", Opcode::IaddImm), "IaddImm");
    assert_eq!(Opcode::IaddImm.to_string(), "iadd_imm");

    // Check the matcher.
    for &(text, opcode) in &[("iadd", Opcode::Iadd), ("iadd_imm", Opcode::IaddImm)] {
        assert_eq!(text.parse::<Opcode>(), Ok(opcode));
    }
    for text in &["iadd\0", "", "\0"] {
        assert_eq!(text.parse::<Opcode>(), Err("Unknown opcode"));
    }
}
#[test]
fn format_imm64() {
assert_eq!(Imm64(0).to_string(), "0");
@@ -791,5 +703,4 @@ mod tests {
parse_ok::<Ieee64>("sNaN:0x4000000000001", "sNaN:0x4000000000001");
parse_err::<Ieee64>("sNaN:0x8000000000001", "Invalid sNaN payload");
}
}

View File

@@ -0,0 +1,229 @@
//! Instruction formats and opcodes.
//!
//! The `instructions` module contains definitions for instruction formats, opcodes, and the
//! in-memory representation of IL instructions.
//!
//! A large part of this module is auto-generated from the instruction descriptions in the meta
//! directory.
use std::fmt::{self, Display, Formatter};
use std::str::FromStr;
use entities::*;
use immediates::*;
use types::Type;
// Include code generated by `meta/gen_instr.py`. This file contains:
//
// - The `pub enum InstructionFormat` enum with all the instruction formats.
// - The `pub enum Opcode` definition with all known opcodes,
// - The `const OPCODE_FORMAT: [InstructionFormat; N]` table.
// - The private `fn opcode_name(Opcode) -> &'static str` function, and
// - The hash table `const OPCODE_HASH_TABLE: [Opcode; N]`.
//
include!(concat!(env!("OUT_DIR"), "/opcodes.rs"));
/// Display an `Opcode` using the name returned by `opcode_name()`.
impl Display for Opcode {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let name = opcode_name(*self);
        f.write_str(name)
    }
}
impl Opcode {
    /// Get the instruction format for this opcode.
    ///
    /// `NotAnOpcode` has no format and yields `None`. Every other opcode is looked up in
    /// `OPCODE_FORMAT`, whose entries start at the opcode numbered 1.
    pub fn format(self) -> Option<InstructionFormat> {
        match self {
            Opcode::NotAnOpcode => None,
            opcode => Some(OPCODE_FORMAT[opcode as usize - 1]),
        }
    }
}
// A primitive hash function for matching opcodes.
// Must match `meta/constant_hash.py`.
//
// Starting from 5381, every character is xor-ed into the running hash, and the previous hash
// rotated right by six bits is added with wrap-around.
fn simple_hash(s: &str) -> u32 {
    s.chars().fold(5381u32, |acc, ch| {
        (acc ^ ch as u32).wrapping_add(acc.rotate_right(6))
    })
}
// This impl really belongs in libreader where it is used by the .cton file parser, but since it
// critically depends on the `opcode_name()` function which is needed here anyway, it lives in this
// module. This also saves us from running the build script twice to generate code for the two
// separate crates.
impl FromStr for Opcode {
    type Err = &'static str;

    /// Parse an Opcode name from a string.
    ///
    /// The name is looked up in `OPCODE_HASH_TABLE`, starting at the slot selected by
    /// `simple_hash()`. Reaching a `NotAnOpcode` slot means the name is unknown.
    fn from_str(s: &str) -> Result<Opcode, &'static str> {
        let tlen = OPCODE_HASH_TABLE.len();
        assert!(tlen.is_power_of_two());
        let mut step: usize = 0;
        let mut slot = (simple_hash(s) as usize) % tlen;
        loop {
            let entry = OPCODE_HASH_TABLE[slot];
            if entry == Opcode::NotAnOpcode {
                return Err("Unknown opcode");
            }
            if opcode_name(entry) == s {
                return Ok(entry);
            }
            // Quadratic probing: the distance to the next slot grows by one on every miss.
            step += 1;
            // When `tlen` is a power of two, it can be proven that the probe sequence will visit
            // all entries. This means that this loop will always terminate if the hash table has
            // even one unused entry.
            assert!(step < tlen);
            slot = (slot + step) % tlen;
        }
    }
}
/// Contents of an instruction.
///
/// Every variant must contain `opcode` and `ty` fields. An instruction that doesn't produce a
/// value should have its `ty` field set to `VOID`. The size of `InstructionData` should be kept at
/// 16 bytes on 64-bit architectures. If more space is needed to represent an instruction, use a
/// `Box<AuxData>` to store the additional information out of line.
#[derive(Debug)]
pub enum InstructionData {
/// Instruction with neither value nor immediate operands.
Nullary {
opcode: Opcode,
ty: Type,
},
/// Instruction with a single value operand.
Unary {
opcode: Opcode,
ty: Type,
arg: Value,
},
/// Instruction with a single 64-bit immediate operand.
UnaryImm {
opcode: Opcode,
ty: Type,
imm: Imm64,
},
/// Instruction with two value operands.
Binary {
opcode: Opcode,
ty: Type,
args: [Value; 2],
},
/// Instruction with one value operand and one 64-bit immediate operand.
BinaryImm {
opcode: Opcode,
ty: Type,
arg: Value,
imm: Imm64,
},
/// Call instruction; its payload lives out of line in a boxed `CallData`.
Call {
opcode: Opcode,
ty: Type,
data: Box<CallData>,
},
}
/// Payload of a call instruction.
///
/// Stored behind a `Box` in `InstructionData::Call`.
#[derive(Debug)]
pub struct CallData {
/// Second result value for a call producing multiple return values.
second_result: Value,
/// Dynamically sized array containing call argument values.
arguments: Vec<Value>,
}
impl InstructionData {
/// Create data for a call instruction.
pub fn call(opc: Opcode, return_type: Type) -> InstructionData {
InstructionData::Call {
opcode: opc,
ty: return_type,
data: Box::new(CallData {
second_result: NO_VALUE,
arguments: Vec::new(),
}),
}
}
/// Get the opcode of this instruction.
pub fn opcode(&self) -> Opcode {
use self::InstructionData::*;
match *self {
Nullary { opcode, .. } => opcode,
Unary { opcode, .. } => opcode,
UnaryImm { opcode, .. } => opcode,
Binary { opcode, .. } => opcode,
BinaryImm { opcode, .. } => opcode,
Call { opcode, .. } => opcode,
}
}
/// Type of the first result.
pub fn first_type(&self) -> Type {
use self::InstructionData::*;
match *self {
Nullary { ty, .. } => ty,
Unary { ty, .. } => ty,
UnaryImm { ty, .. } => ty,
Binary { ty, .. } => ty,
BinaryImm { ty, .. } => ty,
Call { ty, .. } => ty,
}
}
/// Second result value, if any.
pub fn second_result(&self) -> Option<Value> {
use self::InstructionData::*;
match *self {
Nullary { .. } => None,
Unary { .. } => None,
UnaryImm { .. } => None,
Binary { .. } => None,
BinaryImm { .. } => None,
Call { ref data, .. } => Some(data.second_result),
}
}
pub fn second_result_mut<'a>(&'a mut self) -> Option<&'a mut Value> {
use self::InstructionData::*;
match *self {
Nullary { .. } => None,
Unary { .. } => None,
UnaryImm { .. } => None,
Binary { .. } => None,
BinaryImm { .. } => None,
Call { ref mut data, .. } => Some(&mut data.second_result),
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn opcodes() {
        // Opcodes compare by identity.
        let add = Opcode::Iadd;
        assert!(add != Opcode::Isub);
        assert_eq!(add, Opcode::Iadd);
        assert_eq!(add.format(), Some(InstructionFormat::Binary));

        // `Debug` prints the variant name, `Display` the textual opcode name.
        assert_eq!(format!("{:?}", Opcode::IaddImm), "IaddImm");
        assert_eq!(Opcode::IaddImm.to_string(), "iadd_imm");

        // Check the matcher.
        for &(text, opcode) in &[("iadd", Opcode::Iadd), ("iadd_imm", Opcode::IaddImm)] {
            assert_eq!(text.parse::<Opcode>(), Ok(opcode));
        }
        for text in &["iadd\0", "", "\0"] {
            assert_eq!(text.parse::<Opcode>(), Err("Unknown opcode"));
        }
    }
}

View File

@@ -7,5 +7,7 @@
pub mod types;
pub mod immediates;
pub mod entities;
pub mod instructions;
pub mod repr;
pub mod write;

View File

@@ -2,11 +2,10 @@
//! Representation of Cretonne IL functions.
use types::{Type, FunctionName, Signature};
use immediates::*;
use std::default::Default;
use std::fmt::{self, Display, Formatter, Write};
use entities::*;
use instructions::*;
use std::fmt::{self, Display, Formatter};
use std::ops::Index;
use std::u32;
// ====--------------------------------------------------------------------------------------====//
//
@@ -14,34 +13,6 @@ use std::u32;
//
// ====--------------------------------------------------------------------------------------====//
/// An opaque reference to an extended basic block in a function.
///
/// Newtype around a `u32`; `NO_EBB` holds the reserved invalid value.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub struct Ebb(u32);
/// A guaranteed invalid EBB reference.
pub const NO_EBB: Ebb = Ebb(u32::MAX);
/// An opaque reference to an instruction in a function.
///
/// Newtype around a `u32`; `NO_INST` holds the reserved invalid value.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub struct Inst(u32);
/// A guaranteed invalid instruction reference.
pub const NO_INST: Inst = Inst(u32::MAX);
/// An opaque reference to an SSA value.
///
/// Newtype around a `u32`; `NO_VALUE` holds the reserved invalid value.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub struct Value(u32);
/// A guaranteed invalid value reference.
pub const NO_VALUE: Value = Value(u32::MAX);
/// An opaque reference to a stack slot.
///
/// Newtype around a `u32`; `NO_STACK_SLOT` holds the reserved invalid value.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub struct StackSlot(u32);
/// A guaranteed invalid stack slot reference.
pub const NO_STACK_SLOT: StackSlot = StackSlot(u32::MAX);
/// A function.
///
/// The `Function` struct owns all of its instructions and extended basic blocks, and it works as a
@@ -98,81 +69,12 @@ pub struct EbbData {
last_arg: Value,
}
/// Contents of an instruction.
///
/// Every variant must contain `opcode` and `ty` fields. An instruction that doesn't produce a
/// value should have its `ty` field set to `VOID`. The size of `InstructionData` should be kept at
/// 16 bytes on 64-bit architectures. If more space is needed to represent an instruction, use a
/// `Box<AuxData>` to store the additional information out of line.
#[derive(Debug)]
pub enum InstructionData {
/// Instruction with neither value nor immediate operands.
Nullary {
opcode: Opcode,
ty: Type,
},
/// Instruction with a single value operand.
Unary {
opcode: Opcode,
ty: Type,
arg: Value,
},
/// Instruction with a single 64-bit immediate operand.
UnaryImm {
opcode: Opcode,
ty: Type,
imm: Imm64,
},
/// Instruction with two value operands.
Binary {
opcode: Opcode,
ty: Type,
args: [Value; 2],
},
/// Instruction with one value operand and one 64-bit immediate operand.
BinaryImm {
opcode: Opcode,
ty: Type,
arg: Value,
imm: Imm64,
},
/// Call instruction; its payload lives out of line in a boxed `CallData`.
Call {
opcode: Opcode,
ty: Type,
data: Box<CallData>,
},
}
/// Payload of a call instruction.
///
/// Stored behind a `Box` in `InstructionData::Call`.
#[derive(Debug)]
pub struct CallData {
/// Second result value for a call producing multiple return values.
second_result: Value,
/// Dynamically sized array containing call argument values.
arguments: Vec<Value>,
}
// ====--------------------------------------------------------------------------------------====//
//
// Stack slot implementation.
//
// ====--------------------------------------------------------------------------------------====//
impl StackSlot {
fn new(index: usize) -> StackSlot {
assert!(index < (u32::MAX as usize));
StackSlot(index as u32)
}
pub fn index(&self) -> usize {
self.0 as usize
}
}
/// Display a `StackSlot` reference as "ss12".
impl Display for StackSlot {
fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
write!(fmt, "ss{}", self.0)
}
}
impl StackSlotData {
/// Create a stack slot with the specified byte size.
pub fn new(size: u32) -> StackSlotData {
@@ -221,24 +123,6 @@ impl Iterator for StackSlotIter {
//
// ====--------------------------------------------------------------------------------------====//
impl Ebb {
fn new(index: usize) -> Ebb {
assert!(index < (u32::MAX as usize));
Ebb(index as u32)
}
pub fn index(&self) -> usize {
self.0 as usize
}
}
/// Display an `Ebb` reference as "ebb12".
impl Display for Ebb {
fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
write!(fmt, "ebb{}", self.0)
}
}
impl EbbData {
fn new() -> EbbData {
EbbData {
@@ -254,24 +138,6 @@ impl EbbData {
//
// ====--------------------------------------------------------------------------------------====//
impl Inst {
fn new(index: usize) -> Inst {
assert!(index < (u32::MAX as usize));
Inst(index as u32)
}
pub fn index(&self) -> usize {
self.0 as usize
}
}
/// Display an `Inst` reference as "inst7".
impl Display for Inst {
fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
write!(fmt, "inst{}", self.0)
}
}
/// Allow immutable access to instructions via function indexing.
impl Index<Inst> for Function {
type Output = InstructionData;
@@ -287,65 +153,6 @@ impl Index<Inst> for Function {
//
// ====--------------------------------------------------------------------------------------====//
/// Decoded form of a `Value` reference, as returned by `Value::expand()`.
///
/// Value references can either reference an instruction directly, or they can refer to the
/// extended value table.
enum ExpandedValue {
/// This is the first value produced by the referenced instruction.
Direct(Inst),
/// This value is described in the extended value table.
Table(usize),
/// This is NO_VALUE.
None,
}
impl Value {
fn new_direct(i: Inst) -> Value {
let encoding = i.index() * 2;
assert!(encoding < u32::MAX as usize);
Value(encoding as u32)
}
fn new_table(index: usize) -> Value {
let encoding = index * 2 + 1;
assert!(encoding < u32::MAX as usize);
Value(encoding as u32)
}
// Expand the internal representation into something useful.
fn expand(&self) -> ExpandedValue {
use self::ExpandedValue::*;
if *self == NO_VALUE {
return None;
}
let index = (self.0 / 2) as usize;
if self.0 % 2 == 0 {
Direct(Inst::new(index))
} else {
Table(index)
}
}
}
/// The default `Value` is the invalid reference `NO_VALUE`.
impl Default for Value {
fn default() -> Value {
NO_VALUE
}
}
/// Display a `Value` reference as "v7" or "v2x".
impl Display for Value {
fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
use self::ExpandedValue::*;
match self.expand() {
Direct(i) => write!(fmt, "v{}", i.0),
Table(i) => write!(fmt, "vx{}", i),
None => write!(fmt, "NO_VALUE"),
}
}
}
// Most values are simply the first value produced by an instruction.
// Other values have an entry in the value table.
#[derive(Debug)]
@@ -398,71 +205,6 @@ impl<'a> Iterator for Values<'a> {
}
}
impl InstructionData {
/// Create data for a call instruction.
pub fn call(opc: Opcode, return_type: Type) -> InstructionData {
InstructionData::Call {
opcode: opc,
ty: return_type,
data: Box::new(CallData {
second_result: NO_VALUE,
arguments: Vec::new(),
}),
}
}
/// Get the opcode of this instruction.
pub fn opcode(&self) -> Opcode {
use self::InstructionData::*;
match *self {
Nullary { opcode, .. } => opcode,
Unary { opcode, .. } => opcode,
UnaryImm { opcode, .. } => opcode,
Binary { opcode, .. } => opcode,
BinaryImm { opcode, .. } => opcode,
Call { opcode, .. } => opcode,
}
}
/// Type of the first result.
pub fn first_type(&self) -> Type {
use self::InstructionData::*;
match *self {
Nullary { ty, .. } => ty,
Unary { ty, .. } => ty,
UnaryImm { ty, .. } => ty,
Binary { ty, .. } => ty,
BinaryImm { ty, .. } => ty,
Call { ty, .. } => ty,
}
}
/// Second result value, if any.
fn second_result(&self) -> Option<Value> {
use self::InstructionData::*;
match *self {
Nullary { .. } => None,
Unary { .. } => None,
UnaryImm { .. } => None,
Binary { .. } => None,
BinaryImm { .. } => None,
Call { ref data, .. } => Some(data.second_result),
}
}
fn second_result_mut<'a>(&'a mut self) -> Option<&'a mut Value> {
use self::InstructionData::*;
match *self {
Nullary { .. } => None,
Unary { .. } => None,
UnaryImm { .. } => None,
Binary { .. } => None,
BinaryImm { .. } => None,
Call { ref mut data, .. } => Some(&mut data.second_result),
}
}
}
impl Function {
/// Create a function with the given name and signature.
pub fn with_name_signature(name: FunctionName, sig: Signature) -> Function {
@@ -632,7 +374,7 @@ impl Function {
/// Get the type of a value.
pub fn value_type(&self, v: Value) -> Type {
use self::ExpandedValue::*;
use entities::ExpandedValue::*;
use self::ValueData::*;
match v.expand() {
Direct(i) => self[i].first_type(),
@@ -651,7 +393,8 @@ impl Function {
mod tests {
use super::*;
use types;
use immediates::*;
use entities::*;
use instructions::*;
#[test]
fn make_inst() {

View File

@@ -12,7 +12,8 @@ use std::u32;
use lexer::{self, Lexer, Token};
use cretonne::types::{FunctionName, Signature, ArgumentType, ArgumentExtension};
use cretonne::immediates::Imm64;
use cretonne::repr::{Function, StackSlot, StackSlotData};
use cretonne::entities::StackSlot;
use cretonne::repr::{Function, StackSlotData};
pub use lexer::Location;