//! Instruction formats and opcodes. //! //! The `instructions` module contains definitions for instruction formats, opcodes, and the //! in-memory representation of IL instructions. //! //! A large part of this module is auto-generated from the instruction descriptions in the meta //! directory. use std::fmt::{self, Display, Formatter}; use std::str::FromStr; use entities::*; use immediates::*; use types::{self, Type}; // Include code generated by `meta/gen_instr.py`. This file contains: // // - The `pub enum InstructionFormat` enum with all the instruction formats. // - The `pub enum Opcode` definition with all known opcodes, // - The `const OPCODE_FORMAT: [InstructionFormat; N]` table. // - The private `fn opcode_name(Opcode) -> &'static str` function, and // - The hash table `const OPCODE_HASH_TABLE: [Opcode; N]`. // // For value type constraints: // // - The `const OPCODE_CONSTRAINTS : [OpcodeConstraints; N]` table. // - The `const TYPE_SETS : [ValueTypeSet; N]` table. // - The `const OPERAND_CONSTRAINTS : [OperandConstraint; N]` table. // include!(concat!(env!("OUT_DIR"), "/opcodes.rs")); impl Display for Opcode { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "{}", opcode_name(*self)) } } impl Opcode { /// Get the instruction format for this opcode. pub fn format(self) -> Option { if self == Opcode::NotAnOpcode { None } else { Some(OPCODE_FORMAT[self as usize - 1]) } } /// Get the constraint descriptor for this opcode. /// Panic if this is called on `NotAnOpcode`. pub fn constraints(self) -> OpcodeConstraints { OPCODE_CONSTRAINTS[self as usize - 1] } } // A primitive hash function for matching opcodes. // Must match `meta/constant_hash.py`. 
fn simple_hash(s: &str) -> u32 { let mut h: u32 = 5381; for c in s.chars() { h = (h ^ c as u32).wrapping_add(h.rotate_right(6)); } h } // This trait really belongs in libreader where it is used by the .cton file parser, but since it // critically depends on the `opcode_name()` function which is needed here anyway, it lives in this // module. This also saves us from runing the build script twice to generate code for the two // separate crates. impl FromStr for Opcode { type Err = &'static str; /// Parse an Opcode name from a string. fn from_str(s: &str) -> Result { let tlen = OPCODE_HASH_TABLE.len(); assert!(tlen.is_power_of_two()); let mut idx = simple_hash(s) as usize; let mut step: usize = 0; loop { idx = idx % tlen; let entry = OPCODE_HASH_TABLE[idx]; if entry == Opcode::NotAnOpcode { return Err("Unknown opcode"); } if *opcode_name(entry) == *s { return Ok(entry); } // Quadratic probing. step += 1; // When `tlen` is a power of two, it can be proven that idx will visit all entries. // This means that this loop will always terminate if the hash table has even one // unused entry. assert!(step < tlen); idx += step; } } } /// Contents on an instruction. /// /// Every variant must contain `opcode` and `ty` fields. An instruction that doesn't produce a /// value should have its `ty` field set to `VOID`. The size of `InstructionData` should be kept at /// 16 bytes on 64-bit architectures. If more space is needed to represent an instruction, use a /// `Box` to store the additional information out of line. 
#[derive(Debug)] pub enum InstructionData { Nullary { opcode: Opcode, ty: Type, }, Unary { opcode: Opcode, ty: Type, arg: Value, }, UnaryImm { opcode: Opcode, ty: Type, imm: Imm64, }, UnaryIeee32 { opcode: Opcode, ty: Type, imm: Ieee32, }, UnaryIeee64 { opcode: Opcode, ty: Type, imm: Ieee64, }, UnaryImmVector { opcode: Opcode, ty: Type, // TBD: imm: Box }, Binary { opcode: Opcode, ty: Type, args: [Value; 2], }, BinaryImm { opcode: Opcode, ty: Type, lhs: Value, rhs: Imm64, }, // Same as BinaryImm, but the immediate is the lhs operand. BinaryImmRev { opcode: Opcode, ty: Type, rhs: Value, lhs: Imm64, }, BinaryOverflow { opcode: Opcode, ty: Type, second_result: Value, args: [Value; 2], }, Select { opcode: Opcode, ty: Type, args: [Value; 3], }, Jump { opcode: Opcode, ty: Type, data: Box, }, Branch { opcode: Opcode, ty: Type, data: Box, }, BranchTable { opcode: Opcode, ty: Type, arg: Value, table: JumpTable, }, Call { opcode: Opcode, ty: Type, data: Box, }, } /// A variable list of `Value` operands used for function call arguments and passing arguments to /// basic blocks. #[derive(Debug)] pub struct VariableArgs(Vec); impl VariableArgs { pub fn new() -> VariableArgs { VariableArgs(Vec::new()) } } impl Display for VariableArgs { fn fmt(&self, fmt: &mut Formatter) -> fmt::Result { try!(write!(fmt, "(")); for (i, val) in self.0.iter().enumerate() { if i == 0 { try!(write!(fmt, "{}", val)); } else { try!(write!(fmt, ", {}", val)); } } write!(fmt, ")") } } impl Default for VariableArgs { fn default() -> VariableArgs { VariableArgs::new() } } /// Payload data for jump instructions. These need to carry lists of EBB arguments that won't fit /// in the allowed InstructionData size. #[derive(Debug)] pub struct JumpData { destination: Ebb, arguments: VariableArgs, } impl Display for JumpData { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "{}{}", self.destination, self.arguments) } } /// Payload data for branch instructions. 
These need to carry lists of EBB arguments that won't fit /// in the allowed InstructionData size. #[derive(Debug)] pub struct BranchData { arg: Value, destination: Ebb, arguments: VariableArgs, } impl Display for BranchData { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "{}, {}{}", self.arg, self.destination, self.arguments) } } /// Payload of a call instruction. #[derive(Debug)] pub struct CallData { /// Second result value for a call producing multiple return values. second_result: Value, // Dynamically sized array containing call argument values. arguments: VariableArgs, } impl Display for CallData { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "TBD{}", self.arguments) } } impl InstructionData { /// Create data for a call instruction. pub fn call(opc: Opcode, return_type: Type) -> InstructionData { InstructionData::Call { opcode: opc, ty: return_type, data: Box::new(CallData { second_result: NO_VALUE, arguments: VariableArgs::new(), }), } } } /// Value type constraints for a given opcode. /// /// The `InstructionFormat` determines the constraints on most operands, but `Value` operands and /// results are not determined by the format. Every `Opcode` has an associated /// `OpcodeConstraints` object that provides the missing details. /// /// Since there can be a lot of opcodes, the `OpcodeConstraints` object is encoded as a bit field /// by the `meta/gen_instr.py` script. /// /// The bit field bits are: /// /// Bits 0-2: /// Number of fixed result values. This does not include `variable_args` results as are /// produced by call instructions. /// /// Bit 3: /// This opcode is polymorphic and the controlling type variable can be inferred from the /// designated input operand. This is the `typevar_operand` index given to the /// `InstructionFormat` meta language object. When bit 0 is not set, the controlling type /// variable must be the first output value instead. 
///
/// Bits 4-7:
///     Permitted set of types for the controlling type variable as an index into `TYPE_SETS`.
///
/// Bits 8-15:
///     Offset into `OPERAND_CONSTRAINTS` table of the descriptors for this opcode. The first
///     `fixed_results()` entries describe the result constraints, then follows constraints for the
///     fixed `Value` input operands. The number of `Value` inputs is determined by the instruction
///     format.
///
#[derive(Clone, Copy)]
pub struct OpcodeConstraints(u16);

impl OpcodeConstraints {
    /// Can the controlling type variable for this opcode be inferred from the designated value
    /// input operand?
    /// This also implies that this opcode is polymorphic.
    pub fn use_typevar_operand(self) -> bool {
        // Bit 3 of the encoded descriptor.
        (self.0 & 0x8) != 0
    }

    /// Get the number of *fixed* result values produced by this opcode.
    /// This does not include `variable_args` produced by calls.
    pub fn fixed_results(self) -> usize {
        // Bits 0-2 of the encoded descriptor.
        (self.0 & 0x7) as usize
    }

    /// Get the offset into `TYPE_SETS` for the controlling type variable.
    /// Returns `None` if the instruction is not polymorphic.
    fn typeset_offset(self) -> Option<usize> {
        // Bits 4-7 of the encoded descriptor. An index past the end of `TYPE_SETS` is how a
        // non-polymorphic opcode is represented.
        let offset = ((self.0 & 0xff) >> 4) as usize;
        if offset < TYPE_SETS.len() {
            Some(offset)
        } else {
            None
        }
    }

    /// Get the offset into OPERAND_CONSTRAINTS where the descriptors for this opcode begin.
    fn constraint_offset(self) -> usize {
        // Bits 8-15 of the encoded descriptor.
        (self.0 >> 8) as usize
    }

    /// Get the value type of result number `n`, having resolved the controlling type variable to
    /// `ctrl_type`.
    ///
    /// # Panics
    ///
    /// Panics if `n` is not less than `fixed_results()`, or if the constraint for that result
    /// is a free type that does not depend on the controlling type variable.
    pub fn result_type(self, n: usize, ctrl_type: Type) -> Type {
        assert!(n < self.fixed_results(), "Invalid result index");
        // Result constraints come first in this opcode's slice of the table.
        OPERAND_CONSTRAINTS[self.constraint_offset() + n]
            .resolve(ctrl_type)
            .expect("Result constraints can't be free")
    }

    /// Get the typeset of allowed types for the controlling type variable in a polymorphic
    /// instruction.
    ///
    /// Returns `None` if the instruction is not polymorphic.
    pub fn ctrl_typeset(self) -> Option<ValueTypeSet> {
        self.typeset_offset().map(|offset| TYPE_SETS[offset])
    }
}

/// A value type set describes the permitted set of types for a type variable.
#[derive(Clone, Copy)] pub struct ValueTypeSet { allow_scalars: bool, allow_simd: bool, base: Type, all_ints: bool, all_floats: bool, all_bools: bool, } impl ValueTypeSet { /// Is `scalar` part of the base type set? /// /// Note that the base type set does not have to be included in the type set proper. fn is_base_type(&self, scalar: Type) -> bool { scalar == self.base || (self.all_ints && scalar.is_int()) || (self.all_floats && scalar.is_float()) || (self.all_bools && scalar.is_bool()) } /// Does `typ` belong to this set? pub fn contains(&self, typ: Type) -> bool { let allowed = if typ.is_scalar() { self.allow_scalars } else { self.allow_simd }; allowed && self.is_base_type(typ.lane_type()) } } /// Operand constraints. This describes the value type constraints on a single `Value` operand. enum OperandConstraint { /// This operand has a concrete value type. Concrete(Type), /// This operand can vary freely within the given type set. /// The type set is identified by its index into the TYPE_SETS constant table. Free(u8), /// This operand is the same type as the controlling type variable. Same, /// This operand is `ctrlType.lane_type()`. Lane, /// This operand is `ctrlType.as_bool()`. AsBool, } impl OperandConstraint { /// Resolve this operand constraint into a concrete value type, given the value of the /// controlling type variable. /// Returns `None` if this is a free operand which is independent of the controlling type /// variable. 
pub fn resolve(&self, ctrl_type: Type) -> Option { use self::OperandConstraint::*; match *self { Concrete(t) => Some(t), Free(_) => None, Same => Some(ctrl_type), Lane => Some(ctrl_type.lane_type()), AsBool => Some(ctrl_type.as_bool()), } } } #[cfg(test)] mod tests { use super::*; #[test] fn opcodes() { let x = Opcode::Iadd; let mut y = Opcode::Isub; assert!(x != y); y = Opcode::Iadd; assert_eq!(x, y); assert_eq!(x.format(), Some(InstructionFormat::Binary)); assert_eq!(format!("{:?}", Opcode::IaddImm), "IaddImm"); assert_eq!(Opcode::IaddImm.to_string(), "iadd_imm"); // Check the matcher. assert_eq!("iadd".parse::(), Ok(Opcode::Iadd)); assert_eq!("iadd_imm".parse::(), Ok(Opcode::IaddImm)); assert_eq!("iadd\0".parse::(), Err("Unknown opcode")); assert_eq!("".parse::(), Err("Unknown opcode")); assert_eq!("\0".parse::(), Err("Unknown opcode")); } #[test] fn instruction_data() { use std::mem; // The size of the InstructionData enum is important for performance. It should not exceed // 16 bytes. Use `Box` out-of-line payloads for instruction formats that require // more space than that. // It would be fine with a data structure smaller than 16 bytes, but what are the odds of // that? assert_eq!(mem::size_of::(), 16); } }