//! Instruction formats and opcodes. //! //! The `instructions` module contains definitions for instruction formats, opcodes, and the //! in-memory representation of IL instructions. //! //! A large part of this module is auto-generated from the instruction descriptions in the meta //! directory. use std::fmt::{self, Display, Formatter}; use std::str::FromStr; use std::ops::{Deref, DerefMut}; use ir::entities::*; use ir::immediates::*; use ir::condcodes::*; use ir::types::{self, Type}; // Include code generated by `meta/gen_instr.py`. This file contains: // // - The `pub enum InstructionFormat` enum with all the instruction formats. // - The `pub enum Opcode` definition with all known opcodes, // - The `const OPCODE_FORMAT: [InstructionFormat; N]` table. // - The private `fn opcode_name(Opcode) -> &'static str` function, and // - The hash table `const OPCODE_HASH_TABLE: [Opcode; N]`. // // For value type constraints: // // - The `const OPCODE_CONSTRAINTS : [OpcodeConstraints; N]` table. // - The `const TYPE_SETS : [ValueTypeSet; N]` table. // - The `const OPERAND_CONSTRAINTS : [OperandConstraint; N]` table. // include!(concat!(env!("OUT_DIR"), "/opcodes.rs")); impl Display for Opcode { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "{}", opcode_name(*self)) } } impl Opcode { /// Get the instruction format for this opcode. pub fn format(self) -> Option { if self == Opcode::NotAnOpcode { None } else { Some(OPCODE_FORMAT[self as usize - 1]) } } /// Get the constraint descriptor for this opcode. /// Panic if this is called on `NotAnOpcode`. pub fn constraints(self) -> OpcodeConstraints { OPCODE_CONSTRAINTS[self as usize - 1] } } // This trait really belongs in libreader where it is used by the .cton file parser, but since it // critically depends on the `opcode_name()` function which is needed here anyway, it lives in this // module. This also saves us from runing the build script twice to generate code for the two // separate crates. 
impl FromStr for Opcode { type Err = &'static str; /// Parse an Opcode name from a string. fn from_str(s: &str) -> Result { use simple_hash::simple_hash; let tlen = OPCODE_HASH_TABLE.len(); assert!(tlen.is_power_of_two()); let mut idx = simple_hash(s) as usize; let mut step: usize = 0; loop { idx = idx % tlen; let entry = OPCODE_HASH_TABLE[idx]; if entry == Opcode::NotAnOpcode { return Err("Unknown opcode"); } if *opcode_name(entry) == *s { return Ok(entry); } // Quadratic probing. step += 1; // When `tlen` is a power of two, it can be proven that idx will visit all entries. // This means that this loop will always terminate if the hash table has even one // unused entry. assert!(step < tlen); idx += step; } } } /// Contents on an instruction. /// /// Every variant must contain `opcode` and `ty` fields. An instruction that doesn't produce a /// value should have its `ty` field set to `VOID`. The size of `InstructionData` should be kept at /// 16 bytes on 64-bit architectures. If more space is needed to represent an instruction, use a /// `Box` to store the additional information out of line. #[derive(Debug)] pub enum InstructionData { Nullary { opcode: Opcode, ty: Type, }, Unary { opcode: Opcode, ty: Type, arg: Value, }, UnaryImm { opcode: Opcode, ty: Type, imm: Imm64, }, UnaryIeee32 { opcode: Opcode, ty: Type, imm: Ieee32, }, UnaryIeee64 { opcode: Opcode, ty: Type, imm: Ieee64, }, UnaryImmVector { opcode: Opcode, ty: Type, // TBD: imm: Box }, Binary { opcode: Opcode, ty: Type, args: [Value; 2], }, BinaryImm { opcode: Opcode, ty: Type, arg: Value, imm: Imm64, }, // Same as BinaryImm, but the immediate is the lhs operand. 
BinaryImmRev { opcode: Opcode, ty: Type, arg: Value, imm: Imm64, }, BinaryOverflow { opcode: Opcode, ty: Type, second_result: Value, args: [Value; 2], }, Ternary { opcode: Opcode, ty: Type, args: [Value; 3], }, InsertLane { opcode: Opcode, ty: Type, lane: u8, args: [Value; 2], }, ExtractLane { opcode: Opcode, ty: Type, lane: u8, arg: Value, }, IntCompare { opcode: Opcode, ty: Type, cond: IntCC, args: [Value; 2], }, FloatCompare { opcode: Opcode, ty: Type, cond: FloatCC, args: [Value; 2], }, Jump { opcode: Opcode, ty: Type, data: Box, }, Branch { opcode: Opcode, ty: Type, data: Box, }, BranchTable { opcode: Opcode, ty: Type, arg: Value, table: JumpTable, }, Call { opcode: Opcode, ty: Type, data: Box, }, Return { opcode: Opcode, ty: Type, data: Box, }, } /// A variable list of `Value` operands used for function call arguments and passing arguments to /// basic blocks. #[derive(Debug)] pub struct VariableArgs(Vec); impl VariableArgs { pub fn new() -> VariableArgs { VariableArgs(Vec::new()) } pub fn push(&mut self, v: Value) { self.0.push(v) } pub fn is_empty(&self) -> bool { self.0.is_empty() } } // Coerce VariableArgs into a &[Value] slice. impl Deref for VariableArgs { type Target = [Value]; fn deref<'a>(&'a self) -> &'a [Value] { &self.0 } } impl DerefMut for VariableArgs { fn deref_mut<'a>(&'a mut self) -> &'a mut [Value] { &mut self.0 } } impl Display for VariableArgs { fn fmt(&self, fmt: &mut Formatter) -> fmt::Result { for (i, val) in self.0.iter().enumerate() { if i == 0 { try!(write!(fmt, "{}", val)); } else { try!(write!(fmt, ", {}", val)); } } Ok(()) } } impl Default for VariableArgs { fn default() -> VariableArgs { VariableArgs::new() } } /// Payload data for jump instructions. These need to carry lists of EBB arguments that won't fit /// in the allowed InstructionData size. 
#[derive(Debug)] pub struct JumpData { pub destination: Ebb, pub arguments: VariableArgs, } impl Display for JumpData { fn fmt(&self, f: &mut Formatter) -> fmt::Result { if self.arguments.is_empty() { write!(f, "{}", self.destination) } else { write!(f, "{}({})", self.destination, self.arguments) } } } /// Payload data for branch instructions. These need to carry lists of EBB arguments that won't fit /// in the allowed InstructionData size. #[derive(Debug)] pub struct BranchData { pub arg: Value, pub destination: Ebb, pub arguments: VariableArgs, } impl Display for BranchData { fn fmt(&self, f: &mut Formatter) -> fmt::Result { try!(write!(f, "{}, {}", self.arg, self.destination)); if !self.arguments.is_empty() { try!(write!(f, "({})", self.arguments)); } Ok(()) } } /// Payload of a call instruction. #[derive(Debug)] pub struct CallData { /// Second result value for a call producing multiple return values. second_result: Value, // Dynamically sized array containing call argument values. pub args: VariableArgs, } impl Display for CallData { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "TBD({})", self.args) } } /// Payload of a return instruction. #[derive(Debug)] pub struct ReturnData { // Dynamically sized array containing return values. pub args: VariableArgs, } impl InstructionData { /// Create data for a call instruction. pub fn call(opc: Opcode, return_type: Type) -> InstructionData { InstructionData::Call { opcode: opc, ty: return_type, data: Box::new(CallData { second_result: NO_VALUE, args: VariableArgs::new(), }), } } } /// Analyzing an instruction. /// /// Avoid large matches on instruction formats by using the methods efined here to examine /// instructions. impl InstructionData { /// Return information about the destination of a branch or jump instruction. /// /// Any instruction that can transfer control to another EBB reveals its possible destinations /// here. 
pub fn analyze_branch<'a>(&'a self) -> BranchInfo<'a> { match self { &InstructionData::Jump { ref data, .. } => { BranchInfo::SingleDest(data.destination, &data.arguments) } &InstructionData::Branch { ref data, .. } => { BranchInfo::SingleDest(data.destination, &data.arguments) } &InstructionData::BranchTable { table, .. } => BranchInfo::Table(table), _ => BranchInfo::NotABranch, } } } /// Information about branch and jump instructions. pub enum BranchInfo<'a> { /// This is not a branch or jump instruction. /// This instruction will not transfer control to another EBB in the function, but it may still /// affect control flow by returning or trapping. NotABranch, /// This is a branch or jump to a single destination EBB, possibly taking value arguments. SingleDest(Ebb, &'a [Value]), /// This is a jump table branch which can have many destination EBBs. Table(JumpTable), } /// Value type constraints for a given opcode. /// /// The `InstructionFormat` determines the constraints on most operands, but `Value` operands and /// results are not determined by the format. Every `Opcode` has an associated /// `OpcodeConstraints` object that provides the missing details. /// /// Since there can be a lot of opcodes, the `OpcodeConstraints` object is encoded as a bit field /// by the `meta/gen_instr.py` script. /// /// The bit field bits are: /// /// Bits 0-2: /// Number of fixed result values. This does not include `variable_args` results as are /// produced by call instructions. /// /// Bit 3: /// This opcode is polymorphic and the controlling type variable can be inferred from the /// designated input operand. This is the `typevar_operand` index given to the /// `InstructionFormat` meta language object. When bit 0 is not set, the controlling type /// variable must be the first output value instead. /// /// Bits 4-7: /// Permitted set of types for the controlling type variable as an index into `TYPE_SETS`. 
///
/// Bits 8-15:
///     Offset into `OPERAND_CONSTRAINTS` table of the descriptors for this opcode. The first
///     `fixed_results()` entries describe the result constraints, then follows constraints for the
///     fixed `Value` input operands. The number of `Value` inputs is determined by the instruction
///     format.
///
#[derive(Clone, Copy)]
pub struct OpcodeConstraints(u16);

impl OpcodeConstraints {
    /// Can the controlling type variable for this opcode be inferred from the designated value
    /// input operand?
    /// This also implies that this opcode is polymorphic.
    pub fn use_typevar_operand(self) -> bool {
        // Bit 3 of the bit field.
        (self.0 & 0x8) != 0
    }

    /// Get the number of *fixed* result values produced by this opcode.
    /// This does not include `variable_args` produced by calls.
    pub fn fixed_results(self) -> usize {
        // Bits 0-2 of the bit field.
        (self.0 & 0x7) as usize
    }

    /// Get the offset into `TYPE_SETS` for the controlling type variable.
    /// Returns `None` if the instruction is not polymorphic.
    fn typeset_offset(self) -> Option<usize> {
        // Bits 4-7 of the bit field. An index past the end of `TYPE_SETS` means "no type set".
        let offset = ((self.0 & 0xff) >> 4) as usize;
        if offset < TYPE_SETS.len() {
            Some(offset)
        } else {
            None
        }
    }

    /// Get the offset into OPERAND_CONSTRAINTS where the descriptors for this opcode begin.
    fn constraint_offset(self) -> usize {
        // Bits 8-15 of the bit field.
        (self.0 >> 8) as usize
    }

    /// Get the value type of result number `n`, having resolved the controlling type variable to
    /// `ctrl_type`.
    ///
    /// # Panics
    ///
    /// Panics if `n` is not less than `fixed_results()`, or if the constraint for that result
    /// does not resolve to a concrete type.
    pub fn result_type(self, n: usize, ctrl_type: Type) -> Type {
        assert!(n < self.fixed_results(), "Invalid result index");
        // The result constraints are the first entries of this opcode's descriptor range.
        OPERAND_CONSTRAINTS[self.constraint_offset() + n]
            .resolve(ctrl_type)
            .expect("Result constraints can't be free")
    }

    /// Get the typeset of allowed types for the controlling type variable in a polymorphic
    /// instruction.
    ///
    /// Returns `None` if the instruction is not polymorphic.
    pub fn ctrl_typeset(self) -> Option<ValueTypeSet> {
        self.typeset_offset().map(|offset| TYPE_SETS[offset])
    }

    /// Is this instruction polymorphic?
    pub fn is_polymorphic(self) -> bool {
        self.ctrl_typeset().is_some()
    }
}

/// A value type set describes the permitted set of types for a type variable.
#[derive(Clone, Copy)] pub struct ValueTypeSet { allow_scalars: bool, allow_simd: bool, base: Type, all_ints: bool, all_floats: bool, all_bools: bool, } impl ValueTypeSet { /// Is `scalar` part of the base type set? /// /// Note that the base type set does not have to be included in the type set proper. fn is_base_type(&self, scalar: Type) -> bool { scalar == self.base || (self.all_ints && scalar.is_int()) || (self.all_floats && scalar.is_float()) || (self.all_bools && scalar.is_bool()) } /// Does `typ` belong to this set? pub fn contains(&self, typ: Type) -> bool { let allowed = if typ.is_scalar() { self.allow_scalars } else { self.allow_simd }; allowed && self.is_base_type(typ.lane_type()) } /// Get an example member of this type set. /// /// This is used for error messages to avoid suggesting invalid types. pub fn example(&self) -> Type { if self.base != types::VOID { return self.base; } let t = if self.all_ints { types::I32 } else if self.all_floats { types::F32 } else if self.allow_scalars { types::B1 } else { types::B32 }; if self.allow_scalars { t } else { t.by(4).unwrap() } } } /// Operand constraints. This describes the value type constraints on a single `Value` operand. enum OperandConstraint { /// This operand has a concrete value type. Concrete(Type), /// This operand can vary freely within the given type set. /// The type set is identified by its index into the TYPE_SETS constant table. Free(u8), /// This operand is the same type as the controlling type variable. Same, /// This operand is `ctrlType.lane_type()`. LaneOf, /// This operand is `ctrlType.as_bool()`. AsBool, } impl OperandConstraint { /// Resolve this operand constraint into a concrete value type, given the value of the /// controlling type variable. /// Returns `None` if this is a free operand which is independent of the controlling type /// variable. 
pub fn resolve(&self, ctrl_type: Type) -> Option { use self::OperandConstraint::*; match *self { Concrete(t) => Some(t), Free(_) => None, Same => Some(ctrl_type), LaneOf => Some(ctrl_type.lane_type()), AsBool => Some(ctrl_type.as_bool()), } } } #[cfg(test)] mod tests { use super::*; #[test] fn opcodes() { let x = Opcode::Iadd; let mut y = Opcode::Isub; assert!(x != y); y = Opcode::Iadd; assert_eq!(x, y); assert_eq!(x.format(), Some(InstructionFormat::Binary)); assert_eq!(format!("{:?}", Opcode::IaddImm), "IaddImm"); assert_eq!(Opcode::IaddImm.to_string(), "iadd_imm"); // Check the matcher. assert_eq!("iadd".parse::(), Ok(Opcode::Iadd)); assert_eq!("iadd_imm".parse::(), Ok(Opcode::IaddImm)); assert_eq!("iadd\0".parse::(), Err("Unknown opcode")); assert_eq!("".parse::(), Err("Unknown opcode")); assert_eq!("\0".parse::(), Err("Unknown opcode")); } #[test] fn instruction_data() { use std::mem; // The size of the InstructionData enum is important for performance. It should not exceed // 16 bytes. Use `Box` out-of-line payloads for instruction formats that require // more space than that. // It would be fine with a data structure smaller than 16 bytes, but what are the odds of // that? assert_eq!(mem::size_of::(), 16); } #[test] fn value_set() { use ir::types::*; let vts = ValueTypeSet { allow_scalars: true, allow_simd: true, base: VOID, all_ints: true, all_floats: false, all_bools: true, }; assert_eq!(vts.example().to_string(), "i32"); let vts = ValueTypeSet { allow_scalars: true, allow_simd: true, base: VOID, all_ints: false, all_floats: true, all_bools: true, }; assert_eq!(vts.example().to_string(), "f32"); let vts = ValueTypeSet { allow_scalars: false, allow_simd: true, base: VOID, all_ints: false, all_floats: true, all_bools: true, }; assert_eq!(vts.example().to_string(), "f32x4"); let vts = ValueTypeSet { allow_scalars: false, allow_simd: true, base: VOID, all_ints: false, all_floats: false, all_bools: true, }; assert_eq!(vts.example().to_string(), "b32x4"); } }