//! Instruction formats and opcodes.
//!
//! The `instructions` module contains definitions for instruction formats, opcodes, and the
//! in-memory representation of IL instructions.
//!
//! A large part of this module is auto-generated from the instruction descriptions in the meta
//! directory.

use std::fmt::{self, Display, Formatter};
use std::str::FromStr;
use std::ops::{Deref, DerefMut};

use ir::{Value, Type, Ebb, JumpTable, SigRef, FuncRef};
use ir::immediates::{Imm64, Uimm8, Ieee32, Ieee64};
use ir::condcodes::*;
use ir::types;
use ir::DataFlowGraph;

use entity_list;
use packed_option::PackedOption;
use ref_slice::{ref_slice, ref_slice_mut};

/// Some instructions use an external list of argument values because there is not enough space in
/// the 16-byte `InstructionData` struct. These value lists are stored in a memory pool in
/// `dfg.value_lists`.
pub type ValueList = entity_list::EntityList<Value>;

/// Memory pool for holding value lists. See `ValueList`.
pub type ValueListPool = entity_list::ListPool<Value>;

// Include code generated by `lib/cretonne/meta/gen_instr.py`. This file contains:
//
// - The `pub enum InstructionFormat` enum with all the instruction formats.
// - The `pub enum Opcode` definition with all known opcodes,
// - The `const OPCODE_FORMAT: [InstructionFormat; N]` table.
// - The private `fn opcode_name(Opcode) -> &'static str` function, and
// - The hash table `const OPCODE_HASH_TABLE: [Option<Opcode>; N]`.
//
// For value type constraints:
//
// - The `const OPCODE_CONSTRAINTS : [OpcodeConstraints; N]` table.
// - The `const TYPE_SETS : [ValueTypeSet; N]` table.
// - The `const OPERAND_CONSTRAINTS : [OperandConstraint; N]` table.
//
include!(concat!(env!("OUT_DIR"), "/opcodes.rs"));

impl Display for Opcode {
    /// Print the textual opcode name (e.g. `iadd_imm`), as produced by the generated
    /// `opcode_name()` function.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        write!(f, "{}", opcode_name(*self))
    }
}

impl Opcode {
    /// Get the instruction format for this opcode.
    ///
    /// The generated table is indexed by `self as usize - 1`: the tests in this module rely on
    /// `Opcode` never using the value 0, so that `Option<Opcode>` stays the same size as
    /// `Opcode`.
    pub fn format(self) -> InstructionFormat {
        OPCODE_FORMAT[self as usize - 1]
    }

    /// Get the constraint descriptor for this opcode.
    ///
    /// Indexed the same way as `format()`, i.e. by `self as usize - 1`.
    pub fn constraints(self) -> OpcodeConstraints {
        OPCODE_CONSTRAINTS[self as usize - 1]
    }
}

// This trait really belongs in lib/reader where it is used by the `.cton` file parser, but since
// it critically depends on the `opcode_name()` function which is needed here anyway, it lives in
// this module. This also saves us from running the build script twice to generate code for the two
// separate crates.
impl FromStr for Opcode {
    type Err = &'static str;

    /// Parse an Opcode name from a string.
    ///
    /// Returns `Err("Unknown opcode")` when `s` is not found in the generated hash table.
    fn from_str(s: &str) -> Result<Opcode, &'static str> {
        use constant_hash::{Table, simple_hash, probe};

        // Adapter that lets `probe()` look at the generated table: the key of an occupied slot
        // is the textual name of the opcode stored there.
        impl<'a> Table<&'a str> for [Option<Opcode>] {
            fn len(&self) -> usize {
                self.len()
            }

            fn key(&self, idx: usize) -> Option<&'a str> {
                self[idx].map(opcode_name)
            }
        }

        match probe::<&str, [Option<Opcode>]>(&OPCODE_HASH_TABLE, s, simple_hash(s)) {
            None => Err("Unknown opcode"),
            // We unwrap here because probe() should have ensured that the entry
            // at this index is not None.
            Some(i) => Ok(OPCODE_HASH_TABLE[i].unwrap()),
        }
    }
}

/// Contents of an instruction.
///
/// Every variant must contain `opcode` and `ty` fields. An instruction that doesn't produce a
/// value should have its `ty` field set to `VOID`. The size of `InstructionData` should be kept at
/// 16 bytes on 64-bit architectures. If more space is needed to represent an instruction, use a
/// `Box` to store the additional information out of line.
#[derive(Clone, Debug)] #[allow(missing_docs)] pub enum InstructionData { Nullary { opcode: Opcode, ty: Type }, Unary { opcode: Opcode, ty: Type, arg: Value, }, UnaryImm { opcode: Opcode, ty: Type, imm: Imm64, }, UnaryIeee32 { opcode: Opcode, ty: Type, imm: Ieee32, }, UnaryIeee64 { opcode: Opcode, ty: Type, imm: Ieee64, }, UnarySplit { opcode: Opcode, ty: Type, second_result: PackedOption, arg: Value, }, Binary { opcode: Opcode, ty: Type, args: [Value; 2], }, BinaryImm { opcode: Opcode, ty: Type, arg: Value, imm: Imm64, }, BinaryOverflow { opcode: Opcode, ty: Type, second_result: PackedOption, args: [Value; 2], }, Ternary { opcode: Opcode, ty: Type, args: [Value; 3], }, MultiAry { opcode: Opcode, ty: Type, second_result: PackedOption, args: ValueList, }, InsertLane { opcode: Opcode, ty: Type, lane: Uimm8, args: [Value; 2], }, ExtractLane { opcode: Opcode, ty: Type, lane: Uimm8, arg: Value, }, IntCompare { opcode: Opcode, ty: Type, cond: IntCC, args: [Value; 2], }, FloatCompare { opcode: Opcode, ty: Type, cond: FloatCC, args: [Value; 2], }, Jump { opcode: Opcode, ty: Type, destination: Ebb, args: ValueList, }, Branch { opcode: Opcode, ty: Type, destination: Ebb, args: ValueList, }, BranchTable { opcode: Opcode, ty: Type, arg: Value, table: JumpTable, }, Call { opcode: Opcode, ty: Type, second_result: PackedOption, func_ref: FuncRef, args: ValueList, }, IndirectCall { opcode: Opcode, ty: Type, second_result: PackedOption, sig_ref: SigRef, args: ValueList, }, } /// A variable list of `Value` operands used for function call arguments and passing arguments to /// basic blocks. #[derive(Clone, Debug)] pub struct VariableArgs(Vec); impl VariableArgs { /// Create an empty argument list. pub fn new() -> VariableArgs { VariableArgs(Vec::new()) } /// Add an argument to the end. pub fn push(&mut self, v: Value) { self.0.push(v) } /// Check if the list is empty. 
pub fn is_empty(&self) -> bool { self.0.is_empty() } /// Convert this to a value list in `pool` with `fixed` prepended. pub fn into_value_list(self, fixed: &[Value], pool: &mut ValueListPool) -> ValueList { let mut vlist = ValueList::default(); vlist.extend(fixed.iter().cloned(), pool); vlist.extend(self.0, pool); vlist } } // Coerce `VariableArgs` into a `&[Value]` slice. impl Deref for VariableArgs { type Target = [Value]; fn deref<'a>(&'a self) -> &'a [Value] { &self.0 } } impl DerefMut for VariableArgs { fn deref_mut<'a>(&'a mut self) -> &'a mut [Value] { &mut self.0 } } impl Display for VariableArgs { fn fmt(&self, fmt: &mut Formatter) -> fmt::Result { for (i, val) in self.0.iter().enumerate() { if i == 0 { write!(fmt, "{}", val)?; } else { write!(fmt, ", {}", val)?; } } Ok(()) } } impl Default for VariableArgs { fn default() -> VariableArgs { VariableArgs::new() } } /// Analyzing an instruction. /// /// Avoid large matches on instruction formats by using the methods defined here to examine /// instructions. impl InstructionData { /// Return information about the destination of a branch or jump instruction. /// /// Any instruction that can transfer control to another EBB reveals its possible destinations /// here. pub fn analyze_branch<'a>(&'a self, pool: &'a ValueListPool) -> BranchInfo<'a> { match self { &InstructionData::Jump { destination, ref args, .. } => { BranchInfo::SingleDest(destination, &args.as_slice(pool)) } &InstructionData::Branch { destination, ref args, .. } => { BranchInfo::SingleDest(destination, &args.as_slice(pool)[1..]) } &InstructionData::BranchTable { table, .. } => BranchInfo::Table(table), _ => BranchInfo::NotABranch, } } /// Return information about a call instruction. /// /// Any instruction that can call another function reveals its call signature here. pub fn analyze_call<'a>(&'a self, pool: &'a ValueListPool) -> CallInfo<'a> { match self { &InstructionData::Call { func_ref, ref args, .. 
} => { CallInfo::Direct(func_ref, &args.as_slice(pool)) } &InstructionData::IndirectCall { sig_ref, ref args, .. } => { CallInfo::Indirect(sig_ref, &args.as_slice(pool)[1..]) } _ => CallInfo::NotACall, } } /// Get the controlling type variable, or `VOID` if this instruction isn't polymorphic. /// /// In most cases, the controlling type variable is the same as the first result type, but some /// opcodes require us to read the type of the designated type variable operand from `dfg`. pub fn ctrl_typevar(&self, dfg: &DataFlowGraph) -> Type { let constraints = self.opcode().constraints(); if !constraints.is_polymorphic() { types::VOID } else if constraints.requires_typevar_operand() { // Not all instruction formats have a designated operand, but in that case // `requires_typevar_operand()` should never be true. dfg.value_type(self.typevar_operand(&dfg.value_lists) .expect("Instruction format doesn't have a designated operand, bad opcode.")) } else { // For locality of reference, we prefer to get the controlling type variable from // `idata` itself, when possible. self.first_type() } } } /// Information about branch and jump instructions. pub enum BranchInfo<'a> { /// This is not a branch or jump instruction. /// This instruction will not transfer control to another EBB in the function, but it may still /// affect control flow by returning or trapping. NotABranch, /// This is a branch or jump to a single destination EBB, possibly taking value arguments. SingleDest(Ebb, &'a [Value]), /// This is a jump table branch which can have many destination EBBs. Table(JumpTable), } /// Information about call instructions. pub enum CallInfo<'a> { /// This is not a call instruction. NotACall, /// This is a direct call to an external function declared in the preamble. See /// `DataFlowGraph.ext_funcs`. Direct(FuncRef, &'a [Value]), /// This is an indirect call with the specified signature. See `DataFlowGraph.signatures`. 
Indirect(SigRef, &'a [Value]), } /// Value type constraints for a given opcode. /// /// The `InstructionFormat` determines the constraints on most operands, but `Value` operands and /// results are not determined by the format. Every `Opcode` has an associated /// `OpcodeConstraints` object that provides the missing details. #[derive(Clone, Copy)] pub struct OpcodeConstraints { /// Flags for this opcode encoded as a bit field: /// /// Bits 0-2: /// Number of fixed result values. This does not include `variable_args` results as are /// produced by call instructions. /// /// Bit 3: /// This opcode is polymorphic and the controlling type variable can be inferred from the /// designated input operand. This is the `typevar_operand` index given to the /// `InstructionFormat` meta language object. When this bit is not set, the controlling /// type variable must be the first output value instead. /// /// Bit 4: /// This opcode is polymorphic and the controlling type variable does *not* appear as the /// first result type. /// /// Bits 5-7: /// Number of fixed value arguments. The minimum required number of value operands. flags: u8, /// Permitted set of types for the controlling type variable as an index into `TYPE_SETS`. typeset_offset: u8, /// Offset into `OPERAND_CONSTRAINT` table of the descriptors for this opcode. The first /// `fixed_results()` entries describe the result constraints, then follows constraints for the /// fixed `Value` input operands. The number of `Value` inputs is determined by the instruction /// format. constraint_offset: u16, } impl OpcodeConstraints { /// Can the controlling type variable for this opcode be inferred from the designated value /// input operand? /// This also implies that this opcode is polymorphic. 
pub fn use_typevar_operand(self) -> bool { (self.flags & 0x8) != 0 } /// Is it necessary to look at the designated value input operand in order to determine the /// controlling type variable, or is it good enough to use the first return type? /// /// Most polymorphic instructions produce a single result with the type of the controlling type /// variable. A few polymorphic instructions either don't produce any results, or produce /// results with a fixed type. These instructions return `true`. pub fn requires_typevar_operand(self) -> bool { (self.flags & 0x10) != 0 } /// Get the number of *fixed* result values produced by this opcode. /// This does not include `variable_args` produced by calls. pub fn fixed_results(self) -> usize { (self.flags & 0x7) as usize } /// Get the number of *fixed* input values required by this opcode. /// /// This does not include `variable_args` arguments on call and branch instructions. /// /// The number of fixed input values is usually implied by the instruction format, but /// instruction formats that use a `ValueList` put both fixed and variable arguments in the /// list. This method returns the *minimum* number of values required in the value list. pub fn fixed_value_arguments(self) -> usize { ((self.flags >> 5) & 0x7) as usize } /// Get the offset into `TYPE_SETS` for the controlling type variable. /// Returns `None` if the instruction is not polymorphic. fn typeset_offset(self) -> Option { let offset = self.typeset_offset as usize; if offset < TYPE_SETS.len() { Some(offset) } else { None } } /// Get the offset into OPERAND_CONSTRAINTS where the descriptors for this opcode begin. fn constraint_offset(self) -> usize { self.constraint_offset as usize } /// Get the value type of result number `n`, having resolved the controlling type variable to /// `ctrl_type`. 
pub fn result_type(self, n: usize, ctrl_type: Type) -> Type { assert!(n < self.fixed_results(), "Invalid result index"); OPERAND_CONSTRAINTS[self.constraint_offset() + n] .resolve(ctrl_type) .expect("Result constraints can't be free") } /// Get the typeset of allowed types for the controlling type variable in a polymorphic /// instruction. pub fn ctrl_typeset(self) -> Option { self.typeset_offset().map(|offset| TYPE_SETS[offset]) } /// Is this instruction polymorphic? pub fn is_polymorphic(self) -> bool { self.ctrl_typeset().is_some() } } /// A value type set describes the permitted set of types for a type variable. #[derive(Clone, Copy)] pub struct ValueTypeSet { min_lanes: u8, max_lanes: u8, min_int: u8, max_int: u8, min_float: u8, max_float: u8, min_bool: u8, max_bool: u8, } impl ValueTypeSet { /// Is `scalar` part of the base type set? /// /// Note that the base type set does not have to be included in the type set proper. fn is_base_type(&self, scalar: Type) -> bool { let l2b = scalar.log2_lane_bits(); if scalar.is_int() { self.min_int <= l2b && l2b < self.max_int } else if scalar.is_float() { self.min_float <= l2b && l2b < self.max_float } else if scalar.is_bool() { self.min_bool <= l2b && l2b < self.max_bool } else { false } } /// Does `typ` belong to this set? pub fn contains(&self, typ: Type) -> bool { let l2l = typ.log2_lane_count(); self.min_lanes <= l2l && l2l < self.max_lanes && self.is_base_type(typ.lane_type()) } /// Get an example member of this type set. /// /// This is used for error messages to avoid suggesting invalid types. pub fn example(&self) -> Type { let t = if self.max_int > 5 { types::I32 } else if self.max_float > 5 { types::F32 } else if self.max_bool > 5 { types::B32 } else { types::B1 }; t.by(1 << self.min_lanes).unwrap() } } /// Operand constraints. This describes the value type constraints on a single `Value` operand. enum OperandConstraint { /// This operand has a concrete value type. 
Concrete(Type), /// This operand can vary freely within the given type set. /// The type set is identified by its index into the TYPE_SETS constant table. Free(u8), /// This operand is the same type as the controlling type variable. Same, /// This operand is `ctrlType.lane_type()`. LaneOf, /// This operand is `ctrlType.as_bool()`. AsBool, /// This operand is `ctrlType.half_width()`. HalfWidth, /// This operand is `ctrlType.double_width()`. DoubleWidth, /// This operand is `ctrlType.half_vector()`. HalfVector, /// This operand is `ctrlType.double_vector()`. DoubleVector, } impl OperandConstraint { /// Resolve this operand constraint into a concrete value type, given the value of the /// controlling type variable. /// Returns `None` if this is a free operand which is independent of the controlling type /// variable. pub fn resolve(&self, ctrl_type: Type) -> Option { use self::OperandConstraint::*; match *self { Concrete(t) => Some(t), Free(_) => None, Same => Some(ctrl_type), LaneOf => Some(ctrl_type.lane_type()), AsBool => Some(ctrl_type.as_bool()), HalfWidth => Some(ctrl_type.half_width().expect("invalid type for half_width")), DoubleWidth => Some(ctrl_type.double_width().expect("invalid type for double_width")), HalfVector => Some(ctrl_type.half_vector().expect("invalid type for half_vector")), DoubleVector => Some(ctrl_type.by(2).expect("invalid type for double_vector")), } } } #[cfg(test)] mod tests { use super::*; #[test] fn opcodes() { use std::mem; let x = Opcode::Iadd; let mut y = Opcode::Isub; assert!(x != y); y = Opcode::Iadd; assert_eq!(x, y); assert_eq!(x.format(), InstructionFormat::Binary); assert_eq!(format!("{:?}", Opcode::IaddImm), "IaddImm"); assert_eq!(Opcode::IaddImm.to_string(), "iadd_imm"); // Check the matcher. 
assert_eq!("iadd".parse::(), Ok(Opcode::Iadd)); assert_eq!("iadd_imm".parse::(), Ok(Opcode::IaddImm)); assert_eq!("iadd\0".parse::(), Err("Unknown opcode")); assert_eq!("".parse::(), Err("Unknown opcode")); assert_eq!("\0".parse::(), Err("Unknown opcode")); // Opcode is a single byte, and because Option originally came to 2 bytes, early on // Opcode included a variant NotAnOpcode to avoid the unnecessary bloat. Since then the Rust // compiler has brought in NonZero optimization, meaning that an enum not using the 0 value // can be optional for no size cost. We want to ensure Option remains small. assert_eq!(mem::size_of::(), mem::size_of::>()); } #[test] fn instruction_data() { use std::mem; // The size of the `InstructionData` enum is important for performance. It should not // exceed 16 bytes. Use `Box` out-of-line payloads for instruction formats that // require more space than that. It would be fine with a data structure smaller than 16 // bytes, but what are the odds of that? assert_eq!(mem::size_of::(), 16); } #[test] fn constraints() { let a = Opcode::Iadd.constraints(); assert!(a.use_typevar_operand()); assert!(!a.requires_typevar_operand()); assert_eq!(a.fixed_results(), 1); assert_eq!(a.fixed_value_arguments(), 2); let b = Opcode::Bitcast.constraints(); assert!(!b.use_typevar_operand()); assert!(!b.requires_typevar_operand()); assert_eq!(b.fixed_results(), 1); assert_eq!(b.fixed_value_arguments(), 1); let c = Opcode::Call.constraints(); assert_eq!(c.fixed_results(), 0); assert_eq!(c.fixed_value_arguments(), 0); let i = Opcode::CallIndirect.constraints(); assert_eq!(i.fixed_results(), 0); assert_eq!(i.fixed_value_arguments(), 1); let cmp = Opcode::Icmp.constraints(); assert!(cmp.use_typevar_operand()); assert!(cmp.requires_typevar_operand()); assert_eq!(cmp.fixed_results(), 1); assert_eq!(cmp.fixed_value_arguments(), 2); } #[test] fn value_set() { use ir::types::*; let vts = ValueTypeSet { min_lanes: 0, max_lanes: 8, min_int: 3, max_int: 7, min_float: 0, 
max_float: 0, min_bool: 3, max_bool: 7, }; assert!(vts.contains(I32)); assert!(vts.contains(I64)); assert!(vts.contains(I32X4)); assert!(!vts.contains(F32)); assert!(!vts.contains(B1)); assert!(vts.contains(B8)); assert!(vts.contains(B64)); assert_eq!(vts.example().to_string(), "i32"); let vts = ValueTypeSet { min_lanes: 0, max_lanes: 8, min_int: 0, max_int: 0, min_float: 5, max_float: 7, min_bool: 3, max_bool: 7, }; assert_eq!(vts.example().to_string(), "f32"); let vts = ValueTypeSet { min_lanes: 1, max_lanes: 8, min_int: 0, max_int: 0, min_float: 5, max_float: 7, min_bool: 3, max_bool: 7, }; assert_eq!(vts.example().to_string(), "f32x2"); let vts = ValueTypeSet { min_lanes: 2, max_lanes: 8, min_int: 0, max_int: 0, min_float: 0, max_float: 0, min_bool: 3, max_bool: 7, }; assert!(!vts.contains(B32X2)); assert!(vts.contains(B32X4)); assert_eq!(vts.example().to_string(), "b32x4"); let vts = ValueTypeSet { // TypeSet(lanes=(1, 256), ints=(8, 64)) min_lanes: 0, max_lanes: 9, min_int: 3, max_int: 7, min_float: 0, max_float: 0, min_bool: 0, max_bool: 0, }; assert!(vts.contains(I32)); assert!(vts.contains(I32X4)); } }