diff --git a/cranelift/codegen/meta/src/cdsl/encodings.rs b/cranelift/codegen/meta/src/cdsl/encodings.rs
index 77d6bc7374..540d3402ae 100644
--- a/cranelift/codegen/meta/src/cdsl/encodings.rs
+++ b/cranelift/codegen/meta/src/cdsl/encodings.rs
@@ -1,5 +1,4 @@
-use std::rc::Rc;
-
+use crate::cdsl::formats::FormatRegistry;
 use crate::cdsl::instructions::{
     InstSpec, Instruction, InstructionPredicate, InstructionPredicateNode,
     InstructionPredicateNumber, InstructionPredicateRegistry, ValueTypeOrAny,
@@ -7,6 +6,8 @@ use crate::cdsl::instructions::{
 use crate::cdsl::recipes::{EncodingRecipeNumber, Recipes};
 use crate::cdsl::settings::SettingPredicateNumber;
 use crate::cdsl::types::ValueType;
+use std::rc::Rc;
+use std::string::ToString;
 
 /// Encoding for a concrete instruction.
 ///
@@ -61,19 +62,25 @@ pub(crate) struct EncodingBuilder {
 }
 
 impl EncodingBuilder {
-    pub fn new(inst: InstSpec, recipe: EncodingRecipeNumber, encbits: u16) -> Self {
+    pub fn new(
+        inst: InstSpec,
+        recipe: EncodingRecipeNumber,
+        encbits: u16,
+        formats: &FormatRegistry,
+    ) -> Self {
         let (inst_predicate, bound_type) = match &inst {
             InstSpec::Bound(inst) => {
                 let other_typevars = &inst.inst.polymorphic_info.as_ref().unwrap().other_typevars;
 
-                assert!(
-                    inst.value_types.len() == other_typevars.len() + 1,
+                assert_eq!(
+                    inst.value_types.len(),
+                    other_typevars.len() + 1,
                     "partially bound polymorphic instruction"
                 );
 
                 // Add secondary type variables to the instruction predicate.
                 let value_types = &inst.value_types;
-                let mut inst_predicate = None;
+                let mut inst_predicate: Option<InstructionPredicate> = None;
                 for (typevar, value_type) in other_typevars.iter().zip(value_types.iter().skip(1)) {
                     let value_type = match value_type {
                         ValueTypeOrAny::Any => continue,
@@ -84,6 +91,24 @@ impl EncodingBuilder {
                     inst_predicate = Some(type_predicate.into());
                 }
 
+                // Add immediate value predicates
+                for (immediate_value, immediate_operand) in inst
+                    .immediate_values
+                    .iter()
+                    .zip(inst.inst.operands_in.iter().filter(|o| o.is_immediate()))
+                {
+                    let immediate_predicate = InstructionPredicate::new_is_field_equal(
+                        formats.get(inst.inst.format),
+                        immediate_operand.name,
+                        immediate_value.to_string(),
+                    );
+                    inst_predicate = if let Some(type_predicate) = inst_predicate {
+                        Some(type_predicate.and(immediate_predicate))
+                    } else {
+                        Some(immediate_predicate.into())
+                    }
+                }
+
                 let ctrl_type = value_types[0]
                     .clone()
                     .expect("Controlling type shouldn't be Any");
diff --git a/cranelift/codegen/meta/src/cdsl/instructions.rs b/cranelift/codegen/meta/src/cdsl/instructions.rs
index 178077e456..5d47c1a76b 100644
--- a/cranelift/codegen/meta/src/cdsl/instructions.rs
+++ b/cranelift/codegen/meta/src/cdsl/instructions.rs
@@ -2,6 +2,7 @@ use cranelift_entity::{entity_impl, PrimaryMap};
 
 use std::collections::HashMap;
 use std::fmt;
+use std::fmt::{Display, Error, Formatter};
 use std::ops;
 use std::rc::Rc;
 
@@ -13,6 +14,7 @@ use crate::cdsl::operands::Operand;
 use crate::cdsl::type_inference::Constraint;
 use crate::cdsl::types::{LaneType, ReferenceType, ValueType, VectorType};
 use crate::cdsl::typevar::TypeVar;
+use crate::shared::types::{Bool, Float, Int, Reference};
 use cranelift_codegen_shared::condcodes::IntCC;
 
 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
@@ -80,6 +82,14 @@ impl InstructionGroup {
     }
 }
 
+/// Instructions can have parameters bound to them to specialize them for more specific encodings
+/// (e.g. the encoding for adding two float types may be different from that of adding two
+/// integer types)
+pub trait Bindable {
+    /// Bind a parameter to an instruction
+    fn bind(&self, parameter: impl Into<BindParameter>) -> BoundInstruction;
+}
+
 #[derive(Debug)]
 pub struct PolymorphicInfo {
     pub use_typevar_operand: bool,
@@ -173,30 +183,11 @@ impl Instruction {
             None => Vec::new(),
         }
     }
+}
 
-    pub fn bind(&self, lane_type: impl Into<LaneType>) -> BoundInstruction {
-        bind(self.clone(), Some(lane_type.into()), Vec::new())
-    }
-
-    pub fn bind_ref(&self, reference_type: impl Into<ReferenceType>) -> BoundInstruction {
-        bind_ref(self.clone(), Some(reference_type.into()), Vec::new())
-    }
-
-    pub fn bind_vector_from_lane(
-        &self,
-        lane_type: impl Into<LaneType>,
-        vector_size_in_bits: u64,
-    ) -> BoundInstruction {
-        bind_vector(
-            self.clone(),
-            lane_type.into(),
-            vector_size_in_bits,
-            Vec::new(),
-        )
-    }
-
-    pub fn bind_any(&self) -> BoundInstruction {
-        bind(self.clone(), None, Vec::new())
+impl Bindable for Instruction {
+    fn bind(&self, parameter: impl Into<BindParameter>) -> BoundInstruction {
+        BoundInstruction::new(self).bind(parameter)
     }
 }
 
@@ -407,36 +398,163 @@ impl ValueTypeOrAny {
     }
 }
 
+/// The number of bits in the vector
+type VectorBitWidth = u64;
+
+/// A parameter used for binding instructions to specific types or values
+pub enum BindParameter {
+    Any,
+    Lane(LaneType),
+    Vector(LaneType, VectorBitWidth),
+    Reference(ReferenceType),
+    Immediate(Immediate),
+}
+
+/// Constructor for more easily building vector parameters from any lane type
+pub fn vector(parameter: impl Into<LaneType>, vector_size: VectorBitWidth) -> BindParameter {
+    BindParameter::Vector(parameter.into(), vector_size)
+}
+
+impl From<Int> for BindParameter {
+    fn from(ty: Int) -> Self {
+        BindParameter::Lane(ty.into())
+    }
+}
+
+impl From<Bool> for BindParameter {
+    fn from(ty: Bool) -> Self {
+        BindParameter::Lane(ty.into())
+    }
+}
+
+impl From<Float> for BindParameter {
+    fn from(ty: Float) -> Self {
+        BindParameter::Lane(ty.into())
+    }
+}
+
+impl From<LaneType> for BindParameter {
+    fn from(ty: LaneType) -> Self {
+        BindParameter::Lane(ty)
+    }
+}
+
+impl From<Reference> for BindParameter {
+    fn from(ty: Reference) -> Self {
+        BindParameter::Reference(ty.into())
+    }
+}
+
+impl From<Immediate> for BindParameter {
+    fn from(imm: Immediate) -> Self {
+        BindParameter::Immediate(imm)
+    }
+}
+
+#[derive(Clone)]
+pub enum Immediate {
+    UInt8(u8),
+    UInt128(u128),
+}
+
+impl Display for Immediate {
+    fn fmt(&self, f: &mut Formatter) -> Result<(), Error> {
+        match self {
+            Immediate::UInt8(x) => write!(f, "{}", x),
+            Immediate::UInt128(x) => write!(f, "{}", x),
+        }
+    }
+}
+
 #[derive(Clone)]
 pub struct BoundInstruction {
     pub inst: Instruction,
     pub value_types: Vec<ValueTypeOrAny>,
+    pub immediate_values: Vec<Immediate>,
 }
 
 impl BoundInstruction {
-    pub fn bind(self, lane_type: impl Into<LaneType>) -> BoundInstruction {
-        bind(self.inst, Some(lane_type.into()), self.value_types)
+    /// Construct a new bound instruction (with nothing bound yet) from an instruction
+    fn new(inst: &Instruction) -> Self {
+        BoundInstruction {
+            inst: inst.clone(),
+            value_types: vec![],
+            immediate_values: vec![],
+        }
     }
 
-    pub fn bind_ref(self, reference_type: impl Into<ReferenceType>) -> BoundInstruction {
-        bind_ref(self.inst, Some(reference_type.into()), self.value_types)
-    }
+    /// Verify that the bindings for a BoundInstruction are correct.
+    fn verify_bindings(&self) -> Result<(), String> {
+        // Verify that binding types to the instruction does not violate the polymorphic rules.
+        if !self.value_types.is_empty() {
+            match &self.inst.polymorphic_info {
+                Some(poly) => {
+                    if self.value_types.len() > 1 + poly.other_typevars.len() {
+                        return Err(format!(
+                            "trying to bind too many types for {}",
+                            self.inst.name
+                        ));
+                    }
+                }
+                None => {
+                    return Err(format!(
+                        "trying to bind a type for {} which is not a polymorphic instruction",
+                        self.inst.name
+                    ));
+                }
+            }
+        }
 
-    pub fn bind_vector_from_lane(
-        self,
-        lane_type: impl Into<LaneType>,
-        vector_size_in_bits: u64,
-    ) -> BoundInstruction {
-        bind_vector(
-            self.inst,
-            lane_type.into(),
-            vector_size_in_bits,
-            self.value_types,
-        )
-    }
+        // Verify that only the right number of immediates are bound.
+        let immediate_count = self
+            .inst
+            .operands_in
+            .iter()
+            .filter(|o| o.is_immediate())
+            .count();
+        if self.immediate_values.len() > immediate_count {
+            return Err(format!(
+                "trying to bind too many immediates ({}) to instruction {} which only expects {} \
+                 immediates",
+                self.immediate_values.len(),
+                self.inst.name,
+                immediate_count
+            ));
+        }
 
-    pub fn bind_any(self) -> BoundInstruction {
-        bind(self.inst, None, self.value_types)
+        Ok(())
+    }
+}
+
+impl Bindable for BoundInstruction {
+    fn bind(&self, parameter: impl Into<BindParameter>) -> BoundInstruction {
+        let mut modified = self.clone();
+        match parameter.into() {
+            BindParameter::Any => modified.value_types.push(ValueTypeOrAny::Any),
+            BindParameter::Lane(lane_type) => modified
+                .value_types
+                .push(ValueTypeOrAny::ValueType(lane_type.into())),
+            BindParameter::Vector(lane_type, vector_size_in_bits) => {
+                let num_lanes = vector_size_in_bits / lane_type.lane_bits();
+                assert!(
+                    num_lanes >= 2,
+                    "Minimum lane number for bind_vector is 2, found {}.",
+                    num_lanes,
+                );
+                let vector_type = ValueType::Vector(VectorType::new(lane_type, num_lanes));
+                modified
+                    .value_types
+                    .push(ValueTypeOrAny::ValueType(vector_type));
+            }
+            BindParameter::Reference(reference_type) => {
+                modified
+                    .value_types
+                    .push(ValueTypeOrAny::ValueType(reference_type.into()));
+            }
+            BindParameter::Immediate(immediate) => modified.immediate_values.push(immediate),
+        }
+        modified.verify_bindings().unwrap();
+        modified
     }
 }
 
@@ -1124,17 +1242,13 @@ impl InstSpec {
             InstSpec::Bound(bound_inst) => &bound_inst.inst,
         }
     }
-    pub fn bind(&self, lane_type: impl Into<LaneType>) -> BoundInstruction {
-        match self {
-            InstSpec::Inst(inst) => inst.bind(lane_type),
-            InstSpec::Bound(inst) => inst.clone().bind(lane_type),
-        }
-    }
+}
 
-    pub fn bind_ref(&self, reference_type: impl Into<ReferenceType>) -> BoundInstruction {
+impl Bindable for InstSpec {
+    fn bind(&self, parameter: impl Into<BindParameter>) -> BoundInstruction {
         match self {
-            InstSpec::Inst(inst) => inst.bind_ref(reference_type),
-            InstSpec::Bound(inst) => inst.clone().bind_ref(reference_type),
+            InstSpec::Inst(inst) => inst.bind(parameter.into()),
+            InstSpec::Bound(inst) => inst.bind(parameter.into()),
         }
     }
 }
@@ -1151,79 +1265,94 @@ impl Into<InstSpec> for BoundInstruction {
     }
 }
 
-/// Helper bind reused by {Bound,}Instruction::bind.
-fn bind(
-    inst: Instruction,
-    lane_type: Option<LaneType>,
-    mut value_types: Vec<ValueTypeOrAny>,
-) -> BoundInstruction {
-    match lane_type {
-        Some(lane_type) => {
-            value_types.push(ValueTypeOrAny::ValueType(lane_type.into()));
-        }
-        None => {
-            value_types.push(ValueTypeOrAny::Any);
-        }
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::cdsl::formats::InstructionFormatBuilder;
+    use crate::cdsl::operands::{OperandBuilder, OperandKindBuilder, OperandKindFields};
+    use crate::cdsl::typevar::TypeSetBuilder;
+    use crate::shared::types::Int::{I32, I64};
+
+    fn field_to_operand(index: usize, field: OperandKindFields) -> Operand {
+        // pretend the index string is &'static
+        let name = Box::leak(index.to_string().into_boxed_str());
+        let kind = OperandKindBuilder::new(name, field).build();
+        let operand = OperandBuilder::new(name, kind).build();
+        operand
     }
-    verify_polymorphic_binding(&inst, &value_types);
-
-    BoundInstruction { inst, value_types }
-}
-
-/// Helper bind for reference types reused by {Bound,}Instruction::bind_ref.
-fn bind_ref(
-    inst: Instruction,
-    reference_type: Option<ReferenceType>,
-    mut value_types: Vec<ValueTypeOrAny>,
-) -> BoundInstruction {
-    match reference_type {
-        Some(reference_type) => {
-            value_types.push(ValueTypeOrAny::ValueType(reference_type.into()));
-        }
-        None => {
-            value_types.push(ValueTypeOrAny::Any);
-        }
+    fn field_to_operands(types: Vec<OperandKindFields>) -> Vec<Operand> {
+        types
+            .iter()
+            .enumerate()
+            .map(|(i, f)| field_to_operand(i, f.clone()))
+            .collect()
     }
-    verify_polymorphic_binding(&inst, &value_types);
-
-    BoundInstruction { inst, value_types }
-}
-
-/// Helper bind for vector types reused by {Bound,}Instruction::bind.
-fn bind_vector(
-    inst: Instruction,
-    lane_type: LaneType,
-    vector_size_in_bits: u64,
-    mut value_types: Vec<ValueTypeOrAny>,
-) -> BoundInstruction {
-    let num_lanes = vector_size_in_bits / lane_type.lane_bits();
-    assert!(
-        num_lanes >= 2,
-        "Minimum lane number for bind_vector is 2, found {}.",
-        num_lanes,
-    );
-    let vector_type = ValueType::Vector(VectorType::new(lane_type, num_lanes));
-    value_types.push(ValueTypeOrAny::ValueType(vector_type));
-    verify_polymorphic_binding(&inst, &value_types);
-    BoundInstruction { inst, value_types }
-}
-
-/// Helper to verify that binding types to the instruction does not violate polymorphic rules
-fn verify_polymorphic_binding(inst: &Instruction, value_types: &Vec<ValueTypeOrAny>) {
-    match &inst.polymorphic_info {
-        Some(poly) => {
-            assert!(
-                value_types.len() <= 1 + poly.other_typevars.len(),
-                format!("trying to bind too many types for {}", inst.name)
-            );
-        }
-        None => {
-            panic!(format!(
-                "trying to bind a type for {} which is not a polymorphic instruction",
-                inst.name
-            ));
+    fn build_fake_instruction(
+        inputs: Vec<OperandKindFields>,
+        outputs: Vec<OperandKindFields>,
+    ) -> Instruction {
+        // setup a format from the input operands
+        let mut formats = FormatRegistry::new();
+        let mut format = InstructionFormatBuilder::new("fake");
+        for (i, f) in inputs.iter().enumerate() {
+            match f {
+                OperandKindFields::TypeVar(_) => format = format.value(),
+                OperandKindFields::ImmValue => {
+                    format = format.imm(&field_to_operand(i, f.clone()).kind)
+                }
+                _ => {}
+            };
         }
+        formats.insert(format);
+
+        // create the fake instruction
+        InstructionBuilder::new("fake", "A fake instruction for testing.")
+            .operands_in(field_to_operands(inputs).iter().collect())
+            .operands_out(field_to_operands(outputs).iter().collect())
+            .build(&formats, OpcodeNumber(42))
+    }
+
+    #[test]
+    fn ensure_bound_instructions_can_bind_lane_types() {
+        let type1 = TypeSetBuilder::new().ints(8..64).build();
+        let in1 = OperandKindFields::TypeVar(TypeVar::new("a", "...", type1));
+        let inst = build_fake_instruction(vec![in1], vec![]);
+        inst.bind(LaneType::IntType(I32));
+    }
+
+    #[test]
+    fn ensure_bound_instructions_can_bind_immediates() {
+        let inst = build_fake_instruction(vec![OperandKindFields::ImmValue], vec![]);
+        let bound_inst = inst.bind(Immediate::UInt8(42));
+        assert!(bound_inst.verify_bindings().is_ok());
+    }
+
+    #[test]
+    #[should_panic]
+    fn ensure_instructions_fail_to_bind() {
+        let inst = build_fake_instruction(vec![], vec![]);
+        inst.bind(BindParameter::Lane(LaneType::IntType(I32)));
+        // trying to bind to an instruction with no inputs should fail
+    }
+
+    #[test]
+    #[should_panic]
+    fn ensure_bound_instructions_fail_to_bind_too_many_types() {
+        let type1 = TypeSetBuilder::new().ints(8..64).build();
+        let in1 = OperandKindFields::TypeVar(TypeVar::new("a", "...", type1));
+        let inst = build_fake_instruction(vec![in1], vec![]);
+        inst.bind(LaneType::IntType(I32))
+            .bind(LaneType::IntType(I64));
+    }
+
+    #[test]
+    #[should_panic]
+    fn ensure_instructions_fail_to_bind_too_many_immediates() {
+        let inst = build_fake_instruction(vec![OperandKindFields::ImmValue], vec![]);
+        inst.bind(BindParameter::Immediate(Immediate::UInt8(0)))
+            .bind(BindParameter::Immediate(Immediate::UInt8(1)));
+        // trying to bind too many immediates to an instruction should fail
     }
 }
diff --git a/cranelift/codegen/meta/src/isa/riscv/encodings.rs b/cranelift/codegen/meta/src/isa/riscv/encodings.rs
index 6ddc13b0a3..21ad3c469c 100644
--- a/cranelift/codegen/meta/src/isa/riscv/encodings.rs
+++ b/cranelift/codegen/meta/src/isa/riscv/encodings.rs
@@ -1,7 +1,7 @@
 use crate::cdsl::ast::{Apply, Expr, Literal, VarPool};
 use crate::cdsl::encodings::{Encoding, EncodingBuilder};
 use crate::cdsl::instructions::{
-    BoundInstruction, InstSpec, InstructionPredicateNode, InstructionPredicateRegistry,
+    Bindable, BoundInstruction, InstSpec, InstructionPredicateNode, InstructionPredicateRegistry,
 };
 use crate::cdsl::recipes::{EncodingRecipeNumber, Recipes};
 use crate::cdsl::settings::SettingGroup;
@@ -13,27 +13,34 @@ use crate::shared::types::Reference::{R32, R64};
 use crate::shared::Definitions as SharedDefinitions;
 
 use super::recipes::RecipeGroup;
-
-fn enc(inst: impl Into<InstSpec>, recipe: EncodingRecipeNumber, bits: u16) -> EncodingBuilder {
-    EncodingBuilder::new(inst.into(), recipe, bits)
-}
+use crate::cdsl::formats::FormatRegistry;
 
 pub(crate) struct PerCpuModeEncodings<'defs> {
     pub inst_pred_reg: InstructionPredicateRegistry,
     pub enc32: Vec<Encoding>,
     pub enc64: Vec<Encoding>,
     recipes: &'defs Recipes,
+    formats: &'defs FormatRegistry,
 }
 
 impl<'defs> PerCpuModeEncodings<'defs> {
-    fn new(recipes: &'defs Recipes) -> Self {
+    fn new(recipes: &'defs Recipes, formats: &'defs FormatRegistry) -> Self {
         Self {
             inst_pred_reg: InstructionPredicateRegistry::new(),
             enc32: Vec::new(),
             enc64: Vec::new(),
             recipes,
+            formats,
         }
     }
+    fn enc(
+        &self,
+        inst: impl Into<InstSpec>,
+        recipe: EncodingRecipeNumber,
+        bits: u16,
+    ) -> EncodingBuilder {
+        EncodingBuilder::new(inst.into(), recipe, bits, self.formats)
+    }
     fn add32(&mut self, encoding: EncodingBuilder) {
         self.enc32
             .push(encoding.build(self.recipes, &mut self.inst_pred_reg));
@@ -169,7 +176,7 @@ pub(crate) fn define<'defs>(
     let use_m = isa_settings.predicate_by_name("use_m");
 
     // Definitions.
-    let mut e = PerCpuModeEncodings::new(&recipes.recipes);
+    let mut e = PerCpuModeEncodings::new(&recipes.recipes, &shared_defs.format_registry);
 
     // Basic arithmetic binary instructions are encoded in an R-type instruction.
     for &(inst, inst_imm, f3, f7) in &[
@@ -179,26 +186,26 @@
         (bor, Some(bor_imm), 0b110, 0b0000000),
         (band, Some(band_imm), 0b111, 0b0000000),
     ] {
-        e.add32(enc(inst.bind(I32), r_r, op_bits(f3, f7)));
-        e.add64(enc(inst.bind(I64), r_r, op_bits(f3, f7)));
+        e.add32(e.enc(inst.bind(I32), r_r, op_bits(f3, f7)));
+        e.add64(e.enc(inst.bind(I64), r_r, op_bits(f3, f7)));
 
         // Immediate versions for add/xor/or/and.
         if let Some(inst_imm) = inst_imm {
-            e.add32(enc(inst_imm.bind(I32), r_ii, opimm_bits(f3, 0)));
-            e.add64(enc(inst_imm.bind(I64), r_ii, opimm_bits(f3, 0)));
+            e.add32(e.enc(inst_imm.bind(I32), r_ii, opimm_bits(f3, 0)));
+            e.add64(e.enc(inst_imm.bind(I64), r_ii, opimm_bits(f3, 0)));
         }
     }
 
     // 32-bit ops in RV64.
-    e.add64(enc(iadd.bind(I32), r_r, op32_bits(0b000, 0b0000000)));
-    e.add64(enc(isub.bind(I32), r_r, op32_bits(0b000, 0b0100000)));
+    e.add64(e.enc(iadd.bind(I32), r_r, op32_bits(0b000, 0b0000000)));
+    e.add64(e.enc(isub.bind(I32), r_r, op32_bits(0b000, 0b0100000)));
 
     // There are no andiw/oriw/xoriw variations.
-    e.add64(enc(iadd_imm.bind(I32), r_ii, opimm32_bits(0b000, 0)));
+    e.add64(e.enc(iadd_imm.bind(I32), r_ii, opimm32_bits(0b000, 0)));
 
     // Use iadd_imm with %x0 to materialize constants.
-    e.add32(enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0)));
-    e.add64(enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0)));
-    e.add64(enc(iconst.bind(I64), r_iz, opimm_bits(0b0, 0)));
+    e.add32(e.enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0)));
+    e.add64(e.enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0)));
+    e.add64(e.enc(iconst.bind(I64), r_iz, opimm_bits(0b0, 0)));
 
     // Dynamic shifts have the same masking semantics as the clif base instructions.
     for &(inst, inst_imm, f3, f7) in &[
@@ -206,17 +213,17 @@
         (ushr, ushr_imm, 0b101, 0b0),
        (sshr, sshr_imm, 0b101, 0b100000),
     ] {
-        e.add32(enc(inst.bind(I32).bind(I32), r_r, op_bits(f3, f7)));
-        e.add64(enc(inst.bind(I64).bind(I64), r_r, op_bits(f3, f7)));
-        e.add64(enc(inst.bind(I32).bind(I32), r_r, op32_bits(f3, f7)));
+        e.add32(e.enc(inst.bind(I32).bind(I32), r_r, op_bits(f3, f7)));
+        e.add64(e.enc(inst.bind(I64).bind(I64), r_r, op_bits(f3, f7)));
+        e.add64(e.enc(inst.bind(I32).bind(I32), r_r, op32_bits(f3, f7)));
         // Allow i32 shift amounts in 64-bit shifts.
-        e.add64(enc(inst.bind(I64).bind(I32), r_r, op_bits(f3, f7)));
-        e.add64(enc(inst.bind(I32).bind(I64), r_r, op32_bits(f3, f7)));
+        e.add64(e.enc(inst.bind(I64).bind(I32), r_r, op_bits(f3, f7)));
+        e.add64(e.enc(inst.bind(I32).bind(I64), r_r, op32_bits(f3, f7)));
 
         // Immediate shifts.
-        e.add32(enc(inst_imm.bind(I32), r_rshamt, opimm_bits(f3, f7)));
-        e.add64(enc(inst_imm.bind(I64), r_rshamt, opimm_bits(f3, f7)));
-        e.add64(enc(inst_imm.bind(I32), r_rshamt, opimm32_bits(f3, f7)));
+        e.add32(e.enc(inst_imm.bind(I32), r_rshamt, opimm_bits(f3, f7)));
+        e.add64(e.enc(inst_imm.bind(I64), r_rshamt, opimm_bits(f3, f7)));
+        e.add64(e.enc(inst_imm.bind(I32), r_rshamt, opimm32_bits(f3, f7)));
     }
 
     // Signed and unsigned integer 'less than'. There are no 'w' variants for comparing 32-bit
@@ -242,20 +249,20 @@
         let icmp_i32 = icmp.bind(I32);
         let icmp_i64 = icmp.bind(I64);
         e.add32(
-            enc(icmp_i32.clone(), r_ricmp, op_bits(0b010, 0b0000000))
+            e.enc(icmp_i32.clone(), r_ricmp, op_bits(0b010, 0b0000000))
                 .inst_predicate(icmp_instp(&icmp_i32, "slt")),
         );
         e.add64(
-            enc(icmp_i64.clone(), r_ricmp, op_bits(0b010, 0b0000000))
+            e.enc(icmp_i64.clone(), r_ricmp, op_bits(0b010, 0b0000000))
                 .inst_predicate(icmp_instp(&icmp_i64, "slt")),
        );
 
         e.add32(
-            enc(icmp_i32.clone(), r_ricmp, op_bits(0b011, 0b0000000))
+            e.enc(icmp_i32.clone(), r_ricmp, op_bits(0b011, 0b0000000))
                 .inst_predicate(icmp_instp(&icmp_i32, "ult")),
         );
         e.add64(
-            enc(icmp_i64.clone(), r_ricmp, op_bits(0b011, 0b0000000))
+            e.enc(icmp_i64.clone(), r_ricmp, op_bits(0b011, 0b0000000))
                 .inst_predicate(icmp_instp(&icmp_i64, "ult")),
         );
 
@@ -263,42 +270,51 @@
         let icmp_i32 = icmp_imm.bind(I32);
         let icmp_i64 = icmp_imm.bind(I64);
         e.add32(
-            enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b010, 0))
+            e.enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b010, 0))
                .inst_predicate(icmp_instp(&icmp_i32, "slt")),
         );
         e.add64(
-            enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b010, 0))
+            e.enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b010, 0))
                 .inst_predicate(icmp_instp(&icmp_i64, "slt")),
         );
 
         e.add32(
-            enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b011, 0))
+            e.enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b011, 0))
                 .inst_predicate(icmp_instp(&icmp_i32, "ult")),
        );
         e.add64(
-            enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b011, 0))
+            e.enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b011, 0))
                 .inst_predicate(icmp_instp(&icmp_i64, "ult")),
         );
     }
 
     // Integer constants with the low 12 bits clear are materialized by lui.
-    e.add32(enc(iconst.bind(I32), r_u, lui_bits()));
-    e.add64(enc(iconst.bind(I32), r_u, lui_bits()));
-    e.add64(enc(iconst.bind(I64), r_u, lui_bits()));
+    e.add32(e.enc(iconst.bind(I32), r_u, lui_bits()));
+    e.add64(e.enc(iconst.bind(I32), r_u, lui_bits()));
+    e.add64(e.enc(iconst.bind(I64), r_u, lui_bits()));
 
     // "M" Standard Extension for Integer Multiplication and Division.
     // Gated by the `use_m` flag.
-    e.add32(enc(imul.bind(I32), r_r, op_bits(0b000, 0b00000001)).isa_predicate(use_m));
-    e.add64(enc(imul.bind(I64), r_r, op_bits(0b000, 0b00000001)).isa_predicate(use_m));
-    e.add64(enc(imul.bind(I32), r_r, op32_bits(0b000, 0b00000001)).isa_predicate(use_m));
+    e.add32(
+        e.enc(imul.bind(I32), r_r, op_bits(0b000, 0b00000001))
+            .isa_predicate(use_m),
+    );
+    e.add64(
+        e.enc(imul.bind(I64), r_r, op_bits(0b000, 0b00000001))
+            .isa_predicate(use_m),
+    );
+    e.add64(
+        e.enc(imul.bind(I32), r_r, op32_bits(0b000, 0b00000001))
+            .isa_predicate(use_m),
+    );
 
     // Control flow.
 
     // Unconditional branches.
-    e.add32(enc(jump, r_uj, jal_bits()));
-    e.add64(enc(jump, r_uj, jal_bits()));
-    e.add32(enc(call, r_uj_call, jal_bits()));
-    e.add64(enc(call, r_uj_call, jal_bits()));
+    e.add32(e.enc(jump, r_uj, jal_bits()));
+    e.add64(e.enc(jump, r_uj, jal_bits()));
+    e.add32(e.enc(call, r_uj_call, jal_bits()));
+    e.add64(e.enc(call, r_uj_call, jal_bits()));
 
     // Conditional branches.
     {
@@ -338,101 +354,81 @@
             ("uge", 0b111),
         ] {
             e.add32(
-                enc(br_icmp_i32.clone(), r_sb, branch_bits(f3))
+                e.enc(br_icmp_i32.clone(), r_sb, branch_bits(f3))
                     .inst_predicate(br_icmp_instp(&br_icmp_i32, cond)),
             );
             e.add64(
-                enc(br_icmp_i64.clone(), r_sb, branch_bits(f3))
+                e.enc(br_icmp_i64.clone(), r_sb, branch_bits(f3))
                     .inst_predicate(br_icmp_instp(&br_icmp_i64, cond)),
             );
         }
     }
 
     for &(inst, f3) in &[(brz, 0b000), (brnz, 0b001)] {
-        e.add32(enc(inst.bind(I32), r_sb_zero, branch_bits(f3)));
-        e.add64(enc(inst.bind(I64), r_sb_zero, branch_bits(f3)));
-        e.add32(enc(inst.bind(B1), r_sb_zero, branch_bits(f3)));
-        e.add64(enc(inst.bind(B1), r_sb_zero, branch_bits(f3)));
+        e.add32(e.enc(inst.bind(I32), r_sb_zero, branch_bits(f3)));
+        e.add64(e.enc(inst.bind(I64), r_sb_zero, branch_bits(f3)));
+        e.add32(e.enc(inst.bind(B1), r_sb_zero, branch_bits(f3)));
+        e.add64(e.enc(inst.bind(B1), r_sb_zero, branch_bits(f3)));
     }
 
     // Returns are a special case of jalr_bits using %x1 to hold the return address.
     // The return address is provided by a special-purpose `link` return value that
     // is added by legalize_signature().
-    e.add32(enc(return_, r_iret, jalr_bits()));
-    e.add64(enc(return_, r_iret, jalr_bits()));
-    e.add32(enc(call_indirect.bind(I32), r_icall, jalr_bits()));
-    e.add64(enc(call_indirect.bind(I64), r_icall, jalr_bits()));
+    e.add32(e.enc(return_, r_iret, jalr_bits()));
+    e.add64(e.enc(return_, r_iret, jalr_bits()));
+    e.add32(e.enc(call_indirect.bind(I32), r_icall, jalr_bits()));
+    e.add64(e.enc(call_indirect.bind(I64), r_icall, jalr_bits()));
 
     // Spill and fill.
-    e.add32(enc(spill.bind(I32), r_gp_sp, store_bits(0b010)));
-    e.add64(enc(spill.bind(I32), r_gp_sp, store_bits(0b010)));
-    e.add64(enc(spill.bind(I64), r_gp_sp, store_bits(0b011)));
-    e.add32(enc(fill.bind(I32), r_gp_fi, load_bits(0b010)));
-    e.add64(enc(fill.bind(I32), r_gp_fi, load_bits(0b010)));
-    e.add64(enc(fill.bind(I64), r_gp_fi, load_bits(0b011)));
+    e.add32(e.enc(spill.bind(I32), r_gp_sp, store_bits(0b010)));
+    e.add64(e.enc(spill.bind(I32), r_gp_sp, store_bits(0b010)));
+    e.add64(e.enc(spill.bind(I64), r_gp_sp, store_bits(0b011)));
+    e.add32(e.enc(fill.bind(I32), r_gp_fi, load_bits(0b010)));
+    e.add64(e.enc(fill.bind(I32), r_gp_fi, load_bits(0b010)));
+    e.add64(e.enc(fill.bind(I64), r_gp_fi, load_bits(0b011)));
 
     // No-op fills, created by late-stage redundant-fill removal.
     for &ty in &[I64, I32] {
-        e.add64(enc(fill_nop.bind(ty), r_fillnull, 0));
-        e.add32(enc(fill_nop.bind(ty), r_fillnull, 0));
+        e.add64(e.enc(fill_nop.bind(ty), r_fillnull, 0));
+        e.add32(e.enc(fill_nop.bind(ty), r_fillnull, 0));
     }
-    e.add64(enc(fill_nop.bind(B1), r_fillnull, 0));
-    e.add32(enc(fill_nop.bind(B1), r_fillnull, 0));
+    e.add64(e.enc(fill_nop.bind(B1), r_fillnull, 0));
+    e.add32(e.enc(fill_nop.bind(B1), r_fillnull, 0));
 
     // Register copies.
-    e.add32(enc(copy.bind(I32), r_icopy, opimm_bits(0b000, 0)));
-    e.add64(enc(copy.bind(I64), r_icopy, opimm_bits(0b000, 0)));
-    e.add64(enc(copy.bind(I32), r_icopy, opimm32_bits(0b000, 0)));
+    e.add32(e.enc(copy.bind(I32), r_icopy, opimm_bits(0b000, 0)));
+    e.add64(e.enc(copy.bind(I64), r_icopy, opimm_bits(0b000, 0)));
+    e.add64(e.enc(copy.bind(I32), r_icopy, opimm32_bits(0b000, 0)));
 
-    e.add32(enc(regmove.bind(I32), r_irmov, opimm_bits(0b000, 0)));
-    e.add64(enc(regmove.bind(I64), r_irmov, opimm_bits(0b000, 0)));
-    e.add64(enc(regmove.bind(I32), r_irmov, opimm32_bits(0b000, 0)));
+    e.add32(e.enc(regmove.bind(I32), r_irmov, opimm_bits(0b000, 0)));
+    e.add64(e.enc(regmove.bind(I64), r_irmov, opimm_bits(0b000, 0)));
+    e.add64(e.enc(regmove.bind(I32), r_irmov, opimm32_bits(0b000, 0)));
 
-    e.add32(enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0)));
-    e.add64(enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0)));
-    e.add32(enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0)));
-    e.add64(enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0)));
+    e.add32(e.enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0)));
+    e.add64(e.enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0)));
+    e.add32(e.enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0)));
+    e.add64(e.enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0)));
 
     // Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn
     // into a no-op.
     // The same encoding is generated for both the 64- and 32-bit architectures.
     for &ty in &[I64, I32, I16, I8] {
-        e.add32(enc(copy_nop.bind(ty), r_stacknull, 0));
-        e.add64(enc(copy_nop.bind(ty), r_stacknull, 0));
+        e.add32(e.enc(copy_nop.bind(ty), r_stacknull, 0));
+        e.add64(e.enc(copy_nop.bind(ty), r_stacknull, 0));
     }
     for &ty in &[F64, F32] {
-        e.add32(enc(copy_nop.bind(ty), r_stacknull, 0));
-        e.add64(enc(copy_nop.bind(ty), r_stacknull, 0));
+        e.add32(e.enc(copy_nop.bind(ty), r_stacknull, 0));
+        e.add64(e.enc(copy_nop.bind(ty), r_stacknull, 0));
     }
 
     // Copy-to-SSA
-    e.add32(enc(
-        copy_to_ssa.bind(I32),
-        r_copytossa,
-        opimm_bits(0b000, 0),
-    ));
-    e.add64(enc(
-        copy_to_ssa.bind(I64),
-        r_copytossa,
-        opimm_bits(0b000, 0),
-    ));
-    e.add64(enc(
-        copy_to_ssa.bind(I32),
-        r_copytossa,
-        opimm32_bits(0b000, 0),
-    ));
-    e.add32(enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0)));
-    e.add64(enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0)));
-    e.add32(enc(
-        copy_to_ssa.bind_ref(R32),
-        r_copytossa,
-        opimm_bits(0b000, 0),
-    ));
-    e.add64(enc(
-        copy_to_ssa.bind_ref(R64),
-        r_copytossa,
-        opimm_bits(0b000, 0),
-    ));
+    e.add32(e.enc(copy_to_ssa.bind(I32), r_copytossa, opimm_bits(0b000, 0)));
+    e.add64(e.enc(copy_to_ssa.bind(I64), r_copytossa, opimm_bits(0b000, 0)));
+    e.add64(e.enc(copy_to_ssa.bind(I32), r_copytossa, opimm32_bits(0b000, 0)));
+    e.add32(e.enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0)));
+    e.add64(e.enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0)));
+    e.add32(e.enc(copy_to_ssa.bind(R32), r_copytossa, opimm_bits(0b000, 0)));
+    e.add64(e.enc(copy_to_ssa.bind(R64), r_copytossa, opimm_bits(0b000, 0)));
 
     e
 }
diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs
index da08462e9a..c9b1ed8b42 100644
--- a/cranelift/codegen/meta/src/isa/x86/encodings.rs
+++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs
@@ -5,8 +5,8 @@ use std::collections::HashMap;
 
 use crate::cdsl::encodings::{Encoding, EncodingBuilder};
 use crate::cdsl::instructions::{
-    InstSpec, Instruction, InstructionGroup, InstructionPredicate, InstructionPredicateNode,
-    InstructionPredicateRegistry,
+    vector, Bindable, InstSpec, Instruction, InstructionGroup, InstructionPredicate,
+    InstructionPredicateNode, InstructionPredicateRegistry,
 };
 use crate::cdsl::recipes::{EncodingRecipe, EncodingRecipeNumber, Recipes};
 use crate::cdsl::settings::{SettingGroup, SettingPredicateNumber};
@@ -20,23 +20,27 @@ use crate::shared::Definitions as SharedDefinitions;
 use crate::isa::x86::opcodes::*;
 
 use super::recipes::{RecipeGroup, Template};
+use crate::cdsl::formats::FormatRegistry;
+use crate::cdsl::instructions::BindParameter::Any;
 
-pub(crate) struct PerCpuModeEncodings {
+pub(crate) struct PerCpuModeEncodings<'defs> {
     pub enc32: Vec<Encoding>,
     pub enc64: Vec<Encoding>,
     pub recipes: Recipes,
     recipes_by_name: HashMap<String, EncodingRecipeNumber>,
     pub inst_pred_reg: InstructionPredicateRegistry,
+    formats: &'defs FormatRegistry,
 }
 
-impl PerCpuModeEncodings {
-    fn new() -> Self {
+impl<'defs> PerCpuModeEncodings<'defs> {
+    fn new(formats: &'defs FormatRegistry) -> Self {
         Self {
            enc32: Vec::new(),
             enc64: Vec::new(),
             recipes: Recipes::new(),
             recipes_by_name: HashMap::new(),
             inst_pred_reg: InstructionPredicateRegistry::new(),
+            formats,
         }
     }
 
@@ -69,7 +73,7 @@ impl PerCpuModeEncodings {
     {
         let (recipe, bits) = template.build();
         let recipe_number = self.add_recipe(recipe);
-        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits);
+        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits, self.formats);
         builder_closure(builder).build(&self.recipes, &mut self.inst_pred_reg)
     }
 
@@ -101,7 +105,7 @@ impl PerCpuModeEncodings {
     }
     fn enc32_rec(&mut self, inst: impl Into<InstSpec>, recipe: &EncodingRecipe, bits: u16) {
         let recipe_number = self.add_recipe(recipe.clone());
-        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits);
+        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits, self.formats);
         let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg);
         self.enc32.push(encoding);
     }
@@ -134,7 +138,7 @@ impl PerCpuModeEncodings {
     }
     fn enc64_rec(&mut self, inst: impl Into<InstSpec>, recipe: &EncodingRecipe, bits: u16) {
         let recipe_number = self.add_recipe(recipe.clone());
-        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits);
+        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits, self.formats);
         let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg);
         self.enc64.push(encoding);
     }
@@ -207,8 +211,8 @@ impl PerCpuModeEncodings {
     /// Add encodings for `inst.r64` to X86_64 with a REX.W prefix.
     fn enc_r32_r64_rex_only(&mut self, inst: impl Into<InstSpec>, template: Template) {
         let inst: InstSpec = inst.into();
-        self.enc32(inst.bind_ref(R32), template.nonrex());
-        self.enc64(inst.bind_ref(R64), template.rex().w());
+        self.enc32(inst.bind(R32), template.nonrex());
+        self.enc64(inst.bind(R64), template.rex().w());
     }
 
     /// Add encodings for `inst` to X86_64 with and without a REX prefix.
@@ -281,18 +285,18 @@ impl PerCpuModeEncodings {
     /// Add encodings for `inst.i64` to X86_64 with a REX prefix, using the `w_bit`
     /// argument to determine whether or not to set the REX.W bit.
     fn enc_i32_i64_ld_st(&mut self, inst: &Instruction, w_bit: bool, template: Template) {
-        self.enc32(inst.clone().bind(I32).bind_any(), template.clone());
+        self.enc32(inst.clone().bind(I32).bind(Any), template.clone());
 
         // REX-less encoding must come after REX encoding so we don't use it by
        // default. Otherwise reg-alloc would never use r8 and up.
-        self.enc64(inst.clone().bind(I32).bind_any(), template.clone().rex());
-        self.enc64(inst.clone().bind(I32).bind_any(), template.clone());
+        self.enc64(inst.clone().bind(I32).bind(Any), template.clone().rex());
+        self.enc64(inst.clone().bind(I32).bind(Any), template.clone());
 
         if w_bit {
-            self.enc64(inst.clone().bind(I64).bind_any(), template.rex().w());
+            self.enc64(inst.clone().bind(I64).bind(Any), template.rex().w());
         } else {
-            self.enc64(inst.clone().bind(I64).bind_any(), template.clone().rex());
-            self.enc64(inst.clone().bind(I64).bind_any(), template);
+            self.enc64(inst.clone().bind(I64).bind(Any), template.clone().rex());
+            self.enc64(inst.clone().bind(I64).bind(Any), template);
         }
     }
 
@@ -366,12 +370,12 @@
 
 // Definitions.
 
-pub(crate) fn define(
-    shared_defs: &SharedDefinitions,
+pub(crate) fn define<'defs>(
+    shared_defs: &'defs SharedDefinitions,
     settings: &SettingGroup,
     x86: &InstructionGroup,
     r: &RecipeGroup,
-) -> PerCpuModeEncodings {
+) -> PerCpuModeEncodings<'defs> {
     let shared = &shared_defs.instructions;
     let formats = &shared_defs.format_registry;
 
@@ -681,7 +685,7 @@ pub(crate) fn define(
     let use_sse41_simd = settings.predicate_by_name("use_sse41_simd");
 
     // Definitions.
-    let mut e = PerCpuModeEncodings::new();
+    let mut e = PerCpuModeEncodings::new(formats);
 
     // The pinned reg is fixed to a certain value entirely user-controlled, so it generates nothing!
     e.enc64_rec(get_pinned_reg.bind(I64), rec_get_pinned_reg, 0);
@@ -742,15 +746,11 @@ pub(crate) fn define(
         e.enc64(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE).rex());
     }
     e.enc64(regmove.bind(I64), rec_rmov.opcodes(&MOV_STORE).rex().w());
-    e.enc64(regmove.bind(B64), rec_rmov.opcodes(&MOV_STORE).rex().w());
     e.enc_both(regmove.bind(B1), rec_rmov.opcodes(&MOV_STORE));
     e.enc_both(regmove.bind(I8), rec_rmov.opcodes(&MOV_STORE));
-    e.enc32(regmove.bind_ref(R32), rec_rmov.opcodes(&MOV_STORE));
-    e.enc64(regmove.bind_ref(R32), rec_rmov.opcodes(&MOV_STORE).rex());
-    e.enc64(
-        regmove.bind_ref(R64),
-        rec_rmov.opcodes(&MOV_STORE).rex().w(),
-    );
+    e.enc32(regmove.bind(R32), rec_rmov.opcodes(&MOV_STORE));
+    e.enc64(regmove.bind(R32), rec_rmov.opcodes(&MOV_STORE).rex());
+    e.enc64(regmove.bind(R64), rec_rmov.opcodes(&MOV_STORE).rex().w());
 
     e.enc_i32_i64(iadd_imm, rec_r_ib.opcodes(&ADD_IMM8_SIGN_EXTEND).rrr(0));
     e.enc_i32_i64(iadd_imm, rec_r_id.opcodes(&ADD_IMM).rrr(0));
@@ -834,19 +834,19 @@
         // Cannot use enc_i32_i64 for this pattern because instructions require
         // to bind any.
         e.enc32(
-            inst.bind(I32).bind_any(),
+            inst.bind(I32).bind(Any),
             rec_rc.opcodes(&ROTATE_CL).rrr(rrr),
         );
         e.enc64(
-            inst.bind(I64).bind_any(),
+            inst.bind(I64).bind(Any),
             rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex().w(),
         );
         e.enc64(
-            inst.bind(I32).bind_any(),
+            inst.bind(I32).bind(Any),
             rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex(),
        );
         e.enc64(
-            inst.bind(I32).bind_any(),
+            inst.bind(I32).bind(Any),
             rec_rc.opcodes(&ROTATE_CL).rrr(rrr),
         );
     }
@@ -970,7 +970,7 @@
 
     for recipe in &[rec_st, rec_stDisp8, rec_stDisp32] {
         e.enc_i32_i64_ld_st(store, true, recipe.opcodes(&MOV_STORE));
-        e.enc_x86_64(istore32.bind(I64).bind_any(), recipe.opcodes(&MOV_STORE));
+        e.enc_x86_64(istore32.bind(I64).bind(Any), recipe.opcodes(&MOV_STORE));
         e.enc_i32_i64_ld_st(istore16, false, recipe.opcodes(&MOV_STORE_16));
     }
 
@@ -979,14 +979,8 @@
     // the corresponding st* recipes when a REX prefix is applied.
     for recipe in &[rec_st_abcd, rec_stDisp8_abcd, rec_stDisp32_abcd] {
-        e.enc_both(
-            istore8.bind(I32).bind_any(),
-            recipe.opcodes(&MOV_BYTE_STORE),
-        );
-        e.enc_x86_64(
-            istore8.bind(I64).bind_any(),
-            recipe.opcodes(&MOV_BYTE_STORE),
-        );
+        e.enc_both(istore8.bind(I32).bind(Any), recipe.opcodes(&MOV_BYTE_STORE));
+        e.enc_x86_64(istore8.bind(I64).bind(Any), recipe.opcodes(&MOV_BYTE_STORE));
     }
 
     e.enc_i32_i64(spill, rec_spillSib32.opcodes(&MOV_STORE));
@@ -1121,12 +1115,9 @@
     );
 
     // Float loads and stores.
-    e.enc_both(load.bind(F32).bind_any(), rec_fld.opcodes(&MOVSS_LOAD));
-    e.enc_both(load.bind(F32).bind_any(), rec_fldDisp8.opcodes(&MOVSS_LOAD));
-    e.enc_both(
-        load.bind(F32).bind_any(),
-        rec_fldDisp32.opcodes(&MOVSS_LOAD),
-    );
+    e.enc_both(load.bind(F32).bind(Any), rec_fld.opcodes(&MOVSS_LOAD));
+    e.enc_both(load.bind(F32).bind(Any), rec_fldDisp8.opcodes(&MOVSS_LOAD));
+    e.enc_both(load.bind(F32).bind(Any), rec_fldDisp32.opcodes(&MOVSS_LOAD));
 
     e.enc_both(
         load_complex.bind(F32),
@@ -1141,12 +1132,9 @@
         rec_fldWithIndexDisp32.opcodes(&MOVSS_LOAD),
     );
 
-    e.enc_both(load.bind(F64).bind_any(), rec_fld.opcodes(&MOVSD_LOAD));
-    e.enc_both(load.bind(F64).bind_any(), rec_fldDisp8.opcodes(&MOVSD_LOAD));
-    e.enc_both(
-        load.bind(F64).bind_any(),
-        rec_fldDisp32.opcodes(&MOVSD_LOAD),
-    );
+    e.enc_both(load.bind(F64).bind(Any), rec_fld.opcodes(&MOVSD_LOAD));
+    e.enc_both(load.bind(F64).bind(Any), rec_fldDisp8.opcodes(&MOVSD_LOAD));
+    e.enc_both(load.bind(F64).bind(Any), rec_fldDisp32.opcodes(&MOVSD_LOAD));
 
     e.enc_both(
         load_complex.bind(F64),
@@ -1161,13 +1149,13 @@
         rec_fldWithIndexDisp32.opcodes(&MOVSD_LOAD),
     );
 
-    e.enc_both(store.bind(F32).bind_any(), rec_fst.opcodes(&MOVSS_STORE));
+    e.enc_both(store.bind(F32).bind(Any), rec_fst.opcodes(&MOVSS_STORE));
     e.enc_both(
-        store.bind(F32).bind_any(),
+        store.bind(F32).bind(Any),
         rec_fstDisp8.opcodes(&MOVSS_STORE),
     );
     e.enc_both(
-        store.bind(F32).bind_any(),
+        store.bind(F32).bind(Any),
         rec_fstDisp32.opcodes(&MOVSS_STORE),
     );
 
@@ -1184,13 +1172,13 @@
         rec_fstWithIndexDisp32.opcodes(&MOVSS_STORE),
     );
 
-    e.enc_both(store.bind(F64).bind_any(), rec_fst.opcodes(&MOVSD_STORE));
+    e.enc_both(store.bind(F64).bind(Any), rec_fst.opcodes(&MOVSD_STORE));
     e.enc_both(
-        store.bind(F64).bind_any(),
+        store.bind(F64).bind(Any),
         rec_fstDisp8.opcodes(&MOVSD_STORE),
     );
     e.enc_both(
-        store.bind(F64).bind_any(),
+        store.bind(F64).bind(Any),
         rec_fstDisp32.opcodes(&MOVSD_STORE),
     );
 
@@ -1727,7 +1715,7 @@
 
     // PSHUFB, 8-bit shuffle using two XMM registers.
     for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
-        let instruction = x86_pshufb.bind_vector_from_lane(ty, sse_vector_size);
+        let instruction = x86_pshufb.bind(vector(ty, sse_vector_size));
         let template = rec_fa.nonrex().opcodes(&PSHUFB);
         e.enc32_isap(instruction.clone(), template.clone(), use_ssse3_simd);
         e.enc64_isap(instruction, template, use_ssse3_simd);
     }
 
     // PSHUFD, 32-bit shuffle using one XMM register and a u8 immediate.
     for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) {
-        let instruction = x86_pshufd.bind_vector_from_lane(ty, sse_vector_size);
+        let instruction = x86_pshufd.bind(vector(ty, sse_vector_size));
         let template = rec_r_ib_unsigned_fpr.nonrex().opcodes(&PSHUFD);
         e.enc32(instruction.clone(), template.clone());
         e.enc64(instruction, template);
     }
 
@@ -1745,7 +1733,7 @@
     // to the Intel manual: "When the destination operand is an XMM register, the source operand is
     // written to the low doubleword of the register and the register is zero-extended to 128 bits."
     for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
-        let instruction = scalar_to_vector.bind_vector_from_lane(ty, sse_vector_size);
+        let instruction = scalar_to_vector.bind(vector(ty, sse_vector_size));
         if ty.is_float() {
             e.enc_32_64_rec(instruction, rec_null_fpr, 0);
         } else {
@@ -1767,7 +1755,7 @@
             _ => panic!("invalid size for SIMD insertlane"),
         };
 
-        let instruction = x86_pinsr.bind_vector_from_lane(ty, sse_vector_size);
+        let instruction = x86_pinsr.bind(vector(ty, sse_vector_size));
         let template = rec_r_ib_unsigned_r.opcodes(opcode);
         if ty.lane_bits() < 64 {
             e.enc_32_64_maybe_isap(instruction, template.nonrex(), isap);
@@ -1780,21 +1768,21 @@
 
     // For legalizing insertlane with floats, INSERTPS from SSE4.1.
     {
-        let instruction = x86_insertps.bind_vector_from_lane(F32, sse_vector_size);
+        let instruction = x86_insertps.bind(vector(F32, sse_vector_size));
         let template = rec_fa_ib.nonrex().opcodes(&INSERTPS);
         e.enc_32_64_maybe_isap(instruction, template, Some(use_sse41_simd));
     }
 
     // For legalizing insertlane with floats, MOVSD from SSE2.
     {
-        let instruction = x86_movsd.bind_vector_from_lane(F64, sse_vector_size);
+        let instruction = x86_movsd.bind(vector(F64, sse_vector_size));
         let template = rec_fa.nonrex().opcodes(&MOVSD_LOAD);
         e.enc_32_64_maybe_isap(instruction, template, None); // from SSE2
     }
 
     // For legalizing insertlane with floats, MOVLHPS from SSE.
     {
-        let instruction = x86_movlhps.bind_vector_from_lane(F64, sse_vector_size);
+        let instruction = x86_movlhps.bind(vector(F64, sse_vector_size));
         let template = rec_fa.nonrex().opcodes(&MOVLHPS);
         e.enc_32_64_maybe_isap(instruction, template, None); // from SSE
     }
@@ -1808,7 +1796,7 @@
             _ => panic!("invalid size for SIMD extractlane"),
         };
 
-        let instruction = x86_pextr.bind_vector_from_lane(ty, sse_vector_size);
+        let instruction = x86_pextr.bind(vector(ty, sse_vector_size));
         let template = rec_r_ib_unsigned_gpr.opcodes(opcode);
         if ty.lane_bits() < 64 {
             e.enc_32_64_maybe_isap(instruction, template.nonrex(), Some(use_sse41_simd));
@@ -1825,8 +1813,8 @@
             ValueType::all_lane_types().filter(|t| allowed_simd_type(t) && *t != from_type)
         {
             let instruction = raw_bitcast
-                .bind_vector_from_lane(to_type, sse_vector_size)
-                .bind_vector_from_lane(from_type, sse_vector_size);
+                .bind(vector(to_type, sse_vector_size))
+                .bind(vector(from_type, sse_vector_size));
             e.enc_32_64_rec(instruction, rec_null_fpr, 0);
         }
     }
@@ -1837,7 +1825,7 @@
         for lane_type in ValueType::all_lane_types().filter(allowed_simd_type) {
             e.enc_32_64_rec(
                 raw_bitcast
-                    .bind_vector_from_lane(lane_type, sse_vector_size)
+                    .bind(vector(lane_type, sse_vector_size))
                     .bind(*float_type),
                 rec_null_fpr,
                 0,
            );
             e.enc_32_64_rec(
                 raw_bitcast
                     .bind(*float_type)
-                    .bind_vector_from_lane(lane_type, sse_vector_size),
+                    .bind(vector(lane_type, sse_vector_size)),
                 rec_null_fpr,
                 0,
             );
@@ -1857,7 +1845,7 @@
     // encoding first
     for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
         let f_unary_const = formats.get(formats.by_name("UnaryConst"));
-        let instruction = vconst.bind_vector_from_lane(ty, sse_vector_size);
+        let instruction = vconst.bind(vector(ty, sse_vector_size));
         let is_zero_128bit =
             InstructionPredicate::new_is_all_zeroes_128bit(f_unary_const, "constant_handle");
@@ -1881,14 +1869,14 @@
    // MOVQ + MOVHPD + MOVQ + MOVLPD (this allows the constants to be immediates instead of stored
     // in memory) but some performance measurements are needed.
     for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
-        let instruction = vconst.bind_vector_from_lane(ty, sse_vector_size);
+        let instruction = vconst.bind(vector(ty, sse_vector_size));
         let template = rec_vconst.nonrex().opcodes(&MOVUPS_LOAD);
         e.enc_32_64_maybe_isap(instruction, template, None); // from SSE
     }
 
     // SIMD bor using ORPS
     for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
-        let instruction = bor.bind_vector_from_lane(ty, sse_vector_size);
+        let instruction = bor.bind(vector(ty, sse_vector_size));
         let template = rec_fa.nonrex().opcodes(&ORPS);
         e.enc_32_64_maybe_isap(instruction, template, None); // from SSE
     }
@@ -1898,87 +1886,87 @@
     // alignment or type-specific encodings, see https://github.com/CraneStation/cranelift/issues/1039).
     for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
         // Store
-        let bound_store = store.bind_vector_from_lane(ty, sse_vector_size).bind_any();
+        let bound_store = store.bind(vector(ty, sse_vector_size)).bind(Any);
         e.enc_32_64(bound_store.clone(), rec_fst.opcodes(&MOVUPS_STORE));
         e.enc_32_64(bound_store.clone(), rec_fstDisp8.opcodes(&MOVUPS_STORE));
         e.enc_32_64(bound_store, rec_fstDisp32.opcodes(&MOVUPS_STORE));
 
         // Load
-        let bound_load = load.bind_vector_from_lane(ty, sse_vector_size).bind_any();
+        let bound_load = load.bind(vector(ty, sse_vector_size)).bind(Any);
         e.enc_32_64(bound_load.clone(), rec_fld.opcodes(&MOVUPS_LOAD));
         e.enc_32_64(bound_load.clone(), rec_fldDisp8.opcodes(&MOVUPS_LOAD));
         e.enc_32_64(bound_load, rec_fldDisp32.opcodes(&MOVUPS_LOAD));
 
         // Spill
-        let bound_spill = spill.bind_vector_from_lane(ty, sse_vector_size);
+        let bound_spill = spill.bind(vector(ty, sse_vector_size));
         e.enc_32_64(bound_spill, rec_fspillSib32.opcodes(&MOVUPS_STORE));
-        let bound_regspill = regspill.bind_vector_from_lane(ty, sse_vector_size);
+        let bound_regspill = regspill.bind(vector(ty, sse_vector_size));
         e.enc_32_64(bound_regspill, rec_fregspill32.opcodes(&MOVUPS_STORE));
 
         // Fill
-        let bound_fill = fill.bind_vector_from_lane(ty, sse_vector_size);
+        let bound_fill = fill.bind(vector(ty, sse_vector_size));
         e.enc_32_64(bound_fill, rec_ffillSib32.opcodes(&MOVUPS_LOAD));
-        let bound_regfill = regfill.bind_vector_from_lane(ty, sse_vector_size);
+        let bound_regfill = regfill.bind(vector(ty, sse_vector_size));
         e.enc_32_64(bound_regfill, rec_fregfill32.opcodes(&MOVUPS_LOAD));
-        let bound_fill_nop = fill_nop.bind_vector_from_lane(ty, sse_vector_size);
+        let bound_fill_nop = fill_nop.bind(vector(ty, sse_vector_size));
         e.enc_32_64_rec(bound_fill_nop, rec_ffillnull, 0);
 
         // Regmove
-        let bound_regmove = regmove.bind_vector_from_lane(ty, sse_vector_size);
+        let bound_regmove = regmove.bind(vector(ty, sse_vector_size));
         e.enc_32_64(bound_regmove, rec_frmov.opcodes(&MOVAPS_LOAD));
 
         // Copy
-        let bound_copy = copy.bind_vector_from_lane(ty, sse_vector_size);
+        let bound_copy = copy.bind(vector(ty, sse_vector_size));
         e.enc_32_64(bound_copy, rec_furm.opcodes(&MOVAPS_LOAD));
-        let bound_copy_nop = copy_nop.bind_vector_from_lane(ty, sse_vector_size);
+        let bound_copy_nop = copy_nop.bind(vector(ty, sse_vector_size));
         e.enc_32_64_rec(bound_copy_nop, rec_stacknull, 0);
     }
 
     // SIMD integer addition
     for (ty, opcodes) in &[(I8, &PADDB), (I16, &PADDW), (I32, &PADDD), (I64, &PADDQ)] {
-        let iadd = iadd.bind_vector_from_lane(ty.clone(), sse_vector_size);
+        let iadd = iadd.bind(vector(ty.clone(), sse_vector_size));
         e.enc_32_64(iadd, rec_fa.opcodes(*opcodes));
     }
 
     // SIMD integer saturating addition
     e.enc_32_64(
-        sadd_sat.bind_vector_from_lane(I8, sse_vector_size),
+        sadd_sat.bind(vector(I8, sse_vector_size)),
         rec_fa.opcodes(&PADDSB),
     );
     e.enc_32_64(
-        sadd_sat.bind_vector_from_lane(I16, sse_vector_size),
+        sadd_sat.bind(vector(I16, sse_vector_size)),
         rec_fa.opcodes(&PADDSW),
     );
     e.enc_32_64(
-        uadd_sat.bind_vector_from_lane(I8, sse_vector_size),
+        uadd_sat.bind(vector(I8, sse_vector_size)),
         rec_fa.opcodes(&PADDUSB),
     );
     e.enc_32_64(
-        uadd_sat.bind_vector_from_lane(I16, sse_vector_size),
+        uadd_sat.bind(vector(I16, sse_vector_size)),
         rec_fa.opcodes(&PADDUSW),
     );
 
     // SIMD integer subtraction
     for (ty, opcodes) in &[(I8, &PSUBB), (I16, &PSUBW), (I32, &PSUBD), (I64, &PSUBQ)] {
-        let isub = isub.bind_vector_from_lane(ty.clone(), sse_vector_size);
+        let isub = isub.bind(vector(ty.clone(), sse_vector_size));
         e.enc_32_64(isub, rec_fa.opcodes(*opcodes));
     }
 
     // SIMD integer saturating subtraction
     e.enc_32_64(
-        ssub_sat.bind_vector_from_lane(I8, sse_vector_size),
+        ssub_sat.bind(vector(I8, sse_vector_size)),
         rec_fa.opcodes(&PSUBSB),
     );
     e.enc_32_64(
-        ssub_sat.bind_vector_from_lane(I16, sse_vector_size),
+        ssub_sat.bind(vector(I16, sse_vector_size)),
         rec_fa.opcodes(&PSUBSW),
     );
     e.enc_32_64(
-        usub_sat.bind_vector_from_lane(I8, sse_vector_size),
+        usub_sat.bind(vector(I8, sse_vector_size)),
         rec_fa.opcodes(&PSUBUSB),
     );
     e.enc_32_64(
-        usub_sat.bind_vector_from_lane(I16, sse_vector_size),
+        usub_sat.bind(vector(I16, sse_vector_size)),
         rec_fa.opcodes(&PSUBUSW),
     );
 
@@ -1988,7 +1976,7 @@
         (I16, &PMULLW[..], None),
         (I32, &PMULLD[..], Some(use_sse41_simd)),
     ] {
-        let imul = imul.bind_vector_from_lane(ty.clone(), sse_vector_size);
+        let imul = imul.bind(vector(ty.clone(), sse_vector_size));
         e.enc_32_64_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap);
     }
 
@@ -2002,7 +1990,7 @@
             _ => panic!("invalid size for SIMD icmp"),
         };
 
-        let instruction = icmp.bind_vector_from_lane(ty, sse_vector_size);
+        let instruction = icmp.bind(vector(ty, sse_vector_size));
         let f_int_compare = formats.get(formats.by_name("IntCompare"));
         let has_eq_condition_code =
             InstructionPredicate::new_has_condition_code(f_int_compare, IntCC::Equal, "cond");
@@ -2020,10 +2008,10 @@
     // Reference type instructions
 
     // Null references implemented as iconst 0.
-    e.enc32(null.bind_ref(R32), rec_pu_id_ref.opcodes(&MOV_IMM));
+    e.enc32(null.bind(R32), rec_pu_id_ref.opcodes(&MOV_IMM));
 
-    e.enc64(null.bind_ref(R64), rec_pu_id_ref.rex().opcodes(&MOV_IMM));
-    e.enc64(null.bind_ref(R64), rec_pu_id_ref.opcodes(&MOV_IMM));
+    e.enc64(null.bind(R64), rec_pu_id_ref.rex().opcodes(&MOV_IMM));
+    e.enc64(null.bind(R64), rec_pu_id_ref.opcodes(&MOV_IMM));
 
     // is_null, implemented by testing whether the value is 0.
     e.enc_r32_r64_rex_only(is_null, rec_is_zero.opcodes(&TEST_REG));
diff --git a/cranelift/codegen/meta/src/isa/x86/legalize.rs b/cranelift/codegen/meta/src/isa/x86/legalize.rs
index 61389729f5..f5c1c43015 100644
--- a/cranelift/codegen/meta/src/isa/x86/legalize.rs
+++ b/cranelift/codegen/meta/src/isa/x86/legalize.rs
@@ -1,5 +1,5 @@
 use crate::cdsl::ast::{var, ExprBuilder, Literal};
-use crate::cdsl::instructions::InstructionGroup;
+use crate::cdsl::instructions::{vector, Bindable, InstructionGroup};
 use crate::cdsl::types::ValueType;
 use crate::cdsl::xform::TransformGroupBuilder;
 use crate::shared::types::Float::F64;
@@ -322,10 +322,8 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
 
     // SIMD splat: 8-bits
     for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) {
-        let splat_any8x16 = splat.bind_vector_from_lane(ty, sse_vector_size);
-        let bitcast_f64_to_any8x16 = raw_bitcast
-            .bind_vector_from_lane(ty, sse_vector_size)
-            .bind(F64);
+        let splat_any8x16 = splat.bind(vector(ty, sse_vector_size));
+        let bitcast_f64_to_any8x16 = raw_bitcast.bind(vector(ty, sse_vector_size)).bind(F64);
         narrow.legalize(
             def!(y = splat_any8x16(x)),
             vec![
@@ -340,13 +338,13 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
 
     // SIMD splat: 16-bits
     for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 16) {
-        let splat_x16x8 = splat.bind_vector_from_lane(ty, sse_vector_size);
+        let splat_x16x8 = splat.bind(vector(ty, sse_vector_size));
         let raw_bitcast_any16x8_to_i32x4 = raw_bitcast
-            .bind_vector_from_lane(I32, sse_vector_size)
-            .bind_vector_from_lane(ty, sse_vector_size);
+            .bind(vector(I32, sse_vector_size))
+            .bind(vector(ty, sse_vector_size));
         let raw_bitcast_i32x4_to_any16x8 = raw_bitcast
-            .bind_vector_from_lane(ty, sse_vector_size)
-            .bind_vector_from_lane(I32, sse_vector_size);
+            .bind(vector(ty, sse_vector_size))
+            .bind(vector(I32, sse_vector_size));
         narrow.legalize(
             def!(y = splat_x16x8(x)),
             vec![
@@ -361,7 +359,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
 
     // SIMD splat: 32-bits
     for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) {
-        let splat_any32x4 = splat.bind_vector_from_lane(ty, sse_vector_size);
+        let splat_any32x4 = splat.bind(vector(ty, sse_vector_size));
         narrow.legalize(
             def!(y = splat_any32x4(x)),
             vec![
@@ -373,7 +371,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
 
     // SIMD splat: 64-bits
     for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 64) {
-        let splat_any64x2 = splat.bind_vector_from_lane(ty, sse_vector_size);
+        let splat_any64x2 = splat.bind(vector(ty, sse_vector_size));
         narrow.legalize(
             def!(y = splat_any64x2(x)),
             vec![
diff --git a/cranelift/codegen/meta/src/shared/legalize.rs b/cranelift/codegen/meta/src/shared/legalize.rs
index aef8cca89a..2606dfb60d 100644
--- a/cranelift/codegen/meta/src/shared/legalize.rs
+++ b/cranelift/codegen/meta/src/shared/legalize.rs
@@ -1,5 +1,5 @@
 use crate::cdsl::ast::{var, ExprBuilder, Literal};
-use crate::cdsl::instructions::{Instruction, InstructionGroup};
+use crate::cdsl::instructions::{Bindable, Instruction, InstructionGroup};
 use crate::cdsl::xform::{TransformGroupBuilder, TransformGroups};
 use crate::shared::immediates::Immediates;
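
Usage sketch (illustration only, not part of the patch): how call sites read once the single `Bindable::bind` entry point replaces the old `bind`/`bind_ref`/`bind_vector_from_lane`/`bind_any` quartet. `iadd`, `copy_to_ssa`, and `store` are the instruction handles used in the encodings above; `inst` stands for any instruction with one immediate operand, as in the new unit tests.

    // One trait method, anything convertible to a BindParameter:
    let iadd_i32 = iadd.bind(I32);                            // was: iadd.bind(I32)
    let iadd_i32x4 = iadd.bind(vector(I32, 128));             // was: iadd.bind_vector_from_lane(I32, 128)
    let copy_r64 = copy_to_ssa.bind(R64);                     // was: copy_to_ssa.bind_ref(R64)
    let store_any = store.bind(F32).bind(BindParameter::Any); // was: store.bind(F32).bind_any()

    // New capability: binding a concrete immediate value. EncodingBuilder::new
    // turns the bound value into an InstructionPredicate on the matching
    // format field, so the encoding only applies when the immediate matches.
    let with_imm = inst.bind(Immediate::UInt8(42));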