diff --git a/cranelift/codegen/build.rs b/cranelift/codegen/build.rs
index 701194611a..4960b0c68c 100644
--- a/cranelift/codegen/build.rs
+++ b/cranelift/codegen/build.rs
@@ -218,7 +218,9 @@ fn get_isle_compilations(
             inputs: vec![
                 prelude_isle.clone(),
                 src_isa_aarch64.join("inst.isle"),
+                src_isa_aarch64.join("inst_neon.isle"),
                 src_isa_aarch64.join("lower.isle"),
+                src_isa_aarch64.join("lower_dynamic_neon.isle"),
             ],
             untracked_inputs: vec![clif_isle.clone()],
         },
diff --git a/cranelift/codegen/meta/src/cdsl/types.rs b/cranelift/codegen/meta/src/cdsl/types.rs
index 12da29829a..1c2ca3f1cc 100644
--- a/cranelift/codegen/meta/src/cdsl/types.rs
+++ b/cranelift/codegen/meta/src/cdsl/types.rs
@@ -20,6 +20,7 @@ pub(crate) enum ValueType {
     Reference(ReferenceType),
     Special(SpecialType),
     Vector(VectorType),
+    DynamicVector(DynamicVectorType),
 }
 
 impl ValueType {
@@ -44,6 +45,7 @@ impl ValueType {
             ValueType::Reference(r) => r.doc(),
             ValueType::Special(s) => s.doc(),
             ValueType::Vector(ref v) => v.doc(),
+            ValueType::DynamicVector(ref v) => v.doc(),
         }
     }
 
@@ -54,6 +56,7 @@ impl ValueType {
             ValueType::Reference(r) => r.lane_bits(),
             ValueType::Special(s) => s.lane_bits(),
             ValueType::Vector(ref v) => v.lane_bits(),
+            ValueType::DynamicVector(ref v) => v.lane_bits(),
         }
     }
 
@@ -77,6 +80,7 @@ impl ValueType {
             ValueType::Reference(r) => r.number(),
             ValueType::Special(s) => s.number(),
             ValueType::Vector(ref v) => v.number(),
+            ValueType::DynamicVector(ref v) => v.number(),
         }
     }
 
@@ -98,6 +102,7 @@ impl fmt::Display for ValueType {
             ValueType::Reference(r) => r.fmt(f),
             ValueType::Special(s) => s.fmt(f),
             ValueType::Vector(ref v) => v.fmt(f),
+            ValueType::DynamicVector(ref v) => v.fmt(f),
         }
     }
 }
@@ -130,6 +135,13 @@ impl From<VectorType> for ValueType {
     }
 }
 
+/// Create a ValueType from a given dynamic vector type.
+impl From<DynamicVectorType> for ValueType {
+    fn from(vector: DynamicVectorType) -> Self {
+        ValueType::DynamicVector(vector)
+    }
+}
+
 /// A concrete scalar type that can appear as a vector lane too.
 #[derive(Clone, Copy, PartialEq, Eq, Hash)]
 pub(crate) enum LaneType {
@@ -230,6 +242,10 @@ impl LaneType {
             ValueType::Vector(VectorType::new(self, lanes.into()))
         }
     }
+
+    pub fn to_dynamic(self, lanes: u16) -> ValueType {
+        ValueType::DynamicVector(DynamicVectorType::new(self, lanes.into()))
+    }
 }
 
 impl fmt::Display for LaneType {
@@ -380,6 +396,80 @@ impl fmt::Debug for VectorType {
     }
 }
 
+/// A concrete dynamic SIMD vector type.
+///
+/// A dynamic vector type has a lane type which is an instance of `LaneType`,
+/// and a minimum number of lanes.
+#[derive(Clone, PartialEq, Eq, Hash)]
+pub(crate) struct DynamicVectorType {
+    base: LaneType,
+    unscaled_lanes: u64,
+}
+
+impl DynamicVectorType {
+    /// Initialize a new type with `base` lane type and a minimum number of lanes.
+    pub fn new(base: LaneType, unscaled_lanes: u64) -> Self {
+        Self {
+            base,
+            unscaled_lanes,
+        }
+    }
+
+    /// Return a string containing the documentation comment for this vector type.
+    pub fn doc(&self) -> String {
+        format!(
+            "A dynamically-scaled SIMD vector with a minimum of {} lanes containing a `{}` each.",
+            self.unscaled_lanes,
+            self.base
+        )
+    }
+
+    /// Return the number of bits in a lane.
+    pub fn lane_bits(&self) -> u64 {
+        self.base.lane_bits()
+    }
+
+    /// Return the minimum number of lanes.
+    pub fn minimum_lane_count(&self) -> u64 {
+        self.unscaled_lanes
+    }
+
+    /// Return the lane type.
+    pub fn lane_type(&self) -> LaneType {
+        self.base
+    }
+
+    /// Find the unique number associated with this vector type.
+ /// + /// Dynamic vector types are encoded in the same manner as `VectorType`, + /// with lane type in the low 4 bits and the log2(lane_count). We add the + /// `VECTOR_BASE` to move these numbers into the range beyond the fixed + /// SIMD types. + pub fn number(&self) -> u16 { + let base_num = u32::from(self.base.number()); + let lanes_log_2: u32 = 63 - self.minimum_lane_count().leading_zeros(); + let num = 0x80 + (lanes_log_2 << 4) + base_num; + num as u16 + } +} + +impl fmt::Display for DynamicVectorType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}x{}xN", self.base, self.minimum_lane_count()) + } +} + +impl fmt::Debug for DynamicVectorType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "DynamicVectorType(base={}, lanes={})", + self.base, + self.minimum_lane_count(), + ) + } +} + /// A concrete scalar type that is neither a vector nor a lane type. /// /// Special types cannot be used to form vectors. diff --git a/cranelift/codegen/meta/src/cdsl/typevar.rs b/cranelift/codegen/meta/src/cdsl/typevar.rs index eea3e2724c..63c14f861a 100644 --- a/cranelift/codegen/meta/src/cdsl/typevar.rs +++ b/cranelift/codegen/meta/src/cdsl/typevar.rs @@ -68,10 +68,19 @@ impl TypeVar { ValueType::Vector(vec_type) => { (vec_type.lane_type(), vec_type.lane_count() as RangeBound) } + ValueType::DynamicVector(vec_type) => ( + vec_type.lane_type(), + vec_type.minimum_lane_count() as RangeBound, + ), }; builder = builder.simd_lanes(num_lanes..num_lanes); + // Only generate dynamic types for multiple lanes. + if num_lanes > 1 { + builder = builder.dynamic_simd_lanes(num_lanes..num_lanes); + } + let builder = match scalar_type { LaneType::Int(int_type) => { let bits = int_type as RangeBound; @@ -229,7 +238,9 @@ impl TypeVar { "can't halve a scalar type" ); } - DerivedFunc::LaneOf | DerivedFunc::AsBool => { /* no particular assertions */ } + DerivedFunc::LaneOf | DerivedFunc::AsBool | DerivedFunc::DynamicToVector => { + /* no particular assertions */ + } } TypeVar { @@ -269,6 +280,9 @@ impl TypeVar { pub fn merge_lanes(&self) -> TypeVar { self.derived(DerivedFunc::MergeLanes) } + pub fn dynamic_to_vector(&self) -> TypeVar { + self.derived(DerivedFunc::DynamicToVector) + } } impl Into for &TypeVar { @@ -331,6 +345,7 @@ pub(crate) enum DerivedFunc { DoubleVector, SplitLanes, MergeLanes, + DynamicToVector, } impl DerivedFunc { @@ -344,6 +359,7 @@ impl DerivedFunc { DerivedFunc::DoubleVector => "double_vector", DerivedFunc::SplitLanes => "split_lanes", DerivedFunc::MergeLanes => "merge_lanes", + DerivedFunc::DynamicToVector => "dynamic_to_vector", } } } @@ -385,6 +401,7 @@ macro_rules! 
num_set { #[derive(Clone, PartialEq, Eq, Hash)] pub(crate) struct TypeSet { pub lanes: NumSet, + pub dynamic_lanes: NumSet, pub ints: NumSet, pub floats: NumSet, pub bools: NumSet, @@ -395,6 +412,7 @@ pub(crate) struct TypeSet { impl TypeSet { fn new( lanes: NumSet, + dynamic_lanes: NumSet, ints: NumSet, floats: NumSet, bools: NumSet, @@ -403,6 +421,7 @@ impl TypeSet { ) -> Self { Self { lanes, + dynamic_lanes, ints, floats, bools, @@ -415,6 +434,8 @@ impl TypeSet { pub fn size(&self) -> usize { self.lanes.len() * (self.ints.len() + self.floats.len() + self.bools.len() + self.refs.len()) + + self.dynamic_lanes.len() + * (self.ints.len() + self.floats.len() + self.bools.len() + self.refs.len()) + self.specials.len() } @@ -429,6 +450,7 @@ impl TypeSet { DerivedFunc::DoubleVector => self.double_vector(), DerivedFunc::SplitLanes => self.half_width().double_vector(), DerivedFunc::MergeLanes => self.double_width().half_vector(), + DerivedFunc::DynamicToVector => self.dynamic_to_vector(), } } @@ -507,6 +529,19 @@ impl TypeSet { copy } + fn dynamic_to_vector(&self) -> TypeSet { + let mut copy = self.clone(); + copy.lanes = NumSet::from_iter( + self.dynamic_lanes + .iter() + .filter(|&&x| x < MAX_LANES) + .map(|&x| x), + ); + copy.specials = Vec::new(); + copy.dynamic_lanes = NumSet::new(); + copy + } + fn concrete_types(&self) -> Vec { let mut ret = Vec::new(); for &num_lanes in &self.lanes { @@ -523,6 +558,17 @@ impl TypeSet { ret.push(ReferenceType::ref_from_bits(bits).into()); } } + for &num_lanes in &self.dynamic_lanes { + for &bits in &self.ints { + ret.push(LaneType::int_from_bits(bits).to_dynamic(num_lanes)); + } + for &bits in &self.floats { + ret.push(LaneType::float_from_bits(bits).to_dynamic(num_lanes)); + } + for &bits in &self.bools { + ret.push(LaneType::bool_from_bits(bits).to_dynamic(num_lanes)); + } + } for &special in &self.specials { ret.push(special.into()); } @@ -548,6 +594,12 @@ impl fmt::Debug for TypeSet { Vec::from_iter(self.lanes.iter().map(|x| x.to_string())).join(", ") )); } + if !self.dynamic_lanes.is_empty() { + subsets.push(format!( + "dynamic_lanes={{{}}}", + Vec::from_iter(self.dynamic_lanes.iter().map(|x| x.to_string())).join(", ") + )); + } if !self.ints.is_empty() { subsets.push(format!( "ints={{{}}}", @@ -591,6 +643,7 @@ pub(crate) struct TypeSetBuilder { refs: Interval, includes_scalars: bool, simd_lanes: Interval, + dynamic_simd_lanes: Interval, specials: Vec, } @@ -603,6 +656,7 @@ impl TypeSetBuilder { refs: Interval::None, includes_scalars: true, simd_lanes: Interval::None, + dynamic_simd_lanes: Interval::None, specials: Vec::new(), } } @@ -636,6 +690,11 @@ impl TypeSetBuilder { self.simd_lanes = interval.into(); self } + pub fn dynamic_simd_lanes(mut self, interval: impl Into) -> Self { + assert!(self.dynamic_simd_lanes == Interval::None); + self.dynamic_simd_lanes = interval.into(); + self + } pub fn specials(mut self, specials: Vec) -> Self { assert!(self.specials.is_empty()); self.specials = specials; @@ -652,6 +711,7 @@ impl TypeSetBuilder { TypeSet::new( range_to_set(self.simd_lanes.to_range(min_lanes..MAX_LANES, Some(1))), + range_to_set(self.dynamic_simd_lanes.to_range(2..MAX_LANES, None)), range_to_set(self.ints.to_range(8..MAX_BITS, None)), range_to_set(self.floats.to_range(32..64, None)), bools, @@ -770,6 +830,50 @@ fn test_typevar_builder() { assert!(type_set.bools.is_empty()); assert!(type_set.specials.is_empty()); + let type_set = TypeSetBuilder::new() + .floats(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(); 
+ assert_eq!(type_set.lanes, num_set![2, 4, 8, 16, 32, 64, 128, 256]); + assert_eq!(type_set.floats, num_set![32, 64]); + assert!(type_set.dynamic_lanes.is_empty()); + assert!(type_set.ints.is_empty()); + assert!(type_set.bools.is_empty()); + assert!(type_set.specials.is_empty()); + + let type_set = TypeSetBuilder::new() + .ints(Interval::All) + .bools(Interval::All) + .floats(Interval::All) + .dynamic_simd_lanes(Interval::All) + .includes_scalars(false) + .build(); + assert_eq!( + type_set.dynamic_lanes, + num_set![2, 4, 8, 16, 32, 64, 128, 256] + ); + assert_eq!(type_set.ints, num_set![8, 16, 32, 64, 128]); + assert_eq!(type_set.bools, num_set![1, 8, 16, 32, 64, 128]); + assert_eq!(type_set.floats, num_set![32, 64]); + assert_eq!(type_set.lanes, num_set![1]); + assert!(type_set.specials.is_empty()); + + let type_set = TypeSetBuilder::new() + .floats(Interval::All) + .dynamic_simd_lanes(Interval::All) + .includes_scalars(false) + .build(); + assert_eq!( + type_set.dynamic_lanes, + num_set![2, 4, 8, 16, 32, 64, 128, 256] + ); + assert_eq!(type_set.floats, num_set![32, 64]); + assert_eq!(type_set.lanes, num_set![1]); + assert!(type_set.ints.is_empty()); + assert!(type_set.bools.is_empty()); + assert!(type_set.specials.is_empty()); + let type_set = TypeSetBuilder::new().ints(16..64).build(); assert_eq!(type_set.lanes, num_set![1]); assert_eq!(type_set.ints, num_set![16, 32, 64]); @@ -778,6 +882,45 @@ fn test_typevar_builder() { assert!(type_set.specials.is_empty()); } +#[test] +fn test_dynamic_to_vector() { + // We don't generate single lane dynamic types, so the maximum number of + // lanes we support is 128, as MAX_BITS is 256. + assert_eq!( + TypeSetBuilder::new() + .dynamic_simd_lanes(Interval::All) + .ints(Interval::All) + .build() + .dynamic_to_vector(), + TypeSetBuilder::new() + .simd_lanes(2..128) + .ints(Interval::All) + .build() + ); + assert_eq!( + TypeSetBuilder::new() + .dynamic_simd_lanes(Interval::All) + .bools(Interval::All) + .build() + .dynamic_to_vector(), + TypeSetBuilder::new() + .simd_lanes(2..128) + .bools(Interval::All) + .build() + ); + assert_eq!( + TypeSetBuilder::new() + .dynamic_simd_lanes(Interval::All) + .floats(Interval::All) + .build() + .dynamic_to_vector(), + TypeSetBuilder::new() + .simd_lanes(2..128) + .floats(Interval::All) + .build() + ); +} + #[test] #[should_panic] fn test_typevar_builder_too_high_bound_panic() { diff --git a/cranelift/codegen/meta/src/gen_inst.rs b/cranelift/codegen/meta/src/gen_inst.rs index e6ae750435..eb2a6dfd20 100644 --- a/cranelift/codegen/meta/src/gen_inst.rs +++ b/cranelift/codegen/meta/src/gen_inst.rs @@ -688,6 +688,7 @@ pub(crate) fn gen_typesets_table(type_sets: &UniqueTable, fmt: &mut For fmt.indent(|fmt| { fmt.comment(typeset_to_string(ts)); gen_bitset(&ts.lanes, "lanes", 16, fmt); + gen_bitset(&ts.dynamic_lanes, "dynamic_lanes", 16, fmt); gen_bitset(&ts.ints, "ints", 8, fmt); gen_bitset(&ts.floats, "floats", 8, fmt); gen_bitset(&ts.bools, "bools", 8, fmt); diff --git a/cranelift/codegen/meta/src/gen_settings.rs b/cranelift/codegen/meta/src/gen_settings.rs index a76853b85c..8218876ae3 100644 --- a/cranelift/codegen/meta/src/gen_settings.rs +++ b/cranelift/codegen/meta/src/gen_settings.rs @@ -119,7 +119,7 @@ fn gen_to_and_from_str(name: &str, values: &[&'static str], fmt: &mut Formatter) }); fmtln!(fmt, "}"); - fmtln!(fmt, "impl str::FromStr for {} {{", name); + fmtln!(fmt, "impl core::str::FromStr for {} {{", name); fmt.indent(|fmt| { fmtln!(fmt, "type Err = ();"); fmtln!(fmt, "fn from_str(s: &str) -> Result {"); diff 
--git a/cranelift/codegen/meta/src/gen_types.rs b/cranelift/codegen/meta/src/gen_types.rs index f55848751c..0d27070df7 100644 --- a/cranelift/codegen/meta/src/gen_types.rs +++ b/cranelift/codegen/meta/src/gen_types.rs @@ -33,6 +33,19 @@ fn emit_vectors(bits: u64, fmt: &mut srcgen::Formatter) { } } +/// Emit definition for all dynamic vector types with `bits` total size. +fn emit_dynamic_vectors(bits: u64, fmt: &mut srcgen::Formatter) { + let vec_size: u64 = bits / 8; + for vec in cdsl_types::ValueType::all_lane_types() + .map(|ty| (ty, cdsl_types::ValueType::from(ty).membytes())) + .filter(|&(_, lane_size)| lane_size != 0 && lane_size < vec_size) + .map(|(ty, lane_size)| (ty, vec_size / lane_size)) + .map(|(ty, lanes)| cdsl_types::DynamicVectorType::new(ty, lanes)) + { + emit_type(&cdsl_types::ValueType::from(vec), fmt); + } +} + /// Emit types using the given formatter object. fn emit_types(fmt: &mut srcgen::Formatter) { // Emit all of the special types, such as types for CPU flags. @@ -51,8 +64,10 @@ fn emit_types(fmt: &mut srcgen::Formatter) { } // Emit vector definitions for common SIMD sizes. + // Emit dynamic vector definitions. for vec_size in &[64_u64, 128, 256, 512] { emit_vectors(*vec_size, fmt); + emit_dynamic_vectors(*vec_size, fmt); } } diff --git a/cranelift/codegen/meta/src/shared/entities.rs b/cranelift/codegen/meta/src/shared/entities.rs index dcf4ce2cf2..f612d3507d 100644 --- a/cranelift/codegen/meta/src/shared/entities.rs +++ b/cranelift/codegen/meta/src/shared/entities.rs @@ -18,6 +18,9 @@ pub(crate) struct EntityRefs { /// A reference to a stack slot declared in the function preamble. pub(crate) stack_slot: OperandKind, + /// A reference to a dynamic_stack slot declared in the function preamble. + pub(crate) dynamic_stack_slot: OperandKind, + /// A reference to a global value. pub(crate) global_value: OperandKind, @@ -52,6 +55,12 @@ impl EntityRefs { ), stack_slot: new("stack_slot", "ir::StackSlot", "A stack slot"), + dynamic_stack_slot: new( + "dynamic_stack_slot", + "ir::DynamicStackSlot", + "A dynamic stack slot", + ), + global_value: new("global_value", "ir::GlobalValue", "A global value."), sig_ref: new("sig_ref", "ir::SigRef", "A function signature."), diff --git a/cranelift/codegen/meta/src/shared/formats.rs b/cranelift/codegen/meta/src/shared/formats.rs index 92fa34d8bc..84c2a39af7 100644 --- a/cranelift/codegen/meta/src/shared/formats.rs +++ b/cranelift/codegen/meta/src/shared/formats.rs @@ -34,6 +34,8 @@ pub(crate) struct Formats { pub(crate) shuffle: Rc, pub(crate) stack_load: Rc, pub(crate) stack_store: Rc, + pub(crate) dynamic_stack_load: Rc, + pub(crate) dynamic_stack_store: Rc, pub(crate) store: Rc, pub(crate) store_no_offset: Rc, pub(crate) table_addr: Rc, @@ -230,6 +232,15 @@ impl Formats { .imm(&imm.offset32) .build(), + dynamic_stack_load: Builder::new("DynamicStackLoad") + .imm(&entities.dynamic_stack_slot) + .build(), + + dynamic_stack_store: Builder::new("DynamicStackStore") + .value() + .imm(&entities.dynamic_stack_slot) + .build(), + // Accessing a WebAssembly heap. 
heap_addr: Builder::new("HeapAddr") .imm(&entities.heap) diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index cdba177578..8a7d99fd4d 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -427,6 +427,7 @@ fn define_simd_lane_access( .floats(Interval::All) .bools(Interval::All) .simd_lanes(Interval::All) + .dynamic_simd_lanes(Interval::All) .includes_scalars(false) .build(), ); @@ -706,6 +707,7 @@ pub(crate) fn define( TypeSetBuilder::new() .ints(Interval::All) .simd_lanes(Interval::All) + .dynamic_simd_lanes(Interval::All) .build(), ); @@ -785,6 +787,7 @@ pub(crate) fn define( .floats(Interval::All) .simd_lanes(Interval::All) .refs(Interval::All) + .dynamic_simd_lanes(Interval::All) .build(), ); @@ -793,6 +796,7 @@ pub(crate) fn define( let addr = &Operand::new("addr", iAddr); let SS = &Operand::new("SS", &entities.stack_slot); + let DSS = &Operand::new("DSS", &entities.dynamic_stack_slot); let Offset = &Operand::new("Offset", &imm.offset32).with_doc("Byte offset from base address"); let x = &Operand::new("x", Mem).with_doc("Value to be stored"); let a = &Operand::new("a", Mem).with_doc("Value loaded"); @@ -1163,7 +1167,51 @@ pub(crate) fn define( .operands_out(vec![addr]), ); + ig.push( + Inst::new( + "dynamic_stack_load", + r#" + Load a value from a dynamic stack slot. + + This is a polymorphic instruction that can load any value type which + has a memory representation. + "#, + &formats.dynamic_stack_load, + ) + .operands_in(vec![DSS]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "dynamic_stack_store", + r#" + Store a value to a dynamic stack slot. + + This is a polymorphic instruction that can store any dynamic value type with a + memory representation. + "#, + &formats.dynamic_stack_store, + ) + .operands_in(vec![x, DSS]) + .can_store(true), + ); + let GV = &Operand::new("GV", &entities.global_value); + ig.push( + Inst::new( + "dynamic_stack_addr", + r#" + Get the address of a dynamic stack slot. + + Compute the absolute address of the first byte of a dynamic stack slot. + "#, + &formats.dynamic_stack_load, + ) + .operands_in(vec![DSS]) + .operands_out(vec![addr]), + ); ig.push( Inst::new( @@ -2786,6 +2834,7 @@ pub(crate) fn define( TypeSetBuilder::new() .floats(Interval::All) .simd_lanes(Interval::All) + .dynamic_simd_lanes(Interval::All) .build(), ); let Cond = &Operand::new("Cond", &imm.floatcc); @@ -3409,6 +3458,7 @@ pub(crate) fn define( TypeSetBuilder::new() .ints(16..64) .simd_lanes(2..8) + .dynamic_simd_lanes(2..8) .includes_scalars(false) .build(), ); @@ -3479,6 +3529,7 @@ pub(crate) fn define( TypeSetBuilder::new() .ints(8..32) .simd_lanes(4..16) + .dynamic_simd_lanes(4..16) .includes_scalars(false) .build(), ); @@ -4063,4 +4114,30 @@ pub(crate) fn define( ) .other_side_effects(true), ); + + let TxN = &TypeVar::new( + "TxN", + "A dynamic vector type", + TypeSetBuilder::new() + .ints(Interval::All) + .floats(Interval::All) + .bools(Interval::All) + .dynamic_simd_lanes(Interval::All) + .build(), + ); + let x = &Operand::new("x", TxN).with_doc("The dynamic vector to extract from"); + let y = &Operand::new("y", &imm.uimm8).with_doc("128-bit vector index"); + let a = &Operand::new("a", &TxN.dynamic_to_vector()).with_doc("New fixed vector"); + + ig.push( + Inst::new( + "extract_vector", + r#" + Return a fixed length sub vector, extracted from a dynamic vector. 
+ "#, + &formats.binary_imm8, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); } diff --git a/cranelift/codegen/shared/src/constants.rs b/cranelift/codegen/shared/src/constants.rs index 86823ea06b..d9aa3916ca 100644 --- a/cranelift/codegen/shared/src/constants.rs +++ b/cranelift/codegen/shared/src/constants.rs @@ -8,9 +8,12 @@ // 0x70-0x7d: Lane types // 0x7e-0x7f: Reference types // 0x80-0xff: Vector types +// 0x100-0x17f: Dynamic Vector types // // Vector types are encoded with the lane type in the low 4 bits and log2(lanes) -// in the high 4 bits, giving a range of 2-256 lanes. +// in the next highest 4 bits, giving a range of 2-256 lanes. + +// Dynamic vector types are encoded similarily. /// Start of the lane types. pub const LANE_BASE: u16 = 0x70; @@ -20,3 +23,6 @@ pub const REFERENCE_BASE: u16 = 0x7E; /// Start of the 2-lane vector types. pub const VECTOR_BASE: u16 = 0x80; + +/// Start of the dynamic vector types. +pub const DYNAMIC_VECTOR_BASE: u16 = 0x100; diff --git a/cranelift/codegen/src/ir/dfg.rs b/cranelift/codegen/src/ir/dfg.rs index a3dad36cb5..e1f1595766 100644 --- a/cranelift/codegen/src/ir/dfg.rs +++ b/cranelift/codegen/src/ir/dfg.rs @@ -3,12 +3,13 @@ use crate::entity::{self, PrimaryMap, SecondaryMap}; use crate::ir; use crate::ir::builder::ReplaceBuilder; +use crate::ir::dynamic_type::{DynamicTypeData, DynamicTypes}; use crate::ir::extfunc::ExtFuncData; use crate::ir::instructions::{BranchInfo, CallInfo, InstructionData}; use crate::ir::{types, ConstantData, ConstantPool, Immediate}; use crate::ir::{ - Block, FuncRef, Inst, SigRef, Signature, SourceLoc, Type, Value, ValueLabelAssignments, - ValueList, ValueListPool, + Block, DynamicType, FuncRef, Inst, SigRef, Signature, SourceLoc, Type, Value, + ValueLabelAssignments, ValueList, ValueListPool, }; use crate::packed_option::ReservedValue; use crate::write::write_operands; @@ -50,6 +51,9 @@ pub struct DataFlowGraph { /// instructions contained in each block. blocks: PrimaryMap, + /// Dynamic types created. + pub dynamic_types: DynamicTypes, + /// Memory pool of value lists. /// /// The `ValueList` references into this pool appear in many places: @@ -89,6 +93,7 @@ impl DataFlowGraph { insts: PrimaryMap::new(), results: SecondaryMap::new(), blocks: PrimaryMap::new(), + dynamic_types: DynamicTypes::new(), value_lists: ValueListPool::new(), values: PrimaryMap::new(), signatures: PrimaryMap::new(), @@ -105,6 +110,7 @@ impl DataFlowGraph { self.insts.clear(); self.results.clear(); self.blocks.clear(); + self.dynamic_types.clear(); self.value_lists.clear(); self.values.clear(); self.signatures.clear(); @@ -557,6 +563,11 @@ impl DataFlowGraph { self.insts.push(data) } + /// Declares a dynamic vector type + pub fn make_dynamic_ty(&mut self, data: DynamicTypeData) -> DynamicType { + self.dynamic_types.push(data) + } + /// Returns an object that displays `inst`. pub fn display_inst<'a>(&'a self, inst: Inst) -> DisplayInst<'a> { DisplayInst(self, inst) @@ -1104,6 +1115,20 @@ impl DataFlowGraph { self.values[v].set_type(t); } + /// Check that the given concrete `Type` has been defined in the function. + pub fn check_dynamic_type(&mut self, ty: Type) -> Option { + debug_assert!(ty.is_dynamic_vector()); + if self + .dynamic_types + .values() + .any(|dyn_ty_data| dyn_ty_data.concrete().unwrap() == ty) + { + Some(ty) + } else { + None + } + } + /// Create result values for `inst`, reusing the provided detached values. 
     /// This is similar to `make_inst_results_reusing` except it's only for use
     /// in the parser, which needs to reuse previously invalid values.
@@ -1130,6 +1155,10 @@
         let constraints = self.insts[inst].opcode().constraints();
 
         for res_idx in 0..constraints.num_fixed_results() {
             let ty = constraints.result_type(res_idx, ctrl_typevar);
+            if ty.is_dynamic_vector() {
+                self.check_dynamic_type(ty)
+                    .unwrap_or_else(|| panic!("Use of undeclared dynamic type: {}", ty));
+            }
             if let Some(v) = reuse.get(res_idx) {
                 self.set_value_type_for_parser(*v, ty);
             }
diff --git a/cranelift/codegen/src/ir/dynamic_type.rs b/cranelift/codegen/src/ir/dynamic_type.rs
new file mode 100644
index 0000000000..85589cef67
--- /dev/null
+++ b/cranelift/codegen/src/ir/dynamic_type.rs
@@ -0,0 +1,38 @@
+//! Dynamic IR types
+
+use crate::ir::entities::DynamicType;
+use crate::ir::GlobalValue;
+use crate::ir::PrimaryMap;
+use crate::ir::Type;
+
+#[cfg(feature = "enable-serde")]
+use serde::{Deserialize, Serialize};
+
+/// A dynamic type object which has a base vector type and a scaling factor.
+#[derive(Clone)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct DynamicTypeData {
+    /// Base vector type, this is the minimum size of the type.
+    pub base_vector_ty: Type,
+    /// The dynamic scaling factor of the base vector type.
+    pub dynamic_scale: GlobalValue,
+}
+
+impl DynamicTypeData {
+    /// Create a new dynamic type.
+    pub fn new(base_vector_ty: Type, dynamic_scale: GlobalValue) -> Self {
+        assert!(base_vector_ty.is_vector());
+        Self {
+            base_vector_ty,
+            dynamic_scale,
+        }
+    }
+
+    /// Convert `base_vector_ty` into a concrete dynamic vector type.
+    pub fn concrete(&self) -> Option<Type> {
+        self.base_vector_ty.vector_to_dynamic()
+    }
+}
+
+/// All allocated dynamic types.
+pub type DynamicTypes = PrimaryMap<DynamicType, DynamicTypeData>;
diff --git a/cranelift/codegen/src/ir/entities.rs b/cranelift/codegen/src/ir/entities.rs
index d8ca7cef36..2be7014685 100644
--- a/cranelift/codegen/src/ir/entities.rs
+++ b/cranelift/codegen/src/ir/entities.rs
@@ -135,6 +135,44 @@ impl StackSlot {
     }
 }
 
+/// An opaque reference to a dynamic stack slot.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct DynamicStackSlot(u32);
+entity_impl!(DynamicStackSlot, "dss");
+
+impl DynamicStackSlot {
+    /// Create a new dynamic stack slot reference from its number.
+    ///
+    /// This method is for use by the parser.
+    pub fn with_number(n: u32) -> Option<Self> {
+        if n < u32::MAX {
+            Some(Self(n))
+        } else {
+            None
+        }
+    }
+}
+
+/// An opaque reference to a dynamic type.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct DynamicType(u32);
+entity_impl!(DynamicType, "dt");
+
+impl DynamicType {
+    /// Create a new dynamic type reference from its number.
+    ///
+    /// This method is for use by the parser.
+    pub fn with_number(n: u32) -> Option<Self> {
+        if n < u32::MAX {
+            Some(Self(n))
+        } else {
+            None
+        }
+    }
+}
+
 /// An opaque reference to a global value.
 ///
 /// A `GlobalValue` is a [`Value`](Value) that will be live across the entire
@@ -389,6 +427,10 @@ pub enum AnyEntity {
     Value(Value),
     /// A stack slot.
     StackSlot(StackSlot),
+    /// A dynamic stack slot.
+    DynamicStackSlot(DynamicStackSlot),
+    /// A dynamic type.
+    DynamicType(DynamicType),
     /// A Global value.
     GlobalValue(GlobalValue),
     /// A jump table.
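The `DynamicTypeData` added above is the handle that ties a fixed base vector type to the global value carrying its scaling factor. A minimal sketch of how these pieces compose, using only APIs introduced in this patch (`I32X4` and the generated `I32X4XN` come from `ir::types`; the assertions are illustrative, not part of the diff):

    use cranelift_codegen::ir::{types, DynamicTypeData, Function, GlobalValueData};

    fn declare_dynamic_type(func: &mut Function) {
        // A global value that the backend resolves to the number of copies
        // of I32X4 that fit in a target vector register.
        let scale = func.create_global_value(GlobalValueData::DynScaleTargetConst {
            vector_type: types::I32X4,
        });
        // The base type must be a fixed vector; `DynamicTypeData::new` asserts this.
        let data = DynamicTypeData::new(types::I32X4, scale);
        // `concrete` shifts the type encoding into the dynamic range: i32x4 -> i32x4xN.
        assert_eq!(data.concrete(), Some(types::I32X4XN));
        // Registering the data yields a `dt` entity that instruction result
        // types are checked against (see `check_dynamic_type` above).
        let _dt0 = func.dfg.make_dynamic_ty(data);
    }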
@@ -415,6 +457,8 @@ impl fmt::Display for AnyEntity { Self::Inst(r) => r.fmt(f), Self::Value(r) => r.fmt(f), Self::StackSlot(r) => r.fmt(f), + Self::DynamicStackSlot(r) => r.fmt(f), + Self::DynamicType(r) => r.fmt(f), Self::GlobalValue(r) => r.fmt(f), Self::JumpTable(r) => r.fmt(f), Self::Constant(r) => r.fmt(f), @@ -457,6 +501,18 @@ impl From for AnyEntity { } } +impl From for AnyEntity { + fn from(r: DynamicStackSlot) -> Self { + Self::DynamicStackSlot(r) + } +} + +impl From for AnyEntity { + fn from(r: DynamicType) -> Self { + Self::DynamicType(r) + } +} + impl From for AnyEntity { fn from(r: GlobalValue) -> Self { Self::GlobalValue(r) diff --git a/cranelift/codegen/src/ir/function.rs b/cranelift/codegen/src/ir/function.rs index 0092a33306..856c8f5f0d 100644 --- a/cranelift/codegen/src/ir/function.rs +++ b/cranelift/codegen/src/ir/function.rs @@ -7,12 +7,12 @@ use crate::entity::{PrimaryMap, SecondaryMap}; use crate::ir; use crate::ir::JumpTables; use crate::ir::{ - instructions::BranchInfo, Block, ExtFuncData, FuncRef, GlobalValue, GlobalValueData, Heap, - HeapData, Inst, InstructionData, JumpTable, JumpTableData, Opcode, SigRef, StackSlot, - StackSlotData, Table, TableData, + instructions::BranchInfo, Block, DynamicStackSlot, DynamicStackSlotData, DynamicType, + ExtFuncData, FuncRef, GlobalValue, GlobalValueData, Heap, HeapData, Inst, InstructionData, + JumpTable, JumpTableData, Opcode, SigRef, StackSlot, StackSlotData, Table, TableData, Type, }; use crate::ir::{DataFlowGraph, ExternalName, Layout, Signature}; -use crate::ir::{SourceLocs, StackSlots}; +use crate::ir::{DynamicStackSlots, SourceLocs, StackSlots}; use crate::isa::CallConv; use crate::value_label::ValueLabelsRanges; use crate::write::write_function; @@ -78,8 +78,11 @@ pub struct Function { /// Signature of this function. pub signature: Signature, - /// Stack slots allocated in this function. - pub stack_slots: StackSlots, + /// Sized stack slots allocated in this function. + pub sized_stack_slots: StackSlots, + + /// Dynamic stack slots allocated in this function. + pub dynamic_stack_slots: DynamicStackSlots, /// Global values referenced. pub global_values: PrimaryMap, @@ -120,7 +123,8 @@ impl Function { version_marker: VersionMarker, name, signature: sig, - stack_slots: StackSlots::new(), + sized_stack_slots: StackSlots::new(), + dynamic_stack_slots: DynamicStackSlots::new(), global_values: PrimaryMap::new(), heaps: PrimaryMap::new(), tables: PrimaryMap::new(), @@ -135,7 +139,8 @@ impl Function { /// Clear all data structures in this function. pub fn clear(&mut self) { self.signature.clear(CallConv::Fast); - self.stack_slots.clear(); + self.sized_stack_slots.clear(); + self.dynamic_stack_slots.clear(); self.global_values.clear(); self.heaps.clear(); self.tables.clear(); @@ -156,10 +161,16 @@ impl Function { self.jump_tables.push(data) } - /// Creates a stack slot in the function, to be used by `stack_load`, `stack_store` and - /// `stack_addr` instructions. - pub fn create_stack_slot(&mut self, data: StackSlotData) -> StackSlot { - self.stack_slots.push(data) + /// Creates a sized stack slot in the function, to be used by `stack_load`, `stack_store` + /// and `stack_addr` instructions. + pub fn create_sized_stack_slot(&mut self, data: StackSlotData) -> StackSlot { + self.sized_stack_slots.push(data) + } + + /// Creates a dynamic stack slot in the function, to be used by `dynamic_stack_load`, + /// `dynamic_stack_store` and `dynamic_stack_addr` instructions. 
+ pub fn create_dynamic_stack_slot(&mut self, data: DynamicStackSlotData) -> DynamicStackSlot { + self.dynamic_stack_slots.push(data) } /// Adds a signature which can later be used to declare an external function import. @@ -177,6 +188,26 @@ impl Function { self.global_values.push(data) } + /// Find the global dyn_scale value associated with given DynamicType + pub fn get_dyn_scale(&self, ty: DynamicType) -> GlobalValue { + self.dfg.dynamic_types.get(ty).unwrap().dynamic_scale + } + + /// Find the global dyn_scale for the given stack slot. + pub fn get_dynamic_slot_scale(&self, dss: DynamicStackSlot) -> GlobalValue { + let dyn_ty = self.dynamic_stack_slots.get(dss).unwrap().dyn_ty; + self.get_dyn_scale(dyn_ty) + } + + /// Get a concrete `Type` from a user defined `DynamicType`. + pub fn get_concrete_dynamic_ty(&self, ty: DynamicType) -> Option { + self.dfg + .dynamic_types + .get(ty) + .unwrap_or_else(|| panic!("Undeclared dynamic vector type: {}", ty)) + .concrete() + } + /// Declares a heap accessible to the function. pub fn create_heap(&mut self, data: HeapData) -> Heap { self.heaps.push(data) @@ -322,8 +353,8 @@ impl Function { /// Size occupied by all stack slots associated with this function. /// /// Does not include any padding necessary due to offsets - pub fn stack_size(&self) -> u32 { - self.stack_slots.values().map(|ss| ss.size).sum() + pub fn fixed_stack_size(&self) -> u32 { + self.sized_stack_slots.values().map(|ss| ss.size).sum() } } diff --git a/cranelift/codegen/src/ir/globalvalue.rs b/cranelift/codegen/src/ir/globalvalue.rs index e70f8221fb..8ec39bf0a4 100644 --- a/cranelift/codegen/src/ir/globalvalue.rs +++ b/cranelift/codegen/src/ir/globalvalue.rs @@ -76,6 +76,13 @@ pub enum GlobalValueData { /// Does this symbol refer to a thread local storage value? tls: bool, }, + + /// Value is a multiple of how many instances of `vector_type` will fit in + /// a target vector register. + DynScaleTargetConst { + /// Base vector type. + vector_type: Type, + }, } impl GlobalValueData { @@ -92,6 +99,7 @@ impl GlobalValueData { match *self { Self::VMContext { .. } | Self::Symbol { .. } => isa.pointer_type(), Self::IAddImm { global_type, .. } | Self::Load { global_type, .. } => global_type, + Self::DynScaleTargetConst { .. } => isa.pointer_type(), } } @@ -154,6 +162,9 @@ impl fmt::Display for GlobalValueData { } Ok(()) } + Self::DynScaleTargetConst { vector_type } => { + write!(f, "dyn_scale_target_const.{}", vector_type) + } } } } diff --git a/cranelift/codegen/src/ir/instructions.rs b/cranelift/codegen/src/ir/instructions.rs index df7357fe2c..02a4d48e87 100644 --- a/cranelift/codegen/src/ir/instructions.rs +++ b/cranelift/codegen/src/ir/instructions.rs @@ -633,6 +633,8 @@ pub struct ValueTypeSet { pub bools: BitSet8, /// Allowed ref widths pub refs: BitSet8, + /// Allowed dynamic vectors minimum lane sizes + pub dynamic_lanes: BitSet16, } impl ValueTypeSet { @@ -656,8 +658,13 @@ impl ValueTypeSet { /// Does `typ` belong to this set? pub fn contains(self, typ: Type) -> bool { - let l2l = typ.log2_lane_count(); - self.lanes.contains(l2l) && self.is_base_type(typ.lane_type()) + if typ.is_dynamic_vector() { + let l2l = typ.log2_min_lane_count(); + self.dynamic_lanes.contains(l2l) && self.is_base_type(typ.lane_type()) + } else { + let l2l = typ.log2_lane_count(); + self.lanes.contains(l2l) && self.is_base_type(typ.lane_type()) + } } /// Get an example member of this type set. @@ -712,6 +719,9 @@ enum OperandConstraint { /// This operand is `ctrlType.merge_lanes()`. 
MergeLanes, + + /// This operands is `ctrlType.dynamic_to_vector()`. + DynamicToVector, } impl OperandConstraint { @@ -738,15 +748,48 @@ impl OperandConstraint { .expect("invalid type for half_vector"), ), DoubleVector => Bound(ctrl_type.by(2).expect("invalid type for double_vector")), - SplitLanes => Bound( + SplitLanes => { + if ctrl_type.is_dynamic_vector() { + Bound( + ctrl_type + .dynamic_to_vector() + .expect("invalid type for dynamic_to_vector") + .split_lanes() + .expect("invalid type for split_lanes") + .vector_to_dynamic() + .expect("invalid dynamic type"), + ) + } else { + Bound( + ctrl_type + .split_lanes() + .expect("invalid type for split_lanes"), + ) + } + } + MergeLanes => { + if ctrl_type.is_dynamic_vector() { + Bound( + ctrl_type + .dynamic_to_vector() + .expect("invalid type for dynamic_to_vector") + .merge_lanes() + .expect("invalid type for merge_lanes") + .vector_to_dynamic() + .expect("invalid dynamic type"), + ) + } else { + Bound( + ctrl_type + .merge_lanes() + .expect("invalid type for merge_lanes"), + ) + } + } + DynamicToVector => Bound( ctrl_type - .split_lanes() - .expect("invalid type for split_lanes"), - ), - MergeLanes => Bound( - ctrl_type - .merge_lanes() - .expect("invalid type for merge_lanes"), + .dynamic_to_vector() + .expect("invalid type for dynamic_to_vector"), ), } } @@ -860,11 +903,13 @@ mod tests { floats: BitSet8::from_range(0, 0), bools: BitSet8::from_range(3, 7), refs: BitSet8::from_range(5, 7), + dynamic_lanes: BitSet16::from_range(0, 4), }; assert!(!vts.contains(I8)); assert!(vts.contains(I32)); assert!(vts.contains(I64)); assert!(vts.contains(I32X4)); + assert!(vts.contains(I32X4XN)); assert!(!vts.contains(F32)); assert!(!vts.contains(B1)); assert!(vts.contains(B8)); @@ -879,6 +924,7 @@ mod tests { floats: BitSet8::from_range(5, 7), bools: BitSet8::from_range(3, 7), refs: BitSet8::from_range(0, 0), + dynamic_lanes: BitSet16::from_range(0, 8), }; assert_eq!(vts.example().to_string(), "f32"); @@ -888,6 +934,7 @@ mod tests { floats: BitSet8::from_range(5, 7), bools: BitSet8::from_range(3, 7), refs: BitSet8::from_range(0, 0), + dynamic_lanes: BitSet16::from_range(0, 8), }; assert_eq!(vts.example().to_string(), "f32x2"); @@ -897,9 +944,11 @@ mod tests { floats: BitSet8::from_range(0, 0), bools: BitSet8::from_range(3, 7), refs: BitSet8::from_range(0, 0), + dynamic_lanes: BitSet16::from_range(0, 8), }; assert!(!vts.contains(B32X2)); assert!(vts.contains(B32X4)); + assert!(vts.contains(B16X4XN)); assert_eq!(vts.example().to_string(), "b32x4"); let vts = ValueTypeSet { @@ -909,6 +958,7 @@ mod tests { floats: BitSet8::from_range(0, 0), bools: BitSet8::from_range(0, 0), refs: BitSet8::from_range(0, 0), + dynamic_lanes: BitSet16::from_range(0, 8), }; assert!(vts.contains(I32)); assert!(vts.contains(I32X4)); diff --git a/cranelift/codegen/src/ir/mod.rs b/cranelift/codegen/src/ir/mod.rs index 713d2fd37a..ac0a3bb44c 100644 --- a/cranelift/codegen/src/ir/mod.rs +++ b/cranelift/codegen/src/ir/mod.rs @@ -5,6 +5,7 @@ mod builder; pub mod condcodes; pub mod constant; pub mod dfg; +pub mod dynamic_type; pub mod entities; mod extfunc; mod extname; @@ -33,9 +34,10 @@ pub use crate::ir::builder::{ }; pub use crate::ir::constant::{ConstantData, ConstantPool}; pub use crate::ir::dfg::{DataFlowGraph, ValueDef}; +pub use crate::ir::dynamic_type::{DynamicTypeData, DynamicTypes}; pub use crate::ir::entities::{ - Block, Constant, FuncRef, GlobalValue, Heap, Immediate, Inst, JumpTable, SigRef, StackSlot, - Table, Value, + Block, Constant, DynamicStackSlot, DynamicType, 
FuncRef, GlobalValue, Heap, Immediate, Inst,
+    JumpTable, SigRef, StackSlot, Table, Value,
 };
 pub use crate::ir::extfunc::{
     AbiParam, ArgumentExtension, ArgumentPurpose, ExtFuncData, Signature,
@@ -53,7 +55,9 @@ pub use crate::ir::libcall::{get_probestack_funcref, LibCall};
 pub use crate::ir::memflags::{Endianness, MemFlags};
 pub use crate::ir::progpoint::{ExpandedProgramPoint, ProgramOrder, ProgramPoint};
 pub use crate::ir::sourceloc::SourceLoc;
-pub use crate::ir::stackslot::{StackSlotData, StackSlotKind, StackSlots};
+pub use crate::ir::stackslot::{
+    DynamicStackSlotData, DynamicStackSlots, StackSlotData, StackSlotKind, StackSlots,
+};
 pub use crate::ir::table::TableData;
 pub use crate::ir::trapcode::TrapCode;
 pub use crate::ir::types::Type;
diff --git a/cranelift/codegen/src/ir/stackslot.rs b/cranelift/codegen/src/ir/stackslot.rs
index 4c30eb48b6..e4db80d5d7 100644
--- a/cranelift/codegen/src/ir/stackslot.rs
+++ b/cranelift/codegen/src/ir/stackslot.rs
@@ -4,10 +4,18 @@
 //!
 
 use crate::entity::PrimaryMap;
+use crate::ir::entities::{DynamicStackSlot, DynamicType};
 use crate::ir::StackSlot;
 use core::fmt;
 use core::str::FromStr;
 
+/// Imports only needed for testing.
+#[allow(unused_imports)]
+use crate::ir::{DynamicTypeData, GlobalValueData};
+
+#[allow(unused_imports)]
+use crate::ir::types::*;
+
 #[cfg(feature = "enable-serde")]
 use serde::{Deserialize, Serialize};
@@ -25,6 +33,9 @@ pub enum StackSlotKind {
     /// An explicit stack slot. This is a chunk of stack memory for use by the `stack_load`
     /// and `stack_store` instructions.
     ExplicitSlot,
+    /// An explicit stack slot for dynamic vector types. This is a chunk of stack memory
+    /// for use by the `dynamic_stack_load` and `dynamic_stack_store` instructions.
+    ExplicitDynamicSlot,
 }
 
 impl FromStr for StackSlotKind {
@@ -34,6 +45,7 @@ impl FromStr for StackSlotKind {
         use self::StackSlotKind::*;
         match s {
             "explicit_slot" => Ok(ExplicitSlot),
+            "explicit_dynamic_slot" => Ok(ExplicitDynamicSlot),
             _ => Err(()),
         }
     }
 }
@@ -44,6 +56,7 @@ impl fmt::Display for StackSlotKind {
         use self::StackSlotKind::*;
         f.write_str(match *self {
             ExplicitSlot => "explicit_slot",
+            ExplicitDynamicSlot => "explicit_dynamic_slot",
         })
     }
 }
@@ -68,11 +81,15 @@ impl StackSlotData {
     /// Get the alignment in bytes of this stack slot given the stack pointer alignment.
     pub fn alignment(&self, max_align: StackSize) -> StackSize {
         debug_assert!(max_align.is_power_of_two());
-        // We want to find the largest power of two that divides both `self.size` and `max_align`.
-        // That is the same as isolating the rightmost bit in `x`.
-        let x = self.size | max_align;
-        // C.f. Hacker's delight.
-        x & x.wrapping_neg()
+        if self.kind == StackSlotKind::ExplicitDynamicSlot {
+            max_align
+        } else {
+            // We want to find the largest power of two that divides both `self.size` and `max_align`.
+            // That is the same as isolating the rightmost bit in `x`.
+            let x = self.size | max_align;
+            // C.f. Hacker's delight.
+            x & x.wrapping_neg()
+        }
     }
 }
 
@@ -82,9 +99,43 @@ impl fmt::Display for StackSlotData {
     }
 }
 
+/// Contents of a dynamic stack slot.
+#[derive(Clone, Debug, PartialEq, Eq)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct DynamicStackSlotData {
+    /// The kind of stack slot.
+    pub kind: StackSlotKind,
+
+    /// The type of this slot.
+    pub dyn_ty: DynamicType,
+}
+
+impl DynamicStackSlotData {
+    /// Create a dynamic stack slot of the specified kind, holding values of the given dynamic type.
+ pub fn new(kind: StackSlotKind, dyn_ty: DynamicType) -> Self { + assert!(kind == StackSlotKind::ExplicitDynamicSlot); + Self { kind, dyn_ty } + } + + /// Get the alignment in bytes of this stack slot given the stack pointer alignment. + pub fn alignment(&self, max_align: StackSize) -> StackSize { + debug_assert!(max_align.is_power_of_two()); + max_align + } +} + +impl fmt::Display for DynamicStackSlotData { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{} {}", self.kind, self.dyn_ty) + } +} + /// All allocated stack slots. pub type StackSlots = PrimaryMap; +/// All allocated dynamic stack slots. +pub type DynamicStackSlots = PrimaryMap; + #[cfg(test)] mod tests { use super::*; @@ -95,16 +146,56 @@ mod tests { fn stack_slot() { let mut func = Function::new(); - let ss0 = func.create_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 4)); - let ss1 = func.create_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 8)); + let ss0 = func.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 4)); + let ss1 = func.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 8)); assert_eq!(ss0.to_string(), "ss0"); assert_eq!(ss1.to_string(), "ss1"); - assert_eq!(func.stack_slots[ss0].size, 4); - assert_eq!(func.stack_slots[ss1].size, 8); + assert_eq!(func.sized_stack_slots[ss0].size, 4); + assert_eq!(func.sized_stack_slots[ss1].size, 8); - assert_eq!(func.stack_slots[ss0].to_string(), "explicit_slot 4"); - assert_eq!(func.stack_slots[ss1].to_string(), "explicit_slot 8"); + assert_eq!(func.sized_stack_slots[ss0].to_string(), "explicit_slot 4"); + assert_eq!(func.sized_stack_slots[ss1].to_string(), "explicit_slot 8"); + } + + #[test] + fn dynamic_stack_slot() { + let mut func = Function::new(); + + let int_vector_ty = I32X4; + let fp_vector_ty = F64X2; + let scale0 = GlobalValueData::DynScaleTargetConst { + vector_type: int_vector_ty, + }; + let scale1 = GlobalValueData::DynScaleTargetConst { + vector_type: fp_vector_ty, + }; + let gv0 = func.create_global_value(scale0); + let gv1 = func.create_global_value(scale1); + let dtd0 = DynamicTypeData::new(int_vector_ty, gv0); + let dtd1 = DynamicTypeData::new(fp_vector_ty, gv1); + let dt0 = func.dfg.make_dynamic_ty(dtd0); + let dt1 = func.dfg.make_dynamic_ty(dtd1); + + let dss0 = func.create_dynamic_stack_slot(DynamicStackSlotData::new( + StackSlotKind::ExplicitDynamicSlot, + dt0, + )); + let dss1 = func.create_dynamic_stack_slot(DynamicStackSlotData::new( + StackSlotKind::ExplicitDynamicSlot, + dt1, + )); + assert_eq!(dss0.to_string(), "dss0"); + assert_eq!(dss1.to_string(), "dss1"); + + assert_eq!( + func.dynamic_stack_slots[dss0].to_string(), + "explicit_dynamic_slot dt0" + ); + assert_eq!( + func.dynamic_stack_slots[dss1].to_string(), + "explicit_dynamic_slot dt1" + ); } #[test] diff --git a/cranelift/codegen/src/ir/types.rs b/cranelift/codegen/src/ir/types.rs index b7fce2fb20..cacafb5bfb 100644 --- a/cranelift/codegen/src/ir/types.rs +++ b/cranelift/codegen/src/ir/types.rs @@ -233,7 +233,12 @@ impl Type { /// /// A vector type has 2 or more lanes. pub fn is_vector(self) -> bool { - self.0 >= constants::VECTOR_BASE + self.0 >= constants::VECTOR_BASE && !self.is_dynamic_vector() + } + + /// Is this a SIMD vector type with a runtime number of lanes? + pub fn is_dynamic_vector(self) -> bool { + self.0 >= constants::DYNAMIC_VECTOR_BASE } /// Is this a scalar boolean type? 
@@ -288,19 +293,62 @@
     ///
     /// A scalar type is the same as a SIMD vector type with one lane, so it returns 0.
     pub fn log2_lane_count(self) -> u32 {
-        (self.0.saturating_sub(constants::LANE_BASE) >> 4) as u32
+        if self.is_dynamic_vector() {
+            0
+        } else {
+            (self.0.saturating_sub(constants::LANE_BASE) >> 4) as u32
+        }
+    }
+
+    /// Get log2 of the minimum number of lanes in this vector/dynamic type.
+    pub fn log2_min_lane_count(self) -> u32 {
+        if self.is_dynamic_vector() {
+            (self
+                .0
+                .saturating_sub(constants::VECTOR_BASE + constants::LANE_BASE)
+                >> 4) as u32
+        } else {
+            self.log2_lane_count()
+        }
     }
 
     /// Get the number of lanes in this SIMD vector type.
     ///
     /// A scalar type is the same as a SIMD vector type with one lane, so it returns 1.
     pub fn lane_count(self) -> u32 {
-        1 << self.log2_lane_count()
+        if self.is_dynamic_vector() {
+            0
+        } else {
+            1 << self.log2_lane_count()
+        }
     }
 
     /// Get the total number of bits used to represent this type.
     pub fn bits(self) -> u32 {
-        self.lane_bits() * self.lane_count()
+        if self.is_dynamic_vector() {
+            0
+        } else {
+            self.lane_bits() * self.lane_count()
+        }
+    }
+
+    /// Get the minimum number of lanes in this SIMD vector type. This supports both fixed
+    /// and dynamic types.
+    pub fn min_lane_count(self) -> u32 {
+        if self.is_dynamic_vector() {
+            1 << self.log2_min_lane_count()
+        } else {
+            1 << self.log2_lane_count()
+        }
+    }
+
+    /// Get the minimum number of bits used to represent this type.
+    pub fn min_bits(self) -> u32 {
+        if self.is_dynamic_vector() {
+            self.lane_bits() * self.min_lane_count()
+        } else {
+            self.bits()
+        }
     }
 
     /// Get the number of bytes used to store this type in memory.
@@ -315,23 +363,46 @@ impl Type {
     /// If this is already a SIMD vector type, this produces a SIMD vector type with `n *
     /// self.lane_count()` lanes.
     pub fn by(self, n: u32) -> Option<Self> {
+        if self.is_dynamic_vector() {
+            return None;
+        }
         if self.lane_bits() == 0 || !n.is_power_of_two() {
             return None;
         }
         let log2_lanes: u32 = n.trailing_zeros();
         let new_type = u32::from(self.0) + (log2_lanes << 4);
-        if new_type < 0x100 {
+        if new_type < constants::DYNAMIC_VECTOR_BASE as u32
+            && (new_type as u16) < constants::DYNAMIC_VECTOR_BASE
+        {
             Some(Self(new_type as u16))
         } else {
             None
         }
     }
 
+    /// Convert a fixed vector type to a dynamic one.
+    pub fn vector_to_dynamic(self) -> Option<Self> {
+        assert!(self.is_vector());
+        if self.bits() > 256 {
+            return None;
+        }
+        let new_ty = self.0 + constants::VECTOR_BASE;
+        let ty = Self(new_ty);
+        assert!(ty.is_dynamic_vector());
+        Some(ty)
+    }
+
+    /// Convert a dynamic vector type to a fixed one.
+    pub fn dynamic_to_vector(self) -> Option<Self> {
+        assert!(self.is_dynamic_vector());
+        Some(Self(self.0 - constants::VECTOR_BASE))
+    }
+
     /// Get a SIMD vector with half the number of lanes.
     ///
     /// There is no `double_vector()` method. Use `t.by(2)` instead.
     pub fn half_vector(self) -> Option<Self> {
-        if self.is_vector() {
+        if self.is_vector() && !self.is_dynamic_vector() {
             Some(Self(self.0 - 0x10))
         } else {
             None
@@ -418,6 +489,8 @@ impl Display for Type {
             write!(f, "f{}", self.lane_bits())
         } else if self.is_vector() {
             write!(f, "{}x{}", self.lane_type(), self.lane_count())
+        } else if self.is_dynamic_vector() {
+            write!(f, "{}x{}xN", self.lane_type(), self.min_lane_count())
         } else if self.is_ref() {
             write!(f, "r{}", self.lane_bits())
         } else {
@@ -441,6 +514,8 @@ impl Debug for Type {
             write!(f, "types::F{}", self.lane_bits())
         } else if self.is_vector() {
             write!(f, "{:?}X{}", self.lane_type(), self.lane_count())
+        } else if self.is_dynamic_vector() {
+            write!(f, "{:?}X{}XN", self.lane_type(), self.min_lane_count())
         } else if self.is_ref() {
             write!(f, "types::R{}", self.lane_bits())
         } else {
@@ -568,6 +643,55 @@ mod tests {
         assert_eq!(F64.by(8), Some(F64X8));
     }
 
+    #[test]
+    fn dynamic_vectors() {
+        // Identification.
+        assert_eq!(I8X16XN.is_dynamic_vector(), true);
+        assert_eq!(B16X4XN.is_dynamic_vector(), true);
+        assert_eq!(F32X8XN.is_dynamic_vector(), true);
+        assert_eq!(F64X4XN.is_dynamic_vector(), true);
+        assert_eq!(I128X2XN.is_dynamic_vector(), true);
+
+        // Lane counts.
+        assert_eq!(I16X8XN.lane_count(), 0);
+        assert_eq!(I16X8XN.min_lane_count(), 8);
+
+        // Size.
+        assert_eq!(B32X2XN.bits(), 0);
+        assert_eq!(B32X2XN.min_bits(), 64);
+
+        // Change lane counts.
+        assert_eq!(F64X4XN.half_vector(), None);
+        assert_eq!(I8X8XN.by(2), None);
+
+        // Conversions to and from vectors.
+        assert_eq!(B8.by(8).unwrap().vector_to_dynamic(), Some(B8X8XN));
+        assert_eq!(I8.by(16).unwrap().vector_to_dynamic(), Some(I8X16XN));
+        assert_eq!(I16.by(8).unwrap().vector_to_dynamic(), Some(I16X8XN));
+        assert_eq!(B16.by(16).unwrap().vector_to_dynamic(), Some(B16X16XN));
+        assert_eq!(B32.by(2).unwrap().vector_to_dynamic(), Some(B32X2XN));
+        assert_eq!(B32.by(8).unwrap().vector_to_dynamic(), Some(B32X8XN));
+        assert_eq!(I32.by(4).unwrap().vector_to_dynamic(), Some(I32X4XN));
+        assert_eq!(F32.by(4).unwrap().vector_to_dynamic(), Some(F32X4XN));
+        assert_eq!(F64.by(2).unwrap().vector_to_dynamic(), Some(F64X2XN));
+        assert_eq!(I128.by(2).unwrap().vector_to_dynamic(), Some(I128X2XN));
+
+        assert_eq!(I128X2XN.dynamic_to_vector(), Some(I128X2));
+        assert_eq!(B64X2XN.dynamic_to_vector(), Some(B64X2));
+        assert_eq!(F32X4XN.dynamic_to_vector(), Some(F32X4));
+        assert_eq!(F64X4XN.dynamic_to_vector(), Some(F64X4));
+        assert_eq!(I32X2XN.dynamic_to_vector(), Some(I32X2));
+        assert_eq!(I32X8XN.dynamic_to_vector(), Some(I32X8));
+        assert_eq!(I16X16XN.dynamic_to_vector(), Some(I16X16));
+        assert_eq!(I8X32XN.dynamic_to_vector(), Some(I8X32));
+
+        assert_eq!(I8X64.vector_to_dynamic(), None);
+        assert_eq!(B16X32.vector_to_dynamic(), None);
+        assert_eq!(F32X16.vector_to_dynamic(), None);
+        assert_eq!(I64X8.vector_to_dynamic(), None);
+        assert_eq!(I128X4.vector_to_dynamic(), None);
+    }
+
     #[test]
     fn format_scalars() {
         assert_eq!(IFLAGS.to_string(), "iflags");
diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs
index b626b766f0..1ef9e1a68f 100644
--- a/cranelift/codegen/src/isa/aarch64/abi.rs
+++ b/cranelift/codegen/src/isa/aarch64/abi.rs
@@ -5,7 +5,7 @@ use crate::ir::types;
 use crate::ir::types::*;
 use crate::ir::MemFlags;
 use crate::ir::Opcode;
-use crate::ir::{ExternalName, LibCall};
+use crate::ir::{ExternalName, LibCall, Signature};
 use crate::isa;
 use crate::isa::aarch64::{inst::EmitState, inst::*};
 use crate::isa::unwind::UnwindInst;
@@
-155,6 +155,7 @@ fn saved_reg_stack_size( } else { vec_reg.len() & 1 }; + // FIXME: SVE: ABI is different to Neon, so do we treat all vec regs as Z-regs? let vec_save_bytes = (vec_reg.len() + vec_save_padding) * vec_reg_size; (int_save_bytes, vec_save_bytes) @@ -365,9 +366,15 @@ impl ABIMachineSpec for AArch64MachineDeps { RegClass::Int => xreg(*next_reg), RegClass::Float => vreg(*next_reg), }; + // Overlay Z-regs on V-regs for parameter passing. + let ty = if param.value_type.is_dynamic_vector() { + dynamic_to_fixed(param.value_type) + } else { + param.value_type + }; ret.push(ABIArg::reg( reg.to_real_reg().unwrap(), - param.value_type, + ty, param.extension, param.purpose, )); @@ -558,6 +565,7 @@ impl ABIMachineSpec for AArch64MachineDeps { } fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable, _ty: Type) -> Inst { + // FIXME: Do something different for dynamic types? let mem = mem.into(); Inst::LoadAddr { rd: into_reg, mem } } @@ -931,6 +939,7 @@ impl ABIMachineSpec for AArch64MachineDeps { fn gen_clobber_restore( call_conv: isa::CallConv, + sig: &Signature, flags: &settings::Flags, clobbers: &[Writable], fixed_frame_storage_size: u32, @@ -938,7 +947,7 @@ impl ABIMachineSpec for AArch64MachineDeps { ) -> SmallVec<[Inst; 16]> { let mut insts = SmallVec::new(); let (clobbered_int, clobbered_vec) = - get_regs_restored_in_epilogue(call_conv, flags, clobbers); + get_regs_restored_in_epilogue(call_conv, flags, sig, clobbers); // Free the fixed frame if necessary. if fixed_frame_storage_size > 0 { @@ -1146,11 +1155,12 @@ impl ABIMachineSpec for AArch64MachineDeps { insts } - fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 { + fn get_number_of_spillslots_for_value(rc: RegClass, vector_size: u32) -> u32 { + assert_eq!(vector_size % 8, 0); // We allocate in terms of 8-byte slots. match rc { RegClass::Int => 1, - RegClass::Float => 2, + RegClass::Float => vector_size / 8, } } @@ -1195,12 +1205,15 @@ impl ABIMachineSpec for AArch64MachineDeps { fn get_clobbered_callee_saves( call_conv: isa::CallConv, flags: &settings::Flags, + sig: &Signature, regs: &[Writable], ) -> Vec> { let mut regs: Vec> = regs .iter() .cloned() - .filter(|r| is_reg_saved_in_prologue(call_conv, flags.enable_pinned_reg(), r.to_reg())) + .filter(|r| { + is_reg_saved_in_prologue(call_conv, flags.enable_pinned_reg(), sig, r.to_reg()) + }) .collect(); // Sort registers for deterministic code output. We can do an unstable @@ -1235,7 +1248,12 @@ fn legal_type_for_machine(ty: Type) -> bool { /// Is the given register saved in the prologue if clobbered, i.e., is it a /// callee-save? -fn is_reg_saved_in_prologue(call_conv: isa::CallConv, enable_pinned_reg: bool, r: RealReg) -> bool { +fn is_reg_saved_in_prologue( + call_conv: isa::CallConv, + enable_pinned_reg: bool, + sig: &Signature, + r: RealReg, +) -> bool { if call_conv.extends_baldrdash() { match r.class() { RegClass::Int => { @@ -1249,6 +1267,14 @@ fn is_reg_saved_in_prologue(call_conv: isa::CallConv, enable_pinned_reg: bool, r }; } + // FIXME: We need to inspect whether a function is returning Z or P regs too. + let save_z_regs = sig + .params + .iter() + .filter(|p| p.value_type.is_dynamic_vector()) + .count() + != 0; + match r.class() { RegClass::Int => { // x19 - x28 inclusive are callee-saves. @@ -1262,8 +1288,17 @@ fn is_reg_saved_in_prologue(call_conv: isa::CallConv, enable_pinned_reg: bool, r } } RegClass::Float => { - // v8 - v15 inclusive are callee-saves. 
-            r.hw_enc() >= 8 && r.hw_enc() <= 15
+            // If a subroutine takes at least one argument in scalable vector registers
+            // or scalable predicate registers, or if it is a function that returns
+            // results in such registers, it must ensure that the entire contents of
+            // z8-z23 are preserved across the call. In other cases it need only
+            // preserve the low 64 bits of z8-z15.
+            if save_z_regs {
+                r.hw_enc() >= 8 && r.hw_enc() <= 23
+            } else {
+                // v8 - v15 inclusive are callee-saves.
+                r.hw_enc() >= 8 && r.hw_enc() <= 15
+            }
         }
     }
 }
@@ -1274,12 +1309,13 @@
 fn get_regs_restored_in_epilogue(
     call_conv: isa::CallConv,
     flags: &settings::Flags,
+    sig: &Signature,
     regs: &[Writable<RealReg>],
 ) -> (Vec<Writable<RealReg>>, Vec<Writable<RealReg>>) {
     let mut int_saves = vec![];
     let mut vec_saves = vec![];
     for &reg in regs {
-        if is_reg_saved_in_prologue(call_conv, flags.enable_pinned_reg(), reg.to_reg()) {
+        if is_reg_saved_in_prologue(call_conv, flags.enable_pinned_reg(), sig, reg.to_reg()) {
             match reg.to_reg().class() {
                 RegClass::Int => int_saves.push(reg),
                 RegClass::Float => vec_saves.push(reg),
diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle
index 7a907ca8a7..6397ff3c1c 100644
--- a/cranelift/codegen/src/isa/aarch64/inst.isle
+++ b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -919,6 +919,17 @@
   (Size64x2)
 ))
 
+(type DynamicVectorSize extern
+      (enum
+        (Size8x8xN)
+        (Size8x16xN)
+        (Size16x4xN)
+        (Size16x8xN)
+        (Size32x2xN)
+        (Size32x4xN)
+        (Size64x2xN)
+))
+
 ;; Helper for calculating the `VectorSize` corresponding to a type
 (decl vector_size (Type) VectorSize)
 (rule (vector_size (multi_lane 8 8)) (VectorSize.Size8x8))
@@ -928,6 +939,13 @@
 (rule (vector_size (multi_lane 32 2)) (VectorSize.Size32x2))
 (rule (vector_size (multi_lane 32 4)) (VectorSize.Size32x4))
 (rule (vector_size (multi_lane 64 2)) (VectorSize.Size64x2))
+(rule (vector_size (dynamic_lane 8 8)) (VectorSize.Size8x8))
+(rule (vector_size (dynamic_lane 8 16)) (VectorSize.Size8x16))
+(rule (vector_size (dynamic_lane 16 4)) (VectorSize.Size16x4))
+(rule (vector_size (dynamic_lane 16 8)) (VectorSize.Size16x8))
+(rule (vector_size (dynamic_lane 32 2)) (VectorSize.Size32x2))
+(rule (vector_size (dynamic_lane 32 4)) (VectorSize.Size32x4))
+(rule (vector_size (dynamic_lane 64 2)) (VectorSize.Size64x2))
 
 ;; A floating-point unit (FPU) operation with one arg.
 (type FPUOp1
diff --git a/cranelift/codegen/src/isa/aarch64/inst/args.rs b/cranelift/codegen/src/isa/aarch64/inst/args.rs
index 6bb97c945b..ef9abd42ec 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/args.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs
@@ -706,12 +706,9 @@ impl VectorSize {
     /// Get the scalar operand size that corresponds to a lane of a vector with a certain size.
pub fn lane_size(&self) -> ScalarSize { match self { - VectorSize::Size8x8 => ScalarSize::Size8, - VectorSize::Size8x16 => ScalarSize::Size8, - VectorSize::Size16x4 => ScalarSize::Size16, - VectorSize::Size16x8 => ScalarSize::Size16, - VectorSize::Size32x2 => ScalarSize::Size32, - VectorSize::Size32x4 => ScalarSize::Size32, + VectorSize::Size8x8 | VectorSize::Size8x16 => ScalarSize::Size8, + VectorSize::Size16x4 | VectorSize::Size16x8 => ScalarSize::Size16, + VectorSize::Size32x2 | VectorSize::Size32x4 => ScalarSize::Size32, VectorSize::Size64x2 => ScalarSize::Size64, } } @@ -743,3 +740,18 @@ impl VectorSize { (q, size) } } + +pub(crate) fn dynamic_to_fixed(ty: Type) -> Type { + match ty { + I8X8XN => I8X8, + I8X16XN => I8X16, + I16X4XN => I16X4, + I16X8XN => I16X8, + I32X2XN => I32X2, + I32X4XN => I32X4, + I64X2XN => I64X2, + F32X4XN => F32X4, + F64X2XN => F64X2, + _ => unreachable!("unhandled type: {}", ty), + } +} diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index a4aadb67a1..7ff0a2f2a2 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -89,12 +89,12 @@ pub fn mem_finalize( //============================================================================= // Instructions and subcomponents: emission -fn machreg_to_gpr(m: Reg) -> u32 { +pub(crate) fn machreg_to_gpr(m: Reg) -> u32 { assert_eq!(m.class(), RegClass::Int); u32::try_from(m.to_real_reg().unwrap().hw_enc() & 31).unwrap() } -fn machreg_to_vec(m: Reg) -> u32 { +pub(crate) fn machreg_to_vec(m: Reg) -> u32 { assert_eq!(m.class(), RegClass::Float); u32::try_from(m.to_real_reg().unwrap().hw_enc()).unwrap() } @@ -2259,7 +2259,7 @@ impl MachInstEmit for Inst { VectorSize::Size16x8 => 0b00010, VectorSize::Size32x4 => 0b00100, VectorSize::Size64x2 => 0b01000, - _ => unimplemented!(), + _ => unimplemented!("Unexpected VectorSize: {:?}", size), }; sink.put4( 0b010_01110000_00000_000011_00000_00000 diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index f3bf2c4e82..e4044f2de8 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -1194,6 +1194,7 @@ impl MachInst for Inst { assert!(ty.bits() <= 128); Ok((&[RegClass::Float], &[I8X16])) } + _ if ty.is_dynamic_vector() => Ok((&[RegClass::Float], &[I8X16])), IFLAGS | FFLAGS => Ok((&[RegClass::Int], &[I64])), _ => Err(CodegenError::Unsupported(format!( "Unexpected SSA-value type: {}", diff --git a/cranelift/codegen/src/isa/aarch64/inst/regs.rs b/cranelift/codegen/src/isa/aarch64/inst/regs.rs index 2cbfe5f332..ba86baeeb4 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/regs.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/regs.rs @@ -332,12 +332,9 @@ pub fn show_vreg_element(reg: Reg, idx: u8, size: VectorSize) -> String { assert_eq!(RegClass::Float, reg.class()); let s = show_reg(reg); let suffix = match size { - VectorSize::Size8x8 => ".b", - VectorSize::Size8x16 => ".b", - VectorSize::Size16x4 => ".h", - VectorSize::Size16x8 => ".h", - VectorSize::Size32x2 => ".s", - VectorSize::Size32x4 => ".s", + VectorSize::Size8x8 | VectorSize::Size8x16 => ".b", + VectorSize::Size16x4 | VectorSize::Size16x8 => ".h", + VectorSize::Size32x2 | VectorSize::Size32x4 => ".s", VectorSize::Size64x2 => ".d", }; format!("{}{}[{}]", s, suffix, idx) diff --git a/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs 
index b029ce0101..f5d86252b3 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs @@ -117,7 +117,7 @@ mod tests { pos.ins().return_(&[]); if let Some(stack_slot) = stack_slot { - func.stack_slots.push(stack_slot); + func.sized_stack_slots.push(stack_slot); } func diff --git a/cranelift/codegen/src/isa/aarch64/inst_neon.isle b/cranelift/codegen/src/isa/aarch64/inst_neon.isle new file mode 100644 index 0000000000..2b6fd5792e --- /dev/null +++ b/cranelift/codegen/src/isa/aarch64/inst_neon.isle @@ -0,0 +1,8 @@ + +;; Move helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(decl fpu_move_128 (Reg) Reg) +(rule (fpu_move_128 src) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.FpuMove128 dst src)))) + (writable_reg_to_reg dst))) + diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index 08fd779ec3..6e1739adcb 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -81,6 +81,9 @@ (rule (lower (has_type ty @ (multi_lane _ _) (iadd x y))) (add_vec x y (vector_size ty))) +(rule (lower (has_type ty @ (dynamic_lane _ _) (iadd x y))) + (value_reg (vec_rrr (VecALUOp.Add) (put_in_reg x) (put_in_reg y) (vector_size ty)))) + ;; `i128` (rule (lower (has_type $I128 (iadd x y))) (let @@ -157,6 +160,8 @@ ;; vectors (rule (lower (has_type ty @ (multi_lane _ _) (isub x y))) (sub_vec x y (vector_size ty))) +(rule (lower (has_type ty @ (dynamic_lane _ _) (isub x y))) + (value_reg (sub_vec (put_in_reg x) (put_in_reg y) (vector_size ty)))) ;; `i128` (rule (lower (has_type $I128 (isub x y))) @@ -244,6 +249,10 @@ (rule (lower (has_type (ty_vec128 ty @ (not_i64x2)) (imul x y))) (mul x y (vector_size ty))) +;; Case for 'dynamic' i8x16, i16x8, and i32x4. +(rule (lower (has_type ty @ (dynamic_lane _ _) (imul x y))) + (value_reg (vec_rrr (VecALUOp.Mul) (put_in_reg x) (put_in_reg y) (vector_size ty)))) + ;; Special lowering for i64x2. 
;;
;; This I64X2 multiplication is performed with several 32-bit
diff --git a/cranelift/codegen/src/isa/aarch64/lower_dynamic_neon.isle b/cranelift/codegen/src/isa/aarch64/lower_dynamic_neon.isle
new file mode 100644
index 0000000000..3b9337094d
--- /dev/null
+++ b/cranelift/codegen/src/isa/aarch64/lower_dynamic_neon.isle
@@ -0,0 +1,30 @@
+
+;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(rule (lower (has_type ty @ (dynamic_lane _ _) (iadd x y)))
+      (value_reg (vec_rrr (VecALUOp.Add) (put_in_reg x) (put_in_reg y) (vector_size ty))))
+
+;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(rule (lower (has_type ty @ (dynamic_lane _ _) (isub x y)))
+      (value_reg (vec_rrr (VecALUOp.Sub) (put_in_reg x) (put_in_reg y) (vector_size ty))))
+
+;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(rule (lower (has_type (lane_fits_in_32 ty @ (dynamic_lane _ _)) (imul x y)))
+      (value_reg (vec_rrr (VecALUOp.Mul) (put_in_reg x) (put_in_reg y) (vector_size ty))))
+
+;;;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(rule (lower (has_type ty @ (dynamic_lane _ _) (fadd x y)))
+      (value_reg (vec_rrr (VecALUOp.Fadd) (put_in_reg x) (put_in_reg y) (vector_size ty))))
+
+;;;; Rules for `fsub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(rule (lower (has_type ty @ (dynamic_lane _ _) (fsub x y)))
+      (value_reg (vec_rrr (VecALUOp.Fsub) (put_in_reg x) (put_in_reg y) (vector_size ty))))
+
+;;; Rules for `dynamic_stack_addr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(rule (lower (dynamic_stack_addr stack_slot))
+      (let ((dst WritableReg (temp_writable_reg $I64))
+            (_ Unit (emit (abi_dynamic_stackslot_addr dst stack_slot))))
+        (value_reg dst)))
+
+;;; Rules for `extract_vector` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(rule (lower (extract_vector x 0))
+      (value_reg (fpu_move_128 (put_in_reg x))))
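Every rule in this new file falls back on the fixed-width NEON ALU ops via `vector_size`, which is only sound while the target reports 128-bit dynamic vectors. The `imul` rule is additionally guarded because NEON has no 64x2 integer multiply; a standalone sketch of what that guard admits (names local to this note):

    // Illustrative only: the predicate behind the `lane_fits_in_32` extractor.
    fn lane_fits_in_32(is_any_vector: bool, lane_bits: u32) -> bool {
        // Reject scalars outright, then admit only lanes up to 32 bits wide,
        // since 64-bit lanes need the special i64x2 multiply lowering.
        is_any_vector && lane_bits <= 32
    }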
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index acbfd5095b..5a5f2c0a99 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -124,7 +124,10 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            insn,
            &inputs[..],
            outputs[0],
-            |ctx, dst, elem_ty, mem| {
+            |ctx, dst, mut elem_ty, mem| {
+                if elem_ty.is_dynamic_vector() {
+                    elem_ty = dynamic_to_fixed(elem_ty);
+                }
                let rd = dst.only_reg().unwrap();
                let is_float = ty_has_float_or_vec_representation(elem_ty);
                ctx.emit(match (ty_bits(elem_ty), sign_extend, is_float) {
@@ -177,7 +180,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
        Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => {
            let off = ctx.data(insn).load_store_offset().unwrap();
-            let elem_ty = match op {
+            let mut elem_ty = match op {
                Opcode::Istore8 => I8,
                Opcode::Istore16 => I16,
                Opcode::Istore32 => I32,
@@ -200,6 +203,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                    flags,
                });
            } else {
+                if elem_ty.is_dynamic_vector() {
+                    elem_ty = dynamic_to_fixed(elem_ty);
+                }
                let rd = dst.only_reg().unwrap();
                let mem = lower_address(ctx, elem_ty, &inputs[1..], off);
                ctx.emit(match (ty_bits(elem_ty), is_float) {
@@ -231,12 +237,15 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            };
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            let offset: i32 = offset.into();
-            let inst = ctx
-                .abi()
-                .stackslot_addr(stack_slot, u32::try_from(offset).unwrap(), rd);
+            assert!(ctx.abi().sized_stackslot_offsets().is_valid(stack_slot));
+            let inst =
+                ctx.abi()
+                    .sized_stackslot_addr(stack_slot, u32::try_from(offset).unwrap(), rd);
            ctx.emit(inst);
        }

+        Opcode::DynamicStackAddr => implemented_in_isle(ctx),
+
        Opcode::AtomicRmw => implemented_in_isle(ctx),

        Opcode::AtomicCas => implemented_in_isle(ctx),
@@ -249,7 +258,10 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            ctx.emit(Inst::Fence {});
        }

-        Opcode::StackLoad | Opcode::StackStore => {
+        Opcode::StackLoad
+        | Opcode::StackStore
+        | Opcode::DynamicStackStore
+        | Opcode::DynamicStackLoad => {
            panic!("Direct stack memory access not supported; should not be used by Wasm");
        }

@@ -684,7 +696,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            let idx = *imm;
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-            let size = VectorSize::from_ty(ctx.input_ty(insn, 0));
+            let input_ty = ctx.input_ty(insn, 0);
+            let size = VectorSize::from_ty(input_ty);
            let ty = ty.unwrap();

            if ty_has_int_representation(ty) {
@@ -730,7 +743,14 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(

        Opcode::Splat => {
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            let size = VectorSize::from_ty(ty.unwrap());
+            let ty = ty.unwrap();
+            // TODO: Handle SVE Dup.
+            let ty = if ty.is_dynamic_vector() {
+                dynamic_to_fixed(ty)
+            } else {
+                ty
+            };
+            let size = VectorSize::from_ty(ty);

            if let Some((_, insn)) = maybe_input_insn_multi(
                ctx,
@@ -1284,7 +1304,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
            let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            if !ty.is_vector() {
+            if !ty.is_vector() && !ty.is_dynamic_vector() {
                let fpu_op = match op {
                    Opcode::Fadd => FPUOp2::Add,
                    Opcode::Fsub => FPUOp2::Sub,
@@ -1336,7 +1356,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(

            debug_assert!(lane_type == F32 || lane_type == F64);

-            if ty.is_vector() {
+            if ty.is_vector() || ty.is_dynamic_vector() {
                let size = VectorSize::from_ty(ty);

                // pmin(a,b) => bitsel(b, a, cmpgt(a, b))
@@ -2015,7 +2035,15 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                .map_or(true, |insn| {
                    const_param_to_u128(ctx, insn).expect("Invalid immediate bytes") != 0
                });
-            let op = match (op, ty.unwrap()) {
+            let ty = ty.unwrap();
+            let ty = if ty.is_dynamic_vector() {
+                ty.dynamic_to_vector()
+                    .unwrap_or_else(|| panic!("Unsupported dynamic type: {}?", ty))
+            } else {
+                ty
+            };
+
+            let op = match (op, ty) {
                (Opcode::Snarrow, I8X16) => VecRRNarrowOp::Sqxtn16,
                (Opcode::Snarrow, I16X8) => VecRRNarrowOp::Sqxtn32,
                (Opcode::Snarrow, I32X4) => VecRRNarrowOp::Sqxtn64,
@@ -2057,7 +2085,14 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
        Opcode::SwidenLow | Opcode::SwidenHigh | Opcode::UwidenLow | Opcode::UwidenHigh => {
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-            let (t, high_half) = match (ty.unwrap(), op) {
+            let ty = ty.unwrap();
+            let ty = if ty.is_dynamic_vector() {
+                ty.dynamic_to_vector()
+                    .unwrap_or_else(|| panic!("Unsupported dynamic type: {}?", ty))
+            } else {
+                ty
+            };
+            let (t, high_half) = match (ty, op) {
                (I16X8, Opcode::SwidenLow) => (VecExtendOp::Sxtl8, false),
                (I16X8, Opcode::SwidenHigh) => (VecExtendOp::Sxtl8, true),
                (I16X8, Opcode::UwidenLow) => (VecExtendOp::Uxtl8, false),
@@ -2182,6 +2217,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            });
        }

+        Opcode::ExtractVector => implemented_in_isle(ctx),
+
        Opcode::ConstAddr | Opcode::Vconcat | Opcode::Vsplit | Opcode::IfcmpSp => {
            return Err(CodegenError::Unsupported(format!(
                "Unimplemented
lowering: {}", diff --git a/cranelift/codegen/src/isa/aarch64/mod.rs b/cranelift/codegen/src/isa/aarch64/mod.rs index 1b05f887c1..e4cfd78f98 100644 --- a/cranelift/codegen/src/isa/aarch64/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/mod.rs @@ -1,7 +1,7 @@ //! ARM 64-bit Instruction Set Architecture. use crate::ir::condcodes::IntCC; -use crate::ir::Function; +use crate::ir::{Function, Type}; use crate::isa::aarch64::settings as aarch64_settings; use crate::isa::{Builder as IsaBuilder, TargetIsa}; use crate::machinst::{ @@ -57,7 +57,7 @@ impl AArch64Backend { flags: shared_settings::Flags, ) -> CodegenResult<(VCode, regalloc2::Output)> { let emit_info = EmitInfo::new(flags.clone()); - let abi = Box::new(abi::AArch64ABICallee::new(func, flags, self.isa_flags())?); + let abi = Box::new(abi::AArch64ABICallee::new(func, self)?); compile::compile::(func, self, abi, &self.machine_env, emit_info) } } @@ -76,7 +76,8 @@ impl TargetIsa for AArch64Backend { let frame_size = emit_result.frame_size; let value_labels_ranges = emit_result.value_labels_ranges; let buffer = emit_result.buffer.finish(); - let stackslot_offsets = emit_result.stackslot_offsets; + let sized_stackslot_offsets = emit_result.sized_stackslot_offsets; + let dynamic_stackslot_offsets = emit_result.dynamic_stackslot_offsets; if let Some(disasm) = emit_result.disasm.as_ref() { log::debug!("disassembly:\n{}", disasm); @@ -87,7 +88,8 @@ impl TargetIsa for AArch64Backend { frame_size, disasm: emit_result.disasm, value_labels_ranges, - stackslot_offsets, + sized_stackslot_offsets, + dynamic_stackslot_offsets, bb_starts: emit_result.bb_offsets, bb_edges: emit_result.bb_edges, }) @@ -109,6 +111,10 @@ impl TargetIsa for AArch64Backend { self.isa_flags.iter().collect() } + fn dynamic_vector_bytes(&self, _dyn_ty: Type) -> u32 { + 16 + } + fn unsigned_add_overflow_condition(&self) -> IntCC { // Unsigned `>=`; this corresponds to the carry flag set on aarch64, which happens on // overflow of an add. diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs index 691c90e32b..3d556d4935 100644 --- a/cranelift/codegen/src/isa/mod.rs +++ b/cranelift/codegen/src/isa/mod.rs @@ -196,7 +196,7 @@ pub struct TargetFrontendConfig { impl TargetFrontendConfig { /// Get the pointer type of this target. pub fn pointer_type(self) -> ir::Type { - ir::Type::int(u16::from(self.pointer_bits())).unwrap() + ir::Type::int(self.pointer_bits() as u16).unwrap() } /// Get the width of pointers on this target, in units of bits. @@ -226,6 +226,9 @@ pub trait TargetIsa: fmt::Display + Send + Sync { /// Get the ISA-dependent flag values that were used to make this trait object. fn isa_flags(&self) -> Vec; + /// Get the ISA-dependent maximum vector register size, in bytes. + fn dynamic_vector_bytes(&self, dynamic_ty: ir::Type) -> u32; + /// Compile the given function. fn compile_function( &self, @@ -311,7 +314,7 @@ impl<'a> dyn TargetIsa + 'a { /// Get the pointer type of this ISA. pub fn pointer_type(&self) -> ir::Type { - ir::Type::int(u16::from(self.pointer_bits())).unwrap() + ir::Type::int(self.pointer_bits() as u16).unwrap() } /// Get the width of pointers on this ISA. 
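The new `dynamic_vector_bytes` hook is what lets the legalizer (see the `const_vector_scale` change in legalizer/globalvalue.rs further below) fold `dyn_scale_target_const` into a plain `iconst`. A minimal standalone sketch of that arithmetic, assuming the fixed 16-byte answer all three backends currently give:

    // Illustrative only: compute the scale constant for a dynamic type
    // whose base vector type occupies `ty_bytes`.
    fn vector_scale(dynamic_vector_bytes: u32, ty_bytes: u32) -> i64 {
        // Use a minimum of 128 bits (16 bytes) for the base type.
        let base_bytes = ty_bytes.max(16);
        (dynamic_vector_bytes / base_bytes) as i64
    }

    fn main() {
        // With 128-bit vectors, every 128-bit base type scales by exactly 1.
        assert_eq!(vector_scale(16, 16), 1);
        // A hypothetical 256-bit SVE implementation would report 32 bytes.
        assert_eq!(vector_scale(32, 16), 2);
    }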
diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs index f5d56b7113..77dcc87e94 100644 --- a/cranelift/codegen/src/isa/s390x/abi.rs +++ b/cranelift/codegen/src/isa/s390x/abi.rs @@ -61,6 +61,7 @@ use crate::ir; use crate::ir::condcodes::IntCC; use crate::ir::types; use crate::ir::MemFlags; +use crate::ir::Signature; use crate::ir::Type; use crate::isa; use crate::isa::s390x::inst::*; @@ -556,6 +557,7 @@ impl ABIMachineSpec for S390xMachineDeps { fn gen_clobber_restore( call_conv: isa::CallConv, + _: &Signature, _: &settings::Flags, clobbers: &[Writable], fixed_frame_storage_size: u32, @@ -633,7 +635,7 @@ impl ABIMachineSpec for S390xMachineDeps { unimplemented!("StructArgs not implemented for S390X yet"); } - fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 { + fn get_number_of_spillslots_for_value(rc: RegClass, _vector_scale: u32) -> u32 { // We allocate in terms of 8-byte slots. match rc { RegClass::Int => 1, @@ -665,6 +667,7 @@ impl ABIMachineSpec for S390xMachineDeps { fn get_clobbered_callee_saves( call_conv: isa::CallConv, flags: &settings::Flags, + _sig: &Signature, regs: &[Writable], ) -> Vec> { assert!( @@ -688,7 +691,7 @@ impl ABIMachineSpec for S390xMachineDeps { _is_leaf: bool, _stack_args_size: u32, _num_clobbered_callee_saves: usize, - _fixed_frame_storage_size: u32, + _frame_storage_size: u32, ) -> bool { // The call frame set-up is handled by gen_clobber_save(). false diff --git a/cranelift/codegen/src/isa/s390x/inst.isle b/cranelift/codegen/src/isa/s390x/inst.isle index 11006ed643..00868224c5 100644 --- a/cranelift/codegen/src/isa/s390x/inst.isle +++ b/cranelift/codegen/src/isa/s390x/inst.isle @@ -1158,9 +1158,6 @@ ;; Helpers for stack-slot addresses ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(decl abi_stackslot_addr (WritableReg StackSlot Offset32) MInst) -(extern constructor abi_stackslot_addr abi_stackslot_addr) - (decl stack_addr_impl (Type StackSlot Offset32) Reg) (rule (stack_addr_impl ty stack_slot offset) (let ((dst WritableReg (temp_writable_reg ty)) diff --git a/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs index 5702134dbf..152dabe44b 100644 --- a/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs @@ -148,7 +148,7 @@ mod tests { pos.ins().return_(&[]); if let Some(stack_slot) = stack_slot { - func.stack_slots.push(stack_slot); + func.sized_stack_slots.push(stack_slot); } func @@ -206,7 +206,7 @@ mod tests { pos.ins().return_(&[]); if let Some(stack_slot) = stack_slot { - func.stack_slots.push(stack_slot); + func.sized_stack_slots.push(stack_slot); } func diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle index 2585253e53..5dfc2ec3ec 100644 --- a/cranelift/codegen/src/isa/s390x/lower.isle +++ b/cranelift/codegen/src/isa/s390x/lower.isle @@ -2301,7 +2301,7 @@ (decl lower_call_ret_arg (ABISig) InstOutput) (rule (lower_call_ret_arg (abi_no_ret_arg)) (output_none)) (rule (lower_call_ret_arg abi @ (abi_ret_arg (abi_arg_only_slot slot))) - (let ((ret_arg Reg (load_addr (memarg_stack_off (abi_stack_arg_space abi) 0))) + (let ((ret_arg Reg (load_addr (memarg_stack_off (abi_sized_stack_arg_space abi) 0))) (_ Unit (copy_reg_to_arg_slot 0 slot ret_arg))) (output_none))) @@ -2309,7 +2309,7 @@ (decl lower_call_rets (ABISig Range InstOutputBuilder) InstOutput) (rule (lower_call_rets abi (range_empty) builder) (output_builder_finish builder)) 
(rule (lower_call_rets abi (range_unwrap head tail) builder) - (let ((ret ValueRegs (copy_from_arg (abi_stack_arg_space abi) (abi_get_ret abi head))) + (let ((ret ValueRegs (copy_from_arg (abi_sized_stack_arg_space abi) (abi_get_ret abi head))) (_ Unit (output_builder_push builder ret))) (lower_call_rets abi tail builder))) diff --git a/cranelift/codegen/src/isa/s390x/lower.rs b/cranelift/codegen/src/isa/s390x/lower.rs index fe402f83ad..2c87621aae 100644 --- a/cranelift/codegen/src/isa/s390x/lower.rs +++ b/cranelift/codegen/src/isa/s390x/lower.rs @@ -197,7 +197,11 @@ impl LowerBackend for S390xBackend { | Opcode::SqmulRoundSat | Opcode::FvpromoteLow | Opcode::Fvdemote - | Opcode::IaddPairwise => { + | Opcode::IaddPairwise + | Opcode::DynamicStackLoad + | Opcode::DynamicStackStore + | Opcode::DynamicStackAddr + | Opcode::ExtractVector => { unreachable!( "TODO: not yet implemented in ISLE: inst = `{}`, type = `{:?}`", ctx.dfg().display_inst(ir_inst), diff --git a/cranelift/codegen/src/isa/s390x/lower/isle.rs b/cranelift/codegen/src/isa/s390x/lower/isle.rs index ec775e0b41..2d41c6a88a 100644 --- a/cranelift/codegen/src/isa/s390x/lower/isle.rs +++ b/cranelift/codegen/src/isa/s390x/lower/isle.rs @@ -16,7 +16,7 @@ use crate::settings::Flags; use crate::{ ir::{ condcodes::*, immediates::*, types::*, AtomicRmwOp, Endianness, Inst, InstructionData, - MemFlags, Opcode, StackSlot, TrapCode, Value, ValueList, + MemFlags, Opcode, TrapCode, Value, ValueList, }, isa::unwind::UnwindInst, machinst::{InsnOutput, LowerCtx, VCodeConstant, VCodeConstantData}, @@ -77,7 +77,7 @@ where } fn abi_accumulate_outgoing_args_size(&mut self, abi: &ABISig) -> Unit { - let off = abi.stack_arg_space() + abi.stack_ret_space(); + let off = abi.sized_stack_arg_space() + abi.sized_stack_ret_space(); self.lower_ctx .abi() .accumulate_outgoing_args_size(off as u32); @@ -531,17 +531,6 @@ where } } - #[inline] - fn abi_stackslot_addr( - &mut self, - dst: WritableReg, - stack_slot: StackSlot, - offset: Offset32, - ) -> MInst { - let offset = u32::try_from(i32::from(offset)).unwrap(); - self.lower_ctx.abi().stackslot_addr(stack_slot, offset, dst) - } - #[inline] fn inst_builder_new(&mut self) -> VecMInstBuilder { Cell::new(Vec::::new()) diff --git a/cranelift/codegen/src/isa/s390x/mod.rs b/cranelift/codegen/src/isa/s390x/mod.rs index 3e9674d6f2..c860a479dd 100644 --- a/cranelift/codegen/src/isa/s390x/mod.rs +++ b/cranelift/codegen/src/isa/s390x/mod.rs @@ -1,7 +1,7 @@ //! IBM Z 64-bit Instruction Set Architecture. 
use crate::ir::condcodes::IntCC; -use crate::ir::Function; +use crate::ir::{Function, Type}; use crate::isa::s390x::settings as s390x_settings; #[cfg(feature = "unwind")] use crate::isa::unwind::systemv::RegisterMappingError; @@ -58,7 +58,7 @@ impl S390xBackend { flags: shared_settings::Flags, ) -> CodegenResult<(VCode, regalloc2::Output)> { let emit_info = EmitInfo::new(flags.clone(), self.isa_flags.clone()); - let abi = Box::new(abi::S390xABICallee::new(func, flags, self.isa_flags())?); + let abi = Box::new(abi::S390xABICallee::new(func, self)?); compile::compile::(func, self, abi, &self.machine_env, emit_info) } } @@ -77,7 +77,8 @@ impl TargetIsa for S390xBackend { let frame_size = emit_result.frame_size; let value_labels_ranges = emit_result.value_labels_ranges; let buffer = emit_result.buffer.finish(); - let stackslot_offsets = emit_result.stackslot_offsets; + let sized_stackslot_offsets = emit_result.sized_stackslot_offsets; + let dynamic_stackslot_offsets = emit_result.dynamic_stackslot_offsets; if let Some(disasm) = emit_result.disasm.as_ref() { log::debug!("disassembly:\n{}", disasm); @@ -88,7 +89,8 @@ impl TargetIsa for S390xBackend { frame_size, disasm: emit_result.disasm, value_labels_ranges, - stackslot_offsets, + sized_stackslot_offsets, + dynamic_stackslot_offsets, bb_starts: emit_result.bb_offsets, bb_edges: emit_result.bb_edges, }) @@ -110,6 +112,10 @@ impl TargetIsa for S390xBackend { self.isa_flags.iter().collect() } + fn dynamic_vector_bytes(&self, _dyn_ty: Type) -> u32 { + 16 + } + fn unsigned_add_overflow_condition(&self) -> IntCC { // The ADD LOGICAL family of instructions set the condition code // differently from normal comparisons, in a way that cannot be diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index fa2be84ded..65954c8051 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -1,7 +1,7 @@ //! Implementation of the standard x64 ABI. use crate::ir::types::*; -use crate::ir::{self, types, ExternalName, LibCall, MemFlags, Opcode, TrapCode, Type}; +use crate::ir::{self, types, ExternalName, LibCall, MemFlags, Opcode, Signature, TrapCode, Type}; use crate::isa; use crate::isa::{unwind::UnwindInst, x64::inst::*, CallConv}; use crate::machinst::abi_impl::*; @@ -573,6 +573,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { fn gen_clobber_restore( call_conv: isa::CallConv, + sig: &Signature, flags: &settings::Flags, clobbers: &[Writable], fixed_frame_storage_size: u32, @@ -580,7 +581,8 @@ impl ABIMachineSpec for X64ABIMachineSpec { ) -> SmallVec<[Self::I; 16]> { let mut insts = SmallVec::new(); - let clobbered_callee_saves = Self::get_clobbered_callee_saves(call_conv, flags, clobbers); + let clobbered_callee_saves = + Self::get_clobbered_callee_saves(call_conv, flags, sig, clobbers); let stack_size = fixed_frame_storage_size + compute_clobber_size(&clobbered_callee_saves); // Restore regs by loading from offsets of RSP. RSP will be @@ -722,11 +724,11 @@ impl ABIMachineSpec for X64ABIMachineSpec { insts } - fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 { + fn get_number_of_spillslots_for_value(rc: RegClass, vector_scale: u32) -> u32 { // We allocate in terms of 8-byte slots. 
match rc { RegClass::Int => 1, - RegClass::Float => 2, + RegClass::Float => vector_scale / 8, } } @@ -771,6 +773,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { fn get_clobbered_callee_saves( call_conv: CallConv, flags: &settings::Flags, + _sig: &Signature, regs: &[Writable], ) -> Vec> { let mut regs: Vec> = match call_conv { @@ -805,7 +808,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { _is_leaf: bool, _stack_args_size: u32, _num_clobbered_callee_saves: usize, - _fixed_frame_storage_size: u32, + _frame_storage_size: u32, ) -> bool { true } diff --git a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs index c28ea3b623..d3970a575a 100644 --- a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs @@ -144,7 +144,7 @@ mod tests { pos.ins().return_(&[]); if let Some(stack_slot) = stack_slot { - func.stack_slots.push(stack_slot); + func.sized_stack_slots.push(stack_slot); } func diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 2c4641296d..07bf0c6e74 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -2169,6 +2169,8 @@ fn lower_insn_to_regs>( }); } + Opcode::DynamicStackAddr => unimplemented!("DynamicStackAddr"), + Opcode::StackAddr => { let (stack_slot, offset) = match *ctx.data(insn) { InstructionData::StackLoad { @@ -2180,9 +2182,9 @@ fn lower_insn_to_regs>( }; let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let offset: i32 = offset.into(); - let inst = ctx - .abi() - .stackslot_addr(stack_slot, u32::try_from(offset).unwrap(), dst); + let inst = + ctx.abi() + .sized_stackslot_addr(stack_slot, u32::try_from(offset).unwrap(), dst); ctx.emit(inst); } @@ -2908,7 +2910,11 @@ fn lower_insn_to_regs>( // Unimplemented opcodes below. These are not currently used by Wasm // lowering or other known embeddings, but should be either supported or - // removed eventually. + // removed eventually + Opcode::ExtractVector => { + unimplemented!("ExtractVector not supported"); + } + Opcode::Cls => unimplemented!("Cls not supported"), Opcode::Fma => unimplemented!("Fma not supported"), @@ -2965,7 +2971,10 @@ fn lower_insn_to_regs>( panic!("ALU+imm and ALU+carry ops should not appear here!"); } - Opcode::StackLoad | Opcode::StackStore => { + Opcode::StackLoad + | Opcode::StackStore + | Opcode::DynamicStackStore + | Opcode::DynamicStackLoad => { panic!("Direct stack memory access not supported; should have been legalized"); } diff --git a/cranelift/codegen/src/isa/x64/mod.rs b/cranelift/codegen/src/isa/x64/mod.rs index c732b56194..cb03b558d9 100644 --- a/cranelift/codegen/src/isa/x64/mod.rs +++ b/cranelift/codegen/src/isa/x64/mod.rs @@ -3,7 +3,7 @@ use self::inst::EmitInfo; use super::TargetIsa; -use crate::ir::{condcodes::IntCC, Function}; +use crate::ir::{condcodes::IntCC, Function, Type}; #[cfg(feature = "unwind")] use crate::isa::unwind::systemv; use crate::isa::x64::{inst::regs::create_reg_env_systemv, settings as x64_settings}; @@ -53,7 +53,7 @@ impl X64Backend { // This performs lowering to VCode, register-allocates the code, computes // block layout and finalizes branches. The result is ready for binary emission. 
let emit_info = EmitInfo::new(flags.clone(), self.x64_flags.clone()); - let abi = Box::new(abi::X64ABICallee::new(&func, flags, self.isa_flags())?); + let abi = Box::new(abi::X64ABICallee::new(&func, self)?); compile::compile::(&func, self, abi, &self.reg_env, emit_info) } } @@ -72,7 +72,8 @@ impl TargetIsa for X64Backend { let frame_size = emit_result.frame_size; let value_labels_ranges = emit_result.value_labels_ranges; let buffer = emit_result.buffer.finish(); - let stackslot_offsets = emit_result.stackslot_offsets; + let sized_stackslot_offsets = emit_result.sized_stackslot_offsets; + let dynamic_stackslot_offsets = emit_result.dynamic_stackslot_offsets; if let Some(disasm) = emit_result.disasm.as_ref() { log::debug!("disassembly:\n{}", disasm); @@ -83,7 +84,8 @@ impl TargetIsa for X64Backend { frame_size, disasm: emit_result.disasm, value_labels_ranges, - stackslot_offsets, + sized_stackslot_offsets, + dynamic_stackslot_offsets, bb_starts: emit_result.bb_offsets, bb_edges: emit_result.bb_edges, }) @@ -97,6 +99,10 @@ impl TargetIsa for X64Backend { self.x64_flags.iter().collect() } + fn dynamic_vector_bytes(&self, _dyn_ty: Type) -> u32 { + 16 + } + fn name(&self) -> &'static str { "x64" } diff --git a/cranelift/codegen/src/legalizer/globalvalue.rs b/cranelift/codegen/src/legalizer/globalvalue.rs index 50974a1d85..751f4f4035 100644 --- a/cranelift/codegen/src/legalizer/globalvalue.rs +++ b/cranelift/codegen/src/legalizer/globalvalue.rs @@ -28,9 +28,23 @@ pub fn expand_global_value( readonly, } => load_addr(inst, func, base, offset, global_type, readonly, isa), ir::GlobalValueData::Symbol { tls, .. } => symbol(inst, func, global_value, isa, tls), + ir::GlobalValueData::DynScaleTargetConst { vector_type } => { + const_vector_scale(inst, func, vector_type, isa) + } } } +fn const_vector_scale(inst: ir::Inst, func: &mut ir::Function, ty: ir::Type, isa: &dyn TargetIsa) { + assert!(ty.bytes() <= 16); + + // Use a minimum of 128-bits for the base type. + let base_bytes = std::cmp::max(ty.bytes(), 16); + let scale = (isa.dynamic_vector_bytes(ty) / base_bytes) as i64; + assert!(scale > 0); + let pos = FuncCursor::new(func).at_inst(inst); + pos.func.dfg.replace(inst).iconst(isa.pointer_type(), scale); +} + /// Expand a `global_value` instruction for a vmctx global. fn vmctx_addr(inst: ir::Inst, func: &mut ir::Function) { // Get the value representing the `vmctx` argument. diff --git a/cranelift/codegen/src/legalizer/mod.rs b/cranelift/codegen/src/legalizer/mod.rs index 755be3a9ec..ae7caf0345 100644 --- a/cranelift/codegen/src/legalizer/mod.rs +++ b/cranelift/codegen/src/legalizer/mod.rs @@ -115,6 +115,41 @@ pub fn simple_legalize(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: mflags.set_aligned(); pos.func.dfg.replace(inst).store(mflags, arg, addr, 0); } + InstructionData::DynamicStackLoad { + opcode: ir::Opcode::DynamicStackLoad, + dynamic_stack_slot, + } => { + let ty = pos.func.dfg.value_type(pos.func.dfg.first_result(inst)); + assert!(ty.is_dynamic_vector()); + let addr_ty = isa.pointer_type(); + + let mut pos = FuncCursor::new(pos.func).at_inst(inst); + pos.use_srcloc(inst); + + let addr = pos.ins().dynamic_stack_addr(addr_ty, dynamic_stack_slot); + + // Stack slots are required to be accessible and aligned. 
+ let mflags = MemFlags::trusted(); + pos.func.dfg.replace(inst).load(ty, mflags, addr, 0); + } + InstructionData::DynamicStackStore { + opcode: ir::Opcode::DynamicStackStore, + arg, + dynamic_stack_slot, + } => { + pos.use_srcloc(inst); + let addr_ty = isa.pointer_type(); + let vector_ty = pos.func.dfg.value_type(arg); + assert!(vector_ty.is_dynamic_vector()); + + let addr = pos.ins().dynamic_stack_addr(addr_ty, dynamic_stack_slot); + + let mut mflags = MemFlags::new(); + // Stack slots are required to be accessible and aligned. + mflags.set_notrap(); + mflags.set_aligned(); + pos.func.dfg.replace(inst).store(mflags, arg, addr, 0); + } InstructionData::TableAddr { opcode: ir::Opcode::TableAddr, table, diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index 4d7442b670..db9936efe8 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -1,7 +1,7 @@ //! ABI definitions. use crate::binemit::StackMap; -use crate::ir::{Signature, StackSlot}; +use crate::ir::{DynamicStackSlot, Signature, StackSlot}; use crate::isa::CallConv; use crate::machinst::*; use crate::settings; @@ -47,11 +47,17 @@ pub trait ABICallee { /// Number of return values. fn num_retvals(&self) -> usize; - /// Number of stack slots (not spill slots). - fn num_stackslots(&self) -> usize; + /// Number of sized stack slots (not spill slots). + fn num_sized_stackslots(&self) -> usize; - /// The offsets of all stack slots (not spill slots) for debuginfo purposes. - fn stackslot_offsets(&self) -> &PrimaryMap; + /// The offsets of all sized stack slots (not spill slots) for debuginfo purposes. + fn sized_stackslot_offsets(&self) -> &PrimaryMap; + + /// The offsets of all dynamic stack slots (not spill slots) for debuginfo purposes. + fn dynamic_stackslot_offsets(&self) -> &PrimaryMap; + + /// All the defined dynamic types. + fn dynamic_type_size(&self, ty: Type) -> u32; /// Generate an instruction which copies an argument to a destination /// register. @@ -101,8 +107,16 @@ pub trait ABICallee { /// Update with the clobbered registers, post-regalloc. fn set_clobbered(&mut self, clobbered: Vec>); - /// Get the address of a stackslot. - fn stackslot_addr(&self, slot: StackSlot, offset: u32, into_reg: Writable) -> Self::I; + /// Get the address of a sized stackslot. + fn sized_stackslot_addr( + &self, + slot: StackSlot, + offset: u32, + into_reg: Writable, + ) -> Self::I; + + /// Get the address of a dynamic stackslot. + fn dynamic_stackslot_addr(&self, slot: DynamicStackSlot, into_reg: Writable) -> Self::I; /// Load from a spillslot. fn load_spillslot( diff --git a/cranelift/codegen/src/machinst/abi_impl.rs b/cranelift/codegen/src/machinst/abi_impl.rs index b2a11e2d76..45bf8884b9 100644 --- a/cranelift/codegen/src/machinst/abi_impl.rs +++ b/cranelift/codegen/src/machinst/abi_impl.rs @@ -126,7 +126,8 @@ use super::abi::*; use crate::binemit::StackMap; use crate::ir::types::*; -use crate::ir::{ArgumentExtension, ArgumentPurpose, StackSlot}; +use crate::ir::{ArgumentExtension, ArgumentPurpose, DynamicStackSlot, Signature, StackSlot}; +use crate::isa::TargetIsa; use crate::machinst::*; use crate::settings; use crate::CodegenResult; @@ -138,6 +139,8 @@ use std::convert::TryFrom; use std::marker::PhantomData; use std::mem; +use std::collections::HashMap; + /// A location for (part of) an argument or return value. These "storage slots" /// are specified for each register-sized part of an argument. 
#[derive(Clone, Copy, Debug, PartialEq, Eq)] @@ -430,6 +433,7 @@ pub trait ABIMachineSpec { fn get_clobbered_callee_saves( call_conv: isa::CallConv, flags: &settings::Flags, + sig: &Signature, regs: &[Writable], ) -> Vec>; @@ -465,6 +469,7 @@ pub trait ABIMachineSpec { /// clobber-save sequence finished. fn gen_clobber_restore( call_conv: isa::CallConv, + sig: &Signature, flags: &settings::Flags, clobbers: &[Writable], fixed_frame_storage_size: u32, @@ -495,7 +500,7 @@ pub trait ABIMachineSpec { ) -> SmallVec<[Self::I; 8]>; /// Get the number of spillslots required for the given register-class. - fn get_number_of_spillslots_for_value(rc: RegClass) -> u32; + fn get_number_of_spillslots_for_value(rc: RegClass, target_vector_bytes: u32) -> u32; /// Get the current virtual-SP offset from an instruction-emission state. fn get_virtual_sp_offset_from_state(s: &::State) -> i64; @@ -528,9 +533,9 @@ pub struct ABISig { /// pointer. rets: Vec, /// Space on stack used to store arguments. - stack_arg_space: i64, + sized_stack_arg_space: i64, /// Space on stack used to store return values. - stack_ret_space: i64, + sized_stack_ret_space: i64, /// Index in `args` of the stack-return-value-area argument. stack_ret_arg: Option, /// Specific order for copying into arguments at callsites. We must be @@ -550,15 +555,15 @@ impl ABISig { // Compute args and retvals from signature. Handle retvals first, // because we may need to add a return-area arg to the args. - let (rets, stack_ret_space, _) = M::compute_arg_locs( + let (rets, sized_stack_ret_space, _) = M::compute_arg_locs( sig.call_conv, flags, &sig.returns, ArgsOrRets::Rets, /* extra ret-area ptr = */ false, )?; - let need_stack_return_area = stack_ret_space > 0; - let (args, stack_arg_space, stack_ret_arg) = M::compute_arg_locs( + let need_stack_return_area = sized_stack_ret_space > 0; + let (args, sized_stack_arg_space, stack_ret_arg) = M::compute_arg_locs( sig.call_conv, flags, &sig.params, @@ -586,8 +591,8 @@ impl ABISig { sig, args, rets, - stack_arg_space, - stack_ret_space, + sized_stack_arg_space, + sized_stack_ret_space, stack_ret_arg, copy_to_arg_order, ); @@ -595,8 +600,8 @@ impl ABISig { Ok(ABISig { args, rets, - stack_arg_space, - stack_ret_space, + sized_stack_arg_space, + sized_stack_ret_space, stack_ret_arg, copy_to_arg_order, call_conv: sig.call_conv, @@ -666,8 +671,8 @@ impl ABISig { } /// Get total stack space required for arguments. - pub fn stack_arg_space(&self) -> i64 { - self.stack_arg_space + pub fn sized_stack_arg_space(&self) -> i64 { + self.sized_stack_arg_space } /// Get the number of return values expected. @@ -681,8 +686,8 @@ impl ABISig { } /// Get total stack space required for return values. - pub fn stack_ret_space(&self) -> i64 { - self.stack_ret_space + pub fn sized_stack_ret_space(&self) -> i64 { + self.sized_stack_ret_space } /// Get information specifying how to pass the implicit pointer @@ -699,15 +704,19 @@ pub struct ABICalleeImpl { ir_sig: ir::Signature, /// Signature: arg and retval regs. sig: ABISig, - /// Offsets to each stackslot. - stackslots: PrimaryMap, - /// Total stack size of all stackslots. + /// Defined dynamic types. + dynamic_type_sizes: HashMap, + /// Offsets to each dynamic stackslot. + dynamic_stackslots: PrimaryMap, + /// Offsets to each sized stackslot. + sized_stackslots: PrimaryMap, + /// Total stack size of all stackslots stackslots_size: u32, /// Stack size to be reserved for outgoing arguments. outgoing_args_size: u32, /// Clobbered registers, from regalloc. 
clobbered: Vec>, - /// Total number of spillslots, from regalloc. + /// Total number of spillslots, including for 'dynamic' types, from regalloc. spillslots: Option, /// Storage allocated for the fixed part of the stack frame. This is /// usually the same as the total frame size below, except in the case @@ -766,13 +775,10 @@ fn get_special_purpose_param_register( impl ABICalleeImpl { /// Create a new body ABI instance. - pub fn new( - f: &ir::Function, - flags: settings::Flags, - isa_flags: Vec, - ) -> CodegenResult { + pub fn new(f: &ir::Function, isa: &dyn TargetIsa) -> CodegenResult { log::trace!("ABI: func signature {:?}", f.signature); + let flags = isa.flags().clone(); let ir_sig = ensure_struct_return_ptr_is_returned(&f.signature); let sig = ABISig::from_func_sig::(&ir_sig, &flags)?; @@ -791,16 +797,41 @@ impl ABICalleeImpl { call_conv ); - // Compute stackslot locations and total stackslot size. - let mut stack_offset: u32 = 0; - let mut stackslots = PrimaryMap::new(); - for (stackslot, data) in f.stack_slots.iter() { - let off = stack_offset; - stack_offset += data.size; + // Compute sized stackslot locations and total stackslot size. + let mut sized_stack_offset: u32 = 0; + let mut sized_stackslots = PrimaryMap::new(); + for (stackslot, data) in f.sized_stack_slots.iter() { + let off = sized_stack_offset; + sized_stack_offset += data.size; let mask = M::word_bytes() - 1; - stack_offset = (stack_offset + mask) & !mask; - debug_assert_eq!(stackslot.as_u32() as usize, stackslots.len()); - stackslots.push(off); + sized_stack_offset = (sized_stack_offset + mask) & !mask; + debug_assert_eq!(stackslot.as_u32() as usize, sized_stackslots.len()); + sized_stackslots.push(off); + } + + // Compute dynamic stackslot locations and total stackslot size. 
+ let mut dynamic_stackslots = PrimaryMap::new(); + let mut dynamic_stack_offset: u32 = sized_stack_offset; + for (stackslot, data) in f.dynamic_stack_slots.iter() { + debug_assert_eq!(stackslot.as_u32() as usize, dynamic_stackslots.len()); + let off = dynamic_stack_offset; + let ty = f + .get_concrete_dynamic_ty(data.dyn_ty) + .unwrap_or_else(|| panic!("invalid dynamic vector type: {}", data.dyn_ty)); + dynamic_stack_offset += isa.dynamic_vector_bytes(ty); + let mask = M::word_bytes() - 1; + dynamic_stack_offset = (dynamic_stack_offset + mask) & !mask; + dynamic_stackslots.push(off); + } + let stackslots_size = dynamic_stack_offset; + + let mut dynamic_type_sizes = HashMap::with_capacity(f.dfg.dynamic_types.len()); + for (dyn_ty, _data) in f.dfg.dynamic_types.iter() { + let ty = f + .get_concrete_dynamic_ty(dyn_ty) + .unwrap_or_else(|| panic!("invalid dynamic vector type: {}", dyn_ty)); + let size = isa.dynamic_vector_bytes(ty); + dynamic_type_sizes.insert(ty, size); } // Figure out what instructions, if any, will be needed to check the @@ -827,8 +858,10 @@ impl ABICalleeImpl { Ok(Self { ir_sig, sig, - stackslots, - stackslots_size: stack_offset, + dynamic_stackslots, + dynamic_type_sizes, + sized_stackslots, + stackslots_size, outgoing_args_size: 0, clobbered: vec![], spillslots: None, @@ -837,7 +870,7 @@ impl ABICalleeImpl { ret_area_ptr: None, call_conv, flags, - isa_flags, + isa_flags: isa.isa_flags(), is_leaf: f.is_leaf(), stack_limit, probestack_min_frame, @@ -1060,12 +1093,16 @@ impl ABICallee for ABICalleeImpl { self.sig.rets.len() } - fn num_stackslots(&self) -> usize { - self.stackslots.len() + fn num_sized_stackslots(&self) -> usize { + self.sized_stackslots.len() } - fn stackslot_offsets(&self) -> &PrimaryMap { - &self.stackslots + fn sized_stackslot_offsets(&self) -> &PrimaryMap { + &self.sized_stackslots + } + + fn dynamic_stackslot_offsets(&self) -> &PrimaryMap { + &self.dynamic_stackslots } fn gen_copy_arg_to_regs( @@ -1256,15 +1293,34 @@ impl ABICallee for ABICalleeImpl { self.clobbered = clobbered; } - /// Produce an instruction that computes a stackslot address. - fn stackslot_addr(&self, slot: StackSlot, offset: u32, into_reg: Writable) -> Self::I { + /// Produce an instruction that computes a sized stackslot address. + fn sized_stackslot_addr( + &self, + slot: StackSlot, + offset: u32, + into_reg: Writable, + ) -> Self::I { // Offset from beginning of stackslot area, which is at nominal SP (see // [MemArg::NominalSPOffset] for more details on nominal SP tracking). - let stack_off = self.stackslots[slot] as i64; + let stack_off = self.sized_stackslots[slot] as i64; let sp_off: i64 = stack_off + (offset as i64); M::gen_get_stack_addr(StackAMode::NominalSPOffset(sp_off, I8), into_reg, I8) } + /// Produce an instruction that computes a dynamic stackslot address. + fn dynamic_stackslot_addr(&self, slot: DynamicStackSlot, into_reg: Writable) -> Self::I { + let stack_off = self.dynamic_stackslots[slot] as i64; + M::gen_get_stack_addr( + StackAMode::NominalSPOffset(stack_off, I64X2XN), + into_reg, + I64X2XN, + ) + } + + fn dynamic_type_size(&self, ty: Type) -> u32 { + self.dynamic_type_sizes[&ty] + } + /// Load from a spillslot. fn load_spillslot( &self, @@ -1339,8 +1395,12 @@ impl ABICallee for ABICalleeImpl { } let mask = M::stack_align(self.call_conv) - 1; let total_stacksize = (total_stacksize + mask) & !mask; // 16-align the stack. 
- let clobbered_callee_saves = - M::get_clobbered_callee_saves(self.call_conv, &self.flags, &self.clobbered); + let clobbered_callee_saves = M::get_clobbered_callee_saves( + self.call_conv, + &self.flags, + self.signature(), + &self.clobbered, + ); let mut insts = smallvec![]; if !self.call_conv.extends_baldrdash() { @@ -1408,6 +1468,7 @@ impl ABICallee for ABICalleeImpl { // Restore clobbered registers. insts.extend(M::gen_clobber_restore( self.call_conv, + self.signature(), &self.flags, &self.clobbered, self.fixed_frame_storage_size, @@ -1441,11 +1502,21 @@ impl ABICallee for ABICalleeImpl { } fn stack_args_size(&self) -> u32 { - self.sig.stack_arg_space as u32 + self.sig.sized_stack_arg_space as u32 } fn get_spillslot_size(&self, rc: RegClass) -> u32 { - M::get_number_of_spillslots_for_value(rc) + let max = if self.dynamic_type_sizes.len() == 0 { + 16 + } else { + *self + .dynamic_type_sizes + .iter() + .max_by(|x, y| x.1.cmp(&y.1)) + .map(|(_k, v)| v) + .unwrap() + }; + M::get_number_of_spillslots_for_value(rc, max) } fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg) -> Self::I { @@ -1586,17 +1657,17 @@ impl ABICaller for ABICallerImpl { } fn accumulate_outgoing_args_size>(&self, ctx: &mut C) { - let off = self.sig.stack_arg_space + self.sig.stack_ret_space; + let off = self.sig.sized_stack_arg_space + self.sig.sized_stack_ret_space; ctx.abi().accumulate_outgoing_args_size(off as u32); } fn emit_stack_pre_adjust>(&self, ctx: &mut C) { - let off = self.sig.stack_arg_space + self.sig.stack_ret_space; + let off = self.sig.sized_stack_arg_space + self.sig.sized_stack_ret_space; adjust_stack_and_nominal_sp::(ctx, off as i32, /* is_sub = */ true) } fn emit_stack_post_adjust>(&self, ctx: &mut C) { - let off = self.sig.stack_arg_space + self.sig.stack_ret_space; + let off = self.sig.sized_stack_arg_space + self.sig.sized_stack_ret_space; adjust_stack_and_nominal_sp::(ctx, off as i32, /* is_sub = */ false) } @@ -1720,7 +1791,7 @@ impl ABICaller for ABICallerImpl { ctx.emit(M::gen_move(*into_reg, Reg::from(reg), ty)); } &ABIArgSlot::Stack { offset, ty, .. } => { - let ret_area_base = self.sig.stack_arg_space; + let ret_area_base = self.sig.sized_stack_arg_space; ctx.emit(M::gen_load_stack( StackAMode::SPOffset(offset + ret_area_base, ty), *into_reg, @@ -1744,7 +1815,7 @@ impl ABICaller for ABICallerImpl { let word_type = M::word_type(); if let Some(i) = self.sig.stack_ret_arg { let rd = ctx.alloc_tmp(word_type).only_reg().unwrap(); - let ret_area_base = self.sig.stack_arg_space; + let ret_area_base = self.sig.sized_stack_arg_space; ctx.emit(M::gen_get_stack_addr( StackAMode::SPOffset(ret_area_base, I8), rd, diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index 2f2b48ca8f..28005863df 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -6,7 +6,9 @@ use smallvec::SmallVec; use std::cell::Cell; pub use super::MachLabel; -pub use crate::ir::{ArgumentExtension, ExternalName, FuncRef, GlobalValue, SigRef}; +pub use crate::ir::{ + ArgumentExtension, DynamicStackSlot, ExternalName, FuncRef, GlobalValue, SigRef, StackSlot, +}; pub use crate::isa::unwind::UnwindInst; pub use crate::machinst::{ABIArg, ABIArgSlot, ABISig, RealReg, Reg, RelocDistance, Writable}; @@ -243,7 +245,18 @@ macro_rules! 
isle_prelude_methods {
        #[inline]
        fn fits_in_32(&mut self, ty: Type) -> Option<Type> {
-            if ty.bits() <= 32 {
+            if ty.bits() <= 32 && !ty.is_dynamic_vector() {
                Some(ty)
            } else {
                None
            }
        }

+        #[inline]
+        fn lane_fits_in_32(&mut self, ty: Type) -> Option<Type> {
+            if !ty.is_vector() && !ty.is_dynamic_vector() {
+                None
+            } else if ty.lane_type().bits() <= 32 {
+                Some(ty)
+            } else {
+                None
+            }
@@ -252,7 +265,7 @@
        #[inline]
        fn fits_in_64(&mut self, ty: Type) -> Option<Type> {
-            if ty.bits() <= 64 {
+            if ty.bits() <= 64 && !ty.is_dynamic_vector() {
                Some(ty)
            } else {
                None
@@ -418,6 +431,36 @@
            }
        }

+        #[inline]
+        fn dynamic_lane(&mut self, ty: Type) -> Option<(u32, u32)> {
+            if ty.is_dynamic_vector() {
+                Some((ty.lane_bits(), ty.min_lane_count()))
+            } else {
+                None
+            }
+        }
+
+        #[inline]
+        fn dynamic_int_lane(&mut self, ty: Type) -> Option<u32> {
+            if ty.is_dynamic_vector() && crate::machinst::ty_has_int_representation(ty.lane_type())
+            {
+                Some(ty.lane_bits())
+            } else {
+                None
+            }
+        }
+
+        #[inline]
+        fn dynamic_fp_lane(&mut self, ty: Type) -> Option<u32> {
+            if ty.is_dynamic_vector()
+                && crate::machinst::ty_has_float_or_vec_representation(ty.lane_type())
+            {
+                Some(ty.lane_bits())
+            } else {
+                None
+            }
+        }
+
        #[inline]
        fn def_inst(&mut self, val: Value) -> Option<Inst> {
            self.lower_ctx.dfg().value_def(val).inst()
@@ -635,12 +678,12 @@
            }
        }

-        fn abi_stack_arg_space(&mut self, abi: &ABISig) -> i64 {
-            abi.stack_arg_space()
+        fn abi_sized_stack_arg_space(&mut self, abi: &ABISig) -> i64 {
+            abi.sized_stack_arg_space()
        }

-        fn abi_stack_ret_space(&mut self, abi: &ABISig) -> i64 {
-            abi.stack_ret_space()
+        fn abi_sized_stack_ret_space(&mut self, abi: &ABISig) -> i64 {
+            abi.sized_stack_ret_space()
        }

        fn abi_arg_only_slot(&mut self, arg: &ABIArg) -> Option<ABIArgSlot> {
@@ -656,6 +699,31 @@
            }
        }

+        fn abi_stackslot_addr(
+            &mut self,
+            dst: WritableReg,
+            stack_slot: StackSlot,
+            offset: Offset32,
+        ) -> MInst {
+            let offset = u32::try_from(i32::from(offset)).unwrap();
+            self.lower_ctx
+                .abi()
+                .sized_stackslot_addr(stack_slot, offset, dst)
+        }
+
+        fn abi_dynamic_stackslot_addr(
+            &mut self,
+            dst: WritableReg,
+            stack_slot: DynamicStackSlot,
+        ) -> MInst {
+            assert!(self
+                .lower_ctx
+                .abi()
+                .dynamic_stackslot_offsets()
+                .is_valid(stack_slot));
+            self.lower_ctx.abi().dynamic_stackslot_addr(stack_slot, dst)
+        }
+
        fn real_reg_to_reg(&mut self, reg: RealReg) -> Reg {
            Reg::from(reg)
        }
diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs
index 6d0d145349..a47f4a2a95 100644
--- a/cranelift/codegen/src/machinst/mod.rs
+++ b/cranelift/codegen/src/machinst/mod.rs
@@ -45,7 +45,7 @@
//! ```

use crate::binemit::{Addend, CodeInfo, CodeOffset, Reloc, StackMap};
-use crate::ir::{SourceLoc, StackSlot, Type};
+use crate::ir::{DynamicStackSlot, SourceLoc, StackSlot, Type};
use crate::result::CodegenResult;
use crate::settings::Flags;
use crate::value_label::ValueLabelsRanges;
@@ -282,7 +282,9 @@ pub struct MachCompileResult {
    /// Debug info: value labels to registers/stackslots at code offsets.
    pub value_labels_ranges: ValueLabelsRanges,
    /// Debug info: stackslots to stack pointer offsets.
-    pub stackslot_offsets: PrimaryMap<StackSlot, u32>,
+    pub sized_stackslot_offsets: PrimaryMap<StackSlot, u32>,
+    /// Debug info: dynamic stackslots to stack pointer offsets.
+    pub dynamic_stackslot_offsets: PrimaryMap<DynamicStackSlot, u32>,
    /// Basic-block layout info: block start offsets.
    ///
    /// This info is generated only if the `machine_code_cfg_info`
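To make the two offset maps concrete, here is a small self-contained sketch of the layout rule implemented by the slot-layout loops in `ABICalleeImpl::new` above: sized slots are laid out first, dynamic slots follow, and both are word-aligned. All names are local to the sketch, and an 8-byte word size is assumed:

    // Illustrative layout arithmetic only; mirrors ABICalleeImpl::new.
    fn layout_slots(sized: &[u32], dynamic: &[u32]) -> (Vec<u32>, Vec<u32>, u32) {
        let mask = 8 - 1;
        let mut off = 0u32;
        let mut sized_offsets = Vec::new();
        for &size in sized {
            sized_offsets.push(off);
            off = (off + size + mask) & !mask;
        }
        let mut dynamic_offsets = Vec::new();
        for &size in dynamic {
            dynamic_offsets.push(off);
            off = (off + size + mask) & !mask;
        }
        // `off` is the total `stackslots_size`.
        (sized_offsets, dynamic_offsets, off)
    }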
diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs
index bc4966de77..a29945383a 100644
--- a/cranelift/codegen/src/machinst/vcode.rs
+++ b/cranelift/codegen/src/machinst/vcode.rs
@@ -19,7 +19,9 @@
use crate::fx::FxHashMap;
use crate::fx::FxHashSet;
-use crate::ir::{self, types, Constant, ConstantData, LabelValueLoc, SourceLoc, ValueLabel};
+use crate::ir::{
+    self, types, Constant, ConstantData, DynamicStackSlot, LabelValueLoc, SourceLoc, ValueLabel,
+};
use crate::machinst::*;
use crate::timing;
use crate::ValueLocRange;
@@ -207,8 +209,11 @@ pub struct EmitResult {
    /// epilogue(s), and makes use of the regalloc results.
    pub disasm: Option<String>,

-    /// Offsets of stackslots.
-    pub stackslot_offsets: PrimaryMap<StackSlot, u32>,
+    /// Offsets of sized stackslots.
+    pub sized_stackslot_offsets: PrimaryMap<StackSlot, u32>,
+
+    /// Offsets of dynamic stackslots.
+    pub dynamic_stackslot_offsets: PrimaryMap<DynamicStackSlot, u32>,

    /// Value-labels information (debug metadata).
    pub value_labels_ranges: ValueLabelsRanges,
@@ -1038,7 +1043,8 @@ impl<I: VCodeInst> VCode<I> {
            inst_offsets,
            func_body_len,
            disasm: if want_disasm { Some(disasm) } else { None },
-            stackslot_offsets: self.abi.stackslot_offsets().clone(),
+            sized_stackslot_offsets: self.abi.sized_stackslot_offsets().clone(),
+            dynamic_stackslot_offsets: self.abi.dynamic_stackslot_offsets().clone(),
            value_labels_ranges,
            frame_size,
        }
diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle
index bf93a3146e..ccaef32341 100644
--- a/cranelift/codegen/src/prelude.isle
+++ b/cranelift/codegen/src/prelude.isle
@@ -256,6 +256,8 @@
(extern const $F32X4 Type)
(extern const $F64X2 Type)

+(extern const $I32X4XN Type)
+
;; Get the bit width of a given type.
(decl pure ty_bits (Type) u8)
(extern constructor ty_bits ty_bits)
@@ -290,6 +292,10 @@
(decl fits_in_32 (Type) Type)
(extern extractor fits_in_32 fits_in_32)

+;; An extractor that only matches vector types whose lanes fit in 32 bits.
+(decl lane_fits_in_32 (Type) Type)
+(extern extractor lane_fits_in_32 lane_fits_in_32)
+
;; An extractor that only matches types that can fit in 64 bits.
(decl fits_in_64 (Type) Type)
(extern extractor fits_in_64 fits_in_64)
@@ -433,6 +439,21 @@
(decl multi_lane (u32 u32) Type)
(extern extractor multi_lane multi_lane)

+;; Match a dynamic-lane type, extracting (# bits per lane, minimum # of
+;; lanes) from the given type.
+(decl dynamic_lane (u32 u32) Type)
+(extern extractor dynamic_lane dynamic_lane)
+
+;; Match a dynamic-lane integer type, extracting (# bits per lane) from the given
+;; type.
+(decl dynamic_int_lane (u32) Type)
+(extern extractor dynamic_int_lane dynamic_int_lane)
+
+;; Match a dynamic-lane floating point type, extracting (# bits per lane)
+;; from the given type.
+(decl dynamic_fp_lane (u32) Type)
+(extern extractor dynamic_fp_lane dynamic_fp_lane)
+
;; Match the instruction that defines the given value, if any.
(decl def_inst (Inst) Value)
(extern extractor def_inst def_inst)
@@ -727,12 +748,20 @@
(extern extractor abi_no_ret_arg abi_no_ret_arg)

;; Size of the argument area.
-(decl abi_stack_arg_space (ABISig) i64)
-(extern constructor abi_stack_arg_space abi_stack_arg_space)
+(decl abi_sized_stack_arg_space (ABISig) i64)
+(extern constructor abi_sized_stack_arg_space abi_sized_stack_arg_space)

;; Size of the return-value area.
-(decl abi_stack_ret_space (ABISig) i64) -(extern constructor abi_stack_ret_space abi_stack_ret_space) +(decl abi_sized_stack_ret_space (ABISig) i64) +(extern constructor abi_sized_stack_ret_space abi_sized_stack_ret_space) + +;; StackSlot addr +(decl abi_stackslot_addr (WritableReg StackSlot Offset32) MInst) +(extern constructor abi_stackslot_addr abi_stackslot_addr) + +;; DynamicStackSlot addr +(decl abi_dynamic_stackslot_addr (WritableReg DynamicStackSlot) MInst) +(extern constructor abi_dynamic_stackslot_addr abi_dynamic_stackslot_addr) ;; Extractor to detect the special case where an argument or ;; return value only requires a single slot to be passed. diff --git a/cranelift/codegen/src/verifier/mod.rs b/cranelift/codegen/src/verifier/mod.rs index 0d44f47e74..b9bc0c2ee4 100644 --- a/cranelift/codegen/src/verifier/mod.rs +++ b/cranelift/codegen/src/verifier/mod.rs @@ -65,8 +65,8 @@ use crate::ir; use crate::ir::entities::AnyEntity; use crate::ir::instructions::{BranchInfo, CallInfo, InstructionFormat, ResolvedConstraint}; use crate::ir::{ - types, ArgumentPurpose, Block, Constant, FuncRef, Function, GlobalValue, Inst, JumpTable, - Opcode, SigRef, StackSlot, Type, Value, ValueDef, ValueList, + types, ArgumentPurpose, Block, Constant, DynamicStackSlot, FuncRef, Function, GlobalValue, + Inst, JumpTable, Opcode, SigRef, StackSlot, Type, Value, ValueDef, ValueList, }; use crate::isa::TargetIsa; use crate::iterators::IteratorExtras; @@ -681,6 +681,14 @@ impl<'a> Verifier<'a> { StackLoad { stack_slot, .. } | StackStore { stack_slot, .. } => { self.verify_stack_slot(inst, stack_slot, errors)?; } + DynamicStackLoad { + dynamic_stack_slot, .. + } + | DynamicStackStore { + dynamic_stack_slot, .. + } => { + self.verify_dynamic_stack_slot(inst, dynamic_stack_slot, errors)?; + } UnaryGlobalValue { global_value, .. } => { self.verify_global_value(inst, global_value, errors)?; } @@ -819,7 +827,7 @@ impl<'a> Verifier<'a> { ss: StackSlot, errors: &mut VerifierErrors, ) -> VerifierStepResult<()> { - if !self.func.stack_slots.is_valid(ss) { + if !self.func.sized_stack_slots.is_valid(ss) { errors.nonfatal(( inst, self.context(inst), @@ -830,6 +838,23 @@ impl<'a> Verifier<'a> { } } + fn verify_dynamic_stack_slot( + &self, + inst: Inst, + ss: DynamicStackSlot, + errors: &mut VerifierErrors, + ) -> VerifierStepResult<()> { + if !self.func.dynamic_stack_slots.is_valid(ss) { + errors.nonfatal(( + inst, + self.context(inst), + format!("invalid dynamic stack slot {}", ss), + )) + } else { + Ok(()) + } + } + fn verify_global_value( &self, inst: Inst, diff --git a/cranelift/codegen/src/write.rs b/cranelift/codegen/src/write.rs index ff8bfea582..51b2235db5 100644 --- a/cranelift/codegen/src/write.rs +++ b/cranelift/codegen/src/write.rs @@ -41,7 +41,12 @@ pub trait FuncWriter { fn super_preamble(&mut self, w: &mut dyn Write, func: &Function) -> Result { let mut any = false; - for (ss, slot) in func.stack_slots.iter() { + for (ss, slot) in func.dynamic_stack_slots.iter() { + any = true; + self.write_entity_definition(w, func, ss.into(), slot)?; + } + + for (ss, slot) in func.sized_stack_slots.iter() { any = true; self.write_entity_definition(w, func, ss.into(), slot)?; } @@ -493,6 +498,14 @@ pub fn write_operands(w: &mut dyn Write, dfg: &DataFlowGraph, inst: Inst) -> fmt offset, .. } => write!(w, " {}, {}{}", arg, stack_slot, offset), + DynamicStackLoad { + dynamic_stack_slot, .. + } => write!(w, " {}", dynamic_stack_slot), + DynamicStackStore { + arg, + dynamic_stack_slot, + .. 
+ } => write!(w, " {}, {}", arg, dynamic_stack_slot), HeapAddr { heap, arg, imm, .. } => write!(w, " {}, {}, {}", heap, arg, imm), TableAddr { table, arg, .. } => write!(w, " {}, {}", table, arg), Load { @@ -570,7 +583,7 @@ mod tests { f.name = ExternalName::testcase("foo"); assert_eq!(f.to_string(), "function %foo() fast {\n}\n"); - f.create_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 4)); + f.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 4)); assert_eq!( f.to_string(), "function %foo() fast {\n ss0 = explicit_slot 4\n}\n" diff --git a/cranelift/filetests/filetests/isa/aarch64/dynamic-simd-narrow.clif b/cranelift/filetests/filetests/isa/aarch64/dynamic-simd-narrow.clif new file mode 100644 index 0000000000..7f39747abc --- /dev/null +++ b/cranelift/filetests/filetests/isa/aarch64/dynamic-simd-narrow.clif @@ -0,0 +1,164 @@ +test compile +target aarch64 + +function %snarrow_i16x8(i16) -> i8x16 { + gv0 = dyn_scale_target_const.i16x8 + gv1 = dyn_scale_target_const.i8x16 + dt0 = i16x8*gv0 + dt1 = i8x16*gv0 + +block0(v0: i16): + v1 = splat.dt0 v0 + v2 = snarrow.dt0 v1, v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.8h, w0 +; nextln: sqxtn v0.8b, v2.8h +; nextln: sqxtn2 v0.16b, v2.8h +; nextln: ret + +function %snarrow_i32x4(i32) -> i16x8 { + gv0 = dyn_scale_target_const.i32x4 + gv1 = dyn_scale_target_const.i16x8 + dt0 = i32x4*gv0 + dt1 = i16x8*gv0 + +block0(v0: i32): + v1 = splat.dt0 v0 + v2 = snarrow.dt0 v1, v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.4s, w0 +; nextln: sqxtn v0.4h, v2.4s +; nextln: sqxtn2 v0.8h, v2.4s +; nextln: ret + +function %snarrow_i64x2(i64) -> i32x4 { + gv0 = dyn_scale_target_const.i64x2 + gv1 = dyn_scale_target_const.i32x4 + dt0 = i64x2*gv0 + dt1 = i32x4*gv0 + +block0(v0: i64): + v1 = splat.dt0 v0 + v2 = snarrow.dt0 v1, v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.2d, x0 +; nextln: sqxtn v0.2s, v2.2d +; nextln: sqxtn2 v0.4s, v2.2d +; nextln: ret + +function %unarrow_i16x8(i16) -> i8x16 { + gv0 = dyn_scale_target_const.i16x8 + gv1 = dyn_scale_target_const.i8x16 + dt0 = i16x8*gv0 + dt1 = i8x16*gv0 + +block0(v0: i16): + v1 = splat.dt0 v0 + v2 = unarrow.dt0 v1, v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.8h, w0 +; nextln: sqxtun v0.8b, v2.8h +; nextln: sqxtun2 v0.16b, v2.8h +; nextln: ret + +function %unarrow_i32x4(i32) -> i16x8 { + gv0 = dyn_scale_target_const.i32x4 + gv1 = dyn_scale_target_const.i16x8 + dt0 = i32x4*gv0 + dt1 = i16x8*gv0 + +block0(v0: i32): + v1 = splat.dt0 v0 + v2 = unarrow.dt0 v1, v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.4s, w0 +; nextln: sqxtun v0.4h, v2.4s +; nextln: sqxtun2 v0.8h, v2.4s +; nextln: ret + +function %unarrow_i64x2(i64) -> i32x4 { + gv0 = dyn_scale_target_const.i64x2 + gv1 = dyn_scale_target_const.i32x4 + dt0 = i64x2*gv0 + dt1 = i32x4*gv0 + +block0(v0: i64): + v1 = splat.dt0 v0 + v2 = unarrow.dt0 v1, v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.2d, x0 +; nextln: sqxtun v0.2s, v2.2d +; nextln: sqxtun2 v0.4s, v2.2d +; nextln: ret + +function %uunarrow_i16x8(i16) -> i8x16 { + gv0 = dyn_scale_target_const.i16x8 + gv1 = dyn_scale_target_const.i8x16 + dt0 = i16x8*gv0 + dt1 = i8x16*gv0 + +block0(v0: i16): + v1 = splat.dt0 v0 + v2 = uunarrow.dt0 v1, v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.8h, w0 +; nextln: uqxtn v0.8b, v2.8h +; nextln: uqxtn2 v0.16b, v2.8h +; nextln: ret + +function %uunarrow_i32x4(i32) -> i16x8 { + gv0 = dyn_scale_target_const.i32x4 + gv1 = 
dyn_scale_target_const.i16x8 + dt0 = i32x4*gv0 + dt1 = i16x8*gv0 + +block0(v0: i32): + v1 = splat.dt0 v0 + v2 = uunarrow.dt0 v1, v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.4s, w0 +; nextln: uqxtn v0.4h, v2.4s +; nextln: uqxtn2 v0.8h, v2.4s +; nextln: ret + +function %uunarrow_i64x2(i64) -> i32x4 { + gv0 = dyn_scale_target_const.i64x2 + gv1 = dyn_scale_target_const.i32x4 + dt0 = i64x2*gv0 + dt1 = i32x4*gv0 + +block0(v0: i64): + v1 = splat.dt0 v0 + v2 = uunarrow.dt0 v1, v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.2d, x0 +; nextln: uqxtn v0.2s, v2.2d +; nextln: uqxtn2 v0.4s, v2.2d +; nextln: ret diff --git a/cranelift/filetests/filetests/isa/aarch64/dynamic-simd-neon.clif b/cranelift/filetests/filetests/isa/aarch64/dynamic-simd-neon.clif new file mode 100644 index 0000000000..255e19bfde --- /dev/null +++ b/cranelift/filetests/filetests/isa/aarch64/dynamic-simd-neon.clif @@ -0,0 +1,104 @@ +test compile +target aarch64 + +function %i8x16_splat_add(i8, i8) -> i8x16 { + gv0 = dyn_scale_target_const.i8x16 + dt0 = i8x16*gv0 + +block0(v0: i8, v1: i8): + v2 = splat.dt0 v0 + v3 = splat.dt0 v1 + v4 = iadd v2, v3 + v5 = extract_vector v4, 0 + return v5 +} + +; check: dup v4.16b, w0 +; nextln: dup v6.16b, w1 +; nextln: add v0.16b, v4.16b, v6.16b +; nextln: ret + +function %i16x8_splat_add(i16, i16) -> i16x8 { + gv0 = dyn_scale_target_const.i16x8 + dt0 = i16x8*gv0 + +block0(v0: i16, v1: i16): + v2 = splat.dt0 v0 + v3 = splat.dt0 v1 + v4 = iadd v2, v3 + v5 = extract_vector v4, 0 + return v5 +} + +; check: dup v4.8h, w0 +; nextln: dup v6.8h, w1 +; nextln: add v0.8h, v4.8h, v6.8h +; nextln: ret + +function %i32x4_splat_mul(i32, i32) -> i32x4 { + gv0 = dyn_scale_target_const.i32x4 + dt0 = i32x4*gv0 + +block0(v0: i32, v1: i32): + v2 = splat.dt0 v0 + v3 = splat.dt0 v1 + v4 = imul v2, v3 + v5 = extract_vector v4, 0 + return v5 +} + +; check: dup v4.4s, w0 +; nextln: dup v6.4s, w1 +; nextln: mul v0.4s, v4.4s, v6.4s +; nextln: ret + +function %i64x2_splat_sub(i64, i64) -> i64x2 { + gv0 = dyn_scale_target_const.i64x2 + dt0 = i64x2*gv0 + +block0(v0: i64, v1: i64): + v2 = splat.dt0 v0 + v3 = splat.dt0 v1 + v4 = isub v2, v3 + v5 = extract_vector v4, 0 + return v5 +} + +; check: dup v4.2d, x0 +; nextln: dup v6.2d, x1 +; nextln: sub v0.2d, v4.2d, v6.2d +; nextln: ret + +function %f32x4_splat_add(f32, f32) -> f32x4 { + gv0 = dyn_scale_target_const.f32x4 + dt0 = f32x4*gv0 + +block0(v0: f32, v1: f32): + v2 = splat.dt0 v0 + v3 = splat.dt0 v1 + v4 = fadd v2, v3 + v5 = extract_vector v4, 0 + return v5 +} + +; check: dup v4.4s, v0.s[0] +; nextln: dup v6.4s, v1.s[0] +; nextln: fadd v0.4s, v4.4s, v6.4s +; nextln: ret + +function %f64x2_splat_sub(f64, f64) -> f64x2 { + gv0 = dyn_scale_target_const.f64x2 + dt0 = f64x2*gv0 + +block0(v0: f64, v1: f64): + v2 = splat.dt0 v0 + v3 = splat.dt0 v1 + v4 = fsub v2, v3 + v5 = extract_vector v4, 0 + return v5 +} + +; check: dup v4.2d, v0.d[0] +; nextln: dup v6.2d, v1.d[0] +; nextln: fsub v0.2d, v4.2d, v6.2d +; nextln: ret diff --git a/cranelift/filetests/filetests/isa/aarch64/dynamic-simd-widen.clif b/cranelift/filetests/filetests/isa/aarch64/dynamic-simd-widen.clif new file mode 100644 index 0000000000..5161c48ae1 --- /dev/null +++ b/cranelift/filetests/filetests/isa/aarch64/dynamic-simd-widen.clif @@ -0,0 +1,104 @@ +test compile +target aarch64 + +function %swidenhigh_i8x16(i8) -> i16x8 { + gv0 = dyn_scale_target_const.i16x8 + gv1 = dyn_scale_target_const.i8x16 + dt0 = i8x16*gv1 + dt1 = i16x8*gv0 + +block0(v0: i8): + v1 = splat.dt0 v0 + v2 = 
swiden_high v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.16b, w0 +; nextln: sxtl2 v0.8h, v2.16b +; nextln: ret + +function %swidenhigh_i16x8(i16) -> i32x4 { + gv0 = dyn_scale_target_const.i32x4 + gv1 = dyn_scale_target_const.i16x8 + dt0 = i16x8*gv1 + dt1 = i32x4*gv0 + +block0(v0: i16): + v1 = splat.dt0 v0 + v2 = swiden_high v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.8h, w0 +; nextln: sxtl2 v0.4s, v2.8h +; nextln: ret + +function %swidenhigh_i32x4(i32) -> i64x2 { + gv0 = dyn_scale_target_const.i32x4 + gv1 = dyn_scale_target_const.i64x2 + dt0 = i64x2*gv1 + dt1 = i32x4*gv0 + +block0(v0: i32): + v1 = splat.dt1 v0 + v2 = swiden_high v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.4s, w0 +; nextln: sxtl2 v0.2d, v2.4s +; nextln: ret + +function %swidenlow_i8x16(i8) -> i16x8 { + gv0 = dyn_scale_target_const.i16x8 + gv1 = dyn_scale_target_const.i8x16 + dt0 = i8x16*gv1 + dt1 = i16x8*gv0 + +block0(v0: i8): + v1 = splat.dt0 v0 + v2 = swiden_low v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.16b, w0 +; nextln: sxtl v0.8h, v2.8b +; nextln: ret + +function %swidenlow_i16x8(i16) -> i32x4 { + gv0 = dyn_scale_target_const.i32x4 + gv1 = dyn_scale_target_const.i16x8 + dt0 = i16x8*gv1 + dt1 = i32x4*gv0 + +block0(v0: i16): + v1 = splat.dt0 v0 + v2 = swiden_low v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.8h, w0 +; nextln: sxtl v0.4s, v2.4h +; nextln: ret + +function %swidenlow_i32x4(i32) -> i64x2 { + gv0 = dyn_scale_target_const.i32x4 + gv1 = dyn_scale_target_const.i64x2 + dt0 = i64x2*gv1 + dt1 = i32x4*gv0 + +block0(v0: i32): + v1 = splat.dt1 v0 + v2 = swiden_low v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.4s, w0 +; nextln: sxtl v0.2d, v2.2s +; nextln: ret diff --git a/cranelift/filetests/filetests/isa/aarch64/dynamic-slot.clif b/cranelift/filetests/filetests/isa/aarch64/dynamic-slot.clif new file mode 100644 index 0000000000..982457c889 --- /dev/null +++ b/cranelift/filetests/filetests/isa/aarch64/dynamic-slot.clif @@ -0,0 +1,129 @@ +test compile precise-output +target aarch64 + +function %store_scale() { + gv0 = dyn_scale_target_const.i32x4 + ss0 = explicit_slot 8 + +block0: + v0 = global_value.i64 gv0 + stack_store.i64 v0, ss0 + return +} + +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; sub sp, sp, #16 +; block0: +; mov x0, sp +; movz x2, #1 +; str x2, [x0] +; add sp, sp, #16 +; ldp fp, lr, [sp], #16 +; ret + +function %store_scale_lt_128() { + gv0 = dyn_scale_target_const.i16x4 + ss0 = explicit_slot 8 + +block0: + v0 = global_value.i64 gv0 + stack_store.i64 v0, ss0 + return +} + +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; sub sp, sp, #16 +; block0: +; mov x0, sp +; movz x2, #1 +; str x2, [x0] +; add sp, sp, #16 +; ldp fp, lr, [sp], #16 +; ret + +function %store_explicit(i32) { + gv0 = dyn_scale_target_const.i32x4 + dt0 = i32x4*gv0 + dss0 = explicit_dynamic_slot dt0 + +block0(v0: i32): + v1 = splat.dt0 v0 + dynamic_stack_store.dt0 v1, dss0 + return +} + +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; sub sp, sp, #16 +; block0: +; dup v2.4s, w0 +; mov x4, sp +; str q2, [x4] +; add sp, sp, #16 +; ldp fp, lr, [sp], #16 +; ret + +function %load_explicit() -> i32x4 { + gv0 = dyn_scale_target_const.i32x4 + dt0 = i32x4*gv0 + dss0 = explicit_dynamic_slot dt0 + +block0: + v0 = dynamic_stack_load.dt0 dss0 + v1 = extract_vector.dt0 v0, 0 + return v1 +} + +; stp fp, lr, [sp, #-16]! 
+; mov fp, sp
+; sub sp, sp, #16
+; block0:
+; mov x3, sp
+; ldr q0, [x3]
+; add sp, sp, #16
+; ldp fp, lr, [sp], #16
+; ret
+
+function %store_implicit(i32) {
+    gv0 = dyn_scale_target_const.i32x4
+    dt0 = i32x4*gv0
+    dss0 = explicit_dynamic_slot dt0
+
+block0(v0: i32):
+    v1 = splat.dt0 v0
+    dynamic_stack_store v1, dss0
+    return
+}
+
+; stp fp, lr, [sp, #-16]!
+; mov fp, sp
+; sub sp, sp, #16
+; block0:
+; dup v2.4s, w0
+; mov x4, sp
+; str q2, [x4]
+; add sp, sp, #16
+; ldp fp, lr, [sp], #16
+; ret
+
+function %addr() -> i64 {
+    gv0 = dyn_scale_target_const.i32x4
+    dt0 = i32x4*gv0
+    dss0 = explicit_dynamic_slot dt0
+
+block0:
+    v0 = dynamic_stack_addr.i64 dss0
+    return v0
+}
+
+; stp fp, lr, [sp, #-16]!
+; mov fp, sp
+; sub sp, sp, #16
+; block0:
+; mov x0, sp
+; add sp, sp, #16
+; ldp fp, lr, [sp], #16
+; ret
+
diff --git a/cranelift/filetests/filetests/runtests/dynamic-simd-arithmetic.clif b/cranelift/filetests/filetests/runtests/dynamic-simd-arithmetic.clif
new file mode 100644
index 0000000000..de7dcdc79f
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/dynamic-simd-arithmetic.clif
@@ -0,0 +1,197 @@
+test run
+target aarch64
+
+function %i8x16_splat_add(i8, i8) -> i8x16 {
+    gv0 = dyn_scale_target_const.i8x16
+    dt0 = i8x16*gv0
+
+block0(v0: i8, v1: i8):
+    v2 = splat.dt0 v0
+    v3 = splat.dt0 v1
+    v4 = iadd v2, v3
+    v5 = extract_vector v4, 0
+    return v5
+}
+; run: %i8x16_splat_add(1, 3) == [4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4]
+
+function %i16x8_splat_add(i16, i16) -> i16x8 {
+    gv0 = dyn_scale_target_const.i16x8
+    dt0 = i16x8*gv0
+
+block0(v0: i16, v1: i16):
+    v2 = splat.dt0 v0
+    v3 = splat.dt0 v1
+    v4 = iadd v2, v3
+    v5 = extract_vector v4, 0
+    return v5
+}
+; run: %i16x8_splat_add(255, 254) == [509 509 509 509 509 509 509 509]
+
+function %i32x4_splat_add(i32, i32) -> i32x4 {
+    gv0 = dyn_scale_target_const.i32x4
+    dt0 = i32x4*gv0
+
+block0(v0: i32, v1: i32):
+    v2 = splat.dt0 v0
+    v3 = splat.dt0 v1
+    v4 = iadd v2, v3
+    v5 = extract_vector v4, 0
+    return v5
+}
+; run: %i32x4_splat_add(1234, 8765) == [9999 9999 9999 9999]
+
+function %i64x2_splat_add(i64, i64) -> i64x2 {
+    gv0 = dyn_scale_target_const.i64x2
+    dt0 = i64x2*gv0
+
+block0(v0: i64, v1: i64):
+    v2 = splat.dt0 v0
+    v3 = splat.dt0 v1
+    v4 = iadd v2, v3
+    v5 = extract_vector v4, 0
+    return v5
+}
+; run: %i64x2_splat_add(4321, 8765) == [13086 13086]
+
+function %i8x16_splat_sub(i8, i8) -> i8x16 {
+    gv0 = dyn_scale_target_const.i8x16
+    dt0 = i8x16*gv0
+
+block0(v0: i8, v1: i8):
+    v2 = splat.dt0 v0
+    v3 = splat.dt0 v1
+    v4 = isub v2, v3
+    v5 = extract_vector v4, 0
+    return v5
+}
+; run: %i8x16_splat_sub(127, 126) == [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
+
+function %i16x8_splat_sub(i16, i16) -> i16x8 {
+    gv0 = dyn_scale_target_const.i16x8
+    dt0 = i16x8*gv0
+
+block0(v0: i16, v1: i16):
+    v2 = splat.dt0 v0
+    v3 = splat.dt0 v1
+    v4 = isub v2, v3
+    v5 = extract_vector v4, 0
+    return v5
+}
+; run: %i16x8_splat_sub(12345, 6789) == [5556 5556 5556 5556 5556 5556 5556 5556]
+
+function %i32x4_splat_sub(i32, i32) -> i32x4 {
+    gv0 = dyn_scale_target_const.i32x4
+    dt0 = i32x4*gv0
+
+block0(v0: i32, v1: i32):
+    v2 = splat.dt0 v0
+    v3 = splat.dt0 v1
+    v4 = isub v2, v3
+    v5 = extract_vector v4, 0
+    return v5
+}
+; run: %i32x4_splat_sub(1, 3) == [-2 -2 -2 -2]
+
+function %i64x2_splat_sub(i64, i64) -> i64x2 {
+    gv0 = dyn_scale_target_const.i64x2
+    dt0 = i64x2*gv0
+
+block0(v0: i64, v1: i64):
+    v2 = splat.dt0 v0
+    v3 = splat.dt0 v1
+    v4 = isub v2, v3
+    v5 = extract_vector v4, 0
+    return v5
+}
+; run: %i64x2_splat_sub(255, 65535) == [-65280 -65280]
+
+function %i8x16_splat_mul(i8, i8) -> i8x16 {
+    gv0 = dyn_scale_target_const.i8x16
+    dt0 = i8x16*gv0
+
+block0(v0: i8, v1: i8):
+    v2 = splat.dt0 v0
+    v3 = splat.dt0 v1
+    v4 = imul v2, v3
+    v5 = extract_vector v4, 0
+    return v5
+}
+; run: %i8x16_splat_mul(15, 15) == [225 225 225 225 225 225 225 225 225 225 225 225 225 225 225 225]
+
+function %i16x8_splat_mul(i16, i16) -> i16x8 {
+    gv0 = dyn_scale_target_const.i16x8
+    dt0 = i16x8*gv0
+
+block0(v0: i16, v1: i16):
+    v2 = splat.dt0 v0
+    v3 = splat.dt0 v1
+    v4 = imul v2, v3
+    v5 = extract_vector v4, 0
+    return v5
+}
+; run: %i16x8_splat_mul(135, 246) == [33210 33210 33210 33210 33210 33210 33210 33210]
+
+function %i32x4_splat_mul(i32, i32) -> i32x4 {
+    gv0 = dyn_scale_target_const.i32x4
+    dt0 = i32x4*gv0
+
+block0(v0: i32, v1: i32):
+    v2 = splat.dt0 v0
+    v3 = splat.dt0 v1
+    v4 = imul v2, v3
+    v5 = extract_vector v4, 0
+    return v5
+}
+; run: %i32x4_splat_mul(2, 3) == [6 6 6 6]
+
+function %f32x4_splat_add(f32, f32) -> f32x4 {
+    gv0 = dyn_scale_target_const.f32x4
+    dt0 = f32x4*gv0
+
+block0(v0: f32, v1: f32):
+    v2 = splat.dt0 v0
+    v3 = splat.dt0 v1
+    v4 = fadd v2, v3
+    v5 = extract_vector v4, 0
+    return v5
+}
+; run: %f32x4_splat_add(0x1.2, 0x3.4) == [0x4.6 0x4.6 0x4.6 0x4.6]
+
+function %f64x2_splat_add(f64, f64) -> f64x2 {
+    gv0 = dyn_scale_target_const.f64x2
+    dt0 = f64x2*gv0
+
+block0(v0: f64, v1: f64):
+    v2 = splat.dt0 v0
+    v3 = splat.dt0 v1
+    v4 = fadd v2, v3
+    v5 = extract_vector v4, 0
+    return v5
+}
+; run: %f64x2_splat_add(0x1.0, 0x2.0) == [0x3.0 0x3.0]
+
+function %f32x4_splat_sub(f32, f32) -> f32x4 {
+    gv0 = dyn_scale_target_const.f32x4
+    dt0 = f32x4*gv0
+
+block0(v0: f32, v1: f32):
+    v2 = splat.dt0 v0
+    v3 = splat.dt0 v1
+    v4 = fsub v2, v3
+    v5 = extract_vector v4, 0
+    return v5
+}
+; run: %f32x4_splat_sub(0x1.2, 0x3.4) == [-0x2.2 -0x2.2 -0x2.2 -0x2.2]
+
+function %f64x2_splat_sub(f64, f64) -> f64x2 {
+    gv0 = dyn_scale_target_const.f64x2
+    dt0 = f64x2*gv0
+
+block0(v0: f64, v1: f64):
+    v2 = splat.dt0 v0
+    v3 = splat.dt0 v1
+    v4 = fsub v2, v3
+    v5 = extract_vector v4, 0
+    return v5
+}
+; run: %f64x2_splat_sub(0x1.0, 0x3.0) == [-0x2.0 -0x2.0]
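Every run test in this file shares one shape: splat two scalars into a dynamic vector, apply the lane-wise operation, then take fixed-size block 0 with `extract_vector`. Assuming the target's dynamic scale resolves to 1 (the `movz x2, #1` in the dynamic-slot tests above shows exactly that on plain Neon), a scalar model of `%i32x4_splat_add` is:

    // Scalar model of %i32x4_splat_add when dt0 is exactly i32x4 (scale 1):
    // splat fills every lane, iadd applies lane-wise with wrapping,
    // and `extract_vector x, 0` returns fixed-size block 0.
    fn i32x4_splat_add(a: i32, b: i32) -> [i32; 4] {
        let va = [a; 4]; // v2 = splat.dt0 v0
        let vb = [b; 4]; // v3 = splat.dt0 v1
        // v4 = iadd v2, v3; v5 = extract_vector v4, 0
        std::array::from_fn(|i| va[i].wrapping_add(vb[i]))
    }

So `i32x4_splat_add(1234, 8765)` yields `[9999; 4]`, matching the corresponding run line above.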
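A sketch of how a frontend would drive the two slot-creation methods just added: the sized half is the old path renamed, while the dynamic half threads a `dyn_scale_target_const` global value through a dynamic type, mirroring the textual form in the filetests. The wiring below is assembled from this patch's IR changes, not prescribed by it:

    use cranelift_codegen::ir::{
        types, DynamicStackSlotData, DynamicTypeData, GlobalValueData, StackSlotData,
        StackSlotKind,
    };
    use cranelift_frontend::FunctionBuilder;

    fn make_slots(builder: &mut FunctionBuilder) {
        // A sized slot: 16 bytes, fixed at compile time.
        let _ss = builder
            .create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 16));

        // A dynamic slot: i32x4 scaled by the target's dynamic scale, i.e.
        // the textual `gv0 = dyn_scale_target_const.i32x4`,
        // `dt0 = i32x4*gv0`, `dss0 = explicit_dynamic_slot dt0`.
        let gv = builder
            .func
            .create_global_value(GlobalValueData::DynScaleTargetConst {
                vector_type: types::I32X4,
            });
        let dt = builder
            .func
            .dfg
            .make_dynamic_ty(DynamicTypeData::new(types::I32X4, gv));
        let _dss = builder.create_dynamic_stack_slot(DynamicStackSlotData::new(
            StackSlotKind::ExplicitDynamicSlot,
            dt,
        ));
    }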
diff --git a/cranelift/interpreter/src/interpreter.rs b/cranelift/interpreter/src/interpreter.rs
index 504af716f9..a41447b986 100644
--- a/cranelift/interpreter/src/interpreter.rs
+++ b/cranelift/interpreter/src/interpreter.rs
@@ -301,12 +301,12 @@ impl<'a> State<'a, DataValue> for InterpreterState<'a> {
     fn push_frame(&mut self, function: &'a Function) {
         if let Some(frame) = self.frame_stack.iter().last() {
-            self.frame_offset += frame.function.stack_size() as usize;
+            self.frame_offset += frame.function.fixed_stack_size() as usize;
         }
 
         // Grow the stack by the space necessary for this frame
         self.stack
-            .extend(iter::repeat(0).take(function.stack_size() as usize));
+            .extend(iter::repeat(0).take(function.fixed_stack_size() as usize));
         self.frame_stack.push(Frame::new(function));
     }
@@ -314,11 +314,11 @@ impl<'a> State<'a, DataValue> for InterpreterState<'a> {
         if let Some(frame) = self.frame_stack.pop() {
             // Shorten the stack after exiting the frame
             self.stack
-                .truncate(self.stack.len() - frame.function.stack_size() as usize);
+                .truncate(self.stack.len() - frame.function.fixed_stack_size() as usize);
 
             // Reset frame_offset to the start of this function
             if let Some(frame) = self.frame_stack.iter().last() {
-                self.frame_offset -= frame.function.stack_size() as usize;
+                self.frame_offset -= frame.function.fixed_stack_size() as usize;
             }
         }
     }
@@ -358,7 +358,7 @@ impl<'a> State<'a, DataValue> for InterpreterState<'a> {
         slot: StackSlot,
         offset: u64,
     ) -> Result<Address, MemoryError> {
-        let stack_slots = &self.get_current_function().stack_slots;
+        let stack_slots = &self.get_current_function().sized_stack_slots;
         let stack_slot = &stack_slots[slot];
 
         // offset must be `0 <= Offset < sizeof(SS)`
@@ -539,6 +539,7 @@ impl<'a> State<'a, DataValue> for InterpreterState<'a> {
                     action_stack.push(ResolveAction::Resolve(base));
                 }
                 GlobalValueData::Symbol { .. } => unimplemented!(),
+                GlobalValueData::DynScaleTargetConst { .. } => unimplemented!(),
             },
             Some(ResolveAction::Add(dv)) => {
                 current_val = current_val
diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs
index df8e25530c..d1f8b7c971 100644
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -381,6 +381,9 @@ where
                 })
             })
         }
+        Opcode::DynamicStackAddr => unimplemented!("DynamicStackSlot"),
+        Opcode::DynamicStackLoad => unimplemented!("DynamicStackLoad"),
+        Opcode::DynamicStackStore => unimplemented!("DynamicStackStore"),
        Opcode::GlobalValue => {
            if let InstructionData::UnaryGlobalValue { global_value, .. } = inst {
                assign_or_memtrap(state.resolve_global_value(global_value))
@@ -995,6 +998,9 @@ where
            assign(vectorizelanes(&new_vec, ctrl_ty)?)
} Opcode::IaddPairwise => assign(binary_pairwise(arg(0)?, arg(1)?, ctrl_ty, Value::add)?), + Opcode::ExtractVector => { + unimplemented!("ExtractVector not supported"); + } }) } diff --git a/cranelift/reader/src/lexer.rs b/cranelift/reader/src/lexer.rs index f100d63219..6b471be0f8 100644 --- a/cranelift/reader/src/lexer.rs +++ b/cranelift/reader/src/lexer.rs @@ -15,40 +15,43 @@ use std::u16; #[derive(Debug, PartialEq, Eq, Clone, Copy)] pub enum Token<'a> { Comment(&'a str), - LPar, // '(' - RPar, // ')' - LBrace, // '{' - RBrace, // '}' - LBracket, // '[' - RBracket, // ']' - Minus, // '-' - Plus, // '+' - Comma, // ',' - Dot, // '.' - Colon, // ':' - Equal, // '=' - Not, // '!' - Arrow, // '->' - Float(&'a str), // Floating point immediate - Integer(&'a str), // Integer immediate - Type(types::Type), // i32, f32, b32x4, ... - Value(Value), // v12, v7 - Block(Block), // block3 - Cold, // cold (flag on block) - StackSlot(u32), // ss3 - GlobalValue(u32), // gv3 - Heap(u32), // heap2 - Table(u32), // table2 - JumpTable(u32), // jt2 - Constant(u32), // const2 - FuncRef(u32), // fn2 - SigRef(u32), // sig2 - UserRef(u32), // u345 - Name(&'a str), // %9arbitrary_alphanum, %x3, %0, %function ... - String(&'a str), // "arbitrary quoted string with no escape" ... - HexSequence(&'a str), // #89AF - Identifier(&'a str), // Unrecognized identifier (opcode, enumerator, ...) - SourceLoc(&'a str), // @00c7 + LPar, // '(' + RPar, // ')' + LBrace, // '{' + RBrace, // '}' + LBracket, // '[' + RBracket, // ']' + Minus, // '-' + Plus, // '+' + Multiply, // '*' + Comma, // ',' + Dot, // '.' + Colon, // ':' + Equal, // '=' + Not, // '!' + Arrow, // '->' + Float(&'a str), // Floating point immediate + Integer(&'a str), // Integer immediate + Type(types::Type), // i32, f32, b32x4, ... + DynamicType(u32), // dt5 + Value(Value), // v12, v7 + Block(Block), // block3 + Cold, // cold (flag on block) + StackSlot(u32), // ss3 + DynamicStackSlot(u32), // dss4 + GlobalValue(u32), // gv3 + Heap(u32), // heap2 + Table(u32), // table2 + JumpTable(u32), // jt2 + Constant(u32), // const2 + FuncRef(u32), // fn2 + SigRef(u32), // sig2 + UserRef(u32), // u345 + Name(&'a str), // %9arbitrary_alphanum, %x3, %0, %function ... + String(&'a str), // "arbitrary quoted string with no escape" ... + HexSequence(&'a str), // #89AF + Identifier(&'a str), // Unrecognized identifier (opcode, enumerator, ...) + SourceLoc(&'a str), // @00c7 } /// A `Token` with an associated location. 
@@ -341,6 +344,8 @@ impl<'a> Lexer<'a> {
             "v" => Value::with_number(number).map(Token::Value),
             "block" => Block::with_number(number).map(Token::Block),
             "ss" => Some(Token::StackSlot(number)),
+            "dss" => Some(Token::DynamicStackSlot(number)),
+            "dt" => Some(Token::DynamicType(number)),
             "gv" => Some(Token::GlobalValue(number)),
             "heap" => Some(Token::Heap(number)),
             "table" => Some(Token::Table(number)),
@@ -482,6 +487,7 @@ impl<'a> Lexer<'a> {
             Some('=') => Some(self.scan_char(Token::Equal)),
             Some('!') => Some(self.scan_char(Token::Not)),
             Some('+') => Some(self.scan_number()),
+            Some('*') => Some(self.scan_char(Token::Multiply)),
             Some('-') => {
                 if self.looking_at("->") {
                     Some(self.scan_chars(2, Token::Arrow))
diff --git a/cranelift/reader/src/parser.rs b/cranelift/reader/src/parser.rs
index 3e04e3af96..f1f279ef15 100644
--- a/cranelift/reader/src/parser.rs
+++ b/cranelift/reader/src/parser.rs
@@ -11,16 +11,17 @@ use crate::testfile::{Comment, Details, Feature, TestFile};
 use cranelift_codegen::data_value::DataValue;
 use cranelift_codegen::entity::EntityRef;
 use cranelift_codegen::ir;
-use cranelift_codegen::ir::entities::AnyEntity;
+use cranelift_codegen::ir::entities::{AnyEntity, DynamicType};
 use cranelift_codegen::ir::immediates::{Ieee32, Ieee64, Imm64, Offset32, Uimm32, Uimm64};
 use cranelift_codegen::ir::instructions::{InstructionData, InstructionFormat, VariableArgs};
 use cranelift_codegen::ir::types::INVALID;
 use cranelift_codegen::ir::types::*;
 use cranelift_codegen::ir::{
-    AbiParam, ArgumentExtension, ArgumentPurpose, Block, Constant, ConstantData, ExtFuncData,
-    ExternalName, FuncRef, Function, GlobalValue, GlobalValueData, Heap, HeapData, HeapStyle,
-    JumpTable, JumpTableData, MemFlags, Opcode, SigRef, Signature, StackSlot, StackSlotData,
-    StackSlotKind, Table, TableData, Type, Value,
+    AbiParam, ArgumentExtension, ArgumentPurpose, Block, Constant, ConstantData, DynamicStackSlot,
+    DynamicStackSlotData, DynamicTypeData, ExtFuncData, ExternalName, FuncRef, Function,
+    GlobalValue, GlobalValueData, Heap, HeapData, HeapStyle, JumpTable, JumpTableData, MemFlags,
+    Opcode, SigRef, Signature, StackSlot, StackSlotData, StackSlotKind, Table, TableData, Type,
+    Value,
 };
 use cranelift_codegen::isa::{self, CallConv};
 use cranelift_codegen::packed_option::ReservedValue;
@@ -249,11 +250,11 @@ impl Context {
     // Allocate a new stack slot.
     fn add_ss(&mut self, ss: StackSlot, data: StackSlotData, loc: Location) -> ParseResult<()> {
         self.map.def_ss(ss, loc)?;
-        while self.function.stack_slots.next_key().index() <= ss.index() {
+        while self.function.sized_stack_slots.next_key().index() <= ss.index() {
             self.function
-                .create_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 0));
+                .create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 0));
         }
-        self.function.stack_slots[ss] = data;
+        self.function.sized_stack_slots[ss] = data;
         Ok(())
     }
 
@@ -266,6 +267,47 @@ impl Context {
         }
     }
 
+    // Allocate a new dynamic stack slot.
+    fn add_dss(
+        &mut self,
+        ss: DynamicStackSlot,
+        data: DynamicStackSlotData,
+        loc: Location,
+    ) -> ParseResult<()> {
+        self.map.def_dss(ss, loc)?;
+        while self.function.dynamic_stack_slots.next_key().index() <= ss.index() {
+            self.function
+                .create_dynamic_stack_slot(DynamicStackSlotData::new(
+                    StackSlotKind::ExplicitDynamicSlot,
+                    data.dyn_ty,
+                ));
+        }
+        self.function.dynamic_stack_slots[ss] = data;
+        Ok(())
+    }
+
+    // Resolve a reference to a dynamic stack slot.
+    fn check_dss(&self, dss: DynamicStackSlot, loc: Location) -> ParseResult<()> {
+        if !self.map.contains_dss(dss) {
+            err!(loc, "undefined dynamic stack slot {}", dss)
+        } else {
+            Ok(())
+        }
+    }
+
+    // Allocate a new dynamic type.
+    fn add_dt(&mut self, dt: DynamicType, data: DynamicTypeData, loc: Location) -> ParseResult<()> {
+        self.map.def_dt(dt, loc)?;
+        while self.function.dfg.dynamic_types.next_key().index() <= dt.index() {
+            self.function.dfg.make_dynamic_ty(DynamicTypeData::new(
+                data.base_vector_ty,
+                data.dynamic_scale,
+            ));
+        }
+        self.function.dfg.dynamic_types[dt] = data;
+        Ok(())
+    }
+
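These helpers are populated from the textual preamble. For reference, a round-trip sketch of the syntax they accept, using `cranelift_reader::parse_functions`; the CLIF mirrors the aarch64 filetests above:

    // Parse a function that declares a dynamic type and a dynamic slot.
    fn parse_dynamic_decls() {
        let clif = "
            function %f(i32) -> i32x4 {
                gv0 = dyn_scale_target_const.i32x4
                dt0 = i32x4*gv0
                dss0 = explicit_dynamic_slot dt0
            block0(v0: i32):
                v1 = splat.dt0 v0
                dynamic_stack_store v1, dss0
                v2 = dynamic_stack_load.dt0 dss0
                v3 = extract_vector v2, 0
                return v3
            }";
        let funcs = cranelift_reader::parse_functions(clif).expect("should parse");
        // One dss entity was declared and threaded through the instructions.
        assert_eq!(funcs[0].dynamic_stack_slots.iter().count(), 1);
    }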
     // Allocate a global value slot.
     fn add_gv(&mut self, gv: GlobalValue, data: GlobalValueData, loc: Location) -> ParseResult<()> {
         self.map.def_gv(gv, loc)?;
@@ -597,6 +639,33 @@
         err!(self.loc, err_msg)
     }
 
+    // Match and consume a dynamic stack slot reference.
+    fn match_dss(&mut self, err_msg: &str) -> ParseResult<DynamicStackSlot> {
+        if let Some(Token::DynamicStackSlot(ss)) = self.token() {
+            self.consume();
+            if let Some(ss) = DynamicStackSlot::with_number(ss) {
+                return Ok(ss);
+            }
+        }
+        err!(self.loc, err_msg)
+    }
+
+    // Match and consume a dynamic type reference.
+    fn match_dt(&mut self, err_msg: &str) -> ParseResult<DynamicType> {
+        if let Some(Token::DynamicType(dt)) = self.token() {
+            self.consume();
+            if let Some(dt) = DynamicType::with_number(dt) {
+                return Ok(dt);
+            }
+        }
+        err!(self.loc, err_msg)
+    }
+
+    // Extract the concrete `Type` from a `DynamicType` reference, if one is defined.
+    fn concrete_from_dt(&mut self, dt: DynamicType, ctx: &mut Context) -> Option<Type> {
+        ctx.function.get_concrete_dynamic_ty(dt)
+    }
+
     // Match and consume a global value reference.
     fn match_gv(&mut self, err_msg: &str) -> ParseResult<GlobalValue> {
         if let Some(Token::GlobalValue(gv)) = self.token() {
@@ -986,7 +1055,7 @@ impl<'a> Parser<'a> {
             vec![value; lane_size as usize]
         }
 
-        if !ty.is_vector() {
+        if !ty.is_vector() && !ty.is_dynamic_vector() {
             err!(self.loc, "Expected a controlling vector type, not {}", ty)
         } else {
             let constant_data = match ty.lane_type() {
@@ -1386,6 +1455,18 @@
                     self.parse_stack_slot_decl()
                         .and_then(|(ss, dat)| ctx.add_ss(ss, dat, loc))
                 }
+                Some(Token::DynamicStackSlot(..)) => {
+                    self.start_gathering_comments();
+                    let loc = self.loc;
+                    self.parse_dynamic_stack_slot_decl()
+                        .and_then(|(dss, dat)| ctx.add_dss(dss, dat, loc))
+                }
+                Some(Token::DynamicType(..)) => {
+                    self.start_gathering_comments();
+                    let loc = self.loc;
+                    self.parse_dynamic_type_decl()
+                        .and_then(|(dt, dat)| ctx.add_dt(dt, dat, loc))
+                }
                 Some(Token::GlobalValue(..)) => {
                     self.start_gathering_comments();
                     self.parse_global_value_decl()
@@ -1465,6 +1546,39 @@
         Ok((ss, data))
     }
 
+    fn parse_dynamic_stack_slot_decl(
+        &mut self,
+    ) -> ParseResult<(DynamicStackSlot, DynamicStackSlotData)> {
+        let dss = self.match_dss("expected dynamic stack slot number: dss«n»")?;
+        self.match_token(Token::Equal, "expected '=' in dynamic stack slot declaration")?;
+        let kind = self.match_enum("expected dynamic stack slot kind")?;
+        let dt = self.match_dt("expected dynamic type")?;
+        let data = DynamicStackSlotData::new(kind, dt);
+        // Collect any trailing comments.
+        self.token();
+        self.claim_gathered_comments(dss);
+
+        // dynamic-stack-slot-decl ::= DynamicStackSlot(dss) "=" stack-slot-kind DynamicType(dt)
+        Ok((dss, data))
+    }
+
+    fn parse_dynamic_type_decl(&mut self) -> ParseResult<(DynamicType, DynamicTypeData)> {
+        let dt = self.match_dt("expected dynamic type number: dt«n»")?;
+        self.match_token(Token::Equal, "expected '=' in dynamic type declaration")?;
+        let vector_base_ty = self.match_type("expected base type")?;
+        assert!(vector_base_ty.is_vector(), "expected vector type");
+        self.match_token(
+            Token::Multiply,
+            "expected '*' followed by a dynamic scale value",
+        )?;
+        let dyn_scale = self.match_gv("expected dynamic scale global value")?;
+        let data = DynamicTypeData::new(vector_base_ty, dyn_scale);
+        // Collect any trailing comments.
+        self.token();
+        self.claim_gathered_comments(dt);
+        Ok((dt, data))
+    }
+
     // Parse a global value decl.
     //
     // global-val-decl ::= * GlobalValue(gv) "=" global-val-desc
     //                     | "load" "." type "notrap" "aligned" GlobalValue(base) [offset]
     //                     | "iadd_imm" "(" GlobalValue(base) ")" imm64
     //                     | "symbol" ["colocated"] name + imm64
+    //                     | "dyn_scale_target_const" "." type
     //
     fn parse_global_value_decl(&mut self) -> ParseResult<(GlobalValue, GlobalValueData)> {
         let gv = self.match_gv("expected global value number: gv«n»")?;
@@ -1530,6 +1645,15 @@ impl<'a> Parser<'a> {
                     tls,
                 }
             }
+            "dyn_scale_target_const" => {
+                self.match_token(
+                    Token::Dot,
+                    "expected '.' followed by type in dynamic scale global value decl",
+                )?;
+                let vector_type = self.match_type("expected vector type")?;
+                assert!(vector_type.is_vector(), "expected vector type");
+                GlobalValueData::DynScaleTargetConst { vector_type }
+            }
             other => return err!(self.loc, "Unknown global value kind '{}'", other),
         };
@@ -2095,7 +2219,12 @@ impl<'a> Parser<'a> {
         // Look for a controlling type variable annotation.
         // instruction ::=  [inst-results "="] Opcode(opc) * ["." Type] ...
         let explicit_ctrl_type = if self.optional(Token::Dot) {
-            Some(self.match_type("expected type after 'opcode.'")?)
+            if let Some(Token::Type(_t)) = self.token() {
+                Some(self.match_type("expected type after 'opcode.'")?)
+ } else { + let dt = self.match_dt("expected dynamic type")?; + self.concrete_from_dt(dt, ctx) + } } else { None }; @@ -2489,7 +2618,7 @@ impl<'a> Parser<'a> { I128 => DataValue::from(self.match_imm128("expected an i128")?), F32 => DataValue::from(self.match_ieee32("expected an f32")?), F64 => DataValue::from(self.match_ieee64("expected an f64")?), - _ if ty.is_vector() => { + _ if (ty.is_vector() || ty.is_dynamic_vector()) => { let as_vec = self.match_uimm128(ty)?.into_vec(); if as_vec.len() == 16 { let mut as_array = [0; 16]; @@ -2824,6 +2953,25 @@ impl<'a> Parser<'a> { offset, } } + InstructionFormat::DynamicStackLoad => { + let dss = self.match_dss("expected dynamic stack slot number: dss«n»")?; + ctx.check_dss(dss, self.loc)?; + InstructionData::DynamicStackLoad { + opcode, + dynamic_stack_slot: dss, + } + } + InstructionFormat::DynamicStackStore => { + let arg = self.match_value("expected SSA value operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let dss = self.match_dss("expected dynamic stack slot number: dss«n»")?; + ctx.check_dss(dss, self.loc)?; + InstructionData::DynamicStackStore { + opcode, + arg, + dynamic_stack_slot: dss, + } + } InstructionFormat::HeapAddr => { let heap = self.match_heap("expected heap identifier")?; ctx.check_heap(heap, self.loc)?; @@ -3080,17 +3228,23 @@ mod tests { .parse_function() .unwrap(); assert_eq!(func.name.to_string(), "%foo"); - let mut iter = func.stack_slots.keys(); + let mut iter = func.sized_stack_slots.keys(); let _ss0 = iter.next().unwrap(); let ss1 = iter.next().unwrap(); assert_eq!(ss1.to_string(), "ss1"); - assert_eq!(func.stack_slots[ss1].kind, StackSlotKind::ExplicitSlot); - assert_eq!(func.stack_slots[ss1].size, 1); + assert_eq!( + func.sized_stack_slots[ss1].kind, + StackSlotKind::ExplicitSlot + ); + assert_eq!(func.sized_stack_slots[ss1].size, 1); let _ss2 = iter.next().unwrap(); let ss3 = iter.next().unwrap(); assert_eq!(ss3.to_string(), "ss3"); - assert_eq!(func.stack_slots[ss3].kind, StackSlotKind::ExplicitSlot); - assert_eq!(func.stack_slots[ss3].size, 13); + assert_eq!( + func.sized_stack_slots[ss3].kind, + StackSlotKind::ExplicitSlot + ); + assert_eq!(func.sized_stack_slots[ss3].size, 13); assert_eq!(iter.next(), None); // Catch duplicate definitions. diff --git a/cranelift/reader/src/sourcemap.rs b/cranelift/reader/src/sourcemap.rs index 2a9298b8c1..00425dc586 100644 --- a/cranelift/reader/src/sourcemap.rs +++ b/cranelift/reader/src/sourcemap.rs @@ -8,9 +8,10 @@ use crate::error::{Location, ParseResult}; use crate::lexer::split_entity_name; -use cranelift_codegen::ir::entities::AnyEntity; +use cranelift_codegen::ir::entities::{AnyEntity, DynamicType}; use cranelift_codegen::ir::{ - Block, Constant, FuncRef, GlobalValue, Heap, JumpTable, SigRef, StackSlot, Table, Value, + Block, Constant, DynamicStackSlot, FuncRef, GlobalValue, Heap, JumpTable, SigRef, StackSlot, + Table, Value, }; use std::collections::HashMap; @@ -38,6 +39,11 @@ impl SourceMap { self.locations.contains_key(&ss.into()) } + /// Look up a dynamic stack slot entity. + pub fn contains_dss(&self, dss: DynamicStackSlot) -> bool { + self.locations.contains_key(&dss.into()) + } + /// Look up a global value entity. pub fn contains_gv(&self, gv: GlobalValue) -> bool { self.locations.contains_key(&gv.into()) @@ -173,6 +179,16 @@ impl SourceMap { self.def_entity(entity.into(), loc) } + /// Define the dynamic stack slot `entity`. 
+ pub fn def_dss(&mut self, entity: DynamicStackSlot, loc: Location) -> ParseResult<()> { + self.def_entity(entity.into(), loc) + } + + /// Define the dynamic type `entity`. + pub fn def_dt(&mut self, entity: DynamicType, loc: Location) -> ParseResult<()> { + self.def_entity(entity.into(), loc) + } + /// Define the global value `entity`. pub fn def_gv(&mut self, entity: GlobalValue, loc: Location) -> ParseResult<()> { self.def_entity(entity.into(), loc) diff --git a/cranelift/src/bugpoint.rs b/cranelift/src/bugpoint.rs index 8f0f027bf7..dcc48245f2 100644 --- a/cranelift/src/bugpoint.rs +++ b/cranelift/src/bugpoint.rs @@ -575,7 +575,7 @@ impl Mutator for RemoveUnusedEntities { let mut stack_slots = StackSlots::new(); - for (stack_slot, stack_slot_data) in func.stack_slots.clone().iter() { + for (stack_slot, stack_slot_data) in func.sized_stack_slots.clone().iter() { if let Some(stack_slot_usage) = stack_slot_usage_map.get(&stack_slot) { let new_stack_slot = stack_slots.push(stack_slot_data.clone()); for &inst in stack_slot_usage { @@ -591,7 +591,7 @@ impl Mutator for RemoveUnusedEntities { } } - func.stack_slots = stack_slots; + func.sized_stack_slots = stack_slots; "Remove unused stack slots" } @@ -617,9 +617,9 @@ impl Mutator for RemoveUnusedEntities { // These can create cyclic references, which cause complications. Just skip // the global value removal for now. // FIXME Handle them in a better way. - GlobalValueData::Load { .. } | GlobalValueData::IAddImm { .. } => { - return None - } + GlobalValueData::Load { .. } + | GlobalValueData::IAddImm { .. } + | GlobalValueData::DynScaleTargetConst { .. } => return None, } } diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs index 11916350e4..d993041342 100644 --- a/crates/cranelift/src/compiler.rs +++ b/crates/cranelift/src/compiler.rs @@ -264,7 +264,7 @@ impl wasmtime_environ::Compiler for Compiler { let length = u32::try_from(code_buf.len()).unwrap(); - let stack_slots = std::mem::take(&mut context.func.stack_slots); + let sized_stack_slots = std::mem::take(&mut context.func.sized_stack_slots); self.save_context(CompilerContext { func_translator, @@ -275,7 +275,7 @@ impl wasmtime_environ::Compiler for Compiler { body: code_buf, relocations: func_relocs, value_labels_ranges: ranges.unwrap_or(Default::default()), - stack_slots, + sized_stack_slots, unwind_info, traps, info: FunctionInfo { @@ -613,7 +613,7 @@ impl Compiler { let values_vec_byte_size = u32::try_from(value_size * values_vec_len).unwrap(); let values_vec_len = u32::try_from(values_vec_len).unwrap(); - let ss = builder.func.create_stack_slot(ir::StackSlotData::new( + let ss = builder.func.create_sized_stack_slot(ir::StackSlotData::new( ir::StackSlotKind::ExplicitSlot, values_vec_byte_size, )); @@ -712,7 +712,7 @@ impl Compiler { body: code_buf, unwind_info, relocations: Vec::new(), - stack_slots: Default::default(), + sized_stack_slots: Default::default(), value_labels_ranges: Default::default(), info: Default::default(), address_map: Default::default(), diff --git a/crates/cranelift/src/debug/transform/expression.rs b/crates/cranelift/src/debug/transform/expression.rs index a72e5daa42..2b47f991cc 100644 --- a/crates/cranelift/src/debug/transform/expression.rs +++ b/crates/cranelift/src/debug/transform/expression.rs @@ -17,7 +17,7 @@ use wasmtime_environ::{DefinedFuncIndex, EntityRef}; pub struct FunctionFrameInfo<'a> { pub value_ranges: &'a ValueLabelsRanges, pub memory_offset: ModuleMemoryOffset, - pub stack_slots: &'a StackSlots, + pub 
sized_stack_slots: &'a StackSlots,
 }
 
 impl<'a> FunctionFrameInfo<'a> {
@@ -1207,11 +1207,11 @@ mod tests {
         use wasmtime_environ::{DefinedFuncIndex, EntityRef};
 
         let addr_tr = create_mock_address_transform();
-        let stack_slots = StackSlots::new();
+        let sized_stack_slots = StackSlots::new();
         let (value_ranges, value_labels) = create_mock_value_ranges();
         let fi = FunctionFrameInfo {
             memory_offset: ModuleMemoryOffset::None,
-            stack_slots: &stack_slots,
+            sized_stack_slots: &sized_stack_slots,
             value_ranges: &value_ranges,
         };
 
diff --git a/crates/cranelift/src/debug/transform/utils.rs b/crates/cranelift/src/debug/transform/utils.rs
index fca9b455c9..3cba3f5677 100644
--- a/crates/cranelift/src/debug/transform/utils.rs
+++ b/crates/cranelift/src/debug/transform/utils.rs
@@ -178,7 +178,7 @@ where
         let frame_info = FunctionFrameInfo {
             value_ranges: &func.value_labels_ranges,
             memory_offset: memory_offset.clone(),
-            stack_slots: &func.stack_slots,
+            sized_stack_slots: &func.sized_stack_slots,
         };
         Some(frame_info)
     } else {
diff --git a/crates/cranelift/src/lib.rs b/crates/cranelift/src/lib.rs
index e98abefd5b..a5bf431800 100644
--- a/crates/cranelift/src/lib.rs
+++ b/crates/cranelift/src/lib.rs
@@ -42,8 +42,9 @@ pub struct CompiledFunction {
     relocations: Vec<Relocation>,
     value_labels_ranges: cranelift_codegen::ValueLabelsRanges,
-    stack_slots: ir::StackSlots,
+    sized_stack_slots: ir::StackSlots,
 
+    // TODO: Add dynamic_stack_slots?
     info: FunctionInfo,
 }
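The renames in these final hunks follow the convention used throughout the patch: `sized_` state has a byte size fixed at compile time, while `dynamic_` state is scaled by a target constant. A small sketch of how a consumer of `ir::Function` sees the split (field names are taken from the hunks above; `dyn_ty` prints as a `dt«n»` entity):

    use cranelift_codegen::ir::Function;

    // Summarize a function's slots under the new naming split.
    fn describe_slots(func: &Function) {
        for (ss, data) in func.sized_stack_slots.iter() {
            println!("{}: {} bytes (sized)", ss, data.size);
        }
        for (dss, data) in func.dynamic_stack_slots.iter() {
            // The byte size of a dynamic slot is only known once the
            // target's scale is applied to its dynamic type.
            println!("{}: dynamic, type {}", dss, data.dyn_ty);
        }
    }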