From 9c43749dfe0b378c40b9932694d248c3546abac3 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Thu, 7 Jul 2022 20:54:39 +0100 Subject: [PATCH] [RFC] Dynamic Vector Support (#4200) Introduce a new concept in the IR that allows a producer to create dynamic vector types. An IR function can now contain global value(s) that represent a dynamic scaling factor, for a given fixed-width vector type. A dynamic type is then created by 'multiplying' the corresponding global value with a fixed-width type. These new types can be used just like the existing types and the type system has a set of hard-coded dynamic types, such as I32X4XN, which the user defined types map onto. The dynamic types are also used explicitly to create dynamic stack slots, which have no set size like their existing counterparts. New IR instructions are added to access these new stack entities. Currently, during codegen, the dynamic scaling factor has to be lowered to a constant so the dynamic slots do eventually have a compile-time known size, as do spill slots. The current lowering for aarch64 just targets Neon, using a dynamic scale of 1. Copyright (c) 2022, Arm Limited. --- cranelift/codegen/build.rs | 2 + cranelift/codegen/meta/src/cdsl/types.rs | 90 ++++++++ cranelift/codegen/meta/src/cdsl/typevar.rs | 145 ++++++++++++- cranelift/codegen/meta/src/gen_inst.rs | 1 + cranelift/codegen/meta/src/gen_settings.rs | 2 +- cranelift/codegen/meta/src/gen_types.rs | 15 ++ cranelift/codegen/meta/src/shared/entities.rs | 9 + cranelift/codegen/meta/src/shared/formats.rs | 11 + .../codegen/meta/src/shared/instructions.rs | 77 +++++++ cranelift/codegen/shared/src/constants.rs | 8 +- cranelift/codegen/src/ir/dfg.rs | 33 ++- cranelift/codegen/src/ir/dynamic_type.rs | 38 ++++ cranelift/codegen/src/ir/entities.rs | 56 +++++ cranelift/codegen/src/ir/function.rs | 59 ++++-- cranelift/codegen/src/ir/globalvalue.rs | 11 + cranelift/codegen/src/ir/instructions.rs | 70 ++++++- cranelift/codegen/src/ir/mod.rs | 10 +- cranelift/codegen/src/ir/stackslot.rs | 113 +++++++++- cranelift/codegen/src/ir/types.rs | 136 +++++++++++- cranelift/codegen/src/isa/aarch64/abi.rs | 56 ++++- cranelift/codegen/src/isa/aarch64/inst.isle | 18 ++ .../codegen/src/isa/aarch64/inst/args.rs | 24 ++- .../codegen/src/isa/aarch64/inst/emit.rs | 6 +- cranelift/codegen/src/isa/aarch64/inst/mod.rs | 1 + .../codegen/src/isa/aarch64/inst/regs.rs | 9 +- .../src/isa/aarch64/inst/unwind/systemv.rs | 2 +- .../codegen/src/isa/aarch64/inst_neon.isle | 8 + cranelift/codegen/src/isa/aarch64/lower.isle | 9 + .../src/isa/aarch64/lower_dynamic_neon.isle | 30 +++ .../codegen/src/isa/aarch64/lower_inst.rs | 61 ++++-- cranelift/codegen/src/isa/aarch64/mod.rs | 14 +- cranelift/codegen/src/isa/mod.rs | 7 +- cranelift/codegen/src/isa/s390x/abi.rs | 7 +- cranelift/codegen/src/isa/s390x/inst.isle | 3 - .../src/isa/s390x/inst/unwind/systemv.rs | 4 +- cranelift/codegen/src/isa/s390x/lower.isle | 4 +- cranelift/codegen/src/isa/s390x/lower.rs | 6 +- cranelift/codegen/src/isa/s390x/lower/isle.rs | 15 +- cranelift/codegen/src/isa/s390x/mod.rs | 14 +- cranelift/codegen/src/isa/x64/abi.rs | 13 +- .../src/isa/x64/inst/unwind/systemv.rs | 2 +- cranelift/codegen/src/isa/x64/lower.rs | 19 +- cranelift/codegen/src/isa/x64/mod.rs | 14 +- .../codegen/src/legalizer/globalvalue.rs | 14 ++ cranelift/codegen/src/legalizer/mod.rs | 35 ++++ cranelift/codegen/src/machinst/abi.rs | 28 ++- cranelift/codegen/src/machinst/abi_impl.rs | 175 +++++++++++----- cranelift/codegen/src/machinst/isle.rs | 82 +++++++- 
cranelift/codegen/src/machinst/mod.rs | 6 +- cranelift/codegen/src/machinst/vcode.rs | 14 +- cranelift/codegen/src/prelude.isle | 37 +++- cranelift/codegen/src/verifier/mod.rs | 31 ++- cranelift/codegen/src/write.rs | 17 +- .../isa/aarch64/dynamic-simd-narrow.clif | 164 +++++++++++++++ .../isa/aarch64/dynamic-simd-neon.clif | 104 +++++++++ .../isa/aarch64/dynamic-simd-widen.clif | 104 +++++++++ .../filetests/isa/aarch64/dynamic-slot.clif | 129 ++++++++++++ .../runtests/dynamic-simd-arithmetic.clif | 197 ++++++++++++++++++ cranelift/frontend/src/frontend.rs | 21 +- cranelift/interpreter/src/interpreter.rs | 11 +- cranelift/interpreter/src/step.rs | 6 + cranelift/reader/src/lexer.rs | 74 ++++--- cranelift/reader/src/parser.rs | 186 +++++++++++++++-- cranelift/reader/src/sourcemap.rs | 20 +- cranelift/src/bugpoint.rs | 10 +- crates/cranelift/src/compiler.rs | 8 +- .../src/debug/transform/expression.rs | 6 +- crates/cranelift/src/debug/transform/utils.rs | 2 +- crates/cranelift/src/lib.rs | 3 +- 69 files changed, 2422 insertions(+), 294 deletions(-) create mode 100644 cranelift/codegen/src/ir/dynamic_type.rs create mode 100644 cranelift/codegen/src/isa/aarch64/inst_neon.isle create mode 100644 cranelift/codegen/src/isa/aarch64/lower_dynamic_neon.isle create mode 100644 cranelift/filetests/filetests/isa/aarch64/dynamic-simd-narrow.clif create mode 100644 cranelift/filetests/filetests/isa/aarch64/dynamic-simd-neon.clif create mode 100644 cranelift/filetests/filetests/isa/aarch64/dynamic-simd-widen.clif create mode 100644 cranelift/filetests/filetests/isa/aarch64/dynamic-slot.clif create mode 100644 cranelift/filetests/filetests/runtests/dynamic-simd-arithmetic.clif diff --git a/cranelift/codegen/build.rs b/cranelift/codegen/build.rs index 701194611a..4960b0c68c 100644 --- a/cranelift/codegen/build.rs +++ b/cranelift/codegen/build.rs @@ -218,7 +218,9 @@ fn get_isle_compilations( inputs: vec![ prelude_isle.clone(), src_isa_aarch64.join("inst.isle"), + src_isa_aarch64.join("inst_neon.isle"), src_isa_aarch64.join("lower.isle"), + src_isa_aarch64.join("lower_dynamic_neon.isle"), ], untracked_inputs: vec![clif_isle.clone()], }, diff --git a/cranelift/codegen/meta/src/cdsl/types.rs b/cranelift/codegen/meta/src/cdsl/types.rs index 12da29829a..1c2ca3f1cc 100644 --- a/cranelift/codegen/meta/src/cdsl/types.rs +++ b/cranelift/codegen/meta/src/cdsl/types.rs @@ -20,6 +20,7 @@ pub(crate) enum ValueType { Reference(ReferenceType), Special(SpecialType), Vector(VectorType), + DynamicVector(DynamicVectorType), } impl ValueType { @@ -44,6 +45,7 @@ impl ValueType { ValueType::Reference(r) => r.doc(), ValueType::Special(s) => s.doc(), ValueType::Vector(ref v) => v.doc(), + ValueType::DynamicVector(ref v) => v.doc(), } } @@ -54,6 +56,7 @@ impl ValueType { ValueType::Reference(r) => r.lane_bits(), ValueType::Special(s) => s.lane_bits(), ValueType::Vector(ref v) => v.lane_bits(), + ValueType::DynamicVector(ref v) => v.lane_bits(), } } @@ -77,6 +80,7 @@ impl ValueType { ValueType::Reference(r) => r.number(), ValueType::Special(s) => s.number(), ValueType::Vector(ref v) => v.number(), + ValueType::DynamicVector(ref v) => v.number(), } } @@ -98,6 +102,7 @@ impl fmt::Display for ValueType { ValueType::Reference(r) => r.fmt(f), ValueType::Special(s) => s.fmt(f), ValueType::Vector(ref v) => v.fmt(f), + ValueType::DynamicVector(ref v) => v.fmt(f), } } } @@ -130,6 +135,13 @@ impl From for ValueType { } } +/// Create a ValueType from a given dynamic vector type. 
+impl From<DynamicVectorType> for ValueType {
+    fn from(vector: DynamicVectorType) -> Self {
+        ValueType::DynamicVector(vector)
+    }
+}
+
 /// A concrete scalar type that can appear as a vector lane too.
 #[derive(Clone, Copy, PartialEq, Eq, Hash)]
 pub(crate) enum LaneType {
@@ -230,6 +242,10 @@ impl LaneType {
             ValueType::Vector(VectorType::new(self, lanes.into()))
         }
     }
+
+    pub fn to_dynamic(self, lanes: u16) -> ValueType {
+        ValueType::DynamicVector(DynamicVectorType::new(self, lanes.into()))
+    }
 }
 
 impl fmt::Display for LaneType {
@@ -380,6 +396,80 @@ impl fmt::Debug for VectorType {
     }
 }
 
+/// A concrete dynamic SIMD vector type.
+///
+/// A dynamic vector type has a lane type which is an instance of `LaneType`,
+/// and a minimum number of lanes.
+#[derive(Clone, PartialEq, Eq, Hash)]
+pub(crate) struct DynamicVectorType {
+    base: LaneType,
+    unscaled_lanes: u64,
+}
+
+impl DynamicVectorType {
+    /// Initialize a new type with `base` lane type and a minimum number of lanes.
+    pub fn new(base: LaneType, unscaled_lanes: u64) -> Self {
+        Self {
+            base,
+            unscaled_lanes,
+        }
+    }
+
+    /// Return a string containing the documentation comment for this vector type.
+    pub fn doc(&self) -> String {
+        format!(
+            "A dynamically-scaled SIMD vector with a minimum of {} lanes containing `{}` bits each.",
+            self.unscaled_lanes,
+            self.base
+        )
+    }
+
+    /// Return the number of bits in a lane.
+    pub fn lane_bits(&self) -> u64 {
+        self.base.lane_bits()
+    }
+
+    /// Return the minimum number of lanes.
+    pub fn minimum_lane_count(&self) -> u64 {
+        self.unscaled_lanes
+    }
+
+    /// Return the lane type.
+    pub fn lane_type(&self) -> LaneType {
+        self.base
+    }
+
+    /// Find the unique number associated with this vector type.
+    ///
+    /// Dynamic vector types are encoded in the same manner as `VectorType`,
+    /// with the lane type in the low 4 bits and log2(lane_count) in the next
+    /// 4 bits. We add the `VECTOR_BASE` to move these numbers into the range
+    /// beyond the fixed SIMD types.
+    pub fn number(&self) -> u16 {
+        let base_num = u32::from(self.base.number());
+        let lanes_log_2: u32 = 63 - self.minimum_lane_count().leading_zeros();
+        let num = 0x80 + (lanes_log_2 << 4) + base_num;
+        num as u16
+    }
+}
+
+impl fmt::Display for DynamicVectorType {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}x{}xN", self.base, self.minimum_lane_count())
+    }
+}
+
+impl fmt::Debug for DynamicVectorType {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(
+            f,
+            "DynamicVectorType(base={}, lanes={})",
+            self.base,
+            self.minimum_lane_count(),
+        )
+    }
+}
+
 /// A concrete scalar type that is neither a vector nor a lane type.
 ///
 /// Special types cannot be used to form vectors.
diff --git a/cranelift/codegen/meta/src/cdsl/typevar.rs b/cranelift/codegen/meta/src/cdsl/typevar.rs
index eea3e2724c..63c14f861a 100644
--- a/cranelift/codegen/meta/src/cdsl/typevar.rs
+++ b/cranelift/codegen/meta/src/cdsl/typevar.rs
@@ -68,10 +68,19 @@ impl TypeVar {
             ValueType::Vector(vec_type) => {
                 (vec_type.lane_type(), vec_type.lane_count() as RangeBound)
             }
+            ValueType::DynamicVector(vec_type) => (
+                vec_type.lane_type(),
+                vec_type.minimum_lane_count() as RangeBound,
+            ),
         };
 
         builder = builder.simd_lanes(num_lanes..num_lanes);
+
+        // Only generate dynamic types for multiple lanes.
+ if num_lanes > 1 { + builder = builder.dynamic_simd_lanes(num_lanes..num_lanes); + } + let builder = match scalar_type { LaneType::Int(int_type) => { let bits = int_type as RangeBound; @@ -229,7 +238,9 @@ impl TypeVar { "can't halve a scalar type" ); } - DerivedFunc::LaneOf | DerivedFunc::AsBool => { /* no particular assertions */ } + DerivedFunc::LaneOf | DerivedFunc::AsBool | DerivedFunc::DynamicToVector => { + /* no particular assertions */ + } } TypeVar { @@ -269,6 +280,9 @@ impl TypeVar { pub fn merge_lanes(&self) -> TypeVar { self.derived(DerivedFunc::MergeLanes) } + pub fn dynamic_to_vector(&self) -> TypeVar { + self.derived(DerivedFunc::DynamicToVector) + } } impl Into for &TypeVar { @@ -331,6 +345,7 @@ pub(crate) enum DerivedFunc { DoubleVector, SplitLanes, MergeLanes, + DynamicToVector, } impl DerivedFunc { @@ -344,6 +359,7 @@ impl DerivedFunc { DerivedFunc::DoubleVector => "double_vector", DerivedFunc::SplitLanes => "split_lanes", DerivedFunc::MergeLanes => "merge_lanes", + DerivedFunc::DynamicToVector => "dynamic_to_vector", } } } @@ -385,6 +401,7 @@ macro_rules! num_set { #[derive(Clone, PartialEq, Eq, Hash)] pub(crate) struct TypeSet { pub lanes: NumSet, + pub dynamic_lanes: NumSet, pub ints: NumSet, pub floats: NumSet, pub bools: NumSet, @@ -395,6 +412,7 @@ pub(crate) struct TypeSet { impl TypeSet { fn new( lanes: NumSet, + dynamic_lanes: NumSet, ints: NumSet, floats: NumSet, bools: NumSet, @@ -403,6 +421,7 @@ impl TypeSet { ) -> Self { Self { lanes, + dynamic_lanes, ints, floats, bools, @@ -415,6 +434,8 @@ impl TypeSet { pub fn size(&self) -> usize { self.lanes.len() * (self.ints.len() + self.floats.len() + self.bools.len() + self.refs.len()) + + self.dynamic_lanes.len() + * (self.ints.len() + self.floats.len() + self.bools.len() + self.refs.len()) + self.specials.len() } @@ -429,6 +450,7 @@ impl TypeSet { DerivedFunc::DoubleVector => self.double_vector(), DerivedFunc::SplitLanes => self.half_width().double_vector(), DerivedFunc::MergeLanes => self.double_width().half_vector(), + DerivedFunc::DynamicToVector => self.dynamic_to_vector(), } } @@ -507,6 +529,19 @@ impl TypeSet { copy } + fn dynamic_to_vector(&self) -> TypeSet { + let mut copy = self.clone(); + copy.lanes = NumSet::from_iter( + self.dynamic_lanes + .iter() + .filter(|&&x| x < MAX_LANES) + .map(|&x| x), + ); + copy.specials = Vec::new(); + copy.dynamic_lanes = NumSet::new(); + copy + } + fn concrete_types(&self) -> Vec { let mut ret = Vec::new(); for &num_lanes in &self.lanes { @@ -523,6 +558,17 @@ impl TypeSet { ret.push(ReferenceType::ref_from_bits(bits).into()); } } + for &num_lanes in &self.dynamic_lanes { + for &bits in &self.ints { + ret.push(LaneType::int_from_bits(bits).to_dynamic(num_lanes)); + } + for &bits in &self.floats { + ret.push(LaneType::float_from_bits(bits).to_dynamic(num_lanes)); + } + for &bits in &self.bools { + ret.push(LaneType::bool_from_bits(bits).to_dynamic(num_lanes)); + } + } for &special in &self.specials { ret.push(special.into()); } @@ -548,6 +594,12 @@ impl fmt::Debug for TypeSet { Vec::from_iter(self.lanes.iter().map(|x| x.to_string())).join(", ") )); } + if !self.dynamic_lanes.is_empty() { + subsets.push(format!( + "dynamic_lanes={{{}}}", + Vec::from_iter(self.dynamic_lanes.iter().map(|x| x.to_string())).join(", ") + )); + } if !self.ints.is_empty() { subsets.push(format!( "ints={{{}}}", @@ -591,6 +643,7 @@ pub(crate) struct TypeSetBuilder { refs: Interval, includes_scalars: bool, simd_lanes: Interval, + dynamic_simd_lanes: Interval, specials: Vec, } @@ -603,6 +656,7 @@ impl 
TypeSetBuilder { refs: Interval::None, includes_scalars: true, simd_lanes: Interval::None, + dynamic_simd_lanes: Interval::None, specials: Vec::new(), } } @@ -636,6 +690,11 @@ impl TypeSetBuilder { self.simd_lanes = interval.into(); self } + pub fn dynamic_simd_lanes(mut self, interval: impl Into) -> Self { + assert!(self.dynamic_simd_lanes == Interval::None); + self.dynamic_simd_lanes = interval.into(); + self + } pub fn specials(mut self, specials: Vec) -> Self { assert!(self.specials.is_empty()); self.specials = specials; @@ -652,6 +711,7 @@ impl TypeSetBuilder { TypeSet::new( range_to_set(self.simd_lanes.to_range(min_lanes..MAX_LANES, Some(1))), + range_to_set(self.dynamic_simd_lanes.to_range(2..MAX_LANES, None)), range_to_set(self.ints.to_range(8..MAX_BITS, None)), range_to_set(self.floats.to_range(32..64, None)), bools, @@ -770,6 +830,50 @@ fn test_typevar_builder() { assert!(type_set.bools.is_empty()); assert!(type_set.specials.is_empty()); + let type_set = TypeSetBuilder::new() + .floats(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(); + assert_eq!(type_set.lanes, num_set![2, 4, 8, 16, 32, 64, 128, 256]); + assert_eq!(type_set.floats, num_set![32, 64]); + assert!(type_set.dynamic_lanes.is_empty()); + assert!(type_set.ints.is_empty()); + assert!(type_set.bools.is_empty()); + assert!(type_set.specials.is_empty()); + + let type_set = TypeSetBuilder::new() + .ints(Interval::All) + .bools(Interval::All) + .floats(Interval::All) + .dynamic_simd_lanes(Interval::All) + .includes_scalars(false) + .build(); + assert_eq!( + type_set.dynamic_lanes, + num_set![2, 4, 8, 16, 32, 64, 128, 256] + ); + assert_eq!(type_set.ints, num_set![8, 16, 32, 64, 128]); + assert_eq!(type_set.bools, num_set![1, 8, 16, 32, 64, 128]); + assert_eq!(type_set.floats, num_set![32, 64]); + assert_eq!(type_set.lanes, num_set![1]); + assert!(type_set.specials.is_empty()); + + let type_set = TypeSetBuilder::new() + .floats(Interval::All) + .dynamic_simd_lanes(Interval::All) + .includes_scalars(false) + .build(); + assert_eq!( + type_set.dynamic_lanes, + num_set![2, 4, 8, 16, 32, 64, 128, 256] + ); + assert_eq!(type_set.floats, num_set![32, 64]); + assert_eq!(type_set.lanes, num_set![1]); + assert!(type_set.ints.is_empty()); + assert!(type_set.bools.is_empty()); + assert!(type_set.specials.is_empty()); + let type_set = TypeSetBuilder::new().ints(16..64).build(); assert_eq!(type_set.lanes, num_set![1]); assert_eq!(type_set.ints, num_set![16, 32, 64]); @@ -778,6 +882,45 @@ fn test_typevar_builder() { assert!(type_set.specials.is_empty()); } +#[test] +fn test_dynamic_to_vector() { + // We don't generate single lane dynamic types, so the maximum number of + // lanes we support is 128, as MAX_BITS is 256. 
+ assert_eq!( + TypeSetBuilder::new() + .dynamic_simd_lanes(Interval::All) + .ints(Interval::All) + .build() + .dynamic_to_vector(), + TypeSetBuilder::new() + .simd_lanes(2..128) + .ints(Interval::All) + .build() + ); + assert_eq!( + TypeSetBuilder::new() + .dynamic_simd_lanes(Interval::All) + .bools(Interval::All) + .build() + .dynamic_to_vector(), + TypeSetBuilder::new() + .simd_lanes(2..128) + .bools(Interval::All) + .build() + ); + assert_eq!( + TypeSetBuilder::new() + .dynamic_simd_lanes(Interval::All) + .floats(Interval::All) + .build() + .dynamic_to_vector(), + TypeSetBuilder::new() + .simd_lanes(2..128) + .floats(Interval::All) + .build() + ); +} + #[test] #[should_panic] fn test_typevar_builder_too_high_bound_panic() { diff --git a/cranelift/codegen/meta/src/gen_inst.rs b/cranelift/codegen/meta/src/gen_inst.rs index e6ae750435..eb2a6dfd20 100644 --- a/cranelift/codegen/meta/src/gen_inst.rs +++ b/cranelift/codegen/meta/src/gen_inst.rs @@ -688,6 +688,7 @@ pub(crate) fn gen_typesets_table(type_sets: &UniqueTable, fmt: &mut For fmt.indent(|fmt| { fmt.comment(typeset_to_string(ts)); gen_bitset(&ts.lanes, "lanes", 16, fmt); + gen_bitset(&ts.dynamic_lanes, "dynamic_lanes", 16, fmt); gen_bitset(&ts.ints, "ints", 8, fmt); gen_bitset(&ts.floats, "floats", 8, fmt); gen_bitset(&ts.bools, "bools", 8, fmt); diff --git a/cranelift/codegen/meta/src/gen_settings.rs b/cranelift/codegen/meta/src/gen_settings.rs index a76853b85c..8218876ae3 100644 --- a/cranelift/codegen/meta/src/gen_settings.rs +++ b/cranelift/codegen/meta/src/gen_settings.rs @@ -119,7 +119,7 @@ fn gen_to_and_from_str(name: &str, values: &[&'static str], fmt: &mut Formatter) }); fmtln!(fmt, "}"); - fmtln!(fmt, "impl str::FromStr for {} {{", name); + fmtln!(fmt, "impl core::str::FromStr for {} {{", name); fmt.indent(|fmt| { fmtln!(fmt, "type Err = ();"); fmtln!(fmt, "fn from_str(s: &str) -> Result {"); diff --git a/cranelift/codegen/meta/src/gen_types.rs b/cranelift/codegen/meta/src/gen_types.rs index f55848751c..0d27070df7 100644 --- a/cranelift/codegen/meta/src/gen_types.rs +++ b/cranelift/codegen/meta/src/gen_types.rs @@ -33,6 +33,19 @@ fn emit_vectors(bits: u64, fmt: &mut srcgen::Formatter) { } } +/// Emit definition for all dynamic vector types with `bits` total size. +fn emit_dynamic_vectors(bits: u64, fmt: &mut srcgen::Formatter) { + let vec_size: u64 = bits / 8; + for vec in cdsl_types::ValueType::all_lane_types() + .map(|ty| (ty, cdsl_types::ValueType::from(ty).membytes())) + .filter(|&(_, lane_size)| lane_size != 0 && lane_size < vec_size) + .map(|(ty, lane_size)| (ty, vec_size / lane_size)) + .map(|(ty, lanes)| cdsl_types::DynamicVectorType::new(ty, lanes)) + { + emit_type(&cdsl_types::ValueType::from(vec), fmt); + } +} + /// Emit types using the given formatter object. fn emit_types(fmt: &mut srcgen::Formatter) { // Emit all of the special types, such as types for CPU flags. @@ -51,8 +64,10 @@ fn emit_types(fmt: &mut srcgen::Formatter) { } // Emit vector definitions for common SIMD sizes. + // Emit dynamic vector definitions. for vec_size in &[64_u64, 128, 256, 512] { emit_vectors(*vec_size, fmt); + emit_dynamic_vectors(*vec_size, fmt); } } diff --git a/cranelift/codegen/meta/src/shared/entities.rs b/cranelift/codegen/meta/src/shared/entities.rs index dcf4ce2cf2..f612d3507d 100644 --- a/cranelift/codegen/meta/src/shared/entities.rs +++ b/cranelift/codegen/meta/src/shared/entities.rs @@ -18,6 +18,9 @@ pub(crate) struct EntityRefs { /// A reference to a stack slot declared in the function preamble. 
     pub(crate) stack_slot: OperandKind,
 
+    /// A reference to a dynamic stack slot declared in the function preamble.
+    pub(crate) dynamic_stack_slot: OperandKind,
+
     /// A reference to a global value.
     pub(crate) global_value: OperandKind,
 
@@ -52,6 +55,12 @@ impl EntityRefs {
             ),
 
             stack_slot: new("stack_slot", "ir::StackSlot", "A stack slot"),
 
+            dynamic_stack_slot: new(
+                "dynamic_stack_slot",
+                "ir::DynamicStackSlot",
+                "A dynamic stack slot",
+            ),
+
             global_value: new("global_value", "ir::GlobalValue", "A global value."),
 
             sig_ref: new("sig_ref", "ir::SigRef", "A function signature."),
diff --git a/cranelift/codegen/meta/src/shared/formats.rs b/cranelift/codegen/meta/src/shared/formats.rs
index 92fa34d8bc..84c2a39af7 100644
--- a/cranelift/codegen/meta/src/shared/formats.rs
+++ b/cranelift/codegen/meta/src/shared/formats.rs
@@ -34,6 +34,8 @@ pub(crate) struct Formats {
     pub(crate) shuffle: Rc<InstructionFormat>,
     pub(crate) stack_load: Rc<InstructionFormat>,
     pub(crate) stack_store: Rc<InstructionFormat>,
+    pub(crate) dynamic_stack_load: Rc<InstructionFormat>,
+    pub(crate) dynamic_stack_store: Rc<InstructionFormat>,
     pub(crate) store: Rc<InstructionFormat>,
     pub(crate) store_no_offset: Rc<InstructionFormat>,
     pub(crate) table_addr: Rc<InstructionFormat>,
@@ -230,6 +232,15 @@ impl Formats {
                 .imm(&imm.offset32)
                 .build(),
 
+            dynamic_stack_load: Builder::new("DynamicStackLoad")
+                .imm(&entities.dynamic_stack_slot)
+                .build(),
+
+            dynamic_stack_store: Builder::new("DynamicStackStore")
+                .value()
+                .imm(&entities.dynamic_stack_slot)
+                .build(),
+
             // Accessing a WebAssembly heap.
             heap_addr: Builder::new("HeapAddr")
                 .imm(&entities.heap)
diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs
index cdba177578..8a7d99fd4d 100644
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -427,6 +427,7 @@ fn define_simd_lane_access(
             .floats(Interval::All)
             .bools(Interval::All)
             .simd_lanes(Interval::All)
+            .dynamic_simd_lanes(Interval::All)
             .includes_scalars(false)
             .build(),
     );
@@ -706,6 +707,7 @@ pub(crate) fn define(
         TypeSetBuilder::new()
             .ints(Interval::All)
             .simd_lanes(Interval::All)
+            .dynamic_simd_lanes(Interval::All)
             .build(),
     );
 
@@ -785,6 +787,7 @@ pub(crate) fn define(
             .floats(Interval::All)
             .simd_lanes(Interval::All)
             .refs(Interval::All)
+            .dynamic_simd_lanes(Interval::All)
             .build(),
     );
 
@@ -793,6 +796,7 @@ pub(crate) fn define(
     let addr = &Operand::new("addr", iAddr);
 
     let SS = &Operand::new("SS", &entities.stack_slot);
+    let DSS = &Operand::new("DSS", &entities.dynamic_stack_slot);
     let Offset = &Operand::new("Offset", &imm.offset32).with_doc("Byte offset from base address");
     let x = &Operand::new("x", Mem).with_doc("Value to be stored");
     let a = &Operand::new("a", Mem).with_doc("Value loaded");
@@ -1163,7 +1167,51 @@ pub(crate) fn define(
         .operands_out(vec![addr]),
     );
 
+    ig.push(
+        Inst::new(
+            "dynamic_stack_load",
+            r#"
+        Load a value from a dynamic stack slot.
+
+        This is a polymorphic instruction that can load any value type which
+        has a memory representation.
+        "#,
+            &formats.dynamic_stack_load,
+        )
+        .operands_in(vec![DSS])
+        .operands_out(vec![a])
+        .can_load(true),
+    );
+
+    ig.push(
+        Inst::new(
+            "dynamic_stack_store",
+            r#"
+        Store a value to a dynamic stack slot.
+
+        This is a polymorphic instruction that can store any dynamic value type with a
+        memory representation.
+        "#,
+            &formats.dynamic_stack_store,
+        )
+        .operands_in(vec![x, DSS])
+        .can_store(true),
+    );
+
     let GV = &Operand::new("GV", &entities.global_value);
+
+    ig.push(
+        Inst::new(
+            "dynamic_stack_addr",
+            r#"
+        Get the address of a dynamic stack slot.
+
+        Compute the absolute address of the first byte of a dynamic stack slot.
+        "#,
+            &formats.dynamic_stack_load,
+        )
+        .operands_in(vec![DSS])
+        .operands_out(vec![addr]),
+    );
 
     ig.push(
         Inst::new(
@@ -2786,6 +2834,7 @@ pub(crate) fn define(
         TypeSetBuilder::new()
             .floats(Interval::All)
             .simd_lanes(Interval::All)
+            .dynamic_simd_lanes(Interval::All)
             .build(),
     );
     let Cond = &Operand::new("Cond", &imm.floatcc);
@@ -3409,6 +3458,7 @@ pub(crate) fn define(
         TypeSetBuilder::new()
             .ints(16..64)
             .simd_lanes(2..8)
+            .dynamic_simd_lanes(2..8)
             .includes_scalars(false)
             .build(),
     );
@@ -3479,6 +3529,7 @@ pub(crate) fn define(
         TypeSetBuilder::new()
             .ints(8..32)
             .simd_lanes(4..16)
+            .dynamic_simd_lanes(4..16)
             .includes_scalars(false)
             .build(),
     );
@@ -4063,4 +4114,30 @@ pub(crate) fn define(
         )
         .other_side_effects(true),
     );
+
+    let TxN = &TypeVar::new(
+        "TxN",
+        "A dynamic vector type",
+        TypeSetBuilder::new()
+            .ints(Interval::All)
+            .floats(Interval::All)
+            .bools(Interval::All)
+            .dynamic_simd_lanes(Interval::All)
+            .build(),
+    );
+    let x = &Operand::new("x", TxN).with_doc("The dynamic vector to extract from");
+    let y = &Operand::new("y", &imm.uimm8).with_doc("128-bit vector index");
+    let a = &Operand::new("a", &TxN.dynamic_to_vector()).with_doc("New fixed vector");
+
+    ig.push(
+        Inst::new(
+            "extract_vector",
+            r#"
+        Return a fixed-length sub-vector, extracted from a dynamic vector.
+        "#,
+            &formats.binary_imm8,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
 }
diff --git a/cranelift/codegen/shared/src/constants.rs b/cranelift/codegen/shared/src/constants.rs
index 86823ea06b..d9aa3916ca 100644
--- a/cranelift/codegen/shared/src/constants.rs
+++ b/cranelift/codegen/shared/src/constants.rs
@@ -8,9 +8,12 @@
 // 0x70-0x7d: Lane types
 // 0x7e-0x7f: Reference types
 // 0x80-0xff: Vector types
+// 0x100-0x17f: Dynamic vector types
 //
 // Vector types are encoded with the lane type in the low 4 bits and log2(lanes)
-// in the high 4 bits, giving a range of 2-256 lanes.
+// in the next highest 4 bits, giving a range of 2-256 lanes.
+
+// Dynamic vector types are encoded similarly.
 
 /// Start of the lane types.
 pub const LANE_BASE: u16 = 0x70;
@@ -20,3 +23,6 @@ pub const REFERENCE_BASE: u16 = 0x7E;
 
 /// Start of the 2-lane vector types.
 pub const VECTOR_BASE: u16 = 0x80;
+
+/// Start of the dynamic vector types.
+pub const DYNAMIC_VECTOR_BASE: u16 = 0x100;
diff --git a/cranelift/codegen/src/ir/dfg.rs b/cranelift/codegen/src/ir/dfg.rs
index a3dad36cb5..e1f1595766 100644
--- a/cranelift/codegen/src/ir/dfg.rs
+++ b/cranelift/codegen/src/ir/dfg.rs
@@ -3,12 +3,13 @@
 use crate::entity::{self, PrimaryMap, SecondaryMap};
 use crate::ir;
 use crate::ir::builder::ReplaceBuilder;
+use crate::ir::dynamic_type::{DynamicTypeData, DynamicTypes};
 use crate::ir::extfunc::ExtFuncData;
 use crate::ir::instructions::{BranchInfo, CallInfo, InstructionData};
 use crate::ir::{types, ConstantData, ConstantPool, Immediate};
 use crate::ir::{
-    Block, FuncRef, Inst, SigRef, Signature, SourceLoc, Type, Value, ValueLabelAssignments,
-    ValueList, ValueListPool,
+    Block, DynamicType, FuncRef, Inst, SigRef, Signature, SourceLoc, Type, Value,
+    ValueLabelAssignments, ValueList, ValueListPool,
 };
 use crate::packed_option::ReservedValue;
 use crate::write::write_operands;
@@ -50,6 +51,9 @@ pub struct DataFlowGraph {
     /// instructions contained in each block.
     blocks: PrimaryMap<Block, BlockData>,
 
+    /// Dynamic types created.
+    pub dynamic_types: DynamicTypes,
+
     /// Memory pool of value lists.
/// /// The `ValueList` references into this pool appear in many places: @@ -89,6 +93,7 @@ impl DataFlowGraph { insts: PrimaryMap::new(), results: SecondaryMap::new(), blocks: PrimaryMap::new(), + dynamic_types: DynamicTypes::new(), value_lists: ValueListPool::new(), values: PrimaryMap::new(), signatures: PrimaryMap::new(), @@ -105,6 +110,7 @@ impl DataFlowGraph { self.insts.clear(); self.results.clear(); self.blocks.clear(); + self.dynamic_types.clear(); self.value_lists.clear(); self.values.clear(); self.signatures.clear(); @@ -557,6 +563,11 @@ impl DataFlowGraph { self.insts.push(data) } + /// Declares a dynamic vector type + pub fn make_dynamic_ty(&mut self, data: DynamicTypeData) -> DynamicType { + self.dynamic_types.push(data) + } + /// Returns an object that displays `inst`. pub fn display_inst<'a>(&'a self, inst: Inst) -> DisplayInst<'a> { DisplayInst(self, inst) @@ -1104,6 +1115,20 @@ impl DataFlowGraph { self.values[v].set_type(t); } + /// Check that the given concrete `Type` has been defined in the function. + pub fn check_dynamic_type(&mut self, ty: Type) -> Option { + debug_assert!(ty.is_dynamic_vector()); + if self + .dynamic_types + .values() + .any(|dyn_ty_data| dyn_ty_data.concrete().unwrap() == ty) + { + Some(ty) + } else { + None + } + } + /// Create result values for `inst`, reusing the provided detached values. /// This is similar to `make_inst_results_reusing` except it's only for use /// in the parser, which needs to reuse previously invalid values. @@ -1130,6 +1155,10 @@ impl DataFlowGraph { let constraints = self.insts[inst].opcode().constraints(); for res_idx in 0..constraints.num_fixed_results() { let ty = constraints.result_type(res_idx, ctrl_typevar); + if ty.is_dynamic_vector() { + self.check_dynamic_type(ty) + .unwrap_or_else(|| panic!("Use of undeclared dynamic type: {}", ty)); + } if let Some(v) = reuse.get(res_idx) { self.set_value_type_for_parser(*v, ty); } diff --git a/cranelift/codegen/src/ir/dynamic_type.rs b/cranelift/codegen/src/ir/dynamic_type.rs new file mode 100644 index 0000000000..85589cef67 --- /dev/null +++ b/cranelift/codegen/src/ir/dynamic_type.rs @@ -0,0 +1,38 @@ +//! Dynamic IR types + +use crate::ir::entities::DynamicType; +use crate::ir::GlobalValue; +use crate::ir::PrimaryMap; +use crate::ir::Type; + +#[cfg(feature = "enable-serde")] +use serde::{Deserialize, Serialize}; + +/// A dynamic type object which has a base vector type and a scaling factor. +#[derive(Clone)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct DynamicTypeData { + /// Base vector type, this is the minimum size of the type. + pub base_vector_ty: Type, + /// The dynamic scaling factor of the base vector type. + pub dynamic_scale: GlobalValue, +} + +impl DynamicTypeData { + /// Create a new dynamic type. + pub fn new(base_vector_ty: Type, dynamic_scale: GlobalValue) -> Self { + assert!(base_vector_ty.is_vector()); + Self { + base_vector_ty, + dynamic_scale, + } + } + + /// Convert 'base_vector_ty' into a concrete dynamic vector type. + pub fn concrete(&self) -> Option { + self.base_vector_ty.vector_to_dynamic() + } +} + +/// All allocated dynamic types. +pub type DynamicTypes = PrimaryMap; diff --git a/cranelift/codegen/src/ir/entities.rs b/cranelift/codegen/src/ir/entities.rs index d8ca7cef36..2be7014685 100644 --- a/cranelift/codegen/src/ir/entities.rs +++ b/cranelift/codegen/src/ir/entities.rs @@ -135,6 +135,44 @@ impl StackSlot { } } +/// An opaque reference to a dynamic stack slot. 
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct DynamicStackSlot(u32); +entity_impl!(DynamicStackSlot, "dss"); + +impl DynamicStackSlot { + /// Create a new stack slot reference from its number. + /// + /// This method is for use by the parser. + pub fn with_number(n: u32) -> Option { + if n < u32::MAX { + Some(Self(n)) + } else { + None + } + } +} + +/// An opaque reference to a dynamic type. +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct DynamicType(u32); +entity_impl!(DynamicType, "dt"); + +impl DynamicType { + /// Create a new dynamic type reference from its number. + /// + /// This method is for use by the parser. + pub fn with_number(n: u32) -> Option { + if n < u32::MAX { + Some(Self(n)) + } else { + None + } + } +} + /// An opaque reference to a global value. /// /// A `GlobalValue` is a [`Value`](Value) that will be live across the entire @@ -389,6 +427,10 @@ pub enum AnyEntity { Value(Value), /// A stack slot. StackSlot(StackSlot), + /// A dynamic stack slot. + DynamicStackSlot(DynamicStackSlot), + /// A dynamic type + DynamicType(DynamicType), /// A Global value. GlobalValue(GlobalValue), /// A jump table. @@ -415,6 +457,8 @@ impl fmt::Display for AnyEntity { Self::Inst(r) => r.fmt(f), Self::Value(r) => r.fmt(f), Self::StackSlot(r) => r.fmt(f), + Self::DynamicStackSlot(r) => r.fmt(f), + Self::DynamicType(r) => r.fmt(f), Self::GlobalValue(r) => r.fmt(f), Self::JumpTable(r) => r.fmt(f), Self::Constant(r) => r.fmt(f), @@ -457,6 +501,18 @@ impl From for AnyEntity { } } +impl From for AnyEntity { + fn from(r: DynamicStackSlot) -> Self { + Self::DynamicStackSlot(r) + } +} + +impl From for AnyEntity { + fn from(r: DynamicType) -> Self { + Self::DynamicType(r) + } +} + impl From for AnyEntity { fn from(r: GlobalValue) -> Self { Self::GlobalValue(r) diff --git a/cranelift/codegen/src/ir/function.rs b/cranelift/codegen/src/ir/function.rs index 0092a33306..856c8f5f0d 100644 --- a/cranelift/codegen/src/ir/function.rs +++ b/cranelift/codegen/src/ir/function.rs @@ -7,12 +7,12 @@ use crate::entity::{PrimaryMap, SecondaryMap}; use crate::ir; use crate::ir::JumpTables; use crate::ir::{ - instructions::BranchInfo, Block, ExtFuncData, FuncRef, GlobalValue, GlobalValueData, Heap, - HeapData, Inst, InstructionData, JumpTable, JumpTableData, Opcode, SigRef, StackSlot, - StackSlotData, Table, TableData, + instructions::BranchInfo, Block, DynamicStackSlot, DynamicStackSlotData, DynamicType, + ExtFuncData, FuncRef, GlobalValue, GlobalValueData, Heap, HeapData, Inst, InstructionData, + JumpTable, JumpTableData, Opcode, SigRef, StackSlot, StackSlotData, Table, TableData, Type, }; use crate::ir::{DataFlowGraph, ExternalName, Layout, Signature}; -use crate::ir::{SourceLocs, StackSlots}; +use crate::ir::{DynamicStackSlots, SourceLocs, StackSlots}; use crate::isa::CallConv; use crate::value_label::ValueLabelsRanges; use crate::write::write_function; @@ -78,8 +78,11 @@ pub struct Function { /// Signature of this function. pub signature: Signature, - /// Stack slots allocated in this function. - pub stack_slots: StackSlots, + /// Sized stack slots allocated in this function. + pub sized_stack_slots: StackSlots, + + /// Dynamic stack slots allocated in this function. + pub dynamic_stack_slots: DynamicStackSlots, /// Global values referenced. 
pub global_values: PrimaryMap, @@ -120,7 +123,8 @@ impl Function { version_marker: VersionMarker, name, signature: sig, - stack_slots: StackSlots::new(), + sized_stack_slots: StackSlots::new(), + dynamic_stack_slots: DynamicStackSlots::new(), global_values: PrimaryMap::new(), heaps: PrimaryMap::new(), tables: PrimaryMap::new(), @@ -135,7 +139,8 @@ impl Function { /// Clear all data structures in this function. pub fn clear(&mut self) { self.signature.clear(CallConv::Fast); - self.stack_slots.clear(); + self.sized_stack_slots.clear(); + self.dynamic_stack_slots.clear(); self.global_values.clear(); self.heaps.clear(); self.tables.clear(); @@ -156,10 +161,16 @@ impl Function { self.jump_tables.push(data) } - /// Creates a stack slot in the function, to be used by `stack_load`, `stack_store` and - /// `stack_addr` instructions. - pub fn create_stack_slot(&mut self, data: StackSlotData) -> StackSlot { - self.stack_slots.push(data) + /// Creates a sized stack slot in the function, to be used by `stack_load`, `stack_store` + /// and `stack_addr` instructions. + pub fn create_sized_stack_slot(&mut self, data: StackSlotData) -> StackSlot { + self.sized_stack_slots.push(data) + } + + /// Creates a dynamic stack slot in the function, to be used by `dynamic_stack_load`, + /// `dynamic_stack_store` and `dynamic_stack_addr` instructions. + pub fn create_dynamic_stack_slot(&mut self, data: DynamicStackSlotData) -> DynamicStackSlot { + self.dynamic_stack_slots.push(data) } /// Adds a signature which can later be used to declare an external function import. @@ -177,6 +188,26 @@ impl Function { self.global_values.push(data) } + /// Find the global dyn_scale value associated with given DynamicType + pub fn get_dyn_scale(&self, ty: DynamicType) -> GlobalValue { + self.dfg.dynamic_types.get(ty).unwrap().dynamic_scale + } + + /// Find the global dyn_scale for the given stack slot. + pub fn get_dynamic_slot_scale(&self, dss: DynamicStackSlot) -> GlobalValue { + let dyn_ty = self.dynamic_stack_slots.get(dss).unwrap().dyn_ty; + self.get_dyn_scale(dyn_ty) + } + + /// Get a concrete `Type` from a user defined `DynamicType`. + pub fn get_concrete_dynamic_ty(&self, ty: DynamicType) -> Option { + self.dfg + .dynamic_types + .get(ty) + .unwrap_or_else(|| panic!("Undeclared dynamic vector type: {}", ty)) + .concrete() + } + /// Declares a heap accessible to the function. pub fn create_heap(&mut self, data: HeapData) -> Heap { self.heaps.push(data) @@ -322,8 +353,8 @@ impl Function { /// Size occupied by all stack slots associated with this function. /// /// Does not include any padding necessary due to offsets - pub fn stack_size(&self) -> u32 { - self.stack_slots.values().map(|ss| ss.size).sum() + pub fn fixed_stack_size(&self) -> u32 { + self.sized_stack_slots.values().map(|ss| ss.size).sum() } } diff --git a/cranelift/codegen/src/ir/globalvalue.rs b/cranelift/codegen/src/ir/globalvalue.rs index e70f8221fb..8ec39bf0a4 100644 --- a/cranelift/codegen/src/ir/globalvalue.rs +++ b/cranelift/codegen/src/ir/globalvalue.rs @@ -76,6 +76,13 @@ pub enum GlobalValueData { /// Does this symbol refer to a thread local storage value? tls: bool, }, + + /// Value is a multiple of how many instances of `vector_type` will fit in + /// a target vector register. + DynScaleTargetConst { + /// Base vector type. + vector_type: Type, + }, } impl GlobalValueData { @@ -92,6 +99,7 @@ impl GlobalValueData { match *self { Self::VMContext { .. } | Self::Symbol { .. } => isa.pointer_type(), Self::IAddImm { global_type, .. 
} | Self::Load { global_type, .. } => global_type,
+            Self::DynScaleTargetConst { .. } => isa.pointer_type(),
         }
     }
 
@@ -154,6 +162,9 @@ impl fmt::Display for GlobalValueData {
                 }
                 Ok(())
             }
+            Self::DynScaleTargetConst { vector_type } => {
+                write!(f, "dyn_scale_target_const.{}", vector_type)
+            }
         }
     }
 }
diff --git a/cranelift/codegen/src/ir/instructions.rs b/cranelift/codegen/src/ir/instructions.rs
index df7357fe2c..02a4d48e87 100644
--- a/cranelift/codegen/src/ir/instructions.rs
+++ b/cranelift/codegen/src/ir/instructions.rs
@@ -633,6 +633,8 @@ pub struct ValueTypeSet {
     pub bools: BitSet8,
     /// Allowed ref widths
     pub refs: BitSet8,
+    /// Allowed minimum lane counts for dynamic vectors
+    pub dynamic_lanes: BitSet16,
 }
 
 impl ValueTypeSet {
@@ -656,8 +658,13 @@ impl ValueTypeSet {
 
     /// Does `typ` belong to this set?
     pub fn contains(self, typ: Type) -> bool {
-        let l2l = typ.log2_lane_count();
-        self.lanes.contains(l2l) && self.is_base_type(typ.lane_type())
+        if typ.is_dynamic_vector() {
+            let l2l = typ.log2_min_lane_count();
+            self.dynamic_lanes.contains(l2l) && self.is_base_type(typ.lane_type())
+        } else {
+            let l2l = typ.log2_lane_count();
+            self.lanes.contains(l2l) && self.is_base_type(typ.lane_type())
+        }
     }
 
     /// Get an example member of this type set.
@@ -712,6 +719,9 @@ enum OperandConstraint {
 
     /// This operand is `ctrlType.merge_lanes()`.
     MergeLanes,
+
+    /// This operand is `ctrlType.dynamic_to_vector()`.
+    DynamicToVector,
 }
 
 impl OperandConstraint {
@@ -738,15 +748,48 @@ impl OperandConstraint {
                     .expect("can't halve a scalar type"),
             ),
             DoubleVector => Bound(ctrl_type.by(2).expect("invalid type for double_vector")),
-            SplitLanes => Bound(
+            SplitLanes => {
+                if ctrl_type.is_dynamic_vector() {
+                    Bound(
+                        ctrl_type
+                            .dynamic_to_vector()
+                            .expect("invalid type for dynamic_to_vector")
+                            .split_lanes()
+                            .expect("invalid type for split_lanes")
+                            .vector_to_dynamic()
+                            .expect("invalid dynamic type"),
+                    )
+                } else {
+                    Bound(
+                        ctrl_type
+                            .split_lanes()
+                            .expect("invalid type for split_lanes"),
+                    )
+                }
+            }
+            MergeLanes => {
+                if ctrl_type.is_dynamic_vector() {
+                    Bound(
+                        ctrl_type
+                            .dynamic_to_vector()
+                            .expect("invalid type for dynamic_to_vector")
+                            .merge_lanes()
+                            .expect("invalid type for merge_lanes")
+                            .vector_to_dynamic()
+                            .expect("invalid dynamic type"),
+                    )
+                } else {
+                    Bound(
+                        ctrl_type
+                            .merge_lanes()
+                            .expect("invalid type for merge_lanes"),
+                    )
+                }
+            }
+            DynamicToVector => Bound(
                 ctrl_type
-                    .split_lanes()
-                    .expect("invalid type for split_lanes"),
-            ),
-            MergeLanes => Bound(
-                ctrl_type
-                    .merge_lanes()
-                    .expect("invalid type for merge_lanes"),
+                    .dynamic_to_vector()
+                    .expect("invalid type for dynamic_to_vector"),
             ),
         }
     }
@@ -860,11 +903,13 @@ mod tests {
             floats: BitSet8::from_range(0, 0),
             bools: BitSet8::from_range(3, 7),
             refs: BitSet8::from_range(5, 7),
+            dynamic_lanes: BitSet16::from_range(0, 4),
         };
         assert!(!vts.contains(I8));
         assert!(vts.contains(I32));
         assert!(vts.contains(I64));
         assert!(vts.contains(I32X4));
+        assert!(vts.contains(I32X4XN));
        assert!(!vts.contains(F32));
         assert!(!vts.contains(B1));
         assert!(vts.contains(B8));
@@ -879,6 +924,7 @@ mod tests {
             floats: BitSet8::from_range(5, 7),
             bools: BitSet8::from_range(3, 7),
             refs: BitSet8::from_range(0, 0),
+            dynamic_lanes: BitSet16::from_range(0, 8),
         };
         assert_eq!(vts.example().to_string(), "f32");
 
@@ -888,6 +934,7 @@ mod tests {
             floats: BitSet8::from_range(5, 7),
             bools: BitSet8::from_range(3, 7),
             refs: BitSet8::from_range(0, 0),
+            dynamic_lanes: BitSet16::from_range(0, 8),
         };
         assert_eq!(vts.example().to_string(), "f32x2");
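For reference, the relationship between a fixed vector's type number and its dynamic counterpart can be sketched in a few lines of standalone Rust. This mirrors the encoding described in shared/src/constants.rs and cdsl/types.rs above; it is not part of the patch, and the i32 lane number used below is a placeholder rather than the value the meta crate actually assigns:

    // Sketch of the type-number encoding; constants mirror constants.rs.
    const LANE_BASE: u16 = 0x70;
    const VECTOR_BASE: u16 = 0x80;
    const DYNAMIC_VECTOR_BASE: u16 = 0x100;

    // log2(lane_count) is packed into bits 4..8 above the lane type number.
    fn vector_number(lane: u16, lanes: u16) -> u16 {
        lane + 0x10 * lanes.trailing_zeros() as u16
    }

    // A dynamic vector adds VECTOR_BASE again, landing in 0x100..0x180.
    fn dynamic_vector_number(lane: u16, min_lanes: u16) -> u16 {
        vector_number(lane, min_lanes) + VECTOR_BASE
    }

    fn main() {
        let i32_lane: u16 = 0x78; // placeholder lane number in 0x70..0x7e
        let i32x4 = vector_number(i32_lane, 4); // 0x98
        let i32x4xn = dynamic_vector_number(i32_lane, 4); // 0x118
        assert!(i32x4 >= VECTOR_BASE && i32x4 < DYNAMIC_VECTOR_BASE);
        assert!(i32x4xn >= DYNAMIC_VECTOR_BASE);
        // log2_min_lane_count: strip both bases, then shift, as in types.rs.
        assert_eq!((i32x4xn - (VECTOR_BASE + LANE_BASE)) >> 4, 2);
    }
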
@@ -897,9 +944,11 @@ mod tests { floats: BitSet8::from_range(0, 0), bools: BitSet8::from_range(3, 7), refs: BitSet8::from_range(0, 0), + dynamic_lanes: BitSet16::from_range(0, 8), }; assert!(!vts.contains(B32X2)); assert!(vts.contains(B32X4)); + assert!(vts.contains(B16X4XN)); assert_eq!(vts.example().to_string(), "b32x4"); let vts = ValueTypeSet { @@ -909,6 +958,7 @@ mod tests { floats: BitSet8::from_range(0, 0), bools: BitSet8::from_range(0, 0), refs: BitSet8::from_range(0, 0), + dynamic_lanes: BitSet16::from_range(0, 8), }; assert!(vts.contains(I32)); assert!(vts.contains(I32X4)); diff --git a/cranelift/codegen/src/ir/mod.rs b/cranelift/codegen/src/ir/mod.rs index 713d2fd37a..ac0a3bb44c 100644 --- a/cranelift/codegen/src/ir/mod.rs +++ b/cranelift/codegen/src/ir/mod.rs @@ -5,6 +5,7 @@ mod builder; pub mod condcodes; pub mod constant; pub mod dfg; +pub mod dynamic_type; pub mod entities; mod extfunc; mod extname; @@ -33,9 +34,10 @@ pub use crate::ir::builder::{ }; pub use crate::ir::constant::{ConstantData, ConstantPool}; pub use crate::ir::dfg::{DataFlowGraph, ValueDef}; +pub use crate::ir::dynamic_type::{DynamicTypeData, DynamicTypes}; pub use crate::ir::entities::{ - Block, Constant, FuncRef, GlobalValue, Heap, Immediate, Inst, JumpTable, SigRef, StackSlot, - Table, Value, + Block, Constant, DynamicStackSlot, DynamicType, FuncRef, GlobalValue, Heap, Immediate, Inst, + JumpTable, SigRef, StackSlot, Table, Value, }; pub use crate::ir::extfunc::{ AbiParam, ArgumentExtension, ArgumentPurpose, ExtFuncData, Signature, @@ -53,7 +55,9 @@ pub use crate::ir::libcall::{get_probestack_funcref, LibCall}; pub use crate::ir::memflags::{Endianness, MemFlags}; pub use crate::ir::progpoint::{ExpandedProgramPoint, ProgramOrder, ProgramPoint}; pub use crate::ir::sourceloc::SourceLoc; -pub use crate::ir::stackslot::{StackSlotData, StackSlotKind, StackSlots}; +pub use crate::ir::stackslot::{ + DynamicStackSlotData, DynamicStackSlots, StackSlotData, StackSlotKind, StackSlots, +}; pub use crate::ir::table::TableData; pub use crate::ir::trapcode::TrapCode; pub use crate::ir::types::Type; diff --git a/cranelift/codegen/src/ir/stackslot.rs b/cranelift/codegen/src/ir/stackslot.rs index 4c30eb48b6..e4db80d5d7 100644 --- a/cranelift/codegen/src/ir/stackslot.rs +++ b/cranelift/codegen/src/ir/stackslot.rs @@ -4,10 +4,18 @@ //! use crate::entity::PrimaryMap; +use crate::ir::entities::{DynamicStackSlot, DynamicType}; use crate::ir::StackSlot; use core::fmt; use core::str::FromStr; +/// imports only needed for testing. +#[allow(unused_imports)] +use crate::ir::{DynamicTypeData, GlobalValueData}; + +#[allow(unused_imports)] +use crate::ir::types::*; + #[cfg(feature = "enable-serde")] use serde::{Deserialize, Serialize}; @@ -25,6 +33,9 @@ pub enum StackSlotKind { /// An explicit stack slot. This is a chunk of stack memory for use by the `stack_load` /// and `stack_store` instructions. ExplicitSlot, + /// An explicit stack slot for dynamic vector types. This is a chunk of stack memory + /// for use by the `dynamic_stack_load` and `dynamic_stack_store` instructions. 
+ ExplicitDynamicSlot, } impl FromStr for StackSlotKind { @@ -34,6 +45,7 @@ impl FromStr for StackSlotKind { use self::StackSlotKind::*; match s { "explicit_slot" => Ok(ExplicitSlot), + "explicit_dynamic_slot" => Ok(ExplicitDynamicSlot), _ => Err(()), } } @@ -44,6 +56,7 @@ impl fmt::Display for StackSlotKind { use self::StackSlotKind::*; f.write_str(match *self { ExplicitSlot => "explicit_slot", + ExplicitDynamicSlot => "explicit_dynamic_slot", }) } } @@ -68,11 +81,15 @@ impl StackSlotData { /// Get the alignment in bytes of this stack slot given the stack pointer alignment. pub fn alignment(&self, max_align: StackSize) -> StackSize { debug_assert!(max_align.is_power_of_two()); - // We want to find the largest power of two that divides both `self.size` and `max_align`. - // That is the same as isolating the rightmost bit in `x`. - let x = self.size | max_align; - // C.f. Hacker's delight. - x & x.wrapping_neg() + if self.kind == StackSlotKind::ExplicitDynamicSlot { + max_align + } else { + // We want to find the largest power of two that divides both `self.size` and `max_align`. + // That is the same as isolating the rightmost bit in `x`. + let x = self.size | max_align; + // C.f. Hacker's delight. + x & x.wrapping_neg() + } } } @@ -82,9 +99,43 @@ impl fmt::Display for StackSlotData { } } +/// Contents of a dynamic stack slot. +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct DynamicStackSlotData { + /// The kind of stack slot. + pub kind: StackSlotKind, + + /// The type of this slot. + pub dyn_ty: DynamicType, +} + +impl DynamicStackSlotData { + /// Create a stack slot with the specified byte size. + pub fn new(kind: StackSlotKind, dyn_ty: DynamicType) -> Self { + assert!(kind == StackSlotKind::ExplicitDynamicSlot); + Self { kind, dyn_ty } + } + + /// Get the alignment in bytes of this stack slot given the stack pointer alignment. + pub fn alignment(&self, max_align: StackSize) -> StackSize { + debug_assert!(max_align.is_power_of_two()); + max_align + } +} + +impl fmt::Display for DynamicStackSlotData { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{} {}", self.kind, self.dyn_ty) + } +} + /// All allocated stack slots. pub type StackSlots = PrimaryMap; +/// All allocated dynamic stack slots. 
+pub type DynamicStackSlots = PrimaryMap; + #[cfg(test)] mod tests { use super::*; @@ -95,16 +146,56 @@ mod tests { fn stack_slot() { let mut func = Function::new(); - let ss0 = func.create_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 4)); - let ss1 = func.create_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 8)); + let ss0 = func.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 4)); + let ss1 = func.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 8)); assert_eq!(ss0.to_string(), "ss0"); assert_eq!(ss1.to_string(), "ss1"); - assert_eq!(func.stack_slots[ss0].size, 4); - assert_eq!(func.stack_slots[ss1].size, 8); + assert_eq!(func.sized_stack_slots[ss0].size, 4); + assert_eq!(func.sized_stack_slots[ss1].size, 8); - assert_eq!(func.stack_slots[ss0].to_string(), "explicit_slot 4"); - assert_eq!(func.stack_slots[ss1].to_string(), "explicit_slot 8"); + assert_eq!(func.sized_stack_slots[ss0].to_string(), "explicit_slot 4"); + assert_eq!(func.sized_stack_slots[ss1].to_string(), "explicit_slot 8"); + } + + #[test] + fn dynamic_stack_slot() { + let mut func = Function::new(); + + let int_vector_ty = I32X4; + let fp_vector_ty = F64X2; + let scale0 = GlobalValueData::DynScaleTargetConst { + vector_type: int_vector_ty, + }; + let scale1 = GlobalValueData::DynScaleTargetConst { + vector_type: fp_vector_ty, + }; + let gv0 = func.create_global_value(scale0); + let gv1 = func.create_global_value(scale1); + let dtd0 = DynamicTypeData::new(int_vector_ty, gv0); + let dtd1 = DynamicTypeData::new(fp_vector_ty, gv1); + let dt0 = func.dfg.make_dynamic_ty(dtd0); + let dt1 = func.dfg.make_dynamic_ty(dtd1); + + let dss0 = func.create_dynamic_stack_slot(DynamicStackSlotData::new( + StackSlotKind::ExplicitDynamicSlot, + dt0, + )); + let dss1 = func.create_dynamic_stack_slot(DynamicStackSlotData::new( + StackSlotKind::ExplicitDynamicSlot, + dt1, + )); + assert_eq!(dss0.to_string(), "dss0"); + assert_eq!(dss1.to_string(), "dss1"); + + assert_eq!( + func.dynamic_stack_slots[dss0].to_string(), + "explicit_dynamic_slot dt0" + ); + assert_eq!( + func.dynamic_stack_slots[dss1].to_string(), + "explicit_dynamic_slot dt1" + ); } #[test] diff --git a/cranelift/codegen/src/ir/types.rs b/cranelift/codegen/src/ir/types.rs index b7fce2fb20..cacafb5bfb 100644 --- a/cranelift/codegen/src/ir/types.rs +++ b/cranelift/codegen/src/ir/types.rs @@ -233,7 +233,12 @@ impl Type { /// /// A vector type has 2 or more lanes. pub fn is_vector(self) -> bool { - self.0 >= constants::VECTOR_BASE + self.0 >= constants::VECTOR_BASE && !self.is_dynamic_vector() + } + + /// Is this a SIMD vector type with a runtime number of lanes? + pub fn is_dynamic_vector(self) -> bool { + self.0 >= constants::DYNAMIC_VECTOR_BASE } /// Is this a scalar boolean type? @@ -288,19 +293,62 @@ impl Type { /// /// A scalar type is the same as a SIMD vector type with one lane, so it returns 0. pub fn log2_lane_count(self) -> u32 { - (self.0.saturating_sub(constants::LANE_BASE) >> 4) as u32 + if self.is_dynamic_vector() { + 0 + } else { + (self.0.saturating_sub(constants::LANE_BASE) >> 4) as u32 + } + } + + /// Get log_2 of the number of lanes in this vector/dynamic type. + pub fn log2_min_lane_count(self) -> u32 { + if self.is_dynamic_vector() { + (self + .0 + .saturating_sub(constants::VECTOR_BASE + constants::LANE_BASE) + >> 4) as u32 + } else { + self.log2_lane_count() + } } /// Get the number of lanes in this SIMD vector type. 
     ///
     /// A scalar type is the same as a SIMD vector type with one lane, so it returns 1.
     pub fn lane_count(self) -> u32 {
-        1 << self.log2_lane_count()
+        if self.is_dynamic_vector() {
+            0
+        } else {
+            1 << self.log2_lane_count()
+        }
     }
 
     /// Get the total number of bits used to represent this type.
     pub fn bits(self) -> u32 {
-        self.lane_bits() * self.lane_count()
+        if self.is_dynamic_vector() {
+            0
+        } else {
+            self.lane_bits() * self.lane_count()
+        }
+    }
+
+    /// Get the minimum number of lanes in this SIMD vector type; this supports
+    /// both fixed and dynamic types.
+    pub fn min_lane_count(self) -> u32 {
+        if self.is_dynamic_vector() {
+            1 << self.log2_min_lane_count()
+        } else {
+            1 << self.log2_lane_count()
+        }
+    }
+
+    /// Get the minimum number of bits used to represent this type.
+    pub fn min_bits(self) -> u32 {
+        if self.is_dynamic_vector() {
+            self.lane_bits() * self.min_lane_count()
+        } else {
+            self.bits()
+        }
     }
 
     /// Get the number of bytes used to store this type in memory.
@@ -315,23 +363,46 @@ impl Type {
     /// If this is already a SIMD vector type, this produces a SIMD vector type with `n *
     /// self.lane_count()` lanes.
     pub fn by(self, n: u32) -> Option<Self> {
+        if self.is_dynamic_vector() {
+            return None;
+        }
         if self.lane_bits() == 0 || !n.is_power_of_two() {
             return None;
         }
         let log2_lanes: u32 = n.trailing_zeros();
         let new_type = u32::from(self.0) + (log2_lanes << 4);
-        if new_type < 0x100 {
+        if new_type < constants::DYNAMIC_VECTOR_BASE as u32
+            && (new_type as u16) < constants::DYNAMIC_VECTOR_BASE
+        {
             Some(Self(new_type as u16))
         } else {
             None
         }
     }
 
+    /// Convert a fixed vector type to a dynamic one.
+    pub fn vector_to_dynamic(self) -> Option<Self> {
+        assert!(self.is_vector());
+        if self.bits() > 256 {
+            return None;
+        }
+        let new_ty = self.0 + constants::VECTOR_BASE;
+        let ty = Some(Self(new_ty));
+        assert!(ty.unwrap().is_dynamic_vector());
+        return ty;
+    }
+
+    /// Convert a dynamic vector type to a fixed one.
+    pub fn dynamic_to_vector(self) -> Option<Self> {
+        assert!(self.is_dynamic_vector());
+        Some(Self(self.0 - constants::VECTOR_BASE))
+    }
+
     /// Get a SIMD vector with half the number of lanes.
     ///
     /// There is no `double_vector()` method. Use `t.by(2)` instead.
     pub fn half_vector(self) -> Option<Self> {
-        if self.is_vector() {
+        if self.is_vector() && !self.is_dynamic_vector() {
             Some(Self(self.0 - 0x10))
         } else {
             None
@@ -418,6 +489,8 @@ impl Display for Type {
             write!(f, "f{}", self.lane_bits())
         } else if self.is_vector() {
             write!(f, "{}x{}", self.lane_type(), self.lane_count())
+        } else if self.is_dynamic_vector() {
+            write!(f, "{}x{}xN", self.lane_type(), self.min_lane_count())
         } else if self.is_ref() {
             write!(f, "r{}", self.lane_bits())
         } else {
@@ -441,6 +514,8 @@ impl Debug for Type {
             write!(f, "types::F{}", self.lane_bits())
         } else if self.is_vector() {
             write!(f, "{:?}X{}", self.lane_type(), self.lane_count())
+        } else if self.is_dynamic_vector() {
+            write!(f, "{:?}X{}XN", self.lane_type(), self.min_lane_count())
         } else if self.is_ref() {
             write!(f, "types::R{}", self.lane_bits())
         } else {
@@ -568,6 +643,55 @@ mod tests {
         assert_eq!(F64.by(8), Some(F64X8));
     }
 
+    #[test]
+    fn dynamic_vectors() {
+        // Identification.
+        assert_eq!(I8X16XN.is_dynamic_vector(), true);
+        assert_eq!(B16X4XN.is_dynamic_vector(), true);
+        assert_eq!(F32X8XN.is_dynamic_vector(), true);
+        assert_eq!(F64X4XN.is_dynamic_vector(), true);
+        assert_eq!(I128X2XN.is_dynamic_vector(), true);
+
+        // Lane counts.
+ assert_eq!(I16X8XN.lane_count(), 0); + assert_eq!(I16X8XN.min_lane_count(), 8); + + // Size + assert_eq!(B32X2XN.bits(), 0); + assert_eq!(B32X2XN.min_bits(), 64); + + // Change lane counts + assert_eq!(F64X4XN.half_vector(), None); + assert_eq!(I8X8XN.by(2), None); + + // Conversions to and from vectors. + assert_eq!(B8.by(8).unwrap().vector_to_dynamic(), Some(B8X8XN)); + assert_eq!(I8.by(16).unwrap().vector_to_dynamic(), Some(I8X16XN)); + assert_eq!(I16.by(8).unwrap().vector_to_dynamic(), Some(I16X8XN)); + assert_eq!(B16.by(16).unwrap().vector_to_dynamic(), Some(B16X16XN)); + assert_eq!(B32.by(2).unwrap().vector_to_dynamic(), Some(B32X2XN)); + assert_eq!(B32.by(8).unwrap().vector_to_dynamic(), Some(B32X8XN)); + assert_eq!(I32.by(4).unwrap().vector_to_dynamic(), Some(I32X4XN)); + assert_eq!(F32.by(4).unwrap().vector_to_dynamic(), Some(F32X4XN)); + assert_eq!(F64.by(2).unwrap().vector_to_dynamic(), Some(F64X2XN)); + assert_eq!(I128.by(2).unwrap().vector_to_dynamic(), Some(I128X2XN)); + + assert_eq!(I128X2XN.dynamic_to_vector(), Some(I128X2)); + assert_eq!(B64X2XN.dynamic_to_vector(), Some(B64X2)); + assert_eq!(F32X4XN.dynamic_to_vector(), Some(F32X4)); + assert_eq!(F64X4XN.dynamic_to_vector(), Some(F64X4)); + assert_eq!(I32X2XN.dynamic_to_vector(), Some(I32X2)); + assert_eq!(I32X8XN.dynamic_to_vector(), Some(I32X8)); + assert_eq!(I16X16XN.dynamic_to_vector(), Some(I16X16)); + assert_eq!(I8X32XN.dynamic_to_vector(), Some(I8X32)); + + assert_eq!(I8X64.vector_to_dynamic(), None); + assert_eq!(B16X32.vector_to_dynamic(), None); + assert_eq!(F32X16.vector_to_dynamic(), None); + assert_eq!(I64X8.vector_to_dynamic(), None); + assert_eq!(I128X4.vector_to_dynamic(), None); + } + #[test] fn format_scalars() { assert_eq!(IFLAGS.to_string(), "iflags"); diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index b626b766f0..1ef9e1a68f 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -5,7 +5,7 @@ use crate::ir::types; use crate::ir::types::*; use crate::ir::MemFlags; use crate::ir::Opcode; -use crate::ir::{ExternalName, LibCall}; +use crate::ir::{ExternalName, LibCall, Signature}; use crate::isa; use crate::isa::aarch64::{inst::EmitState, inst::*}; use crate::isa::unwind::UnwindInst; @@ -155,6 +155,7 @@ fn saved_reg_stack_size( } else { vec_reg.len() & 1 }; + // FIXME: SVE: ABI is different to Neon, so do we treat all vec regs as Z-regs? let vec_save_bytes = (vec_reg.len() + vec_save_padding) * vec_reg_size; (int_save_bytes, vec_save_bytes) @@ -365,9 +366,15 @@ impl ABIMachineSpec for AArch64MachineDeps { RegClass::Int => xreg(*next_reg), RegClass::Float => vreg(*next_reg), }; + // Overlay Z-regs on V-regs for parameter passing. + let ty = if param.value_type.is_dynamic_vector() { + dynamic_to_fixed(param.value_type) + } else { + param.value_type + }; ret.push(ABIArg::reg( reg.to_real_reg().unwrap(), - param.value_type, + ty, param.extension, param.purpose, )); @@ -558,6 +565,7 @@ impl ABIMachineSpec for AArch64MachineDeps { } fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable, _ty: Type) -> Inst { + // FIXME: Do something different for dynamic types? 
let mem = mem.into(); Inst::LoadAddr { rd: into_reg, mem } } @@ -931,6 +939,7 @@ impl ABIMachineSpec for AArch64MachineDeps { fn gen_clobber_restore( call_conv: isa::CallConv, + sig: &Signature, flags: &settings::Flags, clobbers: &[Writable], fixed_frame_storage_size: u32, @@ -938,7 +947,7 @@ impl ABIMachineSpec for AArch64MachineDeps { ) -> SmallVec<[Inst; 16]> { let mut insts = SmallVec::new(); let (clobbered_int, clobbered_vec) = - get_regs_restored_in_epilogue(call_conv, flags, clobbers); + get_regs_restored_in_epilogue(call_conv, flags, sig, clobbers); // Free the fixed frame if necessary. if fixed_frame_storage_size > 0 { @@ -1146,11 +1155,12 @@ impl ABIMachineSpec for AArch64MachineDeps { insts } - fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 { + fn get_number_of_spillslots_for_value(rc: RegClass, vector_size: u32) -> u32 { + assert_eq!(vector_size % 8, 0); // We allocate in terms of 8-byte slots. match rc { RegClass::Int => 1, - RegClass::Float => 2, + RegClass::Float => vector_size / 8, } } @@ -1195,12 +1205,15 @@ impl ABIMachineSpec for AArch64MachineDeps { fn get_clobbered_callee_saves( call_conv: isa::CallConv, flags: &settings::Flags, + sig: &Signature, regs: &[Writable], ) -> Vec> { let mut regs: Vec> = regs .iter() .cloned() - .filter(|r| is_reg_saved_in_prologue(call_conv, flags.enable_pinned_reg(), r.to_reg())) + .filter(|r| { + is_reg_saved_in_prologue(call_conv, flags.enable_pinned_reg(), sig, r.to_reg()) + }) .collect(); // Sort registers for deterministic code output. We can do an unstable @@ -1235,7 +1248,12 @@ fn legal_type_for_machine(ty: Type) -> bool { /// Is the given register saved in the prologue if clobbered, i.e., is it a /// callee-save? -fn is_reg_saved_in_prologue(call_conv: isa::CallConv, enable_pinned_reg: bool, r: RealReg) -> bool { +fn is_reg_saved_in_prologue( + call_conv: isa::CallConv, + enable_pinned_reg: bool, + sig: &Signature, + r: RealReg, +) -> bool { if call_conv.extends_baldrdash() { match r.class() { RegClass::Int => { @@ -1249,6 +1267,14 @@ fn is_reg_saved_in_prologue(call_conv: isa::CallConv, enable_pinned_reg: bool, r }; } + // FIXME: We need to inspect whether a function is returning Z or P regs too. + let save_z_regs = sig + .params + .iter() + .filter(|p| p.value_type.is_dynamic_vector()) + .count() + != 0; + match r.class() { RegClass::Int => { // x19 - x28 inclusive are callee-saves. @@ -1262,8 +1288,17 @@ fn is_reg_saved_in_prologue(call_conv: isa::CallConv, enable_pinned_reg: bool, r } } RegClass::Float => { - // v8 - v15 inclusive are callee-saves. - r.hw_enc() >= 8 && r.hw_enc() <= 15 + // If a subroutine takes at least one argument in scalable vector registers + // or scalable predicate registers, or if it is a function that returns + // results in such registers, it must ensure that the entire contents of + // z8-z23 are preserved across the call. In other cases it need only + // preserve the low 64 bits of z8-z15. + if save_z_regs { + r.hw_enc() >= 8 && r.hw_enc() <= 23 + } else { + // v8 - v15 inclusive are callee-saves. 
+                r.hw_enc() >= 8 && r.hw_enc() <= 15
+            }
         }
     }
 }
@@ -1274,12 +1309,13 @@ fn is_reg_saved_in_prologue(call_conv: isa::CallConv, enable_pinned_reg: bool, r
 fn get_regs_restored_in_epilogue(
     call_conv: isa::CallConv,
     flags: &settings::Flags,
+    sig: &Signature,
     regs: &[Writable<RealReg>],
 ) -> (Vec<Writable<RealReg>>, Vec<Writable<RealReg>>) {
     let mut int_saves = vec![];
     let mut vec_saves = vec![];
     for &reg in regs {
-        if is_reg_saved_in_prologue(call_conv, flags.enable_pinned_reg(), reg.to_reg()) {
+        if is_reg_saved_in_prologue(call_conv, flags.enable_pinned_reg(), sig, reg.to_reg()) {
             match reg.to_reg().class() {
                 RegClass::Int => int_saves.push(reg),
                 RegClass::Float => vec_saves.push(reg),
diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle
index 7a907ca8a7..6397ff3c1c 100644
--- a/cranelift/codegen/src/isa/aarch64/inst.isle
+++ b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -919,6 +919,17 @@
 (Size64x2)
 ))
+(type DynamicVectorSize extern
+  (enum
+    (Size8x8xN)
+    (Size8x16xN)
+    (Size16x4xN)
+    (Size16x8xN)
+    (Size32x2xN)
+    (Size32x4xN)
+    (Size64x2xN)
+))
+
 ;; Helper for calculating the `VectorSize` corresponding to a type
 (decl vector_size (Type) VectorSize)
 (rule (vector_size (multi_lane 8 8)) (VectorSize.Size8x8))
@@ -928,6 +939,13 @@
 (rule (vector_size (multi_lane 32 2)) (VectorSize.Size32x2))
 (rule (vector_size (multi_lane 32 4)) (VectorSize.Size32x4))
 (rule (vector_size (multi_lane 64 2)) (VectorSize.Size64x2))
+(rule (vector_size (dynamic_lane 8 8)) (VectorSize.Size8x8))
+(rule (vector_size (dynamic_lane 8 16)) (VectorSize.Size8x16))
+(rule (vector_size (dynamic_lane 16 4)) (VectorSize.Size16x4))
+(rule (vector_size (dynamic_lane 16 8)) (VectorSize.Size16x8))
+(rule (vector_size (dynamic_lane 32 2)) (VectorSize.Size32x2))
+(rule (vector_size (dynamic_lane 32 4)) (VectorSize.Size32x4))
+(rule (vector_size (dynamic_lane 64 2)) (VectorSize.Size64x2))
 
 ;; A floating-point unit (FPU) operation with one arg.
 (type FPUOp1
diff --git a/cranelift/codegen/src/isa/aarch64/inst/args.rs b/cranelift/codegen/src/isa/aarch64/inst/args.rs
index 6bb97c945b..ef9abd42ec 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/args.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs
@@ -706,12 +706,9 @@ impl VectorSize {
     /// Get the scalar operand size that corresponds to a lane of a vector with a certain size.
pub fn lane_size(&self) -> ScalarSize { match self { - VectorSize::Size8x8 => ScalarSize::Size8, - VectorSize::Size8x16 => ScalarSize::Size8, - VectorSize::Size16x4 => ScalarSize::Size16, - VectorSize::Size16x8 => ScalarSize::Size16, - VectorSize::Size32x2 => ScalarSize::Size32, - VectorSize::Size32x4 => ScalarSize::Size32, + VectorSize::Size8x8 | VectorSize::Size8x16 => ScalarSize::Size8, + VectorSize::Size16x4 | VectorSize::Size16x8 => ScalarSize::Size16, + VectorSize::Size32x2 | VectorSize::Size32x4 => ScalarSize::Size32, VectorSize::Size64x2 => ScalarSize::Size64, } } @@ -743,3 +740,18 @@ impl VectorSize { (q, size) } } + +pub(crate) fn dynamic_to_fixed(ty: Type) -> Type { + match ty { + I8X8XN => I8X8, + I8X16XN => I8X16, + I16X4XN => I16X4, + I16X8XN => I16X8, + I32X2XN => I32X2, + I32X4XN => I32X4, + I64X2XN => I64X2, + F32X4XN => F32X4, + F64X2XN => F64X2, + _ => unreachable!("unhandled type: {}", ty), + } +} diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index a4aadb67a1..7ff0a2f2a2 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -89,12 +89,12 @@ pub fn mem_finalize( //============================================================================= // Instructions and subcomponents: emission -fn machreg_to_gpr(m: Reg) -> u32 { +pub(crate) fn machreg_to_gpr(m: Reg) -> u32 { assert_eq!(m.class(), RegClass::Int); u32::try_from(m.to_real_reg().unwrap().hw_enc() & 31).unwrap() } -fn machreg_to_vec(m: Reg) -> u32 { +pub(crate) fn machreg_to_vec(m: Reg) -> u32 { assert_eq!(m.class(), RegClass::Float); u32::try_from(m.to_real_reg().unwrap().hw_enc()).unwrap() } @@ -2259,7 +2259,7 @@ impl MachInstEmit for Inst { VectorSize::Size16x8 => 0b00010, VectorSize::Size32x4 => 0b00100, VectorSize::Size64x2 => 0b01000, - _ => unimplemented!(), + _ => unimplemented!("Unexpected VectorSize: {:?}", size), }; sink.put4( 0b010_01110000_00000_000011_00000_00000 diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index f3bf2c4e82..e4044f2de8 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -1194,6 +1194,7 @@ impl MachInst for Inst { assert!(ty.bits() <= 128); Ok((&[RegClass::Float], &[I8X16])) } + _ if ty.is_dynamic_vector() => Ok((&[RegClass::Float], &[I8X16])), IFLAGS | FFLAGS => Ok((&[RegClass::Int], &[I64])), _ => Err(CodegenError::Unsupported(format!( "Unexpected SSA-value type: {}", diff --git a/cranelift/codegen/src/isa/aarch64/inst/regs.rs b/cranelift/codegen/src/isa/aarch64/inst/regs.rs index 2cbfe5f332..ba86baeeb4 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/regs.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/regs.rs @@ -332,12 +332,9 @@ pub fn show_vreg_element(reg: Reg, idx: u8, size: VectorSize) -> String { assert_eq!(RegClass::Float, reg.class()); let s = show_reg(reg); let suffix = match size { - VectorSize::Size8x8 => ".b", - VectorSize::Size8x16 => ".b", - VectorSize::Size16x4 => ".h", - VectorSize::Size16x8 => ".h", - VectorSize::Size32x2 => ".s", - VectorSize::Size32x4 => ".s", + VectorSize::Size8x8 | VectorSize::Size8x16 => ".b", + VectorSize::Size16x4 | VectorSize::Size16x8 => ".h", + VectorSize::Size32x2 | VectorSize::Size32x4 => ".s", VectorSize::Size64x2 => ".d", }; format!("{}{}[{}]", s, suffix, idx) diff --git a/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs 
index b029ce0101..f5d86252b3 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs @@ -117,7 +117,7 @@ mod tests { pos.ins().return_(&[]); if let Some(stack_slot) = stack_slot { - func.stack_slots.push(stack_slot); + func.sized_stack_slots.push(stack_slot); } func diff --git a/cranelift/codegen/src/isa/aarch64/inst_neon.isle b/cranelift/codegen/src/isa/aarch64/inst_neon.isle new file mode 100644 index 0000000000..2b6fd5792e --- /dev/null +++ b/cranelift/codegen/src/isa/aarch64/inst_neon.isle @@ -0,0 +1,8 @@ + +;; Move helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(decl fpu_move_128 (Reg) Reg) +(rule (fpu_move_128 src) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.FpuMove128 dst src)))) + (writable_reg_to_reg dst))) + diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index 08fd779ec3..6e1739adcb 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -81,6 +81,9 @@ (rule (lower (has_type ty @ (multi_lane _ _) (iadd x y))) (add_vec x y (vector_size ty))) +(rule (lower (has_type ty @ (dynamic_lane _ _) (iadd x y))) + (value_reg (vec_rrr (VecALUOp.Add) (put_in_reg x) (put_in_reg y) (vector_size ty)))) + ;; `i128` (rule (lower (has_type $I128 (iadd x y))) (let @@ -157,6 +160,8 @@ ;; vectors (rule (lower (has_type ty @ (multi_lane _ _) (isub x y))) (sub_vec x y (vector_size ty))) +(rule (lower (has_type ty @ (dynamic_lane _ _) (isub x y))) + (value_reg (sub_vec (put_in_reg x) (put_in_reg y) (vector_size ty)))) ;; `i128` (rule (lower (has_type $I128 (isub x y))) @@ -244,6 +249,10 @@ (rule (lower (has_type (ty_vec128 ty @ (not_i64x2)) (imul x y))) (mul x y (vector_size ty))) +;; Case for 'dynamic' i8x16, i16x8, and i32x4. +(rule (lower (has_type ty @ (dynamic_lane _ _) (imul x y))) + (value_reg (vec_rrr (VecALUOp.Mul) (put_in_reg x) (put_in_reg y) (vector_size ty)))) + ;; Special lowering for i64x2. 
;;
;; This I64X2 multiplication is performed with several 32-bit
diff --git a/cranelift/codegen/src/isa/aarch64/lower_dynamic_neon.isle b/cranelift/codegen/src/isa/aarch64/lower_dynamic_neon.isle
new file mode 100644
index 0000000000..3b9337094d
--- /dev/null
+++ b/cranelift/codegen/src/isa/aarch64/lower_dynamic_neon.isle
@@ -0,0 +1,30 @@
+
+;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(rule (lower (has_type ty @ (dynamic_lane _ _) (iadd x y)))
+      (value_reg (vec_rrr (VecALUOp.Add) (put_in_reg x) (put_in_reg y) (vector_size ty))))
+
+;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(rule (lower (has_type ty @ (dynamic_lane _ _) (isub x y)))
+      (value_reg (vec_rrr (VecALUOp.Sub) (put_in_reg x) (put_in_reg y) (vector_size ty))))
+
+;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(rule (lower (has_type (lane_fits_in_32 ty @ (dynamic_lane _ _)) (imul x y)))
+      (value_reg (vec_rrr (VecALUOp.Mul) (put_in_reg x) (put_in_reg y) (vector_size ty))))
+
+;;;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(rule (lower (has_type ty @ (dynamic_lane _ _) (fadd x y)))
+      (value_reg (vec_rrr (VecALUOp.Fadd) (put_in_reg x) (put_in_reg y) (vector_size ty))))
+
+;;;; Rules for `fsub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(rule (lower (has_type ty @ (dynamic_lane _ _) (fsub x y)))
+      (value_reg (vec_rrr (VecALUOp.Fsub) (put_in_reg x) (put_in_reg y) (vector_size ty))))
+
+;;; Rules for `dynamic_stack_addr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(rule (lower (dynamic_stack_addr stack_slot))
+      (let ((dst WritableReg (temp_writable_reg $I64))
+            (_ Unit (emit (abi_dynamic_stackslot_addr dst stack_slot))))
+        (value_reg dst)))
+
+;;; Rules for `extract_vector` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(rule (lower (extract_vector x 0))
+      (value_reg (fpu_move_128 (put_in_reg x))))
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index acbfd5095b..5a5f2c0a99 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -124,7 +124,10 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             insn,
             &inputs[..],
             outputs[0],
-            |ctx, dst, elem_ty, mem| {
+            |ctx, dst, mut elem_ty, mem| {
+                if elem_ty.is_dynamic_vector() {
+                    elem_ty = dynamic_to_fixed(elem_ty);
+                }
                 let rd = dst.only_reg().unwrap();
                 let is_float = ty_has_float_or_vec_representation(elem_ty);
                 ctx.emit(match (ty_bits(elem_ty), sign_extend, is_float) {
@@ -177,7 +180,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => {
             let off = ctx.data(insn).load_store_offset().unwrap();
-            let elem_ty = match op {
+            let mut elem_ty = match op {
                 Opcode::Istore8 => I8,
                 Opcode::Istore16 => I16,
                 Opcode::Istore32 => I32,
@@ -200,6 +203,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                     flags,
                 });
             } else {
+                if elem_ty.is_dynamic_vector() {
+                    elem_ty = dynamic_to_fixed(elem_ty);
+                }
                 let rd = dst.only_reg().unwrap();
                 let mem = lower_address(ctx, elem_ty, &inputs[1..], off);
                 ctx.emit(match (ty_bits(elem_ty), is_float) {
@@ -231,12 +237,15 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             };
             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
             let offset: i32 = offset.into();
-            let inst = ctx
-                .abi()
-                .stackslot_addr(stack_slot, u32::try_from(offset).unwrap(), rd);
+            assert!(ctx.abi().sized_stackslot_offsets().is_valid(stack_slot));
+            let inst =
+                ctx.abi()
.sized_stackslot_addr(stack_slot, u32::try_from(offset).unwrap(), rd); ctx.emit(inst); } + Opcode::DynamicStackAddr => implemented_in_isle(ctx), + Opcode::AtomicRmw => implemented_in_isle(ctx), Opcode::AtomicCas => implemented_in_isle(ctx), @@ -249,7 +258,10 @@ pub(crate) fn lower_insn_to_regs>( ctx.emit(Inst::Fence {}); } - Opcode::StackLoad | Opcode::StackStore => { + Opcode::StackLoad + | Opcode::StackStore + | Opcode::DynamicStackStore + | Opcode::DynamicStackLoad => { panic!("Direct stack memory access not supported; should not be used by Wasm"); } @@ -684,7 +696,8 @@ pub(crate) fn lower_insn_to_regs>( let idx = *imm; let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); - let size = VectorSize::from_ty(ctx.input_ty(insn, 0)); + let input_ty = ctx.input_ty(insn, 0); + let size = VectorSize::from_ty(input_ty); let ty = ty.unwrap(); if ty_has_int_representation(ty) { @@ -730,7 +743,14 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Splat => { let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let size = VectorSize::from_ty(ty.unwrap()); + let ty = ty.unwrap(); + // TODO: Handle SVE Dup. + let ty = if ty.is_dynamic_vector() { + dynamic_to_fixed(ty) + } else { + ty + }; + let size = VectorSize::from_ty(ty); if let Some((_, insn)) = maybe_input_insn_multi( ctx, @@ -1284,7 +1304,7 @@ pub(crate) fn lower_insn_to_regs>( let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - if !ty.is_vector() { + if !ty.is_vector() && !ty.is_dynamic_vector() { let fpu_op = match op { Opcode::Fadd => FPUOp2::Add, Opcode::Fsub => FPUOp2::Sub, @@ -1336,7 +1356,7 @@ pub(crate) fn lower_insn_to_regs>( debug_assert!(lane_type == F32 || lane_type == F64); - if ty.is_vector() { + if ty.is_vector() || ty.is_dynamic_vector() { let size = VectorSize::from_ty(ty); // pmin(a,b) => bitsel(b, a, cmpgt(a, b)) @@ -2015,7 +2035,15 @@ pub(crate) fn lower_insn_to_regs>( .map_or(true, |insn| { const_param_to_u128(ctx, insn).expect("Invalid immediate bytes") != 0 }); - let op = match (op, ty.unwrap()) { + let ty = ty.unwrap(); + let ty = if ty.is_dynamic_vector() { + ty.dynamic_to_vector() + .unwrap_or_else(|| panic!("Unsupported dynamic type: {}?", ty)) + } else { + ty + }; + + let op = match (op, ty) { (Opcode::Snarrow, I8X16) => VecRRNarrowOp::Sqxtn16, (Opcode::Snarrow, I16X8) => VecRRNarrowOp::Sqxtn32, (Opcode::Snarrow, I32X4) => VecRRNarrowOp::Sqxtn64, @@ -2057,7 +2085,14 @@ pub(crate) fn lower_insn_to_regs>( Opcode::SwidenLow | Opcode::SwidenHigh | Opcode::UwidenLow | Opcode::UwidenHigh => { let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); - let (t, high_half) = match (ty.unwrap(), op) { + let ty = ty.unwrap(); + let ty = if ty.is_dynamic_vector() { + ty.dynamic_to_vector() + .unwrap_or_else(|| panic!("Unsupported dynamic type: {}?", ty)) + } else { + ty + }; + let (t, high_half) = match (ty, op) { (I16X8, Opcode::SwidenLow) => (VecExtendOp::Sxtl8, false), (I16X8, Opcode::SwidenHigh) => (VecExtendOp::Sxtl8, true), (I16X8, Opcode::UwidenLow) => (VecExtendOp::Uxtl8, false), @@ -2182,6 +2217,8 @@ pub(crate) fn lower_insn_to_regs>( }); } + Opcode::ExtractVector => implemented_in_isle(ctx), + Opcode::ConstAddr | Opcode::Vconcat | Opcode::Vsplit | Opcode::IfcmpSp => { return Err(CodegenError::Unsupported(format!( "Unimplemented 
lowering: {}", diff --git a/cranelift/codegen/src/isa/aarch64/mod.rs b/cranelift/codegen/src/isa/aarch64/mod.rs index 1b05f887c1..e4cfd78f98 100644 --- a/cranelift/codegen/src/isa/aarch64/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/mod.rs @@ -1,7 +1,7 @@ //! ARM 64-bit Instruction Set Architecture. use crate::ir::condcodes::IntCC; -use crate::ir::Function; +use crate::ir::{Function, Type}; use crate::isa::aarch64::settings as aarch64_settings; use crate::isa::{Builder as IsaBuilder, TargetIsa}; use crate::machinst::{ @@ -57,7 +57,7 @@ impl AArch64Backend { flags: shared_settings::Flags, ) -> CodegenResult<(VCode, regalloc2::Output)> { let emit_info = EmitInfo::new(flags.clone()); - let abi = Box::new(abi::AArch64ABICallee::new(func, flags, self.isa_flags())?); + let abi = Box::new(abi::AArch64ABICallee::new(func, self)?); compile::compile::(func, self, abi, &self.machine_env, emit_info) } } @@ -76,7 +76,8 @@ impl TargetIsa for AArch64Backend { let frame_size = emit_result.frame_size; let value_labels_ranges = emit_result.value_labels_ranges; let buffer = emit_result.buffer.finish(); - let stackslot_offsets = emit_result.stackslot_offsets; + let sized_stackslot_offsets = emit_result.sized_stackslot_offsets; + let dynamic_stackslot_offsets = emit_result.dynamic_stackslot_offsets; if let Some(disasm) = emit_result.disasm.as_ref() { log::debug!("disassembly:\n{}", disasm); @@ -87,7 +88,8 @@ impl TargetIsa for AArch64Backend { frame_size, disasm: emit_result.disasm, value_labels_ranges, - stackslot_offsets, + sized_stackslot_offsets, + dynamic_stackslot_offsets, bb_starts: emit_result.bb_offsets, bb_edges: emit_result.bb_edges, }) @@ -109,6 +111,10 @@ impl TargetIsa for AArch64Backend { self.isa_flags.iter().collect() } + fn dynamic_vector_bytes(&self, _dyn_ty: Type) -> u32 { + 16 + } + fn unsigned_add_overflow_condition(&self) -> IntCC { // Unsigned `>=`; this corresponds to the carry flag set on aarch64, which happens on // overflow of an add. diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs index 691c90e32b..3d556d4935 100644 --- a/cranelift/codegen/src/isa/mod.rs +++ b/cranelift/codegen/src/isa/mod.rs @@ -196,7 +196,7 @@ pub struct TargetFrontendConfig { impl TargetFrontendConfig { /// Get the pointer type of this target. pub fn pointer_type(self) -> ir::Type { - ir::Type::int(u16::from(self.pointer_bits())).unwrap() + ir::Type::int(self.pointer_bits() as u16).unwrap() } /// Get the width of pointers on this target, in units of bits. @@ -226,6 +226,9 @@ pub trait TargetIsa: fmt::Display + Send + Sync { /// Get the ISA-dependent flag values that were used to make this trait object. fn isa_flags(&self) -> Vec; + /// Get the ISA-dependent maximum vector register size, in bytes. + fn dynamic_vector_bytes(&self, dynamic_ty: ir::Type) -> u32; + /// Compile the given function. fn compile_function( &self, @@ -311,7 +314,7 @@ impl<'a> dyn TargetIsa + 'a { /// Get the pointer type of this ISA. pub fn pointer_type(&self) -> ir::Type { - ir::Type::int(u16::from(self.pointer_bits())).unwrap() + ir::Type::int(self.pointer_bits() as u16).unwrap() } /// Get the width of pointers on this ISA. 
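The `dynamic_vector_bytes` hook above is what ties dynamic types to a concrete target: every backend in this patch reports 16 bytes, so a dynamic type occupies exactly one 128-bit register and the scaling factor legalizes to 1. A wider implementation would only need to report a larger size; a hedged sketch under that assumption (hypothetical backend, not code from this patch):

    fn dynamic_vector_bytes(&self, _dyn_ty: ir::Type) -> u32 {
        // E.g. 256-bit scalable registers: a 128-bit base type then
        // gets a dynamic scale of 32 / 16 = 2.
        32
    }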
diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs index f5d56b7113..77dcc87e94 100644 --- a/cranelift/codegen/src/isa/s390x/abi.rs +++ b/cranelift/codegen/src/isa/s390x/abi.rs @@ -61,6 +61,7 @@ use crate::ir; use crate::ir::condcodes::IntCC; use crate::ir::types; use crate::ir::MemFlags; +use crate::ir::Signature; use crate::ir::Type; use crate::isa; use crate::isa::s390x::inst::*; @@ -556,6 +557,7 @@ impl ABIMachineSpec for S390xMachineDeps { fn gen_clobber_restore( call_conv: isa::CallConv, + _: &Signature, _: &settings::Flags, clobbers: &[Writable], fixed_frame_storage_size: u32, @@ -633,7 +635,7 @@ impl ABIMachineSpec for S390xMachineDeps { unimplemented!("StructArgs not implemented for S390X yet"); } - fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 { + fn get_number_of_spillslots_for_value(rc: RegClass, _vector_scale: u32) -> u32 { // We allocate in terms of 8-byte slots. match rc { RegClass::Int => 1, @@ -665,6 +667,7 @@ impl ABIMachineSpec for S390xMachineDeps { fn get_clobbered_callee_saves( call_conv: isa::CallConv, flags: &settings::Flags, + _sig: &Signature, regs: &[Writable], ) -> Vec> { assert!( @@ -688,7 +691,7 @@ impl ABIMachineSpec for S390xMachineDeps { _is_leaf: bool, _stack_args_size: u32, _num_clobbered_callee_saves: usize, - _fixed_frame_storage_size: u32, + _frame_storage_size: u32, ) -> bool { // The call frame set-up is handled by gen_clobber_save(). false diff --git a/cranelift/codegen/src/isa/s390x/inst.isle b/cranelift/codegen/src/isa/s390x/inst.isle index 11006ed643..00868224c5 100644 --- a/cranelift/codegen/src/isa/s390x/inst.isle +++ b/cranelift/codegen/src/isa/s390x/inst.isle @@ -1158,9 +1158,6 @@ ;; Helpers for stack-slot addresses ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(decl abi_stackslot_addr (WritableReg StackSlot Offset32) MInst) -(extern constructor abi_stackslot_addr abi_stackslot_addr) - (decl stack_addr_impl (Type StackSlot Offset32) Reg) (rule (stack_addr_impl ty stack_slot offset) (let ((dst WritableReg (temp_writable_reg ty)) diff --git a/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs index 5702134dbf..152dabe44b 100644 --- a/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs @@ -148,7 +148,7 @@ mod tests { pos.ins().return_(&[]); if let Some(stack_slot) = stack_slot { - func.stack_slots.push(stack_slot); + func.sized_stack_slots.push(stack_slot); } func @@ -206,7 +206,7 @@ mod tests { pos.ins().return_(&[]); if let Some(stack_slot) = stack_slot { - func.stack_slots.push(stack_slot); + func.sized_stack_slots.push(stack_slot); } func diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle index 2585253e53..5dfc2ec3ec 100644 --- a/cranelift/codegen/src/isa/s390x/lower.isle +++ b/cranelift/codegen/src/isa/s390x/lower.isle @@ -2301,7 +2301,7 @@ (decl lower_call_ret_arg (ABISig) InstOutput) (rule (lower_call_ret_arg (abi_no_ret_arg)) (output_none)) (rule (lower_call_ret_arg abi @ (abi_ret_arg (abi_arg_only_slot slot))) - (let ((ret_arg Reg (load_addr (memarg_stack_off (abi_stack_arg_space abi) 0))) + (let ((ret_arg Reg (load_addr (memarg_stack_off (abi_sized_stack_arg_space abi) 0))) (_ Unit (copy_reg_to_arg_slot 0 slot ret_arg))) (output_none))) @@ -2309,7 +2309,7 @@ (decl lower_call_rets (ABISig Range InstOutputBuilder) InstOutput) (rule (lower_call_rets abi (range_empty) builder) (output_builder_finish builder)) 
(rule (lower_call_rets abi (range_unwrap head tail) builder) - (let ((ret ValueRegs (copy_from_arg (abi_stack_arg_space abi) (abi_get_ret abi head))) + (let ((ret ValueRegs (copy_from_arg (abi_sized_stack_arg_space abi) (abi_get_ret abi head))) (_ Unit (output_builder_push builder ret))) (lower_call_rets abi tail builder))) diff --git a/cranelift/codegen/src/isa/s390x/lower.rs b/cranelift/codegen/src/isa/s390x/lower.rs index fe402f83ad..2c87621aae 100644 --- a/cranelift/codegen/src/isa/s390x/lower.rs +++ b/cranelift/codegen/src/isa/s390x/lower.rs @@ -197,7 +197,11 @@ impl LowerBackend for S390xBackend { | Opcode::SqmulRoundSat | Opcode::FvpromoteLow | Opcode::Fvdemote - | Opcode::IaddPairwise => { + | Opcode::IaddPairwise + | Opcode::DynamicStackLoad + | Opcode::DynamicStackStore + | Opcode::DynamicStackAddr + | Opcode::ExtractVector => { unreachable!( "TODO: not yet implemented in ISLE: inst = `{}`, type = `{:?}`", ctx.dfg().display_inst(ir_inst), diff --git a/cranelift/codegen/src/isa/s390x/lower/isle.rs b/cranelift/codegen/src/isa/s390x/lower/isle.rs index ec775e0b41..2d41c6a88a 100644 --- a/cranelift/codegen/src/isa/s390x/lower/isle.rs +++ b/cranelift/codegen/src/isa/s390x/lower/isle.rs @@ -16,7 +16,7 @@ use crate::settings::Flags; use crate::{ ir::{ condcodes::*, immediates::*, types::*, AtomicRmwOp, Endianness, Inst, InstructionData, - MemFlags, Opcode, StackSlot, TrapCode, Value, ValueList, + MemFlags, Opcode, TrapCode, Value, ValueList, }, isa::unwind::UnwindInst, machinst::{InsnOutput, LowerCtx, VCodeConstant, VCodeConstantData}, @@ -77,7 +77,7 @@ where } fn abi_accumulate_outgoing_args_size(&mut self, abi: &ABISig) -> Unit { - let off = abi.stack_arg_space() + abi.stack_ret_space(); + let off = abi.sized_stack_arg_space() + abi.sized_stack_ret_space(); self.lower_ctx .abi() .accumulate_outgoing_args_size(off as u32); @@ -531,17 +531,6 @@ where } } - #[inline] - fn abi_stackslot_addr( - &mut self, - dst: WritableReg, - stack_slot: StackSlot, - offset: Offset32, - ) -> MInst { - let offset = u32::try_from(i32::from(offset)).unwrap(); - self.lower_ctx.abi().stackslot_addr(stack_slot, offset, dst) - } - #[inline] fn inst_builder_new(&mut self) -> VecMInstBuilder { Cell::new(Vec::::new()) diff --git a/cranelift/codegen/src/isa/s390x/mod.rs b/cranelift/codegen/src/isa/s390x/mod.rs index 3e9674d6f2..c860a479dd 100644 --- a/cranelift/codegen/src/isa/s390x/mod.rs +++ b/cranelift/codegen/src/isa/s390x/mod.rs @@ -1,7 +1,7 @@ //! IBM Z 64-bit Instruction Set Architecture. 
use crate::ir::condcodes::IntCC; -use crate::ir::Function; +use crate::ir::{Function, Type}; use crate::isa::s390x::settings as s390x_settings; #[cfg(feature = "unwind")] use crate::isa::unwind::systemv::RegisterMappingError; @@ -58,7 +58,7 @@ impl S390xBackend { flags: shared_settings::Flags, ) -> CodegenResult<(VCode, regalloc2::Output)> { let emit_info = EmitInfo::new(flags.clone(), self.isa_flags.clone()); - let abi = Box::new(abi::S390xABICallee::new(func, flags, self.isa_flags())?); + let abi = Box::new(abi::S390xABICallee::new(func, self)?); compile::compile::(func, self, abi, &self.machine_env, emit_info) } } @@ -77,7 +77,8 @@ impl TargetIsa for S390xBackend { let frame_size = emit_result.frame_size; let value_labels_ranges = emit_result.value_labels_ranges; let buffer = emit_result.buffer.finish(); - let stackslot_offsets = emit_result.stackslot_offsets; + let sized_stackslot_offsets = emit_result.sized_stackslot_offsets; + let dynamic_stackslot_offsets = emit_result.dynamic_stackslot_offsets; if let Some(disasm) = emit_result.disasm.as_ref() { log::debug!("disassembly:\n{}", disasm); @@ -88,7 +89,8 @@ impl TargetIsa for S390xBackend { frame_size, disasm: emit_result.disasm, value_labels_ranges, - stackslot_offsets, + sized_stackslot_offsets, + dynamic_stackslot_offsets, bb_starts: emit_result.bb_offsets, bb_edges: emit_result.bb_edges, }) @@ -110,6 +112,10 @@ impl TargetIsa for S390xBackend { self.isa_flags.iter().collect() } + fn dynamic_vector_bytes(&self, _dyn_ty: Type) -> u32 { + 16 + } + fn unsigned_add_overflow_condition(&self) -> IntCC { // The ADD LOGICAL family of instructions set the condition code // differently from normal comparisons, in a way that cannot be diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index fa2be84ded..65954c8051 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -1,7 +1,7 @@ //! Implementation of the standard x64 ABI. use crate::ir::types::*; -use crate::ir::{self, types, ExternalName, LibCall, MemFlags, Opcode, TrapCode, Type}; +use crate::ir::{self, types, ExternalName, LibCall, MemFlags, Opcode, Signature, TrapCode, Type}; use crate::isa; use crate::isa::{unwind::UnwindInst, x64::inst::*, CallConv}; use crate::machinst::abi_impl::*; @@ -573,6 +573,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { fn gen_clobber_restore( call_conv: isa::CallConv, + sig: &Signature, flags: &settings::Flags, clobbers: &[Writable], fixed_frame_storage_size: u32, @@ -580,7 +581,8 @@ impl ABIMachineSpec for X64ABIMachineSpec { ) -> SmallVec<[Self::I; 16]> { let mut insts = SmallVec::new(); - let clobbered_callee_saves = Self::get_clobbered_callee_saves(call_conv, flags, clobbers); + let clobbered_callee_saves = + Self::get_clobbered_callee_saves(call_conv, flags, sig, clobbers); let stack_size = fixed_frame_storage_size + compute_clobber_size(&clobbered_callee_saves); // Restore regs by loading from offsets of RSP. RSP will be @@ -722,11 +724,11 @@ impl ABIMachineSpec for X64ABIMachineSpec { insts } - fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 { + fn get_number_of_spillslots_for_value(rc: RegClass, vector_scale: u32) -> u32 { // We allocate in terms of 8-byte slots. 
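        // Illustrative arithmetic (not part of this change): with the 16-byte
        // dynamic vector size reported by the backends in this patch,
        //     RegClass::Int   => 1 spillslot           (8 bytes)
        //     RegClass::Float => 16 / 8 = 2 spillslots (one 128-bit vector)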
match rc { RegClass::Int => 1, - RegClass::Float => 2, + RegClass::Float => vector_scale / 8, } } @@ -771,6 +773,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { fn get_clobbered_callee_saves( call_conv: CallConv, flags: &settings::Flags, + _sig: &Signature, regs: &[Writable], ) -> Vec> { let mut regs: Vec> = match call_conv { @@ -805,7 +808,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { _is_leaf: bool, _stack_args_size: u32, _num_clobbered_callee_saves: usize, - _fixed_frame_storage_size: u32, + _frame_storage_size: u32, ) -> bool { true } diff --git a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs index c28ea3b623..d3970a575a 100644 --- a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs @@ -144,7 +144,7 @@ mod tests { pos.ins().return_(&[]); if let Some(stack_slot) = stack_slot { - func.stack_slots.push(stack_slot); + func.sized_stack_slots.push(stack_slot); } func diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 2c4641296d..07bf0c6e74 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -2169,6 +2169,8 @@ fn lower_insn_to_regs>( }); } + Opcode::DynamicStackAddr => unimplemented!("DynamicStackAddr"), + Opcode::StackAddr => { let (stack_slot, offset) = match *ctx.data(insn) { InstructionData::StackLoad { @@ -2180,9 +2182,9 @@ fn lower_insn_to_regs>( }; let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let offset: i32 = offset.into(); - let inst = ctx - .abi() - .stackslot_addr(stack_slot, u32::try_from(offset).unwrap(), dst); + let inst = + ctx.abi() + .sized_stackslot_addr(stack_slot, u32::try_from(offset).unwrap(), dst); ctx.emit(inst); } @@ -2908,7 +2910,11 @@ fn lower_insn_to_regs>( // Unimplemented opcodes below. These are not currently used by Wasm // lowering or other known embeddings, but should be either supported or - // removed eventually. + // removed eventually + Opcode::ExtractVector => { + unimplemented!("ExtractVector not supported"); + } + Opcode::Cls => unimplemented!("Cls not supported"), Opcode::Fma => unimplemented!("Fma not supported"), @@ -2965,7 +2971,10 @@ fn lower_insn_to_regs>( panic!("ALU+imm and ALU+carry ops should not appear here!"); } - Opcode::StackLoad | Opcode::StackStore => { + Opcode::StackLoad + | Opcode::StackStore + | Opcode::DynamicStackStore + | Opcode::DynamicStackLoad => { panic!("Direct stack memory access not supported; should have been legalized"); } diff --git a/cranelift/codegen/src/isa/x64/mod.rs b/cranelift/codegen/src/isa/x64/mod.rs index c732b56194..cb03b558d9 100644 --- a/cranelift/codegen/src/isa/x64/mod.rs +++ b/cranelift/codegen/src/isa/x64/mod.rs @@ -3,7 +3,7 @@ use self::inst::EmitInfo; use super::TargetIsa; -use crate::ir::{condcodes::IntCC, Function}; +use crate::ir::{condcodes::IntCC, Function, Type}; #[cfg(feature = "unwind")] use crate::isa::unwind::systemv; use crate::isa::x64::{inst::regs::create_reg_env_systemv, settings as x64_settings}; @@ -53,7 +53,7 @@ impl X64Backend { // This performs lowering to VCode, register-allocates the code, computes // block layout and finalizes branches. The result is ready for binary emission. 
let emit_info = EmitInfo::new(flags.clone(), self.x64_flags.clone()); - let abi = Box::new(abi::X64ABICallee::new(&func, flags, self.isa_flags())?); + let abi = Box::new(abi::X64ABICallee::new(&func, self)?); compile::compile::(&func, self, abi, &self.reg_env, emit_info) } } @@ -72,7 +72,8 @@ impl TargetIsa for X64Backend { let frame_size = emit_result.frame_size; let value_labels_ranges = emit_result.value_labels_ranges; let buffer = emit_result.buffer.finish(); - let stackslot_offsets = emit_result.stackslot_offsets; + let sized_stackslot_offsets = emit_result.sized_stackslot_offsets; + let dynamic_stackslot_offsets = emit_result.dynamic_stackslot_offsets; if let Some(disasm) = emit_result.disasm.as_ref() { log::debug!("disassembly:\n{}", disasm); @@ -83,7 +84,8 @@ impl TargetIsa for X64Backend { frame_size, disasm: emit_result.disasm, value_labels_ranges, - stackslot_offsets, + sized_stackslot_offsets, + dynamic_stackslot_offsets, bb_starts: emit_result.bb_offsets, bb_edges: emit_result.bb_edges, }) @@ -97,6 +99,10 @@ impl TargetIsa for X64Backend { self.x64_flags.iter().collect() } + fn dynamic_vector_bytes(&self, _dyn_ty: Type) -> u32 { + 16 + } + fn name(&self) -> &'static str { "x64" } diff --git a/cranelift/codegen/src/legalizer/globalvalue.rs b/cranelift/codegen/src/legalizer/globalvalue.rs index 50974a1d85..751f4f4035 100644 --- a/cranelift/codegen/src/legalizer/globalvalue.rs +++ b/cranelift/codegen/src/legalizer/globalvalue.rs @@ -28,9 +28,23 @@ pub fn expand_global_value( readonly, } => load_addr(inst, func, base, offset, global_type, readonly, isa), ir::GlobalValueData::Symbol { tls, .. } => symbol(inst, func, global_value, isa, tls), + ir::GlobalValueData::DynScaleTargetConst { vector_type } => { + const_vector_scale(inst, func, vector_type, isa) + } } } +fn const_vector_scale(inst: ir::Inst, func: &mut ir::Function, ty: ir::Type, isa: &dyn TargetIsa) { + assert!(ty.bytes() <= 16); + + // Use a minimum of 128-bits for the base type. + let base_bytes = std::cmp::max(ty.bytes(), 16); + let scale = (isa.dynamic_vector_bytes(ty) / base_bytes) as i64; + assert!(scale > 0); + let pos = FuncCursor::new(func).at_inst(inst); + pos.func.dfg.replace(inst).iconst(isa.pointer_type(), scale); +} + /// Expand a `global_value` instruction for a vmctx global. fn vmctx_addr(inst: ir::Inst, func: &mut ir::Function) { // Get the value representing the `vmctx` argument. diff --git a/cranelift/codegen/src/legalizer/mod.rs b/cranelift/codegen/src/legalizer/mod.rs index 755be3a9ec..ae7caf0345 100644 --- a/cranelift/codegen/src/legalizer/mod.rs +++ b/cranelift/codegen/src/legalizer/mod.rs @@ -115,6 +115,41 @@ pub fn simple_legalize(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: mflags.set_aligned(); pos.func.dfg.replace(inst).store(mflags, arg, addr, 0); } + InstructionData::DynamicStackLoad { + opcode: ir::Opcode::DynamicStackLoad, + dynamic_stack_slot, + } => { + let ty = pos.func.dfg.value_type(pos.func.dfg.first_result(inst)); + assert!(ty.is_dynamic_vector()); + let addr_ty = isa.pointer_type(); + + let mut pos = FuncCursor::new(pos.func).at_inst(inst); + pos.use_srcloc(inst); + + let addr = pos.ins().dynamic_stack_addr(addr_ty, dynamic_stack_slot); + + // Stack slots are required to be accessible and aligned. 
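A quick worked example for `const_vector_scale` above, assuming the 16-byte `dynamic_vector_bytes` value that every backend in this patch reports: for `dt0 = i32x4*gv0` the base type is 16 bytes, so

    base_bytes = max(bytes(i32x4), 16) = 16
    scale      = 16 / 16               = 1

and the `dyn_scale_target_const` global value is replaced by `iconst 1`. A hypothetical 256-bit target reporting 32 bytes would produce `iconst 2` instead.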
+                let mflags = MemFlags::trusted();
+                pos.func.dfg.replace(inst).load(ty, mflags, addr, 0);
+            }
+            InstructionData::DynamicStackStore {
+                opcode: ir::Opcode::DynamicStackStore,
+                arg,
+                dynamic_stack_slot,
+            } => {
+                pos.use_srcloc(inst);
+                let addr_ty = isa.pointer_type();
+                let vector_ty = pos.func.dfg.value_type(arg);
+                assert!(vector_ty.is_dynamic_vector());
+
+                let addr = pos.ins().dynamic_stack_addr(addr_ty, dynamic_stack_slot);
+
+                let mut mflags = MemFlags::new();
+                // Stack slots are required to be accessible and aligned.
+                mflags.set_notrap();
+                mflags.set_aligned();
+                pos.func.dfg.replace(inst).store(mflags, arg, addr, 0);
+            }
             InstructionData::TableAddr {
                 opcode: ir::Opcode::TableAddr,
                 table,
diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs
index 4d7442b670..db9936efe8 100644
--- a/cranelift/codegen/src/machinst/abi.rs
+++ b/cranelift/codegen/src/machinst/abi.rs
@@ -1,7 +1,7 @@
 //! ABI definitions.
 
 use crate::binemit::StackMap;
-use crate::ir::{Signature, StackSlot};
+use crate::ir::{DynamicStackSlot, Signature, StackSlot};
 use crate::isa::CallConv;
 use crate::machinst::*;
 use crate::settings;
@@ -47,11 +47,17 @@ pub trait ABICallee {
     /// Number of return values.
     fn num_retvals(&self) -> usize;
 
-    /// Number of stack slots (not spill slots).
-    fn num_stackslots(&self) -> usize;
+    /// Number of sized stack slots (not spill slots).
+    fn num_sized_stackslots(&self) -> usize;
 
-    /// The offsets of all stack slots (not spill slots) for debuginfo purposes.
-    fn stackslot_offsets(&self) -> &PrimaryMap<StackSlot, u32>;
+    /// The offsets of all sized stack slots (not spill slots) for debuginfo purposes.
+    fn sized_stackslot_offsets(&self) -> &PrimaryMap<StackSlot, u32>;
+
+    /// The offsets of all dynamic stack slots (not spill slots) for debuginfo purposes.
+    fn dynamic_stackslot_offsets(&self) -> &PrimaryMap<DynamicStackSlot, u32>;
+
+    /// The size, in bytes, of the given concrete dynamic type.
+    fn dynamic_type_size(&self, ty: Type) -> u32;
 
     /// Generate an instruction which copies an argument to a destination
     /// register.
@@ -101,8 +107,16 @@ pub trait ABICallee {
     /// Update with the clobbered registers, post-regalloc.
     fn set_clobbered(&mut self, clobbered: Vec<Writable<RealReg>>);
 
-    /// Get the address of a stackslot.
-    fn stackslot_addr(&self, slot: StackSlot, offset: u32, into_reg: Writable<Reg>) -> Self::I;
+    /// Get the address of a sized stackslot.
+    fn sized_stackslot_addr(
+        &self,
+        slot: StackSlot,
+        offset: u32,
+        into_reg: Writable<Reg>,
+    ) -> Self::I;
+
+    /// Get the address of a dynamic stackslot.
+    fn dynamic_stackslot_addr(&self, slot: DynamicStackSlot, into_reg: Writable<Reg>) -> Self::I;
 
     /// Load from a spillslot.
     fn load_spillslot(
diff --git a/cranelift/codegen/src/machinst/abi_impl.rs b/cranelift/codegen/src/machinst/abi_impl.rs
index b2a11e2d76..45bf8884b9 100644
--- a/cranelift/codegen/src/machinst/abi_impl.rs
+++ b/cranelift/codegen/src/machinst/abi_impl.rs
@@ -126,7 +126,8 @@ use super::abi::*;
 use crate::binemit::StackMap;
 use crate::ir::types::*;
-use crate::ir::{ArgumentExtension, ArgumentPurpose, StackSlot};
+use crate::ir::{ArgumentExtension, ArgumentPurpose, DynamicStackSlot, Signature, StackSlot};
+use crate::isa::TargetIsa;
 use crate::machinst::*;
 use crate::settings;
 use crate::CodegenResult;
@@ -138,6 +139,8 @@ use std::convert::TryFrom;
 use std::marker::PhantomData;
 use std::mem;
 
+use std::collections::HashMap;
+
 /// A location for (part of) an argument or return value. These "storage slots"
 /// are specified for each register-sized part of an argument.
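To make the `DynamicStackLoad`/`DynamicStackStore` legalizations above concrete: a dynamic slot access is rewritten into an explicit address computation plus an ordinary memory operation. Roughly, in CLIF (a hand-written sketch assuming a 64-bit pointer type, not a filetest from this patch):

    ;; before legalization:
    v1 = dynamic_stack_load.dt0 dss0
    ;; after legalization:
    v2 = dynamic_stack_addr.i64 dss0
    v1 = load.dt0 notrap aligned v2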
#[derive(Clone, Copy, Debug, PartialEq, Eq)] @@ -430,6 +433,7 @@ pub trait ABIMachineSpec { fn get_clobbered_callee_saves( call_conv: isa::CallConv, flags: &settings::Flags, + sig: &Signature, regs: &[Writable], ) -> Vec>; @@ -465,6 +469,7 @@ pub trait ABIMachineSpec { /// clobber-save sequence finished. fn gen_clobber_restore( call_conv: isa::CallConv, + sig: &Signature, flags: &settings::Flags, clobbers: &[Writable], fixed_frame_storage_size: u32, @@ -495,7 +500,7 @@ pub trait ABIMachineSpec { ) -> SmallVec<[Self::I; 8]>; /// Get the number of spillslots required for the given register-class. - fn get_number_of_spillslots_for_value(rc: RegClass) -> u32; + fn get_number_of_spillslots_for_value(rc: RegClass, target_vector_bytes: u32) -> u32; /// Get the current virtual-SP offset from an instruction-emission state. fn get_virtual_sp_offset_from_state(s: &::State) -> i64; @@ -528,9 +533,9 @@ pub struct ABISig { /// pointer. rets: Vec, /// Space on stack used to store arguments. - stack_arg_space: i64, + sized_stack_arg_space: i64, /// Space on stack used to store return values. - stack_ret_space: i64, + sized_stack_ret_space: i64, /// Index in `args` of the stack-return-value-area argument. stack_ret_arg: Option, /// Specific order for copying into arguments at callsites. We must be @@ -550,15 +555,15 @@ impl ABISig { // Compute args and retvals from signature. Handle retvals first, // because we may need to add a return-area arg to the args. - let (rets, stack_ret_space, _) = M::compute_arg_locs( + let (rets, sized_stack_ret_space, _) = M::compute_arg_locs( sig.call_conv, flags, &sig.returns, ArgsOrRets::Rets, /* extra ret-area ptr = */ false, )?; - let need_stack_return_area = stack_ret_space > 0; - let (args, stack_arg_space, stack_ret_arg) = M::compute_arg_locs( + let need_stack_return_area = sized_stack_ret_space > 0; + let (args, sized_stack_arg_space, stack_ret_arg) = M::compute_arg_locs( sig.call_conv, flags, &sig.params, @@ -586,8 +591,8 @@ impl ABISig { sig, args, rets, - stack_arg_space, - stack_ret_space, + sized_stack_arg_space, + sized_stack_ret_space, stack_ret_arg, copy_to_arg_order, ); @@ -595,8 +600,8 @@ impl ABISig { Ok(ABISig { args, rets, - stack_arg_space, - stack_ret_space, + sized_stack_arg_space, + sized_stack_ret_space, stack_ret_arg, copy_to_arg_order, call_conv: sig.call_conv, @@ -666,8 +671,8 @@ impl ABISig { } /// Get total stack space required for arguments. - pub fn stack_arg_space(&self) -> i64 { - self.stack_arg_space + pub fn sized_stack_arg_space(&self) -> i64 { + self.sized_stack_arg_space } /// Get the number of return values expected. @@ -681,8 +686,8 @@ impl ABISig { } /// Get total stack space required for return values. - pub fn stack_ret_space(&self) -> i64 { - self.stack_ret_space + pub fn sized_stack_ret_space(&self) -> i64 { + self.sized_stack_ret_space } /// Get information specifying how to pass the implicit pointer @@ -699,15 +704,19 @@ pub struct ABICalleeImpl { ir_sig: ir::Signature, /// Signature: arg and retval regs. sig: ABISig, - /// Offsets to each stackslot. - stackslots: PrimaryMap, - /// Total stack size of all stackslots. + /// Defined dynamic types. + dynamic_type_sizes: HashMap, + /// Offsets to each dynamic stackslot. + dynamic_stackslots: PrimaryMap, + /// Offsets to each sized stackslot. + sized_stackslots: PrimaryMap, + /// Total stack size of all stackslots stackslots_size: u32, /// Stack size to be reserved for outgoing arguments. outgoing_args_size: u32, /// Clobbered registers, from regalloc. 
clobbered: Vec>, - /// Total number of spillslots, from regalloc. + /// Total number of spillslots, including for 'dynamic' types, from regalloc. spillslots: Option, /// Storage allocated for the fixed part of the stack frame. This is /// usually the same as the total frame size below, except in the case @@ -766,13 +775,10 @@ fn get_special_purpose_param_register( impl ABICalleeImpl { /// Create a new body ABI instance. - pub fn new( - f: &ir::Function, - flags: settings::Flags, - isa_flags: Vec, - ) -> CodegenResult { + pub fn new(f: &ir::Function, isa: &dyn TargetIsa) -> CodegenResult { log::trace!("ABI: func signature {:?}", f.signature); + let flags = isa.flags().clone(); let ir_sig = ensure_struct_return_ptr_is_returned(&f.signature); let sig = ABISig::from_func_sig::(&ir_sig, &flags)?; @@ -791,16 +797,41 @@ impl ABICalleeImpl { call_conv ); - // Compute stackslot locations and total stackslot size. - let mut stack_offset: u32 = 0; - let mut stackslots = PrimaryMap::new(); - for (stackslot, data) in f.stack_slots.iter() { - let off = stack_offset; - stack_offset += data.size; + // Compute sized stackslot locations and total stackslot size. + let mut sized_stack_offset: u32 = 0; + let mut sized_stackslots = PrimaryMap::new(); + for (stackslot, data) in f.sized_stack_slots.iter() { + let off = sized_stack_offset; + sized_stack_offset += data.size; let mask = M::word_bytes() - 1; - stack_offset = (stack_offset + mask) & !mask; - debug_assert_eq!(stackslot.as_u32() as usize, stackslots.len()); - stackslots.push(off); + sized_stack_offset = (sized_stack_offset + mask) & !mask; + debug_assert_eq!(stackslot.as_u32() as usize, sized_stackslots.len()); + sized_stackslots.push(off); + } + + // Compute dynamic stackslot locations and total stackslot size. 
+ let mut dynamic_stackslots = PrimaryMap::new(); + let mut dynamic_stack_offset: u32 = sized_stack_offset; + for (stackslot, data) in f.dynamic_stack_slots.iter() { + debug_assert_eq!(stackslot.as_u32() as usize, dynamic_stackslots.len()); + let off = dynamic_stack_offset; + let ty = f + .get_concrete_dynamic_ty(data.dyn_ty) + .unwrap_or_else(|| panic!("invalid dynamic vector type: {}", data.dyn_ty)); + dynamic_stack_offset += isa.dynamic_vector_bytes(ty); + let mask = M::word_bytes() - 1; + dynamic_stack_offset = (dynamic_stack_offset + mask) & !mask; + dynamic_stackslots.push(off); + } + let stackslots_size = dynamic_stack_offset; + + let mut dynamic_type_sizes = HashMap::with_capacity(f.dfg.dynamic_types.len()); + for (dyn_ty, _data) in f.dfg.dynamic_types.iter() { + let ty = f + .get_concrete_dynamic_ty(dyn_ty) + .unwrap_or_else(|| panic!("invalid dynamic vector type: {}", dyn_ty)); + let size = isa.dynamic_vector_bytes(ty); + dynamic_type_sizes.insert(ty, size); } // Figure out what instructions, if any, will be needed to check the @@ -827,8 +858,10 @@ impl ABICalleeImpl { Ok(Self { ir_sig, sig, - stackslots, - stackslots_size: stack_offset, + dynamic_stackslots, + dynamic_type_sizes, + sized_stackslots, + stackslots_size, outgoing_args_size: 0, clobbered: vec![], spillslots: None, @@ -837,7 +870,7 @@ impl ABICalleeImpl { ret_area_ptr: None, call_conv, flags, - isa_flags, + isa_flags: isa.isa_flags(), is_leaf: f.is_leaf(), stack_limit, probestack_min_frame, @@ -1060,12 +1093,16 @@ impl ABICallee for ABICalleeImpl { self.sig.rets.len() } - fn num_stackslots(&self) -> usize { - self.stackslots.len() + fn num_sized_stackslots(&self) -> usize { + self.sized_stackslots.len() } - fn stackslot_offsets(&self) -> &PrimaryMap { - &self.stackslots + fn sized_stackslot_offsets(&self) -> &PrimaryMap { + &self.sized_stackslots + } + + fn dynamic_stackslot_offsets(&self) -> &PrimaryMap { + &self.dynamic_stackslots } fn gen_copy_arg_to_regs( @@ -1256,15 +1293,34 @@ impl ABICallee for ABICalleeImpl { self.clobbered = clobbered; } - /// Produce an instruction that computes a stackslot address. - fn stackslot_addr(&self, slot: StackSlot, offset: u32, into_reg: Writable) -> Self::I { + /// Produce an instruction that computes a sized stackslot address. + fn sized_stackslot_addr( + &self, + slot: StackSlot, + offset: u32, + into_reg: Writable, + ) -> Self::I { // Offset from beginning of stackslot area, which is at nominal SP (see // [MemArg::NominalSPOffset] for more details on nominal SP tracking). - let stack_off = self.stackslots[slot] as i64; + let stack_off = self.sized_stackslots[slot] as i64; let sp_off: i64 = stack_off + (offset as i64); M::gen_get_stack_addr(StackAMode::NominalSPOffset(sp_off, I8), into_reg, I8) } + /// Produce an instruction that computes a dynamic stackslot address. + fn dynamic_stackslot_addr(&self, slot: DynamicStackSlot, into_reg: Writable) -> Self::I { + let stack_off = self.dynamic_stackslots[slot] as i64; + M::gen_get_stack_addr( + StackAMode::NominalSPOffset(stack_off, I64X2XN), + into_reg, + I64X2XN, + ) + } + + fn dynamic_type_size(&self, ty: Type) -> u32 { + self.dynamic_type_sizes[&ty] + } + /// Load from a spillslot. fn load_spillslot( &self, @@ -1339,8 +1395,12 @@ impl ABICallee for ABICalleeImpl { } let mask = M::stack_align(self.call_conv) - 1; let total_stacksize = (total_stacksize + mask) & !mask; // 16-align the stack. 
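A worked example of the slot layout computed above, assuming a 64-bit target and the 16-byte dynamic vector size used throughout this patch: a function with one 4-byte sized slot and two `i32x4*gv0` dynamic slots gets

    sized_stackslots   = [0]       (sized_stack_offset = 4, rounded up to 8)
    dynamic_stackslots = [8, 24]   (each dynamic slot adds 16 bytes)
    stackslots_size    = 40

so dynamic slots are laid out directly after the word-aligned sized slots.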
- let clobbered_callee_saves = - M::get_clobbered_callee_saves(self.call_conv, &self.flags, &self.clobbered); + let clobbered_callee_saves = M::get_clobbered_callee_saves( + self.call_conv, + &self.flags, + self.signature(), + &self.clobbered, + ); let mut insts = smallvec![]; if !self.call_conv.extends_baldrdash() { @@ -1408,6 +1468,7 @@ impl ABICallee for ABICalleeImpl { // Restore clobbered registers. insts.extend(M::gen_clobber_restore( self.call_conv, + self.signature(), &self.flags, &self.clobbered, self.fixed_frame_storage_size, @@ -1441,11 +1502,21 @@ impl ABICallee for ABICalleeImpl { } fn stack_args_size(&self) -> u32 { - self.sig.stack_arg_space as u32 + self.sig.sized_stack_arg_space as u32 } fn get_spillslot_size(&self, rc: RegClass) -> u32 { - M::get_number_of_spillslots_for_value(rc) + let max = if self.dynamic_type_sizes.len() == 0 { + 16 + } else { + *self + .dynamic_type_sizes + .iter() + .max_by(|x, y| x.1.cmp(&y.1)) + .map(|(_k, v)| v) + .unwrap() + }; + M::get_number_of_spillslots_for_value(rc, max) } fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg) -> Self::I { @@ -1586,17 +1657,17 @@ impl ABICaller for ABICallerImpl { } fn accumulate_outgoing_args_size>(&self, ctx: &mut C) { - let off = self.sig.stack_arg_space + self.sig.stack_ret_space; + let off = self.sig.sized_stack_arg_space + self.sig.sized_stack_ret_space; ctx.abi().accumulate_outgoing_args_size(off as u32); } fn emit_stack_pre_adjust>(&self, ctx: &mut C) { - let off = self.sig.stack_arg_space + self.sig.stack_ret_space; + let off = self.sig.sized_stack_arg_space + self.sig.sized_stack_ret_space; adjust_stack_and_nominal_sp::(ctx, off as i32, /* is_sub = */ true) } fn emit_stack_post_adjust>(&self, ctx: &mut C) { - let off = self.sig.stack_arg_space + self.sig.stack_ret_space; + let off = self.sig.sized_stack_arg_space + self.sig.sized_stack_ret_space; adjust_stack_and_nominal_sp::(ctx, off as i32, /* is_sub = */ false) } @@ -1720,7 +1791,7 @@ impl ABICaller for ABICallerImpl { ctx.emit(M::gen_move(*into_reg, Reg::from(reg), ty)); } &ABIArgSlot::Stack { offset, ty, .. } => { - let ret_area_base = self.sig.stack_arg_space; + let ret_area_base = self.sig.sized_stack_arg_space; ctx.emit(M::gen_load_stack( StackAMode::SPOffset(offset + ret_area_base, ty), *into_reg, @@ -1744,7 +1815,7 @@ impl ABICaller for ABICallerImpl { let word_type = M::word_type(); if let Some(i) = self.sig.stack_ret_arg { let rd = ctx.alloc_tmp(word_type).only_reg().unwrap(); - let ret_area_base = self.sig.stack_arg_space; + let ret_area_base = self.sig.sized_stack_arg_space; ctx.emit(M::gen_get_stack_addr( StackAMode::SPOffset(ret_area_base, I8), rd, diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index 2f2b48ca8f..28005863df 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -6,7 +6,9 @@ use smallvec::SmallVec; use std::cell::Cell; pub use super::MachLabel; -pub use crate::ir::{ArgumentExtension, ExternalName, FuncRef, GlobalValue, SigRef}; +pub use crate::ir::{ + ArgumentExtension, DynamicStackSlot, ExternalName, FuncRef, GlobalValue, SigRef, StackSlot, +}; pub use crate::isa::unwind::UnwindInst; pub use crate::machinst::{ABIArg, ABIArgSlot, ABISig, RealReg, Reg, RelocDistance, Writable}; @@ -243,7 +245,18 @@ macro_rules! 
isle_prelude_methods { #[inline] fn fits_in_32(&mut self, ty: Type) -> Option { - if ty.bits() <= 32 { + if ty.bits() <= 32 && !ty.is_dynamic_vector() { + Some(ty) + } else { + None + } + } + + #[inline] + fn lane_fits_in_32(&mut self, ty: Type) -> Option { + if !ty.is_vector() && !ty.is_dynamic_vector() { + None + } else if ty.lane_type().bits() <= 32 { Some(ty) } else { None @@ -252,7 +265,7 @@ macro_rules! isle_prelude_methods { #[inline] fn fits_in_64(&mut self, ty: Type) -> Option { - if ty.bits() <= 64 { + if ty.bits() <= 64 && !ty.is_dynamic_vector() { Some(ty) } else { None @@ -418,6 +431,36 @@ macro_rules! isle_prelude_methods { } } + #[inline] + fn dynamic_lane(&mut self, ty: Type) -> Option<(u32, u32)> { + if ty.is_dynamic_vector() { + Some((ty.lane_bits(), ty.min_lane_count())) + } else { + None + } + } + + #[inline] + fn dynamic_int_lane(&mut self, ty: Type) -> Option { + if ty.is_dynamic_vector() && crate::machinst::ty_has_int_representation(ty.lane_type()) + { + Some(ty.lane_bits()) + } else { + None + } + } + + #[inline] + fn dynamic_fp_lane(&mut self, ty: Type) -> Option { + if ty.is_dynamic_vector() + && crate::machinst::ty_has_float_or_vec_representation(ty.lane_type()) + { + Some(ty.lane_bits()) + } else { + None + } + } + #[inline] fn def_inst(&mut self, val: Value) -> Option { self.lower_ctx.dfg().value_def(val).inst() @@ -635,12 +678,12 @@ macro_rules! isle_prelude_methods { } } - fn abi_stack_arg_space(&mut self, abi: &ABISig) -> i64 { - abi.stack_arg_space() + fn abi_sized_stack_arg_space(&mut self, abi: &ABISig) -> i64 { + abi.sized_stack_arg_space() } - fn abi_stack_ret_space(&mut self, abi: &ABISig) -> i64 { - abi.stack_ret_space() + fn abi_sized_stack_ret_space(&mut self, abi: &ABISig) -> i64 { + abi.sized_stack_ret_space() } fn abi_arg_only_slot(&mut self, arg: &ABIArg) -> Option { @@ -656,6 +699,31 @@ macro_rules! isle_prelude_methods { } } + fn abi_stackslot_addr( + &mut self, + dst: WritableReg, + stack_slot: StackSlot, + offset: Offset32, + ) -> MInst { + let offset = u32::try_from(i32::from(offset)).unwrap(); + self.lower_ctx + .abi() + .sized_stackslot_addr(stack_slot, offset, dst) + } + + fn abi_dynamic_stackslot_addr( + &mut self, + dst: WritableReg, + stack_slot: DynamicStackSlot, + ) -> MInst { + assert!(self + .lower_ctx + .abi() + .dynamic_stackslot_offsets() + .is_valid(stack_slot)); + self.lower_ctx.abi().dynamic_stackslot_addr(stack_slot, dst) + } + fn real_reg_to_reg(&mut self, reg: RealReg) -> Reg { Reg::from(reg) } diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index 6d0d145349..a47f4a2a95 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -45,7 +45,7 @@ //! ``` use crate::binemit::{Addend, CodeInfo, CodeOffset, Reloc, StackMap}; -use crate::ir::{SourceLoc, StackSlot, Type}; +use crate::ir::{DynamicStackSlot, SourceLoc, StackSlot, Type}; use crate::result::CodegenResult; use crate::settings::Flags; use crate::value_label::ValueLabelsRanges; @@ -282,7 +282,9 @@ pub struct MachCompileResult { /// Debug info: value labels to registers/stackslots at code offsets. pub value_labels_ranges: ValueLabelsRanges, /// Debug info: stackslots to stack pointer offsets. - pub stackslot_offsets: PrimaryMap, + pub sized_stackslot_offsets: PrimaryMap, + /// Debug info: stackslots to stack pointer offsets. + pub dynamic_stackslot_offsets: PrimaryMap, /// Basic-block layout info: block start offsets. 
    ///
    /// This info is generated only if the `machine_code_cfg_info`
diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs
index bc4966de77..a29945383a 100644
--- a/cranelift/codegen/src/machinst/vcode.rs
+++ b/cranelift/codegen/src/machinst/vcode.rs
@@ -19,7 +19,9 @@
 use crate::fx::FxHashMap;
 use crate::fx::FxHashSet;
-use crate::ir::{self, types, Constant, ConstantData, LabelValueLoc, SourceLoc, ValueLabel};
+use crate::ir::{
+    self, types, Constant, ConstantData, DynamicStackSlot, LabelValueLoc, SourceLoc, ValueLabel,
+};
 use crate::machinst::*;
 use crate::timing;
 use crate::ValueLocRange;
@@ -207,8 +209,11 @@ pub struct EmitResult {
     /// epilogue(s), and makes use of the regalloc results.
     pub disasm: Option<String>,
 
-    /// Offsets of stackslots.
-    pub stackslot_offsets: PrimaryMap<StackSlot, u32>,
+    /// Offsets of sized stackslots.
+    pub sized_stackslot_offsets: PrimaryMap<StackSlot, u32>,
+
+    /// Offsets of dynamic stackslots.
+    pub dynamic_stackslot_offsets: PrimaryMap<DynamicStackSlot, u32>,
 
     /// Value-labels information (debug metadata).
     pub value_labels_ranges: ValueLabelsRanges,
@@ -1038,7 +1043,8 @@ impl VCode {
             inst_offsets,
             func_body_len,
             disasm: if want_disasm { Some(disasm) } else { None },
-            stackslot_offsets: self.abi.stackslot_offsets().clone(),
+            sized_stackslot_offsets: self.abi.sized_stackslot_offsets().clone(),
+            dynamic_stackslot_offsets: self.abi.dynamic_stackslot_offsets().clone(),
             value_labels_ranges,
             frame_size,
         }
diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle
index bf93a3146e..ccaef32341 100644
--- a/cranelift/codegen/src/prelude.isle
+++ b/cranelift/codegen/src/prelude.isle
@@ -256,6 +256,8 @@
 (extern const $F32X4 Type)
 (extern const $F64X2 Type)
 
+(extern const $I32X4XN Type)
+
 ;; Get the bit width of a given type.
 (decl pure ty_bits (Type) u8)
 (extern constructor ty_bits ty_bits)
@@ -290,6 +292,10 @@
 (decl fits_in_32 (Type) Type)
 (extern extractor fits_in_32 fits_in_32)
 
+;; An extractor that only matches vector types whose lane type fits in 32 bits.
+(decl lane_fits_in_32 (Type) Type)
+(extern extractor lane_fits_in_32 lane_fits_in_32)
+
 ;; An extractor that only matches types that can fit in 64 bits.
 (decl fits_in_64 (Type) Type)
 (extern extractor fits_in_64 fits_in_64)
@@ -433,6 +439,21 @@
 (decl multi_lane (u32 u32) Type)
 (extern extractor multi_lane multi_lane)
 
+;; Match a dynamic-lane type, extracting (# bits per lane, minimum # of lanes)
+;; from the given type.
+(decl dynamic_lane (u32 u32) Type)
+(extern extractor dynamic_lane dynamic_lane)
+
+;; Match a dynamic-lane integer type, extracting (# bits per lane) from the given
+;; type.
+(decl dynamic_int_lane (u32) Type)
+(extern extractor dynamic_int_lane dynamic_int_lane)
+
+;; Match a dynamic-lane floating point type, extracting (# bits per lane)
+;; from the given type.
+(decl dynamic_fp_lane (u32) Type)
+(extern extractor dynamic_fp_lane dynamic_fp_lane)
+
 ;; Match the instruction that defines the given value, if any.
 (decl def_inst (Inst) Value)
 (extern extractor def_inst def_inst)
@@ -727,12 +748,20 @@
 (extern extractor abi_no_ret_arg abi_no_ret_arg)
 
 ;; Size of the argument area.
-(decl abi_stack_arg_space (ABISig) i64)
-(extern constructor abi_stack_arg_space abi_stack_arg_space)
+(decl abi_sized_stack_arg_space (ABISig) i64)
+(extern constructor abi_sized_stack_arg_space abi_sized_stack_arg_space)
 
 ;; Size of the return-value area.
-(decl abi_stack_ret_space (ABISig) i64) -(extern constructor abi_stack_ret_space abi_stack_ret_space) +(decl abi_sized_stack_ret_space (ABISig) i64) +(extern constructor abi_sized_stack_ret_space abi_sized_stack_ret_space) + +;; StackSlot addr +(decl abi_stackslot_addr (WritableReg StackSlot Offset32) MInst) +(extern constructor abi_stackslot_addr abi_stackslot_addr) + +;; DynamicStackSlot addr +(decl abi_dynamic_stackslot_addr (WritableReg DynamicStackSlot) MInst) +(extern constructor abi_dynamic_stackslot_addr abi_dynamic_stackslot_addr) ;; Extractor to detect the special case where an argument or ;; return value only requires a single slot to be passed. diff --git a/cranelift/codegen/src/verifier/mod.rs b/cranelift/codegen/src/verifier/mod.rs index 0d44f47e74..b9bc0c2ee4 100644 --- a/cranelift/codegen/src/verifier/mod.rs +++ b/cranelift/codegen/src/verifier/mod.rs @@ -65,8 +65,8 @@ use crate::ir; use crate::ir::entities::AnyEntity; use crate::ir::instructions::{BranchInfo, CallInfo, InstructionFormat, ResolvedConstraint}; use crate::ir::{ - types, ArgumentPurpose, Block, Constant, FuncRef, Function, GlobalValue, Inst, JumpTable, - Opcode, SigRef, StackSlot, Type, Value, ValueDef, ValueList, + types, ArgumentPurpose, Block, Constant, DynamicStackSlot, FuncRef, Function, GlobalValue, + Inst, JumpTable, Opcode, SigRef, StackSlot, Type, Value, ValueDef, ValueList, }; use crate::isa::TargetIsa; use crate::iterators::IteratorExtras; @@ -681,6 +681,14 @@ impl<'a> Verifier<'a> { StackLoad { stack_slot, .. } | StackStore { stack_slot, .. } => { self.verify_stack_slot(inst, stack_slot, errors)?; } + DynamicStackLoad { + dynamic_stack_slot, .. + } + | DynamicStackStore { + dynamic_stack_slot, .. + } => { + self.verify_dynamic_stack_slot(inst, dynamic_stack_slot, errors)?; + } UnaryGlobalValue { global_value, .. } => { self.verify_global_value(inst, global_value, errors)?; } @@ -819,7 +827,7 @@ impl<'a> Verifier<'a> { ss: StackSlot, errors: &mut VerifierErrors, ) -> VerifierStepResult<()> { - if !self.func.stack_slots.is_valid(ss) { + if !self.func.sized_stack_slots.is_valid(ss) { errors.nonfatal(( inst, self.context(inst), @@ -830,6 +838,23 @@ impl<'a> Verifier<'a> { } } + fn verify_dynamic_stack_slot( + &self, + inst: Inst, + ss: DynamicStackSlot, + errors: &mut VerifierErrors, + ) -> VerifierStepResult<()> { + if !self.func.dynamic_stack_slots.is_valid(ss) { + errors.nonfatal(( + inst, + self.context(inst), + format!("invalid dynamic stack slot {}", ss), + )) + } else { + Ok(()) + } + } + fn verify_global_value( &self, inst: Inst, diff --git a/cranelift/codegen/src/write.rs b/cranelift/codegen/src/write.rs index ff8bfea582..51b2235db5 100644 --- a/cranelift/codegen/src/write.rs +++ b/cranelift/codegen/src/write.rs @@ -41,7 +41,12 @@ pub trait FuncWriter { fn super_preamble(&mut self, w: &mut dyn Write, func: &Function) -> Result { let mut any = false; - for (ss, slot) in func.stack_slots.iter() { + for (ss, slot) in func.dynamic_stack_slots.iter() { + any = true; + self.write_entity_definition(w, func, ss.into(), slot)?; + } + + for (ss, slot) in func.sized_stack_slots.iter() { any = true; self.write_entity_definition(w, func, ss.into(), slot)?; } @@ -493,6 +498,14 @@ pub fn write_operands(w: &mut dyn Write, dfg: &DataFlowGraph, inst: Inst) -> fmt offset, .. } => write!(w, " {}, {}{}", arg, stack_slot, offset), + DynamicStackLoad { + dynamic_stack_slot, .. + } => write!(w, " {}", dynamic_stack_slot), + DynamicStackStore { + arg, + dynamic_stack_slot, + .. 
+ } => write!(w, " {}, {}", arg, dynamic_stack_slot), HeapAddr { heap, arg, imm, .. } => write!(w, " {}, {}, {}", heap, arg, imm), TableAddr { table, arg, .. } => write!(w, " {}, {}", table, arg), Load { @@ -570,7 +583,7 @@ mod tests { f.name = ExternalName::testcase("foo"); assert_eq!(f.to_string(), "function %foo() fast {\n}\n"); - f.create_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 4)); + f.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 4)); assert_eq!( f.to_string(), "function %foo() fast {\n ss0 = explicit_slot 4\n}\n" diff --git a/cranelift/filetests/filetests/isa/aarch64/dynamic-simd-narrow.clif b/cranelift/filetests/filetests/isa/aarch64/dynamic-simd-narrow.clif new file mode 100644 index 0000000000..7f39747abc --- /dev/null +++ b/cranelift/filetests/filetests/isa/aarch64/dynamic-simd-narrow.clif @@ -0,0 +1,164 @@ +test compile +target aarch64 + +function %snarrow_i16x8(i16) -> i8x16 { + gv0 = dyn_scale_target_const.i16x8 + gv1 = dyn_scale_target_const.i8x16 + dt0 = i16x8*gv0 + dt1 = i8x16*gv0 + +block0(v0: i16): + v1 = splat.dt0 v0 + v2 = snarrow.dt0 v1, v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.8h, w0 +; nextln: sqxtn v0.8b, v2.8h +; nextln: sqxtn2 v0.16b, v2.8h +; nextln: ret + +function %snarrow_i32x4(i32) -> i16x8 { + gv0 = dyn_scale_target_const.i32x4 + gv1 = dyn_scale_target_const.i16x8 + dt0 = i32x4*gv0 + dt1 = i16x8*gv0 + +block0(v0: i32): + v1 = splat.dt0 v0 + v2 = snarrow.dt0 v1, v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.4s, w0 +; nextln: sqxtn v0.4h, v2.4s +; nextln: sqxtn2 v0.8h, v2.4s +; nextln: ret + +function %snarrow_i64x2(i64) -> i32x4 { + gv0 = dyn_scale_target_const.i64x2 + gv1 = dyn_scale_target_const.i32x4 + dt0 = i64x2*gv0 + dt1 = i32x4*gv0 + +block0(v0: i64): + v1 = splat.dt0 v0 + v2 = snarrow.dt0 v1, v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.2d, x0 +; nextln: sqxtn v0.2s, v2.2d +; nextln: sqxtn2 v0.4s, v2.2d +; nextln: ret + +function %unarrow_i16x8(i16) -> i8x16 { + gv0 = dyn_scale_target_const.i16x8 + gv1 = dyn_scale_target_const.i8x16 + dt0 = i16x8*gv0 + dt1 = i8x16*gv0 + +block0(v0: i16): + v1 = splat.dt0 v0 + v2 = unarrow.dt0 v1, v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.8h, w0 +; nextln: sqxtun v0.8b, v2.8h +; nextln: sqxtun2 v0.16b, v2.8h +; nextln: ret + +function %unarrow_i32x4(i32) -> i16x8 { + gv0 = dyn_scale_target_const.i32x4 + gv1 = dyn_scale_target_const.i16x8 + dt0 = i32x4*gv0 + dt1 = i16x8*gv0 + +block0(v0: i32): + v1 = splat.dt0 v0 + v2 = unarrow.dt0 v1, v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.4s, w0 +; nextln: sqxtun v0.4h, v2.4s +; nextln: sqxtun2 v0.8h, v2.4s +; nextln: ret + +function %unarrow_i64x2(i64) -> i32x4 { + gv0 = dyn_scale_target_const.i64x2 + gv1 = dyn_scale_target_const.i32x4 + dt0 = i64x2*gv0 + dt1 = i32x4*gv0 + +block0(v0: i64): + v1 = splat.dt0 v0 + v2 = unarrow.dt0 v1, v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.2d, x0 +; nextln: sqxtun v0.2s, v2.2d +; nextln: sqxtun2 v0.4s, v2.2d +; nextln: ret + +function %uunarrow_i16x8(i16) -> i8x16 { + gv0 = dyn_scale_target_const.i16x8 + gv1 = dyn_scale_target_const.i8x16 + dt0 = i16x8*gv0 + dt1 = i8x16*gv0 + +block0(v0: i16): + v1 = splat.dt0 v0 + v2 = uunarrow.dt0 v1, v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.8h, w0 +; nextln: uqxtn v0.8b, v2.8h +; nextln: uqxtn2 v0.16b, v2.8h +; nextln: ret + +function %uunarrow_i32x4(i32) -> i16x8 { + gv0 = dyn_scale_target_const.i32x4 + gv1 = 
dyn_scale_target_const.i16x8 + dt0 = i32x4*gv0 + dt1 = i16x8*gv0 + +block0(v0: i32): + v1 = splat.dt0 v0 + v2 = uunarrow.dt0 v1, v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.4s, w0 +; nextln: uqxtn v0.4h, v2.4s +; nextln: uqxtn2 v0.8h, v2.4s +; nextln: ret + +function %uunarrow_i64x2(i64) -> i32x4 { + gv0 = dyn_scale_target_const.i64x2 + gv1 = dyn_scale_target_const.i32x4 + dt0 = i64x2*gv0 + dt1 = i32x4*gv0 + +block0(v0: i64): + v1 = splat.dt0 v0 + v2 = uunarrow.dt0 v1, v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.2d, x0 +; nextln: uqxtn v0.2s, v2.2d +; nextln: uqxtn2 v0.4s, v2.2d +; nextln: ret diff --git a/cranelift/filetests/filetests/isa/aarch64/dynamic-simd-neon.clif b/cranelift/filetests/filetests/isa/aarch64/dynamic-simd-neon.clif new file mode 100644 index 0000000000..255e19bfde --- /dev/null +++ b/cranelift/filetests/filetests/isa/aarch64/dynamic-simd-neon.clif @@ -0,0 +1,104 @@ +test compile +target aarch64 + +function %i8x16_splat_add(i8, i8) -> i8x16 { + gv0 = dyn_scale_target_const.i8x16 + dt0 = i8x16*gv0 + +block0(v0: i8, v1: i8): + v2 = splat.dt0 v0 + v3 = splat.dt0 v1 + v4 = iadd v2, v3 + v5 = extract_vector v4, 0 + return v5 +} + +; check: dup v4.16b, w0 +; nextln: dup v6.16b, w1 +; nextln: add v0.16b, v4.16b, v6.16b +; nextln: ret + +function %i16x8_splat_add(i16, i16) -> i16x8 { + gv0 = dyn_scale_target_const.i16x8 + dt0 = i16x8*gv0 + +block0(v0: i16, v1: i16): + v2 = splat.dt0 v0 + v3 = splat.dt0 v1 + v4 = iadd v2, v3 + v5 = extract_vector v4, 0 + return v5 +} + +; check: dup v4.8h, w0 +; nextln: dup v6.8h, w1 +; nextln: add v0.8h, v4.8h, v6.8h +; nextln: ret + +function %i32x4_splat_mul(i32, i32) -> i32x4 { + gv0 = dyn_scale_target_const.i32x4 + dt0 = i32x4*gv0 + +block0(v0: i32, v1: i32): + v2 = splat.dt0 v0 + v3 = splat.dt0 v1 + v4 = imul v2, v3 + v5 = extract_vector v4, 0 + return v5 +} + +; check: dup v4.4s, w0 +; nextln: dup v6.4s, w1 +; nextln: mul v0.4s, v4.4s, v6.4s +; nextln: ret + +function %i64x2_splat_sub(i64, i64) -> i64x2 { + gv0 = dyn_scale_target_const.i64x2 + dt0 = i64x2*gv0 + +block0(v0: i64, v1: i64): + v2 = splat.dt0 v0 + v3 = splat.dt0 v1 + v4 = isub v2, v3 + v5 = extract_vector v4, 0 + return v5 +} + +; check: dup v4.2d, x0 +; nextln: dup v6.2d, x1 +; nextln: sub v0.2d, v4.2d, v6.2d +; nextln: ret + +function %f32x4_splat_add(f32, f32) -> f32x4 { + gv0 = dyn_scale_target_const.f32x4 + dt0 = f32x4*gv0 + +block0(v0: f32, v1: f32): + v2 = splat.dt0 v0 + v3 = splat.dt0 v1 + v4 = fadd v2, v3 + v5 = extract_vector v4, 0 + return v5 +} + +; check: dup v4.4s, v0.s[0] +; nextln: dup v6.4s, v1.s[0] +; nextln: fadd v0.4s, v4.4s, v6.4s +; nextln: ret + +function %f64x2_splat_sub(f64, f64) -> f64x2 { + gv0 = dyn_scale_target_const.f64x2 + dt0 = f64x2*gv0 + +block0(v0: f64, v1: f64): + v2 = splat.dt0 v0 + v3 = splat.dt0 v1 + v4 = fsub v2, v3 + v5 = extract_vector v4, 0 + return v5 +} + +; check: dup v4.2d, v0.d[0] +; nextln: dup v6.2d, v1.d[0] +; nextln: fsub v0.2d, v4.2d, v6.2d +; nextln: ret diff --git a/cranelift/filetests/filetests/isa/aarch64/dynamic-simd-widen.clif b/cranelift/filetests/filetests/isa/aarch64/dynamic-simd-widen.clif new file mode 100644 index 0000000000..5161c48ae1 --- /dev/null +++ b/cranelift/filetests/filetests/isa/aarch64/dynamic-simd-widen.clif @@ -0,0 +1,104 @@ +test compile +target aarch64 + +function %swidenhigh_i8x16(i8) -> i16x8 { + gv0 = dyn_scale_target_const.i16x8 + gv1 = dyn_scale_target_const.i8x16 + dt0 = i8x16*gv1 + dt1 = i16x8*gv0 + +block0(v0: i8): + v1 = splat.dt0 v0 + v2 = 
swiden_high v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.16b, w0 +; nextln: sxtl2 v0.8h, v2.16b +; nextln: ret + +function %swidenhigh_i16x8(i16) -> i32x4 { + gv0 = dyn_scale_target_const.i32x4 + gv1 = dyn_scale_target_const.i16x8 + dt0 = i16x8*gv1 + dt1 = i32x4*gv0 + +block0(v0: i16): + v1 = splat.dt0 v0 + v2 = swiden_high v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.8h, w0 +; nextln: sxtl2 v0.4s, v2.8h +; nextln: ret + +function %swidenhigh_i32x4(i32) -> i64x2 { + gv0 = dyn_scale_target_const.i32x4 + gv1 = dyn_scale_target_const.i64x2 + dt0 = i64x2*gv1 + dt1 = i32x4*gv0 + +block0(v0: i32): + v1 = splat.dt1 v0 + v2 = swiden_high v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.4s, w0 +; nextln: sxtl2 v0.2d, v2.4s +; nextln: ret + +function %swidenlow_i8x16(i8) -> i16x8 { + gv0 = dyn_scale_target_const.i16x8 + gv1 = dyn_scale_target_const.i8x16 + dt0 = i8x16*gv1 + dt1 = i16x8*gv0 + +block0(v0: i8): + v1 = splat.dt0 v0 + v2 = swiden_low v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.16b, w0 +; nextln: sxtl v0.8h, v2.8b +; nextln: ret + +function %swidenlow_i16x8(i16) -> i32x4 { + gv0 = dyn_scale_target_const.i32x4 + gv1 = dyn_scale_target_const.i16x8 + dt0 = i16x8*gv1 + dt1 = i32x4*gv0 + +block0(v0: i16): + v1 = splat.dt0 v0 + v2 = swiden_low v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.8h, w0 +; nextln: sxtl v0.4s, v2.4h +; nextln: ret + +function %swidenlow_i32x4(i32) -> i64x2 { + gv0 = dyn_scale_target_const.i32x4 + gv1 = dyn_scale_target_const.i64x2 + dt0 = i64x2*gv1 + dt1 = i32x4*gv0 + +block0(v0: i32): + v1 = splat.dt1 v0 + v2 = swiden_low v1 + v3 = extract_vector v2, 0 + return v3 +} + +; check: dup v2.4s, w0 +; nextln: sxtl v0.2d, v2.2s +; nextln: ret diff --git a/cranelift/filetests/filetests/isa/aarch64/dynamic-slot.clif b/cranelift/filetests/filetests/isa/aarch64/dynamic-slot.clif new file mode 100644 index 0000000000..982457c889 --- /dev/null +++ b/cranelift/filetests/filetests/isa/aarch64/dynamic-slot.clif @@ -0,0 +1,129 @@ +test compile precise-output +target aarch64 + +function %store_scale() { + gv0 = dyn_scale_target_const.i32x4 + ss0 = explicit_slot 8 + +block0: + v0 = global_value.i64 gv0 + stack_store.i64 v0, ss0 + return +} + +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; sub sp, sp, #16 +; block0: +; mov x0, sp +; movz x2, #1 +; str x2, [x0] +; add sp, sp, #16 +; ldp fp, lr, [sp], #16 +; ret + +function %store_scale_lt_128() { + gv0 = dyn_scale_target_const.i16x4 + ss0 = explicit_slot 8 + +block0: + v0 = global_value.i64 gv0 + stack_store.i64 v0, ss0 + return +} + +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; sub sp, sp, #16 +; block0: +; mov x0, sp +; movz x2, #1 +; str x2, [x0] +; add sp, sp, #16 +; ldp fp, lr, [sp], #16 +; ret + +function %store_explicit(i32) { + gv0 = dyn_scale_target_const.i32x4 + dt0 = i32x4*gv0 + dss0 = explicit_dynamic_slot dt0 + +block0(v0: i32): + v1 = splat.dt0 v0 + dynamic_stack_store.dt0 v1, dss0 + return +} + +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; sub sp, sp, #16 +; block0: +; dup v2.4s, w0 +; mov x4, sp +; str q2, [x4] +; add sp, sp, #16 +; ldp fp, lr, [sp], #16 +; ret + +function %load_explicit() -> i32x4 { + gv0 = dyn_scale_target_const.i32x4 + dt0 = i32x4*gv0 + dss0 = explicit_dynamic_slot dt0 + +block0: + v0 = dynamic_stack_load.dt0 dss0 + v1 = extract_vector.dt0 v0, 0 + return v1 +} + +; stp fp, lr, [sp, #-16]! 
+; mov fp, sp
+; sub sp, sp, #16
+; block0:
+; mov x3, sp
+; ldr q0, [x3]
+; add sp, sp, #16
+; ldp fp, lr, [sp], #16
+; ret
+
+function %store_implicit(i32) {
+    gv0 = dyn_scale_target_const.i32x4
+    dt0 = i32x4*gv0
+    dss0 = explicit_dynamic_slot dt0
+
+block0(v0: i32):
+    v1 = splat.dt0 v0
+    dynamic_stack_store v1, dss0
+    return
+}
+
+; stp fp, lr, [sp, #-16]!
+; mov fp, sp
+; sub sp, sp, #16
+; block0:
+; dup v2.4s, w0
+; mov x4, sp
+; str q2, [x4]
+; add sp, sp, #16
+; ldp fp, lr, [sp], #16
+; ret
+
+function %addr() -> i64 {
+    gv0 = dyn_scale_target_const.i32x4
+    dt0 = i32x4*gv0
+    dss0 = explicit_dynamic_slot dt0
+
+block0:
+    v0 = dynamic_stack_addr.i64 dss0
+    return v0
+}
+
+; stp fp, lr, [sp, #-16]!
+; mov fp, sp
+; sub sp, sp, #16
+; block0:
+; mov x0, sp
+; add sp, sp, #16
+; ldp fp, lr, [sp], #16
+; ret
+
diff --git a/cranelift/filetests/filetests/runtests/dynamic-simd-arithmetic.clif b/cranelift/filetests/filetests/runtests/dynamic-simd-arithmetic.clif
new file mode 100644
index 0000000000..de7dcdc79f
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/dynamic-simd-arithmetic.clif
@@ -0,0 +1,197 @@
+test run
+target aarch64
+
+function %i8x16_splat_add(i8, i8) -> i8x16 {
+    gv0 = dyn_scale_target_const.i8x16
+    dt0 = i8x16*gv0
+
+block0(v0: i8, v1: i8):
+    v2 = splat.dt0 v0
+    v3 = splat.dt0 v1
+    v4 = iadd v2, v3
+    v5 = extract_vector v4, 0
+    return v5
+}
+; run: %i8x16_splat_add(1, 3) == [4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4]
+
+function %i16x8_splat_add(i16, i16) -> i16x8 {
+    gv0 = dyn_scale_target_const.i16x8
+    dt0 = i16x8*gv0
+
+block0(v0: i16, v1: i16):
+    v2 = splat.dt0 v0
+    v3 = splat.dt0 v1
+    v4 = iadd v2, v3
+    v5 = extract_vector v4, 0
+    return v5
+}
+; run: %i16x8_splat_add(255, 254) == [509 509 509 509 509 509 509 509]
+
+function %i32x4_splat_add(i32, i32) -> i32x4 {
+    gv0 = dyn_scale_target_const.i32x4
+    dt0 = i32x4*gv0
+
+block0(v0: i32, v1: i32):
+    v2 = splat.dt0 v0
+    v3 = splat.dt0 v1
+    v4 = iadd v2, v3
+    v5 = extract_vector v4, 0
+    return v5
+}
+; run: %i32x4_splat_add(1234, 8765) == [9999 9999 9999 9999]
+
+function %i64x2_splat_add(i64, i64) -> i64x2 {
+    gv0 = dyn_scale_target_const.i64x2
+    dt0 = i64x2*gv0
+
+block0(v0: i64, v1: i64):
+    v2 = splat.dt0 v0
+    v3 = splat.dt0 v1
+    v4 = iadd v2, v3
+    v5 = extract_vector v4, 0
+    return v5
+}
+; run: %i64x2_splat_add(4321, 8765) == [13086 13086]
+
+function %i8x16_splat_sub(i8, i8) -> i8x16 {
+    gv0 = dyn_scale_target_const.i8x16
+    dt0 = i8x16*gv0
+
+block0(v0: i8, v1: i8):
+    v2 = splat.dt0 v0
+    v3 = splat.dt0 v1
+    v4 = isub v2, v3
+    v5 = extract_vector v4, 0
+    return v5
+}
+; run: %i8x16_splat_sub(127, 126) == [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
+
+function %i16x8_splat_sub(i16, i16) -> i16x8 {
+    gv0 = dyn_scale_target_const.i16x8
+    dt0 = i16x8*gv0
+
+block0(v0: i16, v1: i16):
+    v2 = splat.dt0 v0
+    v3 = splat.dt0 v1
+    v4 = isub v2, v3
+    v5 = extract_vector v4, 0
+    return v5
+}
+; run: %i16x8_splat_sub(12345, 6789) == [5556 5556 5556 5556 5556 5556 5556 5556]
+
+function %i32x4_splat_sub(i32, i32) -> i32x4 {
+    gv0 = dyn_scale_target_const.i32x4
+    dt0 = i32x4*gv0
+
+block0(v0: i32, v1: i32):
+    v2 = splat.dt0 v0
+    v3 = splat.dt0 v1
+    v4 = isub v2, v3
+    v5 = extract_vector v4, 0
+    return v5
+}
+; run: %i32x4_splat_sub(1, 3) == [-2 -2 -2 -2]
+
+function %i64x2_splat_sub(i64, i64) -> i64x2 {
+    gv0 = dyn_scale_target_const.i64x2
+    dt0 = i64x2*gv0
+
+block0(v0: i64, v1: i64):
+    v2 = splat.dt0 v0
+    v3 = splat.dt0 v1
+    v4 = isub v2, v3
+    v5 = extract_vector v4, 0
+    return v5
+}
+; run: %i64x2_splat_sub(255, 65535) == [-65280 -65280]
+
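+; The remaining tests follow the same pattern as the ones above: splat each
+; scalar operand into a dynamic vector, apply an ordinary vector op, then
+; recover a fixed-width result with `extract_vector ..., 0` so that it can
+; be returned through the fixed-type signature. Under the current
+; target-constant lowering the dynamic scale is 1, so each dt0 behaves
+; exactly like its base vector type.
+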
+function %i8x16_splat_mul(i8, i8) -> i8x16 { + gv0 = dyn_scale_target_const.i8x16 + dt0 = i8x16*gv0 + +block0(v0: i8, v1: i8): + v2 = splat.dt0 v0 + v3 = splat.dt0 v1 + v4 = imul v2, v3 + v5 = extract_vector v4, 0 + return v5 +} +; run: %i8x16_splat_mul(15, 15) == [225 225 225 225 225 225 225 225 225 225 225 225 225 225 225 225] + +function %i16x8_splat_mul(i16, i16) -> i16x8 { + gv0 = dyn_scale_target_const.i16x8 + dt0 = i16x8*gv0 + +block0(v0: i16, v1: i16): + v2 = splat.dt0 v0 + v3 = splat.dt0 v1 + v4 = imul v2, v3 + v5 = extract_vector v4, 0 + return v5 +} +; run: %i16x8_splat_mul(135, 246) == [33210 33210 33210 33210 33210 33210 33210 33210] + +function %i32x4_splat_mul(i32, i32) -> i32x4 { + gv0 = dyn_scale_target_const.i32x4 + dt0 = i32x4*gv0 + +block0(v0: i32, v1: i32): + v2 = splat.dt0 v0 + v3 = splat.dt0 v1 + v4 = imul v2, v3 + v5 = extract_vector v4, 0 + return v5 +} +; run: %i32x4_splat_mul(2, 3) == [6 6 6 6] + +function %f32x4_splat_add(f32, f32) -> f32x4 { + gv0 = dyn_scale_target_const.f32x4 + dt0 = f32x4*gv0 + +block0(v0: f32, v1: f32): + v2 = splat.dt0 v0 + v3 = splat.dt0 v1 + v4 = fadd v2, v3 + v5 = extract_vector v4, 0 + return v5 +} +; run: %f32x4_splat_add(0x1.2, 0x3.4) == [0x4.6 0x4.6 0x4.6 0x4.6] + +function %f64x2_splat_add(f64, f64) -> f64x2 { + gv0 = dyn_scale_target_const.f64x2 + dt0 = f64x2*gv0 + +block0(v0: f64, v1: f64): + v2 = splat.dt0 v0 + v3 = splat.dt0 v1 + v4 = fadd v2, v3 + v5 = extract_vector v4, 0 + return v5 +} +; run: %f64x2_splat_add(0x1.0, 0x2.0) == [0x3.0 0x3.0] + +function %f32x4_splat_sub(f32, f32) -> f32x4 { + gv0 = dyn_scale_target_const.f32x4 + dt0 = f32x4*gv0 + +block0(v0: f32, v1: f32): + v2 = splat.dt0 v0 + v3 = splat.dt0 v1 + v4 = fsub v2, v3 + v5 = extract_vector v4, 0 + return v5 +} +; run: %f32x4_splat_sub(0x1.2, 0x3.4) == [-0x2.2 -0x2.2 -0x2.2 -0x2.2] + +function %f64x2_splat_sub(f64, f64) -> f64x2 { + gv0 = dyn_scale_target_const.f64x2 + dt0 = f64x2*gv0 + +block0(v0: f64, v1: f64): + v2 = splat.dt0 v0 + v3 = splat.dt0 v1 + v4 = fsub v2, v3 + v5 = extract_vector v4, 0 + return v5 +} +; run: %f64x2_splat_sub(0x1.0, 0x3.0) == [-0x2.0 -0x2.0] diff --git a/cranelift/frontend/src/frontend.rs b/cranelift/frontend/src/frontend.rs index da3d60e8f8..5441bf03a8 100644 --- a/cranelift/frontend/src/frontend.rs +++ b/cranelift/frontend/src/frontend.rs @@ -6,10 +6,11 @@ use cranelift_codegen::entity::{EntitySet, SecondaryMap}; use cranelift_codegen::ir; use cranelift_codegen::ir::condcodes::IntCC; use cranelift_codegen::ir::{ - types, AbiParam, Block, DataFlowGraph, ExtFuncData, ExternalName, FuncRef, Function, - GlobalValue, GlobalValueData, Heap, HeapData, Inst, InstBuilder, InstBuilderBase, - InstructionData, JumpTable, JumpTableData, LibCall, MemFlags, SigRef, Signature, StackSlot, - StackSlotData, Type, Value, ValueLabel, ValueLabelAssignments, ValueLabelStart, + types, AbiParam, Block, DataFlowGraph, DynamicStackSlot, DynamicStackSlotData, ExtFuncData, + ExternalName, FuncRef, Function, GlobalValue, GlobalValueData, Heap, HeapData, Inst, + InstBuilder, InstBuilderBase, InstructionData, JumpTable, JumpTableData, LibCall, MemFlags, + SigRef, Signature, StackSlot, StackSlotData, Type, Value, ValueLabel, ValueLabelAssignments, + ValueLabelStart, }; use cranelift_codegen::isa::TargetFrontendConfig; use cranelift_codegen::packed_option::PackedOption; @@ -370,10 +371,16 @@ impl<'a> FunctionBuilder<'a> { self.func.create_jump_table(data) } - /// Creates a stack slot in the function, to be used by `stack_load`, `stack_store` and + /// Creates a 
sized stack slot in the function, to be used by `stack_load`, `stack_store` and /// `stack_addr` instructions. - pub fn create_stack_slot(&mut self, data: StackSlotData) -> StackSlot { - self.func.create_stack_slot(data) + pub fn create_sized_stack_slot(&mut self, data: StackSlotData) -> StackSlot { + self.func.create_sized_stack_slot(data) + } + + /// Creates a dynamic stack slot in the function, to be used by `dynamic_stack_load`, + /// `dynamic_stack_store` and `dynamic_stack_addr` instructions. + pub fn create_dynamic_stack_slot(&mut self, data: DynamicStackSlotData) -> DynamicStackSlot { + self.func.create_dynamic_stack_slot(data) } /// Adds a signature which can later be used to declare an external function import. diff --git a/cranelift/interpreter/src/interpreter.rs b/cranelift/interpreter/src/interpreter.rs index 504af716f9..a41447b986 100644 --- a/cranelift/interpreter/src/interpreter.rs +++ b/cranelift/interpreter/src/interpreter.rs @@ -301,12 +301,12 @@ impl<'a> State<'a, DataValue> for InterpreterState<'a> { fn push_frame(&mut self, function: &'a Function) { if let Some(frame) = self.frame_stack.iter().last() { - self.frame_offset += frame.function.stack_size() as usize; + self.frame_offset += frame.function.fixed_stack_size() as usize; } // Grow the stack by the space necessary for this frame self.stack - .extend(iter::repeat(0).take(function.stack_size() as usize)); + .extend(iter::repeat(0).take(function.fixed_stack_size() as usize)); self.frame_stack.push(Frame::new(function)); } @@ -314,11 +314,11 @@ impl<'a> State<'a, DataValue> for InterpreterState<'a> { if let Some(frame) = self.frame_stack.pop() { // Shorten the stack after exiting the frame self.stack - .truncate(self.stack.len() - frame.function.stack_size() as usize); + .truncate(self.stack.len() - frame.function.fixed_stack_size() as usize); // Reset frame_offset to the start of this function if let Some(frame) = self.frame_stack.iter().last() { - self.frame_offset -= frame.function.stack_size() as usize; + self.frame_offset -= frame.function.fixed_stack_size() as usize; } } } @@ -358,7 +358,7 @@ impl<'a> State<'a, DataValue> for InterpreterState<'a> { slot: StackSlot, offset: u64, ) -> Result { - let stack_slots = &self.get_current_function().stack_slots; + let stack_slots = &self.get_current_function().sized_stack_slots; let stack_slot = &stack_slots[slot]; // offset must be `0 <= Offset < sizeof(SS)` @@ -539,6 +539,7 @@ impl<'a> State<'a, DataValue> for InterpreterState<'a> { action_stack.push(ResolveAction::Resolve(base)); } GlobalValueData::Symbol { .. } => unimplemented!(), + GlobalValueData::DynScaleTargetConst { .. } => unimplemented!(), }, Some(ResolveAction::Add(dv)) => { current_val = current_val diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index df8e25530c..d1f8b7c971 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -381,6 +381,9 @@ where }) }) } + Opcode::DynamicStackAddr => unimplemented!("DynamicStackSlot"), + Opcode::DynamicStackLoad => unimplemented!("DynamicStackLoad"), + Opcode::DynamicStackStore => unimplemented!("DynamicStackStore"), Opcode::GlobalValue => { if let InstructionData::UnaryGlobalValue { global_value, .. } = inst { assign_or_memtrap(state.resolve_global_value(global_value)) @@ -995,6 +998,9 @@ where assign(vectorizelanes(&new_vec, ctrl_ty)?) 
} Opcode::IaddPairwise => assign(binary_pairwise(arg(0)?, arg(1)?, ctrl_ty, Value::add)?), + Opcode::ExtractVector => { + unimplemented!("ExtractVector not supported"); + } }) } diff --git a/cranelift/reader/src/lexer.rs b/cranelift/reader/src/lexer.rs index f100d63219..6b471be0f8 100644 --- a/cranelift/reader/src/lexer.rs +++ b/cranelift/reader/src/lexer.rs @@ -15,40 +15,43 @@ use std::u16; #[derive(Debug, PartialEq, Eq, Clone, Copy)] pub enum Token<'a> { Comment(&'a str), - LPar, // '(' - RPar, // ')' - LBrace, // '{' - RBrace, // '}' - LBracket, // '[' - RBracket, // ']' - Minus, // '-' - Plus, // '+' - Comma, // ',' - Dot, // '.' - Colon, // ':' - Equal, // '=' - Not, // '!' - Arrow, // '->' - Float(&'a str), // Floating point immediate - Integer(&'a str), // Integer immediate - Type(types::Type), // i32, f32, b32x4, ... - Value(Value), // v12, v7 - Block(Block), // block3 - Cold, // cold (flag on block) - StackSlot(u32), // ss3 - GlobalValue(u32), // gv3 - Heap(u32), // heap2 - Table(u32), // table2 - JumpTable(u32), // jt2 - Constant(u32), // const2 - FuncRef(u32), // fn2 - SigRef(u32), // sig2 - UserRef(u32), // u345 - Name(&'a str), // %9arbitrary_alphanum, %x3, %0, %function ... - String(&'a str), // "arbitrary quoted string with no escape" ... - HexSequence(&'a str), // #89AF - Identifier(&'a str), // Unrecognized identifier (opcode, enumerator, ...) - SourceLoc(&'a str), // @00c7 + LPar, // '(' + RPar, // ')' + LBrace, // '{' + RBrace, // '}' + LBracket, // '[' + RBracket, // ']' + Minus, // '-' + Plus, // '+' + Multiply, // '*' + Comma, // ',' + Dot, // '.' + Colon, // ':' + Equal, // '=' + Not, // '!' + Arrow, // '->' + Float(&'a str), // Floating point immediate + Integer(&'a str), // Integer immediate + Type(types::Type), // i32, f32, b32x4, ... + DynamicType(u32), // dt5 + Value(Value), // v12, v7 + Block(Block), // block3 + Cold, // cold (flag on block) + StackSlot(u32), // ss3 + DynamicStackSlot(u32), // dss4 + GlobalValue(u32), // gv3 + Heap(u32), // heap2 + Table(u32), // table2 + JumpTable(u32), // jt2 + Constant(u32), // const2 + FuncRef(u32), // fn2 + SigRef(u32), // sig2 + UserRef(u32), // u345 + Name(&'a str), // %9arbitrary_alphanum, %x3, %0, %function ... + String(&'a str), // "arbitrary quoted string with no escape" ... + HexSequence(&'a str), // #89AF + Identifier(&'a str), // Unrecognized identifier (opcode, enumerator, ...) + SourceLoc(&'a str), // @00c7 } /// A `Token` with an associated location. 
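The `Multiply`, `DynamicType` and `DynamicStackSlot` tokens above exist so that the parser can read the new preamble entities: `dt«n»` and `dss«n»` follow the same prefixed-entity scheme as `ss«n»` and `gv«n»`, while `*` separates a base vector type from its scale. As a sketch (the same shape the filetests above exercise), a preamble using all three reads:

    gv0 = dyn_scale_target_const.i32x4     ; the dynamic scaling factor
    dt0 = i32x4*gv0                        ; dynamic type: '*' is the new Multiply token
    dss0 = explicit_dynamic_slot dt0       ; dynamic stack slot sized by dt0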
@@ -341,6 +344,8 @@ impl<'a> Lexer<'a> {
             "v" => Value::with_number(number).map(Token::Value),
             "block" => Block::with_number(number).map(Token::Block),
             "ss" => Some(Token::StackSlot(number)),
+            "dss" => Some(Token::DynamicStackSlot(number)),
+            "dt" => Some(Token::DynamicType(number)),
             "gv" => Some(Token::GlobalValue(number)),
             "heap" => Some(Token::Heap(number)),
             "table" => Some(Token::Table(number)),
@@ -482,6 +487,7 @@ impl<'a> Lexer<'a> {
             Some('=') => Some(self.scan_char(Token::Equal)),
             Some('!') => Some(self.scan_char(Token::Not)),
             Some('+') => Some(self.scan_number()),
+            Some('*') => Some(self.scan_char(Token::Multiply)),
             Some('-') => {
                 if self.looking_at("->") {
                     Some(self.scan_chars(2, Token::Arrow))
diff --git a/cranelift/reader/src/parser.rs b/cranelift/reader/src/parser.rs
index 3e04e3af96..f1f279ef15 100644
--- a/cranelift/reader/src/parser.rs
+++ b/cranelift/reader/src/parser.rs
@@ -11,16 +11,17 @@
 use crate::testfile::{Comment, Details, Feature, TestFile};
 use cranelift_codegen::data_value::DataValue;
 use cranelift_codegen::entity::EntityRef;
 use cranelift_codegen::ir;
-use cranelift_codegen::ir::entities::AnyEntity;
+use cranelift_codegen::ir::entities::{AnyEntity, DynamicType};
 use cranelift_codegen::ir::immediates::{Ieee32, Ieee64, Imm64, Offset32, Uimm32, Uimm64};
 use cranelift_codegen::ir::instructions::{InstructionData, InstructionFormat, VariableArgs};
 use cranelift_codegen::ir::types::INVALID;
 use cranelift_codegen::ir::types::*;
 use cranelift_codegen::ir::{
-    AbiParam, ArgumentExtension, ArgumentPurpose, Block, Constant, ConstantData, ExtFuncData,
-    ExternalName, FuncRef, Function, GlobalValue, GlobalValueData, Heap, HeapData, HeapStyle,
-    JumpTable, JumpTableData, MemFlags, Opcode, SigRef, Signature, StackSlot, StackSlotData,
-    StackSlotKind, Table, TableData, Type, Value,
+    AbiParam, ArgumentExtension, ArgumentPurpose, Block, Constant, ConstantData, DynamicStackSlot,
+    DynamicStackSlotData, DynamicTypeData, ExtFuncData, ExternalName, FuncRef, Function,
+    GlobalValue, GlobalValueData, Heap, HeapData, HeapStyle, JumpTable, JumpTableData, MemFlags,
+    Opcode, SigRef, Signature, StackSlot, StackSlotData, StackSlotKind, Table, TableData, Type,
+    Value,
 };
 use cranelift_codegen::isa::{self, CallConv};
 use cranelift_codegen::packed_option::ReservedValue;
@@ -249,11 +250,11 @@ impl Context {
     // Allocate a new stack slot.
     fn add_ss(&mut self, ss: StackSlot, data: StackSlotData, loc: Location) -> ParseResult<()> {
         self.map.def_ss(ss, loc)?;
-        while self.function.stack_slots.next_key().index() <= ss.index() {
+        while self.function.sized_stack_slots.next_key().index() <= ss.index() {
             self.function
-                .create_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 0));
+                .create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 0));
         }
-        self.function.stack_slots[ss] = data;
+        self.function.sized_stack_slots[ss] = data;
         Ok(())
     }

@@ -266,6 +267,47 @@ impl Context {
         }
     }

+    // Allocate a new dynamic stack slot.
+    fn add_dss(
+        &mut self,
+        ss: DynamicStackSlot,
+        data: DynamicStackSlotData,
+        loc: Location,
+    ) -> ParseResult<()> {
+        self.map.def_dss(ss, loc)?;
+        while self.function.dynamic_stack_slots.next_key().index() <= ss.index() {
+            self.function
+                .create_dynamic_stack_slot(DynamicStackSlotData::new(
+                    StackSlotKind::ExplicitDynamicSlot,
+                    data.dyn_ty,
+                ));
+        }
+        self.function.dynamic_stack_slots[ss] = data;
+        Ok(())
+    }
+
+    // Resolve a reference to a dynamic stack slot.
+    fn check_dss(&self, dss: DynamicStackSlot, loc: Location) -> ParseResult<()> {
+        if !self.map.contains_dss(dss) {
+            err!(loc, "undefined dynamic stack slot {}", dss)
+        } else {
+            Ok(())
+        }
+    }
+
+    // Allocate a new dynamic type.
+    fn add_dt(&mut self, dt: DynamicType, data: DynamicTypeData, loc: Location) -> ParseResult<()> {
+        self.map.def_dt(dt, loc)?;
+        while self.function.dfg.dynamic_types.next_key().index() <= dt.index() {
+            self.function.dfg.make_dynamic_ty(DynamicTypeData::new(
+                data.base_vector_ty,
+                data.dynamic_scale,
+            ));
+        }
+        self.function.dfg.dynamic_types[dt] = data;
+        Ok(())
+    }
+
     // Allocate a global value slot.
     fn add_gv(&mut self, gv: GlobalValue, data: GlobalValueData, loc: Location) -> ParseResult<()> {
         self.map.def_gv(gv, loc)?;
@@ -597,6 +639,33 @@ impl<'a> Parser<'a> {
         err!(self.loc, err_msg)
     }

+    // Match and consume a dynamic stack slot reference.
+    fn match_dss(&mut self, err_msg: &str) -> ParseResult<DynamicStackSlot> {
+        if let Some(Token::DynamicStackSlot(ss)) = self.token() {
+            self.consume();
+            if let Some(ss) = DynamicStackSlot::with_number(ss) {
+                return Ok(ss);
+            }
+        }
+        err!(self.loc, err_msg)
+    }
+
+    // Match and consume a dynamic type reference.
+    fn match_dt(&mut self, err_msg: &str) -> ParseResult<DynamicType> {
+        if let Some(Token::DynamicType(dt)) = self.token() {
+            self.consume();
+            if let Some(dt) = DynamicType::with_number(dt) {
+                return Ok(dt);
+            }
+        }
+        err!(self.loc, err_msg)
+    }
+
+    // Extract the concrete vector Type that a DynamicType maps onto.
+    fn concrete_from_dt(&mut self, dt: DynamicType, ctx: &mut Context) -> Option<Type> {
+        ctx.function.get_concrete_dynamic_ty(dt)
+    }
+
     // Match and consume a global value reference.
     fn match_gv(&mut self, err_msg: &str) -> ParseResult<GlobalValue> {
         if let Some(Token::GlobalValue(gv)) = self.token() {
@@ -986,7 +1055,7 @@ impl<'a> Parser<'a> {
             vec![value; lane_size as usize]
         }

-        if !ty.is_vector() {
+        if !ty.is_vector() && !ty.is_dynamic_vector() {
             err!(self.loc, "Expected a controlling vector type, not {}", ty)
         } else {
             let constant_data = match ty.lane_type() {
@@ -1386,6 +1455,18 @@ impl<'a> Parser<'a> {
                 self.parse_stack_slot_decl()
                     .and_then(|(ss, dat)| ctx.add_ss(ss, dat, loc))
             }
+            Some(Token::DynamicStackSlot(..)) => {
+                self.start_gathering_comments();
+                let loc = self.loc;
+                self.parse_dynamic_stack_slot_decl()
+                    .and_then(|(dss, dat)| ctx.add_dss(dss, dat, loc))
+            }
+            Some(Token::DynamicType(..)) => {
+                self.start_gathering_comments();
+                let loc = self.loc;
+                self.parse_dynamic_type_decl()
+                    .and_then(|(dt, dat)| ctx.add_dt(dt, dat, loc))
+            }
             Some(Token::GlobalValue(..)) => {
                 self.start_gathering_comments();
                 self.parse_global_value_decl()
@@ -1465,6 +1546,39 @@ impl<'a> Parser<'a> {
         Ok((ss, data))
     }

+    fn parse_dynamic_stack_slot_decl(
+        &mut self,
+    ) -> ParseResult<(DynamicStackSlot, DynamicStackSlotData)> {
+        let dss = self.match_dss("expected dynamic stack slot number: dss«n»")?;
+        self.match_token(Token::Equal, "expected '=' in dynamic stack slot declaration")?;
+        let kind = self.match_enum("expected stack slot kind")?;
+        let dt = self.match_dt("expected dynamic type")?;
+        let data = DynamicStackSlotData::new(kind, dt);
+        // Collect any trailing comments.
+        self.token();
+        self.claim_gathered_comments(dss);
+
+        // dynamic-stack-slot-decl ::= DynamicStackSlot(dss) "=" stack-slot-kind DynamicType(dt)
+        Ok((dss, data))
+    }
+
+    fn parse_dynamic_type_decl(&mut self) -> ParseResult<(DynamicType, DynamicTypeData)> {
+        let dt = self.match_dt("expected dynamic type number: dt«n»")?;
+        self.match_token(Token::Equal, "expected '=' in dynamic type declaration")?;
+        let vector_base_ty = self.match_type("expected base type")?;
+        assert!(vector_base_ty.is_vector(), "expected vector type");
+        self.match_token(
+            Token::Multiply,
+            "expected '*' followed by a dynamic scale value",
+        )?;
+        let dyn_scale = self.match_gv("expected dynamic scale global value")?;
+        let data = DynamicTypeData::new(vector_base_ty, dyn_scale);
+        // Collect any trailing comments.
+        self.token();
+        self.claim_gathered_comments(dt);
+        Ok((dt, data))
+    }
+
     // Parse a global value decl.
     //
     // global-val-decl ::= * GlobalValue(gv) "=" global-val-desc
     //                     | "load" "." type "notrap" "aligned" GlobalValue(base) [offset]
     //                     | "iadd_imm" "(" GlobalValue(base) ")" imm64
     //                     | "symbol" ["colocated"] name + imm64
+    //                     | "dyn_scale_target_const" "." type
     //
     fn parse_global_value_decl(&mut self) -> ParseResult<(GlobalValue, GlobalValueData)> {
         let gv = self.match_gv("expected global value number: gv«n»")?;
@@ -1530,6 +1645,15 @@ impl<'a> Parser<'a> {
                     tls,
                 }
             }
+            "dyn_scale_target_const" => {
+                self.match_token(
+                    Token::Dot,
+                    "expected '.' followed by type in dynamic scale global value decl",
+                )?;
+                let vector_type = self.match_type("expected vector type")?;
+                assert!(vector_type.is_vector(), "Expected vector type");
+                GlobalValueData::DynScaleTargetConst { vector_type }
+            }
             other => return err!(self.loc, "Unknown global value kind '{}'", other),
         };

@@ -2095,7 +2219,12 @@ impl<'a> Parser<'a> {
         // Look for a controlling type variable annotation.
         // instruction ::= [inst-results "="] Opcode(opc) * ["." Type] ...
         let explicit_ctrl_type = if self.optional(Token::Dot) {
-            Some(self.match_type("expected type after 'opcode.'")?)
+            if let Some(Token::Type(_t)) = self.token() {
+                Some(self.match_type("expected type after 'opcode.'")?)
+ } else { + let dt = self.match_dt("expected dynamic type")?; + self.concrete_from_dt(dt, ctx) + } } else { None }; @@ -2489,7 +2618,7 @@ impl<'a> Parser<'a> { I128 => DataValue::from(self.match_imm128("expected an i128")?), F32 => DataValue::from(self.match_ieee32("expected an f32")?), F64 => DataValue::from(self.match_ieee64("expected an f64")?), - _ if ty.is_vector() => { + _ if (ty.is_vector() || ty.is_dynamic_vector()) => { let as_vec = self.match_uimm128(ty)?.into_vec(); if as_vec.len() == 16 { let mut as_array = [0; 16]; @@ -2824,6 +2953,25 @@ impl<'a> Parser<'a> { offset, } } + InstructionFormat::DynamicStackLoad => { + let dss = self.match_dss("expected dynamic stack slot number: dss«n»")?; + ctx.check_dss(dss, self.loc)?; + InstructionData::DynamicStackLoad { + opcode, + dynamic_stack_slot: dss, + } + } + InstructionFormat::DynamicStackStore => { + let arg = self.match_value("expected SSA value operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let dss = self.match_dss("expected dynamic stack slot number: dss«n»")?; + ctx.check_dss(dss, self.loc)?; + InstructionData::DynamicStackStore { + opcode, + arg, + dynamic_stack_slot: dss, + } + } InstructionFormat::HeapAddr => { let heap = self.match_heap("expected heap identifier")?; ctx.check_heap(heap, self.loc)?; @@ -3080,17 +3228,23 @@ mod tests { .parse_function() .unwrap(); assert_eq!(func.name.to_string(), "%foo"); - let mut iter = func.stack_slots.keys(); + let mut iter = func.sized_stack_slots.keys(); let _ss0 = iter.next().unwrap(); let ss1 = iter.next().unwrap(); assert_eq!(ss1.to_string(), "ss1"); - assert_eq!(func.stack_slots[ss1].kind, StackSlotKind::ExplicitSlot); - assert_eq!(func.stack_slots[ss1].size, 1); + assert_eq!( + func.sized_stack_slots[ss1].kind, + StackSlotKind::ExplicitSlot + ); + assert_eq!(func.sized_stack_slots[ss1].size, 1); let _ss2 = iter.next().unwrap(); let ss3 = iter.next().unwrap(); assert_eq!(ss3.to_string(), "ss3"); - assert_eq!(func.stack_slots[ss3].kind, StackSlotKind::ExplicitSlot); - assert_eq!(func.stack_slots[ss3].size, 13); + assert_eq!( + func.sized_stack_slots[ss3].kind, + StackSlotKind::ExplicitSlot + ); + assert_eq!(func.sized_stack_slots[ss3].size, 13); assert_eq!(iter.next(), None); // Catch duplicate definitions. diff --git a/cranelift/reader/src/sourcemap.rs b/cranelift/reader/src/sourcemap.rs index 2a9298b8c1..00425dc586 100644 --- a/cranelift/reader/src/sourcemap.rs +++ b/cranelift/reader/src/sourcemap.rs @@ -8,9 +8,10 @@ use crate::error::{Location, ParseResult}; use crate::lexer::split_entity_name; -use cranelift_codegen::ir::entities::AnyEntity; +use cranelift_codegen::ir::entities::{AnyEntity, DynamicType}; use cranelift_codegen::ir::{ - Block, Constant, FuncRef, GlobalValue, Heap, JumpTable, SigRef, StackSlot, Table, Value, + Block, Constant, DynamicStackSlot, FuncRef, GlobalValue, Heap, JumpTable, SigRef, StackSlot, + Table, Value, }; use std::collections::HashMap; @@ -38,6 +39,11 @@ impl SourceMap { self.locations.contains_key(&ss.into()) } + /// Look up a dynamic stack slot entity. + pub fn contains_dss(&self, dss: DynamicStackSlot) -> bool { + self.locations.contains_key(&dss.into()) + } + /// Look up a global value entity. pub fn contains_gv(&self, gv: GlobalValue) -> bool { self.locations.contains_key(&gv.into()) @@ -173,6 +179,16 @@ impl SourceMap { self.def_entity(entity.into(), loc) } + /// Define the dynamic stack slot `entity`. 
+ pub fn def_dss(&mut self, entity: DynamicStackSlot, loc: Location) -> ParseResult<()> { + self.def_entity(entity.into(), loc) + } + + /// Define the dynamic type `entity`. + pub fn def_dt(&mut self, entity: DynamicType, loc: Location) -> ParseResult<()> { + self.def_entity(entity.into(), loc) + } + /// Define the global value `entity`. pub fn def_gv(&mut self, entity: GlobalValue, loc: Location) -> ParseResult<()> { self.def_entity(entity.into(), loc) diff --git a/cranelift/src/bugpoint.rs b/cranelift/src/bugpoint.rs index 8f0f027bf7..dcc48245f2 100644 --- a/cranelift/src/bugpoint.rs +++ b/cranelift/src/bugpoint.rs @@ -575,7 +575,7 @@ impl Mutator for RemoveUnusedEntities { let mut stack_slots = StackSlots::new(); - for (stack_slot, stack_slot_data) in func.stack_slots.clone().iter() { + for (stack_slot, stack_slot_data) in func.sized_stack_slots.clone().iter() { if let Some(stack_slot_usage) = stack_slot_usage_map.get(&stack_slot) { let new_stack_slot = stack_slots.push(stack_slot_data.clone()); for &inst in stack_slot_usage { @@ -591,7 +591,7 @@ impl Mutator for RemoveUnusedEntities { } } - func.stack_slots = stack_slots; + func.sized_stack_slots = stack_slots; "Remove unused stack slots" } @@ -617,9 +617,9 @@ impl Mutator for RemoveUnusedEntities { // These can create cyclic references, which cause complications. Just skip // the global value removal for now. // FIXME Handle them in a better way. - GlobalValueData::Load { .. } | GlobalValueData::IAddImm { .. } => { - return None - } + GlobalValueData::Load { .. } + | GlobalValueData::IAddImm { .. } + | GlobalValueData::DynScaleTargetConst { .. } => return None, } } diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs index 11916350e4..d993041342 100644 --- a/crates/cranelift/src/compiler.rs +++ b/crates/cranelift/src/compiler.rs @@ -264,7 +264,7 @@ impl wasmtime_environ::Compiler for Compiler { let length = u32::try_from(code_buf.len()).unwrap(); - let stack_slots = std::mem::take(&mut context.func.stack_slots); + let sized_stack_slots = std::mem::take(&mut context.func.sized_stack_slots); self.save_context(CompilerContext { func_translator, @@ -275,7 +275,7 @@ impl wasmtime_environ::Compiler for Compiler { body: code_buf, relocations: func_relocs, value_labels_ranges: ranges.unwrap_or(Default::default()), - stack_slots, + sized_stack_slots, unwind_info, traps, info: FunctionInfo { @@ -613,7 +613,7 @@ impl Compiler { let values_vec_byte_size = u32::try_from(value_size * values_vec_len).unwrap(); let values_vec_len = u32::try_from(values_vec_len).unwrap(); - let ss = builder.func.create_stack_slot(ir::StackSlotData::new( + let ss = builder.func.create_sized_stack_slot(ir::StackSlotData::new( ir::StackSlotKind::ExplicitSlot, values_vec_byte_size, )); @@ -712,7 +712,7 @@ impl Compiler { body: code_buf, unwind_info, relocations: Vec::new(), - stack_slots: Default::default(), + sized_stack_slots: Default::default(), value_labels_ranges: Default::default(), info: Default::default(), address_map: Default::default(), diff --git a/crates/cranelift/src/debug/transform/expression.rs b/crates/cranelift/src/debug/transform/expression.rs index a72e5daa42..2b47f991cc 100644 --- a/crates/cranelift/src/debug/transform/expression.rs +++ b/crates/cranelift/src/debug/transform/expression.rs @@ -17,7 +17,7 @@ use wasmtime_environ::{DefinedFuncIndex, EntityRef}; pub struct FunctionFrameInfo<'a> { pub value_ranges: &'a ValueLabelsRanges, pub memory_offset: ModuleMemoryOffset, - pub stack_slots: &'a StackSlots, + pub 
sized_stack_slots: &'a StackSlots, } impl<'a> FunctionFrameInfo<'a> { @@ -1207,11 +1207,11 @@ mod tests { use wasmtime_environ::{DefinedFuncIndex, EntityRef}; let addr_tr = create_mock_address_transform(); - let stack_slots = StackSlots::new(); + let sized_stack_slots = StackSlots::new(); let (value_ranges, value_labels) = create_mock_value_ranges(); let fi = FunctionFrameInfo { memory_offset: ModuleMemoryOffset::None, - stack_slots: &stack_slots, + sized_stack_slots: &sized_stack_slots, value_ranges: &value_ranges, }; diff --git a/crates/cranelift/src/debug/transform/utils.rs b/crates/cranelift/src/debug/transform/utils.rs index fca9b455c9..3cba3f5677 100644 --- a/crates/cranelift/src/debug/transform/utils.rs +++ b/crates/cranelift/src/debug/transform/utils.rs @@ -178,7 +178,7 @@ where let frame_info = FunctionFrameInfo { value_ranges: &func.value_labels_ranges, memory_offset: memory_offset.clone(), - stack_slots: &func.stack_slots, + sized_stack_slots: &func.sized_stack_slots, }; Some(frame_info) } else { diff --git a/crates/cranelift/src/lib.rs b/crates/cranelift/src/lib.rs index e98abefd5b..a5bf431800 100644 --- a/crates/cranelift/src/lib.rs +++ b/crates/cranelift/src/lib.rs @@ -42,8 +42,9 @@ pub struct CompiledFunction { relocations: Vec, value_labels_ranges: cranelift_codegen::ValueLabelsRanges, - stack_slots: ir::StackSlots, + sized_stack_slots: ir::StackSlots, + // TODO: Add dynamic_stack_slots? info: FunctionInfo, }
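
As a closing sketch of how the pieces compose end to end (the function name is illustrative; every construct is taken from the filetests in this patch, and under the current aarch64 lowering the scale is the target constant 1, so dt0 lowers exactly like a plain i32x4):

function %splat_roundtrip(i32) -> i32x4 {
    gv0 = dyn_scale_target_const.i32x4    ; the dynamic scaling factor for i32x4
    dt0 = i32x4*gv0                       ; dynamic vector type: i32x4 scaled by gv0
    dss0 = explicit_dynamic_slot dt0      ; stack slot with no fixed size in the IR

block0(v0: i32):
    v1 = splat.dt0 v0                     ; produce a dynamic vector
    dynamic_stack_store v1, dss0          ; store it through the dynamic slot
    v2 = dynamic_stack_load.dt0 dss0      ; and load it back
    v3 = extract_vector v2, 0             ; recover the fixed-width part
    return v3
}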