[RFC] Dynamic Vector Support (#4200)
Introduce a new concept in the IR that allows a producer to create dynamic vector types. An IR function can now contain global value(s) that represent a dynamic scaling factor for a given fixed-width vector type. A dynamic type is then created by 'multiplying' the corresponding global value with a fixed-width type. These new types can be used just like the existing types, and the type system has a set of hard-coded dynamic types, such as I32X4XN, onto which the user-defined types map. The dynamic types are also used explicitly to create dynamic stack slots which, unlike their existing counterparts, have no set size. New IR instructions are added to access these new stack entities. Currently, during codegen, the dynamic scaling factor has to be lowered to a constant, so the dynamic slots do eventually have a compile-time-known size, as do spill slots. The current lowering for aarch64 just targets Neon, using a dynamic scale of 1. Copyright (c) 2022, Arm Limited.
This commit is contained in:
@@ -61,6 +61,7 @@ use crate::ir;
|
||||
use crate::ir::condcodes::IntCC;
|
||||
use crate::ir::types;
|
||||
use crate::ir::MemFlags;
|
||||
use crate::ir::Signature;
|
||||
use crate::ir::Type;
|
||||
use crate::isa;
|
||||
use crate::isa::s390x::inst::*;
|
||||
@@ -556,6 +557,7 @@ impl ABIMachineSpec for S390xMachineDeps {
|
||||
|
||||
fn gen_clobber_restore(
|
||||
call_conv: isa::CallConv,
|
||||
_: &Signature,
|
||||
_: &settings::Flags,
|
||||
clobbers: &[Writable<RealReg>],
|
||||
fixed_frame_storage_size: u32,
|
||||
@@ -633,7 +635,7 @@ impl ABIMachineSpec for S390xMachineDeps {
|
||||
unimplemented!("StructArgs not implemented for S390X yet");
|
||||
}
|
||||
|
||||
fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 {
|
||||
fn get_number_of_spillslots_for_value(rc: RegClass, _vector_scale: u32) -> u32 {
|
||||
// We allocate in terms of 8-byte slots.
|
||||
match rc {
|
||||
RegClass::Int => 1,
|
||||
@@ -665,6 +667,7 @@ impl ABIMachineSpec for S390xMachineDeps {
|
||||
fn get_clobbered_callee_saves(
|
||||
call_conv: isa::CallConv,
|
||||
flags: &settings::Flags,
|
||||
_sig: &Signature,
|
||||
regs: &[Writable<RealReg>],
|
||||
) -> Vec<Writable<RealReg>> {
|
||||
assert!(
|
||||
@@ -688,7 +691,7 @@ impl ABIMachineSpec for S390xMachineDeps {
|
||||
_is_leaf: bool,
|
||||
_stack_args_size: u32,
|
||||
_num_clobbered_callee_saves: usize,
|
||||
_fixed_frame_storage_size: u32,
|
||||
_frame_storage_size: u32,
|
||||
) -> bool {
|
||||
// The call frame set-up is handled by gen_clobber_save().
|
||||
false
|
||||
|
||||
@@ -1158,9 +1158,6 @@
|
||||
|
||||
;; Helpers for stack-slot addresses ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(decl abi_stackslot_addr (WritableReg StackSlot Offset32) MInst)
|
||||
(extern constructor abi_stackslot_addr abi_stackslot_addr)
|
||||
|
||||
(decl stack_addr_impl (Type StackSlot Offset32) Reg)
|
||||
(rule (stack_addr_impl ty stack_slot offset)
|
||||
(let ((dst WritableReg (temp_writable_reg ty))
|
||||
|
||||
@@ -148,7 +148,7 @@ mod tests {
|
||||
pos.ins().return_(&[]);
|
||||
|
||||
if let Some(stack_slot) = stack_slot {
|
||||
func.stack_slots.push(stack_slot);
|
||||
func.sized_stack_slots.push(stack_slot);
|
||||
}
|
||||
|
||||
func
|
||||
@@ -206,7 +206,7 @@ mod tests {
|
||||
pos.ins().return_(&[]);
|
||||
|
||||
if let Some(stack_slot) = stack_slot {
|
||||
func.stack_slots.push(stack_slot);
|
||||
func.sized_stack_slots.push(stack_slot);
|
||||
}
|
||||
|
||||
func
|
||||
|
||||
@@ -2301,7 +2301,7 @@
|
||||
(decl lower_call_ret_arg (ABISig) InstOutput)
|
||||
(rule (lower_call_ret_arg (abi_no_ret_arg)) (output_none))
|
||||
(rule (lower_call_ret_arg abi @ (abi_ret_arg (abi_arg_only_slot slot)))
|
||||
(let ((ret_arg Reg (load_addr (memarg_stack_off (abi_stack_arg_space abi) 0)))
|
||||
(let ((ret_arg Reg (load_addr (memarg_stack_off (abi_sized_stack_arg_space abi) 0)))
|
||||
(_ Unit (copy_reg_to_arg_slot 0 slot ret_arg)))
|
||||
(output_none)))
|
||||
|
||||
@@ -2309,7 +2309,7 @@
|
||||
(decl lower_call_rets (ABISig Range InstOutputBuilder) InstOutput)
|
||||
(rule (lower_call_rets abi (range_empty) builder) (output_builder_finish builder))
|
||||
(rule (lower_call_rets abi (range_unwrap head tail) builder)
|
||||
(let ((ret ValueRegs (copy_from_arg (abi_stack_arg_space abi) (abi_get_ret abi head)))
|
||||
(let ((ret ValueRegs (copy_from_arg (abi_sized_stack_arg_space abi) (abi_get_ret abi head)))
|
||||
(_ Unit (output_builder_push builder ret)))
|
||||
(lower_call_rets abi tail builder)))
|
||||
|
||||
|
||||
@@ -197,7 +197,11 @@ impl LowerBackend for S390xBackend {
|
||||
| Opcode::SqmulRoundSat
|
||||
| Opcode::FvpromoteLow
|
||||
| Opcode::Fvdemote
|
||||
| Opcode::IaddPairwise => {
|
||||
| Opcode::IaddPairwise
|
||||
| Opcode::DynamicStackLoad
|
||||
| Opcode::DynamicStackStore
|
||||
| Opcode::DynamicStackAddr
|
||||
| Opcode::ExtractVector => {
|
||||
unreachable!(
|
||||
"TODO: not yet implemented in ISLE: inst = `{}`, type = `{:?}`",
|
||||
ctx.dfg().display_inst(ir_inst),
|
||||
|
||||
@@ -16,7 +16,7 @@ use crate::settings::Flags;
|
||||
use crate::{
|
||||
ir::{
|
||||
condcodes::*, immediates::*, types::*, AtomicRmwOp, Endianness, Inst, InstructionData,
|
||||
MemFlags, Opcode, StackSlot, TrapCode, Value, ValueList,
|
||||
MemFlags, Opcode, TrapCode, Value, ValueList,
|
||||
},
|
||||
isa::unwind::UnwindInst,
|
||||
machinst::{InsnOutput, LowerCtx, VCodeConstant, VCodeConstantData},
|
||||
@@ -77,7 +77,7 @@ where
|
||||
}
|
||||
|
||||
fn abi_accumulate_outgoing_args_size(&mut self, abi: &ABISig) -> Unit {
|
||||
let off = abi.stack_arg_space() + abi.stack_ret_space();
|
||||
let off = abi.sized_stack_arg_space() + abi.sized_stack_ret_space();
|
||||
self.lower_ctx
|
||||
.abi()
|
||||
.accumulate_outgoing_args_size(off as u32);
|
||||
@@ -531,17 +531,6 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn abi_stackslot_addr(
|
||||
&mut self,
|
||||
dst: WritableReg,
|
||||
stack_slot: StackSlot,
|
||||
offset: Offset32,
|
||||
) -> MInst {
|
||||
let offset = u32::try_from(i32::from(offset)).unwrap();
|
||||
self.lower_ctx.abi().stackslot_addr(stack_slot, offset, dst)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn inst_builder_new(&mut self) -> VecMInstBuilder {
|
||||
Cell::new(Vec::<MInst>::new())
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
//! IBM Z 64-bit Instruction Set Architecture.
|
||||
|
||||
use crate::ir::condcodes::IntCC;
|
||||
use crate::ir::Function;
|
||||
use crate::ir::{Function, Type};
|
||||
use crate::isa::s390x::settings as s390x_settings;
|
||||
#[cfg(feature = "unwind")]
|
||||
use crate::isa::unwind::systemv::RegisterMappingError;
|
||||
@@ -58,7 +58,7 @@ impl S390xBackend {
|
||||
flags: shared_settings::Flags,
|
||||
) -> CodegenResult<(VCode<inst::Inst>, regalloc2::Output)> {
|
||||
let emit_info = EmitInfo::new(flags.clone(), self.isa_flags.clone());
|
||||
let abi = Box::new(abi::S390xABICallee::new(func, flags, self.isa_flags())?);
|
||||
let abi = Box::new(abi::S390xABICallee::new(func, self)?);
|
||||
compile::compile::<S390xBackend>(func, self, abi, &self.machine_env, emit_info)
|
||||
}
|
||||
}
|
||||
@@ -77,7 +77,8 @@ impl TargetIsa for S390xBackend {
|
||||
let frame_size = emit_result.frame_size;
|
||||
let value_labels_ranges = emit_result.value_labels_ranges;
|
||||
let buffer = emit_result.buffer.finish();
|
||||
let stackslot_offsets = emit_result.stackslot_offsets;
|
||||
let sized_stackslot_offsets = emit_result.sized_stackslot_offsets;
|
||||
let dynamic_stackslot_offsets = emit_result.dynamic_stackslot_offsets;
|
||||
|
||||
if let Some(disasm) = emit_result.disasm.as_ref() {
|
||||
log::debug!("disassembly:\n{}", disasm);
|
||||
@@ -88,7 +89,8 @@ impl TargetIsa for S390xBackend {
|
||||
frame_size,
|
||||
disasm: emit_result.disasm,
|
||||
value_labels_ranges,
|
||||
stackslot_offsets,
|
||||
sized_stackslot_offsets,
|
||||
dynamic_stackslot_offsets,
|
||||
bb_starts: emit_result.bb_offsets,
|
||||
bb_edges: emit_result.bb_edges,
|
||||
})
|
||||
@@ -110,6 +112,10 @@ impl TargetIsa for S390xBackend {
|
||||
self.isa_flags.iter().collect()
|
||||
}
|
||||
|
||||
fn dynamic_vector_bytes(&self, _dyn_ty: Type) -> u32 {
|
||||
16
|
||||
}
|
||||
|
||||
fn unsigned_add_overflow_condition(&self) -> IntCC {
|
||||
// The ADD LOGICAL family of instructions set the condition code
|
||||
// differently from normal comparisons, in a way that cannot be
|
||||
|
||||
Reference in New Issue
Block a user