diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs
index 054f6d8b4a..5d25aaab1c 100644
--- a/cranelift/codegen/src/isa/aarch64/abi.rs
+++ b/cranelift/codegen/src/isa/aarch64/abi.rs
@@ -162,6 +162,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
                 assert!(size % 8 == 0, "StructArgument size is not properly aligned");
                 next_stack += size;
                 ret.push(ABIArg::StructArg {
+                    pointer: None,
                     offset,
                     size,
                     purpose: param.purpose,
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index f9039a4150..842342d5b9 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -624,10 +624,15 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             abi.emit_stack_pre_adjust(ctx);
             assert!(inputs.len() == abi.num_args());
-            for i in abi.get_copy_to_arg_order() {
-                let input = inputs[i];
-                let arg_regs = put_input_in_regs(ctx, input);
-                abi.emit_copy_regs_to_arg(ctx, i, arg_regs);
+            let mut arg_regs = vec![];
+            for input in inputs {
+                arg_regs.push(put_input_in_regs(ctx, *input))
+            }
+            for (i, arg_regs) in arg_regs.iter().enumerate() {
+                abi.emit_copy_regs_to_buffer(ctx, i, *arg_regs);
+            }
+            for (i, arg_regs) in arg_regs.iter().enumerate() {
+                abi.emit_copy_regs_to_arg(ctx, i, *arg_regs);
             }
             abi.emit_call(ctx);
             for (i, output) in outputs.iter().enumerate() {
diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs
index 37a62c6d0e..f1797c3ded 100644
--- a/cranelift/codegen/src/isa/s390x/abi.rs
+++ b/cranelift/codegen/src/isa/s390x/abi.rs
@@ -188,7 +188,7 @@ fn get_vecreg_for_ret(idx: usize) -> Option<Reg> {
 static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024;
 
 /// The size of the register save area
-static REG_SAVE_AREA_SIZE: u32 = 160;
+pub static REG_SAVE_AREA_SIZE: u32 = 160;
 
 impl Into<MemArg> for StackAMode {
     fn into(self) -> MemArg {
@@ -247,7 +247,9 @@ impl ABIMachineSpec for S390xMachineDeps {
                 &ir::ArgumentPurpose::VMContext
                 | &ir::ArgumentPurpose::Normal
                 | &ir::ArgumentPurpose::StackLimit
-                | &ir::ArgumentPurpose::SignatureId => {}
+                | &ir::ArgumentPurpose::SignatureId
+                | &ir::ArgumentPurpose::StructReturn
+                | &ir::ArgumentPurpose::StructArgument(_) => {}
                 _ => panic!(
                     "Unsupported argument purpose {:?} in signature: {:?}",
                     param.purpose, params
@@ -287,14 +289,13 @@ impl ABIMachineSpec for S390xMachineDeps {
                 candidate
             };
 
-            if let Some(reg) = candidate {
-                ret.push(ABIArg::reg(
-                    reg.to_real_reg().unwrap(),
-                    param.value_type,
-                    param.extension,
-                    param.purpose,
-                ));
+            let slot = if let Some(reg) = candidate {
                 *next_reg += 1;
+                ABIArgSlot::Reg {
+                    reg: reg.to_real_reg().unwrap(),
+                    ty: param.value_type,
+                    extension: param.extension,
+                }
             } else {
                 // Compute size. Every argument or return value takes a slot of
                 // at least 8 bytes, except for return values in the Wasmtime ABI.
@@ -318,13 +319,28 @@ impl ABIMachineSpec for S390xMachineDeps {
                 } else {
                     0
                 };
-                ret.push(ABIArg::stack(
-                    (next_stack + offset) as i64,
-                    param.value_type,
-                    param.extension,
-                    param.purpose,
-                ));
+                let offset = (next_stack + offset) as i64;
                 next_stack += slot_size;
+                ABIArgSlot::Stack {
+                    offset,
+                    ty: param.value_type,
+                    extension: param.extension,
+                }
+            };
+
+            if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
+                assert!(size % 8 == 0, "StructArgument size is not properly aligned");
+                ret.push(ABIArg::StructArg {
+                    pointer: Some(slot),
+                    offset: 0,
+                    size: size as u64,
+                    purpose: param.purpose,
+                });
+            } else {
+                ret.push(ABIArg::Slots {
+                    slots: smallvec![slot],
+                    purpose: param.purpose,
+                });
             }
         }
 
@@ -353,6 +369,22 @@ impl ABIMachineSpec for S390xMachineDeps {
             None
         };
 
+        // After all arguments are in their well-defined locations,
+        // allocate buffers for all StructArg arguments.
+        for i in 0..ret.len() {
+            match &mut ret[i] {
+                &mut ABIArg::StructArg {
+                    ref mut offset,
+                    size,
+                    ..
+                } => {
+                    *offset = next_stack as i64;
+                    next_stack += size;
+                }
+                _ => {}
+            }
+        }
+
         // To avoid overflow issues, limit the arg/return size to something
         // reasonable -- here, 128 MB.
         if next_stack > STACK_ARG_RET_SIZE_LIMIT {
diff --git a/cranelift/codegen/src/isa/s390x/inst.isle b/cranelift/codegen/src/isa/s390x/inst.isle
index cfadb13cce..f1ed099eb1 100644
--- a/cranelift/codegen/src/isa/s390x/inst.isle
+++ b/cranelift/codegen/src/isa/s390x/inst.isle
@@ -351,6 +351,12 @@
       (rd Reg)
       (mem MemArg))
 
+    ;; A memory copy of 1-256 bytes.
+    (Mvc
+      (dst MemArgPair)
+      (src MemArgPair)
+      (len_minus_one u8))
+
     ;; A load-multiple instruction.
     (LoadMultiple64
       (rt WritableReg)
@@ -1473,6 +1479,9 @@
 (decl uimm32shifted_from_inverted_value (UImm32Shifted) Value)
 (extern extractor uimm32shifted_from_inverted_value uimm32shifted_from_inverted_value)
 
+(decl len_minus_one (u8) u64)
+(extern extractor len_minus_one len_minus_one)
+
 ;; Helpers for masking shift amounts ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
@@ -1547,6 +1556,9 @@
 
 (type MemArg extern (enum))
 
+(decl memarg_flags (MemArg) MemFlags)
+(extern constructor memarg_flags memarg_flags)
+
 (decl memarg_reg_plus_reg (Reg Reg u8 MemFlags) MemArg)
 (extern constructor memarg_reg_plus_reg memarg_reg_plus_reg)
 
@@ -1621,6 +1633,26 @@
       (if (memarg_symbol_offset_sum sym_offset load_offset))
       inst)
 
+
+;; Accessors for `MemArgPair`.
+
+(type MemArgPair extern (enum))
+
+;; Convert a MemArg to a MemArgPair, reloading the address if necessary.
+(decl memarg_pair (MemArg) MemArgPair)
+(rule (memarg_pair (memarg_pair_from_memarg mem)) mem)
+(rule (memarg_pair mem) (memarg_pair_from_reg
+                          (load_addr mem) (memarg_flags mem)))
+
+;; Convert a MemArg to a MemArgPair if no reloading is necessary.
+(decl memarg_pair_from_memarg (MemArgPair) MemArg)
+(extern extractor memarg_pair_from_memarg memarg_pair_from_memarg)
+
+;; Create a MemArgPair from a single base register.
+(decl memarg_pair_from_reg (Reg MemFlags) MemArgPair)
+(extern constructor memarg_pair_from_reg memarg_pair_from_reg)
+
+
 ;; Helpers for stack-slot addresses ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (decl stack_addr_impl (Type StackSlot Offset32) Reg)
 
@@ -2038,6 +2070,11 @@
 (rule (storerev64 src addr)
       (SideEffectNoResult.Inst (MInst.StoreRev64 src addr)))
 
+;; Helper for emitting `MInst.Mvc` instructions.
+(decl mvc (MemArgPair MemArgPair u8) SideEffectNoResult)
+(rule (mvc dst src len_minus_one)
+      (SideEffectNoResult.Inst (MInst.Mvc dst src len_minus_one)))
+
 ;; Helper for emitting `MInst.FpuRR` instructions.
 (decl fpu_rr (Type FPUOp1 Reg) Reg)
 (rule (fpu_rr ty op src)
 
@@ -2521,10 +2558,35 @@
 (rule (emit_arg_load $F64 mem) (vec_load_lane_undef $F64X2 mem 0))
 (rule (emit_arg_load (ty_vec128 ty) mem) (vec_load ty mem))
 
+;; Helpers to emit a memory copy (MVC or memcpy libcall).
+(decl emit_memcpy (MemArg MemArg u64) Unit)
+(rule (emit_memcpy dst src (len_minus_one len))
+      (emit_side_effect (mvc (memarg_pair dst) (memarg_pair src) len)))
+(rule (emit_memcpy dst src len)
+      (let ((libcall LibCallInfo (lib_call_info_memcpy))
+            (_ Unit (lib_accumulate_outgoing_args_size libcall))
+            (_ Unit (emit_mov $I64 (writable_gpr 2) (load_addr dst)))
+            (_ Unit (emit_mov $I64 (writable_gpr 3) (load_addr src)))
+            (_ Unit (emit_imm $I64 (writable_gpr 4) len)))
+        (emit_side_effect (lib_call libcall))))
+
+;; Prepare a stack copy of a single (oversized) argument.
+(decl copy_to_buffer (i64 ABIArg Value) InstOutput)
+(rule (copy_to_buffer base (abi_arg_only_slot slot) _) (output_none))
+(rule (copy_to_buffer base (abi_arg_struct_pointer _ offset size) val)
+      (let ((dst MemArg (memarg_stack_off base offset))
+            (src MemArg (memarg_reg_plus_off val 0 0 (memflags_trusted)))
+            (_ Unit (emit_memcpy dst src size)))
+        (output_none)))
+
 ;; Copy a single argument/return value to its slots.
+;; For oversized arguments, set the slot to the buffer address.
 (decl copy_to_arg (i64 ABIArg Value) Unit)
 (rule (copy_to_arg base (abi_arg_only_slot slot) val)
       (copy_val_to_arg_slot base slot val))
+(rule (copy_to_arg base (abi_arg_struct_pointer slot offset _) _)
+      (let ((ptr Reg (load_addr (memarg_stack_off base offset))))
+        (copy_reg_to_arg_slot base slot ptr)))
 
 ;; Copy a single argument/return value from its slots.
 (decl copy_from_arg (i64 ABIArg) ValueRegs)
 
@@ -3262,6 +3324,24 @@
 (extern constructor abi_accumulate_outgoing_args_size abi_accumulate_outgoing_args_size)
 
+;; Helpers for generating calls to library routines ;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(type LibCallInfo extern (enum))
+
+(decl lib_call_info_memcpy () LibCallInfo)
+(extern constructor lib_call_info_memcpy lib_call_info_memcpy)
+
+(decl lib_call_info (LibCallInfo) BoxCallInfo)
+(extern constructor lib_call_info lib_call_info)
+
+(decl lib_call (LibCallInfo) SideEffectNoResult)
+(rule (lib_call libcall)
+      (call_impl (writable_link_reg) (lib_call_info libcall)))
+
+(decl lib_accumulate_outgoing_args_size (LibCallInfo) Unit)
+(extern constructor lib_accumulate_outgoing_args_size lib_accumulate_outgoing_args_size)
+
+
 ;; Helpers for generating vector pack and unpack instructions ;;;;;;;;;;;;;;;;;;
 
 (decl vec_widen_type (Type) Type)
diff --git a/cranelift/codegen/src/isa/s390x/inst/args.rs b/cranelift/codegen/src/isa/s390x/inst/args.rs
index 2ea3cf5409..7a0905641b 100644
--- a/cranelift/codegen/src/isa/s390x/inst/args.rs
+++ b/cranelift/codegen/src/isa/s390x/inst/args.rs
@@ -145,6 +145,66 @@ impl MemArg {
     }
 }
 
+/// A memory argument for an instruction with two memory operands.
+/// We cannot use two instances of MemArg, because we do not have
+/// two free temp registers that would be needed to reload two
+/// addresses in the general case. Also, two copies of MemArg would
+/// increase the size of Inst beyond its current limit. Use this
+/// simplified form instead that never needs any reloads, and suffices
+/// for all current users.
+#[derive(Clone, Debug)]
+pub struct MemArgPair {
+    pub base: Reg,
+    pub disp: UImm12,
+    pub flags: MemFlags,
+}
+
+impl MemArgPair {
+    /// Convert a MemArg to a MemArgPair if possible.
+    pub fn maybe_from_memarg(mem: &MemArg) -> Option<MemArgPair> {
+        match mem {
+            &MemArg::BXD12 {
+                base,
+                index,
+                disp,
+                flags,
+            } => {
+                if index != zero_reg() {
+                    None
+                } else {
+                    Some(MemArgPair { base, disp, flags })
+                }
+            }
+            &MemArg::RegOffset { reg, off, flags } => {
+                if off < 0 {
+                    None
+                } else {
+                    let disp = UImm12::maybe_from_u64(off as u64)?;
+                    Some(MemArgPair {
+                        base: reg,
+                        disp,
+                        flags,
+                    })
+                }
+            }
+            _ => None,
+        }
+    }
+
+    pub(crate) fn can_trap(&self) -> bool {
+        !self.flags.notrap()
+    }
+
+    /// Edit registers with allocations.
+    pub fn with_allocs(&self, allocs: &mut AllocationConsumer<'_>) -> Self {
+        MemArgPair {
+            base: allocs.next(self.base),
+            disp: self.disp,
+            flags: self.flags,
+        }
+    }
+}
+
 //=============================================================================
 // Instruction sub-components (conditions, branches and branch targets):
 // definitions
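As a quick cross-check of the conversion rules above, a hypothetical unit test (not part of the patch; it assumes the s390x `inst` module items shown in this diff are in scope) would exercise both the direct case and the fallback case:

    #[test]
    fn memarg_pair_conversion() {
        // A base + 12-bit displacement address with no index register
        // converts directly, without any address reload.
        let mem = MemArg::BXD12 {
            base: gpr(2),
            index: zero_reg(),
            disp: UImm12::maybe_from_u64(0x345).unwrap(),
            flags: MemFlags::trusted(),
        };
        assert!(MemArgPair::maybe_from_memarg(&mem).is_some());

        // A negative register offset cannot be expressed as an unsigned
        // displacement; maybe_from_memarg returns None, and the ISLE
        // `memarg_pair` fallback rule reloads the address into a register.
        let mem = MemArg::RegOffset {
            reg: gpr(3),
            off: -8,
            flags: MemFlags::trusted(),
        };
        assert!(MemArgPair::maybe_from_memarg(&mem).is_none());
    }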
diff --git a/cranelift/codegen/src/isa/s390x/inst/emit.rs b/cranelift/codegen/src/isa/s390x/inst/emit.rs
index 0cddea1f68..48335b215c 100644
--- a/cranelift/codegen/src/isa/s390x/inst/emit.rs
+++ b/cranelift/codegen/src/isa/s390x/inst/emit.rs
@@ -297,6 +297,35 @@ pub fn mem_imm16_emit(
     }
 }
 
+pub fn mem_mem_emit(
+    dst: &MemArgPair,
+    src: &MemArgPair,
+    len_minus_one: u8,
+    opcode_ss: u8,
+    add_trap: bool,
+    sink: &mut MachBuffer<Inst>,
+    state: &mut EmitState,
+) {
+    if add_trap && (dst.can_trap() || src.can_trap()) {
+        let srcloc = state.cur_srcloc();
+        if srcloc != SourceLoc::default() {
+            sink.add_trap(TrapCode::HeapOutOfBounds);
+        }
+    }
+
+    put(
+        sink,
+        &enc_ss_a(
+            opcode_ss,
+            dst.base,
+            dst.disp.bits(),
+            src.base,
+            src.disp.bits(),
+            len_minus_one,
+        ),
+    );
+}
+
 pub fn mem_vrx_emit(
     rd: Reg,
     mem: &MemArg,
@@ -853,6 +882,31 @@ fn enc_siy(opcode: u16, b1: Reg, d1: u32, i2: u8) -> [u8; 6] {
     enc
 }
 
+/// SSa-type instructions.
+///
+///   47     39 31 27 15 11
+///   opcode l  b1 d1 b2 d2
+///   40     32 28 16 12 0
+///
+fn enc_ss_a(opcode: u8, b1: Reg, d1: u32, b2: Reg, d2: u32, l: u8) -> [u8; 6] {
+    let b1 = machreg_to_gpr(b1) & 0x0f;
+    let d1_lo = (d1 & 0xff) as u8;
+    let d1_hi = ((d1 >> 8) & 0x0f) as u8;
+    let b2 = machreg_to_gpr(b2) & 0x0f;
+    let d2_lo = (d2 & 0xff) as u8;
+    let d2_hi = ((d2 >> 8) & 0x0f) as u8;
+
+    let mut enc: [u8; 6] = [0; 6];
+    enc[0] = opcode;
+    enc[1] = l;
+    enc[2] = b1 << 4 | d1_hi;
+    enc[3] = d1_lo;
+    enc[4] = b2 << 4 | d2_hi;
+    enc[5] = d2_lo;
+    enc
+}
+
 /// VRIa-type instructions.
 ///
 ///   47      39 35 31 15 11 7
@@ -2025,6 +2079,16 @@ impl MachInstEmit for Inst {
             };
             mem_imm16_emit(imm, &mem, opcode, true, sink, emit_info, state);
         }
+        &Inst::Mvc {
+            ref dst,
+            ref src,
+            len_minus_one,
+        } => {
+            let dst = dst.with_allocs(&mut allocs);
+            let src = src.with_allocs(&mut allocs);
+            let opcode = 0xd2; // MVC
+            mem_mem_emit(&dst, &src, len_minus_one, opcode, true, sink, state);
+        }
         &Inst::LoadMultiple64 { rt, rt2, ref mem } => {
             let mem = mem.with_allocs(&mut allocs);
diff --git a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs
index ffb3feb1da..dc18dd1d43 100644
--- a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs
@@ -5862,6 +5862,24 @@ fn test_s390x_binemit() {
         "stgrl %r1, label1",
     ));
 
+    insns.push((
+        Inst::Mvc {
+            dst: MemArgPair {
+                base: gpr(2),
+                disp: UImm12::maybe_from_u64(0x345).unwrap(),
+                flags: MemFlags::trusted(),
+            },
+            src: MemArgPair {
+                base: gpr(8),
+                disp: UImm12::maybe_from_u64(0x9ab).unwrap(),
+                flags: MemFlags::trusted(),
+            },
+            len_minus_one: 255,
+        },
+        "D2FF234589AB",
+        "mvc 837(255,%r2), 2475(%r8)",
+    ));
+
     insns.push((
         Inst::LoadMultiple64 {
             rt: writable_gpr(8),
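As a cross-check on the SS-a layout, this standalone sketch (not part of the patch; it uses raw 4-bit register numbers instead of `Reg` values) reproduces the byte sequence expected by the `Mvc` emit test above:

    // Mirrors enc_ss_a with plain integers: opcode, length field, then
    // base/displacement pairs packed as a b|d-high nibble byte plus a d-low byte.
    fn enc_ss_a_raw(opcode: u8, b1: u8, d1: u32, b2: u8, d2: u32, l: u8) -> [u8; 6] {
        [
            opcode,
            l, // length field holds len - 1
            (b1 & 0x0f) << 4 | ((d1 >> 8) & 0x0f) as u8,
            (d1 & 0xff) as u8,
            (b2 & 0x0f) << 4 | ((d2 >> 8) & 0x0f) as u8,
            (d2 & 0xff) as u8,
        ]
    }

    fn main() {
        // mvc 837(255,%r2), 2475(%r8)  ==>  D2FF234589AB
        assert_eq!(
            enc_ss_a_raw(0xd2, 2, 0x345, 8, 0x9ab, 255),
            [0xd2, 0xff, 0x23, 0x45, 0x89, 0xab]
        );
    }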
diff --git a/cranelift/codegen/src/isa/s390x/inst/mod.rs b/cranelift/codegen/src/isa/s390x/inst/mod.rs
index 5d47cba215..aadc8692bc 100644
--- a/cranelift/codegen/src/isa/s390x/inst/mod.rs
+++ b/cranelift/codegen/src/isa/s390x/inst/mod.rs
@@ -140,6 +140,7 @@ impl Inst {
             | Inst::StoreRev16 { .. }
             | Inst::StoreRev32 { .. }
             | Inst::StoreRev64 { .. }
+            | Inst::Mvc { .. }
             | Inst::LoadMultiple64 { .. }
             | Inst::StoreMultiple64 { .. }
             | Inst::Mov32 { .. }
@@ -600,6 +601,12 @@ fn s390x_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCollector<'_, F>) {
         | &Inst::StoreImm64SExt16 { ref mem, .. } => {
             memarg_operands(mem, collector);
         }
+        &Inst::Mvc {
+            ref dst, ref src, ..
+        } => {
+            collector.reg_use(dst.base);
+            collector.reg_use(src.base);
+        }
         &Inst::LoadMultiple64 {
             rt, rt2, ref mem, ..
         } => {
@@ -1763,6 +1770,22 @@ impl Inst {
                 format!("{}{} {}, {}", mem_str, op, mem, imm)
             }
+            &Inst::Mvc {
+                ref dst,
+                ref src,
+                len_minus_one,
+            } => {
+                let dst = dst.with_allocs(allocs);
+                let src = src.with_allocs(allocs);
+                format!(
+                    "mvc {}({},{}), {}({})",
+                    dst.disp.pretty_print_default(),
+                    len_minus_one,
+                    show_reg(dst.base),
+                    src.disp.pretty_print_default(),
+                    show_reg(src.base)
+                )
+            }
             &Inst::LoadMultiple64 { rt, rt2, ref mem } => {
                 let mem = mem.with_allocs(allocs);
                 let (mem_str, mem) = mem_finalize_for_show(&mem, state, false, true, false, false);
diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle
index 277f7b02a6..7b65d3864a 100644
--- a/cranelift/codegen/src/isa/s390x/lower.isle
+++ b/cranelift/codegen/src/isa/s390x/lower.isle
@@ -3591,16 +3591,30 @@
         (_ InstOutput (side_effect (abi_call_ind abi target (Opcode.CallIndirect)))))
     (lower_call_rets abi (range 0 (abi_num_rets abi)) (output_builder_new))))
 
-;; Lower function arguments by loading them into registers / stack slots.
+;; Lower function arguments.
 (decl lower_call_args (ABISig Range ValueSlice) InstOutput)
-(rule (lower_call_args abi (range_empty) _) (lower_call_ret_arg abi))
-(rule (lower_call_args abi (range_unwrap head tail) args)
-      (let ((idx usize (abi_copy_to_arg_order abi head))
-            (_ Unit (copy_to_arg 0 (abi_get_arg abi idx)
-                                   (value_slice_get args idx))))
-        (lower_call_args abi tail args)))
+(rule (lower_call_args abi range args)
+      (let ((_ InstOutput (lower_call_args_buffer abi range args))
+            (_ InstOutput (lower_call_args_slots abi range args)))
+        (lower_call_ret_arg abi)))
 
-;; Lower the implicit return-area pointer argument, if present.
+;; Lower function arguments (part 1): prepare buffer copies.
+(decl lower_call_args_buffer (ABISig Range ValueSlice) InstOutput)
+(rule (lower_call_args_buffer abi (range_empty) _) (output_none))
+(rule (lower_call_args_buffer abi (range_unwrap head tail) args)
+      (let ((_ InstOutput (copy_to_buffer 0 (abi_get_arg abi head)
+                                            (value_slice_get args head))))
+        (lower_call_args_buffer abi tail args)))
+
+;; Lower function arguments (part 2): set up registers / stack slots.
+(decl lower_call_args_slots (ABISig Range ValueSlice) InstOutput)
+(rule (lower_call_args_slots abi (range_empty) _) (output_none))
+(rule (lower_call_args_slots abi (range_unwrap head tail) args)
+      (let ((_ Unit (copy_to_arg 0 (abi_get_arg abi head)
+                                   (value_slice_get args head))))
+        (lower_call_args_slots abi tail args)))
+
+;; Lower function arguments (part 3): implicit return-area pointer.
 (decl lower_call_ret_arg (ABISig) InstOutput)
 (rule (lower_call_ret_arg (abi_no_ret_arg)) (output_none))
 (rule (lower_call_ret_arg abi @ (abi_ret_arg (abi_arg_only_slot slot)))
diff --git a/cranelift/codegen/src/isa/s390x/lower/isle.rs b/cranelift/codegen/src/isa/s390x/lower/isle.rs
index 59d3d4b874..1ee948505d 100644
--- a/cranelift/codegen/src/isa/s390x/lower/isle.rs
+++ b/cranelift/codegen/src/isa/s390x/lower/isle.rs
@@ -4,10 +4,10 @@ pub mod generated_code;
 
 // Types that the generated ISLE code uses via `use super::*`.
-use crate::isa::s390x::abi::S390xMachineDeps;
+use crate::isa::s390x::abi::{S390xMachineDeps, REG_SAVE_AREA_SIZE};
 use crate::isa::s390x::inst::{
-    stack_reg, writable_gpr, zero_reg, CallIndInfo, CallInfo, Cond, Inst as MInst, MemArg, UImm12,
-    UImm16Shifted, UImm32Shifted,
+    gpr, stack_reg, writable_gpr, zero_reg, CallIndInfo, CallInfo, Cond, Inst as MInst, MemArg,
+    MemArgPair, UImm12, UImm16Shifted, UImm32Shifted,
 };
 use crate::isa::s390x::settings::Flags as IsaFlags;
 use crate::machinst::isle::*;
@@ -16,18 +16,26 @@ use crate::settings::Flags;
 use crate::{
     ir::{
         condcodes::*, immediates::*, types::*, AtomicRmwOp, Endianness, Inst, InstructionData,
-        MemFlags, Opcode, TrapCode, Value, ValueList,
+        LibCall, MemFlags, Opcode, TrapCode, Value, ValueList,
     },
     isa::unwind::UnwindInst,
+    isa::CallConv,
+    machinst::abi_impl::ABIMachineSpec,
     machinst::{InsnOutput, LowerCtx, VCodeConstant, VCodeConstantData},
 };
 use regalloc2::PReg;
+use smallvec::{smallvec, SmallVec};
 use std::boxed::Box;
 use std::cell::Cell;
 use std::convert::TryFrom;
 use std::vec::Vec;
 use target_lexicon::Triple;
 
+/// Information describing a library call to be emitted.
+pub struct LibCallInfo {
+    libcall: LibCall,
+}
+
 type BoxCallInfo = Box<CallInfo>;
 type BoxCallIndInfo = Box<CallIndInfo>;
 type VecMachLabel = Vec<MachLabel>;
@@ -125,6 +133,49 @@ where
         })
     }
 
+    fn lib_call_info_memcpy(&mut self) -> LibCallInfo {
+        LibCallInfo {
+            libcall: LibCall::Memcpy,
+        }
+    }
+
+    fn lib_accumulate_outgoing_args_size(&mut self, _: &LibCallInfo) -> Unit {
+        // Libcalls only require the register save area.
+        self.lower_ctx
+            .abi()
+            .accumulate_outgoing_args_size(REG_SAVE_AREA_SIZE);
+    }
+
+    fn lib_call_info(&mut self, info: &LibCallInfo) -> BoxCallInfo {
+        let caller_callconv = self.lower_ctx.abi().call_conv();
+        let callee_callconv = CallConv::for_libcall(&self.flags, caller_callconv);
+
+        // Uses and defs are defined by the particular libcall.
+        let (uses, defs): (SmallVec<[Reg; 8]>, SmallVec<[WritableReg; 8]>) = match info.libcall {
+            LibCall::Memcpy => (
+                smallvec![gpr(2), gpr(3), gpr(4)],
+                smallvec![writable_gpr(2)],
+            ),
+            _ => unreachable!(),
+        };
+
+        // Clobbers are defined by the calling convention. Remove defs from clobbers.
+        let mut clobbers = S390xMachineDeps::get_regs_clobbered_by_call(callee_callconv);
+        for reg in &defs {
+            clobbers.remove(PReg::from(reg.to_reg().to_real_reg().unwrap()));
+        }
+
+        Box::new(CallInfo {
+            dest: ExternalName::LibCall(info.libcall),
+            uses,
+            defs,
+            clobbers,
+            opcode: Opcode::Call,
+            caller_callconv,
+            callee_callconv,
+        })
+    }
+
     #[inline]
     fn allow_div_traps(&mut self, _: Type) -> Option<()> {
         if !self.flags.avoid_div_traps() {
@@ -468,6 +519,15 @@
         Some(imm.negate_bits())
     }
 
+    #[inline]
+    fn len_minus_one(&mut self, len: u64) -> Option<u8> {
+        if len > 0 && len <= 256 {
+            Some((len - 1) as u8)
+        } else {
+            None
+        }
+    }
+
     #[inline]
     fn mask_amt_imm(&mut self, ty: Type, amt: i64) -> u8 {
         let mask = ty.lane_bits() - 1;
@@ -599,6 +659,11 @@
         MemFlags::trusted()
     }
 
+    #[inline]
+    fn memarg_flags(&mut self, mem: &MemArg) -> MemFlags {
+        mem.get_flags()
+    }
+
     #[inline]
     fn memarg_reg_plus_reg(&mut self, x: Reg, y: Reg, bias: u8, flags: MemFlags) -> MemArg {
         MemArg::BXD12 {
@@ -643,6 +708,20 @@
         }
     }
 
+    #[inline]
+    fn memarg_pair_from_memarg(&mut self, mem: &MemArg) -> Option<MemArgPair> {
+        MemArgPair::maybe_from_memarg(mem)
+    }
+
+    #[inline]
+    fn memarg_pair_from_reg(&mut self, reg: Reg, flags: MemFlags) -> MemArgPair {
+        MemArgPair {
+            base: reg,
+            disp: UImm12::zero(),
+            flags,
+        }
+    }
+
     #[inline]
     fn inst_builder_new(&mut self) -> VecMInstBuilder {
         Cell::new(Vec::<MInst>::new())
diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs
index e92e95c399..13cb586c5c 100644
--- a/cranelift/codegen/src/isa/x64/abi.rs
+++ b/cranelift/codegen/src/isa/x64/abi.rs
@@ -90,6 +90,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
                 assert!(size % 8 == 0, "StructArgument size is not properly aligned");
                 next_stack += size;
                 ret.push(ABIArg::StructArg {
+                    pointer: None,
                     offset,
                     size,
                     purpose: param.purpose,
diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs
index 2225a9bb7f..046ff28972 100644
--- a/cranelift/codegen/src/isa/x64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/x64/lower/isle.rs
@@ -712,12 +712,18 @@ where
             inputs.len(&self.lower_ctx.dfg().value_lists) - off,
             abi.num_args()
         );
-        for i in caller.get_copy_to_arg_order() {
+        let mut arg_regs = vec![];
+        for i in 0..abi.num_args() {
             let input = inputs
                 .get(off + i, &self.lower_ctx.dfg().value_lists)
                 .unwrap();
-            let arg_regs = self.lower_ctx.put_value_in_regs(input);
-            caller.emit_copy_regs_to_arg(self.lower_ctx, i, arg_regs);
+            arg_regs.push(self.lower_ctx.put_value_in_regs(input));
+        }
+        for (i, arg_regs) in arg_regs.iter().enumerate() {
+            caller.emit_copy_regs_to_buffer(self.lower_ctx, i, *arg_regs);
+        }
+        for (i, arg_regs) in arg_regs.iter().enumerate() {
+            caller.emit_copy_regs_to_arg(self.lower_ctx, i, *arg_regs);
         }
 
         caller.emit_call(self.lower_ctx);
diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs
index 8a26964ab3..03b40d5d96 100644
--- a/cranelift/codegen/src/machinst/abi.rs
+++ b/cranelift/codegen/src/machinst/abi.rs
@@ -195,6 +195,8 @@ pub trait ABICaller {
     fn signature(&self) -> &Signature;
 
     /// Emit a copy of an argument value from a source register, prior to the call.
+    /// For large arguments with associated stack buffer, this may load the address
+    /// of the buffer into the argument register, if required by the ABI.
     fn emit_copy_regs_to_arg<C: LowerCtx<I = Self::I>>(
         &self,
         ctx: &mut C,
         idx: usize,
         from_reg: ValueRegs<Reg>,
     );
 
@@ -202,10 +204,17 @@ pub trait ABICaller {
-    /// Specific order for copying into arguments at callsites. We must be
-    /// careful to copy into StructArgs first, because we need to be able
-    /// to invoke memcpy() before we've loaded other arg regs (see above).
-    fn get_copy_to_arg_order(&self) -> SmallVec<[usize; 8]>;
+    /// Emit a copy of a large argument into its associated stack buffer, if any.
+    /// We must be careful to perform all these copies (as necessary) before setting
+    /// up the argument registers, since we may have to invoke memcpy(), which could
+    /// clobber any registers already set up. The back-end should call this routine
+    /// for all arguments before calling emit_copy_regs_to_arg for all arguments.
+    fn emit_copy_regs_to_buffer<C: LowerCtx<I = Self::I>>(
+        &self,
+        ctx: &mut C,
+        idx: usize,
+        from_reg: ValueRegs<Reg>,
+    );
 
     /// Emit a copy of a return value into a destination register, after the call returns.
     fn emit_copy_retval_to_regs<C: LowerCtx<I = Self::I>>(
diff --git a/cranelift/codegen/src/machinst/abi_impl.rs b/cranelift/codegen/src/machinst/abi_impl.rs
index 433f8053d0..92f9e2181e 100644
--- a/cranelift/codegen/src/machinst/abi_impl.rs
+++ b/cranelift/codegen/src/machinst/abi_impl.rs
@@ -185,6 +185,10 @@ pub enum ABIArg {
     /// area; on the callee side, we compute a pointer to this stack area and
     /// provide that as the argument's value.
     StructArg {
+        /// Register or stack slot holding a pointer to the buffer as passed
+        /// by the caller to the callee. If None, the ABI defines the buffer
+        /// to reside at a well-known location (i.e. at `offset` below).
+        pointer: Option<ABIArgSlot>,
         /// Offset of this arg relative to base of stack args.
         offset: i64,
         /// Size of this arg on the stack.
@@ -195,14 +199,6 @@ pub enum ABIArg {
 }
 
 impl ABIArg {
-    /// Is this a StructArg?
-    fn is_struct_arg(&self) -> bool {
-        match self {
-            &ABIArg::StructArg { .. } => true,
-            _ => false,
-        }
-    }
-
     /// Create an ABIArg from one register.
     pub fn reg(
         reg: RealReg,
@@ -530,10 +526,6 @@ pub struct ABISig {
     sized_stack_ret_space: i64,
     /// Index in `args` of the stack-return-value-area argument.
     stack_ret_arg: Option<usize>,
-    /// Specific order for copying into arguments at callsites. We must be
-    /// careful to copy into StructArgs first, because we need to be able
-    /// to invoke memcpy() before we've loaded other arg regs (see above).
-    copy_to_arg_order: SmallVec<[usize; 8]>,
     /// Calling convention used.
     call_conv: isa::CallConv,
 }
@@ -563,30 +555,14 @@ impl ABISig {
             need_stack_return_area,
         )?;
 
-        let mut copy_to_arg_order = SmallVec::new();
-        for (i, arg) in args.iter().enumerate() {
-            // Struct args.
-            if arg.is_struct_arg() {
-                copy_to_arg_order.push(i);
-            }
-        }
-        for (i, arg) in args.iter().enumerate() {
-            // Non-struct args. Skip an appended return-area arg for multivalue
-            // returns, if any.
-            if !arg.is_struct_arg() && i < sig.params.len() {
-                copy_to_arg_order.push(i);
-            }
-        }
-
         trace!(
-            "ABISig: sig {:?} => args = {:?} rets = {:?} arg stack = {} ret stack = {} stack_ret_arg = {:?} copy_to_arg_order = {:?}",
+            "ABISig: sig {:?} => args = {:?} rets = {:?} arg stack = {} ret stack = {} stack_ret_arg = {:?}",
             sig,
             args,
             rets,
             sized_stack_arg_space,
             sized_stack_ret_space,
             stack_ret_arg,
-            copy_to_arg_order,
         );
 
         Ok(ABISig {
@@ -595,7 +571,6 @@ impl ABISig {
             sized_stack_arg_space,
             sized_stack_ret_space,
             stack_ret_arg,
-            copy_to_arg_order,
             call_conv: sig.call_conv,
         })
     }
@@ -608,13 +583,25 @@ impl ABISig {
         // Compute uses: all arg regs.
         let mut uses = smallvec![];
         for arg in &self.args {
-            if let &ABIArg::Slots { ref slots, .. } = arg {
-                for slot in slots {
-                    match slot {
-                        &ABIArgSlot::Reg { reg, .. } => {
-                            uses.push(Reg::from(reg));
+            match arg {
+                &ABIArg::Slots { ref slots, .. } => {
+                    for slot in slots {
+                        match slot {
+                            &ABIArgSlot::Reg { reg, .. } => {
+                                uses.push(Reg::from(reg));
+                            }
+                            _ => {}
+                        }
+                    }
+                }
+                &ABIArg::StructArg { ref pointer, .. } => {
+                    if let Some(slot) = pointer {
+                        match slot {
+                            &ABIArgSlot::Reg { reg, .. } => {
+                                uses.push(Reg::from(reg));
+                            }
+                            _ => {}
                         }
-                        _ => {}
                     }
                 }
             }
@@ -643,11 +630,6 @@ impl ABISig {
         (uses, defs, clobbers)
     }
 
-    /// Specific order for copying into arguments at callsites.
-    pub fn copy_to_arg_order(&self, idx: usize) -> usize {
-        self.copy_to_arg_order[idx]
-    }
-
     /// Get the number of arguments expected.
     pub fn num_args(&self) -> usize {
         if self.stack_ret_arg.is_some() {
@@ -1106,55 +1088,67 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
         into_regs: ValueRegs<Writable<Reg>>,
     ) -> SmallInstVec<Self::I> {
         let mut insts = smallvec![];
+        let mut copy_arg_slot_to_reg = |slot: &ABIArgSlot, into_reg: &Writable<Reg>| {
+            match slot {
+                &ABIArgSlot::Reg { reg, ty, .. } => {
+                    // Extension mode doesn't matter (we're copying out, not in; we
+                    // ignore high bits by convention).
+                    insts.push(M::gen_move(*into_reg, reg.into(), ty));
+                }
+                &ABIArgSlot::Stack {
+                    offset,
+                    ty,
+                    extension,
+                    ..
+                } => {
+                    // However, we have to respect the extension mode for stack
+                    // slots, or else we grab the wrong bytes on big-endian.
+                    let ext = M::get_ext_mode(self.sig.call_conv, extension);
+                    let ty = match (ext, ty_bits(ty) as u32) {
+                        (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
+                            if n < M::word_bits() =>
+                        {
+                            M::word_type()
+                        }
+                        _ => ty,
+                    };
+                    insts.push(M::gen_load_stack(
+                        StackAMode::FPOffset(
+                            M::fp_to_arg_offset(self.call_conv, &self.flags) + offset,
+                            ty,
+                        ),
+                        *into_reg,
+                        ty,
+                    ));
+                }
+            }
+        };
+
         match &self.sig.args[idx] {
             &ABIArg::Slots { ref slots, .. } => {
                 assert_eq!(into_regs.len(), slots.len());
                 for (slot, into_reg) in slots.iter().zip(into_regs.regs().iter()) {
-                    match slot {
-                        &ABIArgSlot::Reg { reg, ty, .. } => {
-                            // Extension mode doesn't matter (we're copying out, not in; we
-                            // ignore high bits by convention).
-                            insts.push(M::gen_move(*into_reg, reg.into(), ty));
-                        }
-                        &ABIArgSlot::Stack {
-                            offset,
-                            ty,
-                            extension,
-                            ..
-                        } => {
-                            // However, we have to respect the extention mode for stack
-                            // slots, or else we grab the wrong bytes on big-endian.
-                            let ext = M::get_ext_mode(self.sig.call_conv, extension);
-                            let ty = match (ext, ty_bits(ty) as u32) {
-                                (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
-                                    if n < M::word_bits() =>
-                                {
-                                    M::word_type()
-                                }
-                                _ => ty,
-                            };
-                            insts.push(M::gen_load_stack(
-                                StackAMode::FPOffset(
-                                    M::fp_to_arg_offset(self.call_conv, &self.flags) + offset,
-                                    ty,
-                                ),
-                                *into_reg,
-                                ty,
-                            ));
-                        }
-                    }
+                    copy_arg_slot_to_reg(&slot, &into_reg);
                 }
             }
-            &ABIArg::StructArg { offset, .. } => {
+            &ABIArg::StructArg {
+                pointer, offset, ..
+            } => {
                 let into_reg = into_regs.only_reg().unwrap();
-                insts.push(M::gen_get_stack_addr(
-                    StackAMode::FPOffset(
-                        M::fp_to_arg_offset(self.call_conv, &self.flags) + offset,
-                        I8,
-                    ),
-                    into_reg,
-                    I8,
-                ));
+                if let Some(slot) = pointer {
+                    // Buffer address is passed in a register or stack slot.
+                    copy_arg_slot_to_reg(&slot, &into_reg);
+                } else {
+                    // Buffer address is implicitly defined by the ABI.
+                    insts.push(M::gen_get_stack_addr(
+                        StackAMode::FPOffset(
+                            M::fp_to_arg_offset(self.call_conv, &self.flags) + offset,
+                            I8,
+                        ),
+                        into_reg,
+                        I8,
+                    ));
+                }
             }
         }
         insts
@@ -1668,6 +1662,37 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
         adjust_stack_and_nominal_sp::<M, C>(ctx, off as i32, /* is_sub = */ false)
     }
 
+    fn emit_copy_regs_to_buffer<C: LowerCtx<I = M::I>>(
+        &self,
+        ctx: &mut C,
+        idx: usize,
+        from_regs: ValueRegs<Reg>,
+    ) {
+        match &self.sig.args[idx] {
+            &ABIArg::Slots { .. } => {}
+            &ABIArg::StructArg { offset, size, .. } => {
+                let src_ptr = from_regs.only_reg().unwrap();
+                let dst_ptr = ctx.alloc_tmp(M::word_type()).only_reg().unwrap();
+                ctx.emit(M::gen_get_stack_addr(
+                    StackAMode::SPOffset(offset, I8),
+                    dst_ptr,
+                    I8,
+                ));
+                // Emit a memcpy from `src_ptr` to `dst_ptr` of `size` bytes.
+                // N.B.: because we process StructArg params *first*, this is
+                // safe w.r.t. clobbers: we have not yet filled in any other
+                // arg regs.
+                let memcpy_call_conv = isa::CallConv::for_libcall(&self.flags, self.sig.call_conv);
+                for insn in
+                    M::gen_memcpy(memcpy_call_conv, dst_ptr.to_reg(), src_ptr, size as usize)
+                        .into_iter()
+                {
+                    ctx.emit(insn);
+                }
+            }
+        }
+    }
+
     fn emit_copy_regs_to_arg<C: LowerCtx<I = M::I>>(
         &self,
         ctx: &mut C,
@@ -1744,33 +1769,12 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
                 }
             }
-            &ABIArg::StructArg { offset, size, .. } => {
-                let src_ptr = from_regs.only_reg().unwrap();
-                let dst_ptr = ctx.alloc_tmp(M::word_type()).only_reg().unwrap();
-                ctx.emit(M::gen_get_stack_addr(
-                    StackAMode::SPOffset(offset, I8),
-                    dst_ptr,
-                    I8,
-                ));
-                // Emit a memcpy from `src_ptr` to `dst_ptr` of `size` bytes.
-                // N.B.: because we process StructArg params *first*, this is
-                // safe w.r.t. clobbers: we have not yet filled in any other
-                // arg regs.
-                let memcpy_call_conv = isa::CallConv::for_libcall(&self.flags, self.sig.call_conv);
-                for insn in
-                    M::gen_memcpy(memcpy_call_conv, dst_ptr.to_reg(), src_ptr, size as usize)
-                        .into_iter()
-                {
-                    ctx.emit(insn);
-                }
+            &ABIArg::StructArg { pointer, .. } => {
+                assert!(pointer.is_none()); // Only supported via ISLE.
             }
         }
     }
 
-    fn get_copy_to_arg_order(&self) -> SmallVec<[usize; 8]> {
-        self.sig.copy_to_arg_order.clone()
-    }
-
     fn emit_copy_retval_to_regs<C: LowerCtx<I = M::I>>(
         &self,
         ctx: &mut C,
diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs
index ee7b55ae26..695b02fc38 100644
--- a/cranelift/codegen/src/machinst/isle.rs
+++ b/cranelift/codegen/src/machinst/isle.rs
@@ -780,10 +780,6 @@ macro_rules! isle_prelude_methods {
             regs.regs()[idx]
         }
 
-        fn abi_copy_to_arg_order(&mut self, abi: &ABISig, idx: usize) -> usize {
-            abi.copy_to_arg_order(idx)
-        }
-
         fn abi_num_args(&mut self, abi: &ABISig) -> usize {
             abi.num_args()
         }
@@ -833,6 +829,24 @@ macro_rules! isle_prelude_methods {
             }
         }
 
+        fn abi_arg_struct_pointer(&mut self, arg: &ABIArg) -> Option<(ABIArgSlot, i64, u64)> {
+            match arg {
+                &ABIArg::StructArg {
+                    pointer,
+                    offset,
+                    size,
+                    ..
+                } => {
+                    if let Some(pointer) = pointer {
+                        Some((pointer, offset, size))
+                    } else {
+                        None
+                    }
+                }
+                _ => None,
+            }
+        }
+
         fn abi_stackslot_addr(
             &mut self,
             dst: WritableReg,
diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle
index 15b7fca802..661ecb9fed 100644
--- a/cranelift/codegen/src/prelude.isle
+++ b/cranelift/codegen/src/prelude.isle
@@ -828,10 +828,6 @@
   (Sext)
 ))
 
-;; Specific order for copying into arguments at callsites.
-(decl abi_copy_to_arg_order (ABISig usize) usize)
-(extern constructor abi_copy_to_arg_order abi_copy_to_arg_order)
-
 ;; Get the number of arguments expected.
 (decl abi_num_args (ABISig) usize)
 (extern constructor abi_num_args abi_num_args)
@@ -878,6 +874,11 @@
 (decl abi_arg_only_slot (ABIArgSlot) ABIArg)
 (extern extractor abi_arg_only_slot abi_arg_only_slot)
 
+;; Extractor to detect the special case where a struct argument
+;; is explicitly passed by reference using a hidden pointer.
+(decl abi_arg_struct_pointer (ABIArgSlot i64 u64) ABIArg)
+(extern extractor abi_arg_struct_pointer abi_arg_struct_pointer)
+
 ;; Convert a real register number into a virtual register.
 (decl real_reg_to_reg (RealReg) Reg)
 (extern constructor real_reg_to_reg real_reg_to_reg)
diff --git a/cranelift/filetests/filetests/isa/aarch64/call.clif b/cranelift/filetests/filetests/isa/aarch64/call.clif
index 2c1ce3986b..97a262b232 100644
--- a/cranelift/filetests/filetests/isa/aarch64/call.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/call.clif
@@ -281,8 +281,8 @@ block0(v0: i64):
 ; block0:
 ; mov x7, x0
 ; movz x0, #42
-; mov x1, x7
 ; movz x2, #42
+; mov x1, x7
 ; ldr x10, 8 ; b 12 ; data TestCase { length: 3, ascii: [102, 49, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0
 ; blr x10
 ; ldp fp, lr, [sp], #16
diff --git a/cranelift/filetests/filetests/isa/s390x/struct-arg.clif b/cranelift/filetests/filetests/isa/s390x/struct-arg.clif
new file mode 100644
index 0000000000..cc28db4bc0
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/s390x/struct-arg.clif
@@ -0,0 +1,124 @@
+test compile precise-output
+target s390x
+
+function u0:0(i64 sarg(64)) -> i8 system_v {
+block0(v0: i64):
+  v1 = load.i8 v0
+  return v1
+}
+
+; block0:
+; llc %r2, 0(%r2)
+; br %r14
+
+function u0:1(i64 sarg(64), i64) -> i8 system_v {
+block0(v0: i64, v1: i64):
+  v2 = load.i8 v1
+  v3 = load.i8 v0
+  v4 = iadd.i8 v2, v3
+  return v4
+}
+
+; block0:
+; llc %r5, 0(%r3)
+; llc %r2, 0(%r2)
+; ark %r2, %r5, %r2
+; br %r14
+
+function u0:2(i64) -> i8 system_v {
+fn1 = colocated u0:0(i64 sarg(64)) -> i8 system_v
+
+block0(v0: i64):
+  v1 = call fn1(v0)
+  return v1
+}
+
+; stmg %r14, %r15, 112(%r15)
+; aghi %r15, -224
+; virtual_sp_offset_adjust 224
+; block0:
+; mvc 160(63,%r15), 0(%r2)
+; la %r2, 160(%r15)
+; brasl %r14, u0:0
+; lmg %r14, %r15, 336(%r15)
+; br %r14
+
+function u0:3(i64, i64) -> i8 system_v {
+fn1 = colocated u0:0(i64, i64 sarg(64)) -> i8 system_v
+
+block0(v0: i64, v1: i64):
+  v2 = call fn1(v0, v1)
+  return v2
+}
+
+; stmg %r14, %r15, 112(%r15)
+; aghi %r15, -224
+; virtual_sp_offset_adjust 224
+; block0:
+; mvc 160(63,%r15), 0(%r3)
+; la %r3, 160(%r15)
+; brasl %r14, u0:0
+; lmg %r14, %r15, 336(%r15)
+; br %r14
+
+function u0:4(i64 sarg(256), i64 sarg(64)) -> i8 system_v {
+block0(v0: i64, v1: i64):
+  v2 = load.i8 v0
+  v3 = load.i8 v1
+  v4 = iadd.i8 v2, v3
+  return v4
+}
+
+; block0:
+; llc %r5, 0(%r2)
+; llc %r2, 0(%r3)
+; ark %r2, %r5, %r2
+; br %r14
+
+function u0:5(i64, i64, i64) -> i8 system_v {
+fn1 = colocated u0:0(i64, i64 sarg(256), i64 sarg(64)) -> i8 system_v
+
+block0(v0: i64, v1: i64, v2: i64):
+  v3 = call fn1(v0, v1, v2)
+  return v3
+}
+
+; stmg %r14, %r15, 112(%r15)
+; aghi %r15, -480
+; virtual_sp_offset_adjust 480
+; block0:
+; mvc 160(255,%r15), 0(%r3)
+; mvc 416(63,%r15), 0(%r4)
+; la %r3, 160(%r15)
+; la %r4, 416(%r15)
+; brasl %r14, u0:0
+; lmg %r14, %r15, 592(%r15)
+; br %r14
+
+function u0:6(i64, i64, i64) -> i8 system_v {
+fn1 = colocated u0:0(i64, i64 sarg(1024), i64 sarg(64)) -> i8 system_v
+
+block0(v0: i64, v1: i64, v2: i64):
+  v3 = call fn1(v0, v1, v2)
+  return v3
+}
+
+; stmg %r7, %r15, 56(%r15)
+; aghi %r15, -1248
+; virtual_sp_offset_adjust 1248
+; block0:
+; lgr %r7, %r2
+; lgr %r9, %r4
+; la %r2, 160(%r15)
+; la %r3, 0(%r3)
+; lghi %r4, 1024
+; brasl %r14, %Memcpy
+; lgr %r4, %r9
+; mvc 1184(63,%r15), 0(%r4)
+; lgr %r2, %r7
+; la %r3, 160(%r15)
+; la %r4, 1184(%r15)
+; brasl %r14, u0:0
+; lmg %r7, %r15, 1304(%r15)
+; br %r14
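One note on the copy lengths visible in these expectations: MVC moves 1-256 bytes and its length field encodes len - 1, which is exactly what the `len_minus_one` extractor above computes; anything larger falls back to the memcpy libcall. A sketch of that selection rule (hypothetical helper name, not code from the patch):

    // Returns the MVC length field for an inline copy, or None when the
    // lowering must fall back to the memcpy libcall instead.
    fn mvc_len_field(len: u64) -> Option<u8> {
        if len > 0 && len <= 256 {
            Some((len - 1) as u8)
        } else {
            None
        }
    }

    fn main() {
        assert_eq!(mvc_len_field(64), Some(63)); // sarg(64):   mvc ...(63,...)
        assert_eq!(mvc_len_field(256), Some(255)); // sarg(256): mvc ...(255,...)
        assert_eq!(mvc_len_field(1024), None); // sarg(1024):  brasl %r14, %Memcpy
    }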