From b9dd48e34b7c66d323bce6be1eb407a055bda56e Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Wed, 3 Aug 2022 21:00:07 +0200 Subject: [PATCH] [s390x, abi_impl] Support struct args using explicit pointers (#4585) This adds support for StructArgument on s390x. The ABI for this platform requires that the address of the buffer holding the copy of the struct argument is passed from caller to callee as a hidden pointer, using a register or overflow stack slot. To implement this, I've added an optional "pointer" field to ABIArg::StructArg, and code to handle the pointer both in common abi_impl code and the s390x back-end. One notable change necessary to make this work involved the "copy_to_arg_order" mechanism. Currently, for struct args we only need to copy the data (and that needs to happen before setting up any other args), while for non-struct args we only need to set up the appropriate registers or stack slots. This order is ensured by sorting the arguments appropriately into a "copy_to_arg_order" list. However, for struct args with explicit pointers we need to *both* copy the data (again, before everything else), *and* set up a register or stack slot. Since we now need to touch the argument twice, we cannot solve the ordering problem by a simple sort. Instead, the abi_impl common code now provides *two* callbacks, emit_copy_regs_to_buffer and emit_copy_regs_to_arg, and expects the back end to first call copy..to_buffer for all args, and then call copy.._to_arg for all args. This required updates to all back ends. In the s390x back end, in addition to the new ABI code, I'm now adding code to actually copy the struct data, using the MVC instruction (for small buffers) or a memcpy libcall (for larger buffers). This also requires a bit of new infrastructure: - MVC is the first memory-to-memory instruction we use, which needed a bit of memory argument tweaking - We also need to set up the infrastructure to emit libcalls. (This implements the first half of issue #4565.) 
--- cranelift/codegen/src/isa/aarch64/abi.rs | 1 + .../codegen/src/isa/aarch64/lower_inst.rs | 13 +- cranelift/codegen/src/isa/s390x/abi.rs | 62 +++-- cranelift/codegen/src/isa/s390x/inst.isle | 80 +++++++ cranelift/codegen/src/isa/s390x/inst/args.rs | 60 +++++ cranelift/codegen/src/isa/s390x/inst/emit.rs | 64 ++++++ .../codegen/src/isa/s390x/inst/emit_tests.rs | 18 ++ cranelift/codegen/src/isa/s390x/inst/mod.rs | 23 ++ cranelift/codegen/src/isa/s390x/lower.isle | 30 ++- cranelift/codegen/src/isa/s390x/lower/isle.rs | 87 ++++++- cranelift/codegen/src/isa/x64/abi.rs | 1 + cranelift/codegen/src/isa/x64/lower/isle.rs | 12 +- cranelift/codegen/src/machinst/abi.rs | 17 +- cranelift/codegen/src/machinst/abi_impl.rs | 214 +++++++++--------- cranelift/codegen/src/machinst/isle.rs | 22 +- cranelift/codegen/src/prelude.isle | 9 +- .../filetests/filetests/isa/aarch64/call.clif | 2 +- .../filetests/isa/s390x/struct-arg.clif | 124 ++++++++++ 18 files changed, 687 insertions(+), 152 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/s390x/struct-arg.clif diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 054f6d8b4a..5d25aaab1c 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -162,6 +162,7 @@ impl ABIMachineSpec for AArch64MachineDeps { assert!(size % 8 == 0, "StructArgument size is not properly aligned"); next_stack += size; ret.push(ABIArg::StructArg { + pointer: None, offset, size, purpose: param.purpose, diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index f9039a4150..842342d5b9 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -624,10 +624,15 @@ pub(crate) fn lower_insn_to_regs>( abi.emit_stack_pre_adjust(ctx); assert!(inputs.len() == abi.num_args()); - for i in abi.get_copy_to_arg_order() { - let input = inputs[i]; - let 
arg_regs = put_input_in_regs(ctx, input); - abi.emit_copy_regs_to_arg(ctx, i, arg_regs); + let mut arg_regs = vec![]; + for input in inputs { + arg_regs.push(put_input_in_regs(ctx, *input)) + } + for (i, arg_regs) in arg_regs.iter().enumerate() { + abi.emit_copy_regs_to_buffer(ctx, i, *arg_regs); + } + for (i, arg_regs) in arg_regs.iter().enumerate() { + abi.emit_copy_regs_to_arg(ctx, i, *arg_regs); } abi.emit_call(ctx); for (i, output) in outputs.iter().enumerate() { diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs index 37a62c6d0e..f1797c3ded 100644 --- a/cranelift/codegen/src/isa/s390x/abi.rs +++ b/cranelift/codegen/src/isa/s390x/abi.rs @@ -188,7 +188,7 @@ fn get_vecreg_for_ret(idx: usize) -> Option { static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024; /// The size of the register save area -static REG_SAVE_AREA_SIZE: u32 = 160; +pub static REG_SAVE_AREA_SIZE: u32 = 160; impl Into for StackAMode { fn into(self) -> MemArg { @@ -247,7 +247,9 @@ impl ABIMachineSpec for S390xMachineDeps { &ir::ArgumentPurpose::VMContext | &ir::ArgumentPurpose::Normal | &ir::ArgumentPurpose::StackLimit - | &ir::ArgumentPurpose::SignatureId => {} + | &ir::ArgumentPurpose::SignatureId + | &ir::ArgumentPurpose::StructReturn + | &ir::ArgumentPurpose::StructArgument(_) => {} _ => panic!( "Unsupported argument purpose {:?} in signature: {:?}", param.purpose, params @@ -287,14 +289,13 @@ impl ABIMachineSpec for S390xMachineDeps { candidate }; - if let Some(reg) = candidate { - ret.push(ABIArg::reg( - reg.to_real_reg().unwrap(), - param.value_type, - param.extension, - param.purpose, - )); + let slot = if let Some(reg) = candidate { *next_reg += 1; + ABIArgSlot::Reg { + reg: reg.to_real_reg().unwrap(), + ty: param.value_type, + extension: param.extension, + } } else { // Compute size. Every argument or return value takes a slot of // at least 8 bytes, except for return values in the Wasmtime ABI. 
@@ -318,13 +319,28 @@ impl ABIMachineSpec for S390xMachineDeps { } else { 0 }; - ret.push(ABIArg::stack( - (next_stack + offset) as i64, - param.value_type, - param.extension, - param.purpose, - )); + let offset = (next_stack + offset) as i64; next_stack += slot_size; + ABIArgSlot::Stack { + offset, + ty: param.value_type, + extension: param.extension, + } + }; + + if let ir::ArgumentPurpose::StructArgument(size) = param.purpose { + assert!(size % 8 == 0, "StructArgument size is not properly aligned"); + ret.push(ABIArg::StructArg { + pointer: Some(slot), + offset: 0, + size: size as u64, + purpose: param.purpose, + }); + } else { + ret.push(ABIArg::Slots { + slots: smallvec![slot], + purpose: param.purpose, + }); } } @@ -353,6 +369,22 @@ impl ABIMachineSpec for S390xMachineDeps { None }; + // After all arguments are in their well-defined location, + // allocate buffers for all StructArg arguments. + for i in 0..ret.len() { + match &mut ret[i] { + &mut ABIArg::StructArg { + ref mut offset, + size, + .. + } => { + *offset = next_stack as i64; + next_stack += size; + } + _ => {} + } + } + // To avoid overflow issues, limit the arg/return size to something // reasonable -- here, 128 MB. if next_stack > STACK_ARG_RET_SIZE_LIMIT { diff --git a/cranelift/codegen/src/isa/s390x/inst.isle b/cranelift/codegen/src/isa/s390x/inst.isle index cfadb13cce..f1ed099eb1 100644 --- a/cranelift/codegen/src/isa/s390x/inst.isle +++ b/cranelift/codegen/src/isa/s390x/inst.isle @@ -351,6 +351,12 @@ (rd Reg) (mem MemArg)) + ;; A memory copy of 1-256 bytes. + (Mvc + (dst MemArgPair) + (src MemArgPair) + (len_minus_one u8)) + ;; A load-multiple instruction. 
(LoadMultiple64 (rt WritableReg) @@ -1473,6 +1479,9 @@ (decl uimm32shifted_from_inverted_value (UImm32Shifted) Value) (extern extractor uimm32shifted_from_inverted_value uimm32shifted_from_inverted_value) +(decl len_minus_one (u8) u64) +(extern extractor len_minus_one len_minus_one) + ;; Helpers for masking shift amounts ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -1547,6 +1556,9 @@ (type MemArg extern (enum)) +(decl memarg_flags (MemArg) MemFlags) +(extern constructor memarg_flags memarg_flags) + (decl memarg_reg_plus_reg (Reg Reg u8 MemFlags) MemArg) (extern constructor memarg_reg_plus_reg memarg_reg_plus_reg) @@ -1621,6 +1633,26 @@ (if (memarg_symbol_offset_sum sym_offset load_offset)) inst) + +;; Accessors for `MemArgPair`. + +(type MemArgPair extern (enum)) + +;; Convert a MemArg to a MemArgPair, reloading the address if necessary. +(decl memarg_pair (MemArg) MemArgPair) +(rule (memarg_pair (memarg_pair_from_memarg mem)) mem) +(rule (memarg_pair mem) (memarg_pair_from_reg + (load_addr mem) (memarg_flags mem))) + +;; Convert a MemArg to a MemArgPair if no reloading is necessary. +(decl memarg_pair_from_memarg (MemArgPair) MemArg) +(extern extractor memarg_pair_from_memarg memarg_pair_from_memarg) + +;; Create a MemArgPair from a single base register. +(decl memarg_pair_from_reg (Reg MemFlags) MemArgPair) +(extern constructor memarg_pair_from_reg memarg_pair_from_reg) + + ;; Helpers for stack-slot addresses ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (decl stack_addr_impl (Type StackSlot Offset32) Reg) @@ -2038,6 +2070,11 @@ (rule (storerev64 src addr) (SideEffectNoResult.Inst (MInst.StoreRev64 src addr))) +;; Helper for emitting `MInst.Mvc` instructions. +(decl mvc (MemArgPair MemArgPair u8) SideEffectNoResult) +(rule (mvc dst src len_minus_one) + (SideEffectNoResult.Inst (MInst.Mvc dst src len_minus_one))) + ;; Helper for emitting `MInst.FpuRR` instructions. 
(decl fpu_rr (Type FPUOp1 Reg) Reg) (rule (fpu_rr ty op src) @@ -2521,10 +2558,35 @@ (rule (emit_arg_load $F64 mem) (vec_load_lane_undef $F64X2 mem 0)) (rule (emit_arg_load (ty_vec128 ty) mem) (vec_load ty mem)) +;; Helpers to emit a memory copy (MVC or memcpy libcall). +(decl emit_memcpy (MemArg MemArg u64) Unit) +(rule (emit_memcpy dst src (len_minus_one len)) + (emit_side_effect (mvc (memarg_pair dst) (memarg_pair src) len))) +(rule (emit_memcpy dst src len) + (let ((libcall LibCallInfo (lib_call_info_memcpy)) + (_ Unit (lib_accumulate_outgoing_args_size libcall)) + (_ Unit (emit_mov $I64 (writable_gpr 2) (load_addr dst))) + (_ Unit (emit_mov $I64 (writable_gpr 3) (load_addr src))) + (_ Unit (emit_imm $I64 (writable_gpr 4) len))) + (emit_side_effect (lib_call libcall)))) + +;; Prepare a stack copy of a single (oversized) argument. +(decl copy_to_buffer (i64 ABIArg Value) InstOutput) +(rule (copy_to_buffer base (abi_arg_only_slot slot) _) (output_none)) +(rule (copy_to_buffer base (abi_arg_struct_pointer _ offset size) val) + (let ((dst MemArg (memarg_stack_off base offset)) + (src MemArg (memarg_reg_plus_off val 0 0 (memflags_trusted))) + (_ Unit (emit_memcpy dst src size))) + (output_none))) + ;; Copy a single argument/return value to its slots. +;; For oversized arguments, set the slot to the buffer address. (decl copy_to_arg (i64 ABIArg Value) Unit) (rule (copy_to_arg base (abi_arg_only_slot slot) val) (copy_val_to_arg_slot base slot val)) +(rule (copy_to_arg base (abi_arg_struct_pointer slot offset _) _) + (let ((ptr Reg (load_addr (memarg_stack_off base offset)))) + (copy_reg_to_arg_slot base slot ptr))) ;; Copy a single argument/return value from its slots. 
(decl copy_from_arg (i64 ABIArg) ValueRegs) @@ -3262,6 +3324,24 @@ (extern constructor abi_accumulate_outgoing_args_size abi_accumulate_outgoing_args_size) +;; Helpers for generating calls to library routines ;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(type LibCallInfo extern (enum)) + +(decl lib_call_info_memcpy () LibCallInfo) +(extern constructor lib_call_info_memcpy lib_call_info_memcpy) + +(decl lib_call_info (LibCallInfo) BoxCallInfo) +(extern constructor lib_call_info lib_call_info) + +(decl lib_call (LibCallInfo) SideEffectNoResult) +(rule (lib_call libcall) + (call_impl (writable_link_reg) (lib_call_info libcall))) + +(decl lib_accumulate_outgoing_args_size (LibCallInfo) Unit) +(extern constructor lib_accumulate_outgoing_args_size lib_accumulate_outgoing_args_size) + + ;; Helpers for generating vector pack and unpack instructions ;;;;;;;;;;;;;;;;;; (decl vec_widen_type (Type) Type) diff --git a/cranelift/codegen/src/isa/s390x/inst/args.rs b/cranelift/codegen/src/isa/s390x/inst/args.rs index 2ea3cf5409..7a0905641b 100644 --- a/cranelift/codegen/src/isa/s390x/inst/args.rs +++ b/cranelift/codegen/src/isa/s390x/inst/args.rs @@ -145,6 +145,66 @@ impl MemArg { } } +/// A memory argument for an instruction with two memory operands. +/// We cannot use two instances of MemArg, because we do not have +/// two free temp registers that would be needed to reload two +/// addresses in the general case. Also, two copies of MemArg would +/// increase the size of Inst beyond its current limit. Use this +/// simplified form instead that never needs any reloads, and suffices +/// for all current users. +#[derive(Clone, Debug)] +pub struct MemArgPair { + pub base: Reg, + pub disp: UImm12, + pub flags: MemFlags, +} + +impl MemArgPair { + /// Convert a MemArg to a MemArgPair if possible. 
+ pub fn maybe_from_memarg(mem: &MemArg) -> Option { + match mem { + &MemArg::BXD12 { + base, + index, + disp, + flags, + } => { + if index != zero_reg() { + None + } else { + Some(MemArgPair { base, disp, flags }) + } + } + &MemArg::RegOffset { reg, off, flags } => { + if off < 0 { + None + } else { + let disp = UImm12::maybe_from_u64(off as u64)?; + Some(MemArgPair { + base: reg, + disp, + flags, + }) + } + } + _ => None, + } + } + + pub(crate) fn can_trap(&self) -> bool { + !self.flags.notrap() + } + + /// Edit registers with allocations. + pub fn with_allocs(&self, allocs: &mut AllocationConsumer<'_>) -> Self { + MemArgPair { + base: allocs.next(self.base), + disp: self.disp, + flags: self.flags, + } + } +} + //============================================================================= // Instruction sub-components (conditions, branches and branch targets): // definitions diff --git a/cranelift/codegen/src/isa/s390x/inst/emit.rs b/cranelift/codegen/src/isa/s390x/inst/emit.rs index 0cddea1f68..48335b215c 100644 --- a/cranelift/codegen/src/isa/s390x/inst/emit.rs +++ b/cranelift/codegen/src/isa/s390x/inst/emit.rs @@ -297,6 +297,35 @@ pub fn mem_imm16_emit( } } +pub fn mem_mem_emit( + dst: &MemArgPair, + src: &MemArgPair, + len_minus_one: u8, + opcode_ss: u8, + add_trap: bool, + sink: &mut MachBuffer, + state: &mut EmitState, +) { + if add_trap && (dst.can_trap() || src.can_trap()) { + let srcloc = state.cur_srcloc(); + if srcloc != SourceLoc::default() { + sink.add_trap(TrapCode::HeapOutOfBounds); + } + } + + put( + sink, + &enc_ss_a( + opcode_ss, + dst.base, + dst.disp.bits(), + src.base, + src.disp.bits(), + len_minus_one, + ), + ); +} + pub fn mem_vrx_emit( rd: Reg, mem: &MemArg, @@ -853,6 +882,31 @@ fn enc_siy(opcode: u16, b1: Reg, d1: u32, i2: u8) -> [u8; 6] { enc } +/// SSa-type instructions. 
+/// +/// 47 39 31 27 15 11 +/// opcode l b1 d1 b2 d2 +/// 40 32 28 16 12 0 +/// +/// +fn enc_ss_a(opcode: u8, b1: Reg, d1: u32, b2: Reg, d2: u32, l: u8) -> [u8; 6] { + let b1 = machreg_to_gpr(b1) & 0x0f; + let d1_lo = (d1 & 0xff) as u8; + let d1_hi = ((d1 >> 8) & 0x0f) as u8; + let b2 = machreg_to_gpr(b2) & 0x0f; + let d2_lo = (d2 & 0xff) as u8; + let d2_hi = ((d2 >> 8) & 0x0f) as u8; + + let mut enc: [u8; 6] = [0; 6]; + enc[0] = opcode; + enc[1] = l; + enc[2] = b1 << 4 | d1_hi; + enc[3] = d1_lo; + enc[4] = b2 << 4 | d2_hi; + enc[5] = d2_lo; + enc +} + /// VRIa-type instructions. /// /// 47 39 35 31 15 11 7 @@ -2025,6 +2079,16 @@ impl MachInstEmit for Inst { }; mem_imm16_emit(imm, &mem, opcode, true, sink, emit_info, state); } + &Inst::Mvc { + ref dst, + ref src, + len_minus_one, + } => { + let dst = dst.with_allocs(&mut allocs); + let src = src.with_allocs(&mut allocs); + let opcode = 0xd2; // MVC + mem_mem_emit(&dst, &src, len_minus_one, opcode, true, sink, state); + } &Inst::LoadMultiple64 { rt, rt2, ref mem } => { let mem = mem.with_allocs(&mut allocs); diff --git a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs index ffb3feb1da..dc18dd1d43 100644 --- a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs @@ -5862,6 +5862,24 @@ fn test_s390x_binemit() { "stgrl %r1, label1", )); + insns.push(( + Inst::Mvc { + dst: MemArgPair { + base: gpr(2), + disp: UImm12::maybe_from_u64(0x345).unwrap(), + flags: MemFlags::trusted(), + }, + src: MemArgPair { + base: gpr(8), + disp: UImm12::maybe_from_u64(0x9ab).unwrap(), + flags: MemFlags::trusted(), + }, + len_minus_one: 255, + }, + "D2FF234589AB", + "mvc 837(255,%r2), 2475(%r8)", + )); + insns.push(( Inst::LoadMultiple64 { rt: writable_gpr(8), diff --git a/cranelift/codegen/src/isa/s390x/inst/mod.rs b/cranelift/codegen/src/isa/s390x/inst/mod.rs index 5d47cba215..aadc8692bc 100644 --- 
a/cranelift/codegen/src/isa/s390x/inst/mod.rs +++ b/cranelift/codegen/src/isa/s390x/inst/mod.rs @@ -140,6 +140,7 @@ impl Inst { | Inst::StoreRev16 { .. } | Inst::StoreRev32 { .. } | Inst::StoreRev64 { .. } + | Inst::Mvc { .. } | Inst::LoadMultiple64 { .. } | Inst::StoreMultiple64 { .. } | Inst::Mov32 { .. } @@ -600,6 +601,12 @@ fn s390x_get_operands VReg>(inst: &Inst, collector: &mut OperandC | &Inst::StoreImm64SExt16 { ref mem, .. } => { memarg_operands(mem, collector); } + &Inst::Mvc { + ref dst, ref src, .. + } => { + collector.reg_use(dst.base); + collector.reg_use(src.base); + } &Inst::LoadMultiple64 { rt, rt2, ref mem, .. } => { @@ -1763,6 +1770,22 @@ impl Inst { format!("{}{} {}, {}", mem_str, op, mem, imm) } + &Inst::Mvc { + ref dst, + ref src, + len_minus_one, + } => { + let dst = dst.with_allocs(allocs); + let src = src.with_allocs(allocs); + format!( + "mvc {}({},{}), {}({})", + dst.disp.pretty_print_default(), + len_minus_one, + show_reg(dst.base), + src.disp.pretty_print_default(), + show_reg(src.base) + ) + } &Inst::LoadMultiple64 { rt, rt2, ref mem } => { let mem = mem.with_allocs(allocs); let (mem_str, mem) = mem_finalize_for_show(&mem, state, false, true, false, false); diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle index 277f7b02a6..7b65d3864a 100644 --- a/cranelift/codegen/src/isa/s390x/lower.isle +++ b/cranelift/codegen/src/isa/s390x/lower.isle @@ -3591,16 +3591,30 @@ (_ InstOutput (side_effect (abi_call_ind abi target (Opcode.CallIndirect))))) (lower_call_rets abi (range 0 (abi_num_rets abi)) (output_builder_new)))) -;; Lower function arguments by loading them into registers / stack slots. +;; Lower function arguments. 
(decl lower_call_args (ABISig Range ValueSlice) InstOutput) -(rule (lower_call_args abi (range_empty) _) (lower_call_ret_arg abi)) -(rule (lower_call_args abi (range_unwrap head tail) args) - (let ((idx usize (abi_copy_to_arg_order abi head)) - (_ Unit (copy_to_arg 0 (abi_get_arg abi idx) - (value_slice_get args idx)))) - (lower_call_args abi tail args))) +(rule (lower_call_args abi range args) + (let ((_ InstOutput (lower_call_args_buffer abi range args)) + (_ InstOutput (lower_call_args_slots abi range args))) + (lower_call_ret_arg abi))) -;; Lower the implicit return-area pointer argument, if present. +;; Lower function arguments (part 1): prepare buffer copies. +(decl lower_call_args_buffer (ABISig Range ValueSlice) InstOutput) +(rule (lower_call_args_buffer abi (range_empty) _) (output_none)) +(rule (lower_call_args_buffer abi (range_unwrap head tail) args) + (let ((_ InstOutput (copy_to_buffer 0 (abi_get_arg abi head) + (value_slice_get args head)))) + (lower_call_args_buffer abi tail args))) + +;; Lower function arguments (part 2): set up registers / stack slots. +(decl lower_call_args_slots (ABISig Range ValueSlice) InstOutput) +(rule (lower_call_args_slots abi (range_empty) _) (output_none)) +(rule (lower_call_args_slots abi (range_unwrap head tail) args) + (let ((_ Unit (copy_to_arg 0 (abi_get_arg abi head) + (value_slice_get args head)))) + (lower_call_args_slots abi tail args))) + +;; Lower function arguments (part 3): implicit return-area pointer. 
(decl lower_call_ret_arg (ABISig) InstOutput) (rule (lower_call_ret_arg (abi_no_ret_arg)) (output_none)) (rule (lower_call_ret_arg abi @ (abi_ret_arg (abi_arg_only_slot slot))) diff --git a/cranelift/codegen/src/isa/s390x/lower/isle.rs b/cranelift/codegen/src/isa/s390x/lower/isle.rs index 59d3d4b874..1ee948505d 100644 --- a/cranelift/codegen/src/isa/s390x/lower/isle.rs +++ b/cranelift/codegen/src/isa/s390x/lower/isle.rs @@ -4,10 +4,10 @@ pub mod generated_code; // Types that the generated ISLE code uses via `use super::*`. -use crate::isa::s390x::abi::S390xMachineDeps; +use crate::isa::s390x::abi::{S390xMachineDeps, REG_SAVE_AREA_SIZE}; use crate::isa::s390x::inst::{ - stack_reg, writable_gpr, zero_reg, CallIndInfo, CallInfo, Cond, Inst as MInst, MemArg, UImm12, - UImm16Shifted, UImm32Shifted, + gpr, stack_reg, writable_gpr, zero_reg, CallIndInfo, CallInfo, Cond, Inst as MInst, MemArg, + MemArgPair, UImm12, UImm16Shifted, UImm32Shifted, }; use crate::isa::s390x::settings::Flags as IsaFlags; use crate::machinst::isle::*; @@ -16,18 +16,26 @@ use crate::settings::Flags; use crate::{ ir::{ condcodes::*, immediates::*, types::*, AtomicRmwOp, Endianness, Inst, InstructionData, - MemFlags, Opcode, TrapCode, Value, ValueList, + LibCall, MemFlags, Opcode, TrapCode, Value, ValueList, }, isa::unwind::UnwindInst, + isa::CallConv, + machinst::abi_impl::ABIMachineSpec, machinst::{InsnOutput, LowerCtx, VCodeConstant, VCodeConstantData}, }; use regalloc2::PReg; +use smallvec::{smallvec, SmallVec}; use std::boxed::Box; use std::cell::Cell; use std::convert::TryFrom; use std::vec::Vec; use target_lexicon::Triple; +/// Information describing a library call to be emitted. 
+pub struct LibCallInfo { + libcall: LibCall, +} + type BoxCallInfo = Box; type BoxCallIndInfo = Box; type VecMachLabel = Vec; @@ -125,6 +133,49 @@ where }) } + fn lib_call_info_memcpy(&mut self) -> LibCallInfo { + LibCallInfo { + libcall: LibCall::Memcpy, + } + } + + fn lib_accumulate_outgoing_args_size(&mut self, _: &LibCallInfo) -> Unit { + // Libcalls only require the register save area. + self.lower_ctx + .abi() + .accumulate_outgoing_args_size(REG_SAVE_AREA_SIZE); + } + + fn lib_call_info(&mut self, info: &LibCallInfo) -> BoxCallInfo { + let caller_callconv = self.lower_ctx.abi().call_conv(); + let callee_callconv = CallConv::for_libcall(&self.flags, caller_callconv); + + // Uses and defs are defined by the particular libcall. + let (uses, defs): (SmallVec<[Reg; 8]>, SmallVec<[WritableReg; 8]>) = match info.libcall { + LibCall::Memcpy => ( + smallvec![gpr(2), gpr(3), gpr(4)], + smallvec![writable_gpr(2)], + ), + _ => unreachable!(), + }; + + // Clobbers are defined by the calling convention. Remove deps from clobbers. 
+ let mut clobbers = S390xMachineDeps::get_regs_clobbered_by_call(callee_callconv); + for reg in &defs { + clobbers.remove(PReg::from(reg.to_reg().to_real_reg().unwrap())); + } + + Box::new(CallInfo { + dest: ExternalName::LibCall(info.libcall), + uses, + defs, + clobbers, + opcode: Opcode::Call, + caller_callconv, + callee_callconv, + }) + } + #[inline] fn allow_div_traps(&mut self, _: Type) -> Option<()> { if !self.flags.avoid_div_traps() { @@ -468,6 +519,15 @@ where Some(imm.negate_bits()) } + #[inline] + fn len_minus_one(&mut self, len: u64) -> Option { + if len > 0 && len <= 256 { + Some((len - 1) as u8) + } else { + None + } + } + #[inline] fn mask_amt_imm(&mut self, ty: Type, amt: i64) -> u8 { let mask = ty.lane_bits() - 1; @@ -599,6 +659,11 @@ where MemFlags::trusted() } + #[inline] + fn memarg_flags(&mut self, mem: &MemArg) -> MemFlags { + mem.get_flags() + } + #[inline] fn memarg_reg_plus_reg(&mut self, x: Reg, y: Reg, bias: u8, flags: MemFlags) -> MemArg { MemArg::BXD12 { @@ -643,6 +708,20 @@ where } } + #[inline] + fn memarg_pair_from_memarg(&mut self, mem: &MemArg) -> Option { + MemArgPair::maybe_from_memarg(mem) + } + + #[inline] + fn memarg_pair_from_reg(&mut self, reg: Reg, flags: MemFlags) -> MemArgPair { + MemArgPair { + base: reg, + disp: UImm12::zero(), + flags, + } + } + #[inline] fn inst_builder_new(&mut self) -> VecMInstBuilder { Cell::new(Vec::::new()) diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index e92e95c399..13cb586c5c 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -90,6 +90,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { assert!(size % 8 == 0, "StructArgument size is not properly aligned"); next_stack += size; ret.push(ABIArg::StructArg { + pointer: None, offset, size, purpose: param.purpose, diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index 2225a9bb7f..046ff28972 100644 --- 
a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -712,12 +712,18 @@ where inputs.len(&self.lower_ctx.dfg().value_lists) - off, abi.num_args() ); - for i in caller.get_copy_to_arg_order() { + let mut arg_regs = vec![]; + for i in 0..abi.num_args() { let input = inputs .get(off + i, &self.lower_ctx.dfg().value_lists) .unwrap(); - let arg_regs = self.lower_ctx.put_value_in_regs(input); - caller.emit_copy_regs_to_arg(self.lower_ctx, i, arg_regs); + arg_regs.push(self.lower_ctx.put_value_in_regs(input)); + } + for (i, arg_regs) in arg_regs.iter().enumerate() { + caller.emit_copy_regs_to_buffer(self.lower_ctx, i, *arg_regs); + } + for (i, arg_regs) in arg_regs.iter().enumerate() { + caller.emit_copy_regs_to_arg(self.lower_ctx, i, *arg_regs); } caller.emit_call(self.lower_ctx); diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index 8a26964ab3..03b40d5d96 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -195,6 +195,8 @@ pub trait ABICaller { fn signature(&self) -> &Signature; /// Emit a copy of an argument value from a source register, prior to the call. + /// For large arguments with associated stack buffer, this may load the address + /// of the buffer into the argument register, if required by the ABI. fn emit_copy_regs_to_arg>( &self, ctx: &mut C, @@ -202,10 +204,17 @@ pub trait ABICaller { from_reg: ValueRegs, ); - /// Specific order for copying into arguments at callsites. We must be - /// careful to copy into StructArgs first, because we need to be able - /// to invoke memcpy() before we've loaded other arg regs (see above). - fn get_copy_to_arg_order(&self) -> SmallVec<[usize; 8]>; + /// Emit a copy of a large argument into its associated stack buffer, if any. 
+ /// We must be careful to perform all these copies (as necessary) before setting + /// up the argument registers, since we may have to invoke memcpy(), which could + /// clobber any registers already set up. The back-end should call this routine + /// for all arguments before calling emit_copy_regs_to_arg for all arguments. + fn emit_copy_regs_to_buffer>( + &self, + ctx: &mut C, + idx: usize, + from_reg: ValueRegs, + ); /// Emit a copy a return value into a destination register, after the call returns. fn emit_copy_retval_to_regs>( diff --git a/cranelift/codegen/src/machinst/abi_impl.rs b/cranelift/codegen/src/machinst/abi_impl.rs index 433f8053d0..92f9e2181e 100644 --- a/cranelift/codegen/src/machinst/abi_impl.rs +++ b/cranelift/codegen/src/machinst/abi_impl.rs @@ -185,6 +185,10 @@ pub enum ABIArg { /// area; on the callee side, we compute a pointer to this stack area and /// provide that as the argument's value. StructArg { + /// Register or stack slot holding a pointer to the buffer as passed + /// by the caller to the callee. If None, the ABI defines the buffer + /// to reside at a well-known location (i.e. at `offset` below). + pointer: Option, /// Offset of this arg relative to base of stack args. offset: i64, /// Size of this arg on the stack. @@ -195,14 +199,6 @@ pub enum ABIArg { } impl ABIArg { - /// Is this a StructArg? - fn is_struct_arg(&self) -> bool { - match self { - &ABIArg::StructArg { .. } => true, - _ => false, - } - } - /// Create an ABIArg from one register. pub fn reg( reg: RealReg, @@ -530,10 +526,6 @@ pub struct ABISig { sized_stack_ret_space: i64, /// Index in `args` of the stack-return-value-area argument. stack_ret_arg: Option, - /// Specific order for copying into arguments at callsites. We must be - /// careful to copy into StructArgs first, because we need to be able - /// to invoke memcpy() before we've loaded other arg regs (see above). - copy_to_arg_order: SmallVec<[usize; 8]>, /// Calling convention used. 
call_conv: isa::CallConv, } @@ -563,30 +555,14 @@ impl ABISig { need_stack_return_area, )?; - let mut copy_to_arg_order = SmallVec::new(); - for (i, arg) in args.iter().enumerate() { - // Struct args. - if arg.is_struct_arg() { - copy_to_arg_order.push(i); - } - } - for (i, arg) in args.iter().enumerate() { - // Non-struct args. Skip an appended return-area arg for multivalue - // returns, if any. - if !arg.is_struct_arg() && i < sig.params.len() { - copy_to_arg_order.push(i); - } - } - trace!( - "ABISig: sig {:?} => args = {:?} rets = {:?} arg stack = {} ret stack = {} stack_ret_arg = {:?} copy_to_arg_order = {:?}", + "ABISig: sig {:?} => args = {:?} rets = {:?} arg stack = {} ret stack = {} stack_ret_arg = {:?}", sig, args, rets, sized_stack_arg_space, sized_stack_ret_space, stack_ret_arg, - copy_to_arg_order, ); Ok(ABISig { @@ -595,7 +571,6 @@ impl ABISig { sized_stack_arg_space, sized_stack_ret_space, stack_ret_arg, - copy_to_arg_order, call_conv: sig.call_conv, }) } @@ -608,13 +583,25 @@ impl ABISig { // Compute uses: all arg regs. let mut uses = smallvec![]; for arg in &self.args { - if let &ABIArg::Slots { ref slots, .. } = arg { - for slot in slots { - match slot { - &ABIArgSlot::Reg { reg, .. } => { - uses.push(Reg::from(reg)); + match arg { + &ABIArg::Slots { ref slots, .. } => { + for slot in slots { + match slot { + &ABIArgSlot::Reg { reg, .. } => { + uses.push(Reg::from(reg)); + } + _ => {} + } + } + } + &ABIArg::StructArg { ref pointer, .. } => { + if let Some(slot) = pointer { + match slot { + &ABIArgSlot::Reg { reg, .. } => { + uses.push(Reg::from(reg)); + } + _ => {} } - _ => {} } } } @@ -643,11 +630,6 @@ impl ABISig { (uses, defs, clobbers) } - /// Specific order for copying into arguments at callsites. - pub fn copy_to_arg_order(&self, idx: usize) -> usize { - self.copy_to_arg_order[idx] - } - /// Get the number of arguments expected. 
pub fn num_args(&self) -> usize { if self.stack_ret_arg.is_some() { @@ -1106,55 +1088,67 @@ impl ABICallee for ABICalleeImpl { into_regs: ValueRegs>, ) -> SmallInstVec { let mut insts = smallvec![]; + let mut copy_arg_slot_to_reg = |slot: &ABIArgSlot, into_reg: &Writable| { + match slot { + &ABIArgSlot::Reg { reg, ty, .. } => { + // Extension mode doesn't matter (we're copying out, not in; we + // ignore high bits by convention). + insts.push(M::gen_move(*into_reg, reg.into(), ty)); + } + &ABIArgSlot::Stack { + offset, + ty, + extension, + .. + } => { + // However, we have to respect the extention mode for stack + // slots, or else we grab the wrong bytes on big-endian. + let ext = M::get_ext_mode(self.sig.call_conv, extension); + let ty = match (ext, ty_bits(ty) as u32) { + (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n) + if n < M::word_bits() => + { + M::word_type() + } + _ => ty, + }; + insts.push(M::gen_load_stack( + StackAMode::FPOffset( + M::fp_to_arg_offset(self.call_conv, &self.flags) + offset, + ty, + ), + *into_reg, + ty, + )); + } + } + }; + match &self.sig.args[idx] { &ABIArg::Slots { ref slots, .. } => { assert_eq!(into_regs.len(), slots.len()); for (slot, into_reg) in slots.iter().zip(into_regs.regs().iter()) { - match slot { - &ABIArgSlot::Reg { reg, ty, .. } => { - // Extension mode doesn't matter (we're copying out, not in; we - // ignore high bits by convention). - insts.push(M::gen_move(*into_reg, reg.into(), ty)); - } - &ABIArgSlot::Stack { - offset, - ty, - extension, - .. - } => { - // However, we have to respect the extention mode for stack - // slots, or else we grab the wrong bytes on big-endian. 
- let ext = M::get_ext_mode(self.sig.call_conv, extension); - let ty = match (ext, ty_bits(ty) as u32) { - (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n) - if n < M::word_bits() => - { - M::word_type() - } - _ => ty, - }; - insts.push(M::gen_load_stack( - StackAMode::FPOffset( - M::fp_to_arg_offset(self.call_conv, &self.flags) + offset, - ty, - ), - *into_reg, - ty, - )); - } - } + copy_arg_slot_to_reg(&slot, &into_reg); } } - &ABIArg::StructArg { offset, .. } => { + &ABIArg::StructArg { + pointer, offset, .. + } => { let into_reg = into_regs.only_reg().unwrap(); - insts.push(M::gen_get_stack_addr( - StackAMode::FPOffset( - M::fp_to_arg_offset(self.call_conv, &self.flags) + offset, + if let Some(slot) = pointer { + // Buffer address is passed in a register or stack slot. + copy_arg_slot_to_reg(&slot, &into_reg); + } else { + // Buffer address is implicitly defined by the ABI. + insts.push(M::gen_get_stack_addr( + StackAMode::FPOffset( + M::fp_to_arg_offset(self.call_conv, &self.flags) + offset, + I8, + ), + into_reg, I8, - ), - into_reg, - I8, - )); + )); + } } } insts @@ -1668,6 +1662,37 @@ impl ABICaller for ABICallerImpl { adjust_stack_and_nominal_sp::(ctx, off as i32, /* is_sub = */ false) } + fn emit_copy_regs_to_buffer>( + &self, + ctx: &mut C, + idx: usize, + from_regs: ValueRegs, + ) { + match &self.sig.args[idx] { + &ABIArg::Slots { .. } => {} + &ABIArg::StructArg { offset, size, .. } => { + let src_ptr = from_regs.only_reg().unwrap(); + let dst_ptr = ctx.alloc_tmp(M::word_type()).only_reg().unwrap(); + ctx.emit(M::gen_get_stack_addr( + StackAMode::SPOffset(offset, I8), + dst_ptr, + I8, + )); + // Emit a memcpy from `src_ptr` to `dst_ptr` of `size` bytes. + // N.B.: because we process StructArg params *first*, this is + // safe w.r.t. clobbers: we have not yet filled in any other + // arg regs. 
+ let memcpy_call_conv = isa::CallConv::for_libcall(&self.flags, self.sig.call_conv); + for insn in + M::gen_memcpy(memcpy_call_conv, dst_ptr.to_reg(), src_ptr, size as usize) + .into_iter() + { + ctx.emit(insn); + } + } + } + } + fn emit_copy_regs_to_arg>( &self, ctx: &mut C, @@ -1744,33 +1769,12 @@ impl ABICaller for ABICallerImpl { } } } - &ABIArg::StructArg { offset, size, .. } => { - let src_ptr = from_regs.only_reg().unwrap(); - let dst_ptr = ctx.alloc_tmp(M::word_type()).only_reg().unwrap(); - ctx.emit(M::gen_get_stack_addr( - StackAMode::SPOffset(offset, I8), - dst_ptr, - I8, - )); - // Emit a memcpy from `src_ptr` to `dst_ptr` of `size` bytes. - // N.B.: because we process StructArg params *first*, this is - // safe w.r.t. clobbers: we have not yet filled in any other - // arg regs. - let memcpy_call_conv = isa::CallConv::for_libcall(&self.flags, self.sig.call_conv); - for insn in - M::gen_memcpy(memcpy_call_conv, dst_ptr.to_reg(), src_ptr, size as usize) - .into_iter() - { - ctx.emit(insn); - } + &ABIArg::StructArg { pointer, .. } => { + assert!(pointer.is_none()); // Only supported via ISLE. } } } - fn get_copy_to_arg_order(&self) -> SmallVec<[usize; 8]> { - self.sig.copy_to_arg_order.clone() - } - fn emit_copy_retval_to_regs>( &self, ctx: &mut C, diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index ee7b55ae26..695b02fc38 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -780,10 +780,6 @@ macro_rules! isle_prelude_methods { regs.regs()[idx] } - fn abi_copy_to_arg_order(&mut self, abi: &ABISig, idx: usize) -> usize { - abi.copy_to_arg_order(idx) - } - fn abi_num_args(&mut self, abi: &ABISig) -> usize { abi.num_args() } @@ -833,6 +829,24 @@ macro_rules! isle_prelude_methods { } } + fn abi_arg_struct_pointer(&mut self, arg: &ABIArg) -> Option<(ABIArgSlot, i64, u64)> { + match arg { + &ABIArg::StructArg { + pointer, + offset, + size, + .. 
+ } => { + if let Some(pointer) = pointer { + Some((pointer, offset, size)) + } else { + None + } + } + _ => None, + } + } + fn abi_stackslot_addr( &mut self, dst: WritableReg, diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index 15b7fca802..661ecb9fed 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -828,10 +828,6 @@ (Sext) )) -;; Specific order for copying into arguments at callsites. -(decl abi_copy_to_arg_order (ABISig usize) usize) -(extern constructor abi_copy_to_arg_order abi_copy_to_arg_order) - ;; Get the number of arguments expected. (decl abi_num_args (ABISig) usize) (extern constructor abi_num_args abi_num_args) @@ -878,6 +874,11 @@ (decl abi_arg_only_slot (ABIArgSlot) ABIArg) (extern extractor abi_arg_only_slot abi_arg_only_slot) +;; Extractor to detect the special case where a struct argument +;; is explicitly passed by reference using a hidden pointer. +(decl abi_arg_struct_pointer (ABIArgSlot i64 u64) ABIArg) +(extern extractor abi_arg_struct_pointer abi_arg_struct_pointer) + ;; Convert a real register number into a virtual register. 
(decl real_reg_to_reg (RealReg) Reg) (extern constructor real_reg_to_reg real_reg_to_reg) diff --git a/cranelift/filetests/filetests/isa/aarch64/call.clif b/cranelift/filetests/filetests/isa/aarch64/call.clif index 2c1ce3986b..97a262b232 100644 --- a/cranelift/filetests/filetests/isa/aarch64/call.clif +++ b/cranelift/filetests/filetests/isa/aarch64/call.clif @@ -281,8 +281,8 @@ block0(v0: i64): ; block0: ; mov x7, x0 ; movz x0, #42 -; mov x1, x7 ; movz x2, #42 +; mov x1, x7 ; ldr x10, 8 ; b 12 ; data TestCase { length: 3, ascii: [102, 49, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 ; blr x10 ; ldp fp, lr, [sp], #16 diff --git a/cranelift/filetests/filetests/isa/s390x/struct-arg.clif b/cranelift/filetests/filetests/isa/s390x/struct-arg.clif new file mode 100644 index 0000000000..cc28db4bc0 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/struct-arg.clif @@ -0,0 +1,124 @@ +test compile precise-output +target s390x + +function u0:0(i64 sarg(64)) -> i8 system_v { +block0(v0: i64): + v1 = load.i8 v0 + return v1 +} + +; block0: +; llc %r2, 0(%r2) +; br %r14 + +function u0:1(i64 sarg(64), i64) -> i8 system_v { +block0(v0: i64, v1: i64): + v2 = load.i8 v1 + v3 = load.i8 v0 + v4 = iadd.i8 v2, v3 + return v4 +} + +; block0: +; llc %r5, 0(%r3) +; llc %r2, 0(%r2) +; ark %r2, %r5, %r2 +; br %r14 + +function u0:2(i64) -> i8 system_v { +fn1 = colocated u0:0(i64 sarg(64)) -> i8 system_v + +block0(v0: i64): + v1 = call fn1(v0) + return v1 +} + +; stmg %r14, %r15, 112(%r15) +; aghi %r15, -224 +; virtual_sp_offset_adjust 224 +; block0: +; mvc 160(63,%r15), 0(%r2) +; la %r2, 160(%r15) +; brasl %r14, u0:0 +; lmg %r14, %r15, 336(%r15) +; br %r14 + +function u0:3(i64, i64) -> i8 system_v { +fn1 = colocated u0:0(i64, i64 sarg(64)) -> i8 system_v + +block0(v0: i64, v1: i64): + v2 = call fn1(v0, v1) + return v2 +} + +; stmg %r14, %r15, 112(%r15) +; aghi %r15, -224 +; virtual_sp_offset_adjust 224 +; block0: +; mvc 160(63,%r15), 0(%r3) +; la %r3, 160(%r15) +; brasl %r14, u0:0 
+; lmg %r14, %r15, 336(%r15) +; br %r14 + +function u0:4(i64 sarg(256), i64 sarg(64)) -> i8 system_v { +block0(v0: i64, v1: i64): + v2 = load.i8 v0 + v3 = load.i8 v1 + v4 = iadd.i8 v2, v3 + return v4 +} + +; block0: +; llc %r5, 0(%r2) +; llc %r2, 0(%r3) +; ark %r2, %r5, %r2 +; br %r14 + +function u0:5(i64, i64, i64) -> i8 system_v { +fn1 = colocated u0:0(i64, i64 sarg(256), i64 sarg(64)) -> i8 system_v + +block0(v0: i64, v1: i64, v2: i64): + v3 = call fn1(v0, v1, v2) + return v3 +} + +; stmg %r14, %r15, 112(%r15) +; aghi %r15, -480 +; virtual_sp_offset_adjust 480 +; block0: +; mvc 160(255,%r15), 0(%r3) +; mvc 416(63,%r15), 0(%r4) +; la %r3, 160(%r15) +; la %r4, 416(%r15) +; brasl %r14, u0:0 +; lmg %r14, %r15, 592(%r15) +; br %r14 + +function u0:6(i64, i64, i64) -> i8 system_v { +fn1 = colocated u0:0(i64, i64 sarg(1024), i64 sarg(64)) -> i8 system_v + +block0(v0: i64, v1: i64, v2: i64): + v3 = call fn1(v0, v1, v2) + return v3 +} + +; stmg %r7, %r15, 56(%r15) +; aghi %r15, -1248 +; virtual_sp_offset_adjust 1248 +; block0: +; lgr %r7, %r2 +; lgr %r9, %r4 +; la %r2, 160(%r15) +; la %r3, 0(%r3) +; lghi %r4, 1024 +; brasl %r14, %Memcpy +; lgr %r4, %r9 +; mvc 1184(63,%r15), 0(%r4) +; lgr %r2, %r7 +; la %r3, 160(%r15) +; la %r4, 1184(%r15) +; brasl %r14, u0:0 +; lmg %r7, %r15, 1304(%r15) +; br %r14 +