From 456561f43180e8174212f35daadc72ac076e5628 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sun, 13 Dec 2020 18:50:59 -0800 Subject: [PATCH] x64 and aarch64: allow StructArgument and StructReturn args. The StructReturn ABI is fairly simple at the codegen/isel level: we only need to take care to return the sret pointer as one of the return values if that wasn't specified in the initial function signature. Struct arguments are a little more complex. A struct argument is stored as a chunk of memory in the stack-args space. However, the CLIF semantics are slightly special: on the caller side, the parameter passed in is a pointer to an arbitrary memory block, and we must memcpy this data to the on-stack struct-argument; and on the callee side, we provide a pointer to the passed-in struct-argument as the CLIF block param value. This is necessary to support various ABIs other than Wasm, such as that of Rust (with the cg_clif codegen backend). --- cranelift/codegen/src/isa/aarch64/abi.rs | 140 +++++++---- cranelift/codegen/src/isa/aarch64/lower.rs | 2 +- .../codegen/src/isa/aarch64/lower_inst.rs | 11 +- cranelift/codegen/src/isa/arm32/abi.rs | 41 ++-- cranelift/codegen/src/isa/arm32/lower.rs | 2 +- cranelift/codegen/src/isa/arm32/lower_inst.rs | 6 +- cranelift/codegen/src/isa/x64/abi.rs | 157 ++++++++---- cranelift/codegen/src/isa/x64/lower.rs | 11 +- cranelift/codegen/src/machinst/abi.rs | 13 +- cranelift/codegen/src/machinst/abi_impl.rs | 231 +++++++++++++++--- cranelift/codegen/src/machinst/lower.rs | 25 +- .../filetests/filetests/isa/x64/i128.clif | 2 +- .../filetests/isa/x64/struct-arg.clif | 147 +++++++++++ .../filetests/isa/x64/struct-ret.clif | 19 ++ 14 files changed, 641 insertions(+), 166 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/x64/struct-arg.clif create mode 100644 cranelift/filetests/filetests/isa/x64/struct-ret.clif diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 
d5d88e7770..8b371cb159 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -4,6 +4,8 @@ use crate::ir; use crate::ir::types; use crate::ir::types::*; use crate::ir::MemFlags; +use crate::ir::Opcode; +use crate::ir::{ExternalName, LibCall}; use crate::isa; use crate::isa::aarch64::{inst::EmitState, inst::*}; use crate::machinst::*; @@ -76,41 +78,41 @@ fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Opt match &param.purpose { &ir::ArgumentPurpose::VMContext => { // This is SpiderMonkey's `WasmTlsReg`. - Some(ABIArg::Reg( - ValueRegs::one(xreg(BALDRDASH_TLS_REG).to_real_reg()), - ir::types::I64, - param.extension, - param.purpose, - )) + Some(ABIArg::Reg { + regs: ValueRegs::one(xreg(BALDRDASH_TLS_REG).to_real_reg()), + ty: ir::types::I64, + extension: param.extension, + purpose: param.purpose, + }) } &ir::ArgumentPurpose::SignatureId => { // This is SpiderMonkey's `WasmTableCallSigReg`. - Some(ABIArg::Reg( - ValueRegs::one(xreg(BALDRDASH_SIG_REG).to_real_reg()), - ir::types::I64, - param.extension, - param.purpose, - )) + Some(ABIArg::Reg { + regs: ValueRegs::one(xreg(BALDRDASH_SIG_REG).to_real_reg()), + ty: ir::types::I64, + extension: param.extension, + purpose: param.purpose, + }) } &ir::ArgumentPurpose::CalleeTLS => { // This is SpiderMonkey's callee TLS slot in the extended frame of Wasm's ABI-2020. assert!(call_conv == isa::CallConv::Baldrdash2020); - Some(ABIArg::Stack( - BALDRDASH_CALLEE_TLS_OFFSET, - ir::types::I64, - ir::ArgumentExtension::None, - param.purpose, - )) + Some(ABIArg::Stack { + offset: BALDRDASH_CALLEE_TLS_OFFSET, + ty: ir::types::I64, + extension: ir::ArgumentExtension::None, + purpose: param.purpose, + }) } &ir::ArgumentPurpose::CallerTLS => { // This is SpiderMonkey's caller TLS slot in the extended frame of Wasm's ABI-2020. 
assert!(call_conv == isa::CallConv::Baldrdash2020); - Some(ABIArg::Stack( - BALDRDASH_CALLER_TLS_OFFSET, - ir::types::I64, - ir::ArgumentExtension::None, - param.purpose, - )) + Some(ABIArg::Stack { + offset: BALDRDASH_CALLER_TLS_OFFSET, + ty: ir::types::I64, + extension: ir::ArgumentExtension::None, + purpose: param.purpose, + }) } _ => None, } @@ -208,7 +210,9 @@ impl ABIMachineSpec for AArch64MachineDeps { | &ir::ArgumentPurpose::StackLimit | &ir::ArgumentPurpose::SignatureId | &ir::ArgumentPurpose::CallerTLS - | &ir::ArgumentPurpose::CalleeTLS => {} + | &ir::ArgumentPurpose::CalleeTLS + | &ir::ArgumentPurpose::StructReturn + | &ir::ArgumentPurpose::StructArgument(_) => {} _ => panic!( "Unsupported argument purpose {:?} in signature: {:?}", param.purpose, params @@ -233,18 +237,28 @@ impl ABIMachineSpec for AArch64MachineDeps { if let Some(param) = try_fill_baldrdash_reg(call_conv, param) { assert!(rc == RegClass::I64); ret.push(param); + } else if let ir::ArgumentPurpose::StructArgument(size) = param.purpose { + let offset = next_stack as i64; + let size = size as u64; + assert!(size % 8 == 0, "StructArgument size is not properly aligned"); + next_stack += size; + ret.push(ABIArg::StructArg { + offset, + size, + purpose: param.purpose, + }); } else if *next_reg < max_per_class_reg_vals && remaining_reg_vals > 0 { let reg = match rc { RegClass::I64 => xreg(*next_reg), RegClass::V128 => vreg(*next_reg), _ => unreachable!(), }; - ret.push(ABIArg::Reg( - ValueRegs::one(reg.to_real_reg()), - param.value_type, - param.extension, - param.purpose, - )); + ret.push(ABIArg::Reg { + regs: ValueRegs::one(reg.to_real_reg()), + ty: param.value_type, + extension: param.extension, + purpose: param.purpose, + }); *next_reg += 1; remaining_reg_vals -= 1; } else { @@ -255,12 +269,12 @@ impl ABIMachineSpec for AArch64MachineDeps { // Align. 
debug_assert!(size.is_power_of_two()); next_stack = (next_stack + size - 1) & !(size - 1); - ret.push(ABIArg::Stack( - next_stack as i64, - param.value_type, - param.extension, - param.purpose, - )); + ret.push(ABIArg::Stack { + offset: next_stack as i64, + ty: param.value_type, + extension: param.extension, + purpose: param.purpose, + }); next_stack += size; } } @@ -272,19 +286,19 @@ impl ABIMachineSpec for AArch64MachineDeps { let extra_arg = if add_ret_area_ptr { debug_assert!(args_or_rets == ArgsOrRets::Args); if next_xreg < max_per_class_reg_vals && remaining_reg_vals > 0 { - ret.push(ABIArg::Reg( - ValueRegs::one(xreg(next_xreg).to_real_reg()), - I64, - ir::ArgumentExtension::None, - ir::ArgumentPurpose::Normal, - )); + ret.push(ABIArg::Reg { + regs: ValueRegs::one(xreg(next_xreg).to_real_reg()), + ty: I64, + extension: ir::ArgumentExtension::None, + purpose: ir::ArgumentPurpose::Normal, + }); } else { - ret.push(ABIArg::Stack( - next_stack as i64, - I64, - ir::ArgumentExtension::None, - ir::ArgumentPurpose::Normal, - )); + ret.push(ABIArg::Stack { + offset: next_stack as i64, + ty: I64, + extension: ir::ArgumentExtension::None, + purpose: ir::ArgumentPurpose::Normal, + }); next_stack += 8; } Some(ret.len() - 1) @@ -708,6 +722,34 @@ impl ABIMachineSpec for AArch64MachineDeps { insts } + fn gen_memcpy( + call_conv: isa::CallConv, + dst: Reg, + src: Reg, + size: usize, + ) -> SmallVec<[Self::I; 8]> { + // Baldrdash should not use struct args. 
+ assert!(!call_conv.extends_baldrdash()); + let mut insts = SmallVec::new(); + let arg0 = writable_xreg(0); + let arg1 = writable_xreg(1); + let arg2 = writable_xreg(2); + insts.push(Inst::gen_move(arg0, dst, I64)); + insts.push(Inst::gen_move(arg1, src, I64)); + insts.extend(Inst::load_constant(arg2, size as u64).into_iter()); + insts.push(Inst::Call { + info: Box::new(CallInfo { + dest: ExternalName::LibCall(LibCall::Memcpy), + uses: vec![arg0.to_reg(), arg1.to_reg(), arg2.to_reg()], + defs: Self::get_regs_clobbered_by_call(call_conv), + opcode: Opcode::Call, + caller_callconv: call_conv, + callee_callconv: call_conv, + }), + }); + insts + } + fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 { // We allocate in terms of 8-byte slots. match (rc, ty) { diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 37c5e79c8d..0f37bb6123 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -1231,7 +1231,7 @@ impl LowerBackend for AArch64Backend { type MInst = Inst; fn lower>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> { - lower_inst::lower_insn_to_regs(ctx, ir_inst) + lower_inst::lower_insn_to_regs(ctx, ir_inst, &self.flags) } fn lower_branch_group>( diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 1c4e3d7e99..73c11008c4 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -7,6 +7,7 @@ use crate::ir::Inst as IRInst; use crate::ir::{InstructionData, Opcode, TrapCode}; use crate::machinst::lower::*; use crate::machinst::*; +use crate::settings::Flags; use crate::{CodegenError, CodegenResult}; use crate::isa::aarch64::abi::*; @@ -24,6 +25,7 @@ use super::lower::*; pub(crate) fn lower_insn_to_regs>( ctx: &mut C, insn: IRInst, + flags: &Flags, ) -> CodegenResult<()> { let op = ctx.data(insn).opcode(); let 
inputs = insn_inputs(ctx, insn); @@ -1803,7 +1805,7 @@ pub(crate) fn lower_insn_to_regs>( assert!(inputs.len() == sig.params.len()); assert!(outputs.len() == sig.returns.len()); ( - AArch64ABICaller::from_func(sig, &extname, dist, caller_conv)?, + AArch64ABICaller::from_func(sig, &extname, dist, caller_conv, flags)?, &inputs[..], ) } @@ -1813,7 +1815,7 @@ pub(crate) fn lower_insn_to_regs>( assert!(inputs.len() - 1 == sig.params.len()); assert!(outputs.len() == sig.returns.len()); ( - AArch64ABICaller::from_ptr(sig, ptr, op, caller_conv)?, + AArch64ABICaller::from_ptr(sig, ptr, op, caller_conv, flags)?, &inputs[1..], ) } @@ -1822,8 +1824,9 @@ pub(crate) fn lower_insn_to_regs>( abi.emit_stack_pre_adjust(ctx); assert!(inputs.len() == abi.num_args()); - for (i, input) in inputs.iter().enumerate() { - let arg_reg = put_input_in_reg(ctx, *input, NarrowValueMode::None); + for i in abi.get_copy_to_arg_order() { + let input = inputs[i]; + let arg_reg = put_input_in_reg(ctx, input, NarrowValueMode::None); abi.emit_copy_regs_to_arg(ctx, i, ValueRegs::one(arg_reg)); } abi.emit_call(ctx); diff --git a/cranelift/codegen/src/isa/arm32/abi.rs b/cranelift/codegen/src/isa/arm32/abi.rs index 9e92a7b7aa..e1a64aeb76 100644 --- a/cranelift/codegen/src/isa/arm32/abi.rs +++ b/cranelift/codegen/src/isa/arm32/abi.rs @@ -81,12 +81,12 @@ impl ABIMachineSpec for Arm32MachineDeps { if next_rreg < max_reg_val { let reg = rreg(next_rreg); - ret.push(ABIArg::Reg( - ValueRegs::one(reg.to_real_reg()), - param.value_type, - param.extension, - param.purpose, - )); + ret.push(ABIArg::Reg { + regs: ValueRegs::one(reg.to_real_reg()), + ty: param.value_type, + extension: param.extension, + purpose: param.purpose, + }); next_rreg += 1; } else { // Arguments are stored on stack in reversed order. 
@@ -101,12 +101,12 @@ impl ABIMachineSpec for Arm32MachineDeps { let extra_arg = if add_ret_area_ptr { debug_assert!(args_or_rets == ArgsOrRets::Args); if next_rreg < max_reg_val { - ret.push(ABIArg::Reg( - ValueRegs::one(rreg(next_rreg).to_real_reg()), - I32, - ir::ArgumentExtension::None, - ir::ArgumentPurpose::Normal, - )); + ret.push(ABIArg::Reg { + regs: ValueRegs::one(rreg(next_rreg).to_real_reg()), + ty: I32, + extension: ir::ArgumentExtension::None, + purpose: ir::ArgumentPurpose::Normal, + }); } else { stack_args.push(( I32, @@ -124,12 +124,12 @@ impl ABIMachineSpec for Arm32MachineDeps { let max_stack = next_stack; for (ty, ext, purpose) in stack_args.into_iter().rev() { next_stack -= 4; - ret.push(ABIArg::Stack( - (max_stack - next_stack) as i64, + ret.push(ABIArg::Stack { + offset: (max_stack - next_stack) as i64, ty, - ext, + extension: ext, purpose, - )); + }); } assert_eq!(next_stack, 0); @@ -426,6 +426,15 @@ impl ABIMachineSpec for Arm32MachineDeps { insts } + fn gen_memcpy( + _call_conv: isa::CallConv, + _dst: Reg, + _src: Reg, + _size: usize, + ) -> SmallVec<[Self::I; 8]> { + unimplemented!("StructArgs not implemented for ARM32 yet"); + } + fn get_number_of_spillslots_for_value(rc: RegClass, _ty: Type) -> u32 { match rc { RegClass::I32 => 1, diff --git a/cranelift/codegen/src/isa/arm32/lower.rs b/cranelift/codegen/src/isa/arm32/lower.rs index 372c18b8e9..f2a35f9820 100644 --- a/cranelift/codegen/src/isa/arm32/lower.rs +++ b/cranelift/codegen/src/isa/arm32/lower.rs @@ -224,7 +224,7 @@ impl LowerBackend for Arm32Backend { type MInst = Inst; fn lower>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> { - lower_inst::lower_insn_to_regs(ctx, ir_inst) + lower_inst::lower_insn_to_regs(ctx, ir_inst, &self.flags) } fn lower_branch_group>( diff --git a/cranelift/codegen/src/isa/arm32/lower_inst.rs b/cranelift/codegen/src/isa/arm32/lower_inst.rs index dd453d772a..16fd528c56 100644 --- a/cranelift/codegen/src/isa/arm32/lower_inst.rs +++ 
b/cranelift/codegen/src/isa/arm32/lower_inst.rs @@ -5,6 +5,7 @@ use crate::ir::Inst as IRInst; use crate::ir::Opcode; use crate::machinst::lower::*; use crate::machinst::*; +use crate::settings::Flags; use crate::CodegenResult; use crate::isa::arm32::abi::*; @@ -18,6 +19,7 @@ use super::lower::*; pub(crate) fn lower_insn_to_regs>( ctx: &mut C, insn: IRInst, + flags: &Flags, ) -> CodegenResult<()> { let op = ctx.data(insn).opcode(); let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn)) @@ -502,7 +504,7 @@ pub(crate) fn lower_insn_to_regs>( assert_eq!(inputs.len(), sig.params.len()); assert_eq!(outputs.len(), sig.returns.len()); ( - Arm32ABICaller::from_func(sig, &extname, dist, caller_conv)?, + Arm32ABICaller::from_func(sig, &extname, dist, caller_conv, flags)?, &inputs[..], ) } @@ -512,7 +514,7 @@ pub(crate) fn lower_insn_to_regs>( assert_eq!(inputs.len() - 1, sig.params.len()); assert_eq!(outputs.len(), sig.returns.len()); ( - Arm32ABICaller::from_ptr(sig, ptr, op, caller_conv)?, + Arm32ABICaller::from_ptr(sig, ptr, op, caller_conv, flags)?, &inputs[1..], ) } diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index aa757392e3..d4f7d5c60c 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -31,41 +31,41 @@ fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option { // This is SpiderMonkey's `WasmTlsReg`. - Some(ABIArg::Reg( - ValueRegs::one(regs::r14().to_real_reg()), - types::I64, - param.extension, - param.purpose, - )) + Some(ABIArg::Reg { + regs: ValueRegs::one(regs::r14().to_real_reg()), + ty: types::I64, + extension: param.extension, + purpose: param.purpose, + }) } &ir::ArgumentPurpose::SignatureId => { // This is SpiderMonkey's `WasmTableCallSigReg`. 
- Some(ABIArg::Reg( - ValueRegs::one(regs::r10().to_real_reg()), - types::I64, - param.extension, - param.purpose, - )) + Some(ABIArg::Reg { + regs: ValueRegs::one(regs::r10().to_real_reg()), + ty: types::I64, + extension: param.extension, + purpose: param.purpose, + }) } &ir::ArgumentPurpose::CalleeTLS => { // This is SpiderMonkey's callee TLS slot in the extended frame of Wasm's ABI-2020. assert!(call_conv == isa::CallConv::Baldrdash2020); - Some(ABIArg::Stack( - BALDRDASH_CALLEE_TLS_OFFSET, - ir::types::I64, - ir::ArgumentExtension::None, - param.purpose, - )) + Some(ABIArg::Stack { + offset: BALDRDASH_CALLEE_TLS_OFFSET, + ty: ir::types::I64, + extension: ir::ArgumentExtension::None, + purpose: param.purpose, + }) } &ir::ArgumentPurpose::CallerTLS => { // This is SpiderMonkey's caller TLS slot in the extended frame of Wasm's ABI-2020. assert!(call_conv == isa::CallConv::Baldrdash2020); - Some(ABIArg::Stack( - BALDRDASH_CALLER_TLS_OFFSET, - ir::types::I64, - ir::ArgumentExtension::None, - param.purpose, - )) + Some(ABIArg::Stack { + offset: BALDRDASH_CALLER_TLS_OFFSET, + ty: ir::types::I64, + extension: ir::ArgumentExtension::None, + purpose: param.purpose, + }) } _ => None, } @@ -131,7 +131,9 @@ impl ABIMachineSpec for X64ABIMachineSpec { | &ir::ArgumentPurpose::StackLimit | &ir::ArgumentPurpose::SignatureId | &ir::ArgumentPurpose::CalleeTLS - | &ir::ArgumentPurpose::CallerTLS => {} + | &ir::ArgumentPurpose::CallerTLS + | &ir::ArgumentPurpose::StructReturn + | &ir::ArgumentPurpose::StructArgument(_) => {} _ => panic!( "Unsupported argument purpose {:?} in signature: {:?}", param.purpose, params @@ -143,6 +145,19 @@ impl ABIMachineSpec for X64ABIMachineSpec { continue; } + if let ir::ArgumentPurpose::StructArgument(size) = param.purpose { + let offset = next_stack as i64; + let size = size as u64; + assert!(size % 8 == 0, "StructArgument size is not properly aligned"); + next_stack += size; + ret.push(ABIArg::StructArg { + offset, + size, + purpose: 
param.purpose, + }); + continue; + } + // Find regclass(es) of the register(s) used to store a value of this type. let (rcs, _) = Inst::rc_for_type(param.value_type)?; let intreg = rcs[0] == RegClass::I64; @@ -183,12 +198,12 @@ impl ABIMachineSpec for X64ABIMachineSpec { 2 => ValueRegs::two(regs[0], regs[1]), _ => panic!("More than two registers unexpected"), }; - ret.push(ABIArg::Reg( + ret.push(ABIArg::Reg { regs, - param.value_type, - param.extension, - param.purpose, - )); + ty: param.value_type, + extension: param.extension, + purpose: param.purpose, + }); if intreg { next_gpr += num_regs; } else { @@ -202,12 +217,12 @@ impl ABIMachineSpec for X64ABIMachineSpec { // Align. debug_assert!(size.is_power_of_two()); next_stack = (next_stack + size - 1) & !(size - 1); - ret.push(ABIArg::Stack( - next_stack as i64, - param.value_type, - param.extension, - param.purpose, - )); + ret.push(ABIArg::Stack { + offset: next_stack as i64, + ty: param.value_type, + extension: param.extension, + purpose: param.purpose, + }); next_stack += size; } } @@ -219,19 +234,19 @@ impl ABIMachineSpec for X64ABIMachineSpec { let extra_arg = if add_ret_area_ptr { debug_assert!(args_or_rets == ArgsOrRets::Args); if let Some(reg) = get_intreg_for_arg_systemv(&call_conv, next_gpr) { - ret.push(ABIArg::Reg( - ValueRegs::one(reg.to_real_reg()), - types::I64, - ir::ArgumentExtension::None, - ir::ArgumentPurpose::Normal, - )); + ret.push(ABIArg::Reg { + regs: ValueRegs::one(reg.to_real_reg()), + ty: types::I64, + extension: ir::ArgumentExtension::None, + purpose: ir::ArgumentPurpose::Normal, + }); } else { - ret.push(ABIArg::Stack( - next_stack as i64, - types::I64, - ir::ArgumentExtension::None, - ir::ArgumentPurpose::Normal, - )); + ret.push(ABIArg::Stack { + offset: next_stack as i64, + ty: types::I64, + extension: ir::ArgumentExtension::None, + purpose: ir::ArgumentPurpose::Normal, + }); next_stack += 8; } Some(ret.len() - 1) @@ -441,6 +456,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { let 
stack_size = clobbered_size + fixed_frame_storage_size; // Align to 16 bytes. let stack_size = (stack_size + 15) & !15; + let clobbered_size = stack_size - fixed_frame_storage_size; // Adjust the stack pointer downward with one `sub rsp, IMM` // instruction. if stack_size > 0 { @@ -567,6 +583,51 @@ impl ABIMachineSpec for X64ABIMachineSpec { insts } + fn gen_memcpy( + call_conv: isa::CallConv, + dst: Reg, + src: Reg, + size: usize, + ) -> SmallVec<[Self::I; 8]> { + // Baldrdash should not use struct args. + assert!(!call_conv.extends_baldrdash()); + let mut insts = SmallVec::new(); + let arg0 = get_intreg_for_arg_systemv(&call_conv, 0).unwrap(); + let arg1 = get_intreg_for_arg_systemv(&call_conv, 1).unwrap(); + let arg2 = get_intreg_for_arg_systemv(&call_conv, 2).unwrap(); + // We need a register to load the address of `memcpy()` below and we + // don't have a lowering context to allocate a temp here; so just use a + // register we know we are free to mutate as part of this sequence + // (because it is clobbered by the call as per the ABI anyway). + let memcpy_addr = get_intreg_for_arg_systemv(&call_conv, 3).unwrap(); + insts.push(Inst::gen_move(Writable::from_reg(arg0), dst, I64)); + insts.push(Inst::gen_move(Writable::from_reg(arg1), src, I64)); + insts.extend( + Inst::gen_constant( + ValueRegs::one(Writable::from_reg(arg2)), + size as u128, + I64, + |_| panic!("tmp should not be needed"), + ) + .into_iter(), + ); + // We use an indirect call and a full LoadExtName because we do not have + // information about the libcall `RelocDistance` here, so we + // conservatively use the more flexible calling sequence. 
+ insts.push(Inst::LoadExtName { + dst: Writable::from_reg(memcpy_addr), + name: Box::new(ExternalName::LibCall(LibCall::Memcpy)), + offset: 0, + }); + insts.push(Inst::call_unknown( + RegMem::reg(memcpy_addr), + /* uses = */ vec![arg0, arg1, arg2], + /* defs = */ Self::get_regs_clobbered_by_call(call_conv), + Opcode::Call, + )); + insts + } + fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 { // We allocate in terms of 8-byte slots. match (rc, ty) { diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index a25da666b3..17f8ab992c 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -1083,7 +1083,7 @@ fn emit_vm_call>( let sig = make_libcall_sig(ctx, insn, call_conv, types::I64); let caller_conv = ctx.abi().call_conv(); - let mut abi = X64ABICaller::from_func(&sig, &extname, dist, caller_conv)?; + let mut abi = X64ABICaller::from_func(&sig, &extname, dist, caller_conv, flags)?; abi.emit_stack_pre_adjust(ctx); @@ -3091,7 +3091,7 @@ fn lower_insn_to_regs>( assert_eq!(inputs.len(), sig.params.len()); assert_eq!(outputs.len(), sig.returns.len()); ( - X64ABICaller::from_func(sig, &extname, dist, caller_conv)?, + X64ABICaller::from_func(sig, &extname, dist, caller_conv, flags)?, &inputs[..], ) } @@ -3102,7 +3102,7 @@ fn lower_insn_to_regs>( assert_eq!(inputs.len() - 1, sig.params.len()); assert_eq!(outputs.len(), sig.returns.len()); ( - X64ABICaller::from_ptr(sig, ptr, op, caller_conv)?, + X64ABICaller::from_ptr(sig, ptr, op, caller_conv, flags)?, &inputs[1..], ) } @@ -3112,8 +3112,9 @@ fn lower_insn_to_regs>( abi.emit_stack_pre_adjust(ctx); assert_eq!(inputs.len(), abi.num_args()); - for (i, input) in inputs.iter().enumerate() { - let arg_regs = put_input_in_regs(ctx, *input); + for i in abi.get_copy_to_arg_order() { + let input = inputs[i]; + let arg_regs = put_input_in_regs(ctx, input); abi.emit_copy_regs_to_arg(ctx, i, arg_regs); } abi.emit_call(ctx); diff --git 
a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index 59738bd3a5..ca4b2414df 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -1,7 +1,7 @@ //! ABI definitions. use crate::binemit::StackMap; -use crate::ir::StackSlot; +use crate::ir::{Signature, StackSlot}; use crate::isa::CallConv; use crate::machinst::*; use crate::settings; @@ -27,6 +27,9 @@ pub trait ABICallee { /// lowering context exists. fn init(&mut self, maybe_tmp: Option>); + /// Access the (possibly legalized) signature. + fn signature(&self) -> &Signature; + /// Accumulate outgoing arguments. This ensures that at least SIZE bytes /// are allocated in the prologue to be available for use in function calls /// to hold arguments and/or return values. If this function is called @@ -215,6 +218,9 @@ pub trait ABICaller { /// Get the number of arguments expected. fn num_args(&self) -> usize; + /// Access the (possibly legalized) signature. + fn signature(&self) -> &Signature; + /// Emit a copy of an argument value from a source register, prior to the call. fn emit_copy_regs_to_arg>( &self, @@ -223,6 +229,11 @@ pub trait ABICaller { from_reg: ValueRegs, ); + /// Specific order for copying into arguments at callsites. We must be + /// careful to copy into StructArgs first, because we need to be able + /// to invoke memcpy() before we've loaded other arg regs (see above). + fn get_copy_to_arg_order(&self) -> SmallVec<[usize; 8]>; + /// Emit a copy a return value into a destination register, after the call returns. 
fn emit_copy_retval_to_regs>( &self, diff --git a/cranelift/codegen/src/machinst/abi_impl.rs b/cranelift/codegen/src/machinst/abi_impl.rs index d315c3defb..439e93e2d9 100644 --- a/cranelift/codegen/src/machinst/abi_impl.rs +++ b/cranelift/codegen/src/machinst/abi_impl.rs @@ -111,7 +111,7 @@ use super::abi::*; use crate::binemit::StackMap; use crate::ir::types::*; -use crate::ir::{ArgumentExtension, StackSlot}; +use crate::ir::{ArgumentExtension, ArgumentPurpose, StackSlot}; use crate::machinst::*; use crate::settings; use crate::CodegenResult; @@ -128,22 +128,58 @@ use std::mem; #[derive(Clone, Copy, Debug)] pub enum ABIArg { /// In a real register (or set of registers). - Reg( - ValueRegs, - ir::Type, - ir::ArgumentExtension, - ir::ArgumentPurpose, - ), + Reg { + /// Register(s) that hold this arg. + regs: ValueRegs, + /// Value type of this arg. + ty: ir::Type, + /// Should this arg be zero- or sign-extended? + extension: ir::ArgumentExtension, + /// Purpose of this arg. + purpose: ir::ArgumentPurpose, + }, /// Arguments only: on stack, at given offset from SP at entry. - Stack(i64, ir::Type, ir::ArgumentExtension, ir::ArgumentPurpose), + Stack { + /// Offset of this arg relative to the base of stack args. + offset: i64, + /// Value type of this arg. + ty: ir::Type, + /// Should this arg be zero- or sign-extended? + extension: ir::ArgumentExtension, + /// Purpose of this arg. + purpose: ir::ArgumentPurpose, + }, + /// Structure argument. We reserve stack space for it, but the CLIF-level + /// semantics are a little weird: the value passed to the call instruction, + /// and received in the corresponding block param, is a *pointer*. On the + /// caller side, we memcpy the data from the passed-in pointer to the stack + /// area; on the callee side, we compute a pointer to this stack area and + /// provide that as the argument's value. + StructArg { + /// Offset of this arg relative to base of stack args. + offset: i64, + /// Size of this arg on the stack. 
+ size: u64, + /// Purpose of this arg. + purpose: ir::ArgumentPurpose, + }, } impl ABIArg { /// Get the purpose of this arg. fn get_purpose(self) -> ir::ArgumentPurpose { match self { - ABIArg::Reg(_, _, _, purpose) => purpose, - ABIArg::Stack(_, _, _, purpose) => purpose, + ABIArg::Reg { purpose, .. } => purpose, + ABIArg::Stack { purpose, .. } => purpose, + ABIArg::StructArg { purpose, .. } => purpose, + } + } + + /// Is this a StructArg? + fn is_struct_arg(self) -> bool { + match self { + ABIArg::StructArg { .. } => true, + _ => false, } } } @@ -371,6 +407,16 @@ pub trait ABIMachineSpec { callee_conv: isa::CallConv, ) -> SmallVec<[(InstIsSafepoint, Self::I); 2]>; + /// Generate a memcpy invocation. Used to set up struct args. May clobber + /// caller-save registers; we only memcpy before we start to set up args for + /// a call. + fn gen_memcpy( + call_conv: isa::CallConv, + dst: Reg, + src: Reg, + size: usize, + ) -> SmallVec<[Self::I; 8]>; + /// Get the number of spillslots required for the given register-class and /// type. fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32; @@ -455,6 +501,8 @@ impl ABISig { /// ABI object for a function body. pub struct ABICalleeImpl { + /// CLIF-level signature, possibly normalized. + ir_sig: ir::Signature, /// Signature: arg and retval regs. sig: ABISig, /// Offsets to each stackslot. @@ -510,8 +558,8 @@ fn get_special_purpose_param_register( ) -> Option { let idx = f.signature.special_param_index(purpose)?; match abi.args[idx] { - ABIArg::Reg(regs, ..) => Some(regs.only_reg().unwrap().to_reg()), - ABIArg::Stack(..) => None, + ABIArg::Reg { regs, .. 
} => Some(regs.only_reg().unwrap().to_reg()), + _ => None, } } @@ -520,7 +568,8 @@ impl ABICalleeImpl { pub fn new(f: &ir::Function, flags: settings::Flags) -> CodegenResult { debug!("ABI: func signature {:?}", f.signature); - let sig = ABISig::from_func_sig::(&f.signature)?; + let ir_sig = ensure_struct_return_ptr_is_returned(&f.signature); + let sig = ABISig::from_func_sig::(&ir_sig)?; let call_conv = f.signature.call_conv; // Only these calling conventions are supported. @@ -567,6 +616,7 @@ impl ABICalleeImpl { }; Ok(Self { + ir_sig, sig, stackslots, stackslots_size: stack_offset, @@ -787,9 +837,30 @@ fn gen_store_base_offset_multi( ret } +fn ensure_struct_return_ptr_is_returned(sig: &ir::Signature) -> ir::Signature { + let params_structret = sig + .params + .iter() + .find(|p| p.purpose == ArgumentPurpose::StructReturn); + let rets_have_structret = sig.returns.len() > 0 + && sig + .returns + .iter() + .any(|arg| arg.purpose == ArgumentPurpose::StructReturn); + let mut sig = sig.clone(); + if params_structret.is_some() && !rets_have_structret { + sig.returns.insert(0, params_structret.unwrap().clone()); + } + sig +} + impl ABICallee for ABICalleeImpl { type I = M::I; + fn signature(&self) -> &ir::Signature { + &self.ir_sig + } + fn temp_needed(&self) -> Option { if self.sig.stack_ret_arg.is_some() { Some(M::word_type()) @@ -822,7 +893,7 @@ impl ABICallee for ABICalleeImpl { fn liveins(&self) -> Set { let mut set: Set = Set::empty(); for &arg in &self.sig.args { - if let ABIArg::Reg(regs, ..) = arg { + if let ABIArg::Reg { regs, .. } = arg { for &r in regs.regs() { set.insert(r); } @@ -834,7 +905,7 @@ impl ABICallee for ABICalleeImpl { fn liveouts(&self) -> Set { let mut set: Set = Set::empty(); for &ret in &self.sig.rets { - if let ABIArg::Reg(regs, ..) = ret { + if let ABIArg::Reg { regs, .. 
} = ret { for &r in regs.regs() { set.insert(r); } @@ -863,14 +934,25 @@ impl ABICallee for ABICalleeImpl { match &self.sig.args[idx] { // Extension mode doesn't matter (we're copying out, not in; we // ignore high bits by convention). - &ABIArg::Reg(regs, ty, ..) => { + &ABIArg::Reg { regs, ty, .. } => { gen_move_multi::(into_regs, regs.map(|r| r.to_reg()), ty) } - &ABIArg::Stack(off, ty, ..) => gen_load_stack_multi::( - StackAMode::FPOffset(M::fp_to_arg_offset(self.call_conv, &self.flags) + off, ty), + &ABIArg::Stack { offset, ty, .. } => gen_load_stack_multi::( + StackAMode::FPOffset( + M::fp_to_arg_offset(self.call_conv, &self.flags) + offset, + ty, + ), into_regs, ty, ), + &ABIArg::StructArg { offset, .. } => smallvec![M::gen_get_stack_addr( + StackAMode::FPOffset( + M::fp_to_arg_offset(self.call_conv, &self.flags) + offset, + I8, + ), + into_regs.only_reg().unwrap(), + I8, + )], } } @@ -892,10 +974,15 @@ impl ABICallee for ABICalleeImpl { let mut ret = smallvec![]; let word_bits = M::word_bits() as u8; match &self.sig.rets[idx] { - &ABIArg::Reg(regs, ty, ext, ..) => { + &ABIArg::Reg { + regs, + ty, + extension, + .. + } => { let from_bits = ty_bits(ty) as u8; let dest_regs = writable_value_regs(regs.map(|r| r.to_reg())); - let ext = M::get_ext_mode(self.sig.call_conv, ext); + let ext = M::get_ext_mode(self.sig.call_conv, extension); match (ext, from_bits) { (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n) if n < word_bits => @@ -921,14 +1008,20 @@ impl ABICallee for ABICalleeImpl { ), }; } - &ABIArg::Stack(off, mut ty, ext, ..) => { + &ABIArg::Stack { + offset, + ty, + extension, + .. + } => { + let mut ty = ty; let from_bits = ty_bits(ty) as u8; // A machine ABI implementation should ensure that stack frames // have "reasonable" size. All current ABIs for machinst // backends (aarch64 and x64) enforce a 128MB limit. 
- let off = i32::try_from(off) + let off = i32::try_from(offset) .expect("Argument stack offset greater than 2GB; should hit impl limit first"); - let ext = M::get_ext_mode(self.sig.call_conv, ext); + let ext = M::get_ext_mode(self.sig.call_conv, extension); // Trash the from_reg; it should be its last use. match (ext, from_bits) { (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n) @@ -961,6 +1054,7 @@ impl ABICallee for ABICalleeImpl { .into_iter(), ); } + &ABIArg::StructArg { .. } => panic!("Unexpected StructArg location for return value"), } ret } @@ -1248,7 +1342,7 @@ fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec, Vec uses.extend(regs.regs().iter().map(|r| r.to_reg())), + &ABIArg::Reg { regs, .. } => uses.extend(regs.regs().iter().map(|r| r.to_reg())), _ => {} } } @@ -1257,7 +1351,7 @@ fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec, Vec { + &ABIArg::Reg { regs, .. } => { defs.extend(regs.regs().iter().map(|r| Writable::from_reg(r.to_reg()))) } _ => {} @@ -1269,6 +1363,8 @@ fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec, Vec { + /// CLIF-level signature, possibly normalized. + ir_sig: ir::Signature, /// The called function's signature. sig: ABISig, /// All uses for the callsite, i.e., function args. @@ -1281,6 +1377,8 @@ pub struct ABICallerImpl { opcode: ir::Opcode, /// Caller's calling convention. caller_conv: isa::CallConv, + /// The settings controlling this compilation. 
+ flags: settings::Flags, _mach: PhantomData, } @@ -1301,16 +1399,20 @@ impl ABICallerImpl { extname: &ir::ExternalName, dist: RelocDistance, caller_conv: isa::CallConv, + flags: &settings::Flags, ) -> CodegenResult> { - let sig = ABISig::from_func_sig::(sig)?; + let ir_sig = ensure_struct_return_ptr_is_returned(sig); + let sig = ABISig::from_func_sig::(&ir_sig)?; let (uses, defs) = abisig_to_uses_and_defs::(&sig); Ok(ABICallerImpl { + ir_sig, sig, uses, defs, dest: CallDest::ExtName(extname.clone(), dist), opcode: ir::Opcode::Call, caller_conv, + flags: flags.clone(), _mach: PhantomData, }) } @@ -1322,16 +1424,20 @@ impl ABICallerImpl { ptr: Reg, opcode: ir::Opcode, caller_conv: isa::CallConv, + flags: &settings::Flags, ) -> CodegenResult> { - let sig = ABISig::from_func_sig::(sig)?; + let ir_sig = ensure_struct_return_ptr_is_returned(sig); + let sig = ABISig::from_func_sig::(&ir_sig)?; let (uses, defs) = abisig_to_uses_and_defs::(&sig); Ok(ABICallerImpl { + ir_sig, sig, uses, defs, dest: CallDest::Reg(ptr), opcode, caller_conv, + flags: flags.clone(), _mach: PhantomData, }) } @@ -1355,6 +1461,10 @@ fn adjust_stack_and_nominal_sp>( impl ABICaller for ABICallerImpl { type I = M::I; + fn signature(&self) -> &ir::Signature { + &self.ir_sig + } + fn num_args(&self) -> usize { if self.sig.stack_ret_arg.is_some() { self.sig.args.len() - 1 @@ -1387,8 +1497,13 @@ impl ABICaller for ABICallerImpl { let word_rc = M::word_reg_class(); let word_bits = M::word_bits() as usize; match &self.sig.args[idx] { - &ABIArg::Reg(regs, ty, ext, _) => { - let ext = M::get_ext_mode(self.sig.call_conv, ext); + &ABIArg::Reg { + regs, + ty, + extension, + .. 
+ } => { + let ext = M::get_ext_mode(self.sig.call_conv, extension); if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits { let reg = regs.only_reg().unwrap(); assert_eq!(word_rc, reg.get_class()); @@ -1414,8 +1529,14 @@ impl ABICaller for ABICallerImpl { } } } - &ABIArg::Stack(off, mut ty, ext, _) => { - let ext = M::get_ext_mode(self.sig.call_conv, ext); + &ABIArg::Stack { + offset, + ty, + extension, + .. + } => { + let mut ty = ty; + let ext = M::get_ext_mode(self.sig.call_conv, extension); if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits { let from_reg = from_regs .only_reg() @@ -1439,7 +1560,28 @@ impl ABICaller for ABICallerImpl { // Store the extended version. ty = M::word_type(); } - for insn in gen_store_stack_multi::(StackAMode::SPOffset(off, ty), from_regs, ty) + for insn in + gen_store_stack_multi::(StackAMode::SPOffset(offset, ty), from_regs, ty) + { + ctx.emit(insn); + } + } + &ABIArg::StructArg { offset, size, .. } => { + let src_ptr = from_regs.only_reg().unwrap(); + let dst_ptr = ctx.alloc_tmp(M::word_type()).only_reg().unwrap(); + ctx.emit(M::gen_get_stack_addr( + StackAMode::SPOffset(offset, I8), + dst_ptr, + I8, + )); + // Emit a memcpy from `src_ptr` to `dst_ptr` of `size` bytes. + // N.B.: because we process StructArg params *first*, this is + // safe w.r.t. clobbers: we have not yet filled in any other + // arg regs. + let memcpy_call_conv = isa::CallConv::for_libcall(&self.flags, self.sig.call_conv); + for insn in + M::gen_memcpy(memcpy_call_conv, dst_ptr.to_reg(), src_ptr, size as usize) + .into_iter() { ctx.emit(insn); } @@ -1447,6 +1589,24 @@ impl ABICaller for ABICallerImpl { } } + fn get_copy_to_arg_order(&self) -> SmallVec<[usize; 8]> { + let mut ret = SmallVec::new(); + for (i, arg) in self.sig.args.iter().enumerate() { + // Struct args. + if arg.is_struct_arg() { + ret.push(i); + } + } + for (i, arg) in self.sig.args.iter().enumerate() { + // Non-struct args. 
Skip an appended return-area arg for multivalue + // returns, if any. + if !arg.is_struct_arg() && i < self.ir_sig.params.len() { + ret.push(i); + } + } + ret + } + fn emit_copy_retval_to_regs>( &self, ctx: &mut C, @@ -1456,21 +1616,22 @@ impl ABICaller for ABICallerImpl { match &self.sig.rets[idx] { // Extension mode doesn't matter because we're copying out, not in, // and we ignore high bits in our own registers by convention. - &ABIArg::Reg(regs, ty, _, _) => { + &ABIArg::Reg { regs, ty, .. } => { for insn in gen_move_multi::(into_regs, regs.map(|r| r.to_reg()), ty) { ctx.emit(insn); } } - &ABIArg::Stack(off, ty, _, _) => { + &ABIArg::Stack { offset, ty, .. } => { let ret_area_base = self.sig.stack_arg_space; for insn in gen_load_stack_multi::( - StackAMode::SPOffset(off + ret_area_base, ty), + StackAMode::SPOffset(offset + ret_area_base, ty), into_regs, ty, ) { ctx.emit(insn); } } + &ABIArg::StructArg { .. } => panic!("Unexpected StructArg location for return value"), } } diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index 28e4edd0c7..e35e3b068e 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -375,8 +375,9 @@ impl<'func, I: VCodeInst> Lower<'func, I> { } } - let vm_context = f - .signature + let vm_context = vcode + .abi() + .signature() .special_param_index(ArgumentPurpose::VMContext) .map(|vm_context_index| { let entry_block = f.layout.entry_block().unwrap(); @@ -386,7 +387,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> { // Assign vreg(s) to each return value. 
let mut retval_regs = vec![]; - for ret in &f.signature.returns { + for ret in &vcode.abi().signature().returns.clone() { let regs = alloc_vregs(ret.value_type, &mut next_vreg, &mut vcode)?; retval_regs.push(regs); debug!("retval gets regs {:?}", regs); @@ -465,6 +466,24 @@ impl<'func, I: VCodeInst> Lower<'func, I> { for insn in self.vcode.abi().gen_copy_arg_to_regs(i, regs).into_iter() { self.emit(insn); } + if self.abi().signature().params[i].purpose == ArgumentPurpose::StructReturn { + assert!(regs.len() == 1); + let ty = self.abi().signature().params[i].value_type; + // The ABI implementation must have ensured that a StructReturn + // arg is present in the return values. + let struct_ret_idx = self + .abi() + .signature() + .returns + .iter() + .position(|ret| ret.purpose == ArgumentPurpose::StructReturn) + .expect("StructReturn return value not present!"); + self.emit(I::gen_move( + Writable::from_reg(self.retval_regs[struct_ret_idx].regs()[0]), + regs.regs()[0].to_reg(), + ty, + )); + } } if let Some(insn) = self.vcode.abi().gen_retval_area_setup() { self.emit(insn); diff --git a/cranelift/filetests/filetests/isa/x64/i128.clif b/cranelift/filetests/filetests/isa/x64/i128.clif index e7ee34f283..5e5d2ffb86 100644 --- a/cranelift/filetests/filetests/isa/x64/i128.clif +++ b/cranelift/filetests/filetests/isa/x64/i128.clif @@ -1006,7 +1006,7 @@ block0(v0: i128, v1: i128): ; nextln: movq %rsp, %rbp ; nextln: subq $$16, %rsp ; nextln: movq %r12, 0(%rsp) -; nextln: virtual_sp_offset_adjust 8 +; nextln: virtual_sp_offset_adjust 16 ; nextln: movq %r8, %r12 ; nextln: subq $$16, %rsp ; nextln: virtual_sp_offset_adjust 16 diff --git a/cranelift/filetests/filetests/isa/x64/struct-arg.clif b/cranelift/filetests/filetests/isa/x64/struct-arg.clif new file mode 100644 index 0000000000..3c867038a7 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/struct-arg.clif @@ -0,0 +1,147 @@ +test compile +target x86_64 +feature "experimental_x64" + +function u0:0(i64 sarg(64)) -> 
i8 system_v { +block0(v0: i64): + v1 = load.i8 v0 + return v1 +} + +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: lea 16(%rbp), %rsi +; nextln: movzbq 0(%rsi), %rsi +; nextln: movq %rsi, %rax +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +function u0:1(i64 sarg(64), i64) -> i8 system_v { +block0(v0: i64, v1: i64): + v2 = load.i8 v1 + v3 = load.i8 v0 + v4 = iadd.i8 v2, v3 + return v4 +} + +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: lea 16(%rbp), %rsi +; nextln: movzbq 0(%rdi), %rdi +; nextln: movzbq 0(%rsi), %rsi +; nextln: addl %esi, %edi +; nextln: movq %rdi, %rax +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +function u0:2(i64) -> i8 system_v { +fn1 = colocated u0:0(i64 sarg(64)) -> i8 system_v + +block0(v0: i64): + v1 = call fn1(v0) + return v1 +} + +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: movq %rdi, %rsi +; nextln: subq $$64, %rsp +; nextln: virtual_sp_offset_adjust 64 +; nextln: lea 0(%rsp), %rdi +; nextln: movl $$64, %edx +; nextln: load_ext_name %Memcpy+0, %rcx +; nextln: call *%rcx +; nextln: call User { namespace: 0, index: 0 } +; nextln: addq $$64, %rsp +; nextln: virtual_sp_offset_adjust -64 +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +function u0:3(i64, i64) -> i8 system_v { +fn1 = colocated u0:0(i64, i64 sarg(64)) -> i8 system_v + +block0(v0: i64, v1: i64): + v2 = call fn1(v0, v1) + return v2 +} + +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: subq $$16, %rsp +; nextln: movq %r12, 0(%rsp) +; nextln: virtual_sp_offset_adjust 16 +; nextln: movq %rdi, %r12 +; nextln: subq $$64, %rsp +; nextln: virtual_sp_offset_adjust 64 +; nextln: lea 0(%rsp), %rdi +; nextln: movl $$64, %edx +; nextln: load_ext_name %Memcpy+0, %rcx +; nextln: call *%rcx +; nextln: movq %r12, %rdi +; nextln: call User { namespace: 0, index: 0 } +; nextln: addq $$64, %rsp +; nextln: virtual_sp_offset_adjust -64 +; nextln: movq 0(%rsp), %r12 +; nextln: addq $$16, %rsp +; 
nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +function u0:4(i64 sarg(128), i64 sarg(64)) -> i8 system_v { +block0(v0: i64, v1: i64): + v2 = load.i8 v0 + v3 = load.i8 v1 + v4 = iadd.i8 v2, v3 + return v4 +} + +; check: movq %rsp, %rbp +; nextln: lea 16(%rbp), %rsi +; nextln: lea 144(%rbp), %rdi +; nextln: movzbq 0(%rsi), %rsi +; nextln: movzbq 0(%rdi), %rdi +; nextln: addl %edi, %esi +; nextln: movq %rsi, %rax +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret + +function u0:5(i64, i64, i64) -> i8 system_v { +fn1 = colocated u0:0(i64, i64 sarg(128), i64 sarg(64)) -> i8 system_v + +block0(v0: i64, v1: i64, v2: i64): + v3 = call fn1(v0, v1, v2) + return v3 +} + +; check: movq %rsp, %rbp +; nextln: subq $$16, %rsp +; nextln: movq %r12, 0(%rsp) +; nextln: movq %r13, 8(%rsp) +; nextln: virtual_sp_offset_adjust 16 +; nextln: movq %rdi, %r12 +; nextln: movq %rdx, %r13 +; nextln: subq $$192, %rsp +; nextln: virtual_sp_offset_adjust 192 +; nextln: lea 0(%rsp), %rdi +; nextln: movl $$128, %edx +; nextln: load_ext_name %Memcpy+0, %rcx +; nextln: call *%rcx +; nextln: lea 128(%rsp), %rdi +; nextln: movq %r13, %rsi +; nextln: movl $$64, %edx +; nextln: load_ext_name %Memcpy+0, %rcx +; nextln: call *%rcx +; nextln: movq %r12, %rdi +; nextln: call User { namespace: 0, index: 0 } +; nextln: addq $$192, %rsp +; nextln: virtual_sp_offset_adjust -192 +; nextln: movq 0(%rsp), %r12 +; nextln: movq 8(%rsp), %r13 +; nextln: addq $$16, %rsp +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret diff --git a/cranelift/filetests/filetests/isa/x64/struct-ret.clif b/cranelift/filetests/filetests/isa/x64/struct-ret.clif new file mode 100644 index 0000000000..05ebbd100b --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/struct-ret.clif @@ -0,0 +1,19 @@ +test compile +target x86_64 +feature "experimental_x64" + +function %f0(i64 sret) { +block0(v0: i64): + v1 = iconst.i64 42 + store v1, v0 + return +} + +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; 
nextln: movq %rdi, %rax +; nextln: movl $$42, %esi +; nextln: movq %rsi, 0(%rdi) +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret