Merge pull request #2541 from cfallin/struct-arg-ret

x64 and aarch64: allow StructArgument and StructReturn args.
This commit is contained in:
Chris Fallin
2021-01-17 23:50:19 -08:00
committed by GitHub
14 changed files with 641 additions and 166 deletions

View File

@@ -4,6 +4,8 @@ use crate::ir;
use crate::ir::types;
use crate::ir::types::*;
use crate::ir::MemFlags;
use crate::ir::Opcode;
use crate::ir::{ExternalName, LibCall};
use crate::isa;
use crate::isa::aarch64::{inst::EmitState, inst::*};
use crate::machinst::*;
@@ -76,41 +78,41 @@ fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Opt
match &param.purpose {
&ir::ArgumentPurpose::VMContext => {
// This is SpiderMonkey's `WasmTlsReg`.
Some(ABIArg::Reg(
ValueRegs::one(xreg(BALDRDASH_TLS_REG).to_real_reg()),
ir::types::I64,
param.extension,
param.purpose,
))
Some(ABIArg::Reg {
regs: ValueRegs::one(xreg(BALDRDASH_TLS_REG).to_real_reg()),
ty: ir::types::I64,
extension: param.extension,
purpose: param.purpose,
})
}
&ir::ArgumentPurpose::SignatureId => {
// This is SpiderMonkey's `WasmTableCallSigReg`.
Some(ABIArg::Reg(
ValueRegs::one(xreg(BALDRDASH_SIG_REG).to_real_reg()),
ir::types::I64,
param.extension,
param.purpose,
))
Some(ABIArg::Reg {
regs: ValueRegs::one(xreg(BALDRDASH_SIG_REG).to_real_reg()),
ty: ir::types::I64,
extension: param.extension,
purpose: param.purpose,
})
}
&ir::ArgumentPurpose::CalleeTLS => {
// This is SpiderMonkey's callee TLS slot in the extended frame of Wasm's ABI-2020.
assert!(call_conv == isa::CallConv::Baldrdash2020);
Some(ABIArg::Stack(
BALDRDASH_CALLEE_TLS_OFFSET,
ir::types::I64,
ir::ArgumentExtension::None,
param.purpose,
))
Some(ABIArg::Stack {
offset: BALDRDASH_CALLEE_TLS_OFFSET,
ty: ir::types::I64,
extension: ir::ArgumentExtension::None,
purpose: param.purpose,
})
}
&ir::ArgumentPurpose::CallerTLS => {
// This is SpiderMonkey's caller TLS slot in the extended frame of Wasm's ABI-2020.
assert!(call_conv == isa::CallConv::Baldrdash2020);
Some(ABIArg::Stack(
BALDRDASH_CALLER_TLS_OFFSET,
ir::types::I64,
ir::ArgumentExtension::None,
param.purpose,
))
Some(ABIArg::Stack {
offset: BALDRDASH_CALLER_TLS_OFFSET,
ty: ir::types::I64,
extension: ir::ArgumentExtension::None,
purpose: param.purpose,
})
}
_ => None,
}
@@ -208,7 +210,9 @@ impl ABIMachineSpec for AArch64MachineDeps {
| &ir::ArgumentPurpose::StackLimit
| &ir::ArgumentPurpose::SignatureId
| &ir::ArgumentPurpose::CallerTLS
| &ir::ArgumentPurpose::CalleeTLS => {}
| &ir::ArgumentPurpose::CalleeTLS
| &ir::ArgumentPurpose::StructReturn
| &ir::ArgumentPurpose::StructArgument(_) => {}
_ => panic!(
"Unsupported argument purpose {:?} in signature: {:?}",
param.purpose, params
@@ -233,18 +237,28 @@ impl ABIMachineSpec for AArch64MachineDeps {
if let Some(param) = try_fill_baldrdash_reg(call_conv, param) {
assert!(rc == RegClass::I64);
ret.push(param);
} else if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
let offset = next_stack as i64;
let size = size as u64;
assert!(size % 8 == 0, "StructArgument size is not properly aligned");
next_stack += size;
ret.push(ABIArg::StructArg {
offset,
size,
purpose: param.purpose,
});
} else if *next_reg < max_per_class_reg_vals && remaining_reg_vals > 0 {
let reg = match rc {
RegClass::I64 => xreg(*next_reg),
RegClass::V128 => vreg(*next_reg),
_ => unreachable!(),
};
ret.push(ABIArg::Reg(
ValueRegs::one(reg.to_real_reg()),
param.value_type,
param.extension,
param.purpose,
));
ret.push(ABIArg::Reg {
regs: ValueRegs::one(reg.to_real_reg()),
ty: param.value_type,
extension: param.extension,
purpose: param.purpose,
});
*next_reg += 1;
remaining_reg_vals -= 1;
} else {
@@ -255,12 +269,12 @@ impl ABIMachineSpec for AArch64MachineDeps {
// Align.
debug_assert!(size.is_power_of_two());
next_stack = (next_stack + size - 1) & !(size - 1);
ret.push(ABIArg::Stack(
next_stack as i64,
param.value_type,
param.extension,
param.purpose,
));
ret.push(ABIArg::Stack {
offset: next_stack as i64,
ty: param.value_type,
extension: param.extension,
purpose: param.purpose,
});
next_stack += size;
}
}
@@ -272,19 +286,19 @@ impl ABIMachineSpec for AArch64MachineDeps {
let extra_arg = if add_ret_area_ptr {
debug_assert!(args_or_rets == ArgsOrRets::Args);
if next_xreg < max_per_class_reg_vals && remaining_reg_vals > 0 {
ret.push(ABIArg::Reg(
ValueRegs::one(xreg(next_xreg).to_real_reg()),
I64,
ir::ArgumentExtension::None,
ir::ArgumentPurpose::Normal,
));
ret.push(ABIArg::Reg {
regs: ValueRegs::one(xreg(next_xreg).to_real_reg()),
ty: I64,
extension: ir::ArgumentExtension::None,
purpose: ir::ArgumentPurpose::Normal,
});
} else {
ret.push(ABIArg::Stack(
next_stack as i64,
I64,
ir::ArgumentExtension::None,
ir::ArgumentPurpose::Normal,
));
ret.push(ABIArg::Stack {
offset: next_stack as i64,
ty: I64,
extension: ir::ArgumentExtension::None,
purpose: ir::ArgumentPurpose::Normal,
});
next_stack += 8;
}
Some(ret.len() - 1)
@@ -708,6 +722,34 @@ impl ABIMachineSpec for AArch64MachineDeps {
insts
}
/// Build the instruction sequence for a `memcpy(dst, src, size)` libcall.
///
/// The destination pointer, source pointer and byte count are placed in
/// X registers 0, 1 and 2 respectively, which are then listed as the uses
/// of a direct call to `LibCall::Memcpy`. The call is marked as defining
/// every register reported by `get_regs_clobbered_by_call` for `call_conv`,
/// and `call_conv` is used as both the caller and the callee convention.
///
/// # Panics
///
/// Panics if `call_conv` extends Baldrdash: struct arguments (the only
/// reason to copy memory here) are not expected under that convention.
fn gen_memcpy(
    call_conv: isa::CallConv,
    dst: Reg,
    src: Reg,
    size: usize,
) -> SmallVec<[Self::I; 8]> {
    // Baldrdash should not use struct args.
    assert!(!call_conv.extends_baldrdash());

    // Registers that carry the three memcpy operands.
    let dst_arg = writable_xreg(0);
    let src_arg = writable_xreg(1);
    let len_arg = writable_xreg(2);

    let mut seq = SmallVec::new();

    // Marshal the operands: two pointer moves and one immediate load.
    seq.push(Inst::gen_move(dst_arg, dst, I64));
    seq.push(Inst::gen_move(src_arg, src, I64));
    seq.extend(Inst::load_constant(len_arg, size as u64));

    // Describe the call itself, then append it after the operand setup.
    let call_info = CallInfo {
        dest: ExternalName::LibCall(LibCall::Memcpy),
        uses: vec![dst_arg.to_reg(), src_arg.to_reg(), len_arg.to_reg()],
        defs: Self::get_regs_clobbered_by_call(call_conv),
        opcode: Opcode::Call,
        caller_callconv: call_conv,
        callee_callconv: call_conv,
    };
    seq.push(Inst::Call {
        info: Box::new(call_info),
    });

    seq
}
fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 {
// We allocate in terms of 8-byte slots.
match (rc, ty) {

View File

@@ -1231,7 +1231,7 @@ impl LowerBackend for AArch64Backend {
type MInst = Inst;
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
lower_inst::lower_insn_to_regs(ctx, ir_inst)
lower_inst::lower_insn_to_regs(ctx, ir_inst, &self.flags)
}
fn lower_branch_group<C: LowerCtx<I = Inst>>(

View File

@@ -7,6 +7,7 @@ use crate::ir::Inst as IRInst;
use crate::ir::{InstructionData, Opcode, TrapCode};
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::settings::Flags;
use crate::{CodegenError, CodegenResult};
use crate::isa::aarch64::abi::*;
@@ -24,6 +25,7 @@ use super::lower::*;
pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx: &mut C,
insn: IRInst,
flags: &Flags,
) -> CodegenResult<()> {
let op = ctx.data(insn).opcode();
let inputs = insn_inputs(ctx, insn);
@@ -1803,7 +1805,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
assert!(inputs.len() == sig.params.len());
assert!(outputs.len() == sig.returns.len());
(
AArch64ABICaller::from_func(sig, &extname, dist, caller_conv)?,
AArch64ABICaller::from_func(sig, &extname, dist, caller_conv, flags)?,
&inputs[..],
)
}
@@ -1813,7 +1815,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
assert!(inputs.len() - 1 == sig.params.len());
assert!(outputs.len() == sig.returns.len());
(
AArch64ABICaller::from_ptr(sig, ptr, op, caller_conv)?,
AArch64ABICaller::from_ptr(sig, ptr, op, caller_conv, flags)?,
&inputs[1..],
)
}
@@ -1822,8 +1824,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
abi.emit_stack_pre_adjust(ctx);
assert!(inputs.len() == abi.num_args());
for (i, input) in inputs.iter().enumerate() {
let arg_reg = put_input_in_reg(ctx, *input, NarrowValueMode::None);
for i in abi.get_copy_to_arg_order() {
let input = inputs[i];
let arg_reg = put_input_in_reg(ctx, input, NarrowValueMode::None);
abi.emit_copy_regs_to_arg(ctx, i, ValueRegs::one(arg_reg));
}
abi.emit_call(ctx);

View File

@@ -81,12 +81,12 @@ impl ABIMachineSpec for Arm32MachineDeps {
if next_rreg < max_reg_val {
let reg = rreg(next_rreg);
ret.push(ABIArg::Reg(
ValueRegs::one(reg.to_real_reg()),
param.value_type,
param.extension,
param.purpose,
));
ret.push(ABIArg::Reg {
regs: ValueRegs::one(reg.to_real_reg()),
ty: param.value_type,
extension: param.extension,
purpose: param.purpose,
});
next_rreg += 1;
} else {
// Arguments are stored on stack in reversed order.
@@ -101,12 +101,12 @@ impl ABIMachineSpec for Arm32MachineDeps {
let extra_arg = if add_ret_area_ptr {
debug_assert!(args_or_rets == ArgsOrRets::Args);
if next_rreg < max_reg_val {
ret.push(ABIArg::Reg(
ValueRegs::one(rreg(next_rreg).to_real_reg()),
I32,
ir::ArgumentExtension::None,
ir::ArgumentPurpose::Normal,
));
ret.push(ABIArg::Reg {
regs: ValueRegs::one(rreg(next_rreg).to_real_reg()),
ty: I32,
extension: ir::ArgumentExtension::None,
purpose: ir::ArgumentPurpose::Normal,
});
} else {
stack_args.push((
I32,
@@ -124,12 +124,12 @@ impl ABIMachineSpec for Arm32MachineDeps {
let max_stack = next_stack;
for (ty, ext, purpose) in stack_args.into_iter().rev() {
next_stack -= 4;
ret.push(ABIArg::Stack(
(max_stack - next_stack) as i64,
ret.push(ABIArg::Stack {
offset: (max_stack - next_stack) as i64,
ty,
ext,
extension: ext,
purpose,
));
});
}
assert_eq!(next_stack, 0);
@@ -426,6 +426,15 @@ impl ABIMachineSpec for Arm32MachineDeps {
insts
}
/// Placeholder for the `memcpy` call sequence used to copy struct
/// arguments.
///
/// # Panics
///
/// Always panics: struct arguments are not implemented for the ARM32
/// backend yet, so every parameter is ignored.
fn gen_memcpy(
    _call_conv: isa::CallConv,
    _dst: Reg,
    _src: Reg,
    _size: usize,
) -> SmallVec<[Self::I; 8]> {
    unimplemented!("StructArgs not implemented for ARM32 yet")
}
fn get_number_of_spillslots_for_value(rc: RegClass, _ty: Type) -> u32 {
match rc {
RegClass::I32 => 1,

View File

@@ -224,7 +224,7 @@ impl LowerBackend for Arm32Backend {
type MInst = Inst;
fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
lower_inst::lower_insn_to_regs(ctx, ir_inst)
lower_inst::lower_insn_to_regs(ctx, ir_inst, &self.flags)
}
fn lower_branch_group<C: LowerCtx<I = Inst>>(

View File

@@ -5,6 +5,7 @@ use crate::ir::Inst as IRInst;
use crate::ir::Opcode;
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::settings::Flags;
use crate::CodegenResult;
use crate::isa::arm32::abi::*;
@@ -18,6 +19,7 @@ use super::lower::*;
pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx: &mut C,
insn: IRInst,
flags: &Flags,
) -> CodegenResult<()> {
let op = ctx.data(insn).opcode();
let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn))
@@ -502,7 +504,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
assert_eq!(inputs.len(), sig.params.len());
assert_eq!(outputs.len(), sig.returns.len());
(
Arm32ABICaller::from_func(sig, &extname, dist, caller_conv)?,
Arm32ABICaller::from_func(sig, &extname, dist, caller_conv, flags)?,
&inputs[..],
)
}
@@ -512,7 +514,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
assert_eq!(inputs.len() - 1, sig.params.len());
assert_eq!(outputs.len(), sig.returns.len());
(
Arm32ABICaller::from_ptr(sig, ptr, op, caller_conv)?,
Arm32ABICaller::from_ptr(sig, ptr, op, caller_conv, flags)?,
&inputs[1..],
)
}

View File

@@ -31,41 +31,41 @@ fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option<A
match &param.purpose {
&ir::ArgumentPurpose::VMContext => {
// This is SpiderMonkey's `WasmTlsReg`.
Some(ABIArg::Reg(
ValueRegs::one(regs::r14().to_real_reg()),
types::I64,
param.extension,
param.purpose,
))
Some(ABIArg::Reg {
regs: ValueRegs::one(regs::r14().to_real_reg()),
ty: types::I64,
extension: param.extension,
purpose: param.purpose,
})
}
&ir::ArgumentPurpose::SignatureId => {
// This is SpiderMonkey's `WasmTableCallSigReg`.
Some(ABIArg::Reg(
ValueRegs::one(regs::r10().to_real_reg()),
types::I64,
param.extension,
param.purpose,
))
Some(ABIArg::Reg {
regs: ValueRegs::one(regs::r10().to_real_reg()),
ty: types::I64,
extension: param.extension,
purpose: param.purpose,
})
}
&ir::ArgumentPurpose::CalleeTLS => {
// This is SpiderMonkey's callee TLS slot in the extended frame of Wasm's ABI-2020.
assert!(call_conv == isa::CallConv::Baldrdash2020);
Some(ABIArg::Stack(
BALDRDASH_CALLEE_TLS_OFFSET,
ir::types::I64,
ir::ArgumentExtension::None,
param.purpose,
))
Some(ABIArg::Stack {
offset: BALDRDASH_CALLEE_TLS_OFFSET,
ty: ir::types::I64,
extension: ir::ArgumentExtension::None,
purpose: param.purpose,
})
}
&ir::ArgumentPurpose::CallerTLS => {
// This is SpiderMonkey's caller TLS slot in the extended frame of Wasm's ABI-2020.
assert!(call_conv == isa::CallConv::Baldrdash2020);
Some(ABIArg::Stack(
BALDRDASH_CALLER_TLS_OFFSET,
ir::types::I64,
ir::ArgumentExtension::None,
param.purpose,
))
Some(ABIArg::Stack {
offset: BALDRDASH_CALLER_TLS_OFFSET,
ty: ir::types::I64,
extension: ir::ArgumentExtension::None,
purpose: param.purpose,
})
}
_ => None,
}
@@ -131,7 +131,9 @@ impl ABIMachineSpec for X64ABIMachineSpec {
| &ir::ArgumentPurpose::StackLimit
| &ir::ArgumentPurpose::SignatureId
| &ir::ArgumentPurpose::CalleeTLS
| &ir::ArgumentPurpose::CallerTLS => {}
| &ir::ArgumentPurpose::CallerTLS
| &ir::ArgumentPurpose::StructReturn
| &ir::ArgumentPurpose::StructArgument(_) => {}
_ => panic!(
"Unsupported argument purpose {:?} in signature: {:?}",
param.purpose, params
@@ -143,6 +145,19 @@ impl ABIMachineSpec for X64ABIMachineSpec {
continue;
}
if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
let offset = next_stack as i64;
let size = size as u64;
assert!(size % 8 == 0, "StructArgument size is not properly aligned");
next_stack += size;
ret.push(ABIArg::StructArg {
offset,
size,
purpose: param.purpose,
});
continue;
}
// Find regclass(es) of the register(s) used to store a value of this type.
let (rcs, _) = Inst::rc_for_type(param.value_type)?;
let intreg = rcs[0] == RegClass::I64;
@@ -183,12 +198,12 @@ impl ABIMachineSpec for X64ABIMachineSpec {
2 => ValueRegs::two(regs[0], regs[1]),
_ => panic!("More than two registers unexpected"),
};
ret.push(ABIArg::Reg(
ret.push(ABIArg::Reg {
regs,
param.value_type,
param.extension,
param.purpose,
));
ty: param.value_type,
extension: param.extension,
purpose: param.purpose,
});
if intreg {
next_gpr += num_regs;
} else {
@@ -202,12 +217,12 @@ impl ABIMachineSpec for X64ABIMachineSpec {
// Align.
debug_assert!(size.is_power_of_two());
next_stack = (next_stack + size - 1) & !(size - 1);
ret.push(ABIArg::Stack(
next_stack as i64,
param.value_type,
param.extension,
param.purpose,
));
ret.push(ABIArg::Stack {
offset: next_stack as i64,
ty: param.value_type,
extension: param.extension,
purpose: param.purpose,
});
next_stack += size;
}
}
@@ -219,19 +234,19 @@ impl ABIMachineSpec for X64ABIMachineSpec {
let extra_arg = if add_ret_area_ptr {
debug_assert!(args_or_rets == ArgsOrRets::Args);
if let Some(reg) = get_intreg_for_arg_systemv(&call_conv, next_gpr) {
ret.push(ABIArg::Reg(
ValueRegs::one(reg.to_real_reg()),
types::I64,
ir::ArgumentExtension::None,
ir::ArgumentPurpose::Normal,
));
ret.push(ABIArg::Reg {
regs: ValueRegs::one(reg.to_real_reg()),
ty: types::I64,
extension: ir::ArgumentExtension::None,
purpose: ir::ArgumentPurpose::Normal,
});
} else {
ret.push(ABIArg::Stack(
next_stack as i64,
types::I64,
ir::ArgumentExtension::None,
ir::ArgumentPurpose::Normal,
));
ret.push(ABIArg::Stack {
offset: next_stack as i64,
ty: types::I64,
extension: ir::ArgumentExtension::None,
purpose: ir::ArgumentPurpose::Normal,
});
next_stack += 8;
}
Some(ret.len() - 1)
@@ -441,6 +456,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
let stack_size = clobbered_size + fixed_frame_storage_size;
// Align to 16 bytes.
let stack_size = (stack_size + 15) & !15;
let clobbered_size = stack_size - fixed_frame_storage_size;
// Adjust the stack pointer downward with one `sub rsp, IMM`
// instruction.
if stack_size > 0 {
@@ -567,6 +583,51 @@ impl ABIMachineSpec for X64ABIMachineSpec {
insts
}
/// Build the instruction sequence for a `memcpy(dst, src, size)` libcall.
///
/// The destination pointer, source pointer and byte count are moved into
/// the integer argument registers 0, 1 and 2 that
/// `get_intreg_for_arg_systemv` reports for `call_conv`. The address of
/// `LibCall::Memcpy` is then loaded into integer argument register 3 and
/// called indirectly. The call is marked as defining every register
/// reported by `get_regs_clobbered_by_call`.
///
/// # Panics
///
/// Panics if `call_conv` extends Baldrdash, or if `call_conv` does not
/// provide at least four integer argument registers.
fn gen_memcpy(
    call_conv: isa::CallConv,
    dst: Reg,
    src: Reg,
    size: usize,
) -> SmallVec<[Self::I; 8]> {
    // Baldrdash should not use struct args.
    assert!(!call_conv.extends_baldrdash());

    // Look up the n-th integer argument register for this convention.
    let int_arg = |idx| get_intreg_for_arg_systemv(&call_conv, idx).unwrap();
    let dst_arg = int_arg(0);
    let src_arg = int_arg(1);
    let len_arg = int_arg(2);
    // No lowering context is available here to allocate a temporary for
    // the address of `memcpy()`. Borrow a register this sequence is free
    // to overwrite instead: the call clobbers it per the ABI anyway.
    let callee_addr = int_arg(3);

    let mut seq = SmallVec::new();

    // Marshal the two pointers.
    seq.push(Inst::gen_move(Writable::from_reg(dst_arg), dst, I64));
    seq.push(Inst::gen_move(Writable::from_reg(src_arg), src, I64));

    // Materialize the byte count; a scratch register must never be requested.
    let load_len = Inst::gen_constant(
        ValueRegs::one(Writable::from_reg(len_arg)),
        size as u128,
        I64,
        |_| panic!("tmp should not be needed"),
    );
    seq.extend(load_len);

    // The libcall's `RelocDistance` is unknown at this point, so take the
    // conservative route: a full `LoadExtName` followed by an indirect call.
    seq.push(Inst::LoadExtName {
        dst: Writable::from_reg(callee_addr),
        name: Box::new(ExternalName::LibCall(LibCall::Memcpy)),
        offset: 0,
    });
    let uses = vec![dst_arg, src_arg, len_arg];
    let defs = Self::get_regs_clobbered_by_call(call_conv);
    seq.push(Inst::call_unknown(
        RegMem::reg(callee_addr),
        uses,
        defs,
        Opcode::Call,
    ));

    seq
}
fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 {
// We allocate in terms of 8-byte slots.
match (rc, ty) {

View File

@@ -1083,7 +1083,7 @@ fn emit_vm_call<C: LowerCtx<I = Inst>>(
let sig = make_libcall_sig(ctx, insn, call_conv, types::I64);
let caller_conv = ctx.abi().call_conv();
let mut abi = X64ABICaller::from_func(&sig, &extname, dist, caller_conv)?;
let mut abi = X64ABICaller::from_func(&sig, &extname, dist, caller_conv, flags)?;
abi.emit_stack_pre_adjust(ctx);
@@ -3091,7 +3091,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
assert_eq!(inputs.len(), sig.params.len());
assert_eq!(outputs.len(), sig.returns.len());
(
X64ABICaller::from_func(sig, &extname, dist, caller_conv)?,
X64ABICaller::from_func(sig, &extname, dist, caller_conv, flags)?,
&inputs[..],
)
}
@@ -3102,7 +3102,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
assert_eq!(inputs.len() - 1, sig.params.len());
assert_eq!(outputs.len(), sig.returns.len());
(
X64ABICaller::from_ptr(sig, ptr, op, caller_conv)?,
X64ABICaller::from_ptr(sig, ptr, op, caller_conv, flags)?,
&inputs[1..],
)
}
@@ -3112,8 +3112,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
abi.emit_stack_pre_adjust(ctx);
assert_eq!(inputs.len(), abi.num_args());
for (i, input) in inputs.iter().enumerate() {
let arg_regs = put_input_in_regs(ctx, *input);
for i in abi.get_copy_to_arg_order() {
let input = inputs[i];
let arg_regs = put_input_in_regs(ctx, input);
abi.emit_copy_regs_to_arg(ctx, i, arg_regs);
}
abi.emit_call(ctx);