Address review comments.

Chris Fallin
2020-06-02 16:57:50 -07:00
parent 615362068f
commit fe97659813
13 changed files with 224 additions and 169 deletions

View File

@@ -73,9 +73,9 @@
 //! https://searchfox.org/mozilla-central/rev/bc3600def806859c31b2c7ac06e3d69271052a89/js/src/wasm/WasmStubs.h#134
 //!
 //! In brief:
-//! - Returns are processed in *reverse* order.
-//! - The first return in this order (so the last return) goes into the ordinary
-//!   return register, X0.
+//! - Return values are processed in *reverse* order.
+//! - The first return value in this order (so the last return) goes into the
+//!   ordinary return register, X0.
 //! - Any further returns go in a struct-return area, allocated upwards (in
 //!   address order) during the reverse traversal.
 //! - This struct-return area is provided by the caller, and a pointer to its
@@ -98,6 +98,7 @@ use crate::isa;
 use crate::isa::aarch64::{inst::*, lower::ty_bits};
 use crate::machinst::*;
 use crate::settings;
+use crate::{CodegenError, CodegenResult};
 use alloc::boxed::Box;
 use alloc::vec::Vec;
@@ -134,6 +135,11 @@ struct ABISig {
     call_conv: isa::CallConv,
 }
+/// This is the limit for the size of argument and return-value areas on the
+/// stack. We place a reasonable limit here to avoid integer overflow issues
+/// with 32-bit arithmetic: for now, 128 MB.
+static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024;
 // Spidermonkey specific ABI convention.
 /// This is SpiderMonkey's `WasmTableCallSigReg`.
@@ -208,14 +214,15 @@ enum ArgsOrRets {
 /// Process a list of parameters or return values and allocate them to X-regs,
 /// V-regs, and stack slots.
 ///
-/// Returns the list of argument locations, and the stack-space used (rounded up
-/// to a 16-byte-aligned boundary).
+/// Returns the list of argument locations, the stack-space used (rounded up
+/// to a 16-byte-aligned boundary), and if `add_ret_area_ptr` was passed, the
+/// index of the extra synthetic arg that was added.
 fn compute_arg_locs(
     call_conv: isa::CallConv,
     params: &[ir::AbiParam],
     args_or_rets: ArgsOrRets,
     add_ret_area_ptr: bool,
-) -> (Vec<ABIArg>, i64) {
+) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
     let is_baldrdash = call_conv.extends_baldrdash();
     // See AArch64 ABI (https://c9x.me/compile/bib/abi-arm64.pdf), sections 5.4.
@@ -290,7 +297,7 @@ fn compute_arg_locs(
         ret.reverse();
     }
-    if add_ret_area_ptr {
+    let extra_arg = if add_ret_area_ptr {
         debug_assert!(args_or_rets == ArgsOrRets::Args);
         if next_xreg < max_reg_vals {
             ret.push(ABIArg::Reg(xreg(next_xreg).to_real_reg(), I64));
@@ -298,35 +305,39 @@
             ret.push(ABIArg::Stack(next_stack as i64, I64));
             next_stack += 8;
         }
-    }
+        Some(ret.len() - 1)
+    } else {
+        None
+    };
     next_stack = (next_stack + 15) & !15;
-    (ret, next_stack as i64)
+    // To avoid overflow issues, limit the arg/return size to something
+    // reasonable -- here, 128 MB.
+    if next_stack > STACK_ARG_RET_SIZE_LIMIT {
+        return Err(CodegenError::ImplLimitExceeded);
+    }
+    Ok((ret, next_stack as i64, extra_arg))
 }
 impl ABISig {
-    fn from_func_sig(sig: &ir::Signature) -> ABISig {
+    fn from_func_sig(sig: &ir::Signature) -> CodegenResult<ABISig> {
         // Compute args and retvals from signature. Handle retvals first,
         // because we may need to add a return-area arg to the args.
-        let (rets, stack_ret_space) = compute_arg_locs(
+        let (rets, stack_ret_space, _) = compute_arg_locs(
             sig.call_conv,
             &sig.returns,
             ArgsOrRets::Rets,
             /* extra ret-area ptr = */ false,
-        );
+        )?;
         let need_stack_return_area = stack_ret_space > 0;
-        let (args, stack_arg_space) = compute_arg_locs(
+        let (args, stack_arg_space, stack_ret_arg) = compute_arg_locs(
             sig.call_conv,
             &sig.params,
             ArgsOrRets::Args,
             need_stack_return_area,
-        );
+        )?;
-        let stack_ret_arg = if need_stack_return_area {
-            Some(args.len() - 1)
-        } else {
-            None
-        };
         trace!(
             "ABISig: sig {:?} => args = {:?} rets = {:?} arg stack = {} ret stack = {} stack_ret_arg = {:?}",
@@ -338,14 +349,14 @@ impl ABISig {
             stack_ret_arg
         );
-        ABISig {
+        Ok(ABISig {
             args,
             rets,
             stack_arg_space,
             stack_ret_space,
             stack_ret_arg,
             call_conv: sig.call_conv,
-        }
+        })
     }
 }
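For readers skimming the diff: a minimal, self-contained sketch of the new control flow around `compute_arg_locs`. The types here are simplified stand-ins, not the real Cranelift `ABIArg`/`ABISig`; only the 16-byte rounding, the 128 MB bound, and the `?`-propagation mirror the change above.

```rust
// Sketch only: simplified stand-ins for the real Cranelift types.
#[derive(Debug)]
enum CodegenError {
    ImplLimitExceeded,
}
type CodegenResult<T> = Result<T, CodegenError>;

// Mirrors STACK_ARG_RET_SIZE_LIMIT in the diff above.
const STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024;

/// Tail of a `compute_arg_locs`-like routine: align to 16 bytes, bound the
/// area before the u64 -> i64 conversion, and report the synthetic-arg index.
fn finish_arg_locs(
    locs: Vec<u32>,
    mut next_stack: u64,
    extra_arg: Option<usize>,
) -> CodegenResult<(Vec<u32>, i64, Option<usize>)> {
    next_stack = (next_stack + 15) & !15;
    if next_stack > STACK_ARG_RET_SIZE_LIMIT {
        return Err(CodegenError::ImplLimitExceeded);
    }
    Ok((locs, next_stack as i64, extra_arg))
}

fn main() -> CodegenResult<()> {
    // Callers (like `ABISig::from_func_sig` above) just forward the error with `?`.
    let (_locs, stack_space, ret_arg) = finish_arg_locs(vec![0, 8], 24, Some(1))?;
    assert_eq!(stack_space, 32);
    assert_eq!(ret_arg, Some(1));
    Ok(())
}
```

The point of the bound is that later frame arithmetic stays in ranges that are safe for 32-bit math; anything larger is rejected up front as `ImplLimitExceeded` rather than silently wrapping.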
@@ -446,15 +457,7 @@ fn gen_stack_limit(f: &ir::Function, abi: &ABISig, gv: ir::GlobalValue) -> (Reg,
         } => {
             let base = generate_gv(f, abi, base, insts);
             let into_reg = writable_spilltmp_reg();
-            let mem = if let Some(offset) =
-                UImm12Scaled::maybe_from_i64(offset.into(), ir::types::I8)
-            {
-                MemArg::UnsignedOffset(base, offset)
-            } else {
-                let offset: i64 = offset.into();
-                insts.extend(Inst::load_constant(into_reg, offset as u64));
-                MemArg::RegReg(base, into_reg.to_reg())
-            };
+            let mem = MemArg::RegOffset(base, offset.into(), I64);
             insts.push(Inst::ULoad64 {
                 rd: into_reg,
                 mem,
@@ -481,10 +484,10 @@ fn get_special_purpose_param_register(
 impl AArch64ABIBody {
     /// Create a new body ABI instance.
-    pub fn new(f: &ir::Function, flags: settings::Flags) -> Self {
+    pub fn new(f: &ir::Function, flags: settings::Flags) -> CodegenResult<Self> {
         debug!("AArch64 ABI: func signature {:?}", f.signature);
-        let sig = ABISig::from_func_sig(&f.signature);
+        let sig = ABISig::from_func_sig(&f.signature)?;
         let call_conv = f.signature.call_conv;
         // Only these calling conventions are supported.
@@ -517,7 +520,7 @@ impl AArch64ABIBody {
             .map(|reg| (reg, Vec::new()))
             .or_else(|| f.stack_limit.map(|gv| gen_stack_limit(f, &sig, gv)));
-        Self {
+        Ok(Self {
             sig,
             stackslots,
             stackslots_size: stack_offset,
@@ -529,7 +532,7 @@
             flags,
             is_leaf: f.is_leaf(),
             stack_limit,
-        }
+        })
     }
     /// Returns the offset from FP to the argument area, i.e., jumping over the saved FP, return
@@ -635,15 +638,22 @@ impl AArch64ABIBody {
 fn load_stack(mem: MemArg, into_reg: Writable<Reg>, ty: Type) -> Inst {
     match ty {
-        types::B1
-        | types::B8
-        | types::I8
-        | types::B16
-        | types::I16
-        | types::B32
-        | types::I32
-        | types::B64
-        | types::I64 => Inst::ULoad64 {
+        types::B1 | types::B8 | types::I8 => Inst::ULoad8 {
+            rd: into_reg,
+            mem,
+            srcloc: None,
+        },
+        types::B16 | types::I16 => Inst::ULoad16 {
+            rd: into_reg,
+            mem,
+            srcloc: None,
+        },
+        types::B32 | types::I32 => Inst::ULoad32 {
+            rd: into_reg,
+            mem,
+            srcloc: None,
+        },
+        types::B64 | types::I64 => Inst::ULoad64 {
             rd: into_reg,
             mem,
             srcloc: None,
@@ -664,15 +674,22 @@ fn load_stack(mem: MemArg, into_reg: Writable<Reg>, ty: Type) -> Inst {
 fn store_stack(mem: MemArg, from_reg: Reg, ty: Type) -> Inst {
     match ty {
-        types::B1
-        | types::B8
-        | types::I8
-        | types::B16
-        | types::I16
-        | types::B32
-        | types::I32
-        | types::B64
-        | types::I64 => Inst::Store64 {
+        types::B1 | types::B8 | types::I8 => Inst::Store8 {
+            rd: from_reg,
+            mem,
+            srcloc: None,
+        },
+        types::B16 | types::I16 => Inst::Store16 {
+            rd: from_reg,
+            mem,
+            srcloc: None,
+        },
+        types::B32 | types::I32 => Inst::Store32 {
+            rd: from_reg,
+            mem,
+            srcloc: None,
+        },
+        types::B64 | types::I64 => Inst::Store64 {
             rd: from_reg,
             mem,
             srcloc: None,
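The rewritten `load_stack`/`store_stack` pick an access width from the value type instead of always using a 64-bit access. A rough, self-contained sketch of that selection (the enum below is an illustrative stand-in, not the real `Inst`):

```rust
// Illustrative stand-in for the load instructions chosen by `load_stack`.
#[derive(Debug, PartialEq)]
enum LoadWidth {
    ULoad8,
    ULoad16,
    ULoad32,
    ULoad64,
}

/// Map a type's bit width to the matching unsigned load, mirroring the intent
/// of the new match arms above (B1/B8/I8 -> 8-bit, ..., B64/I64 -> 64-bit).
fn load_for_bits(bits: u32) -> LoadWidth {
    match bits {
        1 | 8 => LoadWidth::ULoad8,
        16 => LoadWidth::ULoad16,
        32 => LoadWidth::ULoad32,
        64 => LoadWidth::ULoad64,
        _ => panic!("unsupported width: {}", bits),
    }
}

fn main() {
    // Previously every one of these would have been a 64-bit load.
    assert_eq!(load_for_bits(8), LoadWidth::ULoad8);
    assert_eq!(load_for_bits(32), LoadWidth::ULoad32);
    assert_eq!(load_for_bits(64), LoadWidth::ULoad64);
}
```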
@@ -791,17 +808,14 @@ fn get_caller_saves(call_conv: isa::CallConv) -> Vec<Writable<Reg>> {
 impl ABIBody for AArch64ABIBody {
     type I = Inst;
-    fn needed_tmps(&self) -> usize {
-        if self.sig.stack_ret_arg.is_some() {
-            1
-        } else {
-            0
-        }
+    fn temp_needed(&self) -> bool {
+        self.sig.stack_ret_arg.is_some()
     }
-    fn init_with_tmps(&mut self, tmps: &[Writable<Reg>]) {
+    fn init(&mut self, maybe_tmp: Option<Writable<Reg>>) {
         if self.sig.stack_ret_arg.is_some() {
-            self.ret_area_ptr = Some(tmps[0]);
+            assert!(maybe_tmp.is_some());
+            self.ret_area_ptr = maybe_tmp;
         }
     }
@@ -845,14 +859,14 @@ impl ABIBody for AArch64ABIBody {
         match &self.sig.args[idx] {
             &ABIArg::Reg(r, ty) => Inst::gen_move(into_reg, r.to_reg(), ty),
             &ABIArg::Stack(off, ty) => load_stack(
-                MemArg::FPOffset(self.fp_to_arg_offset() + off),
+                MemArg::FPOffset(self.fp_to_arg_offset() + off, ty),
                 into_reg,
                 ty,
             ),
         }
     }
-    fn gen_retval_area_setup(&self) -> Vec<Inst> {
+    fn gen_retval_area_setup(&self) -> Option<Inst> {
         if let Some(i) = self.sig.stack_ret_arg {
             let inst = self.gen_copy_arg_to_reg(i, self.ret_area_ptr.unwrap());
             trace!(
@@ -860,10 +874,10 @@ impl ABIBody for AArch64ABIBody {
                 inst,
                 self.ret_area_ptr.unwrap().to_reg()
             );
-            vec![inst]
+            Some(inst)
         } else {
             trace!("gen_retval_area_setup: not needed");
-            vec![]
+            None
         }
     }
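Since the return-area setup is now at most a single instruction, `gen_retval_area_setup` returns `Option<Inst>` rather than `Vec<Inst>`, and the caller in `machinst/lower.rs` (further down in this commit) collapses to an `if let`. A trivial sketch of the shape, with a stand-in `Inst`:

```rust
// Stand-in instruction type for illustration only.
#[derive(Debug)]
struct Inst(&'static str);

// At most one setup instruction is ever produced: copy the hidden
// return-area pointer argument into its vreg, or do nothing.
fn gen_retval_area_setup(has_stack_ret_arg: bool) -> Option<Inst> {
    if has_stack_ret_arg {
        Some(Inst("copy ret-area-ptr arg to vreg"))
    } else {
        None
    }
}

fn main() {
    // Caller side, mirroring the Lower::gen_arg_setup change in this commit.
    if let Some(insn) = gen_retval_area_setup(true) {
        println!("emit {:?}", insn);
    }
}
```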
@@ -924,8 +938,7 @@ impl ABIBody for AArch64ABIBody {
             }
             _ => {}
         };
-        let mem = MemArg::reg_maybe_offset(self.ret_area_ptr.unwrap().to_reg(), off, ty)
-            .expect("Return-value area is too large");
+        let mem = MemArg::RegOffset(self.ret_area_ptr.unwrap().to_reg(), off, ty);
         ret.push(store_stack(mem, from_reg.to_reg(), ty))
     }
 }
@@ -961,7 +974,7 @@ impl ABIBody for AArch64ABIBody {
         let stack_off = self.stackslots[slot.as_u32() as usize] as i64;
         let sp_off: i64 = stack_off + (offset as i64);
         trace!("load_stackslot: slot {} -> sp_off {}", slot, sp_off);
-        load_stack(MemArg::NominalSPOffset(sp_off), into_reg, ty)
+        load_stack(MemArg::NominalSPOffset(sp_off, ty), into_reg, ty)
     }
     /// Store to a stackslot.
@@ -971,7 +984,7 @@
         let stack_off = self.stackslots[slot.as_u32() as usize] as i64;
         let sp_off: i64 = stack_off + (offset as i64);
         trace!("store_stackslot: slot {} -> sp_off {}", slot, sp_off);
-        store_stack(MemArg::NominalSPOffset(sp_off), from_reg, ty)
+        store_stack(MemArg::NominalSPOffset(sp_off, ty), from_reg, ty)
     }
     /// Produce an instruction that computes a stackslot address.
@@ -982,7 +995,7 @@
         let sp_off: i64 = stack_off + (offset as i64);
         Inst::LoadAddr {
             rd: into_reg,
-            mem: MemArg::NominalSPOffset(sp_off),
+            mem: MemArg::NominalSPOffset(sp_off, I8),
         }
     }
@@ -993,7 +1006,7 @@
         let spill_off = islot * 8;
         let sp_off = self.stackslots_size as i64 + spill_off;
         trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
-        load_stack(MemArg::NominalSPOffset(sp_off), into_reg, ty)
+        load_stack(MemArg::NominalSPOffset(sp_off, ty), into_reg, ty)
     }
     /// Store to a spillslot.
@@ -1003,7 +1016,7 @@
         let spill_off = islot * 8;
         let sp_off = self.stackslots_size as i64 + spill_off;
         trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
-        store_stack(MemArg::NominalSPOffset(sp_off), from_reg, ty)
+        store_stack(MemArg::NominalSPOffset(sp_off, ty), from_reg, ty)
     }
     fn gen_prologue(&mut self) -> Vec<Inst> {
@@ -1290,17 +1303,17 @@ impl AArch64ABICall {
         extname: &ir::ExternalName,
         dist: RelocDistance,
         loc: ir::SourceLoc,
-    ) -> AArch64ABICall {
-        let sig = ABISig::from_func_sig(sig);
+    ) -> CodegenResult<AArch64ABICall> {
+        let sig = ABISig::from_func_sig(sig)?;
         let (uses, defs) = abisig_to_uses_and_defs(&sig);
-        AArch64ABICall {
+        Ok(AArch64ABICall {
             sig,
             uses,
             defs,
             dest: CallDest::ExtName(extname.clone(), dist),
             loc,
             opcode: ir::Opcode::Call,
-        }
+        })
     }
     /// Create a callsite ABI object for a call to a function pointer with the
@@ -1310,17 +1323,17 @@
         ptr: Reg,
         loc: ir::SourceLoc,
         opcode: ir::Opcode,
-    ) -> AArch64ABICall {
-        let sig = ABISig::from_func_sig(sig);
+    ) -> CodegenResult<AArch64ABICall> {
+        let sig = ABISig::from_func_sig(sig)?;
         let (uses, defs) = abisig_to_uses_and_defs(&sig);
-        AArch64ABICall {
+        Ok(AArch64ABICall {
             sig,
             uses,
             defs,
             dest: CallDest::Reg(ptr),
             loc,
             opcode,
-        }
+        })
     }
 }
@@ -1394,7 +1407,9 @@ impl ABICall for AArch64ABICall {
                 from_reg,
                 ty,
             )),
-            &ABIArg::Stack(off, ty) => ctx.emit(store_stack(MemArg::SPOffset(off), from_reg, ty)),
+            &ABIArg::Stack(off, ty) => {
+                ctx.emit(store_stack(MemArg::SPOffset(off, ty), from_reg, ty))
+            }
         }
     }
@@ -1409,7 +1424,7 @@
             &ABIArg::Stack(off, ty) => {
                 let ret_area_base = self.sig.stack_arg_space;
                 ctx.emit(load_stack(
-                    MemArg::SPOffset(off + ret_area_base),
+                    MemArg::SPOffset(off + ret_area_base, ty),
                     into_reg,
                     ty,
                 ));
@@ -1427,7 +1442,7 @@
             let ret_area_base = self.sig.stack_arg_space;
             ctx.emit(Inst::LoadAddr {
                 rd,
-                mem: MemArg::SPOffset(ret_area_base),
+                mem: MemArg::SPOffset(ret_area_base, I8),
             });
             self.emit_copy_reg_to_arg(ctx, i, rd.to_reg());
         }

View File

@@ -145,11 +145,15 @@ pub enum MemArg {
/// Reference to a "label": e.g., a symbol. /// Reference to a "label": e.g., a symbol.
Label(MemLabel), Label(MemLabel),
/// Arbitrary offset from a register. Converted to generation of large
/// offsets with multiple instructions as necessary during code emission.
RegOffset(Reg, i64, Type),
/// Offset from the stack pointer. /// Offset from the stack pointer.
SPOffset(i64), SPOffset(i64, Type),
/// Offset from the frame pointer. /// Offset from the frame pointer.
FPOffset(i64), FPOffset(i64, Type),
/// Offset from the "nominal stack pointer", which is where the real SP is /// Offset from the "nominal stack pointer", which is where the real SP is
/// just after stack and spill slots are allocated in the function prologue. /// just after stack and spill slots are allocated in the function prologue.
@@ -163,7 +167,7 @@ pub enum MemArg {
/// SP" is where the actual SP is after the function prologue and before /// SP" is where the actual SP is after the function prologue and before
/// clobber pushes. See the diagram in the documentation for /// clobber pushes. See the diagram in the documentation for
/// [crate::isa::aarch64::abi](the ABI module) for more details. /// [crate::isa::aarch64::abi](the ABI module) for more details.
NominalSPOffset(i64), NominalSPOffset(i64, Type),
} }
impl MemArg { impl MemArg {
@@ -174,17 +178,6 @@ impl MemArg {
MemArg::UnsignedOffset(reg, UImm12Scaled::zero(I64)) MemArg::UnsignedOffset(reg, UImm12Scaled::zero(I64))
} }
/// Memory reference using an address in a register and an offset, if possible.
pub fn reg_maybe_offset(reg: Reg, offset: i64, value_type: Type) -> Option<MemArg> {
if let Some(simm9) = SImm9::maybe_from_i64(offset) {
Some(MemArg::Unscaled(reg, simm9))
} else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(offset, value_type) {
Some(MemArg::UnsignedOffset(reg, uimm12s))
} else {
None
}
}
/// Memory reference using the sum of two registers as an address. /// Memory reference using the sum of two registers as an address.
pub fn reg_plus_reg(reg1: Reg, reg2: Reg) -> MemArg { pub fn reg_plus_reg(reg1: Reg, reg2: Reg) -> MemArg {
MemArg::RegReg(reg1, reg2) MemArg::RegReg(reg1, reg2)
@@ -431,8 +424,11 @@ impl ShowWithRRU for MemArg {
simm9.show_rru(mb_rru) simm9.show_rru(mb_rru)
), ),
// Eliminated by `mem_finalize()`. // Eliminated by `mem_finalize()`.
&MemArg::SPOffset(..) | &MemArg::FPOffset(..) | &MemArg::NominalSPOffset(..) => { &MemArg::SPOffset(..)
panic!("Unexpected stack-offset mem-arg mode!") | &MemArg::FPOffset(..)
| &MemArg::NominalSPOffset(..)
| &MemArg::RegOffset(..) => {
panic!("Unexpected pseudo mem-arg mode (stack-offset or generic reg-offset)!")
} }
} }
} }

View File

@@ -5,6 +5,7 @@ use crate::ir::constant::ConstantData;
 use crate::ir::types::*;
 use crate::ir::TrapCode;
 use crate::isa::aarch64::inst::*;
+use crate::isa::aarch64::lower::ty_bits;
 use regalloc::{Reg, RegClass, Writable};
@@ -29,8 +30,12 @@ pub fn mem_finalize(
     state: &EmitState,
 ) -> (SmallVec<[Inst; 4]>, MemArg) {
     match mem {
-        &MemArg::SPOffset(off) | &MemArg::FPOffset(off) | &MemArg::NominalSPOffset(off) => {
+        &MemArg::RegOffset(_, off, ty)
+        | &MemArg::SPOffset(off, ty)
+        | &MemArg::FPOffset(off, ty)
+        | &MemArg::NominalSPOffset(off, ty) => {
             let basereg = match mem {
+                &MemArg::RegOffset(reg, _, _) => reg,
                 &MemArg::SPOffset(..) | &MemArg::NominalSPOffset(..) => stack_reg(),
                 &MemArg::FPOffset(..) => fp_reg(),
                 _ => unreachable!(),
@@ -52,6 +57,9 @@ pub fn mem_finalize(
             if let Some(simm9) = SImm9::maybe_from_i64(off) {
                 let mem = MemArg::Unscaled(basereg, simm9);
                 (smallvec![], mem)
+            } else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(off, ty) {
+                let mem = MemArg::UnsignedOffset(basereg, uimm12s);
+                (smallvec![], mem)
             } else {
                 let tmp = writable_spilltmp_reg();
                 let mut const_insts = Inst::load_constant(tmp, off as u64);
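With `RegOffset` (and the `Type` now carried by the stack-offset modes), `mem_finalize` can try three encodings in order: a signed 9-bit unscaled offset, an unsigned 12-bit offset scaled by the access type, or a constant load into the spill temp followed by reg+reg addressing. A simplified, self-contained sketch of that cascade; the constants and names below are stand-ins, not the real `SImm9`/`UImm12Scaled` helpers:

```rust
/// Stand-in for the legalized addressing modes produced by `mem_finalize`.
#[derive(Debug, PartialEq)]
enum Legalized {
    /// 9-bit signed, unscaled immediate (LDUR/STUR).
    Unscaled { off: i64 },
    /// 12-bit unsigned immediate, scaled by the access size (LDR/STR).
    UnsignedOffset { units: i64 },
    /// Offset too large: materialize it into a temp and use reg+reg addressing.
    ConstPlusRegReg { constant: u64 },
}

/// Simplified version of the immediate-fitting cascade; `access_bytes` plays
/// the role of the `Type` payload added in this commit.
fn legalize_offset(off: i64, access_bytes: i64) -> Legalized {
    // SImm9: signed 9-bit range, no scaling.
    if (-256..=255).contains(&off) {
        return Legalized::Unscaled { off };
    }
    // UImm12Scaled: non-negative multiple of the access size, fitting in
    // 12 bits after scaling.
    if off >= 0 && off % access_bytes == 0 && off / access_bytes <= 4095 {
        return Legalized::UnsignedOffset { units: off / access_bytes };
    }
    // Fall back to loading the offset into a scratch register.
    Legalized::ConstPlusRegReg { constant: off as u64 }
}

fn main() {
    // These match the new RegOffset emit tests added later in this commit:
    // offset 8 fits SImm9 (ldur), 1024 fits the scaled 12-bit form for an
    // 8-byte access (ldr), and 1048576 needs a movz + add + ldr sequence.
    assert_eq!(legalize_offset(8, 8), Legalized::Unscaled { off: 8 });
    assert_eq!(legalize_offset(1024, 8), Legalized::UnsignedOffset { units: 128 });
    assert_eq!(
        legalize_offset(1048576, 8),
        Legalized::ConstPlusRegReg { constant: 1048576 }
    );
}
```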
@@ -654,17 +662,17 @@ impl MachInstEmit for Inst {
                 // This is the base opcode (top 10 bits) for the "unscaled
                 // immediate" form (Unscaled). Other addressing modes will OR in
                 // other values for bits 24/25 (bits 1/2 of this constant).
-                let op = match self {
-                    &Inst::ULoad8 { .. } => 0b0011100001,
-                    &Inst::SLoad8 { .. } => 0b0011100010,
-                    &Inst::ULoad16 { .. } => 0b0111100001,
-                    &Inst::SLoad16 { .. } => 0b0111100010,
-                    &Inst::ULoad32 { .. } => 0b1011100001,
-                    &Inst::SLoad32 { .. } => 0b1011100010,
-                    &Inst::ULoad64 { .. } => 0b1111100001,
-                    &Inst::FpuLoad32 { .. } => 0b1011110001,
-                    &Inst::FpuLoad64 { .. } => 0b1111110001,
-                    &Inst::FpuLoad128 { .. } => 0b0011110011,
+                let (op, bits) = match self {
+                    &Inst::ULoad8 { .. } => (0b0011100001, 8),
+                    &Inst::SLoad8 { .. } => (0b0011100010, 8),
+                    &Inst::ULoad16 { .. } => (0b0111100001, 16),
+                    &Inst::SLoad16 { .. } => (0b0111100010, 16),
+                    &Inst::ULoad32 { .. } => (0b1011100001, 32),
+                    &Inst::SLoad32 { .. } => (0b1011100010, 32),
+                    &Inst::ULoad64 { .. } => (0b1111100001, 64),
+                    &Inst::FpuLoad32 { .. } => (0b1011110001, 32),
+                    &Inst::FpuLoad64 { .. } => (0b1111110001, 64),
+                    &Inst::FpuLoad128 { .. } => (0b0011110011, 128),
                     _ => unreachable!(),
                 };
@@ -678,6 +686,9 @@
                     sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
                 }
                 &MemArg::UnsignedOffset(reg, uimm12scaled) => {
+                    if uimm12scaled.value() != 0 {
+                        assert_eq!(bits, ty_bits(uimm12scaled.scale_ty()));
+                    }
                     sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd));
                 }
                 &MemArg::RegReg(r1, r2) => {
@@ -686,19 +697,7 @@
                     ));
                 }
                 &MemArg::RegScaled(r1, r2, ty) | &MemArg::RegScaledExtended(r1, r2, ty, _) => {
-                    match (ty, self) {
-                        (I8, &Inst::ULoad8 { .. }) => {}
-                        (I8, &Inst::SLoad8 { .. }) => {}
-                        (I16, &Inst::ULoad16 { .. }) => {}
-                        (I16, &Inst::SLoad16 { .. }) => {}
-                        (I32, &Inst::ULoad32 { .. }) => {}
-                        (I32, &Inst::SLoad32 { .. }) => {}
-                        (I64, &Inst::ULoad64 { .. }) => {}
-                        (F32, &Inst::FpuLoad32 { .. }) => {}
-                        (F64, &Inst::FpuLoad64 { .. }) => {}
-                        (I128, &Inst::FpuLoad128 { .. }) => {}
-                        _ => panic!("Mismatching reg-scaling type in MemArg"),
-                    }
+                    assert_eq!(bits, ty_bits(ty));
                     let extendop = match &mem {
                         &MemArg::RegScaled(..) => None,
                         &MemArg::RegScaledExtended(_, _, _, op) => Some(op),
@@ -746,6 +745,7 @@
                 &MemArg::SPOffset(..)
                 | &MemArg::FPOffset(..)
                 | &MemArg::NominalSPOffset(..) => panic!("Should not see stack-offset here!"),
+                &MemArg::RegOffset(..) => panic!("Should not see generic reg-offset here!"),
             }
         }
@@ -791,14 +791,14 @@ impl MachInstEmit for Inst {
                 inst.emit(sink, flags, state);
             }
-            let op = match self {
-                &Inst::Store8 { .. } => 0b0011100000,
-                &Inst::Store16 { .. } => 0b0111100000,
-                &Inst::Store32 { .. } => 0b1011100000,
-                &Inst::Store64 { .. } => 0b1111100000,
-                &Inst::FpuStore32 { .. } => 0b1011110000,
-                &Inst::FpuStore64 { .. } => 0b1111110000,
-                &Inst::FpuStore128 { .. } => 0b0011110010,
+            let (op, bits) = match self {
+                &Inst::Store8 { .. } => (0b0011100000, 8),
+                &Inst::Store16 { .. } => (0b0111100000, 16),
+                &Inst::Store32 { .. } => (0b1011100000, 32),
+                &Inst::Store64 { .. } => (0b1111100000, 64),
+                &Inst::FpuStore32 { .. } => (0b1011110000, 32),
+                &Inst::FpuStore64 { .. } => (0b1111110000, 64),
+                &Inst::FpuStore128 { .. } => (0b0011110010, 128),
                 _ => unreachable!(),
             };
@@ -812,6 +812,9 @@
                 sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
             }
             &MemArg::UnsignedOffset(reg, uimm12scaled) => {
+                if uimm12scaled.value() != 0 {
+                    assert_eq!(bits, ty_bits(uimm12scaled.scale_ty()));
+                }
                 sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd));
             }
             &MemArg::RegReg(r1, r2) => {
@@ -843,6 +846,7 @@
             &MemArg::SPOffset(..)
             | &MemArg::FPOffset(..)
             | &MemArg::NominalSPOffset(..) => panic!("Should not see stack-offset here!"),
+            &MemArg::RegOffset(..) => panic!("Should not see generic reg-offset here!"),
         }
     }

View File

@@ -1311,7 +1311,7 @@ fn test_aarch64_binemit() {
     insns.push((
         Inst::ULoad64 {
             rd: writable_xreg(1),
-            mem: MemArg::FPOffset(32768),
+            mem: MemArg::FPOffset(32768, I8),
             srcloc: None,
         },
         "100090D2B063308B010240F9",
@@ -1320,7 +1320,7 @@
     insns.push((
         Inst::ULoad64 {
             rd: writable_xreg(1),
-            mem: MemArg::FPOffset(-32768),
+            mem: MemArg::FPOffset(-32768, I8),
             srcloc: None,
         },
         "F0FF8F92B063308B010240F9",
@@ -1329,7 +1329,7 @@
     insns.push((
         Inst::ULoad64 {
             rd: writable_xreg(1),
-            mem: MemArg::FPOffset(1048576), // 2^20
+            mem: MemArg::FPOffset(1048576, I8), // 2^20
             srcloc: None,
         },
         "1002A0D2B063308B010240F9",
@@ -1338,13 +1338,43 @@
     insns.push((
         Inst::ULoad64 {
             rd: writable_xreg(1),
-            mem: MemArg::FPOffset(1048576 + 1), // 2^20 + 1
+            mem: MemArg::FPOffset(1048576 + 1, I8), // 2^20 + 1
             srcloc: None,
         },
         "300080D21002A0F2B063308B010240F9",
         "movz x16, #1 ; movk x16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
     ));
+    insns.push((
+        Inst::ULoad64 {
+            rd: writable_xreg(1),
+            mem: MemArg::RegOffset(xreg(7), 8, I64),
+            srcloc: None,
+        },
+        "E18040F8",
+        "ldur x1, [x7, #8]",
+    ));
+    insns.push((
+        Inst::ULoad64 {
+            rd: writable_xreg(1),
+            mem: MemArg::RegOffset(xreg(7), 1024, I64),
+            srcloc: None,
+        },
+        "E10042F9",
+        "ldr x1, [x7, #1024]",
+    ));
+    insns.push((
+        Inst::ULoad64 {
+            rd: writable_xreg(1),
+            mem: MemArg::RegOffset(xreg(7), 1048576, I64),
+            srcloc: None,
+        },
+        "1002A0D2F060308B010240F9",
+        "movz x16, #16, LSL #16 ; add x16, x7, x16, UXTX ; ldr x1, [x16]",
+    ));
     insns.push((
         Inst::Store8 {
             rd: xreg(1),

View File

@@ -259,7 +259,12 @@ impl UImm12Scaled {
     /// Value after scaling.
     pub fn value(&self) -> u32 {
-        self.value as u32 * self.scale_ty.bytes()
+        self.value as u32
+    }
+
+    /// The value type which is the scaling base.
+    pub fn scale_ty(&self) -> Type {
+        self.scale_ty
     }
 }

View File

@@ -1004,6 +1004,9 @@ fn memarg_regs(memarg: &MemArg, collector: &mut RegUsageCollector) {
         &MemArg::SPOffset(..) | &MemArg::NominalSPOffset(..) => {
             collector.add_use(stack_reg());
         }
+        &MemArg::RegOffset(r, ..) => {
+            collector.add_use(r);
+        }
     }
 }
@@ -1318,6 +1321,7 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
         &mut MemArg::FPOffset(..)
         | &mut MemArg::SPOffset(..)
        | &mut MemArg::NominalSPOffset(..) => {}
+        &mut MemArg::RegOffset(ref mut r, ..) => map_use(m, r),
     };
 }

View File

@@ -539,12 +539,10 @@ pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
     // TODO: support base_reg + scale * index_reg. For this, we would need to pattern-match shl or
     // mul instructions (Load/StoreComplex don't include scale factors).
-    // Handle one reg and offset that fits in immediate, if possible.
+    // Handle one reg and offset.
     if addends.len() == 1 {
         let reg = input_to_reg(ctx, addends[0], NarrowValueMode::ZeroExtend64);
-        if let Some(memarg) = MemArg::reg_maybe_offset(reg, offset as i64, elem_ty) {
-            return memarg;
-        }
+        return MemArg::RegOffset(reg, offset as i64, elem_ty);
     }
     // Handle two regs and a zero offset, if possible.

View File

@@ -1335,7 +1335,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             assert!(inputs.len() == sig.params.len());
             assert!(outputs.len() == sig.returns.len());
             (
-                AArch64ABICall::from_func(sig, &extname, dist, loc),
+                AArch64ABICall::from_func(sig, &extname, dist, loc)?,
                 &inputs[..],
             )
         }
@@ -1344,7 +1344,7 @@
             let sig = ctx.call_sig(insn).unwrap();
             assert!(inputs.len() - 1 == sig.params.len());
             assert!(outputs.len() == sig.returns.len());
-            (AArch64ABICall::from_ptr(sig, ptr, loc, op), &inputs[1..])
+            (AArch64ABICall::from_ptr(sig, ptr, loc, op)?, &inputs[1..])
         }
         _ => unreachable!(),
     };

View File

@@ -46,7 +46,7 @@ impl AArch64Backend {
         func: &Function,
         flags: settings::Flags,
     ) -> CodegenResult<VCode<inst::Inst>> {
-        let abi = Box::new(abi::AArch64ABIBody::new(func, flags));
+        let abi = Box::new(abi::AArch64ABIBody::new(func, flags)?);
         compile::compile::<AArch64Backend>(func, self, abi)
     }
 }

View File

@@ -184,11 +184,11 @@ impl X64ABIBody {
 impl ABIBody for X64ABIBody {
     type I = Inst;
-    fn needed_tmps(&self) -> usize {
-        0
+    fn temp_needed(&self) -> bool {
+        false
     }
-    fn init_with_tmps(&mut self, _: &[Writable<Reg>]) {}
+    fn init(&mut self, _: Option<Writable<Reg>>) {}
     fn flags(&self) -> &settings::Flags {
         &self.flags
@@ -239,8 +239,8 @@ impl ABIBody for X64ABIBody {
         }
     }
-    fn gen_retval_area_setup(&self) -> Vec<Inst> {
-        vec![]
+    fn gen_retval_area_setup(&self) -> Option<Inst> {
+        None
     }
     fn gen_copy_reg_to_retval(

View File

@@ -12,11 +12,14 @@ pub trait ABIBody {
     /// The instruction type for the ISA associated with this ABI.
     type I: VCodeInst;
-    /// How many temps are needed?
-    fn needed_tmps(&self) -> usize;
+    /// Does the ABI-body code need a temp reg? One will be provided to `init()`
+    /// as the `maybe_tmp` arg if so.
+    fn temp_needed(&self) -> bool;
-    /// Initialize, providing the requersted temps.
-    fn init_with_tmps(&mut self, tmps: &[Writable<Reg>]);
+    /// Initialize. This is called after the ABIBody is constructed because it
+    /// may be provided with a temp vreg, which can only be allocated once the
+    /// lowering context exists.
+    fn init(&mut self, maybe_tmp: Option<Writable<Reg>>);
     /// Get the settings controlling this function's compilation.
     fn flags(&self) -> &settings::Flags;
@@ -40,12 +43,12 @@ pub trait ABIBody {
     /// register.
     fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Self::I;
-    /// Generate any setup instructions needed to save values to the
-    /// return-value area. This is usually used when there are multiple return
+    /// Generate any setup instruction needed to save values to the
+    /// return-value area. This is usually used when there are multiple return
     /// values or an otherwise large return value that must be passed on the
     /// stack; typically the ABI specifies an extra hidden argument that is a
     /// pointer to that memory.
-    fn gen_retval_area_setup(&self) -> Vec<Self::I>;
+    fn gen_retval_area_setup(&self) -> Option<Self::I>;
     /// Generate an instruction which copies a source register to a return value slot.
     fn gen_copy_reg_to_retval(
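A minimal sketch of the new handshake between the lowering driver and an `ABIBody` implementation. Only the `temp_needed`/`init` shape comes from the diff; the trait and register type below are pared-down stand-ins, not the real `ABIBody`/`Writable<Reg>`.

```rust
/// Stand-in for a writable virtual register.
#[derive(Clone, Copy, Debug)]
struct WritableReg(u32);

/// Pared-down version of the trait change: one optional temp instead of a
/// caller-chosen number of temps.
trait AbiBodySketch {
    fn temp_needed(&self) -> bool;
    fn init(&mut self, maybe_tmp: Option<WritableReg>);
}

/// An implementation that needs a temp only when there is a stack return
/// area, mirroring AArch64ABIBody earlier in this commit.
struct Aarch64Like {
    has_stack_ret_arg: bool,
    ret_area_ptr: Option<WritableReg>,
}

impl AbiBodySketch for Aarch64Like {
    fn temp_needed(&self) -> bool {
        self.has_stack_ret_arg
    }
    fn init(&mut self, maybe_tmp: Option<WritableReg>) {
        if self.has_stack_ret_arg {
            assert!(maybe_tmp.is_some());
            self.ret_area_ptr = maybe_tmp;
        }
    }
}

fn main() {
    let mut abi = Aarch64Like { has_stack_ret_arg: true, ret_area_ptr: None };
    // Driver side (mirrors Lower::lower below): allocate a temp vreg only if asked.
    let maybe_tmp = if abi.temp_needed() { Some(WritableReg(0)) } else { None };
    abi.init(maybe_tmp);
    assert!(abi.ret_area_ptr.is_some());
}
```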

View File

@@ -383,7 +383,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
             let insn = self.vcode.abi().gen_copy_arg_to_reg(i, reg);
             self.emit(insn);
         }
-        for insn in self.vcode.abi().gen_retval_area_setup().into_iter() {
+        if let Some(insn) = self.vcode.abi().gen_retval_area_setup() {
             self.emit(insn);
         }
     }
@@ -652,11 +652,13 @@
     pub fn lower<B: LowerBackend<MInst = I>>(mut self, backend: &B) -> CodegenResult<VCode<I>> {
         debug!("about to lower function: {:?}", self.f);
-        // Initialize the ABI object with any temps it needs.
-        let tmps: SmallVec<[Writable<Reg>; 4]> = (0..self.vcode.abi().needed_tmps())
-            .map(|_| self.alloc_tmp(RegClass::I64, I64))
-            .collect();
-        self.vcode.abi().init_with_tmps(&tmps[..]);
+        // Initialize the ABI object, giving it a temp if requested.
+        let maybe_tmp = if self.vcode.abi().temp_needed() {
+            Some(self.alloc_tmp(RegClass::I64, I64))
+        } else {
+            None
+        };
+        self.vcode.abi().init(maybe_tmp);
         // Get the pinned reg here (we only parameterize this function on `B`,
         // not the whole `Lower` impl).

View File

@@ -64,8 +64,8 @@ block0(v0: i64):
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: ldr x16, [x0]
-; nextln: ldr x16, [x16, #4]
+; nextln: ldur x16, [x0]
+; nextln: ldur x16, [x16, #4]
 ; nextln: subs xzr, sp, x16
 ; nextln: b.hs 8
 ; nextln: udf
@@ -128,8 +128,8 @@ block0(v0: i64):
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: ldr x16, [x0]
-; nextln: ldr x16, [x16, #4]
+; nextln: ldur x16, [x0]
+; nextln: ldur x16, [x16, #4]
 ; nextln: add x16, x16, #32
 ; nextln: subs xzr, sp, x16
 ; nextln: b.hs 8
@@ -151,8 +151,8 @@ block0(v0: i64):
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: ldr x16, [x0]
-; nextln: ldr x16, [x16, #4]
+; nextln: ldur x16, [x0]
+; nextln: ldur x16, [x16, #4]
 ; nextln: subs xzr, sp, x16
 ; nextln: b.hs 8
 ; nextln: udf
@@ -179,9 +179,7 @@ block0(v0: i64):
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: movz x16, #6784
-; nextln: movk x16, #6, LSL #16
-; nextln: ldr x16, [x0, x16]
+; nextln: movz x16, #6784 ; movk x16, #6, LSL #16 ; add x16, x0, x16, UXTX ; ldr x16, [x16]
 ; nextln: add x16, x16, #32
 ; nextln: subs xzr, sp, x16
 ; nextln: b.hs 8