diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index a2d2552d86..c71096d485 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -1,4 +1,63 @@ //! Implementation of the standard AArch64 ABI. +//! +//! We implement the standard AArch64 ABI, as documented by ARM. This ABI +//! specifies how arguments are passed (in registers or on the stack, as +//! appropriate), which registers are caller- and callee-saved, and how a +//! particular part of the stack frame (the FP/LR pair) must be linked through +//! the active stack frames. +//! +//! Note, however, that the exact stack layout is up to us. We settled on the +//! below design based on several requirements. In particular, we need to be +//! able to generate instructions (or instruction sequences) to access +//! arguments, stack slots, and spill slots before we know how many spill slots +//! or clobber-saves there will be, because of our pass structure. We also +//! prefer positive offsets to negative offsets because of an asymmetry in +//! AArch64 addressing modes (positive offsets have a larger possible range +//! without a long-form sequence to synthesize an arbitrary offset). Finally, it +//! is not allowed to access memory below the current SP value. +//! +//! As a result, we keep the FP/LR pair just below stack args so that we can +//! access these args at known offsets from FP, and we access on-stack storage +//! using positive offsets from SP. In order to allow codegen for the latter +//! before knowing how many clobber-saves we have, and also allow it while SP is +//! being adjusted to set up a call, we implement a "nominal SP" tracking +//! feature by which a fixup (distance between actual SP and a "nominal" SP) is +//! known at each instruction. See the documentation for +//! [MemArg::NominalSPOffset] for more on this. +//! +//! The stack looks like: +//! +//! ```plain +//! (high address) +//! +//! +---------------------------+ +//! | ... | +//! | stack args | +//! | (accessed via FP) | +//! +---------------------------+ +//! SP at function entry -----> | LR (pushed by prologue) | +//! +---------------------------+ +//! FP after prologue --------> | FP (pushed by prologue) | +//! +---------------------------+ +//! | ... | +//! | spill slots | +//! | (accessed via nominal-SP) | +//! | ... | +//! | stack slots | +//! | (accessed via nominal-SP) | +//! nominal SP ---------------> | (alloc'd by prologue) | +//! +---------------------------+ +//! | ... | +//! | clobbered callee-saves | +//! SP at end of prologue ----> | (pushed by prologue) | +//! +---------------------------+ +//! | ... | +//! | args for call | +//! SP before making a call --> | (pushed at callsite) | +//! +---------------------------+ +//! +//! (low address) +//! ``` use crate::ir; use crate::ir::types; @@ -13,7 +72,7 @@ use alloc::vec::Vec; use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable}; -use log::debug; +use log::{debug, trace}; /// A location for an argument or return value. #[derive(Clone, Copy, Debug)] @@ -188,7 +247,7 @@ pub struct AArch64ABIBody { /// Total number of spillslots, from regalloc. spillslots: Option<u32>, /// Total frame size. - frame_size: Option<u32>, + total_frame_size: Option<u32>, /// Calling convention this function expects. call_conv: isa::CallConv, /// The settings controlling this function's compilation.
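To make the nominal-SP scheme described in the module doc concrete, here is a minimal editorial sketch (not part of the patch; the names are illustrative stand-ins for the patch's tracking value) of the fixup arithmetic that converts a nominal-SP-relative offset into a real-SP-relative one:

```rust
/// Minimal sketch of the nominal-SP fixup described in the module doc above.
/// `virtual_sp_offset` stands for the running distance by which the real SP
/// has dropped below nominal SP (clobber saves, call-arg pushes); it is an
/// assumed name mirroring the patch's tracking value, not the exact type.
fn resolve_nominal_sp_offset(nominal_offset: i64, virtual_sp_offset: i64) -> i64 {
    // The slot itself has not moved, but SP has: the real offset grows by
    // exactly the amount SP has dropped.
    nominal_offset + virtual_sp_offset
}

fn main() {
    // After the prologue pushes 16 bytes of clobbers, a slot at
    // nominal-SP+8 is addressed as [sp, #24].
    assert_eq!(resolve_nominal_sp_offset(8, 16), 24);
}
```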
@@ -347,7 +406,7 @@ impl AArch64ABIBody { stackslots_size: stack_offset, clobbered: Set::empty(), spillslots: None, - frame_size: None, + total_frame_size: None, call_conv, flags, is_leaf: f.is_leaf(), @@ -355,9 +414,9 @@ impl AArch64ABIBody { } } - /// Returns the size of a function call frame (including return address and FP) for this - /// function's body. - fn frame_size(&self) -> i64 { + /// Returns the offset from FP to the argument area, i.e., jumping over the saved FP, return + /// address, and maybe other standard elements depending on ABI (e.g. Wasm TLS reg). + fn fp_to_arg_offset(&self) -> i64 { if self.call_conv.extends_baldrdash() { let num_words = self.flags.baldrdash_prologue_words() as i64; debug_assert!(num_words > 0, "baldrdash must set baldrdash_prologue_words"); @@ -383,8 +442,8 @@ impl AArch64ABIBody { /// happening so late in the pipeline (e.g. after register allocation). This /// means that we need to do manual register allocation here and also be /// careful to not clobber any callee-saved or argument registers. For now - /// this routine makes do with the `writable_spilltmp_reg` as one temporary - /// register, and a second register of `x16` which is caller-saved. This + /// this routine makes do with the `spilltmp_reg` as one temporary + /// register, and a second register of `tmp2` which is caller-saved. This /// should be fine for us since no spills should happen in this sequence of /// instructions, so our register won't get accidentally clobbered. /// @@ -413,9 +472,9 @@ impl AArch64ABIBody { // Note though that `stack_limit`'s register may be the same as // `scratch`. If our stack size doesn't fit into an immediate this // means we need a second scratch register for loading the stack size - // into a register. We use `x16` here since it's caller-saved and we're - // in the function prologue and nothing else is allocated to it yet. + // into a register. 
let scratch = writable_spilltmp_reg(); + let scratch2 = writable_tmp2_reg(); let stack_size = u64::from(stack_size); if let Some(imm12) = Imm12::maybe_from_u64(stack_size) { insts.push(Inst::AluRRImm12 { @@ -425,16 +484,12 @@ impl AArch64ABIBody { imm12, }); } else { - let scratch2 = 16; - insts.extend(Inst::load_constant( - Writable::from_reg(xreg(scratch2)), - stack_size.into(), - )); + insts.extend(Inst::load_constant(scratch2, stack_size.into())); insts.push(Inst::AluRRRExtend { alu_op: ALUOp::Add64, rd: scratch, rn: stack_limit, - rm: xreg(scratch2), + rm: scratch2.to_reg(), extendop: ExtendOp::UXTX, }); } @@ -460,8 +515,7 @@ impl AArch64ABIBody { } } -fn load_stack_from_fp(fp_offset: i64, into_reg: Writable<Reg>, ty: Type) -> Inst { - let mem = MemArg::FPOffset(fp_offset); +fn load_stack(mem: MemArg, into_reg: Writable<Reg>, ty: Type) -> Inst { match ty { types::B1 | types::B8 @@ -486,15 +540,11 @@ fn load_stack_from_fp(fp_offset: i64, into_reg: Writable<Reg>, ty: Type) -> Inst mem, srcloc: None, }, - _ => unimplemented!("load_stack_from_fp({})", ty), + _ => unimplemented!("load_stack({})", ty), } } fn store_stack(mem: MemArg, from_reg: Reg, ty: Type) -> Inst { - debug_assert!(match &mem { - MemArg::SPOffset(off) => SImm9::maybe_from_i64(*off).is_some(), - _ => true, - }); match ty { types::B1 | types::B8 @@ -523,50 +573,6 @@ fn store_stack(mem: MemArg, from_reg: Reg, ty: Type) -> Inst { } } -fn store_stack_fp(fp_offset: i64, from_reg: Reg, ty: Type) -> Inst { - store_stack(MemArg::FPOffset(fp_offset), from_reg, ty) -} - -fn store_stack_sp<C: LowerCtx<Inst>>( - ctx: &mut C, - sp_offset: i64, - from_reg: Reg, - ty: Type, -) -> Vec<Inst> { - if SImm9::maybe_from_i64(sp_offset).is_some() { - vec![store_stack(MemArg::SPOffset(sp_offset), from_reg, ty)] - } else { - // mem_finalize will try to generate an add, but in an addition, x31 is the zero register, - // not sp! So we have to synthesize the full add here. - let tmp1 = ctx.tmp(RegClass::I64, I64); - let tmp2 = ctx.tmp(RegClass::I64, I64); - let mut result = Vec::new(); - // tmp1 := sp - result.push(Inst::Mov { - rd: tmp1, - rm: stack_reg(), - }); - // tmp2 := offset - for inst in Inst::load_constant(tmp2, sp_offset as u64) { - result.push(inst); - } - // tmp1 := add tmp1, tmp2 - result.push(Inst::AluRRR { - alu_op: ALUOp::Add64, - rd: tmp1, - rn: tmp1.to_reg(), - rm: tmp2.to_reg(), - }); - // Actual store. - result.push(store_stack( - MemArg::Unscaled(tmp1.to_reg(), SImm9::maybe_from_i64(0).unwrap()), - from_reg, - ty, - )); - result - } -} - fn is_callee_save(call_conv: isa::CallConv, r: RealReg) -> bool { if call_conv.extends_baldrdash() { match r.get_class() { @@ -706,7 +712,11 @@ impl ABIBody for AArch64ABIBody { fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Inst { match &self.sig.args[idx] { &ABIArg::Reg(r, ty) => Inst::gen_move(into_reg, r.to_reg(), ty), - &ABIArg::Stack(off, ty) => load_stack_from_fp(off + self.frame_size(), into_reg, ty), + &ABIArg::Stack(off, ty) => load_stack( + MemArg::FPOffset(self.fp_to_arg_offset() + off), + into_reg, + ty, + ), } } @@ -767,8 +777,8 @@ impl ABIBody for AArch64ABIBody { } _ => {} }; - ret.push(store_stack_fp( - off + self.frame_size(), + ret.push(store_stack( + MemArg::FPOffset(self.fp_to_arg_offset() + off), from_reg.to_reg(), ty, )) @@ -793,6 +803,7 @@ impl ABIBody for AArch64ABIBody { self.clobbered = clobbered; } + /// Load from a stackslot.
fn load_stackslot( &self, slot: StackSlot, @@ -800,47 +811,54 @@ impl ABIBody for AArch64ABIBody { offset: u32, ty: Type, into_reg: Writable<Reg>, ) -> Inst { - // Offset from beginning of stackslot area, which is at FP - stackslots_size. + // Offset from beginning of stackslot area, which is at nominal-SP (see + // [MemArg::NominalSPOffset] for more details on nominal-SP tracking). let stack_off = self.stackslots[slot.as_u32() as usize] as i64; - let fp_off: i64 = -(self.stackslots_size as i64) + stack_off + (offset as i64); - load_stack_from_fp(fp_off, into_reg, ty) + let sp_off: i64 = stack_off + (offset as i64); + trace!("load_stackslot: slot {} -> sp_off {}", slot, sp_off); + load_stack(MemArg::NominalSPOffset(sp_off), into_reg, ty) } + /// Store to a stackslot. fn store_stackslot(&self, slot: StackSlot, offset: u32, ty: Type, from_reg: Reg) -> Inst { - // Offset from beginning of stackslot area, which is at FP - stackslots_size. + // Offset from beginning of stackslot area, which is at nominal-SP (see + // [MemArg::NominalSPOffset] for more details on nominal-SP tracking). let stack_off = self.stackslots[slot.as_u32() as usize] as i64; - let fp_off: i64 = -(self.stackslots_size as i64) + stack_off + (offset as i64); - store_stack_fp(fp_off, from_reg, ty) + let sp_off: i64 = stack_off + (offset as i64); + trace!("store_stackslot: slot {} -> sp_off {}", slot, sp_off); + store_stack(MemArg::NominalSPOffset(sp_off), from_reg, ty) } + /// Produce an instruction that computes a stackslot address. fn stackslot_addr(&self, slot: StackSlot, offset: u32, into_reg: Writable<Reg>) -> Inst { - // Offset from beginning of stackslot area, which is at FP - stackslots_size. + // Offset from beginning of stackslot area, which is at nominal-SP (see + // [MemArg::NominalSPOffset] for more details on nominal-SP tracking). let stack_off = self.stackslots[slot.as_u32() as usize] as i64; - let fp_off: i64 = -(self.stackslots_size as i64) + stack_off + (offset as i64); + let sp_off: i64 = stack_off + (offset as i64); Inst::LoadAddr { rd: into_reg, - mem: MemArg::FPOffset(fp_off), + mem: MemArg::NominalSPOffset(sp_off), } } - // Load from a spillslot. + /// Load from a spillslot. fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable<Reg>) -> Inst { - // Note that when spills/fills are generated, we don't yet know how many - // spillslots there will be, so we allocate *downward* from the beginning - // of the stackslot area. Hence: FP - stackslot_size - 8*spillslot - - // sizeof(ty). + // Offset from beginning of spillslot area, which is at nominal-SP + stackslots_size. let islot = slot.get() as i64; - let ty_size = self.get_spillslot_size(into_reg.to_reg().get_class(), ty) * 8; - let fp_off: i64 = -(self.stackslots_size as i64) - (8 * islot) - ty_size as i64; - load_stack_from_fp(fp_off, into_reg, ty) + let spill_off = islot * 8; + let sp_off = self.stackslots_size as i64 + spill_off; + trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off); + load_stack(MemArg::NominalSPOffset(sp_off), into_reg, ty) } - // Store to a spillslot. + /// Store to a spillslot. fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> Inst { + // Offset from beginning of spillslot area, which is at nominal-SP + stackslots_size.
let islot = slot.get() as i64; - let ty_size = self.get_spillslot_size(from_reg.get_class(), ty) * 8; - let fp_off: i64 = -(self.stackslots_size as i64) - (8 * islot) - ty_size as i64; - store_stack_fp(fp_off, from_reg, ty) + let spill_off = islot * 8; + let sp_off = self.stackslots_size as i64 + spill_off; + trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off); + store_stack(MemArg::NominalSPOffset(sp_off), from_reg, ty) } fn gen_prologue(&mut self) -> Vec<Inst> { @@ -916,9 +934,18 @@ impl ABIBody for AArch64ABIBody { } } + // N.B.: "nominal SP", which we use to refer to stackslots + // and spillslots, is *here* (the value of SP at this program point). + // If we push any clobbers below, we emit a virtual-SP adjustment + // meta-instruction so that the nominal-SP references behave as if SP + // were still at this point. See documentation for + // [crate::isa::aarch64::abi](this module) for more details on + // stackframe layout and nominal-SP maintenance. + // Save clobbered registers. let (clobbered_int, clobbered_vec) = get_callee_saves(self.call_conv, self.clobbered.to_vec()); + let mut clobber_size = 0; for reg_pair in clobbered_int.chunks(2) { let (r1, r2) = if reg_pair.len() == 2 { // .to_reg().to_reg(): Writable<RealReg> --> RealReg --> Reg @@ -939,6 +966,7 @@ impl ABIBody for AArch64ABIBody { SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(), ), }); + clobber_size += 16; } let vec_save_bytes = clobbered_vec.len() * 16; if vec_save_bytes != 0 { @@ -948,6 +976,7 @@ impl ABIBody for AArch64ABIBody { rn: stack_reg(), imm12: Imm12::maybe_from_u64(vec_save_bytes as u64).unwrap(), }); + clobber_size += vec_save_bytes; } for (i, reg) in clobbered_vec.iter().enumerate() { insts.push(Inst::FpuStore128 { @@ -957,7 +986,13 @@ impl ABIBody for AArch64ABIBody { }); } - self.frame_size = Some(total_stacksize); + if clobber_size > 0 { + insts.push(Inst::VirtualSPOffsetAdj { + offset: clobber_size as i64, + }); + } + + self.total_frame_size = Some(total_stacksize); insts } @@ -1009,6 +1044,12 @@ impl ABIBody for AArch64ABIBody { }); } + // N.B.: we do *not* emit a nominal-SP adjustment here, because (i) there will be no + // references to nominal-SP offsets before the return below, and (ii) the instruction + // emission tracks running SP offset linearly (in straight-line order), not according to + // the CFG, so early returns in the middle of function bodies would cause an incorrect + // offset for the rest of the body. + if !self.call_conv.extends_baldrdash() { // The MOV (alias of ORR) interprets x31 as XZR, so use an ADD here. // MOV to SP is an alias of ADD.
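The stackslot/spillslot accessors and the prologue above combine into one simple invariant; a hedged sketch of it (illustrative names and sizes, not the patch's actual computation):

```rust
/// Hedged sketch of the offset rules used by the accessors above: stack
/// slots start at nominal SP, spill slots sit just above them at 8 bytes
/// per slot, and a prologue VirtualSPOffsetAdj keeps references valid once
/// clobbers are pushed. All names here are illustrative.
fn spillslot_nominal_sp_offset(stackslots_size: i64, spill_slot_index: i64) -> i64 {
    stackslots_size + 8 * spill_slot_index
}

fn main() {
    // With 32 bytes of stack slots, spill slot #2 is at nominal-SP + 48.
    let nominal = spillslot_nominal_sp_offset(32, 2);
    assert_eq!(nominal, 48);
    // After the prologue pushes 16 bytes of clobbers and emits a
    // VirtualSPOffsetAdj of +16, the same slot is addressed at SP + 64.
    assert_eq!(nominal + 16, 64);
}
```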
@@ -1037,7 +1078,7 @@ impl ABIBody for AArch64ABIBody { } fn frame_size(&self) -> u32 { - self.frame_size + self.total_frame_size .expect("frame size not computed before prologue generation") } @@ -1138,20 +1179,32 @@ impl AArch64ABICall { } } -fn adjust_stack(amt: u64, is_sub: bool) -> Vec<Inst> { - if amt > 0 { +fn adjust_stack(amount: u64, is_sub: bool) -> Vec<Inst> { + if amount > 0 { + let sp_adjustment = if is_sub { + amount as i64 + } else { + -(amount as i64) + }; + let adj_meta_insn = Inst::VirtualSPOffsetAdj { + offset: sp_adjustment, + }; + let alu_op = if is_sub { ALUOp::Sub64 } else { ALUOp::Add64 }; - if let Some(imm12) = Imm12::maybe_from_u64(amt) { - vec![Inst::AluRRImm12 { - alu_op, - rd: writable_stack_reg(), - rn: stack_reg(), - imm12, - }] + if let Some(imm12) = Imm12::maybe_from_u64(amount) { + vec![ + adj_meta_insn, + Inst::AluRRImm12 { + alu_op, + rd: writable_stack_reg(), + rn: stack_reg(), + imm12, + }, + ] } else { let const_load = Inst::LoadConst64 { rd: writable_spilltmp_reg(), - const_data: amt, + const_data: amount, }; let adj = Inst::AluRRRExtend { alu_op, @@ -1160,7 +1213,7 @@ fn adjust_stack(amt: u64, is_sub: bool) -> Vec<Inst> { rm: spilltmp_reg(), extendop: ExtendOp::UXTX, }; - vec![const_load, adj] + vec![adj_meta_insn, const_load, adj] } } else { vec![] } } @@ -1182,19 +1235,14 @@ impl ABICall for AArch64ABICall { adjust_stack(self.sig.stack_arg_space as u64, /* is_sub = */ false) } - fn gen_copy_reg_to_arg<C: LowerCtx<Inst>>( - &self, - ctx: &mut C, - idx: usize, - from_reg: Reg, - ) -> Vec<Inst> { + fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: Reg) -> Vec<Inst> { match &self.sig.args[idx] { &ABIArg::Reg(reg, ty) => vec![Inst::gen_move( Writable::from_reg(reg.to_reg()), from_reg, ty, )], - &ABIArg::Stack(off, ty) => store_stack_sp(ctx, off, from_reg, ty), + &ABIArg::Stack(off, ty) => vec![store_stack(MemArg::SPOffset(off), from_reg, ty)], } } diff --git a/cranelift/codegen/src/isa/aarch64/inst/args.rs b/cranelift/codegen/src/isa/aarch64/inst/args.rs index db385cf5c6..8eb3b9b02a 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/args.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs @@ -112,7 +112,9 @@ pub enum MemLabel { /// A memory argument to load/store, encapsulating the possible addressing modes. #[derive(Clone, Debug)] pub enum MemArg { - Label(MemLabel), + // + // Real ARM64 addressing modes: + // /// "post-indexed" mode as per AArch64 docs: postincrement reg after address computation. PostIndexed(Writable<Reg>, SImm9), /// "pre-indexed" mode as per AArch64 docs: preincrement reg before address computation. @@ -137,11 +139,31 @@ pub enum MemArg { /// Scaled (by size of a type) unsigned 12-bit immediate offset from reg. UnsignedOffset(Reg, UImm12Scaled), - /// Offset from the stack pointer. Lowered into a real amode at emission. + // + // virtual addressing modes that are lowered at emission time: + // + /// Reference to a "label": e.g., a symbol. + Label(MemLabel), + + /// Offset from the stack pointer. SPOffset(i64), - /// Offset from the frame pointer. Lowered into a real amode at emission. + /// Offset from the frame pointer. FPOffset(i64), + + /// Offset from the "nominal stack pointer", which is where the real SP is + /// just after stack and spill slots are allocated in the function prologue. + /// At emission time, this is converted to `SPOffset` with a fixup added to + /// the offset constant. The fixup is a running value that is tracked as + /// emission iterates through instructions in linear order, and can be + /// adjusted up and down with [Inst::VirtualSPOffsetAdj].
+ /// + /// The standard ABI is in charge of handling this (by emitting the + /// adjustment meta-instructions). It maintains the invariant that "nominal + /// SP" is where the actual SP is after the function prologue and before + /// clobber pushes. See the diagram in the documentation for + /// [crate::isa::aarch64::abi](the ABI module) for more details. + NominalSPOffset(i64), } impl MemArg { @@ -443,7 +465,7 @@ impl ShowWithRRU for MemArg { simm9.show_rru(mb_rru) ), // Eliminated by `mem_finalize()`. - &MemArg::SPOffset(..) | &MemArg::FPOffset(..) => { + &MemArg::SPOffset(..) | &MemArg::FPOffset(..) | &MemArg::NominalSPOffset(..) => { panic!("Unexpected stack-offset mem-arg mode!") } } diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 2d8613b4b3..da7da92050 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -10,6 +10,7 @@ use regalloc::{Reg, RegClass, Writable}; use alloc::vec::Vec; use core::convert::TryFrom; +use log::debug; /// Memory label/reference finalization: convert a MemLabel to a PC-relative /// offset, possibly emitting relocation(s) as necessary. @@ -23,33 +24,44 @@ pub fn memlabel_finalize(_insn_off: CodeOffset, label: &MemLabel) -> i32 { /// generic arbitrary stack offset) into real addressing modes, possibly by /// emitting some helper instructions that come immediately before the use /// of this amode. -pub fn mem_finalize(insn_off: CodeOffset, mem: &MemArg) -> (Vec<Inst>, MemArg) { +pub fn mem_finalize(insn_off: CodeOffset, mem: &MemArg, state: &EmitState) -> (Vec<Inst>, MemArg) { match mem { - &MemArg::SPOffset(off) | &MemArg::FPOffset(off) => { + &MemArg::SPOffset(off) | &MemArg::FPOffset(off) | &MemArg::NominalSPOffset(off) => { let basereg = match mem { - &MemArg::SPOffset(..) => stack_reg(), + &MemArg::SPOffset(..) | &MemArg::NominalSPOffset(..) => stack_reg(), &MemArg::FPOffset(..) => fp_reg(), _ => unreachable!(), }; + let adj = match mem { + &MemArg::NominalSPOffset(..) => { + debug!( + "mem_finalize: nominal SP offset {} + adj {} -> {}", + off, + state.virtual_sp_offset, + off + state.virtual_sp_offset + ); + state.virtual_sp_offset + } + _ => 0, + }; + let off = off + adj; + if let Some(simm9) = SImm9::maybe_from_i64(off) { let mem = MemArg::Unscaled(basereg, simm9); (vec![], mem) } else { - // In an addition, x31 is the zero register, not sp; we have only one temporary - // so we can't do the proper add here. - debug_assert_ne!( - basereg, - stack_reg(), - "should have diverted SP before mem_finalize" - ); - let tmp = writable_spilltmp_reg(); let mut const_insts = Inst::load_constant(tmp, off as u64); - let add_inst = Inst::AluRRR { + // N.B.: we must use AluRRRExtend because AluRRR uses the "shifted register" form + // (AluRRRShift) instead, which interprets register 31 as the zero reg, not SP. SP + // is a valid base (for SPOffset) which we must handle here. + // Also, SP needs to be the first arg, not second. + let add_inst = Inst::AluRRRExtend { alu_op: ALUOp::Add64, rd: tmp, - rn: tmp.to_reg(), - rm: basereg, + rn: basereg, + rm: tmp.to_reg(), + extendop: ExtendOp::UXTX, }; const_insts.push(add_inst); (const_insts.to_vec(), MemArg::reg(tmp.to_reg())) @@ -322,8 +334,16 @@ fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 { (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg()) } +/// State carried between emissions of a sequence of instructions.
+#[derive(Default, Clone, Debug)] +pub struct EmitState { + virtual_sp_offset: i64, +} + impl<O: MachSectionOutput> MachInstEmit<O> for Inst { - fn emit(&self, sink: &mut O, flags: &settings::Flags) { + type State = EmitState; + + fn emit(&self, sink: &mut O, flags: &settings::Flags, state: &mut EmitState) { match self { &Inst::AluRRR { alu_op, rd, rn, rm } => { let top11 = match alu_op { @@ -596,10 +616,10 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst { ref mem, srcloc, } => { - let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem); + let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem, state); for inst in mem_insts.into_iter() { - inst.emit(sink, flags); + inst.emit(sink, flags, state); } // ldst encoding helpers take Reg, not Writable<Reg>. @@ -697,9 +717,9 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst { sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd)); } // Eliminated by `mem_finalize()` above. - &MemArg::SPOffset(..) | &MemArg::FPOffset(..) => { - panic!("Should not see stack-offset here!") - } + &MemArg::SPOffset(..) + | &MemArg::FPOffset(..) + | &MemArg::NominalSPOffset(..) => panic!("Should not see stack-offset here!"), } } @@ -739,10 +759,10 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst { ref mem, srcloc, } => { - let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem); + let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem, state); for inst in mem_insts.into_iter() { - inst.emit(sink, flags); + inst.emit(sink, flags, state); } let op = match self { @@ -794,9 +814,9 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst { sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd)); } // Eliminated by `mem_finalize()` above. - &MemArg::SPOffset(..) | &MemArg::FPOffset(..) => { - panic!("Should not see stack-offset here!") - } + &MemArg::SPOffset(..) + | &MemArg::FPOffset(..) + | &MemArg::NominalSPOffset(..) => panic!("Should not see stack-offset here!"), } } @@ -980,11 +1000,11 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst { mem: MemArg::Label(MemLabel::PCRel(8)), srcloc: None, }; - inst.emit(sink, flags); + inst.emit(sink, flags, state); let inst = Inst::Jump { dest: BranchTarget::ResolvedOffset(8), }; - inst.emit(sink, flags); + inst.emit(sink, flags, state); sink.put4(const_data.to_bits()); } &Inst::LoadFpuConst64 { rd, const_data } => { @@ -993,11 +1013,11 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst { mem: MemArg::Label(MemLabel::PCRel(8)), srcloc: None, }; - inst.emit(sink, flags); + inst.emit(sink, flags, state); let inst = Inst::Jump { dest: BranchTarget::ResolvedOffset(12), }; - inst.emit(sink, flags); + inst.emit(sink, flags, state); sink.put8(const_data.to_bits()); } &Inst::FpuCSel32 { rd, rn, rm, cond } => { @@ -1084,7 +1104,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst { if top22 != 0 { sink.put4(enc_extend(top22, rd, rn)); } else { - Inst::mov32(rd, rn).emit(sink, flags); + Inst::mov32(rd, rn).emit(sink, flags, state); } } &Inst::Extend { @@ -1107,7 +1127,7 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst { rn: zero_reg(), rm: rd.to_reg(), }; - sub_inst.emit(sink, flags); + sub_inst.emit(sink, flags, state); } &Inst::Extend { rd, @@ -1248,13 +1268,13 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst { // Save index in a tmp (the live range of ridx only goes to start of this // sequence; rtmp1 or rtmp2 may overwrite it).
let inst = Inst::gen_move(rtmp2, ridx, I64); - inst.emit(sink, flags); + inst.emit(sink, flags, state); // Load address of jump table let inst = Inst::Adr { rd: rtmp1, label: MemLabel::PCRel(16), }; - inst.emit(sink, flags); + inst.emit(sink, flags, state); // Load value out of jump table let inst = Inst::SLoad32 { rd: rtmp2, @@ -1266,7 +1286,7 @@ impl MachInstEmit for Inst { ), srcloc: None, // can't cause a user trap. }; - inst.emit(sink, flags); + inst.emit(sink, flags, state); // Add base of jump table to jump-table-sourced block offset let inst = Inst::AluRRR { alu_op: ALUOp::Add64, @@ -1274,14 +1294,14 @@ impl MachInstEmit for Inst { rn: rtmp1.to_reg(), rm: rtmp2.to_reg(), }; - inst.emit(sink, flags); + inst.emit(sink, flags, state); // Branch to computed address. (`targets` here is only used for successor queries // and is not needed for emission.) let inst = Inst::IndirectBr { rn: rtmp1.to_reg(), targets: vec![], }; - inst.emit(sink, flags); + inst.emit(sink, flags, state); // Emit jump table (table of 32-bit offsets). for target in targets { let off = target.as_offset_words() * 4; @@ -1297,11 +1317,11 @@ impl MachInstEmit for Inst { mem: MemArg::Label(MemLabel::PCRel(8)), srcloc: None, // can't cause a user trap. }; - inst.emit(sink, flags); + inst.emit(sink, flags, state); let inst = Inst::Jump { dest: BranchTarget::ResolvedOffset(12), }; - inst.emit(sink, flags); + inst.emit(sink, flags, state); sink.put8(const_data); } &Inst::LoadExtName { @@ -1315,11 +1335,11 @@ impl MachInstEmit for Inst { mem: MemArg::Label(MemLabel::PCRel(8)), srcloc: None, // can't cause a user trap. }; - inst.emit(sink, flags); + inst.emit(sink, flags, state); let inst = Inst::Jump { dest: BranchTarget::ResolvedOffset(12), }; - inst.emit(sink, flags); + inst.emit(sink, flags, state); sink.add_reloc(srcloc, Reloc::Abs8, name, offset); if flags.emit_all_ones_funcaddrs() { sink.put8(u64::max_value()); @@ -1327,52 +1347,81 @@ impl MachInstEmit for Inst { sink.put8(0); } } - &Inst::LoadAddr { rd, ref mem } => match *mem { - MemArg::FPOffset(fp_off) => { - let alu_op = if fp_off < 0 { - ALUOp::Sub64 - } else { - ALUOp::Add64 - }; - if let Some(imm12) = Imm12::maybe_from_u64(u64::try_from(fp_off.abs()).unwrap()) - { - let inst = Inst::AluRRImm12 { - alu_op, - rd, - imm12, - rn: fp_reg(), - }; - inst.emit(sink, flags); - } else { - let const_insts = - Inst::load_constant(rd, u64::try_from(fp_off.abs()).unwrap()); - for inst in const_insts { - inst.emit(sink, flags); - } - let inst = Inst::AluRRR { - alu_op, - rd, - rn: fp_reg(), - rm: rd.to_reg(), - }; - inst.emit(sink, flags); - } + &Inst::LoadAddr { rd, ref mem } => { + let (mem_insts, mem) = mem_finalize(sink.cur_offset_from_start(), mem, state); + for inst in mem_insts.into_iter() { + inst.emit(sink, flags, state); } - _ => unimplemented!("{:?}", mem), - }, + + let (reg, offset) = match mem { + MemArg::Unscaled(r, simm9) => (r, simm9.value()), + MemArg::UnsignedOffset(r, uimm12scaled) => (r, uimm12scaled.value() as i32), + _ => panic!("Unsupported case for LoadAddr: {:?}", mem), + }; + let abs_offset = if offset < 0 { + -offset as u64 + } else { + offset as u64 + }; + let alu_op = if offset < 0 { + ALUOp::Sub64 + } else { + ALUOp::Add64 + }; + + if offset == 0 { + let mov = Inst::mov(rd, reg); + mov.emit(sink, flags, state); + } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) { + let add = Inst::AluRRImm12 { + alu_op, + rd, + rn: reg, + imm12, + }; + add.emit(sink, flags, state); + } else { + // Use `tmp2` here: `reg` may be `spilltmp` if the 
`MemArg` on this instruction + // was initially an `SPOffset`. Assert that `tmp2` is truly free to use. Note + // that no other instructions will be inserted here (we're emitting directly), + // and a live range of `tmp2` should not span this instruction, so this use + // should otherwise be correct. + debug_assert!(rd.to_reg() != tmp2_reg()); + debug_assert!(reg != tmp2_reg()); + let tmp = writable_tmp2_reg(); + for insn in Inst::load_constant(tmp, abs_offset).into_iter() { + insn.emit(sink, flags, state); + } + let add = Inst::AluRRR { + alu_op, + rd, + rn: reg, + rm: tmp.to_reg(), + }; + add.emit(sink, flags, state); + } + } &Inst::GetPinnedReg { rd } => { let inst = Inst::Mov { rd, rm: xreg(PINNED_REG), }; - inst.emit(sink, flags); + inst.emit(sink, flags, state); } &Inst::SetPinnedReg { rm } => { let inst = Inst::Mov { rd: Writable::from_reg(xreg(PINNED_REG)), rm, }; - inst.emit(sink, flags); + inst.emit(sink, flags, state); + } + &Inst::VirtualSPOffsetAdj { offset } => { + debug!( + "virtual sp offset adjusted by {} -> {}", + offset, + state.virtual_sp_offset + offset + ); + state.virtual_sp_offset += offset; } } } diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index 9ce622d74c..d9d2fe0fd3 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -1313,8 +1313,8 @@ fn test_aarch64_binemit() { mem: MemArg::FPOffset(32768), srcloc: None, }, - "0F0090D2EF011D8BE10140F9", - "movz x15, #32768 ; add x15, x15, fp ; ldr x1, [x15]", + "100090D2B063308B010240F9", + "movz x16, #32768 ; add x16, fp, x16, UXTX ; ldr x1, [x16]", )); insns.push(( Inst::ULoad64 { @@ -1322,8 +1322,8 @@ fn test_aarch64_binemit() { mem: MemArg::FPOffset(-32768), srcloc: None, }, - "EFFF8F92EF011D8BE10140F9", - "movn x15, #32767 ; add x15, x15, fp ; ldr x1, [x15]", + "F0FF8F92B063308B010240F9", + "movn x16, #32767 ; add x16, fp, x16, UXTX ; ldr x1, [x16]", )); insns.push(( Inst::ULoad64 { @@ -1331,8 +1331,8 @@ fn test_aarch64_binemit() { mem: MemArg::FPOffset(1048576), // 2^20 srcloc: None, }, - "0F02A0D2EF011D8BE10140F9", - "movz x15, #16, LSL #16 ; add x15, x15, fp ; ldr x1, [x15]", + "1002A0D2B063308B010240F9", + "movz x16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]", )); insns.push(( Inst::ULoad64 { @@ -1340,8 +1340,8 @@ fn test_aarch64_binemit() { mem: MemArg::FPOffset(1048576 + 1), // 2^20 + 1 srcloc: None, }, - "2F0080D20F02A0F2EF011D8BE10140F9", - "movz x15, #1 ; movk x15, #16, LSL #16 ; add x15, x15, fp ; ldr x1, [x15]", + "300080D21002A0F2B063308B010240F9", + "movz x16, #1 ; movk x16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]", )); insns.push(( @@ -2794,7 +2794,7 @@ fn test_aarch64_binemit() { // Check the encoding is as expected. 
let text_size = { let mut code_sec = MachSectionSize::new(0); - insn.emit(&mut code_sec, &flags); + insn.emit(&mut code_sec, &flags, &mut Default::default()); code_sec.size() }; @@ -2802,7 +2802,7 @@ fn test_aarch64_binemit() { let mut sections = MachSections::new(); let code_idx = sections.add_section(0, text_size); let code_sec = sections.get_section(code_idx); - insn.emit(code_sec, &flags); + insn.emit(code_sec, &flags, &mut Default::default()); sections.emit(&mut sink); let actual_encoding = &sink.stringify(); assert_eq!(expected_encoding, actual_encoding); diff --git a/cranelift/codegen/src/isa/aarch64/inst/imms.rs b/cranelift/codegen/src/isa/aarch64/inst/imms.rs index 08bde5c64b..b8e6bf65bf 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/imms.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/imms.rs @@ -134,6 +134,11 @@ impl SImm9 { pub fn bits(&self) -> u32 { (self.value as u32) & 0x1ff } + + /// Signed value of immediate. + pub fn value(&self) -> i32 { + self.value as i32 + } } /// An unsigned, scaled 12-bit offset. @@ -172,6 +177,11 @@ impl UImm12Scaled { pub fn bits(&self) -> u32 { (self.value as u32 / self.scale_ty.bytes()) & 0xfff } + + /// Value after scaling. + pub fn value(&self) -> u32 { + self.value as u32 * self.scale_ty.bytes() + } } /// A shifted immediate value in 'imm12' format: supports 12 bits, shifted diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 436c0f4b78..14a9a7b6bf 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -13,7 +13,6 @@ use regalloc::{RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable}; use regalloc::{RegUsageCollector, RegUsageMapper, Set}; use alloc::vec::Vec; -use core::convert::TryFrom; use smallvec::{smallvec, SmallVec}; use std::string::{String, ToString}; @@ -741,6 +740,12 @@ pub enum Inst { SetPinnedReg { rm: Reg, }, + + /// Marker, no-op in generated code: SP "virtual offset" is adjusted. This + /// controls how MemArg::NominalSPOffset args are lowered. + VirtualSPOffsetAdj { + offset: i64, + }, } fn count_zero_half_words(mut value: u64) -> usize { @@ -876,7 +881,7 @@ fn memarg_regs(memarg: &MemArg, collector: &mut RegUsageCollector) { &MemArg::FPOffset(..) => { collector.add_use(fp_reg()); } - &MemArg::SPOffset(..) => { + &MemArg::SPOffset(..) | &MemArg::NominalSPOffset(..) => { collector.add_use(stack_reg()); } } @@ -1135,6 +1140,7 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { &Inst::SetPinnedReg { rm } => { collector.add_use(rm); } + &Inst::VirtualSPOffsetAdj { .. } => {} } } @@ -1186,7 +1192,9 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RegUsageMapper) { &mut MemArg::Label(..) => {} &mut MemArg::PreIndexed(ref mut r, ..) => map_mod(m, r), &mut MemArg::PostIndexed(ref mut r, ..) => map_mod(m, r), - &mut MemArg::FPOffset(..) | &mut MemArg::SPOffset(..) => {} + &mut MemArg::FPOffset(..) + | &mut MemArg::SPOffset(..) + | &mut MemArg::NominalSPOffset(..) => {} }; } @@ -1706,6 +1714,7 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RegUsageMapper) { &mut Inst::SetPinnedReg { ref mut rm } => { map_use(mapper, rm); } + &mut Inst::VirtualSPOffsetAdj { .. } => {} } } @@ -1904,7 +1913,7 @@ impl MachInst for Inst { // Pretty-printing of instructions.
fn mem_finalize_for_show(mem: &MemArg, mb_rru: Option<&RealRegUniverse>) -> (String, MemArg) { - let (mem_insts, mem) = mem_finalize(0, mem); + let (mem_insts, mem) = mem_finalize(0, mem, &mut Default::default()); let mut mem_str = mem_insts .into_iter() .map(|inst| inst.show_rru(mb_rru)) @@ -2618,42 +2627,58 @@ impl ShowWithRRU for Inst { let rd = rd.show_rru(mb_rru); format!("ldr {}, 8 ; b 12 ; data {:?} + {}", rd, name, offset) } - &Inst::LoadAddr { rd, ref mem } => match *mem { - MemArg::FPOffset(fp_off) => { - let alu_op = if fp_off < 0 { - ALUOp::Sub64 - } else { - ALUOp::Add64 - }; - if let Some(imm12) = Imm12::maybe_from_u64(u64::try_from(fp_off.abs()).unwrap()) - { - let inst = Inst::AluRRImm12 { - alu_op, - rd, - imm12, - rn: fp_reg(), - }; - inst.show_rru(mb_rru) - } else { - let mut res = String::new(); - let const_insts = - Inst::load_constant(rd, u64::try_from(fp_off.abs()).unwrap()); - for inst in const_insts { - res.push_str(&inst.show_rru(mb_rru)); - res.push_str("; "); - } - let inst = Inst::AluRRR { - alu_op, - rd, - rn: fp_reg(), - rm: rd.to_reg(), - }; - res.push_str(&inst.show_rru(mb_rru)); - res - } + &Inst::LoadAddr { rd, ref mem } => { + // TODO: we really should find a better way to avoid duplication of + // this logic between `emit()` and `show_rru()` -- a separate 1-to-N + // expansion stage (i.e., legalization, but without the slow edit-in-place + // of the existing legalization framework). + let (mem_insts, mem) = mem_finalize(0, mem, &EmitState::default()); + let mut ret = String::new(); + for inst in mem_insts.into_iter() { + ret.push_str(&inst.show_rru(mb_rru)); } - _ => unimplemented!("{:?}", mem), - }, + let (reg, offset) = match mem { + MemArg::Unscaled(r, simm9) => (r, simm9.value()), + MemArg::UnsignedOffset(r, uimm12scaled) => (r, uimm12scaled.value() as i32), + _ => panic!("Unsupported case for LoadAddr: {:?}", mem), + }; + let abs_offset = if offset < 0 { + -offset as u64 + } else { + offset as u64 + }; + let alu_op = if offset < 0 { + ALUOp::Sub64 + } else { + ALUOp::Add64 + }; + + if offset == 0 { + let mov = Inst::mov(rd, reg); + ret.push_str(&mov.show_rru(mb_rru)); + } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) { + let add = Inst::AluRRImm12 { + alu_op, + rd, + rn: reg, + imm12, + }; + ret.push_str(&add.show_rru(mb_rru)); + } else { + let tmp = writable_spilltmp_reg(); + for inst in Inst::load_constant(tmp, abs_offset).into_iter() { + ret.push_str(&inst.show_rru(mb_rru)); + } + let add = Inst::AluRRR { + alu_op, + rd, + rn: reg, + rm: tmp.to_reg(), + }; + ret.push_str(&add.show_rru(mb_rru)); + } + ret + } &Inst::GetPinnedReg { rd } => { let rd = rd.show_rru(mb_rru); format!("get_pinned_reg {}", rd) @@ -2662,6 +2687,7 @@ impl ShowWithRRU for Inst { let rm = rm.show_rru(mb_rru); format!("set_pinned_reg {}", rm) } + &Inst::VirtualSPOffsetAdj { offset } => format!("virtual_sp_offset_adjust {}", offset), } } } diff --git a/cranelift/codegen/src/isa/aarch64/inst/regs.rs b/cranelift/codegen/src/isa/aarch64/inst/regs.rs index f4f19cf517..3a10231edf 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/regs.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/regs.rs @@ -20,23 +20,21 @@ pub const PINNED_REG: u8 = 21; const XREG_INDICES: [u8; 31] = [ // X0 - X7 32, 33, 34, 35, 36, 37, 38, 39, - // X8 - X14 - 40, 41, 42, 43, 44, 45, 46, - // X15 - 59, + // X8 - X15 + 40, 41, 42, 43, 44, 45, 46, 47, // X16, X17 - 47, 48, + 58, 59, // X18 60, // X19, X20 - 49, 50, + 48, 49, // X21, put aside because it's the pinned register. 
- 58, + 57, // X22 - X28 - 51, 52, 53, 54, 55, 56, 57, - // X29 + 50, 51, 52, 53, 54, 55, 56, + // X29 (FP) 61, - // X30 + // X30 (LR) 62, ]; @@ -125,14 +123,17 @@ pub fn writable_fp_reg() -> Writable<Reg> { Writable::from_reg(fp_reg()) } -/// Get a reference to the "spill temp" register. This register is used to -/// compute the address of a spill slot when a direct offset addressing mode from -/// FP is not sufficient (+/- 2^11 words). We exclude this register from regalloc -/// and reserve it for this purpose for simplicity; otherwise we need a -/// multi-stage analysis where we first determine how many spill slots we have, -/// then perhaps remove the reg from the pool and recompute regalloc. +/// Get a reference to the first temporary, sometimes "spill temporary", register. This register is +/// used to compute the address of a spill slot when a direct offset addressing mode from FP is not +/// sufficient (+/- 2^11 words). We exclude this register from regalloc and reserve it for this +/// purpose for simplicity; otherwise we need a multi-stage analysis where we first determine how +/// many spill slots we have, then perhaps remove the reg from the pool and recompute regalloc. +/// +/// We use x16 for this (aka IP0 in the AArch64 ABI) because it's a scratch register but is +/// slightly special (used for linker veneers). We're free to use it as long as we don't expect it +/// to live through call instructions. pub fn spilltmp_reg() -> Reg { - xreg(15) + xreg(16) } /// Get a writable reference to the spilltmp reg. @@ -140,6 +141,20 @@ pub fn writable_spilltmp_reg() -> Writable<Reg> { Writable::from_reg(spilltmp_reg()) } +/// Get a reference to the second temp register. We need this in some edge cases +/// where we need both the spilltmp and another temporary. +/// +/// We use x17 (aka IP1), the other "interprocedural"/linker-veneer scratch reg that is +/// free to use otherwise. +pub fn tmp2_reg() -> Reg { + xreg(17) +} + +/// Get a writable reference to the tmp2 reg. +pub fn writable_tmp2_reg() -> Writable<Reg> { + Writable::from_reg(tmp2_reg()) +} + /// Create the register universe for AArch64. pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse { let mut regs = vec![]; @@ -173,7 +188,7 @@ pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse { for i in 0u8..32u8 { // See above for excluded registers.
- if i == 15 || i == 18 || i == 29 || i == 30 || i == 31 || i == PINNED_REG { + if i == 16 || i == 17 || i == 18 || i == 29 || i == 30 || i == 31 || i == PINNED_REG { continue; } let reg = Reg::new_real( @@ -211,7 +226,8 @@ pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse { regs.len() }; - regs.push((xreg(15).to_real_reg(), "x15".to_string())); + regs.push((xreg(16).to_real_reg(), "x16".to_string())); + regs.push((xreg(17).to_real_reg(), "x17".to_string())); regs.push((xreg(18).to_real_reg(), "x18".to_string())); regs.push((fp_reg().to_real_reg(), "fp".to_string())); regs.push((link_reg().to_real_reg(), "lr".to_string())); diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index f8741212a9..bc2944f2b9 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1291,7 +1291,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<Inst>>(ctx: &mut C, insn: IRIns assert!(inputs.len() == abi.num_args()); for (i, input) in inputs.iter().enumerate() { let arg_reg = input_to_reg(ctx, *input, NarrowValueMode::None); - for inst in abi.gen_copy_reg_to_arg(ctx, i, arg_reg) { + for inst in abi.gen_copy_reg_to_arg(i, arg_reg) { ctx.emit(inst); } } diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 1a6ab16f69..7c833a47c9 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -2183,7 +2183,7 @@ fn test_x64_emit() { // Check the encoding is as expected. let text_size = { let mut code_sec = MachSectionSize::new(0); - insn.emit(&mut code_sec, &flags); + insn.emit(&mut code_sec, &flags, &mut Default::default()); code_sec.size() }; @@ -2191,7 +2191,7 @@ fn test_x64_emit() { let mut sections = MachSections::new(); let code_idx = sections.add_section(0, text_size); let code_sec = sections.get_section(code_idx); - insn.emit(code_sec, &flags); + insn.emit(code_sec, &flags, &mut Default::default()); sections.emit(&mut sink); let actual_encoding = &sink.stringify(); assert_eq!(expected_encoding, actual_encoding); diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index a18dcb31fd..29e75b21fe 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -950,7 +950,9 @@ impl MachInst for Inst { } impl<O: MachSectionOutput> MachInstEmit<O> for Inst { - fn emit(&self, sink: &mut O, _flags: &settings::Flags) { + type State = (); + + fn emit(&self, sink: &mut O, _flags: &settings::Flags, _: &mut Self::State) { emit::emit(self, sink); } } diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index 48278c537a..83aa158662 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -98,7 +98,10 @@ pub trait ABIBody { fn gen_epilogue(&self) -> Vec<Self::I>; /// Returns the full frame size for the given function, after prologue emission has run. This - /// comprises the spill space, incoming argument space, alignment padding, etc. + /// comprises the spill slots and stack-storage slots (but not storage for clobbered callee-save + /// registers, arguments pushed at callsites within this function, or other ephemeral pushes). + /// This is used for ABI variants where the client generates prologue/epilogue code, as in + /// Baldrdash (SpiderMonkey integration). fn frame_size(&self) -> u32; /// Get the spill-slot size.
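Given the sharpened `frame_size()` contract above, a worked sketch of what the returned value does and does not include (assumed sizes, ignoring any alignment padding the real prologue may apply):

```rust
/// Hedged sketch of the frame_size() contract documented above: stack-storage
/// slots plus spill slots only; clobber saves and callsite argument pushes
/// are ephemeral and excluded. The 8-bytes-per-spillslot figure matches the
/// aarch64 accessors earlier in this patch; alignment is ignored here.
fn frame_size(stackslots_size: u32, num_spillslots: u32) -> u32 {
    stackslots_size + num_spillslots * 8
}

fn main() {
    // 32 bytes of stack slots + 4 spill slots = 64 bytes, no matter how many
    // callee-saves the prologue later pushes.
    assert_eq!(frame_size(32, 4), 64);
}
```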
@@ -133,12 +136,7 @@ pub trait ABICall { fn num_args(&self) -> usize; /// Copy an argument value from a source register, prior to the call. - fn gen_copy_reg_to_arg<C: LowerCtx<Self::I>>( - &self, - ctx: &mut C, - idx: usize, - from_reg: Reg, - ) -> Vec<Self::I>; + fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: Reg) -> Vec<Self::I>; /// Copy a return value into a destination register, after the call returns. fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Self::I; diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index ccb62deb7e..697601c672 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -214,8 +214,10 @@ pub enum MachTerminator<'a> { /// A trait describing the ability to encode a MachInst into binary machine code. pub trait MachInstEmit<O: MachSectionOutput> { + /// Persistent state carried across `emit` invocations. + type State: Default + Clone + Debug; /// Emit the instruction. - fn emit(&self, code: &mut O, flags: &Flags); + fn emit(&self, code: &mut O, flags: &Flags, state: &mut Self::State); } /// The result of a `MachBackend::compile_function()` call. Contains machine diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 836be33941..a4801bfe3e 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -526,12 +526,13 @@ impl<I: VCodeInst> VCode<I> { // Compute block offsets. let mut code_section = MachSectionSize::new(0); let mut block_offsets = vec![0; self.num_blocks()]; + let mut state = Default::default(); for &block in &self.final_block_order { code_section.offset = I::align_basic_block(code_section.offset); block_offsets[block as usize] = code_section.offset; let (start, end) = self.block_ranges[block as usize]; for iix in start..end { - self.insts[iix as usize].emit(&mut code_section, flags); + self.insts[iix as usize].emit(&mut code_section, flags, &mut state); } } @@ -544,13 +545,14 @@ impl<I: VCodeInst> VCode<I> { // it (so forward references are now possible), and (ii) mutates the // instructions. let mut code_section = MachSectionSize::new(0); + let mut state = Default::default(); for &block in &self.final_block_order { code_section.offset = I::align_basic_block(code_section.offset); let (start, end) = self.block_ranges[block as usize]; for iix in start..end { self.insts[iix as usize] .with_block_offsets(code_section.offset, &self.final_block_offsets[..]); - self.insts[iix as usize].emit(&mut code_section, flags); + self.insts[iix as usize].emit(&mut code_section, flags, &mut state); } } } @@ -563,6 +565,7 @@ impl<I: VCodeInst> VCode<I> { let mut sections = MachSections::new(); let code_idx = sections.add_section(0, self.code_size); let code_section = sections.get_section(code_idx); + let mut state = Default::default(); let flags = self.abi.flags(); let mut cur_srcloc = None; @@ -571,7 +574,7 @@ impl<I: VCodeInst> VCode<I> { while new_offset > code_section.cur_offset_from_start() { // Pad with NOPs up to the aligned block offset.
let nop = I::gen_nop((new_offset - code_section.cur_offset_from_start()) as usize); - nop.emit(code_section, flags); + nop.emit(code_section, flags, &mut Default::default()); } assert_eq!(code_section.cur_offset_from_start(), new_offset); @@ -586,7 +589,7 @@ impl VCode { cur_srcloc = Some(srcloc); } - self.insts[iix as usize].emit(code_section, flags); + self.insts[iix as usize].emit(code_section, flags, &mut state); } if cur_srcloc.is_some() { diff --git a/cranelift/filetests/filetests/vcode/aarch64/call.clif b/cranelift/filetests/filetests/vcode/aarch64/call.clif index d88a20ceab..4178a4c2f7 100644 --- a/cranelift/filetests/filetests/vcode/aarch64/call.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/call.clif @@ -11,8 +11,8 @@ block0(v0: i64): ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp -; nextln: ldr x15, 8 ; b 12 ; data -; nextln: blr x15 +; nextln: ldr x16, 8 ; b 12 ; data +; nextln: blr x16 ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret diff --git a/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif b/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif index 13b431867e..c9734e7cdd 100644 --- a/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif @@ -45,8 +45,8 @@ block0(v0: i64): ; nextln: subs xzr, sp, x0 ; nextln: b.hs 8 ; nextln: udf -; nextln: ldr x15 -; nextln: blr x15 +; nextln: ldr x16 +; nextln: blr x16 ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret @@ -64,13 +64,13 @@ block0(v0: i64): ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp -; nextln: ldr x15, [x0] -; nextln: ldr x15, [x15, #4] -; nextln: subs xzr, sp, x15 +; nextln: ldr x16, [x0] +; nextln: ldr x16, [x16, #4] +; nextln: subs xzr, sp, x16 ; nextln: b.hs 8 ; nextln: udf -; nextln: ldr x15 -; nextln: blr x15 +; nextln: ldr x16 +; nextln: blr x16 ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret @@ -84,8 +84,8 @@ block0(v0: i64): ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp -; nextln: add x15, x0, #176 -; nextln: subs xzr, sp, x15 +; nextln: add x16, x0, #176 +; nextln: subs xzr, sp, x16 ; nextln: b.hs 8 ; nextln: udf ; nextln: sub sp, sp, #176 @@ -104,14 +104,14 @@ block0(v0: i64): ; nextln: subs xzr, sp, x0 ; nextln: b.hs 8 ; nextln: udf -; nextln: movz x16, #6784 -; nextln: movk x16, #6, LSL #16 -; nextln: add x15, x0, x16, UXTX -; nextln: subs xzr, sp, x15 +; nextln: movz x17, #6784 +; nextln: movk x17, #6, LSL #16 +; nextln: add x16, x0, x17, UXTX +; nextln: subs xzr, sp, x16 ; nextln: b.hs 8 ; nextln: udf -; nextln: ldr x15, 8 ; b 12 ; data 400000 -; nextln: sub sp, sp, x15, UXTX +; nextln: ldr x16, 8 ; b 12 ; data 400000 +; nextln: sub sp, sp, x16, UXTX ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret @@ -128,10 +128,10 @@ block0(v0: i64): ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp -; nextln: ldr x15, [x0] -; nextln: ldr x15, [x15, #4] -; nextln: add x15, x15, #32 -; nextln: subs xzr, sp, x15 +; nextln: ldr x16, [x0] +; nextln: ldr x16, [x16, #4] +; nextln: add x16, x16, #32 +; nextln: subs xzr, sp, x16 ; nextln: b.hs 8 ; nextln: udf ; nextln: sub sp, sp, #32 @@ -151,19 +151,19 @@ block0(v0: i64): ; check: stp fp, lr, [sp, #-16]! 
; nextln: mov fp, sp -; nextln: ldr x15, [x0] -; nextln: ldr x15, [x15, #4] -; nextln: subs xzr, sp, x15 +; nextln: ldr x16, [x0] +; nextln: ldr x16, [x16, #4] +; nextln: subs xzr, sp, x16 ; nextln: b.hs 8 ; nextln: udf -; nextln: movz x16, #6784 -; nextln: movk x16, #6, LSL #16 -; nextln: add x15, x15, x16, UXTX -; nextln: subs xzr, sp, x15 +; nextln: movz x17, #6784 +; nextln: movk x17, #6, LSL #16 +; nextln: add x16, x16, x17, UXTX +; nextln: subs xzr, sp, x16 ; nextln: b.hs 8 ; nextln: udf -; nextln: ldr x15, 8 ; b 12 ; data 400000 -; nextln: sub sp, sp, x15, UXTX +; nextln: ldr x16, 8 ; b 12 ; data 400000 +; nextln: sub sp, sp, x16, UXTX ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret @@ -179,11 +179,11 @@ block0(v0: i64): ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp -; nextln: movz x15, #6784 -; nextln: movk x15, #6, LSL #16 -; nextln: ldr x15, [x0, x15] -; nextln: add x15, x15, #32 -; nextln: subs xzr, sp, x15 +; nextln: movz x16, #6784 +; nextln: movk x16, #6, LSL #16 +; nextln: ldr x16, [x0, x16] +; nextln: add x16, x16, #32 +; nextln: subs xzr, sp, x16 ; nextln: b.hs 8 ; nextln: udf ; nextln: sub sp, sp, #32 diff --git a/cranelift/filetests/filetests/vcode/aarch64/stack.clif b/cranelift/filetests/filetests/vcode/aarch64/stack.clif index 99d60d97ad..47c4b37a0f 100644 --- a/cranelift/filetests/filetests/vcode/aarch64/stack.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/stack.clif @@ -12,7 +12,7 @@ block0: ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp ; nextln: sub sp, sp, #16 -; nextln: sub x0, fp, #8 +; nextln: mov x0, sp ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret @@ -29,9 +29,9 @@ block0: ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp -; nextln: ldr x15, 8 ; b 12 ; data 100016 -; nextln: sub sp, sp, x15, UXTX -; nextln: movz x0, #34472; movk x0, #1, LSL #16; sub x0, fp, x0 +; nextln: ldr x16, 8 ; b 12 ; data 100016 +; nextln: sub sp, sp, x16, UXTX +; nextln: mov x0, sp ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret @@ -50,7 +50,7 @@ block0: ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp ; nextln: sub sp, sp, #16 -; nextln: sub x0, fp, #8 +; nextln: mov x0, sp ; nextln: ldur x0, [x0] ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 @@ -68,9 +68,9 @@ block0: ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp -; nextln: ldr x15, 8 ; b 12 ; data 100016 -; nextln: sub sp, sp, x15, UXTX -; nextln: movz x0, #34472; movk x0, #1, LSL #16; sub x0, fp, x0 +; nextln: ldr x16, 8 ; b 12 ; data 100016 +; nextln: sub sp, sp, x16, UXTX +; nextln: mov x0, sp ; nextln: ldur x0, [x0] ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 @@ -88,7 +88,7 @@ block0(v0: i64): ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp ; nextln: sub sp, sp, #16 -; nextln: sub x1, fp, #8 +; nextln: mov x1, sp ; nextln: stur x0, [x1] ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 @@ -106,9 +106,9 @@ block0(v0: i64): ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp -; nextln: ldr x15, 8 ; b 12 ; data 100016 -; nextln: sub sp, sp, x15, UXTX -; nextln: movz x1, #34472; movk x1, #1, LSL #16; sub x1, fp, x1 +; nextln: ldr x16, 8 ; b 12 ; data 100016 +; nextln: sub sp, sp, x16, UXTX +; nextln: mov x1, sp ; nextln: stur x0, [x1] ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16
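Finally, the `MachInstEmit` plumbing that threads through the tests and filetests above follows one pattern: a default-initialized state value per linear emission pass, mutated only by meta-instructions. A self-contained sketch of that pattern (simplified stand-in trait and types, not the real cranelift definitions):

```rust
/// Hedged sketch of the state-threading pattern introduced by this patch:
/// the emitter owns one state per pass; a meta-instruction such as
/// VirtualSPOffsetAdj mutates it and emits no bytes. Simplified stand-ins,
/// not the actual cranelift `MachInstEmit`/`Inst` types.
trait Emit {
    type State: Default;
    fn emit(&self, out: &mut Vec<u8>, state: &mut Self::State);
}

#[derive(Default)]
struct SpState {
    virtual_sp_offset: i64,
}

enum ToyInst {
    VirtualSpAdj(i64),
    Nop,
}

impl Emit for ToyInst {
    type State = SpState;
    fn emit(&self, out: &mut Vec<u8>, state: &mut Self::State) {
        match self {
            // Meta-instruction: adjust the running offset, emit nothing.
            ToyInst::VirtualSpAdj(off) => state.virtual_sp_offset += *off,
            // Placeholder encoding for a real instruction.
            ToyInst::Nop => out.push(0),
        }
    }
}

fn main() {
    let insts = [
        ToyInst::VirtualSpAdj(32),
        ToyInst::Nop,
        ToyInst::VirtualSpAdj(-32),
    ];
    let mut out = Vec::new();
    let mut state = SpState::default(); // fresh state per pass, as in vcode.rs
    for inst in &insts {
        inst.emit(&mut out, &mut state);
    }
    assert_eq!(state.virtual_sp_offset, 0);
    assert_eq!(out.len(), 1);
}
```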