diff --git a/Cargo.lock b/Cargo.lock
index b181fb965d..f7eb191f46 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1680,9 +1680,9 @@ dependencies = [

 [[package]]
 name = "regalloc"
-version = "0.0.26"
+version = "0.0.27"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c03092d79e0fd610932d89ed53895a38c0dd3bcd317a0046e69940de32f1d95"
+checksum = "b9ba8aaf5fe7cf307c6dbdaeed85478961d29e25e3bee5169e11b92fa9f027a8"
 dependencies = [
  "log",
  "rustc-hash",
diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml
index 578acf0e19..cdafe049e2 100644
--- a/cranelift/codegen/Cargo.toml
+++ b/cranelift/codegen/Cargo.toml
@@ -26,7 +26,7 @@ smallvec = { version = "1.0.0" }
 thiserror = "1.0.4"
 byteorder = { version = "1.3.2", default-features = false }
 peepmatic-runtime = { path = "../peepmatic/crates/runtime", optional = true, version = "0.2.0" }
-regalloc = "0.0.26"
+regalloc = { version = "0.0.27" }

 # It is a goal of the cranelift-codegen crate to have minimal external dependencies.
 # Please don't add any unless they are essential to the task of creating binary
 # machine code. Integration tests that need external dependencies can be
diff --git a/cranelift/codegen/src/inst_predicates.rs b/cranelift/codegen/src/inst_predicates.rs
index f0d6fdf6b5..0ee2eb7c1f 100644
--- a/cranelift/codegen/src/inst_predicates.rs
+++ b/cranelift/codegen/src/inst_predicates.rs
@@ -61,3 +61,10 @@ pub fn is_constant_64bit(func: &Function, inst: Inst) -> Option<u64> {
         _ => None,
     }
 }
+
+/// Is the given instruction a safepoint (i.e., potentially causes a GC, depending on the
+/// embedding, and so requires reftyped values to be enumerated with a stackmap)?
+pub fn is_safepoint(func: &Function, inst: Inst) -> bool {
+    let op = func.dfg[inst].opcode();
+    op.is_resumable_trap() || op.is_call()
+}
diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs
index f89e60b175..fba104ef43 100644
--- a/cranelift/codegen/src/isa/aarch64/abi.rs
+++ b/cranelift/codegen/src/isa/aarch64/abi.rs
@@ -90,12 +90,13 @@
 //! - Return v1 in memory at `[P+8]`.
 //! - Return v0 in memory at `[P+16]`.

+use crate::binemit::Stackmap;
 use crate::ir;
 use crate::ir::types;
 use crate::ir::types::*;
 use crate::ir::{ArgumentExtension, StackSlot};
 use crate::isa;
-use crate::isa::aarch64::{inst::*, lower::ty_bits};
+use crate::isa::aarch64::{inst::EmitState, inst::*, lower::ty_bits};
 use crate::machinst::*;
 use crate::settings;
 use crate::{CodegenError, CodegenResult};
@@ -372,7 +373,10 @@ pub struct AArch64ABIBody {
     clobbered: Set<Writable<RealReg>>,
     /// Total number of spillslots, from regalloc.
     spillslots: Option<usize>,
-    /// Total frame size.
+    /// "Total frame size", as defined by "distance between FP and nominal-SP".
+    /// Some items are pushed below nominal SP, so the function may actually use
+    /// more stack than this would otherwise imply. It is simply the initial
+    /// frame/allocation size needed for stackslots and spillslots.
     total_frame_size: Option<u32>,
     /// The register holding the return-area pointer, if needed.
    ret_area_ptr: Option<Writable<Reg>>,
@@ -811,6 +815,35 @@ fn get_caller_saves(call_conv: isa::CallConv) -> Vec<Writable<Reg>> {
     caller_saved
 }

+fn gen_sp_adjust_insts<F: FnMut(Inst)>(adj: u64, is_sub: bool, mut f: F) {
+    let alu_op = if is_sub { ALUOp::Sub64 } else { ALUOp::Add64 };
+
+    if let Some(imm12) = Imm12::maybe_from_u64(adj) {
+        let adj_inst = Inst::AluRRImm12 {
+            alu_op,
+            rd: writable_stack_reg(),
+            rn: stack_reg(),
+            imm12,
+        };
+        f(adj_inst);
+    } else {
+        let tmp = writable_spilltmp_reg();
+        let const_inst = Inst::LoadConst64 {
+            rd: tmp,
+            const_data: adj,
+        };
+        let adj_inst = Inst::AluRRRExtend {
+            alu_op,
+            rd: writable_stack_reg(),
+            rn: stack_reg(),
+            rm: tmp.to_reg(),
+            extendop: ExtendOp::UXTX,
+        };
+        f(const_inst);
+        f(adj_inst);
+    }
+}
+
 impl ABIBody for AArch64ABIBody {
     type I = Inst;

@@ -1025,6 +1058,29 @@ impl ABIBody for AArch64ABIBody {
         store_stack(MemArg::NominalSPOffset(sp_off, ty), from_reg, ty)
     }

+    fn spillslots_to_stackmap(&self, slots: &[SpillSlot], state: &EmitState) -> Stackmap {
+        assert!(state.virtual_sp_offset >= 0);
+        trace!(
+            "spillslots_to_stackmap: slots = {:?}, state = {:?}",
+            slots,
+            state
+        );
+        let map_size = (state.virtual_sp_offset + state.nominal_sp_to_fp) as u32;
+        let map_words = (map_size + 7) / 8;
+        let mut bits = std::iter::repeat(false)
+            .take(map_words as usize)
+            .collect::<Vec<bool>>();
+
+        let first_spillslot_word =
+            ((self.stackslots_size + state.virtual_sp_offset as u32) / 8) as usize;
+        for &slot in slots {
+            let slot = slot.get() as usize;
+            bits[first_spillslot_word + slot] = true;
+        }
+
+        Stackmap::from_slice(&bits[..])
+    }
+
     fn gen_prologue(&mut self) -> Vec<Inst> {
         let mut insts = vec![];
         if !self.call_conv.extends_baldrdash() {
@@ -1060,6 +1116,9 @@ impl ABIBody for AArch64ABIBody {
         }
         let total_stacksize = (total_stacksize + 15) & !15; // 16-align the stack.

+        let mut total_sp_adjust = 0;
+        let mut nominal_sp_to_real_sp = 0;
+
         if !self.call_conv.extends_baldrdash() {
             // Leaf functions with zero stack don't need a stack check if one's
             // specified, otherwise always insert the stack check.
@@ -1070,42 +1129,28 @@
             }
         }
         if total_stacksize > 0 {
-            // sub sp, sp, #total_stacksize
-            if let Some(imm12) = Imm12::maybe_from_u64(total_stacksize as u64) {
-                let sub_inst = Inst::AluRRImm12 {
-                    alu_op: ALUOp::Sub64,
-                    rd: writable_stack_reg(),
-                    rn: stack_reg(),
-                    imm12,
-                };
-                insts.push(sub_inst);
-            } else {
-                let tmp = writable_spilltmp_reg();
-                let const_inst = Inst::LoadConst64 {
-                    rd: tmp,
-                    const_data: total_stacksize as u64,
-                };
-                let sub_inst = Inst::AluRRRExtend {
-                    alu_op: ALUOp::Sub64,
-                    rd: writable_stack_reg(),
-                    rn: stack_reg(),
-                    rm: tmp.to_reg(),
-                    extendop: ExtendOp::UXTX,
-                };
-                insts.push(const_inst);
-                insts.push(sub_inst);
-            }
+            total_sp_adjust += total_stacksize as u64;
         }
     }

-        // N.B.: "nominal SP", which we use to refer to stackslots
-        // and spillslots, is *here* (the value of SP at this program point).
+        // N.B.: "nominal SP", which we use to refer to stackslots and
+        // spillslots, is right here.
+        //
         // If we push any clobbers below, we emit a virtual-SP adjustment
         // meta-instruction so that the nominal-SP references behave as if SP
         // were still at this point. See documentation for
         // [crate::isa::aarch64::abi](this module) for more details on
         // stackframe layout and nominal-SP maintenance.

+        if total_sp_adjust > 0 {
+            // sub sp, sp, #total_stacksize
+            gen_sp_adjust_insts(
+                total_sp_adjust,
+                /* is_sub = */ true,
+                |inst| insts.push(inst),
+            );
+        }
+
+        // Save clobbered registers.
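+        // For example: with 32 bytes of stack/spill slots and one callee-save
+        // register pair (16 bytes) pushed below this point, the
+        // `VirtualSPOffsetAdj { offset: 16 }` emitted below makes a
+        // `NominalSPOffset(8)` reference resolve to `[sp, #24]` at emission
+        // time, since the real SP sits 16 bytes below nominal SP.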
let (clobbered_int, clobbered_vec) = get_callee_saves(self.call_conv, self.clobbered.to_vec()); @@ -1149,10 +1194,11 @@ impl ABIBody for AArch64ABIBody { srcloc: None, }); } + nominal_sp_to_real_sp += clobber_size as i64; if clobber_size > 0 { insts.push(Inst::VirtualSPOffsetAdj { - offset: clobber_size as i64, + offset: nominal_sp_to_real_sp, }); } @@ -1246,6 +1292,10 @@ impl ABIBody for AArch64ABIBody { .expect("frame size not computed before prologue generation") } + fn stack_args_size(&self) -> u32 { + self.sig.stack_arg_space as u32 + } + fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 { // We allocate in terms of 8-byte slots. match (rc, ty) { @@ -1256,15 +1306,30 @@ impl ABIBody for AArch64ABIBody { } } - fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Type) -> Inst { + fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option) -> Inst { + let ty = ty_from_ty_hint_or_reg_class(from_reg.to_reg(), ty); self.store_spillslot(to_slot, ty, from_reg.to_reg()) } - fn gen_reload(&self, to_reg: Writable, from_slot: SpillSlot, ty: Type) -> Inst { + fn gen_reload( + &self, + to_reg: Writable, + from_slot: SpillSlot, + ty: Option, + ) -> Inst { + let ty = ty_from_ty_hint_or_reg_class(to_reg.to_reg().to_reg(), ty); self.load_spillslot(from_slot, ty, to_reg.map(|r| r.to_reg())) } } +fn ty_from_ty_hint_or_reg_class(r: Reg, ty: Option) -> Type { + match (ty, r.get_class()) { + (Some(t), _) => t, + (None, RegClass::I64) => I64, + _ => panic!("Unexpected register class!"), + } +} + enum CallDest { ExtName(ir::ExternalName, RelocDistance), Reg(Reg), @@ -1343,7 +1408,7 @@ impl AArch64ABICall { } } -fn adjust_stack>(ctx: &mut C, amount: u64, is_sub: bool) { +fn adjust_stack_and_nominal_sp>(ctx: &mut C, amount: u64, is_sub: bool) { if amount == 0 { return; } @@ -1357,27 +1422,9 @@ fn adjust_stack>(ctx: &mut C, amount: u64, is_sub: bool) { offset: sp_adjustment, }); - let alu_op = if is_sub { ALUOp::Sub64 } else { ALUOp::Add64 }; - if let Some(imm12) = Imm12::maybe_from_u64(amount) { - ctx.emit(Inst::AluRRImm12 { - alu_op, - rd: writable_stack_reg(), - rn: stack_reg(), - imm12, - }) - } else { - ctx.emit(Inst::LoadConst64 { - rd: writable_spilltmp_reg(), - const_data: amount, - }); - ctx.emit(Inst::AluRRRExtend { - alu_op, - rd: writable_stack_reg(), - rn: stack_reg(), - rm: spilltmp_reg(), - extendop: ExtendOp::UXTX, - }); - } + gen_sp_adjust_insts(amount, is_sub, |inst| { + ctx.emit(inst); + }); } impl ABICall for AArch64ABICall { @@ -1393,12 +1440,12 @@ impl ABICall for AArch64ABICall { fn emit_stack_pre_adjust>(&self, ctx: &mut C) { let off = self.sig.stack_arg_space + self.sig.stack_ret_space; - adjust_stack(ctx, off as u64, /* is_sub = */ true) + adjust_stack_and_nominal_sp(ctx, off as u64, /* is_sub = */ true) } fn emit_stack_post_adjust>(&self, ctx: &mut C) { let off = self.sig.stack_arg_space + self.sig.stack_ret_space; - adjust_stack(ctx, off as u64, /* is_sub = */ false) + adjust_stack_and_nominal_sp(ctx, off as u64, /* is_sub = */ false) } fn emit_copy_reg_to_arg>( @@ -1453,7 +1500,7 @@ impl ABICall for AArch64ABICall { self.emit_copy_reg_to_arg(ctx, i, rd.to_reg()); } match &self.dest { - &CallDest::ExtName(ref name, RelocDistance::Near) => ctx.emit(Inst::Call { + &CallDest::ExtName(ref name, RelocDistance::Near) => ctx.emit_safepoint(Inst::Call { info: Box::new(CallInfo { dest: name.clone(), uses, @@ -1469,7 +1516,7 @@ impl ABICall for AArch64ABICall { offset: 0, srcloc: self.loc, }); - ctx.emit(Inst::CallInd { + ctx.emit_safepoint(Inst::CallInd { info: 
Box::new(CallIndInfo {
                    rn: spilltmp_reg(),
                    uses,
@@ -1479,7 +1526,7 @@
                    }
                }),
            });
        }
-            &CallDest::Reg(reg) => ctx.emit(Inst::CallInd {
+            &CallDest::Reg(reg) => ctx.emit_safepoint(Inst::CallInd {
                 info: Box::new(CallIndInfo {
                     rn: reg,
                     uses,
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
index 79f7624bcc..26cedde3b1 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -1,6 +1,6 @@
 //! AArch64 ISA: binary code emission.

-use crate::binemit::{CodeOffset, Reloc};
+use crate::binemit::{CodeOffset, Reloc, Stackmap};
 use crate::ir::constant::ConstantData;
 use crate::ir::types::*;
 use crate::ir::TrapCode;
@@ -376,7 +376,37 @@ fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn:

 /// State carried between emissions of a sequence of instructions.
 #[derive(Default, Clone, Debug)]
 pub struct EmitState {
-    virtual_sp_offset: i64,
+    /// Addend to convert nominal-SP offsets to real-SP offsets at the current
+    /// program point.
+    pub(crate) virtual_sp_offset: i64,
+    /// Offset of FP from nominal-SP.
+    pub(crate) nominal_sp_to_fp: i64,
+    /// Safepoint stackmap for upcoming instruction, as provided to `pre_safepoint()`.
+    stackmap: Option<Stackmap>,
+}
+
+impl MachInstEmitState<Inst> for EmitState {
+    fn new(abi: &dyn ABIBody<I = Inst>) -> Self {
+        EmitState {
+            virtual_sp_offset: 0,
+            nominal_sp_to_fp: abi.frame_size() as i64,
+            stackmap: None,
+        }
+    }
+
+    fn pre_safepoint(&mut self, stackmap: Stackmap) {
+        self.stackmap = Some(stackmap);
+    }
+}
+
+impl EmitState {
+    fn take_stackmap(&mut self) -> Option<Stackmap> {
+        self.stackmap.take()
+    }
+
+    fn clear_post_insn(&mut self) {
+        self.stackmap = None;
+    }
 }

 impl MachInstEmit for Inst {
@@ -1463,6 +1493,9 @@ impl MachInstEmit for Inst {
                 // Noop; this is just a placeholder for epilogues.
} &Inst::Call { ref info } => { + if let Some(s) = state.take_stackmap() { + sink.add_stackmap(4, s); + } sink.add_reloc(info.loc, Reloc::Arm64Call, &info.dest, 0); sink.put4(enc_jump26(0b100101, 0)); if info.opcode.is_call() { @@ -1470,6 +1503,9 @@ impl MachInstEmit for Inst { } } &Inst::CallInd { ref info } => { + if let Some(s) = state.take_stackmap() { + sink.add_stackmap(4, s); + } sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.rn) << 5)); if info.opcode.is_call() { sink.add_call_site(info.loc, info.opcode); @@ -1525,6 +1561,9 @@ impl MachInstEmit for Inst { &Inst::Udf { trap_info } => { let (srcloc, code) = trap_info; sink.add_trap(srcloc, code); + if let Some(s) = state.take_stackmap() { + sink.add_stackmap(4, s); + } sink.put4(0xd4a00000); } &Inst::Adr { rd, off } => { @@ -1709,7 +1748,7 @@ impl MachInstEmit for Inst { debug!( "virtual sp offset adjusted by {} -> {}", offset, - state.virtual_sp_offset + offset + state.virtual_sp_offset + offset, ); state.virtual_sp_offset += offset; } @@ -1728,5 +1767,11 @@ impl MachInstEmit for Inst { let end_off = sink.cur_offset(); debug_assert!((end_off - start_off) <= Inst::worst_case_size()); + + state.clear_post_insn(); + } + + fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String { + self.print_with_state(mb_rru, state) } } diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index d7c2efd544..775fd083d5 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -1346,11 +1346,11 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { collector.add_use(rn); } &Inst::Jump { .. } | &Inst::Ret | &Inst::EpiloguePlaceholder => {} - &Inst::Call { ref info } => { + &Inst::Call { ref info, .. } => { collector.add_uses(&*info.uses); collector.add_defs(&*info.defs); } - &Inst::CallInd { ref info } => { + &Inst::CallInd { ref info, .. } => { collector.add_uses(&*info.uses); collector.add_defs(&*info.defs); collector.add_use(info.rn); @@ -2137,13 +2137,21 @@ impl MachInst for Inst { // feasible for other reasons). 44 } + + fn ref_type_rc(_: &settings::Flags) -> RegClass { + RegClass::I64 + } } //============================================================================= // Pretty-printing of instructions. -fn mem_finalize_for_show(mem: &MemArg, mb_rru: Option<&RealRegUniverse>) -> (String, MemArg) { - let (mem_insts, mem) = mem_finalize(0, mem, &mut Default::default()); +fn mem_finalize_for_show( + mem: &MemArg, + mb_rru: Option<&RealRegUniverse>, + state: &EmitState, +) -> (String, MemArg) { + let (mem_insts, mem) = mem_finalize(0, mem, state); let mut mem_str = mem_insts .into_iter() .map(|inst| inst.show_rru(mb_rru)) @@ -2158,6 +2166,12 @@ fn mem_finalize_for_show(mem: &MemArg, mb_rru: Option<&RealRegUniverse>) -> (Str impl ShowWithRRU for Inst { fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + self.pretty_print(mb_rru, &mut EmitState::default()) + } +} + +impl Inst { + fn print_with_state(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String { fn op_name_size(alu_op: ALUOp) -> (&'static str, OperandSize) { match alu_op { ALUOp::Add32 => ("add", OperandSize::Size32), @@ -2344,7 +2358,7 @@ impl ShowWithRRU for Inst { srcloc: _srcloc, .. } => { - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru); + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); let is_unscaled = match &mem { &MemArg::Unscaled(..) 
=> true, @@ -2392,7 +2406,7 @@ impl ShowWithRRU for Inst { srcloc: _srcloc, .. } => { - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru); + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); let is_unscaled = match &mem { &MemArg::Unscaled(..) => true, @@ -2576,39 +2590,39 @@ impl ShowWithRRU for Inst { } &Inst::FpuLoad32 { rd, ref mem, .. } => { let rd = show_freg_sized(rd.to_reg(), mb_rru, ScalarSize::Size32); - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru); + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); let mem = mem.show_rru(mb_rru); format!("{}ldr {}, {}", mem_str, rd, mem) } &Inst::FpuLoad64 { rd, ref mem, .. } => { let rd = show_freg_sized(rd.to_reg(), mb_rru, ScalarSize::Size64); - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru); + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); let mem = mem.show_rru(mb_rru); format!("{}ldr {}, {}", mem_str, rd, mem) } &Inst::FpuLoad128 { rd, ref mem, .. } => { let rd = rd.to_reg().show_rru(mb_rru); let rd = "q".to_string() + &rd[1..]; - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru); + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); let mem = mem.show_rru(mb_rru); format!("{}ldr {}, {}", mem_str, rd, mem) } &Inst::FpuStore32 { rd, ref mem, .. } => { let rd = show_freg_sized(rd, mb_rru, ScalarSize::Size32); - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru); + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); let mem = mem.show_rru(mb_rru); format!("{}str {}, {}", mem_str, rd, mem) } &Inst::FpuStore64 { rd, ref mem, .. } => { let rd = show_freg_sized(rd, mb_rru, ScalarSize::Size64); - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru); + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); let mem = mem.show_rru(mb_rru); format!("{}str {}, {}", mem_str, rd, mem) } &Inst::FpuStore128 { rd, ref mem, .. } => { let rd = rd.show_rru(mb_rru); let rd = "q".to_string() + &rd[1..]; - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru); + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); let mem = mem.show_rru(mb_rru); format!("{}str {}, {}", mem_str, rd, mem) } @@ -2978,7 +2992,7 @@ impl ShowWithRRU for Inst { // this logic between `emit()` and `show_rru()` -- a separate 1-to-N // expansion stage (i.e., legalization, but without the slow edit-in-place // of the existing legalization framework). 
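+                // Passing the live `state` here (rather than a default one)
+                // means the pretty-printer applies the same
+                // `VirtualSPOffsetAdj` bookkeeping as `emit()`, so any
+                // `NominalSPOffset` is shown with the real-SP offset that will
+                // actually be encoded.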
- let (mem_insts, mem) = mem_finalize(0, mem, &EmitState::default()); + let (mem_insts, mem) = mem_finalize(0, mem, state); let mut ret = String::new(); for inst in mem_insts.into_iter() { ret.push_str(&inst.show_rru(mb_rru)); @@ -3025,7 +3039,10 @@ impl ShowWithRRU for Inst { } ret } - &Inst::VirtualSPOffsetAdj { offset } => format!("virtual_sp_offset_adjust {}", offset), + &Inst::VirtualSPOffsetAdj { offset } => { + state.virtual_sp_offset += offset; + format!("virtual_sp_offset_adjust {}", offset) + } &Inst::EmitIsland { needed_space } => format!("emit_island {}", needed_space), } } diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 676f206959..d7ddaabf95 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1394,7 +1394,7 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Trap | Opcode::ResumableTrap => { let trap_info = (ctx.srcloc(insn), inst_trapcode(ctx.data(insn)).unwrap()); - ctx.emit(Inst::Udf { trap_info }) + ctx.emit_safepoint(Inst::Udf { trap_info }); } Opcode::Trapif | Opcode::Trapff => { @@ -1432,10 +1432,11 @@ pub(crate) fn lower_insn_to_regs>( trap_info, kind: CondBrKind::Cond(cond), }); + ctx.emit_safepoint(Inst::Udf { trap_info }) } Opcode::Safepoint => { - panic!("safepoint support not implemented!"); + panic!("safepoint instructions not used by new backend's safepoints!"); } Opcode::Trapz | Opcode::Trapnz | Opcode::ResumableTrapnz => { diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 385bedad8c..d19895b675 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -5,6 +5,7 @@ use log::trace; use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable}; use std::mem; +use crate::binemit::Stackmap; use crate::ir::{self, types, types::*, ArgumentExtension, StackSlot, Type}; use crate::isa::{self, x64::inst::*}; use crate::machinst::*; @@ -415,6 +416,10 @@ impl ABIBody for X64ABIBody { ) } + fn spillslots_to_stackmap(&self, _slots: &[SpillSlot], _state: &EmitState) -> Stackmap { + unimplemented!("spillslots_to_stackmap") + } + fn gen_prologue(&mut self) -> Vec { let r_rsp = regs::rsp(); @@ -553,6 +558,10 @@ impl ABIBody for X64ABIBody { .expect("frame size not computed before prologue generation") as u32 } + fn stack_args_size(&self) -> u32 { + unimplemented!("I need to be computed!") + } + fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 { // We allocate in terms of 8-byte slots. 
        match (rc, ty) {
@@ -563,15 +572,30 @@
         }
     }

-    fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Type) -> Inst {
+    fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option<Type>) -> Inst {
+        let ty = ty_from_ty_hint_or_reg_class(from_reg.to_reg(), ty);
         self.store_spillslot(to_slot, ty, from_reg.to_reg())
     }

-    fn gen_reload(&self, to_reg: Writable<Reg>, from_slot: SpillSlot, ty: Type) -> Inst {
+    fn gen_reload(
+        &self,
+        to_reg: Writable<Reg>,
+        from_slot: SpillSlot,
+        ty: Option<Type>,
+    ) -> Inst {
+        let ty = ty_from_ty_hint_or_reg_class(to_reg.to_reg().to_reg(), ty);
         self.load_spillslot(from_slot, ty, to_reg.map(|r| r.to_reg()))
     }
 }

+fn ty_from_ty_hint_or_reg_class(r: Reg, ty: Option<Type>) -> Type {
+    match (ty, r.get_class()) {
+        (Some(t), _) => t,
+        (None, RegClass::I64) => I64,
+        _ => panic!("Unexpected register class!"),
+    }
+}
+
 fn get_caller_saves(call_conv: isa::CallConv) -> Vec<Writable<Reg>> {
     let mut caller_saved = Vec::new();
diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs
index b682046956..9c917f0bc9 100644
--- a/cranelift/codegen/src/isa/x64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -1258,6 +1258,10 @@ impl MachInst for Inst {
         15
     }

+    fn ref_type_rc(_: &settings::Flags) -> RegClass {
+        RegClass::I64
+    }
+
     type LabelUse = LabelUse;
 }

@@ -1273,6 +1277,18 @@ impl MachInstEmit for Inst {
     fn emit(&self, sink: &mut MachBuffer<Inst>, flags: &settings::Flags, state: &mut Self::State) {
         emit::emit(self, sink, flags, state);
     }
+
+    fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, _: &mut Self::State) -> String {
+        self.show_rru(mb_rru)
+    }
+}
+
+impl MachInstEmitState<Inst> for EmitState {
+    fn new(_: &dyn ABIBody<I = Inst>) -> Self {
+        EmitState {
+            virtual_sp_offset: 0,
+        }
+    }
 }

 /// A label-use (internal relocation) in generated code.
diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs
index 6e5170c3f6..84f574317b 100644
--- a/cranelift/codegen/src/machinst/abi.rs
+++ b/cranelift/codegen/src/machinst/abi.rs
@@ -1,5 +1,6 @@
 //! ABI definitions.

+use crate::binemit::Stackmap;
 use crate::ir::{ArgumentExtension, StackSlot};
 use crate::machinst::*;
 use crate::settings;
@@ -100,6 +101,15 @@ pub trait ABIBody {
     /// Store to a spillslot.
     fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> Self::I;

+    /// Generate a stackmap, given a list of spillslots and the emission state
+    /// at a given program point (prior to emission of the safepointing
+    /// instruction).
+    fn spillslots_to_stackmap(
+        &self,
+        slots: &[SpillSlot],
+        state: &<Self::I as MachInstEmit>::State,
+    ) -> Stackmap;
+
     /// Generate a prologue, post-regalloc. This should include any stack
     /// frame or other setup necessary to use the other methods (`load_arg`,
     /// `store_retval`, and spillslot accesses.) `self` is mutable so that we
@@ -113,21 +123,34 @@
     /// likely closely related.
     fn gen_epilogue(&self) -> Vec<Self::I>;

-    /// Returns the full frame size for the given function, after prologue emission has run. This
-    /// comprises the spill slots and stack-storage slots (but not storage for clobbered callee-save
-    /// registers, arguments pushed at callsites within this function, or other ephemeral pushes).
-    /// This is used for ABI variants where the client generates prologue/epilogue code, as in
-    /// Baldrdash (SpiderMonkey integration).
+    /// Returns the full frame size for the given function, after prologue
+    /// emission has run.
This comprises the spill slots and stack-storage slots + /// (but not storage for clobbered callee-save registers, arguments pushed + /// at callsites within this function, or other ephemeral pushes). This is + /// used for ABI variants where the client generates prologue/epilogue code, + /// as in Baldrdash (SpiderMonkey integration). fn frame_size(&self) -> u32; + /// Returns the size of arguments expected on the stack. + fn stack_args_size(&self) -> u32; + /// Get the spill-slot size. fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32; - /// Generate a spill. - fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Type) -> Self::I; + /// Generate a spill. The type, if known, is given; this can be used to + /// generate a store instruction optimized for the particular type rather + /// than the RegClass (e.g., only F64 that resides in a V128 register). If + /// no type is given, the implementation should spill the whole register. + fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option) -> Self::I; - /// Generate a reload (fill). - fn gen_reload(&self, to_reg: Writable, from_slot: SpillSlot, ty: Type) -> Self::I; + /// Generate a reload (fill). As for spills, the type may be given to allow + /// a more optimized load instruction to be generated. + fn gen_reload( + &self, + to_reg: Writable, + from_slot: SpillSlot, + ty: Option, + ) -> Self::I; } /// Trait implemented by an object that tracks ABI-related state and can diff --git a/cranelift/codegen/src/machinst/buffer.rs b/cranelift/codegen/src/machinst/buffer.rs index 3a3578ceb5..b12cd9f98c 100644 --- a/cranelift/codegen/src/machinst/buffer.rs +++ b/cranelift/codegen/src/machinst/buffer.rs @@ -140,7 +140,7 @@ //! Given these invariants, we argue why each optimization preserves execution //! semantics below (grep for "Preserves execution semantics"). -use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc}; +use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc, Stackmap}; use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode}; use crate::machinst::{BlockIndex, MachInstLabelUse, VCodeInst}; @@ -168,6 +168,8 @@ pub struct MachBuffer { call_sites: SmallVec<[MachCallSite; 16]>, /// Any source location mappings referring to this code. srclocs: SmallVec<[MachSrcLoc; 64]>, + /// Any stackmaps referring to this code. + stackmaps: SmallVec<[MachStackMap; 8]>, /// The current source location in progress (after `start_srcloc()` and /// before `end_srcloc()`). This is a (start_offset, src_loc) tuple. cur_srcloc: Option<(CodeOffset, SourceLoc)>, @@ -228,6 +230,8 @@ pub struct MachBufferFinalized { call_sites: SmallVec<[MachCallSite; 16]>, /// Any source location mappings referring to this code. srclocs: SmallVec<[MachSrcLoc; 64]>, + /// Any stackmaps referring to this code. + stackmaps: SmallVec<[MachStackMap; 8]>, } static UNKNOWN_LABEL_OFFSET: CodeOffset = 0xffff_ffff; @@ -262,6 +266,7 @@ impl MachBuffer { traps: SmallVec::new(), call_sites: SmallVec::new(), srclocs: SmallVec::new(), + stackmaps: SmallVec::new(), cur_srcloc: None, label_offsets: SmallVec::new(), label_aliases: SmallVec::new(), @@ -1090,6 +1095,7 @@ impl MachBuffer { traps: self.traps, call_sites: self.call_sites, srclocs: self.srclocs, + stackmaps: self.stackmaps, } } @@ -1149,6 +1155,22 @@ impl MachBuffer { self.srclocs.push(MachSrcLoc { start, end, loc }); } } + + /// Add stackmap metadata for this program point: a set of stack offsets + /// (from SP upward) that contain live references. 
+    ///
+    /// The `offset_to_fp` value is the offset between the nominal SP (to which
+    /// the `stack_offsets` are relative) and the FP value. By subtracting
+    /// `offset_to_fp` from each `stack_offsets` element, one can obtain
+    /// live-reference offsets from FP instead.
+    pub fn add_stackmap(&mut self, insn_len: CodeOffset, stackmap: Stackmap) {
+        let offset = self.cur_offset();
+        self.stackmaps.push(MachStackMap {
+            offset,
+            offset_end: offset + insn_len,
+            stackmap,
+        });
+    }
 }

 impl MachBufferFinalized {
@@ -1207,6 +1229,11 @@ impl MachBufferFinalized {
         sink.begin_rodata();
         sink.end_codegen();
     }
+
+    /// Get the stackmap metadata for this code.
+    pub fn stackmaps(&self) -> &[MachStackMap] {
+        &self.stackmaps[..]
+    }
 }

 /// A constant that is deferred to the next constant-pool opportunity.
@@ -1286,6 +1313,18 @@ pub struct MachSrcLoc {
     pub loc: SourceLoc,
 }

+/// Record of stackmap metadata: stack offsets containing references.
+#[derive(Clone, Debug)]
+pub struct MachStackMap {
+    /// The code offset at which this stackmap applies.
+    pub offset: CodeOffset,
+    /// The code offset at the *end* of the instruction at which this stackmap
+    /// applies.
+    pub offset_end: CodeOffset,
+    /// The Stackmap itself.
+    pub stackmap: Stackmap,
+}
+
 /// Record of branch instruction in the buffer, to facilitate editing.
 #[derive(Clone, Debug)]
 struct MachBranch {
diff --git a/cranelift/codegen/src/machinst/compile.rs b/cranelift/codegen/src/machinst/compile.rs
index 8254fff77b..38e42fd9f3 100644
--- a/cranelift/codegen/src/machinst/compile.rs
+++ b/cranelift/codegen/src/machinst/compile.rs
@@ -23,7 +23,7 @@ where
     // Build the lowering context.
     let lower = Lower::new(f, abi, block_order)?;
     // Lower the IR.
-    let mut vcode = lower.lower(b)?;
+    let (mut vcode, stackmap_request_info) = lower.lower(b)?;

     debug!(
         "vcode from lowering: \n{}",
@@ -57,11 +57,23 @@
         }
     }

+    // If there are no reference-typed values, or there are but no safepoints
+    // at which we need to know about them, then we don't need stackmaps.
+    let sri = if stackmap_request_info.reftyped_vregs.len() > 0
+        && stackmap_request_info.safepoint_insns.len() > 0
+    {
+        Some(&stackmap_request_info)
+    } else {
+        None
+    };
+
     let result = {
         let _tt = timing::regalloc();
         allocate_registers_with_opts(
             &mut vcode,
             b.reg_universe(),
+            sri,
             Options {
                 run_checker,
                 algorithm,
diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs
index ea97607dfe..fbc6c2ee8a 100644
--- a/cranelift/codegen/src/machinst/lower.rs
+++ b/cranelift/codegen/src/machinst/lower.rs
@@ -4,6 +4,7 @@

 use crate::entity::SecondaryMap;
 use crate::fx::{FxHashMap, FxHashSet};
+use crate::inst_predicates::is_safepoint;
 use crate::inst_predicates::{has_side_effect_or_load, is_constant_64bit};
 use crate::ir::instructions::BranchInfo;
 use crate::ir::types::I64;
@@ -17,7 +18,7 @@ use crate::machinst::{
 };
 use crate::CodegenResult;

-use regalloc::{Reg, RegClass, VirtualReg, Writable};
+use regalloc::{Reg, RegClass, StackmapRequestInfo, VirtualReg, Writable};

 use alloc::boxed::Box;
 use alloc::vec::Vec;
@@ -93,6 +94,8 @@ pub trait LowerCtx {
     /// every side-effecting op; the backend should not try to merge across
     /// side-effect colors unless the op being merged is known to be pure.
     fn inst_color(&self, ir_inst: Inst) -> InstColor;
+    /// Determine whether an instruction is a safepoint.
+ fn is_safepoint(&self, ir_inst: Inst) -> bool; // Instruction input/output queries: @@ -146,6 +149,8 @@ pub trait LowerCtx { fn alloc_tmp(&mut self, rc: RegClass, ty: Type) -> Writable; /// Emit a machine instruction. fn emit(&mut self, mach_inst: Self::I); + /// Emit a machine instruction that is a safepoint. + fn emit_safepoint(&mut self, mach_inst: Self::I); /// Indicate that the given input uses the register returned by /// `get_input()`. Codegen may not happen otherwise for the producing /// instruction if it has no side effects and no uses. @@ -206,6 +211,14 @@ pub trait LowerBackend { } } +/// A pending instruction to insert and auxiliary information about it: its source location and +/// whether it is a safepoint. +struct InstTuple { + loc: SourceLoc, + is_safepoint: bool, + inst: I, +} + /// Machine-independent lowering driver / machine-instruction container. Maintains a correspondence /// from original Inst to MachInsts. pub struct Lower<'func, I: VCodeInst> { @@ -237,17 +250,17 @@ pub struct Lower<'func, I: VCodeInst> { next_vreg: u32, /// Insts in reverse block order, before final copy to vcode. - block_insts: Vec<(SourceLoc, I)>, + block_insts: Vec>, /// Ranges in `block_insts` constituting BBs. block_ranges: Vec<(usize, usize)>, /// Instructions collected for the BB in progress, in reverse order, with /// source-locs attached. - bb_insts: Vec<(SourceLoc, I)>, + bb_insts: Vec>, /// Instructions collected for the CLIF inst in progress, in forward order. - ir_insts: Vec, + ir_insts: Vec>, /// The register to use for GetPinnedReg, if any, on this architecture. pinned_reg: Option, @@ -276,6 +289,7 @@ fn alloc_vreg( let v = *next_vreg; *next_vreg += 1; value_regs[value] = Reg::new_virtual(regclass, v); + debug!("value {} gets vreg {:?}", value, v); } value_regs[value].as_virtual_reg().unwrap() } @@ -579,15 +593,18 @@ impl<'func, I: VCodeInst> Lower<'func, I> { } fn finish_ir_inst(&mut self, loc: SourceLoc) { - for inst in self.ir_insts.drain(..).rev() { - self.bb_insts.push((loc, inst)); + // `bb_insts` is kept in reverse order, so emit the instructions in + // reverse order. + for mut tuple in self.ir_insts.drain(..).rev() { + tuple.loc = loc; + self.bb_insts.push(tuple); } } fn finish_bb(&mut self) { let start = self.block_insts.len(); - for pair in self.bb_insts.drain(..).rev() { - self.block_insts.push(pair); + for tuple in self.bb_insts.drain(..).rev() { + self.block_insts.push(tuple); } let end = self.block_insts.len(); self.block_ranges.push((start, end)); @@ -595,9 +612,14 @@ impl<'func, I: VCodeInst> Lower<'func, I> { fn copy_bbs_to_vcode(&mut self) { for &(start, end) in self.block_ranges.iter().rev() { - for &(loc, ref inst) in &self.block_insts[start..end] { + for &InstTuple { + loc, + is_safepoint, + ref inst, + } in &self.block_insts[start..end] + { self.vcode.set_srcloc(loc); - self.vcode.push(inst.clone()); + self.vcode.push(inst.clone(), is_safepoint); } self.vcode.end_bb(); } @@ -645,7 +667,10 @@ impl<'func, I: VCodeInst> Lower<'func, I> { } /// Lower the function. - pub fn lower>(mut self, backend: &B) -> CodegenResult> { + pub fn lower>( + mut self, + backend: &B, + ) -> CodegenResult<(VCode, StackmapRequestInfo)> { debug!("about to lower function: {:?}", self.f); // Initialize the ABI object, giving it a temp if requested. @@ -730,10 +755,10 @@ impl<'func, I: VCodeInst> Lower<'func, I> { self.copy_bbs_to_vcode(); // Now that we've emitted all instructions into the VCodeBuilder, let's build the VCode. 
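+        // `build()` now also returns the `StackmapRequestInfo` gathered during
+        // lowering (the reference-typed vregs and the indices of safepoint
+        // instructions); the register allocator consumes it and hands back a
+        // spillslot list per safepoint.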
- let vcode = self.vcode.build(); + let (vcode, stackmap_info) = self.vcode.build(); debug!("built vcode: {:?}", vcode); - Ok(vcode) + Ok((vcode, stackmap_info)) } /// Get the actual inputs for a value. This is the implementation for @@ -874,6 +899,13 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> { self.inst_colors[ir_inst] } + fn is_safepoint(&self, ir_inst: Inst) -> bool { + // There is no safepoint metadata at all if we have no reftyped values + // in this function; lack of metadata implies "nothing to trace", and + // avoids overhead. + self.vcode.have_ref_values() && is_safepoint(self.f, ir_inst) + } + fn num_inputs(&self, ir_inst: Inst) -> usize { self.f.dfg.inst_args(ir_inst).len() } @@ -916,7 +948,19 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> { } fn emit(&mut self, mach_inst: I) { - self.ir_insts.push(mach_inst); + self.ir_insts.push(InstTuple { + loc: SourceLoc::default(), + is_safepoint: false, + inst: mach_inst, + }); + } + + fn emit_safepoint(&mut self, mach_inst: I) { + self.ir_insts.push(InstTuple { + loc: SourceLoc::default(), + is_safepoint: true, + inst: mach_inst, + }); } fn use_input_reg(&mut self, input: LowerInput) { diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index 380a6fe37c..e7605cff7b 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -96,7 +96,7 @@ //! //! ``` -use crate::binemit::{CodeInfo, CodeOffset}; +use crate::binemit::{CodeInfo, CodeOffset, Stackmap}; use crate::ir::condcodes::IntCC; use crate::ir::{Function, Type}; use crate::result::CodegenResult; @@ -191,6 +191,10 @@ pub trait MachInst: Clone + Debug { /// What is the worst-case instruction size emitted by this instruction type? fn worst_case_size() -> CodeOffset; + /// What is the register class used for reference types (GC-observable pointers)? Can + /// be dependent on compilation flags. + fn ref_type_rc(_flags: &Flags) -> RegClass; + /// A label-use kind: a type that describes the types of label references that /// can occur in an instruction. type LabelUse: MachInstLabelUse; @@ -256,9 +260,21 @@ pub enum MachTerminator<'a> { /// A trait describing the ability to encode a MachInst into binary machine code. pub trait MachInstEmit: MachInst { /// Persistent state carried across `emit` invocations. - type State: Default + Clone + Debug; + type State: MachInstEmitState; /// Emit the instruction. fn emit(&self, code: &mut MachBuffer, flags: &Flags, state: &mut Self::State); + /// Pretty-print the instruction. + fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut Self::State) -> String; +} + +/// A trait describing the emission state carried between MachInsts when +/// emitting a function body. +pub trait MachInstEmitState: Default + Clone + Debug { + /// Create a new emission state given the ABI object. + fn new(abi: &dyn ABIBody) -> Self; + /// Update the emission state before emitting an instruction that is a + /// safepoint. + fn pre_safepoint(&mut self, _stackmap: Stackmap) {} } /// The result of a `MachBackend::compile_function()` call. Contains machine diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 74c99b3af1..42bc0a16bd 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -17,14 +17,15 @@ //! See the main module comment in `mod.rs` for more details on the VCode-based //! backend pipeline. 
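+//!
+//! Stackmaps flow through this module as follows: the `VCodeBuilder` collects
+//! reference-typed vregs and safepoint instruction indices into a
+//! `StackmapRequestInfo`; the register allocator returns a list of live
+//! reference-holding `SpillSlot`s per safepoint; and at emission, the ABI
+//! object translates each list into a `binemit::Stackmap` that is attached to
+//! the `MachBuffer`.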
-use crate::ir::{self, SourceLoc}; +use crate::ir::{self, types, SourceLoc}; use crate::machinst::*; use crate::settings; use regalloc::Function as RegallocFunction; use regalloc::Set as RegallocSet; use regalloc::{ - BlockIx, InstIx, Range, RegAllocResult, RegClass, RegUsageCollector, RegUsageMapper, + BlockIx, InstIx, Range, RegAllocResult, RegClass, RegUsageCollector, RegUsageMapper, SpillSlot, + StackmapRequestInfo, }; use alloc::boxed::Box; @@ -56,6 +57,9 @@ pub struct VCode { /// VReg IR-level types. vreg_types: Vec, + /// Do we have any ref values among our vregs? + have_ref_values: bool, + /// Lowered machine instructions in order corresponding to the original IR. insts: Vec, @@ -82,6 +86,16 @@ pub struct VCode { /// ABI object. abi: Box>, + + /// Safepoint instruction indices. Filled in post-regalloc. (Prior to + /// regalloc, the safepoint instructions are listed in the separate + /// `StackmapRequestInfo` held separate from the `VCode`.) + safepoint_insns: Vec, + + /// For each safepoint entry in `safepoint_insns`, a list of `SpillSlot`s. + /// These are used to generate actual stackmaps at emission. Filled in + /// post-regalloc. + safepoint_slots: Vec>, } /// A builder for a VCode function body. This builder is designed for the @@ -102,6 +116,9 @@ pub struct VCodeBuilder { /// In-progress VCode. vcode: VCode, + /// In-progress stackmap-request info. + stackmap_info: StackmapRequestInfo, + /// Index of the last block-start in the vcode. block_start: InsnIndex, @@ -115,9 +132,17 @@ pub struct VCodeBuilder { impl VCodeBuilder { /// Create a new VCodeBuilder. pub fn new(abi: Box>, block_order: BlockLoweringOrder) -> VCodeBuilder { + let reftype_class = I::ref_type_rc(abi.flags()); let vcode = VCode::new(abi, block_order); + let stackmap_info = StackmapRequestInfo { + reftype_class, + reftyped_vregs: vec![], + safepoint_insns: vec![], + }; + VCodeBuilder { vcode, + stackmap_info, block_start: 0, succ_start: 0, cur_srcloc: SourceLoc::default(), @@ -142,6 +167,15 @@ impl VCodeBuilder { .resize(vreg.get_index() + 1, ir::types::I8); } self.vcode.vreg_types[vreg.get_index()] = ty; + if is_reftype(ty) { + self.stackmap_info.reftyped_vregs.push(vreg); + self.vcode.have_ref_values = true; + } + } + + /// Are there any reference-typed values at all among the vregs? + pub fn have_ref_values(&self) -> bool { + self.vcode.have_ref_values() } /// Set the current block as the entry block. @@ -166,7 +200,7 @@ impl VCodeBuilder { } /// Push an instruction for the current BB and current IR inst within the BB. - pub fn push(&mut self, insn: I) { + pub fn push(&mut self, insn: I, is_safepoint: bool) { match insn.is_term() { MachTerminator::None | MachTerminator::Ret => {} MachTerminator::Uncond(target) => { @@ -186,6 +220,11 @@ impl VCodeBuilder { } self.vcode.insts.push(insn); self.vcode.srclocs.push(self.cur_srcloc); + if is_safepoint { + self.stackmap_info + .safepoint_insns + .push(InstIx::new((self.vcode.insts.len() - 1) as u32)); + } } /// Get the current source location. @@ -198,21 +237,16 @@ impl VCodeBuilder { self.cur_srcloc = srcloc; } - /// Build the final VCode. - pub fn build(self) -> VCode { - self.vcode + /// Build the final VCode, returning the vcode itself as well as auxiliary + /// information, such as the stackmap request information. + pub fn build(self) -> (VCode, StackmapRequestInfo) { + // TODO: come up with an abstraction for "vcode and auxiliary data". The + // auxiliary data needs to be separate from the vcode so that it can be + // referenced as the vcode is mutated (e.g. 
by the register allocator). + (self.vcode, self.stackmap_info) } } -fn block_ranges(indices: &[InstIx], len: usize) -> Vec<(usize, usize)> { - let v = indices - .iter() - .map(|iix| iix.get() as usize) - .chain(iter::once(len)) - .collect::>(); - v.windows(2).map(|p| (p[0], p[1])).collect() -} - fn is_redundant_move(insn: &I) -> bool { if let Some((to, from)) = insn.is_move() { to.to_reg() == from @@ -221,6 +255,11 @@ fn is_redundant_move(insn: &I) -> bool { } } +/// Is this type a reference type? +fn is_reftype(ty: Type) -> bool { + ty == types::R32 || ty == types::R64 +} + impl VCode { /// New empty VCode. fn new(abi: Box>, block_order: BlockLoweringOrder) -> VCode { @@ -228,6 +267,7 @@ impl VCode { liveins: abi.liveins(), liveouts: abi.liveouts(), vreg_types: vec![], + have_ref_values: false, insts: vec![], srclocs: vec![], entry: 0, @@ -236,6 +276,8 @@ impl VCode { block_succs: vec![], block_order, abi, + safepoint_insns: vec![], + safepoint_slots: vec![], } } @@ -249,6 +291,11 @@ impl VCode { self.vreg_types[vreg.get_index()] } + /// Are there any reference-typed values at all among the vregs? + pub fn have_ref_values(&self) -> bool { + self.have_ref_values + } + /// Get the entry block. pub fn entry(&self) -> BlockIndex { self.entry @@ -265,6 +312,11 @@ impl VCode { self.abi.frame_size() } + /// Inbound stack-args size. + pub fn stack_args_size(&self) -> u32 { + self.abi.stack_args_size() + } + /// Get the successors for a block. pub fn succs(&self, block: BlockIndex) -> &[BlockIx] { let (start, end) = self.block_succ_range[block as usize]; @@ -281,17 +333,21 @@ impl VCode { self.abi .set_clobbered(result.clobbered_registers.map(|r| Writable::from_reg(*r))); - // We want to move instructions over in final block order, using the new - // block-start map given by the regalloc. - let block_ranges: Vec<(usize, usize)> = - block_ranges(result.target_map.elems(), result.insns.len()); let mut final_insns = vec![]; let mut final_block_ranges = vec![(0, 0); self.num_blocks()]; let mut final_srclocs = vec![]; + let mut final_safepoint_insns = vec![]; + let mut safept_idx = 0; + assert!(result.target_map.elems().len() == self.num_blocks()); for block in 0..self.num_blocks() { + let start = result.target_map.elems()[block].get() as usize; + let end = if block == self.num_blocks() - 1 { + result.insns.len() + } else { + result.target_map.elems()[block + 1].get() as usize + }; let block = block as BlockIndex; - let (start, end) = block_ranges[block as usize]; let final_start = final_insns.len() as InsnIndex; if block == self.entry { @@ -333,6 +389,16 @@ impl VCode { final_insns.push(insn.clone()); final_srclocs.push(srcloc); } + + // Was this instruction a safepoint instruction? Add its final + // index to the safepoint insn-index list if so. + if safept_idx < result.new_safepoint_insns.len() + && (result.new_safepoint_insns[safept_idx].get() as usize) == i + { + let idx = final_insns.len() - 1; + final_safepoint_insns.push(idx as InsnIndex); + safept_idx += 1; + } } let final_end = final_insns.len() as InsnIndex; @@ -344,6 +410,12 @@ impl VCode { self.insts = final_insns; self.srclocs = final_srclocs; self.block_ranges = final_block_ranges; + self.safepoint_insns = final_safepoint_insns; + + // Save safepoint slot-lists. These will be passed to the `EmitState` + // for the machine backend during emission so that it can do + // target-specific translations of slot numbers to stack offsets. 
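+        // `safepoint_insns[i]` and `safepoint_slots[i]` describe the same
+        // safepoint; emission walks the two vectors in lockstep.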
+ self.safepoint_slots = result.stackmaps; } /// Emit the instructions to a `MachBuffer`, containing fixed-up code and external @@ -353,11 +425,12 @@ impl VCode { I: MachInstEmit, { let mut buffer = MachBuffer::new(); - let mut state = Default::default(); + let mut state = I::State::new(&*self.abi); buffer.reserve_labels_for_blocks(self.num_blocks() as BlockIndex); // first N MachLabels are simply block indices. let flags = self.abi.flags(); + let mut safepoint_idx = 0; let mut cur_srcloc = None; for block in 0..self.num_blocks() { let block = block as BlockIndex; @@ -381,6 +454,19 @@ impl VCode { cur_srcloc = Some(srcloc); } + if safepoint_idx < self.safepoint_insns.len() + && self.safepoint_insns[safepoint_idx] == iix + { + if self.safepoint_slots[safepoint_idx].len() > 0 { + let stackmap = self.abi.spillslots_to_stackmap( + &self.safepoint_slots[safepoint_idx][..], + &state, + ); + state.pre_safepoint(stackmap); + } + safepoint_idx += 1; + } + self.insts[iix as usize].emit(&mut buffer, flags, &mut state); } @@ -476,13 +562,18 @@ impl RegallocFunction for VCode { self.abi.get_spillslot_size(regclass, ty) } - fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, vreg: VirtualReg) -> I { - let ty = self.vreg_type(vreg); + fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, vreg: Option) -> I { + let ty = vreg.map(|v| self.vreg_type(v)); self.abi.gen_spill(to_slot, from_reg, ty) } - fn gen_reload(&self, to_reg: Writable, from_slot: SpillSlot, vreg: VirtualReg) -> I { - let ty = self.vreg_type(vreg); + fn gen_reload( + &self, + to_reg: Writable, + from_slot: SpillSlot, + vreg: Option, + ) -> I { + let ty = vreg.map(|v| self.vreg_type(v)); self.abi.gen_reload(to_reg, from_slot, ty) } @@ -531,7 +622,7 @@ impl fmt::Debug for VCode { } /// Pretty-printing with `RealRegUniverse` context. -impl ShowWithRRU for VCode { +impl ShowWithRRU for VCode { fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { use std::fmt::Write; @@ -539,6 +630,8 @@ impl ShowWithRRU for VCode { write!(&mut s, "VCode_ShowWithRRU {{{{\n").unwrap(); write!(&mut s, " Entry block: {}\n", self.entry).unwrap(); + let mut state = Default::default(); + let mut safepoint_idx = 0; for i in 0..self.num_blocks() { let block = i as BlockIndex; @@ -552,11 +645,22 @@ impl ShowWithRRU for VCode { let (start, end) = self.block_ranges[block as usize]; write!(&mut s, " (instruction range: {} .. 
{})\n", start, end).unwrap(); for inst in start..end { + if safepoint_idx < self.safepoint_insns.len() + && self.safepoint_insns[safepoint_idx] == inst + { + write!( + &mut s, + " (safepoint: slots {:?} with EmitState {:?})\n", + self.safepoint_slots[safepoint_idx], state, + ) + .unwrap(); + safepoint_idx += 1; + } write!( &mut s, " Inst {}: {}\n", inst, - self.insts[inst as usize].show_rru(mb_rru) + self.insts[inst as usize].pretty_print(mb_rru, &mut state) ) .unwrap(); } diff --git a/cranelift/codegen/src/regalloc/safepoint.rs b/cranelift/codegen/src/regalloc/safepoint.rs index 0700735611..18fe20ffac 100644 --- a/cranelift/codegen/src/regalloc/safepoint.rs +++ b/cranelift/codegen/src/regalloc/safepoint.rs @@ -1,6 +1,7 @@ use crate::cursor::{Cursor, FuncCursor}; use crate::dominator_tree::DominatorTree; -use crate::ir::{Function, InstBuilder, Opcode}; +use crate::inst_predicates::is_safepoint; +use crate::ir::{Function, InstBuilder}; use crate::isa::TargetIsa; use crate::regalloc::live_value_tracker::LiveValueTracker; use crate::regalloc::liveness::Liveness; @@ -51,12 +52,8 @@ pub fn emit_stackmaps( pos.goto_top(block); while let Some(inst) = pos.next_inst() { - if pos.func.dfg[inst].opcode().is_resumable_trap() { + if is_safepoint(&pos.func, inst) { insert_and_encode_safepoint(&mut pos, tracker, isa); - } else if pos.func.dfg[inst].opcode().is_call() { - insert_and_encode_safepoint(&mut pos, tracker, isa); - } else if pos.func.dfg[inst].opcode() == Opcode::Safepoint { - panic!("safepoint instruction can only be used by the compiler!"); } // Process the instruction and get rid of dead values. diff --git a/cranelift/filetests/filetests/vcode/aarch64/reftypes.clif b/cranelift/filetests/filetests/vcode/aarch64/reftypes.clif index 9956b6f4ad..c4e8067b9f 100644 --- a/cranelift/filetests/filetests/vcode/aarch64/reftypes.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/reftypes.clif @@ -63,3 +63,65 @@ block0: ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret + +function %f5(r64, r64) -> r64, r64, r64 { + fn0 = %f(r64) -> b1 + ss0 = explicit_slot 8 + +block0(v0: r64, v1: r64): + v2 = call fn0(v0) + stack_store.r64 v0, ss0 + brz v2, block1(v1, v0) + jump block2(v0, v1) + +block1(v3: r64, v4: r64): + jump block3(v3, v4) + +block2(v5: r64, v6: r64): + jump block3(v5, v6) + +block3(v7: r64, v8: r64): + v9 = stack_load.r64 ss0 + return v7, v8, v9 +} + +; check: Block 0: +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: sub sp, sp, #32 +; nextln: stp x19, x20, [sp, #-16]! +; nextln: virtual_sp_offset_adjust 16 +; nextln: mov x19, x0 +; nextln: mov x20, x1 +; nextln: mov x0, x19 +; nextln: ldr x16, 8 ; b 12 ; data +; nextln: stur x19, [sp, #24] +; nextln: stur x20, [sp, #32] +; nextln: (safepoint: slots [S0, S1] +; nextln: blr x16 +; nextln: ldur x19, [sp, #24] +; nextln: ldur x20, [sp, #32] +; nextln: add x1, sp, #16 +; nextln: stur x19, [x1] +; nextln: and w0, w0, #1 +; nextln: cbz x0, label1 ; b label3 +; check: Block 1: +; check: b label2 +; check: Block 2: +; check: mov x0, x20 +; nextln: b label5 +; check: Block 3: +; check: b label4 +; check: Block 4: +; check: mov x0, x19 +; nextln: mov x19, x20 +; nextln: b label5 +; check: Block 5: +; check: add x1, sp, #16 +; nextln: ldur x1, [x1] +; nextln: mov x2, x1 +; nextln: mov x1, x19 +; nextln: ldp x19, x20, [sp], #16 +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret