Reftypes part two: add support for stackmaps.

This commit adds support for generating stackmaps at safepoints to the
new backend framework and to the AArch64 backend in particular. It has
been tested to work with SpiderMonkey.
Chris Fallin
2020-06-29 15:49:18 -07:00
parent b93e8c296d
commit 08353fcc14
17 changed files with 597 additions and 143 deletions

View File

@@ -1,5 +1,6 @@
//! ABI definitions.
use crate::binemit::Stackmap;
use crate::ir::{ArgumentExtension, StackSlot};
use crate::machinst::*;
use crate::settings;
@@ -100,6 +101,15 @@ pub trait ABIBody {
/// Store to a spillslot.
fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> Self::I;
/// Generate a stackmap, given a list of spillslots and the emission state
/// at a given program point (prior to emission of the safepointing
/// instruction).
fn spillslots_to_stackmap(
&self,
slots: &[SpillSlot],
state: &<Self::I as MachInstEmit>::State,
) -> Stackmap;
/// Generate a prologue, post-regalloc. This should include any stack
/// frame or other setup necessary to use the other methods (`load_arg`,
/// `store_retval`, and spillslot accesses.) `self` is mutable so that we
@@ -113,21 +123,34 @@ pub trait ABIBody {
/// likely closely related.
fn gen_epilogue(&self) -> Vec<Self::I>;
/// Returns the full frame size for the given function, after prologue
/// emission has run. This comprises the spill slots and stack-storage slots
/// (but not storage for clobbered callee-save registers, arguments pushed
/// at callsites within this function, or other ephemeral pushes). This is
/// used for ABI variants where the client generates prologue/epilogue code,
/// as in Baldrdash (SpiderMonkey integration).
fn frame_size(&self) -> u32;
/// Returns the size of arguments expected on the stack.
fn stack_args_size(&self) -> u32;
/// Get the spill-slot size.
fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32;
/// Generate a spill. The type, if known, is given; this can be used to
/// generate a store instruction optimized for the particular type rather
/// than the RegClass (e.g., only F64 that resides in a V128 register). If
/// no type is given, the implementation should spill the whole register.
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option<Type>) -> Self::I;
/// Generate a reload (fill). As for spills, the type may be given to allow
/// a more optimized load instruction to be generated.
fn gen_reload(
&self,
to_reg: Writable<RealReg>,
from_slot: SpillSlot,
ty: Option<Type>,
) -> Self::I;
}
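
A minimal sketch of how a backend's `ABIBody` implementation might satisfy the new `spillslots_to_stackmap` and the type-optional `gen_spill`. The `MyABIBody` and `EmitState` types, the `spillslot_offset` helper, and `Stackmap::from_slice` are illustrative assumptions here, not necessarily the exact cranelift-codegen API:

impl ABIBody for MyABIBody {
    type I = Inst;

    fn spillslots_to_stackmap(&self, slots: &[SpillSlot], state: &EmitState) -> Stackmap {
        // One bit per 8-byte stack word, covering the frame from the nominal
        // SP upward (assuming a 64-bit target).
        let frame_words = (self.frame_size() / 8) as usize;
        let mut bits = vec![false; frame_words];
        for &slot in slots {
            // Hypothetical helper: translate the abstract spillslot into a
            // byte offset from the nominal SP using this ABI's frame layout.
            let byte_off = self.spillslot_offset(slot, state);
            bits[(byte_off / 8) as usize] = true;
        }
        Stackmap::from_slice(&bits[..])
    }

    fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option<Type>) -> Inst {
        // With a known type, emit a store sized for that type; with no type,
        // conservatively store the whole register for its class.
        let ty = ty.unwrap_or_else(|| match from_reg.get_class() {
            RegClass::I64 => types::I64,
            RegClass::V128 => types::I8X16,
            _ => unimplemented!("unexpected register class"),
        });
        self.store_spillslot(to_slot, ty, from_reg.to_reg())
    }

    // ... remaining trait methods elided ...
}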
/// Trait implemented by an object that tracks ABI-related state and can

View File

@@ -140,7 +140,7 @@
//! Given these invariants, we argue why each optimization preserves execution
//! semantics below (grep for "Preserves execution semantics").
use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc, Stackmap};
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode};
use crate::machinst::{BlockIndex, MachInstLabelUse, VCodeInst};
@@ -168,6 +168,8 @@ pub struct MachBuffer<I: VCodeInst> {
call_sites: SmallVec<[MachCallSite; 16]>,
/// Any source location mappings referring to this code.
srclocs: SmallVec<[MachSrcLoc; 64]>,
/// Any stackmaps referring to this code.
stackmaps: SmallVec<[MachStackMap; 8]>,
/// The current source location in progress (after `start_srcloc()` and
/// before `end_srcloc()`). This is a (start_offset, src_loc) tuple.
cur_srcloc: Option<(CodeOffset, SourceLoc)>,
@@ -228,6 +230,8 @@ pub struct MachBufferFinalized {
call_sites: SmallVec<[MachCallSite; 16]>,
/// Any source location mappings referring to this code.
srclocs: SmallVec<[MachSrcLoc; 64]>,
/// Any stackmaps referring to this code.
stackmaps: SmallVec<[MachStackMap; 8]>,
}
static UNKNOWN_LABEL_OFFSET: CodeOffset = 0xffff_ffff;
@@ -262,6 +266,7 @@ impl<I: VCodeInst> MachBuffer<I> {
traps: SmallVec::new(),
call_sites: SmallVec::new(),
srclocs: SmallVec::new(),
stackmaps: SmallVec::new(),
cur_srcloc: None,
label_offsets: SmallVec::new(),
label_aliases: SmallVec::new(),
@@ -1090,6 +1095,7 @@ impl<I: VCodeInst> MachBuffer<I> {
traps: self.traps,
call_sites: self.call_sites,
srclocs: self.srclocs,
stackmaps: self.stackmaps,
}
}
@@ -1149,6 +1155,22 @@ impl<I: VCodeInst> MachBuffer<I> {
self.srclocs.push(MachSrcLoc { start, end, loc });
}
}
/// Add stackmap metadata for this program point: a set of stack offsets
/// (from SP upward) that contain live references.
///
/// The stackmap applies to the instruction that is about to be emitted:
/// `insn_len` gives that instruction's length in bytes, so the recorded
/// code range is `[cur_offset, cur_offset + insn_len)`.
pub fn add_stackmap(&mut self, insn_len: CodeOffset, stackmap: Stackmap) {
let offset = self.cur_offset();
self.stackmaps.push(MachStackMap {
offset,
offset_end: offset + insn_len,
stackmap,
});
}
}
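
As a usage sketch: a backend's `MachInstEmit::emit` implementation for a call that is a safepoint would attach the pending stackmap before pushing the instruction's bytes, since `add_stackmap` reads `cur_offset()` as the start of the covered range. The `take_stackmap` accessor on the emission state is an assumed helper (see the `EmitState` sketch later in this commit):

// Inside the backend's `emit` for a call instruction. The state was primed
// with a Stackmap by `pre_safepoint()` just before this `emit` call.
if let Some(stackmap) = state.take_stackmap() {
    // Call this *before* emitting the instruction's bytes; a fixed-width
    // ISA such as AArch64 would pass an `insn_len` of 4.
    buffer.add_stackmap(4, stackmap);
}
// ... now emit the call's bytes into `buffer` ...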
impl MachBufferFinalized {
@@ -1207,6 +1229,11 @@ impl MachBufferFinalized {
sink.begin_rodata();
sink.end_codegen();
}
/// Get the stackmap metadata for this code.
pub fn stackmaps(&self) -> &[MachStackMap] {
&self.stackmaps[..]
}
}
/// A constant that is deferred to the next constant-pool opportunity.
@@ -1286,6 +1313,18 @@ pub struct MachSrcLoc {
pub loc: SourceLoc,
}
/// Record of stackmap metadata: stack offsets containing references.
#[derive(Clone, Debug)]
pub struct MachStackMap {
/// The code offset at which this stackmap applies.
pub offset: CodeOffset,
/// The code offset at the *end* of the instruction at which this stackmap
/// applies.
pub offset_end: CodeOffset,
/// The Stackmap itself.
pub stackmap: Stackmap,
}
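
A hypothetical consumer of this metadata after emission; `record_safepoint` stands in for an embedder's GC registration hook:

// After `MachBuffer::finish()`, walk the finalized buffer's stackmaps. For
// a call safepoint, `offset_end` is the offset just past the instruction,
// i.e. the return address observed when walking the stack.
fn register_stackmaps(buf: &MachBufferFinalized) {
    for sm in buf.stackmaps() {
        // `sm.offset .. sm.offset_end` spans the safepoint instruction.
        record_safepoint(sm.offset_end, &sm.stackmap);
    }
}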
/// Record of branch instruction in the buffer, to facilitate editing.
#[derive(Clone, Debug)]
struct MachBranch {

View File

@@ -23,7 +23,7 @@ where
// Build the lowering context.
let lower = Lower::new(f, abi, block_order)?;
// Lower the IR.
let (mut vcode, stackmap_request_info) = lower.lower(b)?;
debug!(
"vcode from lowering: \n{}",
@@ -57,11 +57,23 @@ where
}
}
// If there are no reference-typed values, or if there are no safepoints
// at which we need to know about them, then we don't need stackmaps.
let sri = if stackmap_request_info.reftyped_vregs.len() > 0
&& stackmap_request_info.safepoint_insns.len() > 0
{
Some(&stackmap_request_info)
} else {
None
};
let result = {
let _tt = timing::regalloc();
allocate_registers_with_opts(
&mut vcode,
b.reg_universe(),
sri,
Options {
run_checker,
algorithm,

View File

@@ -4,6 +4,7 @@
use crate::entity::SecondaryMap;
use crate::fx::{FxHashMap, FxHashSet};
use crate::inst_predicates::is_safepoint;
use crate::inst_predicates::{has_side_effect_or_load, is_constant_64bit};
use crate::ir::instructions::BranchInfo;
use crate::ir::types::I64;
@@ -17,7 +18,7 @@ use crate::machinst::{
};
use crate::CodegenResult;
use regalloc::{Reg, RegClass, StackmapRequestInfo, VirtualReg, Writable};
use alloc::boxed::Box;
use alloc::vec::Vec;
@@ -93,6 +94,8 @@ pub trait LowerCtx {
/// every side-effecting op; the backend should not try to merge across
/// side-effect colors unless the op being merged is known to be pure.
fn inst_color(&self, ir_inst: Inst) -> InstColor;
/// Determine whether an instruction is a safepoint.
fn is_safepoint(&self, ir_inst: Inst) -> bool;
// Instruction input/output queries:
@@ -146,6 +149,8 @@ pub trait LowerCtx {
fn alloc_tmp(&mut self, rc: RegClass, ty: Type) -> Writable<Reg>;
/// Emit a machine instruction.
fn emit(&mut self, mach_inst: Self::I);
/// Emit a machine instruction that is a safepoint.
fn emit_safepoint(&mut self, mach_inst: Self::I);
/// Indicate that the given input uses the register returned by
/// `get_input()`. Codegen may not happen otherwise for the producing
/// instruction if it has no side effects and no uses.
@@ -206,6 +211,14 @@ pub trait LowerBackend {
}
}
/// A pending instruction to insert and auxiliary information about it: its source location and
/// whether it is a safepoint.
struct InstTuple<I: VCodeInst> {
loc: SourceLoc,
is_safepoint: bool,
inst: I,
}
/// Machine-independent lowering driver / machine-instruction container. Maintains a correspondence
/// from original Inst to MachInsts.
pub struct Lower<'func, I: VCodeInst> {
@@ -237,17 +250,17 @@ pub struct Lower<'func, I: VCodeInst> {
next_vreg: u32,
/// Insts in reverse block order, before final copy to vcode.
block_insts: Vec<InstTuple<I>>,
/// Ranges in `block_insts` constituting BBs.
block_ranges: Vec<(usize, usize)>,
/// Instructions collected for the BB in progress, in reverse order, with
/// source-locs attached.
bb_insts: Vec<InstTuple<I>>,
/// Instructions collected for the CLIF inst in progress, in forward order.
ir_insts: Vec<InstTuple<I>>,
/// The register to use for GetPinnedReg, if any, on this architecture.
pinned_reg: Option<Reg>,
@@ -276,6 +289,7 @@ fn alloc_vreg(
let v = *next_vreg;
*next_vreg += 1;
value_regs[value] = Reg::new_virtual(regclass, v);
debug!("value {} gets vreg {:?}", value, v);
}
value_regs[value].as_virtual_reg().unwrap()
}
@@ -579,15 +593,18 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
}
fn finish_ir_inst(&mut self, loc: SourceLoc) {
// `bb_insts` is kept in reverse order, so emit the instructions in
// reverse order.
for mut tuple in self.ir_insts.drain(..).rev() {
tuple.loc = loc;
self.bb_insts.push(tuple);
}
}
fn finish_bb(&mut self) {
let start = self.block_insts.len();
for tuple in self.bb_insts.drain(..).rev() {
self.block_insts.push(tuple);
}
let end = self.block_insts.len();
self.block_ranges.push((start, end));
@@ -595,9 +612,14 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
fn copy_bbs_to_vcode(&mut self) {
for &(start, end) in self.block_ranges.iter().rev() {
for &InstTuple {
loc,
is_safepoint,
ref inst,
} in &self.block_insts[start..end]
{
self.vcode.set_srcloc(loc);
self.vcode.push(inst.clone(), is_safepoint);
}
self.vcode.end_bb();
}
@@ -645,7 +667,10 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
}
/// Lower the function.
pub fn lower<B: LowerBackend<MInst = I>>(
mut self,
backend: &B,
) -> CodegenResult<(VCode<I>, StackmapRequestInfo)> {
debug!("about to lower function: {:?}", self.f);
// Initialize the ABI object, giving it a temp if requested.
@@ -730,10 +755,10 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
self.copy_bbs_to_vcode();
// Now that we've emitted all instructions into the VCodeBuilder, let's build the VCode.
let (vcode, stackmap_info) = self.vcode.build();
debug!("built vcode: {:?}", vcode);
Ok((vcode, stackmap_info))
}
/// Get the actual inputs for a value. This is the implementation for
@@ -874,6 +899,13 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
self.inst_colors[ir_inst]
}
fn is_safepoint(&self, ir_inst: Inst) -> bool {
// There is no safepoint metadata at all if we have no reftyped values
// in this function; lack of metadata implies "nothing to trace", and
// avoids overhead.
self.vcode.have_ref_values() && is_safepoint(self.f, ir_inst)
}
fn num_inputs(&self, ir_inst: Inst) -> usize {
self.f.dfg.inst_args(ir_inst).len()
}
@@ -916,7 +948,19 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
}
fn emit(&mut self, mach_inst: I) {
self.ir_insts.push(InstTuple {
loc: SourceLoc::default(),
is_safepoint: false,
inst: mach_inst,
});
}
fn emit_safepoint(&mut self, mach_inst: I) {
self.ir_insts.push(InstTuple {
loc: SourceLoc::default(),
is_safepoint: true,
inst: mach_inst,
});
}
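
Taken together, `is_safepoint` and `emit_safepoint` are intended to be used from a backend's lowering code roughly as follows (`call_inst` stands for whatever machine-level call instruction the backend has built):

// If the CLIF instruction being lowered is a safepoint (the function has
// reftyped values and this call may trigger GC), emit it with
// `emit_safepoint` so that its VCode index is recorded for the register
// allocator's stackmap computation; otherwise emit it normally.
if ctx.is_safepoint(ir_inst) {
    ctx.emit_safepoint(call_inst);
} else {
    ctx.emit(call_inst);
}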
fn use_input_reg(&mut self, input: LowerInput) {

View File

@@ -96,7 +96,7 @@
//!
//! ```
use crate::binemit::{CodeInfo, CodeOffset, Stackmap};
use crate::ir::condcodes::IntCC;
use crate::ir::{Function, Type};
use crate::result::CodegenResult;
@@ -191,6 +191,10 @@ pub trait MachInst: Clone + Debug {
/// What is the worst-case instruction size emitted by this instruction type?
fn worst_case_size() -> CodeOffset;
/// What is the register class used for reference types (GC-observable pointers)? Can
/// be dependent on compilation flags.
fn ref_type_rc(_flags: &Flags) -> RegClass;
/// A label-use kind: a type that describes the types of label references that
/// can occur in an instruction.
type LabelUse: MachInstLabelUse;
@@ -256,9 +260,21 @@ pub enum MachTerminator<'a> {
/// A trait describing the ability to encode a MachInst into binary machine code.
pub trait MachInstEmit: MachInst {
/// Persistent state carried across `emit` invocations.
type State: MachInstEmitState<Self>;
/// Emit the instruction.
fn emit(&self, code: &mut MachBuffer<Self>, flags: &Flags, state: &mut Self::State);
/// Pretty-print the instruction.
fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut Self::State) -> String;
}
/// A trait describing the emission state carried between MachInsts when
/// emitting a function body.
pub trait MachInstEmitState<I: MachInst>: Default + Clone + Debug {
/// Create a new emission state given the ABI object.
fn new(abi: &dyn ABIBody<I = I>) -> Self;
/// Update the emission state before emitting an instruction that is a
/// safepoint.
fn pre_safepoint(&mut self, _stackmap: Stackmap) {}
}
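
A minimal sketch of an emission-state type implementing this trait; the field names are assumptions modeled on what an AArch64-style backend would track, and `Inst` stands for the backend's machine-instruction type:

#[derive(Clone, Debug, Default)]
struct EmitState {
    /// SP-adjustment state needed to translate spillslots to stack offsets.
    virtual_sp_offset: i64,
    /// A stackmap installed by `pre_safepoint()`, consumed when the
    /// safepoint instruction itself is emitted (see `add_stackmap` above).
    stackmap: Option<Stackmap>,
}

impl MachInstEmitState<Inst> for EmitState {
    fn new(_abi: &dyn ABIBody<I = Inst>) -> Self {
        // A real backend might capture frame-layout facts from the ABI here.
        Default::default()
    }
    fn pre_safepoint(&mut self, stackmap: Stackmap) {
        self.stackmap = Some(stackmap);
    }
}

impl EmitState {
    /// Assumed helper used by the emission code for safepoint instructions.
    fn take_stackmap(&mut self) -> Option<Stackmap> {
        self.stackmap.take()
    }
}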
/// The result of a `MachBackend::compile_function()` call. Contains machine

View File

@@ -17,14 +17,15 @@
//! See the main module comment in `mod.rs` for more details on the VCode-based
//! backend pipeline.
use crate::ir::{self, types, SourceLoc};
use crate::machinst::*;
use crate::settings;
use regalloc::Function as RegallocFunction;
use regalloc::Set as RegallocSet;
use regalloc::{
BlockIx, InstIx, Range, RegAllocResult, RegClass, RegUsageCollector, RegUsageMapper, SpillSlot,
StackmapRequestInfo,
};
use alloc::boxed::Box;
@@ -56,6 +57,9 @@ pub struct VCode<I: VCodeInst> {
/// VReg IR-level types.
vreg_types: Vec<Type>,
/// Do we have any ref values among our vregs?
have_ref_values: bool,
/// Lowered machine instructions in order corresponding to the original IR.
insts: Vec<I>,
@@ -82,6 +86,16 @@ pub struct VCode<I: VCodeInst> {
/// ABI object.
abi: Box<dyn ABIBody<I = I>>,
/// Safepoint instruction indices. Filled in post-regalloc. (Prior to
/// regalloc, the safepoint instructions are listed in the
/// `StackmapRequestInfo` held separately from the `VCode`.)
safepoint_insns: Vec<InsnIndex>,
/// For each safepoint entry in `safepoint_insns`, a list of `SpillSlot`s.
/// These are used to generate actual stackmaps at emission. Filled in
/// post-regalloc.
safepoint_slots: Vec<Vec<SpillSlot>>,
}
/// A builder for a VCode function body. This builder is designed for the
@@ -102,6 +116,9 @@ pub struct VCodeBuilder<I: VCodeInst> {
/// In-progress VCode.
vcode: VCode<I>,
/// In-progress stackmap-request info.
stackmap_info: StackmapRequestInfo,
/// Index of the last block-start in the vcode.
block_start: InsnIndex,
@@ -115,9 +132,17 @@ pub struct VCodeBuilder<I: VCodeInst> {
impl<I: VCodeInst> VCodeBuilder<I> {
/// Create a new VCodeBuilder.
pub fn new(abi: Box<dyn ABIBody<I = I>>, block_order: BlockLoweringOrder) -> VCodeBuilder<I> {
let reftype_class = I::ref_type_rc(abi.flags());
let vcode = VCode::new(abi, block_order);
let stackmap_info = StackmapRequestInfo {
reftype_class,
reftyped_vregs: vec![],
safepoint_insns: vec![],
};
VCodeBuilder {
vcode,
stackmap_info,
block_start: 0,
succ_start: 0,
cur_srcloc: SourceLoc::default(),
@@ -142,6 +167,15 @@ impl<I: VCodeInst> VCodeBuilder<I> {
.resize(vreg.get_index() + 1, ir::types::I8);
}
self.vcode.vreg_types[vreg.get_index()] = ty;
if is_reftype(ty) {
self.stackmap_info.reftyped_vregs.push(vreg);
self.vcode.have_ref_values = true;
}
}
/// Are there any reference-typed values at all among the vregs?
pub fn have_ref_values(&self) -> bool {
self.vcode.have_ref_values()
}
/// Set the current block as the entry block.
@@ -166,7 +200,7 @@ impl<I: VCodeInst> VCodeBuilder<I> {
}
/// Push an instruction for the current BB and current IR inst within the BB.
pub fn push(&mut self, insn: I, is_safepoint: bool) {
match insn.is_term() {
MachTerminator::None | MachTerminator::Ret => {}
MachTerminator::Uncond(target) => {
@@ -186,6 +220,11 @@ impl<I: VCodeInst> VCodeBuilder<I> {
}
self.vcode.insts.push(insn);
self.vcode.srclocs.push(self.cur_srcloc);
if is_safepoint {
self.stackmap_info
.safepoint_insns
.push(InstIx::new((self.vcode.insts.len() - 1) as u32));
}
}
/// Get the current source location.
@@ -198,21 +237,16 @@ impl<I: VCodeInst> VCodeBuilder<I> {
self.cur_srcloc = srcloc;
}
/// Build the final VCode, returning the vcode itself as well as auxiliary
/// information, such as the stackmap request information.
pub fn build(self) -> (VCode<I>, StackmapRequestInfo) {
// TODO: come up with an abstraction for "vcode and auxiliary data". The
// auxiliary data needs to be separate from the vcode so that it can be
// referenced as the vcode is mutated (e.g. by the register allocator).
(self.vcode, self.stackmap_info)
}
}
fn is_redundant_move<I: VCodeInst>(insn: &I) -> bool {
if let Some((to, from)) = insn.is_move() {
to.to_reg() == from
@@ -221,6 +255,11 @@ fn is_redundant_move<I: VCodeInst>(insn: &I) -> bool {
}
}
/// Is this type a reference type?
fn is_reftype(ty: Type) -> bool {
ty == types::R32 || ty == types::R64
}
impl<I: VCodeInst> VCode<I> {
/// New empty VCode.
fn new(abi: Box<dyn ABIBody<I = I>>, block_order: BlockLoweringOrder) -> VCode<I> {
@@ -228,6 +267,7 @@ impl<I: VCodeInst> VCode<I> {
liveins: abi.liveins(),
liveouts: abi.liveouts(),
vreg_types: vec![],
have_ref_values: false,
insts: vec![],
srclocs: vec![],
entry: 0,
@@ -236,6 +276,8 @@ impl<I: VCodeInst> VCode<I> {
block_succs: vec![],
block_order,
abi,
safepoint_insns: vec![],
safepoint_slots: vec![],
}
}
@@ -249,6 +291,11 @@ impl<I: VCodeInst> VCode<I> {
self.vreg_types[vreg.get_index()]
}
/// Are there any reference-typed values at all among the vregs?
pub fn have_ref_values(&self) -> bool {
self.have_ref_values
}
/// Get the entry block.
pub fn entry(&self) -> BlockIndex {
self.entry
@@ -265,6 +312,11 @@ impl<I: VCodeInst> VCode<I> {
self.abi.frame_size()
}
/// Inbound stack-args size.
pub fn stack_args_size(&self) -> u32 {
self.abi.stack_args_size()
}
/// Get the successors for a block.
pub fn succs(&self, block: BlockIndex) -> &[BlockIx] {
let (start, end) = self.block_succ_range[block as usize];
@@ -281,17 +333,21 @@ impl<I: VCodeInst> VCode<I> {
self.abi
.set_clobbered(result.clobbered_registers.map(|r| Writable::from_reg(*r)));
let mut final_insns = vec![];
let mut final_block_ranges = vec![(0, 0); self.num_blocks()];
let mut final_srclocs = vec![];
let mut final_safepoint_insns = vec![];
let mut safept_idx = 0;
assert!(result.target_map.elems().len() == self.num_blocks());
for block in 0..self.num_blocks() {
let start = result.target_map.elems()[block].get() as usize;
let end = if block == self.num_blocks() - 1 {
result.insns.len()
} else {
result.target_map.elems()[block + 1].get() as usize
};
let block = block as BlockIndex;
let final_start = final_insns.len() as InsnIndex;
if block == self.entry {
@@ -333,6 +389,16 @@ impl<I: VCodeInst> VCode<I> {
final_insns.push(insn.clone());
final_srclocs.push(srcloc);
}
// Was this instruction a safepoint instruction? Add its final
// index to the safepoint insn-index list if so.
if safept_idx < result.new_safepoint_insns.len()
&& (result.new_safepoint_insns[safept_idx].get() as usize) == i
{
let idx = final_insns.len() - 1;
final_safepoint_insns.push(idx as InsnIndex);
safept_idx += 1;
}
}
let final_end = final_insns.len() as InsnIndex;
@@ -344,6 +410,12 @@ impl<I: VCodeInst> VCode<I> {
self.insts = final_insns;
self.srclocs = final_srclocs;
self.block_ranges = final_block_ranges;
self.safepoint_insns = final_safepoint_insns;
// Save safepoint slot-lists. These will be passed to the `EmitState`
// for the machine backend during emission so that it can do
// target-specific translations of slot numbers to stack offsets.
self.safepoint_slots = result.stackmaps;
}
/// Emit the instructions to a `MachBuffer`, containing fixed-up code and external
@@ -353,11 +425,12 @@ impl<I: VCodeInst> VCode<I> {
I: MachInstEmit,
{
let mut buffer = MachBuffer::new();
let mut state = I::State::new(&*self.abi);
buffer.reserve_labels_for_blocks(self.num_blocks() as BlockIndex); // first N MachLabels are simply block indices.
let flags = self.abi.flags();
let mut safepoint_idx = 0;
let mut cur_srcloc = None;
for block in 0..self.num_blocks() {
let block = block as BlockIndex;
@@ -381,6 +454,19 @@ impl<I: VCodeInst> VCode<I> {
cur_srcloc = Some(srcloc);
}
if safepoint_idx < self.safepoint_insns.len()
&& self.safepoint_insns[safepoint_idx] == iix
{
if self.safepoint_slots[safepoint_idx].len() > 0 {
let stackmap = self.abi.spillslots_to_stackmap(
&self.safepoint_slots[safepoint_idx][..],
&state,
);
state.pre_safepoint(stackmap);
}
safepoint_idx += 1;
}
self.insts[iix as usize].emit(&mut buffer, flags, &mut state);
}
@@ -476,13 +562,18 @@ impl<I: VCodeInst> RegallocFunction for VCode<I> {
self.abi.get_spillslot_size(regclass, ty)
}
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, vreg: Option<VirtualReg>) -> I {
let ty = vreg.map(|v| self.vreg_type(v));
self.abi.gen_spill(to_slot, from_reg, ty)
}
fn gen_reload(
&self,
to_reg: Writable<RealReg>,
from_slot: SpillSlot,
vreg: Option<VirtualReg>,
) -> I {
let ty = vreg.map(|v| self.vreg_type(v));
self.abi.gen_reload(to_reg, from_slot, ty)
}
@@ -531,7 +622,7 @@ impl<I: VCodeInst> fmt::Debug for VCode<I> {
}
/// Pretty-printing with `RealRegUniverse` context.
impl<I: VCodeInst> ShowWithRRU for VCode<I> {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
use std::fmt::Write;
@@ -539,6 +630,8 @@ impl<I: VCodeInst + ShowWithRRU> ShowWithRRU for VCode<I> {
write!(&mut s, "VCode_ShowWithRRU {{{{\n").unwrap();
write!(&mut s, " Entry block: {}\n", self.entry).unwrap();
let mut state = Default::default();
let mut safepoint_idx = 0;
for i in 0..self.num_blocks() {
let block = i as BlockIndex;
@@ -552,11 +645,22 @@ impl<I: VCodeInst + ShowWithRRU> ShowWithRRU for VCode<I> {
let (start, end) = self.block_ranges[block as usize];
write!(&mut s, " (instruction range: {} .. {})\n", start, end).unwrap();
for inst in start..end {
if safepoint_idx < self.safepoint_insns.len()
&& self.safepoint_insns[safepoint_idx] == inst
{
write!(
&mut s,
" (safepoint: slots {:?} with EmitState {:?})\n",
self.safepoint_slots[safepoint_idx], state,
)
.unwrap();
safepoint_idx += 1;
}
write!(
&mut s,
" Inst {}: {}\n",
inst,
self.insts[inst as usize].pretty_print(mb_rru, &mut state)
)
.unwrap();
}