Stack overflow checking with stack probes.

This adds a libcall name, a calling convention, and settings for emitting stack probes, and implements them for x86 system_v ABIs.
2018-04-20 21:41:45 -07:00
parent c5b15c2396
commit 3b1d805758
20 changed files with 585 additions and 155 deletions
--- a/lib/codegen/meta/base/instructions.py
+++ b/lib/codegen/meta/base/instructions.py
@@ -591,12 +591,25 @@ stack_check = Instruction(

    The global variable must be accessible and naturally aligned for a
    pointer-sized value.
+
+    `stack_check` is an alternative way to detect stack overflow, when using
+    a calling convention that doesn't perform stack probes.
    """,
    ins=GV, can_trap=True)

+delta = Operand('delta', Int)
+adjust_sp_down = Instruction(
+    'adjust_sp_down', r"""
+    Subtracts ``delta`` offset value from the stack pointer register.
+
+    This instruction is used to adjust the stack pointer by a dynamic amount.
+    """,
+    ins=(delta,),
+    other_side_effects=True)
+
 StackOffset = Operand('Offset', imm64, 'Offset from current stack pointer')
-adjust_sp_imm = Instruction(
-    'adjust_sp_imm', r"""
+adjust_sp_up_imm = Instruction(
+    'adjust_sp_up_imm', r"""
    Adds ``Offset`` immediate offset value to the stack pointer register.

    This instruction is used to adjust the stack pointer, primarily in function
@@ -606,6 +619,19 @@ adjust_sp_imm = Instruction(
    ins=(StackOffset,),
    other_side_effects=True)

+StackOffset = Operand('Offset', imm64, 'Offset from current stack pointer')
+adjust_sp_down_imm = Instruction(
+    'adjust_sp_down_imm', r"""
+    Subtracts ``Offset`` immediate offset value from the stack pointer
+    register.
+
+    This instruction is used to adjust the stack pointer, primarily in function
+    prologues and epilogues. ``Offset`` is constrained to the size of a signed
+    32-bit integer.
+    """,
+    ins=(StackOffset,),
+    other_side_effects=True)
+
 f = Operand('f', iflags)

 ifcmp_sp = Instruction(
--- a/lib/codegen/meta/base/settings.py
+++ b/lib/codegen/meta/base/settings.py
@@ -38,17 +38,27 @@ call_conv = EnumSetting(
        - system_v: System V-style convention used on many platforms
        - fastcall: Windows "fastcall" convention, also used for x64 and ARM
        - baldrdash: SpiderMonkey WebAssembly convention
+        - probestack: specialized convention for the probestack function

        The default calling convention may be overridden by individual
        functions.
        """,
-        'fast', 'cold', 'system_v', 'fastcall', 'baldrdash')
+        'fast', 'cold', 'system_v', 'fastcall', 'baldrdash', 'probestack')

 # Note that Cretonne doesn't currently need an is_pie flag, because PIE is just
 # PIC where symbols can't be pre-empted, which can be expressed with the
 # `colocated` flag on external functions and global variables.
 is_pic = BoolSetting("Enable Position-Independent Code generation")

+colocated_libcalls = BoolSetting(
+        """
+        Use colocated libcalls.
+
+        Generate code that assumes that libcalls can be declared "colocated",
+        meaning they will be defined along with the current function, such that
+        they can use more efficient addressing.
+        """)
+
 return_at_end = BoolSetting(
        """
        Generate functions with at most a single return instruction at the
@@ -115,4 +125,31 @@ allones_funcaddrs = BoolSetting(
        Emit not-yet-relocated function addresses as all-ones bit patterns.
        """)

+#
+# Stack probing options.
+#
+probestack_enabled = BoolSetting(
+        """
+        Enable the use of stack probes, for calling conventions which support
+        this functionality.
+        """,
+        default=True)
+
+probestack_func_adjusts_sp = BoolSetting(
+        """
+        Set this to true if the stack probe function modifies the stack pointer
+        itself.
+        """)
+
+probestack_size_log2 = NumSetting(
+        """
+        The log2 of the size of the stack guard region.
+
+        Stack frames larger than this size will have stack overflow checked
+        by calling the probestack function.
+
+        The default is 12, which translates to a size of 4096.
+        """,
+        default=12)
+
 group.close(globals())
--- a/lib/codegen/meta/isa/x86/encodings.py
+++ b/lib/codegen/meta/isa/x86/encodings.py
@@ -136,29 +136,29 @@ for inst,               rrr in [
        (base.band_imm, 4),
        (base.bor_imm,  1),
        (base.bxor_imm, 6)]:
-    enc_i32_i64(inst, r.rib, 0x83, rrr=rrr)
-    enc_i32_i64(inst, r.rid, 0x81, rrr=rrr)
+    enc_i32_i64(inst, r.r_ib, 0x83, rrr=rrr)
+    enc_i32_i64(inst, r.r_id, 0x81, rrr=rrr)

 # TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as
 # band_imm.i32. Can even use the single-byte immediate for 0xffff_ffXX masks.

 # Immediate constants.
-X86_32.enc(base.iconst.i32, *r.puid(0xb8))
+X86_32.enc(base.iconst.i32, *r.pu_id(0xb8))

-X86_64.enc(base.iconst.i32, *r.puid.rex(0xb8))
-X86_64.enc(base.iconst.i32, *r.puid(0xb8))
+X86_64.enc(base.iconst.i32, *r.pu_id.rex(0xb8))
+X86_64.enc(base.iconst.i32, *r.pu_id(0xb8))
 # The 32-bit immediate movl also zero-extends to 64 bits.
-X86_64.enc(base.iconst.i64, *r.puid.rex(0xb8),
+X86_64.enc(base.iconst.i64, *r.pu_id.rex(0xb8),
           instp=IsUnsignedInt(UnaryImm.imm, 32))
-X86_64.enc(base.iconst.i64, *r.puid(0xb8),
+X86_64.enc(base.iconst.i64, *r.pu_id(0xb8),
           instp=IsUnsignedInt(UnaryImm.imm, 32))
 # Sign-extended 32-bit immediate.
-X86_64.enc(base.iconst.i64, *r.uid.rex(0xc7, rrr=0, w=1))
+X86_64.enc(base.iconst.i64, *r.u_id.rex(0xc7, rrr=0, w=1))
 # Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix.
-X86_64.enc(base.iconst.i64, *r.puiq.rex(0xb8, w=1))
+X86_64.enc(base.iconst.i64, *r.pu_iq.rex(0xb8, w=1))

 # bool constants.
-enc_both(base.bconst.b1, r.puid_bool, 0xb8)
+enc_both(base.bconst.b1, r.pu_id_bool, 0xb8)

 # Shifts and rotates.
 # Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit
@@ -180,7 +180,7 @@ for inst,           rrr in [
        (base.ishl_imm, 4),
        (base.ushr_imm, 5),
        (base.sshr_imm, 7)]:
-    enc_i32_i64(inst, r.rib, 0xc1, rrr=rrr)
+    enc_i32_i64(inst, r.r_ib, 0xc1, rrr=rrr)

 # Population count.
 X86_32.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt)
@@ -254,11 +254,21 @@ enc_x86_64(x86.pop.i64, r.popq, 0x58)
 X86_64.enc(base.copy_special, *r.copysp.rex(0x89, w=1))
 X86_32.enc(base.copy_special, *r.copysp(0x89))

-# Adjust SP Imm
-X86_32.enc(base.adjust_sp_imm, *r.adjustsp8(0x83))
-X86_32.enc(base.adjust_sp_imm, *r.adjustsp32(0x81))
-X86_64.enc(base.adjust_sp_imm, *r.adjustsp8.rex(0x83, w=1))
-X86_64.enc(base.adjust_sp_imm, *r.adjustsp32.rex(0x81, w=1))
+# Adjust SP down by a dynamic value (or up, with a negative operand).
+X86_32.enc(base.adjust_sp_down.i32, *r.adjustsp(0x29))
+X86_64.enc(base.adjust_sp_down.i64, *r.adjustsp.rex(0x29, w=1))
+
+# Adjust SP up by an immediate (or down, with a negative immediate)
+X86_32.enc(base.adjust_sp_up_imm, *r.adjustsp_ib(0x83))
+X86_32.enc(base.adjust_sp_up_imm, *r.adjustsp_id(0x81))
+X86_64.enc(base.adjust_sp_up_imm, *r.adjustsp_ib.rex(0x83, w=1))
+X86_64.enc(base.adjust_sp_up_imm, *r.adjustsp_id.rex(0x81, w=1))
+
+# Adjust SP down by an immediate (or up, with a negative immediate)
+X86_32.enc(base.adjust_sp_down_imm, *r.adjustsp_ib(0x83, rrr=5))
+X86_32.enc(base.adjust_sp_down_imm, *r.adjustsp_id(0x81, rrr=5))
+X86_64.enc(base.adjust_sp_down_imm, *r.adjustsp_ib.rex(0x83, rrr=5, w=1))
+X86_64.enc(base.adjust_sp_down_imm, *r.adjustsp_id.rex(0x81, rrr=5, w=1))

 #
 # Float loads and stores.
@@ -406,11 +416,11 @@ X86_64.enc(base.trapff, r.trapff, 0)
 # Comparisons
 #
 enc_i32_i64(base.icmp, r.icscc, 0x39)
-enc_i32_i64(base.icmp_imm, r.icsccib, 0x83, rrr=7)
-enc_i32_i64(base.icmp_imm, r.icsccid, 0x81, rrr=7)
+enc_i32_i64(base.icmp_imm, r.icscc_ib, 0x83, rrr=7)
+enc_i32_i64(base.icmp_imm, r.icscc_id, 0x81, rrr=7)
 enc_i32_i64(base.ifcmp, r.rcmp, 0x39)
-enc_i32_i64(base.ifcmp_imm, r.rcmpib, 0x83, rrr=7)
-enc_i32_i64(base.ifcmp_imm, r.rcmpid, 0x81, rrr=7)
+enc_i32_i64(base.ifcmp_imm, r.rcmp_ib, 0x83, rrr=7)
+enc_i32_i64(base.ifcmp_imm, r.rcmp_id, 0x81, rrr=7)
 # TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x).

 X86_32.enc(base.ifcmp_sp.i32, *r.rcmp_sp(0x39))
--- a/lib/codegen/meta/isa/x86/recipes.py
+++ b/lib/codegen/meta/isa/x86/recipes.py
@@ -480,8 +480,8 @@ mulx = TailRecipe(
        ''')

 # XX /n ib with 8-bit immediate sign-extended.
-rib = TailRecipe(
-        'rib', BinaryImm, size=2, ins=GPR, outs=0,
+r_ib = TailRecipe(
+        'r_ib', BinaryImm, size=2, ins=GPR, outs=0,
        instp=IsSignedInt(BinaryImm.imm, 8),
        emit='''
        PUT_OP(bits, rex1(in_reg0), sink);
@@ -491,8 +491,8 @@ rib = TailRecipe(
        ''')

 # XX /n id with 32-bit immediate sign-extended.
-rid = TailRecipe(
-        'rid', BinaryImm, size=5, ins=GPR, outs=0,
+r_id = TailRecipe(
+        'r_id', BinaryImm, size=5, ins=GPR, outs=0,
        instp=IsSignedInt(BinaryImm.imm, 32),
        emit='''
        PUT_OP(bits, rex1(in_reg0), sink);
@@ -502,8 +502,8 @@ rid = TailRecipe(
        ''')

 # XX /n id with 32-bit immediate sign-extended. UnaryImm version.
-uid = TailRecipe(
-        'uid', UnaryImm, size=5, ins=(), outs=GPR,
+u_id = TailRecipe(
+        'u_id', UnaryImm, size=5, ins=(), outs=GPR,
        instp=IsSignedInt(UnaryImm.imm, 32),
        emit='''
        PUT_OP(bits, rex1(out_reg0), sink);
@@ -513,8 +513,8 @@ uid = TailRecipe(
        ''')

 # XX+rd id unary with 32-bit immediate. Note no recipe predicate.
-puid = TailRecipe(
-        'puid', UnaryImm, size=4, ins=(), outs=GPR,
+pu_id = TailRecipe(
+        'pu_id', UnaryImm, size=4, ins=(), outs=GPR,
        emit='''
        // The destination register is encoded in the low bits of the opcode.
        // No ModR/M.
@@ -524,8 +524,8 @@ puid = TailRecipe(
        ''')

 # XX+rd id unary with bool immediate. Note no recipe predicate.
-puid_bool = TailRecipe(
-        'puid_bool', UnaryBool, size=4, ins=(), outs=GPR,
+pu_id_bool = TailRecipe(
+        'pu_id_bool', UnaryBool, size=4, ins=(), outs=GPR,
        emit='''
        // The destination register is encoded in the low bits of the opcode.
        // No ModR/M.
@@ -535,8 +535,8 @@ puid_bool = TailRecipe(
        ''')

 # XX+rd iq unary with 64-bit immediate.
-puiq = TailRecipe(
-        'puiq', UnaryImm, size=8, ins=(), outs=GPR,
+pu_iq = TailRecipe(
+        'pu_iq', UnaryImm, size=8, ins=(), outs=GPR,
        emit='''
        PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink);
        let imm: i64 = imm.into();
@@ -564,8 +564,15 @@ copysp = TailRecipe(
        modrm_rr(dst, src, sink);
        ''')

-adjustsp8 = TailRecipe(
-    'adjustsp8', UnaryImm, size=2, ins=(), outs=(),
+adjustsp = TailRecipe(
+    'adjustsp', Unary, size=1, ins=(GPR), outs=(),
+    emit='''
+    PUT_OP(bits, rex2(RU::rsp.into(), in_reg0), sink);
+    modrm_rr(RU::rsp.into(), in_reg0, sink);
+    ''')
+
+adjustsp_ib = TailRecipe(
+    'adjustsp_ib', UnaryImm, size=2, ins=(), outs=(),
    instp=IsSignedInt(UnaryImm.imm, 8),
    emit='''
    PUT_OP(bits, rex1(RU::rsp.into()), sink);
@@ -574,8 +581,8 @@ adjustsp8 = TailRecipe(
    sink.put1(imm as u8);
    ''')

-adjustsp32 = TailRecipe(
-    'adjustsp32', UnaryImm, size=5, ins=(), outs=(),
+adjustsp_id = TailRecipe(
+    'adjustsp_id', UnaryImm, size=5, ins=(), outs=(),
    instp=IsSignedInt(UnaryImm.imm, 32),
    emit='''
    PUT_OP(bits, rex1(RU::rsp.into()), sink);
@@ -1217,8 +1224,8 @@ fcmp = TailRecipe(
        ''')

 # XX /n, MI form with imm8.
-rcmpib = TailRecipe(
-        'rcmpib', BinaryImm, size=2, ins=GPR, outs=FLAG.rflags,
+rcmp_ib = TailRecipe(
+        'rcmp_ib', BinaryImm, size=2, ins=GPR, outs=FLAG.rflags,
        instp=IsSignedInt(BinaryImm.imm, 8),
        emit='''
        PUT_OP(bits, rex1(in_reg0), sink);
@@ -1228,8 +1235,8 @@ rcmpib = TailRecipe(
        ''')

 # XX /n, MI form with imm32.
-rcmpid = TailRecipe(
-        'rcmpid', BinaryImm, size=5, ins=GPR, outs=FLAG.rflags,
+rcmp_id = TailRecipe(
+        'rcmp_id', BinaryImm, size=5, ins=GPR, outs=FLAG.rflags,
        instp=IsSignedInt(BinaryImm.imm, 32),
        emit='''
        PUT_OP(bits, rex1(in_reg0), sink);
@@ -1401,8 +1408,8 @@ icscc = TailRecipe(
        modrm_rr(out_reg0, 0, sink);
        ''')

-icsccib = TailRecipe(
-        'icsccib', IntCompareImm, size=2 + 3, ins=GPR, outs=ABCD,
+icscc_ib = TailRecipe(
+        'icscc_ib', IntCompareImm, size=2 + 3, ins=GPR, outs=ABCD,
        instp=IsSignedInt(IntCompareImm.imm, 8),
        emit='''
        // Comparison instruction.
@@ -1429,8 +1436,8 @@ icsccib = TailRecipe(
        modrm_rr(out_reg0, 0, sink);
        ''')

-icsccid = TailRecipe(
-        'icsccid', IntCompareImm, size=5 + 3, ins=GPR, outs=ABCD,
+icscc_id = TailRecipe(
+        'icscc_id', IntCompareImm, size=5 + 3, ins=GPR, outs=ABCD,
        instp=IsSignedInt(IntCompareImm.imm, 32),
        emit='''
        // Comparison instruction.
--- a/lib/codegen/src/ir/libcall.rs
+++ b/lib/codegen/src/ir/libcall.rs
@@ -1,6 +1,9 @@
 //! Naming well-known routines in the runtime library.

-use ir::{types, Opcode, Type};
+use ir::{types, Opcode, Type, Inst, Function, FuncRef, ExternalName, Signature, AbiParam,
+         ExtFuncData, ArgumentPurpose};
+use settings::CallConv;
+use isa::{TargetIsa, RegUnit};
 use std::fmt;
 use std::str::FromStr;

@@ -14,6 +17,9 @@ use std::str::FromStr;
 /// This list is likely to grow over time.
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub enum LibCall {
+    /// Probe for stack overflow. These are emitted for functions which need
+    /// them when the `probestack_enabled` setting is true.
+    Probestack,
    /// ceil.f32
    CeilF32,
    /// ceil.f64
@@ -32,7 +38,8 @@ pub enum LibCall {
    NearestF64,
 }

-const NAME: [&str; 8] = [
+const NAME: [&str; 9] = [
+    "Probestack",
    "CeilF32",
    "CeilF64",
    "FloorF32",
@@ -54,6 +61,7 @@ impl FromStr for LibCall {

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
+            "Probestack" => Ok(LibCall::Probestack),
            "CeilF32" => Ok(LibCall::CeilF32),
            "CeilF64" => Ok(LibCall::CeilF64),
            "FloorF32" => Ok(LibCall::FloorF32),
@@ -97,6 +105,96 @@ impl LibCall {
    }
 }

+/// Get a function reference for `libcall` in `func`, following the signature
+/// for `inst`.
+///
+/// If there is an existing reference, use it, otherwise make a new one.
+pub fn get_libcall_funcref(
+    libcall: LibCall,
+    func: &mut Function,
+    inst: Inst,
+    isa: &TargetIsa,
+) -> FuncRef {
+    find_funcref(libcall, func).unwrap_or_else(|| make_funcref_for_inst(libcall, func, inst, isa))
+}
+
+/// Get a function reference for the probestack function in `func`.
+///
+/// If there is an existing reference, use it, otherwise make a new one.
+pub fn get_probestack_funcref(
+    func: &mut Function,
+    reg_type: Type,
+    arg_reg: RegUnit,
+    isa: &TargetIsa,
+) -> FuncRef {
+    find_funcref(LibCall::Probestack, func).unwrap_or_else(|| {
+        make_funcref_for_probestack(func, reg_type, arg_reg, isa)
+    })
+}
+
+/// Get the existing function reference for `libcall` in `func` if it exists.
+fn find_funcref(libcall: LibCall, func: &Function) -> Option<FuncRef> {
+    // We're assuming that all libcall function decls are at the end.
+    // If we get this wrong, worst case we'll have duplicate libcall decls which is harmless.
+    for (fref, func_data) in func.dfg.ext_funcs.iter().rev() {
+        match func_data.name {
+            ExternalName::LibCall(lc) => {
+                if lc == libcall {
+                    return Some(fref);
+                }
+            }
+            _ => break,
+        }
+    }
+    None
+}
+
+/// Create a funcref for `LibCall::Probestack`.
+fn make_funcref_for_probestack(
+    func: &mut Function,
+    reg_type: Type,
+    arg_reg: RegUnit,
+    isa: &TargetIsa,
+) -> FuncRef {
+    let mut sig = Signature::new(CallConv::Probestack);
+    let rax = AbiParam::special_reg(reg_type, ArgumentPurpose::Normal, arg_reg);
+    sig.params.push(rax);
+    if !isa.flags().probestack_func_adjusts_sp() {
+        sig.returns.push(rax);
+    }
+    make_funcref(LibCall::Probestack, func, sig, isa)
+}
+
+/// Create a funcref for `libcall` with a signature matching `inst`.
+fn make_funcref_for_inst(
+    libcall: LibCall,
+    func: &mut Function,
+    inst: Inst,
+    isa: &TargetIsa,
+) -> FuncRef {
+    // Start with the default calling convention. We'll give the ISA a chance to change it.
+    let mut sig = Signature::new(isa.flags().call_conv());
+    for &v in func.dfg.inst_args(inst) {
+        sig.params.push(AbiParam::new(func.dfg.value_type(v)));
+    }
+    for &v in func.dfg.inst_results(inst) {
+        sig.returns.push(AbiParam::new(func.dfg.value_type(v)));
+    }
+
+    make_funcref(libcall, func, sig, isa)
+}
+
+/// Create a funcref for `libcall`.
+fn make_funcref(libcall: LibCall, func: &mut Function, sig: Signature, isa: &TargetIsa) -> FuncRef {
+    let sigref = func.import_signature(sig);
+
+    func.import_function(ExtFuncData {
+        name: ExternalName::LibCall(libcall),
+        signature: sigref,
+        colocated: isa.flags().colocated_libcalls(),
+    })
+}
+
 #[cfg(test)]
 mod test {
    use super::*;
--- a/lib/codegen/src/ir/mod.rs
+++ b/lib/codegen/src/ir/mod.rs
@@ -33,7 +33,7 @@ pub use ir::heap::{HeapBase, HeapData, HeapStyle};
 pub use ir::instructions::{InstructionData, Opcode, ValueList, ValueListPool, VariableArgs};
 pub use ir::jumptable::JumpTableData;
 pub use ir::layout::Layout;
-pub use ir::libcall::LibCall;
+pub use ir::libcall::{LibCall, get_libcall_funcref, get_probestack_funcref};
 pub use ir::memflags::MemFlags;
 pub use ir::progpoint::{ExpandedProgramPoint, ProgramOrder, ProgramPoint};
 pub use ir::sourceloc::SourceLoc;
--- a/lib/codegen/src/isa/x86/abi.rs
+++ b/lib/codegen/src/isa/x86/abi.rs
@@ -6,7 +6,8 @@ use cursor::{Cursor, CursorPosition, EncCursor};
 use ir;
 use ir::immediates::Imm64;
 use ir::stackslot::{StackOffset, StackSize};
-use ir::{AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, InstBuilder, ValueLoc};
+use ir::{AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, InstBuilder, ValueLoc,
+         get_probestack_funcref};
 use isa::{RegClass, RegUnit, TargetIsa};
 use regalloc::RegisterSet;
 use result;
@@ -216,10 +217,16 @@ pub fn prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> result::Ct
        }
        CallConv::Fastcall => unimplemented!("Windows calling conventions"),
        CallConv::Baldrdash => baldrdash_prologue_epilogue(func, isa),
+        CallConv::Probestack => unimplemented!("probestack calling convention"),
    }
 }

 pub fn baldrdash_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> result::CtonResult {
+    debug_assert!(
+        !isa.flags().probestack_enabled(),
+        "baldrdash does not expect cretonne to emit stack probes"
+    );
+
    // Baldrdash on 32-bit x86 always aligns its stack pointer to 16 bytes.
    let stack_align = 16;
    let word_size = if isa.flags().is_64bit() { 8 } else { 4 };
@@ -239,7 +246,7 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r
    // newer versions use a 16-byte aligned stack pointer.
    let stack_align = 16;
    let word_size = if isa.flags().is_64bit() { 8 } else { 4 };
-    let csr_type = if isa.flags().is_64bit() {
+    let reg_type = if isa.flags().is_64bit() {
        ir::types::I64
    } else {
        ir::types::I32
@@ -266,7 +273,7 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r

    // Add CSRs to function signature
    let fp_arg = ir::AbiParam::special_reg(
-        csr_type,
+        reg_type,
        ir::ArgumentPurpose::FramePointer,
        RU::rbp as RegUnit,
    );
@@ -274,7 +281,7 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r
    func.signature.returns.push(fp_arg);

    for csr in csrs.iter(GPR) {
-        let csr_arg = ir::AbiParam::special_reg(csr_type, ir::ArgumentPurpose::CalleeSaved, csr);
+        let csr_arg = ir::AbiParam::special_reg(reg_type, ir::ArgumentPurpose::CalleeSaved, csr);
        func.signature.params.push(csr_arg);
        func.signature.returns.push(csr_arg);
    }
@@ -282,11 +289,11 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r
    // Set up the cursor and insert the prologue
    let entry_ebb = func.layout.entry_block().expect("missing entry block");
    let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_ebb);
-    insert_system_v_prologue(&mut pos, local_stack_size, csr_type, &csrs);
+    insert_system_v_prologue(&mut pos, local_stack_size, reg_type, &csrs, isa);

    // Reset the cursor and insert the epilogue
    let mut pos = pos.at_position(CursorPosition::Nowhere);
-    insert_system_v_epilogues(&mut pos, local_stack_size, csr_type, &csrs);
+    insert_system_v_epilogues(&mut pos, local_stack_size, reg_type, &csrs);

    Ok(())
 }
@@ -295,12 +302,13 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r
 fn insert_system_v_prologue(
    pos: &mut EncCursor,
    stack_size: i64,
-    csr_type: ir::types::Type,
+    reg_type: ir::types::Type,
    csrs: &RegisterSet,
+    isa: &TargetIsa,
 ) {
    // Append param to entry EBB
    let ebb = pos.current_ebb().expect("missing ebb under cursor");
-    let fp = pos.func.dfg.append_ebb_param(ebb, csr_type);
+    let fp = pos.func.dfg.append_ebb_param(ebb, reg_type);
    pos.func.locations[fp] = ir::ValueLoc::Reg(RU::rbp as RegUnit);

    pos.ins().x86_push(fp);
@@ -311,7 +319,7 @@ fn insert_system_v_prologue(

    for reg in csrs.iter(GPR) {
        // Append param to entry EBB
-        let csr_arg = pos.func.dfg.append_ebb_param(ebb, csr_type);
+        let csr_arg = pos.func.dfg.append_ebb_param(ebb, reg_type);

        // Assign it a location
        pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
@@ -320,8 +328,48 @@ fn insert_system_v_prologue(
        pos.ins().x86_push(csr_arg);
    }

+    // Allocate stack frame storage.
    if stack_size > 0 {
-        pos.ins().adjust_sp_imm(Imm64::new(-stack_size));
+        if isa.flags().probestack_enabled() &&
+            stack_size > (1 << isa.flags().probestack_size_log2())
+        {
+            // Emit a stack probe.
+            let rax = RU::rax as RegUnit;
+            let rax_val = ir::ValueLoc::Reg(rax);
+
+            // The probestack function expects its input in %rax.
+            let arg = pos.ins().iconst(reg_type, stack_size);
+            pos.func.locations[arg] = rax_val;
+
+            // Look up (or create) the funcref for the probestack function.
+            let callee = get_probestack_funcref(pos.func, reg_type, rax, isa);
+
+            // Make the call.
+            let call = if !isa.flags().is_pic() && isa.flags().is_64bit() &&
+                !pos.func.dfg.ext_funcs[callee].colocated
+            {
+                // 64-bit non-PIC non-colocated calls need to be legalized to call_indirect.
+                // Use r11 as it may be clobbered under all supported calling conventions.
+                let r11 = RU::r11 as RegUnit;
+                let sig = pos.func.dfg.ext_funcs[callee].signature;
+                let addr = pos.ins().func_addr(reg_type, callee);
+                pos.func.locations[addr] = ir::ValueLoc::Reg(r11);
+                pos.ins().call_indirect(sig, addr, &[arg])
+            } else {
+                // Otherwise just do a normal call.
+                pos.ins().call(callee, &[arg])
+            };
+
+            // If the probestack function doesn't adjust sp, do it ourselves.
+            if !isa.flags().probestack_func_adjusts_sp() {
+                let result = pos.func.dfg.inst_results(call)[0];
+                pos.func.locations[result] = rax_val;
+                pos.ins().adjust_sp_down(result);
+            }
+        } else {
+            // Simply decrement the stack pointer.
+            pos.ins().adjust_sp_down_imm(Imm64::new(stack_size));
+        }
    }
 }

@@ -329,14 +377,14 @@ fn insert_system_v_prologue(
 fn insert_system_v_epilogues(
    pos: &mut EncCursor,
    stack_size: i64,
-    csr_type: ir::types::Type,
+    reg_type: ir::types::Type,
    csrs: &RegisterSet,
 ) {
    while let Some(ebb) = pos.next_ebb() {
        pos.goto_last_inst(ebb);
        if let Some(inst) = pos.current_inst() {
            if pos.func.dfg[inst].opcode().is_return() {
-                insert_system_v_epilogue(inst, stack_size, pos, csr_type, csrs);
+                insert_system_v_epilogue(inst, stack_size, pos, reg_type, csrs);
            }
        }
    }
@@ -347,23 +395,23 @@ fn insert_system_v_epilogue(
    inst: ir::Inst,
    stack_size: i64,
    pos: &mut EncCursor,
-    csr_type: ir::types::Type,
+    reg_type: ir::types::Type,
    csrs: &RegisterSet,
 ) {
    if stack_size > 0 {
-        pos.ins().adjust_sp_imm(Imm64::new(stack_size));
+        pos.ins().adjust_sp_up_imm(Imm64::new(stack_size));
    }

    // Pop all the callee-saved registers, stepping backward each time to
    // preserve the correct order.
-    let fp_ret = pos.ins().x86_pop(csr_type);
+    let fp_ret = pos.ins().x86_pop(reg_type);
    pos.prev_inst();

    pos.func.locations[fp_ret] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
    pos.func.dfg.append_inst_arg(inst, fp_ret);

    for reg in csrs.iter(GPR) {
-        let csr_ret = pos.ins().x86_pop(csr_type);
+        let csr_ret = pos.ins().x86_pop(reg_type);
        pos.prev_inst();

        pos.func.locations[csr_ret] = ir::ValueLoc::Reg(reg);
--- a/lib/codegen/src/legalizer/libcall.rs
+++ b/lib/codegen/src/legalizer/libcall.rs
@@ -1,7 +1,7 @@
 //! Expanding instructions as runtime library calls.

 use ir;
-use ir::InstBuilder;
+use ir::{InstBuilder, get_libcall_funcref};
 use std::vec::Vec;
 use isa::TargetIsa;

@@ -14,58 +14,14 @@ pub fn expand_as_libcall(inst: ir::Inst, func: &mut ir::Function, isa: &TargetIs
            None => return false,
        };

-    let funcref =
-        find_funcref(libcall, func).unwrap_or_else(|| make_funcref(libcall, inst, func, isa));
-
    // Now we convert `inst` to a call. First save the arguments.
    let mut args = Vec::new();
    args.extend_from_slice(func.dfg.inst_args(inst));
    // The replace builder will preserve the instruction result values.
+    let funcref = get_libcall_funcref(libcall, func, inst, isa);
    func.dfg.replace(inst).call(funcref, &args);

    // TODO: ask the ISA to legalize the signature.

    true
 }
-
-/// Get the existing function reference for `libcall` in `func` if it exists.
-fn find_funcref(libcall: ir::LibCall, func: &ir::Function) -> Option<ir::FuncRef> {
-    // We're assuming that all libcall function decls are at the end.
-    // If we get this wrong, worst case we'll have duplicate libcall decls which is harmless.
-    for (fref, func_data) in func.dfg.ext_funcs.iter().rev() {
-        match func_data.name {
-            ir::ExternalName::LibCall(lc) => {
-                if lc == libcall {
-                    return Some(fref);
-                }
-            }
-            _ => break,
-        }
-    }
-    None
-}
-
-/// Create a funcref for `libcall` with a signature matching `inst`.
-fn make_funcref(
-    libcall: ir::LibCall,
-    inst: ir::Inst,
-    func: &mut ir::Function,
-    isa: &TargetIsa,
-) -> ir::FuncRef {
-    // Start with a fast calling convention. We'll give the ISA a chance to change it.
-    let mut sig = ir::Signature::new(isa.flags().call_conv());
-    for &v in func.dfg.inst_args(inst) {
-        sig.params.push(ir::AbiParam::new(func.dfg.value_type(v)));
-    }
-    for &v in func.dfg.inst_results(inst) {
-        sig.returns.push(ir::AbiParam::new(func.dfg.value_type(v)));
-    }
-    let sigref = func.import_signature(sig);
-
-    // TODO: Can libcalls be colocated in some circumstances?
-    func.import_function(ir::ExtFuncData {
-        name: ir::ExternalName::LibCall(libcall),
-        signature: sigref,
-        colocated: false,
-    })
-}
--- a/lib/codegen/src/settings.rs
+++ b/lib/codegen/src/settings.rs
@@ -363,6 +363,7 @@ mod tests {
             is_64bit = false\n\
             call_conv = \"fast\"\n\
             is_pic = false\n\
+             colocated_libcalls = false\n\
             return_at_end = false\n\
             avoid_div_traps = false\n\
             is_compressed = false\n\
@@ -370,7 +371,10 @@ mod tests {
             enable_simd = true\n\
             enable_atomics = true\n\
             baldrdash_prologue_words = 0\n\
-             allones_funcaddrs = false\n"
+             allones_funcaddrs = false\n\
+             probestack_enabled = true\n\
+             probestack_func_adjusts_sp = false\n\
+             probestack_size_log2 = 12\n"
        );
        assert_eq!(f.opt_level(), super::OptLevel::Default);
        assert_eq!(f.enable_simd(), true);