Merge pull request #2486 from cfallin/fix-probestack
Two Lucet-related fixes to stack overflow handling.
This commit is contained in:
@@ -12,7 +12,7 @@ use crate::{CodegenError, CodegenResult};
|
||||
use alloc::boxed::Box;
|
||||
use alloc::vec::Vec;
|
||||
use regalloc::{RealReg, Reg, RegClass, Set, Writable};
|
||||
use smallvec::SmallVec;
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
|
||||
// We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because
|
||||
// these ABIs are very similar.
|
||||
@@ -508,6 +508,12 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
||||
insts
|
||||
}
|
||||
|
||||
fn gen_probestack(_: u32) -> SmallVec<[Self::I; 2]> {
|
||||
// TODO: implement if we ever require stack probes on an AArch64 host
|
||||
// (unlikely unless Lucet is ported)
|
||||
smallvec![]
|
||||
}
|
||||
|
||||
// Returns stack bytes used as well as instructions. Does not adjust
|
||||
// nominal SP offset; abi_impl generic code will do that.
|
||||
fn gen_clobber_save(
|
||||
|
||||
@@ -10,7 +10,7 @@ use crate::{CodegenError, CodegenResult};
|
||||
use alloc::boxed::Box;
|
||||
use alloc::vec::Vec;
|
||||
use regalloc::{RealReg, Reg, RegClass, Set, Writable};
|
||||
use smallvec::SmallVec;
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
|
||||
/// Support for the ARM ABI from the callee side (within a function body).
|
||||
pub(crate) type Arm32ABICallee = ABICalleeImpl<Arm32MachineDeps>;
|
||||
@@ -305,6 +305,12 @@ impl ABIMachineSpec for Arm32MachineDeps {
|
||||
ret
|
||||
}
|
||||
|
||||
fn gen_probestack(_: u32) -> SmallVec<[Self::I; 2]> {
|
||||
// TODO: implement if we ever require stack probes on ARM32 (unlikely
|
||||
// unless Lucet is ported)
|
||||
smallvec![]
|
||||
}
|
||||
|
||||
/// Returns stack bytes used as well as instructions. Does not adjust
|
||||
/// nominal SP offset; caller will do that.
|
||||
fn gen_clobber_save(
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
//! Implementation of the standard x64 ABI.
|
||||
|
||||
use crate::ir::types::*;
|
||||
use crate::ir::{self, types, MemFlags, TrapCode, Type};
|
||||
use crate::ir::{self, types, ExternalName, LibCall, MemFlags, Opcode, TrapCode, Type};
|
||||
use crate::isa;
|
||||
use crate::isa::{x64::inst::*, CallConv};
|
||||
use crate::machinst::abi_impl::*;
|
||||
@@ -389,6 +389,22 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
||||
insts
|
||||
}
|
||||
|
||||
fn gen_probestack(frame_size: u32) -> SmallVec<[Self::I; 2]> {
|
||||
let mut insts = SmallVec::new();
|
||||
insts.push(Inst::imm(
|
||||
OperandSize::Size32,
|
||||
frame_size as u64,
|
||||
Writable::from_reg(regs::rax()),
|
||||
));
|
||||
insts.push(Inst::CallKnown {
|
||||
dest: ExternalName::LibCall(LibCall::Probestack),
|
||||
uses: vec![regs::rax()],
|
||||
defs: vec![],
|
||||
opcode: Opcode::Call,
|
||||
});
|
||||
insts
|
||||
}
|
||||
|
||||
fn gen_clobber_save(
|
||||
call_conv: isa::CallConv,
|
||||
_: &settings::Flags,
|
||||
|
||||
@@ -182,6 +182,7 @@ impl LegacyPrefixes {
|
||||
fn emit_std_enc_mem(
|
||||
sink: &mut MachBuffer<Inst>,
|
||||
state: &EmitState,
|
||||
info: &EmitInfo,
|
||||
prefixes: LegacyPrefixes,
|
||||
opcodes: u32,
|
||||
mut num_opcodes: usize,
|
||||
@@ -194,7 +195,8 @@ fn emit_std_enc_mem(
|
||||
// expression. But `enc_g` can be derived from a register of any class.
|
||||
|
||||
let srcloc = state.cur_srcloc();
|
||||
if srcloc != SourceLoc::default() && mem_e.can_trap() {
|
||||
let can_trap = mem_e.can_trap();
|
||||
if srcloc != SourceLoc::default() && can_trap {
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
|
||||
@@ -202,6 +204,12 @@ fn emit_std_enc_mem(
|
||||
|
||||
match mem_e {
|
||||
Amode::ImmReg { simm32, base, .. } => {
|
||||
// If this is an access based off of RSP, it may trap with a stack overflow if it's the
|
||||
// first touch of a new stack page.
|
||||
if *base == regs::rsp() && !can_trap && info.flags().enable_probestack() {
|
||||
sink.add_trap(srcloc, TrapCode::StackOverflow);
|
||||
}
|
||||
|
||||
// First, the REX byte.
|
||||
let enc_e = int_reg_enc(*base);
|
||||
rex.emit_two_op(sink, enc_g, enc_e);
|
||||
@@ -262,6 +270,12 @@ fn emit_std_enc_mem(
|
||||
shift,
|
||||
..
|
||||
} => {
|
||||
// If this is an access based off of RSP, it may trap with a stack overflow if it's the
|
||||
// first touch of a new stack page.
|
||||
if *reg_base == regs::rsp() && !can_trap && info.flags().enable_probestack() {
|
||||
sink.add_trap(srcloc, TrapCode::StackOverflow);
|
||||
}
|
||||
|
||||
let enc_base = int_reg_enc(*reg_base);
|
||||
let enc_index = int_reg_enc(*reg_index);
|
||||
|
||||
@@ -350,6 +364,7 @@ fn emit_std_enc_enc(
|
||||
fn emit_std_reg_mem(
|
||||
sink: &mut MachBuffer<Inst>,
|
||||
state: &EmitState,
|
||||
info: &EmitInfo,
|
||||
prefixes: LegacyPrefixes,
|
||||
opcodes: u32,
|
||||
num_opcodes: usize,
|
||||
@@ -361,6 +376,7 @@ fn emit_std_reg_mem(
|
||||
emit_std_enc_mem(
|
||||
sink,
|
||||
state,
|
||||
info,
|
||||
prefixes,
|
||||
opcodes,
|
||||
num_opcodes,
|
||||
@@ -538,6 +554,7 @@ pub(crate) fn emit(
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
state,
|
||||
info,
|
||||
LegacyPrefixes::None,
|
||||
0x0FAF,
|
||||
2,
|
||||
@@ -597,6 +614,7 @@ pub(crate) fn emit(
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
state,
|
||||
info,
|
||||
LegacyPrefixes::None,
|
||||
opcode_m,
|
||||
1,
|
||||
@@ -654,6 +672,7 @@ pub(crate) fn emit(
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
state,
|
||||
info,
|
||||
prefix,
|
||||
opcode,
|
||||
num_opcodes,
|
||||
@@ -717,7 +736,9 @@ pub(crate) fn emit(
|
||||
}
|
||||
RegMem::Mem { addr: src } => {
|
||||
let amode = src.finalize(state, sink);
|
||||
emit_std_enc_mem(sink, state, prefix, opcode, 1, subopcode, &amode, rex_flags);
|
||||
emit_std_enc_mem(
|
||||
sink, state, info, prefix, opcode, 1, subopcode, &amode, rex_flags,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -738,7 +759,9 @@ pub(crate) fn emit(
|
||||
}
|
||||
RegMem::Mem { addr: src } => {
|
||||
let amode = src.finalize(state, sink);
|
||||
emit_std_enc_mem(sink, state, prefix, 0xF7, 1, subopcode, &amode, rex_flags);
|
||||
emit_std_enc_mem(
|
||||
sink, state, info, prefix, 0xF7, 1, subopcode, &amode, rex_flags,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -987,6 +1010,7 @@ pub(crate) fn emit(
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
state,
|
||||
info,
|
||||
LegacyPrefixes::None,
|
||||
opcodes,
|
||||
num_opcodes,
|
||||
@@ -1004,6 +1028,7 @@ pub(crate) fn emit(
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
state,
|
||||
info,
|
||||
LegacyPrefixes::None,
|
||||
0x8B,
|
||||
1,
|
||||
@@ -1019,6 +1044,7 @@ pub(crate) fn emit(
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
state,
|
||||
info,
|
||||
LegacyPrefixes::None,
|
||||
0x8D,
|
||||
1,
|
||||
@@ -1081,6 +1107,7 @@ pub(crate) fn emit(
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
state,
|
||||
info,
|
||||
LegacyPrefixes::None,
|
||||
opcodes,
|
||||
num_opcodes,
|
||||
@@ -1108,7 +1135,17 @@ pub(crate) fn emit(
|
||||
};
|
||||
|
||||
// MOV r8, r/m8 is (REX.W==0) 88 /r
|
||||
emit_std_reg_mem(sink, state, LegacyPrefixes::None, 0x88, 1, *src, dst, rex)
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
state,
|
||||
info,
|
||||
LegacyPrefixes::None,
|
||||
0x88,
|
||||
1,
|
||||
*src,
|
||||
dst,
|
||||
rex,
|
||||
)
|
||||
}
|
||||
|
||||
2 => {
|
||||
@@ -1116,6 +1153,7 @@ pub(crate) fn emit(
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
state,
|
||||
info,
|
||||
LegacyPrefixes::_66,
|
||||
0x89,
|
||||
1,
|
||||
@@ -1130,6 +1168,7 @@ pub(crate) fn emit(
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
state,
|
||||
info,
|
||||
LegacyPrefixes::None,
|
||||
0x89,
|
||||
1,
|
||||
@@ -1144,6 +1183,7 @@ pub(crate) fn emit(
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
state,
|
||||
info,
|
||||
LegacyPrefixes::None,
|
||||
0x89,
|
||||
1,
|
||||
@@ -1253,6 +1293,7 @@ pub(crate) fn emit(
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
state,
|
||||
info,
|
||||
prefix,
|
||||
opcode_bytes,
|
||||
2,
|
||||
@@ -1311,7 +1352,7 @@ pub(crate) fn emit(
|
||||
let addr = &addr.finalize(state, sink);
|
||||
// Whereas here we revert to the "normal" G-E ordering.
|
||||
let opcode = if *size == 1 { 0x3A } else { 0x3B };
|
||||
emit_std_reg_mem(sink, state, prefix, opcode, 1, *reg_g, addr, rex);
|
||||
emit_std_reg_mem(sink, state, info, prefix, opcode, 1, *reg_g, addr, rex);
|
||||
}
|
||||
|
||||
RegMemImm::Imm { simm32 } => {
|
||||
@@ -1372,6 +1413,7 @@ pub(crate) fn emit(
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
state,
|
||||
info,
|
||||
prefix,
|
||||
opcode,
|
||||
2,
|
||||
@@ -1408,6 +1450,10 @@ pub(crate) fn emit(
|
||||
}
|
||||
|
||||
Inst::Push64 { src } => {
|
||||
if info.flags().enable_probestack() {
|
||||
sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow);
|
||||
}
|
||||
|
||||
match src {
|
||||
RegMemImm::Reg { reg } => {
|
||||
let enc_reg = int_reg_enc(*reg);
|
||||
@@ -1423,6 +1469,7 @@ pub(crate) fn emit(
|
||||
emit_std_enc_mem(
|
||||
sink,
|
||||
state,
|
||||
info,
|
||||
LegacyPrefixes::None,
|
||||
0xFF,
|
||||
1,
|
||||
@@ -1454,6 +1501,9 @@ pub(crate) fn emit(
|
||||
}
|
||||
|
||||
Inst::CallKnown { dest, opcode, .. } => {
|
||||
if info.flags().enable_probestack() {
|
||||
sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow);
|
||||
}
|
||||
if let Some(s) = state.take_stack_map() {
|
||||
sink.add_stack_map(StackMapExtent::UpcomingBytes(5), s);
|
||||
}
|
||||
@@ -1469,6 +1519,9 @@ pub(crate) fn emit(
|
||||
}
|
||||
|
||||
Inst::CallUnknown { dest, opcode, .. } => {
|
||||
if info.flags().enable_probestack() {
|
||||
sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow);
|
||||
}
|
||||
let start_offset = sink.cur_offset();
|
||||
match dest {
|
||||
RegMem::Reg { reg } => {
|
||||
@@ -1489,6 +1542,7 @@ pub(crate) fn emit(
|
||||
emit_std_enc_mem(
|
||||
sink,
|
||||
state,
|
||||
info,
|
||||
LegacyPrefixes::None,
|
||||
0xFF,
|
||||
1,
|
||||
@@ -1587,6 +1641,7 @@ pub(crate) fn emit(
|
||||
emit_std_enc_mem(
|
||||
sink,
|
||||
state,
|
||||
info,
|
||||
LegacyPrefixes::None,
|
||||
0xFF,
|
||||
1,
|
||||
@@ -1733,6 +1788,7 @@ pub(crate) fn emit(
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
state,
|
||||
info,
|
||||
prefix,
|
||||
opcode,
|
||||
num_opcodes,
|
||||
@@ -1863,6 +1919,7 @@ pub(crate) fn emit(
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
state,
|
||||
info,
|
||||
prefix,
|
||||
opcode,
|
||||
length,
|
||||
@@ -2012,7 +2069,17 @@ pub(crate) fn emit(
|
||||
!regs_swapped,
|
||||
"No existing way to encode a mem argument in the ModRM r/m field."
|
||||
);
|
||||
emit_std_reg_mem(sink, state, prefix, opcode, len, dst.to_reg(), addr, rex);
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
state,
|
||||
info,
|
||||
prefix,
|
||||
opcode,
|
||||
len,
|
||||
dst.to_reg(),
|
||||
addr,
|
||||
rex,
|
||||
);
|
||||
}
|
||||
}
|
||||
sink.put1(*imm);
|
||||
@@ -2045,6 +2112,7 @@ pub(crate) fn emit(
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
state,
|
||||
info,
|
||||
prefix,
|
||||
opcode,
|
||||
2,
|
||||
@@ -2109,7 +2177,17 @@ pub(crate) fn emit(
|
||||
}
|
||||
RegMem::Mem { addr } => {
|
||||
let addr = &addr.finalize(state, sink);
|
||||
emit_std_reg_mem(sink, state, prefix, opcode, 2, reg_g.to_reg(), addr, rex);
|
||||
emit_std_reg_mem(
|
||||
sink,
|
||||
state,
|
||||
info,
|
||||
prefix,
|
||||
opcode,
|
||||
2,
|
||||
reg_g.to_reg(),
|
||||
addr,
|
||||
rex,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2129,7 +2207,7 @@ pub(crate) fn emit(
|
||||
}
|
||||
RegMem::Mem { addr } => {
|
||||
let addr = &addr.finalize(state, sink);
|
||||
emit_std_reg_mem(sink, state, prefix, opcode, len, *dst, addr, rex);
|
||||
emit_std_reg_mem(sink, state, info, prefix, opcode, len, *dst, addr, rex);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2643,7 +2721,7 @@ pub(crate) fn emit(
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let amode = dst.finalize(state, sink);
|
||||
emit_std_reg_mem(sink, state, prefix, opcodes, 2, *src, &amode, rex);
|
||||
emit_std_reg_mem(sink, state, info, prefix, opcodes, 2, *src, &amode, rex);
|
||||
}
|
||||
|
||||
Inst::AtomicRmwSeq { ty, op } => {
|
||||
|
||||
@@ -4334,8 +4334,8 @@ impl LowerBackend for X64Backend {
|
||||
let ty = ctx.input_ty(ifcmp_sp, 0);
|
||||
ctx.emit(Inst::cmp_rmi_r(
|
||||
ty.bytes() as u8,
|
||||
RegMemImm::reg(operand),
|
||||
regs::rsp(),
|
||||
RegMemImm::reg(regs::rsp()),
|
||||
operand,
|
||||
));
|
||||
let cond_code = ctx.data(branches[0]).cond_code().unwrap();
|
||||
let cc = CC::from_intcc(cond_code);
|
||||
|
||||
@@ -314,6 +314,9 @@ pub trait ABIMachineSpec {
|
||||
/// Generate the usual frame-restore sequence for this architecture.
|
||||
fn gen_epilogue_frame_restore() -> SmallVec<[Self::I; 2]>;
|
||||
|
||||
/// Generate a probestack call.
///
/// `_frame_size` is the size of the frame the call should probe for. The
/// returned sequence may be empty for a backend that does not implement
/// stack probes.
|
||||
fn gen_probestack(_frame_size: u32) -> SmallVec<[Self::I; 2]>;
|
||||
|
||||
/// Generate a clobber-save sequence. This takes the list of *all* registers
|
||||
/// written/modified by the function body. The implementation here is
|
||||
/// responsible for determining which of these are callee-saved according to
|
||||
@@ -481,6 +484,9 @@ pub struct ABICalleeImpl<M: ABIMachineSpec> {
|
||||
/// manually register-allocated and carefully only use caller-saved
|
||||
/// registers and keep nothing live after this sequence of instructions.
|
||||
stack_limit: Option<(Reg, Vec<M::I>)>,
|
||||
/// Are we to invoke the probestack function in the prologue? If so,
|
||||
/// what is the minimum size at which we must invoke it?
|
||||
probestack_min_frame: Option<u32>,
|
||||
|
||||
_mach: PhantomData<M>,
|
||||
}
|
||||
@@ -536,6 +542,18 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> {
|
||||
.map(|reg| (reg, Vec::new()))
|
||||
.or_else(|| f.stack_limit.map(|gv| gen_stack_limit::<M>(f, &sig, gv)));
|
||||
|
||||
// Determine whether a probestack call is required for large enough
|
||||
// frames (and the minimum frame size if so).
|
||||
let probestack_min_frame = if flags.enable_probestack() {
|
||||
assert!(
|
||||
!flags.probestack_func_adjusts_sp(),
|
||||
"SP-adjusting probestack not supported in new backends"
|
||||
);
|
||||
Some(1 << flags.probestack_size_log2())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
sig,
|
||||
stackslots,
|
||||
@@ -550,6 +568,7 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> {
|
||||
flags,
|
||||
is_leaf: f.is_leaf(),
|
||||
stack_limit,
|
||||
probestack_min_frame,
|
||||
_mach: PhantomData,
|
||||
})
|
||||
}
|
||||
@@ -978,6 +997,11 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
insts.extend_from_slice(stack_limit_load);
|
||||
self.insert_stack_check(*reg, total_stacksize, &mut insts);
|
||||
}
|
||||
if let Some(min_frame) = &self.probestack_min_frame {
|
||||
if total_stacksize >= *min_frame {
|
||||
insts.extend(M::gen_probestack(total_stacksize));
|
||||
}
|
||||
}
|
||||
}
|
||||
if total_stacksize > 0 {
|
||||
self.fixed_frame_storage_size += total_stacksize;
|
||||
|
||||
17
cranelift/filetests/filetests/isa/x64/probestack.clif
Normal file
17
cranelift/filetests/filetests/isa/x64/probestack.clif
Normal file
@@ -0,0 +1,17 @@
|
||||
test compile
|
||||
set enable_probestack=true
|
||||
target x86_64
|
||||
feature "experimental_x64"
|
||||
|
||||
; Declares an explicit stack slot of size 100000 and returns its address; the
; expectations after the function look for a probestack call in the prologue.
function %f1() -> i64 {
|
||||
ss0 = explicit_slot 100000
|
||||
|
||||
block0:
|
||||
v1 = stack_addr.i64 ss0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: pushq %rbp
|
||||
; nextln: movq %rsp, %rbp
|
||||
; nextln: movl $$100000, %eax
|
||||
; nextln: call LibCall(Probestack)
|
||||
Reference in New Issue
Block a user