Merge pull request #2486 from cfallin/fix-probestack

Two Lucet-related fixes to stack overflow handling.
This commit is contained in:
Chris Fallin
2020-12-07 16:47:37 -08:00
committed by GitHub
8 changed files with 169 additions and 19 deletions

View File

@@ -12,7 +12,7 @@ use crate::{CodegenError, CodegenResult};
use alloc::boxed::Box;
use alloc::vec::Vec;
use regalloc::{RealReg, Reg, RegClass, Set, Writable};
use smallvec::SmallVec;
use smallvec::{smallvec, SmallVec};
// We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because
// these ABIs are very similar.
@@ -508,6 +508,12 @@ impl ABIMachineSpec for AArch64MachineDeps {
insts
}
fn gen_probestack(_: u32) -> SmallVec<[Self::I; 2]> {
// TODO: implement if we ever require stack probes on an AArch64 host
// (unlikely unless Lucet is ported)
smallvec![]
}
// Returns stack bytes used as well as instructions. Does not adjust
// nominal SP offset; abi_impl generic code will do that.
fn gen_clobber_save(

View File

@@ -10,7 +10,7 @@ use crate::{CodegenError, CodegenResult};
use alloc::boxed::Box;
use alloc::vec::Vec;
use regalloc::{RealReg, Reg, RegClass, Set, Writable};
use smallvec::SmallVec;
use smallvec::{smallvec, SmallVec};
/// Support for the ARM ABI from the callee side (within a function body).
pub(crate) type Arm32ABICallee = ABICalleeImpl<Arm32MachineDeps>;
@@ -305,6 +305,12 @@ impl ABIMachineSpec for Arm32MachineDeps {
ret
}
fn gen_probestack(_: u32) -> SmallVec<[Self::I; 2]> {
// TODO: implement if we ever require stack probes on ARM32 (unlikely
// unless Lucet is ported)
smallvec![]
}
/// Returns stack bytes used as well as instructions. Does not adjust
/// nominal SP offset; caller will do that.
fn gen_clobber_save(

View File

@@ -1,7 +1,7 @@
//! Implementation of the standard x64 ABI.
use crate::ir::types::*;
use crate::ir::{self, types, MemFlags, TrapCode, Type};
use crate::ir::{self, types, ExternalName, LibCall, MemFlags, Opcode, TrapCode, Type};
use crate::isa;
use crate::isa::{x64::inst::*, CallConv};
use crate::machinst::abi_impl::*;
@@ -389,6 +389,22 @@ impl ABIMachineSpec for X64ABIMachineSpec {
insts
}
fn gen_probestack(frame_size: u32) -> SmallVec<[Self::I; 2]> {
let mut insts = SmallVec::new();
insts.push(Inst::imm(
OperandSize::Size32,
frame_size as u64,
Writable::from_reg(regs::rax()),
));
insts.push(Inst::CallKnown {
dest: ExternalName::LibCall(LibCall::Probestack),
uses: vec![regs::rax()],
defs: vec![],
opcode: Opcode::Call,
});
insts
}
fn gen_clobber_save(
call_conv: isa::CallConv,
_: &settings::Flags,

View File

@@ -182,6 +182,7 @@ impl LegacyPrefixes {
fn emit_std_enc_mem(
sink: &mut MachBuffer<Inst>,
state: &EmitState,
info: &EmitInfo,
prefixes: LegacyPrefixes,
opcodes: u32,
mut num_opcodes: usize,
@@ -194,7 +195,8 @@ fn emit_std_enc_mem(
// expression. But `enc_g` can be derived from a register of any class.
let srcloc = state.cur_srcloc();
if srcloc != SourceLoc::default() && mem_e.can_trap() {
let can_trap = mem_e.can_trap();
if srcloc != SourceLoc::default() && can_trap {
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
}
@@ -202,6 +204,12 @@ fn emit_std_enc_mem(
match mem_e {
Amode::ImmReg { simm32, base, .. } => {
// If this is an access based off of RSP, it may trap with a stack overflow if it's the
// first touch of a new stack page.
if *base == regs::rsp() && !can_trap && info.flags().enable_probestack() {
sink.add_trap(srcloc, TrapCode::StackOverflow);
}
// First, the REX byte.
let enc_e = int_reg_enc(*base);
rex.emit_two_op(sink, enc_g, enc_e);
@@ -262,6 +270,12 @@ fn emit_std_enc_mem(
shift,
..
} => {
// If this is an access based off of RSP, it may trap with a stack overflow if it's the
// first touch of a new stack page.
if *reg_base == regs::rsp() && !can_trap && info.flags().enable_probestack() {
sink.add_trap(srcloc, TrapCode::StackOverflow);
}
let enc_base = int_reg_enc(*reg_base);
let enc_index = int_reg_enc(*reg_index);
@@ -350,6 +364,7 @@ fn emit_std_enc_enc(
fn emit_std_reg_mem(
sink: &mut MachBuffer<Inst>,
state: &EmitState,
info: &EmitInfo,
prefixes: LegacyPrefixes,
opcodes: u32,
num_opcodes: usize,
@@ -361,6 +376,7 @@ fn emit_std_reg_mem(
emit_std_enc_mem(
sink,
state,
info,
prefixes,
opcodes,
num_opcodes,
@@ -538,6 +554,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
LegacyPrefixes::None,
0x0FAF,
2,
@@ -597,6 +614,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
LegacyPrefixes::None,
opcode_m,
1,
@@ -654,6 +672,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
prefix,
opcode,
num_opcodes,
@@ -717,7 +736,9 @@ pub(crate) fn emit(
}
RegMem::Mem { addr: src } => {
let amode = src.finalize(state, sink);
emit_std_enc_mem(sink, state, prefix, opcode, 1, subopcode, &amode, rex_flags);
emit_std_enc_mem(
sink, state, info, prefix, opcode, 1, subopcode, &amode, rex_flags,
);
}
}
}
@@ -738,7 +759,9 @@ pub(crate) fn emit(
}
RegMem::Mem { addr: src } => {
let amode = src.finalize(state, sink);
emit_std_enc_mem(sink, state, prefix, 0xF7, 1, subopcode, &amode, rex_flags);
emit_std_enc_mem(
sink, state, info, prefix, 0xF7, 1, subopcode, &amode, rex_flags,
);
}
}
}
@@ -987,6 +1010,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
LegacyPrefixes::None,
opcodes,
num_opcodes,
@@ -1004,6 +1028,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
LegacyPrefixes::None,
0x8B,
1,
@@ -1019,6 +1044,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
LegacyPrefixes::None,
0x8D,
1,
@@ -1081,6 +1107,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
LegacyPrefixes::None,
opcodes,
num_opcodes,
@@ -1108,7 +1135,17 @@ pub(crate) fn emit(
};
// MOV r8, r/m8 is (REX.W==0) 88 /r
emit_std_reg_mem(sink, state, LegacyPrefixes::None, 0x88, 1, *src, dst, rex)
emit_std_reg_mem(
sink,
state,
info,
LegacyPrefixes::None,
0x88,
1,
*src,
dst,
rex,
)
}
2 => {
@@ -1116,6 +1153,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
LegacyPrefixes::_66,
0x89,
1,
@@ -1130,6 +1168,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
LegacyPrefixes::None,
0x89,
1,
@@ -1144,6 +1183,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
LegacyPrefixes::None,
0x89,
1,
@@ -1253,6 +1293,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
prefix,
opcode_bytes,
2,
@@ -1311,7 +1352,7 @@ pub(crate) fn emit(
let addr = &addr.finalize(state, sink);
// Whereas here we revert to the "normal" G-E ordering.
let opcode = if *size == 1 { 0x3A } else { 0x3B };
emit_std_reg_mem(sink, state, prefix, opcode, 1, *reg_g, addr, rex);
emit_std_reg_mem(sink, state, info, prefix, opcode, 1, *reg_g, addr, rex);
}
RegMemImm::Imm { simm32 } => {
@@ -1372,6 +1413,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
prefix,
opcode,
2,
@@ -1408,6 +1450,10 @@ pub(crate) fn emit(
}
Inst::Push64 { src } => {
if info.flags().enable_probestack() {
sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow);
}
match src {
RegMemImm::Reg { reg } => {
let enc_reg = int_reg_enc(*reg);
@@ -1423,6 +1469,7 @@ pub(crate) fn emit(
emit_std_enc_mem(
sink,
state,
info,
LegacyPrefixes::None,
0xFF,
1,
@@ -1454,6 +1501,9 @@ pub(crate) fn emit(
}
Inst::CallKnown { dest, opcode, .. } => {
if info.flags().enable_probestack() {
sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow);
}
if let Some(s) = state.take_stack_map() {
sink.add_stack_map(StackMapExtent::UpcomingBytes(5), s);
}
@@ -1469,6 +1519,9 @@ pub(crate) fn emit(
}
Inst::CallUnknown { dest, opcode, .. } => {
if info.flags().enable_probestack() {
sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow);
}
let start_offset = sink.cur_offset();
match dest {
RegMem::Reg { reg } => {
@@ -1489,6 +1542,7 @@ pub(crate) fn emit(
emit_std_enc_mem(
sink,
state,
info,
LegacyPrefixes::None,
0xFF,
1,
@@ -1587,6 +1641,7 @@ pub(crate) fn emit(
emit_std_enc_mem(
sink,
state,
info,
LegacyPrefixes::None,
0xFF,
1,
@@ -1733,6 +1788,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
prefix,
opcode,
num_opcodes,
@@ -1863,6 +1919,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
prefix,
opcode,
length,
@@ -2012,7 +2069,17 @@ pub(crate) fn emit(
!regs_swapped,
"No existing way to encode a mem argument in the ModRM r/m field."
);
emit_std_reg_mem(sink, state, prefix, opcode, len, dst.to_reg(), addr, rex);
emit_std_reg_mem(
sink,
state,
info,
prefix,
opcode,
len,
dst.to_reg(),
addr,
rex,
);
}
}
sink.put1(*imm);
@@ -2045,6 +2112,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
prefix,
opcode,
2,
@@ -2109,7 +2177,17 @@ pub(crate) fn emit(
}
RegMem::Mem { addr } => {
let addr = &addr.finalize(state, sink);
emit_std_reg_mem(sink, state, prefix, opcode, 2, reg_g.to_reg(), addr, rex);
emit_std_reg_mem(
sink,
state,
info,
prefix,
opcode,
2,
reg_g.to_reg(),
addr,
rex,
);
}
}
}
@@ -2129,7 +2207,7 @@ pub(crate) fn emit(
}
RegMem::Mem { addr } => {
let addr = &addr.finalize(state, sink);
emit_std_reg_mem(sink, state, prefix, opcode, len, *dst, addr, rex);
emit_std_reg_mem(sink, state, info, prefix, opcode, len, *dst, addr, rex);
}
}
}
@@ -2643,7 +2721,7 @@ pub(crate) fn emit(
_ => unreachable!(),
};
let amode = dst.finalize(state, sink);
emit_std_reg_mem(sink, state, prefix, opcodes, 2, *src, &amode, rex);
emit_std_reg_mem(sink, state, info, prefix, opcodes, 2, *src, &amode, rex);
}
Inst::AtomicRmwSeq { ty, op } => {

View File

@@ -4334,8 +4334,8 @@ impl LowerBackend for X64Backend {
let ty = ctx.input_ty(ifcmp_sp, 0);
ctx.emit(Inst::cmp_rmi_r(
ty.bytes() as u8,
RegMemImm::reg(operand),
regs::rsp(),
RegMemImm::reg(regs::rsp()),
operand,
));
let cond_code = ctx.data(branches[0]).cond_code().unwrap();
let cc = CC::from_intcc(cond_code);

View File

@@ -314,6 +314,9 @@ pub trait ABIMachineSpec {
/// Generate the usual frame-restore sequence for this architecture.
fn gen_epilogue_frame_restore() -> SmallVec<[Self::I; 2]>;
/// Generate a probestack call.
fn gen_probestack(_frame_size: u32) -> SmallVec<[Self::I; 2]>;
/// Generate a clobber-save sequence. This takes the list of *all* registers
/// written/modified by the function body. The implementation here is
/// responsible for determining which of these are callee-saved according to
@@ -481,6 +484,9 @@ pub struct ABICalleeImpl<M: ABIMachineSpec> {
/// manually register-allocated and carefully only use caller-saved
/// registers and keep nothing live after this sequence of instructions.
stack_limit: Option<(Reg, Vec<M::I>)>,
/// Are we to invoke the probestack function in the prologue? If so,
/// what is the minimum size at which we must invoke it?
probestack_min_frame: Option<u32>,
_mach: PhantomData<M>,
}
@@ -536,6 +542,18 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> {
.map(|reg| (reg, Vec::new()))
.or_else(|| f.stack_limit.map(|gv| gen_stack_limit::<M>(f, &sig, gv)));
// Determine whether a probestack call is required for large enough
// frames (and the minimum frame size if so).
let probestack_min_frame = if flags.enable_probestack() {
assert!(
!flags.probestack_func_adjusts_sp(),
"SP-adjusting probestack not supported in new backends"
);
Some(1 << flags.probestack_size_log2())
} else {
None
};
Ok(Self {
sig,
stackslots,
@@ -550,6 +568,7 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> {
flags,
is_leaf: f.is_leaf(),
stack_limit,
probestack_min_frame,
_mach: PhantomData,
})
}
@@ -978,6 +997,11 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
insts.extend_from_slice(stack_limit_load);
self.insert_stack_check(*reg, total_stacksize, &mut insts);
}
if let Some(min_frame) = &self.probestack_min_frame {
if total_stacksize >= *min_frame {
insts.extend(M::gen_probestack(total_stacksize));
}
}
}
if total_stacksize > 0 {
self.fixed_frame_storage_size += total_stacksize;

View File

@@ -0,0 +1,17 @@
test compile
set enable_probestack=true
target x86_64
feature "experimental_x64"
function %f1() -> i64 {
ss0 = explicit_slot 100000
block0:
v1 = stack_addr.i64 ss0
return v1
}
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: movl $$100000, %eax
; nextln: call LibCall(Probestack)