Two Lucet-related fixes to stack overflow handling.

Lucet uses stack probes rather than explicit stack limit checks as
Wasmtime does. In bytecodealliance/lucet#616, I have discovered that I
previously was not running some Lucet runtime tests with the new
backend, so was missing some test failures due to missing pieces in the
new backend.

This PR adds (i) calls to probestack, when enabled, in the prologue of
every function with a stack frame larger than one page (configurable via
flags); and (ii) trap metadata for every instruction on x86-64 that can
access the stack, hence be the first point at which a stack overflow is
detected when the stack pointer is decremented.
This commit is contained in:
Chris Fallin
2020-12-07 15:09:47 -08:00
parent efe7f37542
commit 3a01d14712
8 changed files with 169 additions and 19 deletions

View File

@@ -60,7 +60,10 @@ jobs:
# nightly-only feature right now.
- uses: ./.github/actions/install-rust
with:
toolchain: nightly
# TODO (rust-lang/rust#79661): We are seeing an internal compiler error when
# building with the latest (2020-12-06) nightly; pin on a slightly older
# version for now.
toolchain: nightly-2020-11-29
- run: cargo doc --no-deps --all --exclude wasmtime-cli --exclude test-programs --exclude cranelift-codegen-meta
- run: cargo doc --package cranelift-codegen-meta --document-private-items
- uses: actions/upload-artifact@v1
@@ -145,7 +148,7 @@ jobs:
# flags to rustc.
- uses: ./.github/actions/install-rust
with:
toolchain: nightly
toolchain: nightly-2020-11-29
- run: cargo install cargo-fuzz --vers "^0.8"
- run: cargo fetch
working-directory: ./fuzz
@@ -202,7 +205,7 @@ jobs:
rust: beta
- build: nightly
os: ubuntu-latest
rust: nightly
rust: nightly-2020-11-29
- build: macos
os: macos-latest
rust: stable
@@ -292,7 +295,7 @@ jobs:
submodules: true
- uses: ./.github/actions/install-rust
with:
toolchain: nightly
toolchain: nightly-2020-11-29
- uses: ./.github/actions/define-llvm-env
# Install wasm32 targets in order to build various tests throughout the
@@ -303,7 +306,7 @@ jobs:
# Run the x64 CI script.
- run: ./ci/run-experimental-x64-ci.sh
env:
CARGO_VERSION: "+nightly"
CARGO_VERSION: "+nightly-2020-11-29"
RUST_BACKTRACE: 1
# Build and test the wasi-nn module.

View File

@@ -12,7 +12,7 @@ use crate::{CodegenError, CodegenResult};
use alloc::boxed::Box;
use alloc::vec::Vec;
use regalloc::{RealReg, Reg, RegClass, Set, Writable};
use smallvec::SmallVec;
use smallvec::{smallvec, SmallVec};
// We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because
// these ABIs are very similar.
@@ -508,6 +508,12 @@ impl ABIMachineSpec for AArch64MachineDeps {
insts
}
fn gen_probestack(_: u32) -> SmallVec<[Self::I; 2]> {
// TODO: implement if we ever require stack probes on an AArch64 host
// (unlikely unless Lucet is ported)
smallvec![]
}
// Returns stack bytes used as well as instructions. Does not adjust
// nominal SP offset; abi_impl generic code will do that.
fn gen_clobber_save(

View File

@@ -10,7 +10,7 @@ use crate::{CodegenError, CodegenResult};
use alloc::boxed::Box;
use alloc::vec::Vec;
use regalloc::{RealReg, Reg, RegClass, Set, Writable};
use smallvec::SmallVec;
use smallvec::{smallvec, SmallVec};
/// Support for the ARM ABI from the callee side (within a function body).
pub(crate) type Arm32ABICallee = ABICalleeImpl<Arm32MachineDeps>;
@@ -305,6 +305,12 @@ impl ABIMachineSpec for Arm32MachineDeps {
ret
}
fn gen_probestack(_: u32) -> SmallVec<[Self::I; 2]> {
// TODO: implement if we ever require stack probes on ARM32 (unlikely
// unless Lucet is ported)
smallvec![]
}
/// Returns stack bytes used as well as instructions. Does not adjust
/// nominal SP offset; caller will do that.
fn gen_clobber_save(

View File

@@ -1,7 +1,7 @@
//! Implementation of the standard x64 ABI.
use crate::ir::types::*;
use crate::ir::{self, types, MemFlags, TrapCode, Type};
use crate::ir::{self, types, ExternalName, LibCall, MemFlags, Opcode, TrapCode, Type};
use crate::isa;
use crate::isa::{x64::inst::*, CallConv};
use crate::machinst::abi_impl::*;
@@ -389,6 +389,22 @@ impl ABIMachineSpec for X64ABIMachineSpec {
insts
}
fn gen_probestack(frame_size: u32) -> SmallVec<[Self::I; 2]> {
let mut insts = SmallVec::new();
insts.push(Inst::imm(
OperandSize::Size32,
frame_size as u64,
Writable::from_reg(regs::rax()),
));
insts.push(Inst::CallKnown {
dest: ExternalName::LibCall(LibCall::Probestack),
uses: vec![regs::rax()],
defs: vec![],
opcode: Opcode::Call,
});
insts
}
fn gen_clobber_save(
call_conv: isa::CallConv,
_: &settings::Flags,

View File

@@ -182,6 +182,7 @@ impl LegacyPrefixes {
fn emit_std_enc_mem(
sink: &mut MachBuffer<Inst>,
state: &EmitState,
info: &EmitInfo,
prefixes: LegacyPrefixes,
opcodes: u32,
mut num_opcodes: usize,
@@ -194,7 +195,8 @@ fn emit_std_enc_mem(
// expression. But `enc_g` can be derived from a register of any class.
let srcloc = state.cur_srcloc();
if srcloc != SourceLoc::default() && mem_e.can_trap() {
let can_trap = mem_e.can_trap();
if srcloc != SourceLoc::default() && can_trap {
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
}
@@ -202,6 +204,12 @@ fn emit_std_enc_mem(
match mem_e {
Amode::ImmReg { simm32, base, .. } => {
// If this is an access based off of RSP, it may trap with a stack overflow if it's the
// first touch of a new stack page.
if *base == regs::rsp() && !can_trap && info.flags().enable_probestack() {
sink.add_trap(srcloc, TrapCode::StackOverflow);
}
// First, the REX byte.
let enc_e = int_reg_enc(*base);
rex.emit_two_op(sink, enc_g, enc_e);
@@ -262,6 +270,12 @@ fn emit_std_enc_mem(
shift,
..
} => {
// If this is an access based off of RSP, it may trap with a stack overflow if it's the
// first touch of a new stack page.
if *reg_base == regs::rsp() && !can_trap && info.flags().enable_probestack() {
sink.add_trap(srcloc, TrapCode::StackOverflow);
}
let enc_base = int_reg_enc(*reg_base);
let enc_index = int_reg_enc(*reg_index);
@@ -350,6 +364,7 @@ fn emit_std_enc_enc(
fn emit_std_reg_mem(
sink: &mut MachBuffer<Inst>,
state: &EmitState,
info: &EmitInfo,
prefixes: LegacyPrefixes,
opcodes: u32,
num_opcodes: usize,
@@ -361,6 +376,7 @@ fn emit_std_reg_mem(
emit_std_enc_mem(
sink,
state,
info,
prefixes,
opcodes,
num_opcodes,
@@ -538,6 +554,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
LegacyPrefixes::None,
0x0FAF,
2,
@@ -597,6 +614,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
LegacyPrefixes::None,
opcode_m,
1,
@@ -654,6 +672,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
prefix,
opcode,
num_opcodes,
@@ -717,7 +736,9 @@ pub(crate) fn emit(
}
RegMem::Mem { addr: src } => {
let amode = src.finalize(state, sink);
emit_std_enc_mem(sink, state, prefix, opcode, 1, subopcode, &amode, rex_flags);
emit_std_enc_mem(
sink, state, info, prefix, opcode, 1, subopcode, &amode, rex_flags,
);
}
}
}
@@ -738,7 +759,9 @@ pub(crate) fn emit(
}
RegMem::Mem { addr: src } => {
let amode = src.finalize(state, sink);
emit_std_enc_mem(sink, state, prefix, 0xF7, 1, subopcode, &amode, rex_flags);
emit_std_enc_mem(
sink, state, info, prefix, 0xF7, 1, subopcode, &amode, rex_flags,
);
}
}
}
@@ -987,6 +1010,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
LegacyPrefixes::None,
opcodes,
num_opcodes,
@@ -1004,6 +1028,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
LegacyPrefixes::None,
0x8B,
1,
@@ -1019,6 +1044,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
LegacyPrefixes::None,
0x8D,
1,
@@ -1081,6 +1107,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
LegacyPrefixes::None,
opcodes,
num_opcodes,
@@ -1108,7 +1135,17 @@ pub(crate) fn emit(
};
// MOV r8, r/m8 is (REX.W==0) 88 /r
emit_std_reg_mem(sink, state, LegacyPrefixes::None, 0x88, 1, *src, dst, rex)
emit_std_reg_mem(
sink,
state,
info,
LegacyPrefixes::None,
0x88,
1,
*src,
dst,
rex,
)
}
2 => {
@@ -1116,6 +1153,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
LegacyPrefixes::_66,
0x89,
1,
@@ -1130,6 +1168,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
LegacyPrefixes::None,
0x89,
1,
@@ -1144,6 +1183,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
LegacyPrefixes::None,
0x89,
1,
@@ -1253,6 +1293,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
prefix,
opcode_bytes,
2,
@@ -1311,7 +1352,7 @@ pub(crate) fn emit(
let addr = &addr.finalize(state, sink);
// Whereas here we revert to the "normal" G-E ordering.
let opcode = if *size == 1 { 0x3A } else { 0x3B };
emit_std_reg_mem(sink, state, prefix, opcode, 1, *reg_g, addr, rex);
emit_std_reg_mem(sink, state, info, prefix, opcode, 1, *reg_g, addr, rex);
}
RegMemImm::Imm { simm32 } => {
@@ -1372,6 +1413,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
prefix,
opcode,
2,
@@ -1408,6 +1450,10 @@ pub(crate) fn emit(
}
Inst::Push64 { src } => {
if info.flags().enable_probestack() {
sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow);
}
match src {
RegMemImm::Reg { reg } => {
let enc_reg = int_reg_enc(*reg);
@@ -1423,6 +1469,7 @@ pub(crate) fn emit(
emit_std_enc_mem(
sink,
state,
info,
LegacyPrefixes::None,
0xFF,
1,
@@ -1454,6 +1501,9 @@ pub(crate) fn emit(
}
Inst::CallKnown { dest, opcode, .. } => {
if info.flags().enable_probestack() {
sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow);
}
if let Some(s) = state.take_stack_map() {
sink.add_stack_map(StackMapExtent::UpcomingBytes(5), s);
}
@@ -1469,6 +1519,9 @@ pub(crate) fn emit(
}
Inst::CallUnknown { dest, opcode, .. } => {
if info.flags().enable_probestack() {
sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow);
}
let start_offset = sink.cur_offset();
match dest {
RegMem::Reg { reg } => {
@@ -1489,6 +1542,7 @@ pub(crate) fn emit(
emit_std_enc_mem(
sink,
state,
info,
LegacyPrefixes::None,
0xFF,
1,
@@ -1587,6 +1641,7 @@ pub(crate) fn emit(
emit_std_enc_mem(
sink,
state,
info,
LegacyPrefixes::None,
0xFF,
1,
@@ -1733,6 +1788,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
prefix,
opcode,
num_opcodes,
@@ -1848,6 +1904,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
prefix,
opcode,
length,
@@ -1994,7 +2051,17 @@ pub(crate) fn emit(
!regs_swapped,
"No existing way to encode a mem argument in the ModRM r/m field."
);
emit_std_reg_mem(sink, state, prefix, opcode, len, dst.to_reg(), addr, rex);
emit_std_reg_mem(
sink,
state,
info,
prefix,
opcode,
len,
dst.to_reg(),
addr,
rex,
);
}
}
sink.put1(*imm);
@@ -2027,6 +2094,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
state,
info,
prefix,
opcode,
2,
@@ -2091,7 +2159,17 @@ pub(crate) fn emit(
}
RegMem::Mem { addr } => {
let addr = &addr.finalize(state, sink);
emit_std_reg_mem(sink, state, prefix, opcode, 2, reg_g.to_reg(), addr, rex);
emit_std_reg_mem(
sink,
state,
info,
prefix,
opcode,
2,
reg_g.to_reg(),
addr,
rex,
);
}
}
}
@@ -2111,7 +2189,7 @@ pub(crate) fn emit(
}
RegMem::Mem { addr } => {
let addr = &addr.finalize(state, sink);
emit_std_reg_mem(sink, state, prefix, opcode, len, *dst, addr, rex);
emit_std_reg_mem(sink, state, info, prefix, opcode, len, *dst, addr, rex);
}
}
}
@@ -2625,7 +2703,7 @@ pub(crate) fn emit(
_ => unreachable!(),
};
let amode = dst.finalize(state, sink);
emit_std_reg_mem(sink, state, prefix, opcodes, 2, *src, &amode, rex);
emit_std_reg_mem(sink, state, info, prefix, opcodes, 2, *src, &amode, rex);
}
Inst::AtomicRmwSeq { ty, op } => {

View File

@@ -4079,8 +4079,8 @@ impl LowerBackend for X64Backend {
let ty = ctx.input_ty(ifcmp_sp, 0);
ctx.emit(Inst::cmp_rmi_r(
ty.bytes() as u8,
RegMemImm::reg(operand),
regs::rsp(),
RegMemImm::reg(regs::rsp()),
operand,
));
let cond_code = ctx.data(branches[0]).cond_code().unwrap();
let cc = CC::from_intcc(cond_code);

View File

@@ -314,6 +314,9 @@ pub trait ABIMachineSpec {
/// Generate the usual frame-restore sequence for this architecture.
fn gen_epilogue_frame_restore() -> SmallVec<[Self::I; 2]>;
/// Generate a probestack call.
fn gen_probestack(_frame_size: u32) -> SmallVec<[Self::I; 2]>;
/// Generate a clobber-save sequence. This takes the list of *all* registers
/// written/modified by the function body. The implementation here is
/// responsible for determining which of these are callee-saved according to
@@ -481,6 +484,9 @@ pub struct ABICalleeImpl<M: ABIMachineSpec> {
/// manually register-allocated and carefully only use caller-saved
/// registers and keep nothing live after this sequence of instructions.
stack_limit: Option<(Reg, Vec<M::I>)>,
/// Are we to invoke the probestack function in the prologue? If so,
/// what is the minimum size at which we must invoke it?
probestack_min_frame: Option<u32>,
_mach: PhantomData<M>,
}
@@ -536,6 +542,18 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> {
.map(|reg| (reg, Vec::new()))
.or_else(|| f.stack_limit.map(|gv| gen_stack_limit::<M>(f, &sig, gv)));
// Determine whether a probestack call is required for large enough
// frames (and the minimum frame size if so).
let probestack_min_frame = if flags.enable_probestack() {
assert!(
!flags.probestack_func_adjusts_sp(),
"SP-adjusting probestack not supported in new backends"
);
Some(1 << flags.probestack_size_log2())
} else {
None
};
Ok(Self {
sig,
stackslots,
@@ -550,6 +568,7 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> {
flags,
is_leaf: f.is_leaf(),
stack_limit,
probestack_min_frame,
_mach: PhantomData,
})
}
@@ -978,6 +997,11 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
insts.extend_from_slice(stack_limit_load);
self.insert_stack_check(*reg, total_stacksize, &mut insts);
}
if let Some(min_frame) = &self.probestack_min_frame {
if total_stacksize >= *min_frame {
insts.extend(M::gen_probestack(total_stacksize));
}
}
}
if total_stacksize > 0 {
self.fixed_frame_storage_size += total_stacksize;

View File

@@ -0,0 +1,17 @@
test compile
set enable_probestack=true
target x86_64
feature "experimental_x64"
function %f1() -> i64 {
ss0 = explicit_slot 100000
block0:
v1 = stack_addr.i64 ss0
return v1
}
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: movl $$100000, %eax
; nextln: call LibCall(Probestack)