Merge pull request #2486 from cfallin/fix-probestack

Two Lucet-related fixes to stack overflow handling.
This commit is contained in:
Chris Fallin
2020-12-07 16:47:37 -08:00
committed by GitHub
8 changed files with 169 additions and 19 deletions

View File

@@ -60,7 +60,10 @@ jobs:
# nightly-only feature right now. # nightly-only feature right now.
- uses: ./.github/actions/install-rust - uses: ./.github/actions/install-rust
with: with:
toolchain: nightly # TODO (rust-lang/rust#79661): We are seeing an internal compiler error when
# building with the latest (2020-12-06) nightly; pin on a slightly older
# version for now.
toolchain: nightly-2020-11-29
- run: cargo doc --no-deps --all --exclude wasmtime-cli --exclude test-programs --exclude cranelift-codegen-meta - run: cargo doc --no-deps --all --exclude wasmtime-cli --exclude test-programs --exclude cranelift-codegen-meta
- run: cargo doc --package cranelift-codegen-meta --document-private-items - run: cargo doc --package cranelift-codegen-meta --document-private-items
- uses: actions/upload-artifact@v1 - uses: actions/upload-artifact@v1
@@ -145,7 +148,7 @@ jobs:
# flags to rustc. # flags to rustc.
- uses: ./.github/actions/install-rust - uses: ./.github/actions/install-rust
with: with:
toolchain: nightly toolchain: nightly-2020-11-29
- run: cargo install cargo-fuzz --vers "^0.8" - run: cargo install cargo-fuzz --vers "^0.8"
- run: cargo fetch - run: cargo fetch
working-directory: ./fuzz working-directory: ./fuzz
@@ -202,7 +205,7 @@ jobs:
rust: beta rust: beta
- build: nightly - build: nightly
os: ubuntu-latest os: ubuntu-latest
rust: nightly rust: nightly-2020-11-29
- build: macos - build: macos
os: macos-latest os: macos-latest
rust: stable rust: stable
@@ -292,7 +295,7 @@ jobs:
submodules: true submodules: true
- uses: ./.github/actions/install-rust - uses: ./.github/actions/install-rust
with: with:
toolchain: nightly toolchain: nightly-2020-11-29
- uses: ./.github/actions/define-llvm-env - uses: ./.github/actions/define-llvm-env
# Install wasm32 targets in order to build various tests throughout the # Install wasm32 targets in order to build various tests throughout the
@@ -303,7 +306,7 @@ jobs:
# Run the x64 CI script. # Run the x64 CI script.
- run: ./ci/run-experimental-x64-ci.sh - run: ./ci/run-experimental-x64-ci.sh
env: env:
CARGO_VERSION: "+nightly" CARGO_VERSION: "+nightly-2020-11-29"
RUST_BACKTRACE: 1 RUST_BACKTRACE: 1
# Build and test the wasi-nn module. # Build and test the wasi-nn module.

View File

@@ -12,7 +12,7 @@ use crate::{CodegenError, CodegenResult};
use alloc::boxed::Box; use alloc::boxed::Box;
use alloc::vec::Vec; use alloc::vec::Vec;
use regalloc::{RealReg, Reg, RegClass, Set, Writable}; use regalloc::{RealReg, Reg, RegClass, Set, Writable};
use smallvec::SmallVec; use smallvec::{smallvec, SmallVec};
// We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because // We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because
// these ABIs are very similar. // these ABIs are very similar.
@@ -508,6 +508,12 @@ impl ABIMachineSpec for AArch64MachineDeps {
insts insts
} }
fn gen_probestack(_: u32) -> SmallVec<[Self::I; 2]> {
// TODO: implement if we ever require stack probes on an AArch64 host
// (unlikely unless Lucet is ported)
smallvec![]
}
// Returns stack bytes used as well as instructions. Does not adjust // Returns stack bytes used as well as instructions. Does not adjust
// nominal SP offset; abi_impl generic code will do that. // nominal SP offset; abi_impl generic code will do that.
fn gen_clobber_save( fn gen_clobber_save(

View File

@@ -10,7 +10,7 @@ use crate::{CodegenError, CodegenResult};
use alloc::boxed::Box; use alloc::boxed::Box;
use alloc::vec::Vec; use alloc::vec::Vec;
use regalloc::{RealReg, Reg, RegClass, Set, Writable}; use regalloc::{RealReg, Reg, RegClass, Set, Writable};
use smallvec::SmallVec; use smallvec::{smallvec, SmallVec};
/// Support for the ARM ABI from the callee side (within a function body). /// Support for the ARM ABI from the callee side (within a function body).
pub(crate) type Arm32ABICallee = ABICalleeImpl<Arm32MachineDeps>; pub(crate) type Arm32ABICallee = ABICalleeImpl<Arm32MachineDeps>;
@@ -305,6 +305,12 @@ impl ABIMachineSpec for Arm32MachineDeps {
ret ret
} }
fn gen_probestack(_: u32) -> SmallVec<[Self::I; 2]> {
// TODO: implement if we ever require stack probes on ARM32 (unlikely
// unless Lucet is ported)
smallvec![]
}
/// Returns stack bytes used as well as instructions. Does not adjust /// Returns stack bytes used as well as instructions. Does not adjust
/// nominal SP offset; caller will do that. /// nominal SP offset; caller will do that.
fn gen_clobber_save( fn gen_clobber_save(

View File

@@ -1,7 +1,7 @@
//! Implementation of the standard x64 ABI. //! Implementation of the standard x64 ABI.
use crate::ir::types::*; use crate::ir::types::*;
use crate::ir::{self, types, MemFlags, TrapCode, Type}; use crate::ir::{self, types, ExternalName, LibCall, MemFlags, Opcode, TrapCode, Type};
use crate::isa; use crate::isa;
use crate::isa::{x64::inst::*, CallConv}; use crate::isa::{x64::inst::*, CallConv};
use crate::machinst::abi_impl::*; use crate::machinst::abi_impl::*;
@@ -389,6 +389,22 @@ impl ABIMachineSpec for X64ABIMachineSpec {
insts insts
} }
fn gen_probestack(frame_size: u32) -> SmallVec<[Self::I; 2]> {
let mut insts = SmallVec::new();
insts.push(Inst::imm(
OperandSize::Size32,
frame_size as u64,
Writable::from_reg(regs::rax()),
));
insts.push(Inst::CallKnown {
dest: ExternalName::LibCall(LibCall::Probestack),
uses: vec![regs::rax()],
defs: vec![],
opcode: Opcode::Call,
});
insts
}
fn gen_clobber_save( fn gen_clobber_save(
call_conv: isa::CallConv, call_conv: isa::CallConv,
_: &settings::Flags, _: &settings::Flags,

View File

@@ -182,6 +182,7 @@ impl LegacyPrefixes {
fn emit_std_enc_mem( fn emit_std_enc_mem(
sink: &mut MachBuffer<Inst>, sink: &mut MachBuffer<Inst>,
state: &EmitState, state: &EmitState,
info: &EmitInfo,
prefixes: LegacyPrefixes, prefixes: LegacyPrefixes,
opcodes: u32, opcodes: u32,
mut num_opcodes: usize, mut num_opcodes: usize,
@@ -194,7 +195,8 @@ fn emit_std_enc_mem(
// expression. But `enc_g` can be derived from a register of any class. // expression. But `enc_g` can be derived from a register of any class.
let srcloc = state.cur_srcloc(); let srcloc = state.cur_srcloc();
if srcloc != SourceLoc::default() && mem_e.can_trap() { let can_trap = mem_e.can_trap();
if srcloc != SourceLoc::default() && can_trap {
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
} }
@@ -202,6 +204,12 @@ fn emit_std_enc_mem(
match mem_e { match mem_e {
Amode::ImmReg { simm32, base, .. } => { Amode::ImmReg { simm32, base, .. } => {
// If this is an access based off of RSP, it may trap with a stack overflow if it's the
// first touch of a new stack page.
if *base == regs::rsp() && !can_trap && info.flags().enable_probestack() {
sink.add_trap(srcloc, TrapCode::StackOverflow);
}
// First, the REX byte. // First, the REX byte.
let enc_e = int_reg_enc(*base); let enc_e = int_reg_enc(*base);
rex.emit_two_op(sink, enc_g, enc_e); rex.emit_two_op(sink, enc_g, enc_e);
@@ -262,6 +270,12 @@ fn emit_std_enc_mem(
shift, shift,
.. ..
} => { } => {
// If this is an access based off of RSP, it may trap with a stack overflow if it's the
// first touch of a new stack page.
if *reg_base == regs::rsp() && !can_trap && info.flags().enable_probestack() {
sink.add_trap(srcloc, TrapCode::StackOverflow);
}
let enc_base = int_reg_enc(*reg_base); let enc_base = int_reg_enc(*reg_base);
let enc_index = int_reg_enc(*reg_index); let enc_index = int_reg_enc(*reg_index);
@@ -350,6 +364,7 @@ fn emit_std_enc_enc(
fn emit_std_reg_mem( fn emit_std_reg_mem(
sink: &mut MachBuffer<Inst>, sink: &mut MachBuffer<Inst>,
state: &EmitState, state: &EmitState,
info: &EmitInfo,
prefixes: LegacyPrefixes, prefixes: LegacyPrefixes,
opcodes: u32, opcodes: u32,
num_opcodes: usize, num_opcodes: usize,
@@ -361,6 +376,7 @@ fn emit_std_reg_mem(
emit_std_enc_mem( emit_std_enc_mem(
sink, sink,
state, state,
info,
prefixes, prefixes,
opcodes, opcodes,
num_opcodes, num_opcodes,
@@ -538,6 +554,7 @@ pub(crate) fn emit(
emit_std_reg_mem( emit_std_reg_mem(
sink, sink,
state, state,
info,
LegacyPrefixes::None, LegacyPrefixes::None,
0x0FAF, 0x0FAF,
2, 2,
@@ -597,6 +614,7 @@ pub(crate) fn emit(
emit_std_reg_mem( emit_std_reg_mem(
sink, sink,
state, state,
info,
LegacyPrefixes::None, LegacyPrefixes::None,
opcode_m, opcode_m,
1, 1,
@@ -654,6 +672,7 @@ pub(crate) fn emit(
emit_std_reg_mem( emit_std_reg_mem(
sink, sink,
state, state,
info,
prefix, prefix,
opcode, opcode,
num_opcodes, num_opcodes,
@@ -717,7 +736,9 @@ pub(crate) fn emit(
} }
RegMem::Mem { addr: src } => { RegMem::Mem { addr: src } => {
let amode = src.finalize(state, sink); let amode = src.finalize(state, sink);
emit_std_enc_mem(sink, state, prefix, opcode, 1, subopcode, &amode, rex_flags); emit_std_enc_mem(
sink, state, info, prefix, opcode, 1, subopcode, &amode, rex_flags,
);
} }
} }
} }
@@ -738,7 +759,9 @@ pub(crate) fn emit(
} }
RegMem::Mem { addr: src } => { RegMem::Mem { addr: src } => {
let amode = src.finalize(state, sink); let amode = src.finalize(state, sink);
emit_std_enc_mem(sink, state, prefix, 0xF7, 1, subopcode, &amode, rex_flags); emit_std_enc_mem(
sink, state, info, prefix, 0xF7, 1, subopcode, &amode, rex_flags,
);
} }
} }
} }
@@ -987,6 +1010,7 @@ pub(crate) fn emit(
emit_std_reg_mem( emit_std_reg_mem(
sink, sink,
state, state,
info,
LegacyPrefixes::None, LegacyPrefixes::None,
opcodes, opcodes,
num_opcodes, num_opcodes,
@@ -1004,6 +1028,7 @@ pub(crate) fn emit(
emit_std_reg_mem( emit_std_reg_mem(
sink, sink,
state, state,
info,
LegacyPrefixes::None, LegacyPrefixes::None,
0x8B, 0x8B,
1, 1,
@@ -1019,6 +1044,7 @@ pub(crate) fn emit(
emit_std_reg_mem( emit_std_reg_mem(
sink, sink,
state, state,
info,
LegacyPrefixes::None, LegacyPrefixes::None,
0x8D, 0x8D,
1, 1,
@@ -1081,6 +1107,7 @@ pub(crate) fn emit(
emit_std_reg_mem( emit_std_reg_mem(
sink, sink,
state, state,
info,
LegacyPrefixes::None, LegacyPrefixes::None,
opcodes, opcodes,
num_opcodes, num_opcodes,
@@ -1108,7 +1135,17 @@ pub(crate) fn emit(
}; };
// MOV r8, r/m8 is (REX.W==0) 88 /r // MOV r8, r/m8 is (REX.W==0) 88 /r
emit_std_reg_mem(sink, state, LegacyPrefixes::None, 0x88, 1, *src, dst, rex) emit_std_reg_mem(
sink,
state,
info,
LegacyPrefixes::None,
0x88,
1,
*src,
dst,
rex,
)
} }
2 => { 2 => {
@@ -1116,6 +1153,7 @@ pub(crate) fn emit(
emit_std_reg_mem( emit_std_reg_mem(
sink, sink,
state, state,
info,
LegacyPrefixes::_66, LegacyPrefixes::_66,
0x89, 0x89,
1, 1,
@@ -1130,6 +1168,7 @@ pub(crate) fn emit(
emit_std_reg_mem( emit_std_reg_mem(
sink, sink,
state, state,
info,
LegacyPrefixes::None, LegacyPrefixes::None,
0x89, 0x89,
1, 1,
@@ -1144,6 +1183,7 @@ pub(crate) fn emit(
emit_std_reg_mem( emit_std_reg_mem(
sink, sink,
state, state,
info,
LegacyPrefixes::None, LegacyPrefixes::None,
0x89, 0x89,
1, 1,
@@ -1253,6 +1293,7 @@ pub(crate) fn emit(
emit_std_reg_mem( emit_std_reg_mem(
sink, sink,
state, state,
info,
prefix, prefix,
opcode_bytes, opcode_bytes,
2, 2,
@@ -1311,7 +1352,7 @@ pub(crate) fn emit(
let addr = &addr.finalize(state, sink); let addr = &addr.finalize(state, sink);
// Whereas here we revert to the "normal" G-E ordering. // Whereas here we revert to the "normal" G-E ordering.
let opcode = if *size == 1 { 0x3A } else { 0x3B }; let opcode = if *size == 1 { 0x3A } else { 0x3B };
emit_std_reg_mem(sink, state, prefix, opcode, 1, *reg_g, addr, rex); emit_std_reg_mem(sink, state, info, prefix, opcode, 1, *reg_g, addr, rex);
} }
RegMemImm::Imm { simm32 } => { RegMemImm::Imm { simm32 } => {
@@ -1372,6 +1413,7 @@ pub(crate) fn emit(
emit_std_reg_mem( emit_std_reg_mem(
sink, sink,
state, state,
info,
prefix, prefix,
opcode, opcode,
2, 2,
@@ -1408,6 +1450,10 @@ pub(crate) fn emit(
} }
Inst::Push64 { src } => { Inst::Push64 { src } => {
if info.flags().enable_probestack() {
sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow);
}
match src { match src {
RegMemImm::Reg { reg } => { RegMemImm::Reg { reg } => {
let enc_reg = int_reg_enc(*reg); let enc_reg = int_reg_enc(*reg);
@@ -1423,6 +1469,7 @@ pub(crate) fn emit(
emit_std_enc_mem( emit_std_enc_mem(
sink, sink,
state, state,
info,
LegacyPrefixes::None, LegacyPrefixes::None,
0xFF, 0xFF,
1, 1,
@@ -1454,6 +1501,9 @@ pub(crate) fn emit(
} }
Inst::CallKnown { dest, opcode, .. } => { Inst::CallKnown { dest, opcode, .. } => {
if info.flags().enable_probestack() {
sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow);
}
if let Some(s) = state.take_stack_map() { if let Some(s) = state.take_stack_map() {
sink.add_stack_map(StackMapExtent::UpcomingBytes(5), s); sink.add_stack_map(StackMapExtent::UpcomingBytes(5), s);
} }
@@ -1469,6 +1519,9 @@ pub(crate) fn emit(
} }
Inst::CallUnknown { dest, opcode, .. } => { Inst::CallUnknown { dest, opcode, .. } => {
if info.flags().enable_probestack() {
sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow);
}
let start_offset = sink.cur_offset(); let start_offset = sink.cur_offset();
match dest { match dest {
RegMem::Reg { reg } => { RegMem::Reg { reg } => {
@@ -1489,6 +1542,7 @@ pub(crate) fn emit(
emit_std_enc_mem( emit_std_enc_mem(
sink, sink,
state, state,
info,
LegacyPrefixes::None, LegacyPrefixes::None,
0xFF, 0xFF,
1, 1,
@@ -1587,6 +1641,7 @@ pub(crate) fn emit(
emit_std_enc_mem( emit_std_enc_mem(
sink, sink,
state, state,
info,
LegacyPrefixes::None, LegacyPrefixes::None,
0xFF, 0xFF,
1, 1,
@@ -1733,6 +1788,7 @@ pub(crate) fn emit(
emit_std_reg_mem( emit_std_reg_mem(
sink, sink,
state, state,
info,
prefix, prefix,
opcode, opcode,
num_opcodes, num_opcodes,
@@ -1863,6 +1919,7 @@ pub(crate) fn emit(
emit_std_reg_mem( emit_std_reg_mem(
sink, sink,
state, state,
info,
prefix, prefix,
opcode, opcode,
length, length,
@@ -2012,7 +2069,17 @@ pub(crate) fn emit(
!regs_swapped, !regs_swapped,
"No existing way to encode a mem argument in the ModRM r/m field." "No existing way to encode a mem argument in the ModRM r/m field."
); );
emit_std_reg_mem(sink, state, prefix, opcode, len, dst.to_reg(), addr, rex); emit_std_reg_mem(
sink,
state,
info,
prefix,
opcode,
len,
dst.to_reg(),
addr,
rex,
);
} }
} }
sink.put1(*imm); sink.put1(*imm);
@@ -2045,6 +2112,7 @@ pub(crate) fn emit(
emit_std_reg_mem( emit_std_reg_mem(
sink, sink,
state, state,
info,
prefix, prefix,
opcode, opcode,
2, 2,
@@ -2109,7 +2177,17 @@ pub(crate) fn emit(
} }
RegMem::Mem { addr } => { RegMem::Mem { addr } => {
let addr = &addr.finalize(state, sink); let addr = &addr.finalize(state, sink);
emit_std_reg_mem(sink, state, prefix, opcode, 2, reg_g.to_reg(), addr, rex); emit_std_reg_mem(
sink,
state,
info,
prefix,
opcode,
2,
reg_g.to_reg(),
addr,
rex,
);
} }
} }
} }
@@ -2129,7 +2207,7 @@ pub(crate) fn emit(
} }
RegMem::Mem { addr } => { RegMem::Mem { addr } => {
let addr = &addr.finalize(state, sink); let addr = &addr.finalize(state, sink);
emit_std_reg_mem(sink, state, prefix, opcode, len, *dst, addr, rex); emit_std_reg_mem(sink, state, info, prefix, opcode, len, *dst, addr, rex);
} }
} }
} }
@@ -2643,7 +2721,7 @@ pub(crate) fn emit(
_ => unreachable!(), _ => unreachable!(),
}; };
let amode = dst.finalize(state, sink); let amode = dst.finalize(state, sink);
emit_std_reg_mem(sink, state, prefix, opcodes, 2, *src, &amode, rex); emit_std_reg_mem(sink, state, info, prefix, opcodes, 2, *src, &amode, rex);
} }
Inst::AtomicRmwSeq { ty, op } => { Inst::AtomicRmwSeq { ty, op } => {

View File

@@ -4334,8 +4334,8 @@ impl LowerBackend for X64Backend {
let ty = ctx.input_ty(ifcmp_sp, 0); let ty = ctx.input_ty(ifcmp_sp, 0);
ctx.emit(Inst::cmp_rmi_r( ctx.emit(Inst::cmp_rmi_r(
ty.bytes() as u8, ty.bytes() as u8,
RegMemImm::reg(operand), RegMemImm::reg(regs::rsp()),
regs::rsp(), operand,
)); ));
let cond_code = ctx.data(branches[0]).cond_code().unwrap(); let cond_code = ctx.data(branches[0]).cond_code().unwrap();
let cc = CC::from_intcc(cond_code); let cc = CC::from_intcc(cond_code);

View File

@@ -314,6 +314,9 @@ pub trait ABIMachineSpec {
/// Generate the usual frame-restore sequence for this architecture. /// Generate the usual frame-restore sequence for this architecture.
fn gen_epilogue_frame_restore() -> SmallVec<[Self::I; 2]>; fn gen_epilogue_frame_restore() -> SmallVec<[Self::I; 2]>;
/// Generate a probestack call.
fn gen_probestack(_frame_size: u32) -> SmallVec<[Self::I; 2]>;
/// Generate a clobber-save sequence. This takes the list of *all* registers /// Generate a clobber-save sequence. This takes the list of *all* registers
/// written/modified by the function body. The implementation here is /// written/modified by the function body. The implementation here is
/// responsible for determining which of these are callee-saved according to /// responsible for determining which of these are callee-saved according to
@@ -481,6 +484,9 @@ pub struct ABICalleeImpl<M: ABIMachineSpec> {
/// manually register-allocated and carefully only use caller-saved /// manually register-allocated and carefully only use caller-saved
/// registers and keep nothing live after this sequence of instructions. /// registers and keep nothing live after this sequence of instructions.
stack_limit: Option<(Reg, Vec<M::I>)>, stack_limit: Option<(Reg, Vec<M::I>)>,
/// Are we to invoke the probestack function in the prologue? If so,
/// what is the minimum size at which we must invoke it?
probestack_min_frame: Option<u32>,
_mach: PhantomData<M>, _mach: PhantomData<M>,
} }
@@ -536,6 +542,18 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> {
.map(|reg| (reg, Vec::new())) .map(|reg| (reg, Vec::new()))
.or_else(|| f.stack_limit.map(|gv| gen_stack_limit::<M>(f, &sig, gv))); .or_else(|| f.stack_limit.map(|gv| gen_stack_limit::<M>(f, &sig, gv)));
// Determine whether a probestack call is required for large enough
// frames (and the minimum frame size if so).
let probestack_min_frame = if flags.enable_probestack() {
assert!(
!flags.probestack_func_adjusts_sp(),
"SP-adjusting probestack not supported in new backends"
);
Some(1 << flags.probestack_size_log2())
} else {
None
};
Ok(Self { Ok(Self {
sig, sig,
stackslots, stackslots,
@@ -550,6 +568,7 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> {
flags, flags,
is_leaf: f.is_leaf(), is_leaf: f.is_leaf(),
stack_limit, stack_limit,
probestack_min_frame,
_mach: PhantomData, _mach: PhantomData,
}) })
} }
@@ -978,6 +997,11 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
insts.extend_from_slice(stack_limit_load); insts.extend_from_slice(stack_limit_load);
self.insert_stack_check(*reg, total_stacksize, &mut insts); self.insert_stack_check(*reg, total_stacksize, &mut insts);
} }
if let Some(min_frame) = &self.probestack_min_frame {
if total_stacksize >= *min_frame {
insts.extend(M::gen_probestack(total_stacksize));
}
}
} }
if total_stacksize > 0 { if total_stacksize > 0 {
self.fixed_frame_storage_size += total_stacksize; self.fixed_frame_storage_size += total_stacksize;

View File

@@ -0,0 +1,17 @@
test compile
set enable_probestack=true
target x86_64
feature "experimental_x64"
function %f1() -> i64 {
ss0 = explicit_slot 100000
block0:
v1 = stack_addr.i64 ss0
return v1
}
; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: movl $$100000, %eax
; nextln: call LibCall(Probestack)