diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3bd17dbdca..5e80781daf 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -60,7 +60,10 @@ jobs: # nightly-only feature right now. - uses: ./.github/actions/install-rust with: - toolchain: nightly + # TODO (rust-lang/rust#79661): We are seeing an internal compiler error when + # building with the latest (2020-12-06) nightly; pin on a slightly older + # version for now. + toolchain: nightly-2020-11-29 - run: cargo doc --no-deps --all --exclude wasmtime-cli --exclude test-programs --exclude cranelift-codegen-meta - run: cargo doc --package cranelift-codegen-meta --document-private-items - uses: actions/upload-artifact@v1 @@ -145,7 +148,7 @@ jobs: # flags to rustc. - uses: ./.github/actions/install-rust with: - toolchain: nightly + toolchain: nightly-2020-11-29 - run: cargo install cargo-fuzz --vers "^0.8" - run: cargo fetch working-directory: ./fuzz @@ -202,7 +205,7 @@ jobs: rust: beta - build: nightly os: ubuntu-latest - rust: nightly + rust: nightly-2020-11-29 - build: macos os: macos-latest rust: stable @@ -292,7 +295,7 @@ jobs: submodules: true - uses: ./.github/actions/install-rust with: - toolchain: nightly + toolchain: nightly-2020-11-29 - uses: ./.github/actions/define-llvm-env # Install wasm32 targets in order to build various tests throughout the @@ -303,7 +306,7 @@ jobs: # Run the x64 CI script. - run: ./ci/run-experimental-x64-ci.sh env: - CARGO_VERSION: "+nightly" + CARGO_VERSION: "+nightly-2020-11-29" RUST_BACKTRACE: 1 # Build and test the wasi-nn module. diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 8496e51711..69335c10cd 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -12,7 +12,7 @@ use crate::{CodegenError, CodegenResult}; use alloc::boxed::Box; use alloc::vec::Vec; use regalloc::{RealReg, Reg, RegClass, Set, Writable}; -use smallvec::SmallVec; +use smallvec::{smallvec, SmallVec}; // We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because // these ABIs are very similar. @@ -508,6 +508,12 @@ impl ABIMachineSpec for AArch64MachineDeps { insts } + fn gen_probestack(_: u32) -> SmallVec<[Self::I; 2]> { + // TODO: implement if we ever require stack probes on an AArch64 host + // (unlikely unless Lucet is ported) + smallvec![] + } + // Returns stack bytes used as well as instructions. Does not adjust // nominal SP offset; abi_impl generic code will do that. fn gen_clobber_save( diff --git a/cranelift/codegen/src/isa/arm32/abi.rs b/cranelift/codegen/src/isa/arm32/abi.rs index 4b69e1cf43..f09dd7dced 100644 --- a/cranelift/codegen/src/isa/arm32/abi.rs +++ b/cranelift/codegen/src/isa/arm32/abi.rs @@ -10,7 +10,7 @@ use crate::{CodegenError, CodegenResult}; use alloc::boxed::Box; use alloc::vec::Vec; use regalloc::{RealReg, Reg, RegClass, Set, Writable}; -use smallvec::SmallVec; +use smallvec::{smallvec, SmallVec}; /// Support for the ARM ABI from the callee side (within a function body). pub(crate) type Arm32ABICallee = ABICalleeImpl; @@ -305,6 +305,12 @@ impl ABIMachineSpec for Arm32MachineDeps { ret } + fn gen_probestack(_: u32) -> SmallVec<[Self::I; 2]> { + // TODO: implement if we ever require stack probes on ARM32 (unlikely + // unless Lucet is ported) + smallvec![] + } + /// Returns stack bytes used as well as instructions. Does not adjust /// nominal SP offset; caller will do that. fn gen_clobber_save( diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 4f84d75c1d..dc9592908b 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -1,7 +1,7 @@ //! Implementation of the standard x64 ABI. use crate::ir::types::*; -use crate::ir::{self, types, MemFlags, TrapCode, Type}; +use crate::ir::{self, types, ExternalName, LibCall, MemFlags, Opcode, TrapCode, Type}; use crate::isa; use crate::isa::{x64::inst::*, CallConv}; use crate::machinst::abi_impl::*; @@ -389,6 +389,22 @@ impl ABIMachineSpec for X64ABIMachineSpec { insts } + fn gen_probestack(frame_size: u32) -> SmallVec<[Self::I; 2]> { + let mut insts = SmallVec::new(); + insts.push(Inst::imm( + OperandSize::Size32, + frame_size as u64, + Writable::from_reg(regs::rax()), + )); + insts.push(Inst::CallKnown { + dest: ExternalName::LibCall(LibCall::Probestack), + uses: vec![regs::rax()], + defs: vec![], + opcode: Opcode::Call, + }); + insts + } + fn gen_clobber_save( call_conv: isa::CallConv, _: &settings::Flags, diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index c0d94d2ab6..401b8aad08 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -182,6 +182,7 @@ impl LegacyPrefixes { fn emit_std_enc_mem( sink: &mut MachBuffer, state: &EmitState, + info: &EmitInfo, prefixes: LegacyPrefixes, opcodes: u32, mut num_opcodes: usize, @@ -194,7 +195,8 @@ fn emit_std_enc_mem( // expression. But `enc_g` can be derived from a register of any class. let srcloc = state.cur_srcloc(); - if srcloc != SourceLoc::default() && mem_e.can_trap() { + let can_trap = mem_e.can_trap(); + if srcloc != SourceLoc::default() && can_trap { sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); } @@ -202,6 +204,12 @@ fn emit_std_enc_mem( match mem_e { Amode::ImmReg { simm32, base, .. } => { + // If this is an access based off of RSP, it may trap with a stack overflow if it's the + // first touch of a new stack page. + if *base == regs::rsp() && !can_trap && info.flags().enable_probestack() { + sink.add_trap(srcloc, TrapCode::StackOverflow); + } + // First, the REX byte. let enc_e = int_reg_enc(*base); rex.emit_two_op(sink, enc_g, enc_e); @@ -262,6 +270,12 @@ fn emit_std_enc_mem( shift, .. } => { + // If this is an access based off of RSP, it may trap with a stack overflow if it's the + // first touch of a new stack page. + if *reg_base == regs::rsp() && !can_trap && info.flags().enable_probestack() { + sink.add_trap(srcloc, TrapCode::StackOverflow); + } + let enc_base = int_reg_enc(*reg_base); let enc_index = int_reg_enc(*reg_index); @@ -350,6 +364,7 @@ fn emit_std_enc_enc( fn emit_std_reg_mem( sink: &mut MachBuffer, state: &EmitState, + info: &EmitInfo, prefixes: LegacyPrefixes, opcodes: u32, num_opcodes: usize, @@ -361,6 +376,7 @@ fn emit_std_reg_mem( emit_std_enc_mem( sink, state, + info, prefixes, opcodes, num_opcodes, @@ -538,6 +554,7 @@ pub(crate) fn emit( emit_std_reg_mem( sink, state, + info, LegacyPrefixes::None, 0x0FAF, 2, @@ -597,6 +614,7 @@ pub(crate) fn emit( emit_std_reg_mem( sink, state, + info, LegacyPrefixes::None, opcode_m, 1, @@ -654,6 +672,7 @@ pub(crate) fn emit( emit_std_reg_mem( sink, state, + info, prefix, opcode, num_opcodes, @@ -717,7 +736,9 @@ pub(crate) fn emit( } RegMem::Mem { addr: src } => { let amode = src.finalize(state, sink); - emit_std_enc_mem(sink, state, prefix, opcode, 1, subopcode, &amode, rex_flags); + emit_std_enc_mem( + sink, state, info, prefix, opcode, 1, subopcode, &amode, rex_flags, + ); } } } @@ -738,7 +759,9 @@ pub(crate) fn emit( } RegMem::Mem { addr: src } => { let amode = src.finalize(state, sink); - emit_std_enc_mem(sink, state, prefix, 0xF7, 1, subopcode, &amode, rex_flags); + emit_std_enc_mem( + sink, state, info, prefix, 0xF7, 1, subopcode, &amode, rex_flags, + ); } } } @@ -987,6 +1010,7 @@ pub(crate) fn emit( emit_std_reg_mem( sink, state, + info, LegacyPrefixes::None, opcodes, num_opcodes, @@ -1004,6 +1028,7 @@ pub(crate) fn emit( emit_std_reg_mem( sink, state, + info, LegacyPrefixes::None, 0x8B, 1, @@ -1019,6 +1044,7 @@ pub(crate) fn emit( emit_std_reg_mem( sink, state, + info, LegacyPrefixes::None, 0x8D, 1, @@ -1081,6 +1107,7 @@ pub(crate) fn emit( emit_std_reg_mem( sink, state, + info, LegacyPrefixes::None, opcodes, num_opcodes, @@ -1108,7 +1135,17 @@ pub(crate) fn emit( }; // MOV r8, r/m8 is (REX.W==0) 88 /r - emit_std_reg_mem(sink, state, LegacyPrefixes::None, 0x88, 1, *src, dst, rex) + emit_std_reg_mem( + sink, + state, + info, + LegacyPrefixes::None, + 0x88, + 1, + *src, + dst, + rex, + ) } 2 => { @@ -1116,6 +1153,7 @@ pub(crate) fn emit( emit_std_reg_mem( sink, state, + info, LegacyPrefixes::_66, 0x89, 1, @@ -1130,6 +1168,7 @@ pub(crate) fn emit( emit_std_reg_mem( sink, state, + info, LegacyPrefixes::None, 0x89, 1, @@ -1144,6 +1183,7 @@ pub(crate) fn emit( emit_std_reg_mem( sink, state, + info, LegacyPrefixes::None, 0x89, 1, @@ -1253,6 +1293,7 @@ pub(crate) fn emit( emit_std_reg_mem( sink, state, + info, prefix, opcode_bytes, 2, @@ -1311,7 +1352,7 @@ pub(crate) fn emit( let addr = &addr.finalize(state, sink); // Whereas here we revert to the "normal" G-E ordering. let opcode = if *size == 1 { 0x3A } else { 0x3B }; - emit_std_reg_mem(sink, state, prefix, opcode, 1, *reg_g, addr, rex); + emit_std_reg_mem(sink, state, info, prefix, opcode, 1, *reg_g, addr, rex); } RegMemImm::Imm { simm32 } => { @@ -1372,6 +1413,7 @@ pub(crate) fn emit( emit_std_reg_mem( sink, state, + info, prefix, opcode, 2, @@ -1408,6 +1450,10 @@ pub(crate) fn emit( } Inst::Push64 { src } => { + if info.flags().enable_probestack() { + sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow); + } + match src { RegMemImm::Reg { reg } => { let enc_reg = int_reg_enc(*reg); @@ -1423,6 +1469,7 @@ pub(crate) fn emit( emit_std_enc_mem( sink, state, + info, LegacyPrefixes::None, 0xFF, 1, @@ -1454,6 +1501,9 @@ pub(crate) fn emit( } Inst::CallKnown { dest, opcode, .. } => { + if info.flags().enable_probestack() { + sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow); + } if let Some(s) = state.take_stack_map() { sink.add_stack_map(StackMapExtent::UpcomingBytes(5), s); } @@ -1469,6 +1519,9 @@ pub(crate) fn emit( } Inst::CallUnknown { dest, opcode, .. } => { + if info.flags().enable_probestack() { + sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow); + } let start_offset = sink.cur_offset(); match dest { RegMem::Reg { reg } => { @@ -1489,6 +1542,7 @@ pub(crate) fn emit( emit_std_enc_mem( sink, state, + info, LegacyPrefixes::None, 0xFF, 1, @@ -1587,6 +1641,7 @@ pub(crate) fn emit( emit_std_enc_mem( sink, state, + info, LegacyPrefixes::None, 0xFF, 1, @@ -1733,6 +1788,7 @@ pub(crate) fn emit( emit_std_reg_mem( sink, state, + info, prefix, opcode, num_opcodes, @@ -1863,6 +1919,7 @@ pub(crate) fn emit( emit_std_reg_mem( sink, state, + info, prefix, opcode, length, @@ -2012,7 +2069,17 @@ pub(crate) fn emit( !regs_swapped, "No existing way to encode a mem argument in the ModRM r/m field." ); - emit_std_reg_mem(sink, state, prefix, opcode, len, dst.to_reg(), addr, rex); + emit_std_reg_mem( + sink, + state, + info, + prefix, + opcode, + len, + dst.to_reg(), + addr, + rex, + ); } } sink.put1(*imm); @@ -2045,6 +2112,7 @@ pub(crate) fn emit( emit_std_reg_mem( sink, state, + info, prefix, opcode, 2, @@ -2109,7 +2177,17 @@ pub(crate) fn emit( } RegMem::Mem { addr } => { let addr = &addr.finalize(state, sink); - emit_std_reg_mem(sink, state, prefix, opcode, 2, reg_g.to_reg(), addr, rex); + emit_std_reg_mem( + sink, + state, + info, + prefix, + opcode, + 2, + reg_g.to_reg(), + addr, + rex, + ); } } } @@ -2129,7 +2207,7 @@ pub(crate) fn emit( } RegMem::Mem { addr } => { let addr = &addr.finalize(state, sink); - emit_std_reg_mem(sink, state, prefix, opcode, len, *dst, addr, rex); + emit_std_reg_mem(sink, state, info, prefix, opcode, len, *dst, addr, rex); } } } @@ -2643,7 +2721,7 @@ pub(crate) fn emit( _ => unreachable!(), }; let amode = dst.finalize(state, sink); - emit_std_reg_mem(sink, state, prefix, opcodes, 2, *src, &amode, rex); + emit_std_reg_mem(sink, state, info, prefix, opcodes, 2, *src, &amode, rex); } Inst::AtomicRmwSeq { ty, op } => { diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 4b09681548..16e7490a09 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -4334,8 +4334,8 @@ impl LowerBackend for X64Backend { let ty = ctx.input_ty(ifcmp_sp, 0); ctx.emit(Inst::cmp_rmi_r( ty.bytes() as u8, - RegMemImm::reg(operand), - regs::rsp(), + RegMemImm::reg(regs::rsp()), + operand, )); let cond_code = ctx.data(branches[0]).cond_code().unwrap(); let cc = CC::from_intcc(cond_code); diff --git a/cranelift/codegen/src/machinst/abi_impl.rs b/cranelift/codegen/src/machinst/abi_impl.rs index b9774faba9..4b21c2d946 100644 --- a/cranelift/codegen/src/machinst/abi_impl.rs +++ b/cranelift/codegen/src/machinst/abi_impl.rs @@ -314,6 +314,9 @@ pub trait ABIMachineSpec { /// Generate the usual frame-restore sequence for this architecture. fn gen_epilogue_frame_restore() -> SmallVec<[Self::I; 2]>; + /// Generate a probestack call. + fn gen_probestack(_frame_size: u32) -> SmallVec<[Self::I; 2]>; + /// Generate a clobber-save sequence. This takes the list of *all* registers /// written/modified by the function body. The implementation here is /// responsible for determining which of these are callee-saved according to @@ -481,6 +484,9 @@ pub struct ABICalleeImpl { /// manually register-allocated and carefully only use caller-saved /// registers and keep nothing live after this sequence of instructions. stack_limit: Option<(Reg, Vec)>, + /// Are we to invoke the probestack function in the prologue? If so, + /// what is the minimum size at which we must invoke it? + probestack_min_frame: Option, _mach: PhantomData, } @@ -536,6 +542,18 @@ impl ABICalleeImpl { .map(|reg| (reg, Vec::new())) .or_else(|| f.stack_limit.map(|gv| gen_stack_limit::(f, &sig, gv))); + // Determine whether a probestack call is required for large enough + // frames (and the minimum frame size if so). + let probestack_min_frame = if flags.enable_probestack() { + assert!( + !flags.probestack_func_adjusts_sp(), + "SP-adjusting probestack not supported in new backends" + ); + Some(1 << flags.probestack_size_log2()) + } else { + None + }; + Ok(Self { sig, stackslots, @@ -550,6 +568,7 @@ impl ABICalleeImpl { flags, is_leaf: f.is_leaf(), stack_limit, + probestack_min_frame, _mach: PhantomData, }) } @@ -978,6 +997,11 @@ impl ABICallee for ABICalleeImpl { insts.extend_from_slice(stack_limit_load); self.insert_stack_check(*reg, total_stacksize, &mut insts); } + if let Some(min_frame) = &self.probestack_min_frame { + if total_stacksize >= *min_frame { + insts.extend(M::gen_probestack(total_stacksize)); + } + } } if total_stacksize > 0 { self.fixed_frame_storage_size += total_stacksize; diff --git a/cranelift/filetests/filetests/isa/x64/probestack.clif b/cranelift/filetests/filetests/isa/x64/probestack.clif new file mode 100644 index 0000000000..135587d355 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/probestack.clif @@ -0,0 +1,17 @@ +test compile +set enable_probestack=true +target x86_64 +feature "experimental_x64" + +function %f1() -> i64 { +ss0 = explicit_slot 100000 + +block0: + v1 = stack_addr.i64 ss0 + return v1 +} + +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: movl $$100000, %eax +; nextln: call LibCall(Probestack)