cranelift: Add inline stack probing for x64 (#4747)
* cranelift: Add inline stack probe for x64
* cranelift: Clean up comments

Thanks @jameysharp!
@@ -284,6 +284,18 @@ pub(crate) fn define() -> SettingGroup {
         12,
     );
 
+    settings.add_enum(
+        "probestack_strategy",
+        "Controls what kinds of stack probes are emitted.",
+        r#"
+            Supported strategies:
+
+            - `outline`: Always emits stack probes as calls to a probe stack function.
+            - `inline`: Always emits inline stack probes.
+        "#,
+        vec!["outline", "inline"],
+    );
+
     // Jump table options.
 
     settings.add_bool(
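Not part of the diff: a minimal sketch of how an embedder might opt into the new behaviour
through cranelift-codegen's settings builder (this assumes the existing `Configurable` API;
the flag names come straight from the hunk above):

    use cranelift_codegen::settings::{self, Configurable};

    // Build a Flags value that requests inline stack probes with 4 KiB guard pages.
    fn inline_probe_flags() -> settings::Flags {
        let mut builder = settings::builder();
        builder.enable("enable_probestack").unwrap();
        builder.set("probestack_strategy", "inline").unwrap();
        // probestack_size_log2 = 12 is the default (4096-byte pages); 16 would mean 64 KiB pages.
        builder.set("probestack_size_log2", "12").unwrap();
        settings::Flags::new(builder)
    }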
@@ -610,6 +610,10 @@ impl ABIMachineSpec for AArch64MachineDeps {
         smallvec![]
     }
 
+    fn gen_inline_probestack(_frame_size: u32, _guard_size: u32) -> SmallInstVec<Self::I> {
+        unimplemented!("Inline stack probing is unimplemented on AArch64");
+    }
+
     // Returns stack bytes used as well as instructions. Does not adjust
     // nominal SP offset; abi generic code will do that.
     fn gen_clobber_save(
@@ -574,6 +574,10 @@ impl ABIMachineSpec for S390xMachineDeps {
         smallvec![]
     }
 
+    fn gen_inline_probestack(_frame_size: u32, _guard_size: u32) -> SmallInstVec<Self::I> {
+        unimplemented!("Inline stack probing is unimplemented on S390x");
+    }
+
     // Returns stack bytes used as well as instructions. Does not adjust
     // nominal SP offset; abi generic code will do that.
     fn gen_clobber_save(
@@ -29,6 +29,42 @@ pub(crate) type X64Caller = Caller<X64ABIMachineSpec>;
 /// Implementation of ABI primitives for x64.
 pub struct X64ABIMachineSpec;
 
+impl X64ABIMachineSpec {
+    fn gen_probestack_unroll(guard_size: u32, probe_count: u32) -> SmallInstVec<Inst> {
+        let mut insts = SmallVec::with_capacity(probe_count as usize);
+        for i in 0..probe_count {
+            let offset = (guard_size * (i + 1)) as i64;
+
+            // TODO: It would be nice if we could store the imm 0, but we don't have insts for those
+            // so store the stack pointer. Any register will do, since the stack is undefined at this point
+            insts.push(Self::gen_store_stack(
+                StackAMode::SPOffset(-offset, I8),
+                regs::rsp(),
+                I32,
+            ));
+        }
+        insts
+    }
+    fn gen_probestack_loop(frame_size: u32, guard_size: u32) -> SmallInstVec<Inst> {
+        // We have to use a caller saved register since clobbering only happens
+        // after stack probing.
+        //
+        // R11 is caller saved on both Fastcall and SystemV, and not used for argument
+        // passing, so it's pretty much free. It is also not used by the stacklimit mechanism.
+        let tmp = regs::r11();
+        debug_assert!({
+            let real_reg = tmp.to_real_reg().unwrap();
+            !is_callee_save_systemv(real_reg, false) && !is_callee_save_fastcall(real_reg, false)
+        });
+
+        smallvec![Inst::StackProbeLoop {
+            tmp: Writable::from_reg(tmp),
+            frame_size,
+            guard_size,
+        }]
+    }
+}
+
 impl IsaFlags for x64_settings::Flags {}
 
 impl ABIMachineSpec for X64ABIMachineSpec {
@@ -398,6 +434,23 @@ impl ABIMachineSpec for X64ABIMachineSpec {
         insts
     }
 
+    fn gen_inline_probestack(frame_size: u32, guard_size: u32) -> SmallInstVec<Self::I> {
+        // Unroll at most n consecutive probes, before falling back to using a loop
+        //
+        // This number was picked because the loop version is 38 bytes long. We can fit
+        // 5 inline probes in that space, so unroll if it's beneficial in terms of code size.
+        const PROBE_MAX_UNROLL: u32 = 5;
+
+        // Number of probes that we need to perform
+        let probe_count = align_to(frame_size, guard_size) / guard_size;
+
+        if probe_count <= PROBE_MAX_UNROLL {
+            Self::gen_probestack_unroll(guard_size, probe_count)
+        } else {
+            Self::gen_probestack_loop(frame_size, guard_size)
+        }
+    }
+
     fn gen_clobber_save(
         _call_conv: isa::CallConv,
         setup_frame: bool,
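Not part of the diff: a standalone sketch of the unroll-vs-loop decision above, with `align_to`
re-implemented locally for illustration (the real code uses Cranelift's helper of the same name):

    // Round `x` up to the next multiple of `alignment` (a power of two).
    fn align_to(x: u32, alignment: u32) -> u32 {
        debug_assert!(alignment.is_power_of_two());
        (x + alignment - 1) & !(alignment - 1)
    }

    // Returns how many probes a frame needs and whether they would be unrolled.
    fn probe_plan(frame_size: u32, guard_size: u32) -> (u32, bool) {
        const PROBE_MAX_UNROLL: u32 = 5;
        let probe_count = align_to(frame_size, guard_size) / guard_size;
        (probe_count, probe_count <= PROBE_MAX_UNROLL)
    }

    fn main() {
        // 12288-byte frame with 4096-byte guard pages: 3 probes, unrolled (matches the filetest below).
        assert_eq!(probe_plan(12288, 4096), (3, true));
        // 100000-byte frame with 4096-byte guard pages: 25 probes, emitted as a loop.
        assert_eq!(probe_plan(100000, 4096), (25, false));
    }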
@@ -177,6 +177,11 @@
     ;; popq reg
     (Pop64 (dst WritableGpr))
 
+    ;; Emits an inline stack probe loop.
+    (StackProbeLoop (tmp WritableReg)
+                    (frame_size u32)
+                    (guard_size u32))
+
     ;; =========================================
     ;; Floating-point operations.
 
@@ -1234,6 +1234,109 @@ pub(crate) fn emit(
             sink.put1(0x58 + (enc_dst & 7));
         }
 
+        Inst::StackProbeLoop {
+            tmp,
+            frame_size,
+            guard_size,
+        } => {
+            assert!(info.flags.enable_probestack());
+            assert!(guard_size.is_power_of_two());
+
+            let tmp = allocs.next_writable(*tmp);
+
+            // Number of probes that we need to perform
+            let probe_count = align_to(*frame_size, *guard_size) / guard_size;
+
+            // The inline stack probe loop has 3 phases:
+            //
+            // We generate the "guard area" register which is essentially the frame_size aligned to
+            // guard_size. We copy the stack pointer and subtract the guard area from it. This
+            // gets us a register that we can use to compare when looping.
+            //
+            // After that we emit the loop. Essentially we just adjust the stack pointer one guard_size'd
+            // distance at a time and then touch the stack by writing anything to it. We use the previously
+            // created "guard area" register to know when to stop looping.
+            //
+            // When we have touched all the pages that we need, we have to restore the stack pointer
+            // to where it was before.
+            //
+            // Generate the following code:
+            //         mov  tmp_reg, rsp
+            //         sub  tmp_reg, guard_size * probe_count
+            // .loop_start:
+            //         sub  rsp, guard_size
+            //         mov  [rsp], rsp
+            //         cmp  rsp, tmp_reg
+            //         jne  .loop_start
+            //         add  rsp, guard_size * probe_count
+
+            // Create the guard bound register
+            // mov  tmp_reg, rsp
+            let inst = Inst::gen_move(tmp, regs::rsp(), types::I64);
+            inst.emit(&[], sink, info, state);
+
+            // sub  tmp_reg, GUARD_SIZE * probe_count
+            let inst = Inst::alu_rmi_r(
+                OperandSize::Size64,
+                AluRmiROpcode::Sub,
+                RegMemImm::imm(guard_size * probe_count),
+                tmp,
+            );
+            inst.emit(&[], sink, info, state);
+
+            // Emit the main loop!
+            let loop_start = sink.get_label();
+            sink.bind_label(loop_start);
+
+            // sub  rsp, GUARD_SIZE
+            let inst = Inst::alu_rmi_r(
+                OperandSize::Size64,
+                AluRmiROpcode::Sub,
+                RegMemImm::imm(*guard_size),
+                Writable::from_reg(regs::rsp()),
+            );
+            inst.emit(&[], sink, info, state);
+
+            // TODO: `mov [rsp], 0` would be better, but we don't have that instruction
+            // Probe the stack! We don't use Inst::gen_store_stack here because we need a predictable
+            // instruction size.
+            // mov  [rsp], rsp
+            let inst = Inst::mov_r_m(
+                OperandSize::Size32, // Use Size32 since it saves us one byte
+                regs::rsp(),
+                SyntheticAmode::Real(Amode::imm_reg(0, regs::rsp())),
+            );
+            inst.emit(&[], sink, info, state);
+
+            // Compare and jump if we are not done yet
+            // cmp  rsp, tmp_reg
+            let inst = Inst::cmp_rmi_r(
+                OperandSize::Size64,
+                RegMemImm::reg(regs::rsp()),
+                tmp.to_reg(),
+            );
+            inst.emit(&[], sink, info, state);
+
+            // jne .loop_start
+            // TODO: Encoding the JmpIf as a short jump saves us 4 bytes here.
+            one_way_jmp(sink, CC::NZ, loop_start);
+
+            // The regular prologue code is going to emit a `sub` after this, so we need to
+            // reset the stack pointer
+            //
+            // TODO: It would be better if we could avoid the `add` + `sub` that is generated here
+            // and in the stack adj portion of the prologue
+            //
+            // add rsp, GUARD_SIZE * probe_count
+            let inst = Inst::alu_rmi_r(
+                OperandSize::Size64,
+                AluRmiROpcode::Add,
+                RegMemImm::imm(guard_size * probe_count),
+                Writable::from_reg(regs::rsp()),
+            );
+            inst.emit(&[], sink, info, state);
+        }
+
         Inst::CallKnown {
             dest,
             info: call_info,
@@ -100,6 +100,7 @@ impl Inst {
             | Inst::Nop { .. }
             | Inst::Pop64 { .. }
             | Inst::Push64 { .. }
+            | Inst::StackProbeLoop { .. }
             | Inst::Ret { .. }
             | Inst::Setcc { .. }
             | Inst::ShiftR { .. }
@@ -1427,6 +1428,21 @@ impl PrettyPrint for Inst {
                 format!("{} {}", ljustify("pushq".to_string()), src)
             }
 
+            Inst::StackProbeLoop {
+                tmp,
+                frame_size,
+                guard_size,
+            } => {
+                let tmp = pretty_print_reg(tmp.to_reg(), 8, allocs);
+                format!(
+                    "{} {}, frame_size={}, guard_size={}",
+                    ljustify("stack_probe_loop".to_string()),
+                    tmp,
+                    frame_size,
+                    guard_size
+                )
+            }
+
             Inst::Pop64 { dst } => {
                 let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
                 format!("{} {}", ljustify("popq".to_string()), dst)
@@ -1946,6 +1962,9 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
         Inst::Pop64 { dst } => {
             collector.reg_def(dst.to_writable_reg());
         }
+        Inst::StackProbeLoop { tmp, .. } => {
+            collector.reg_early_def(*tmp);
+        }
 
         Inst::CallKnown { ref info, .. } => {
             for &u in &info.uses {
@@ -108,6 +108,7 @@ use crate::ir::types::*;
 use crate::ir::{ArgumentExtension, ArgumentPurpose, DynamicStackSlot, Signature, StackSlot};
 use crate::isa::TargetIsa;
 use crate::settings;
+use crate::settings::ProbestackStrategy;
 use crate::CodegenResult;
 use crate::{ir, isa};
 use crate::{machinst::*, trace};
@@ -430,6 +431,9 @@ pub trait ABIMachineSpec {
     /// Generate a probestack call.
     fn gen_probestack(_frame_size: u32) -> SmallInstVec<Self::I>;
 
+    /// Generate an inline stack probe.
+    fn gen_inline_probestack(_frame_size: u32, _guard_size: u32) -> SmallInstVec<Self::I>;
+
     /// Get all clobbered registers that are callee-saved according to the ABI; the result
     /// contains the registers in a sorted order.
     fn get_clobbered_callee_saves(
@@ -1660,10 +1664,20 @@ impl<M: ABIMachineSpec> Callee<M> {
                 insts.extend(stack_limit_load.clone());
                 self.insert_stack_check(*reg, total_stacksize, &mut insts);
             }
-            if let Some(min_frame) = &self.probestack_min_frame {
-                if total_stacksize >= *min_frame {
-                    insts.extend(M::gen_probestack(total_stacksize));
-                }
+
+            let needs_probestack = self
+                .probestack_min_frame
+                .map_or(false, |min_frame| total_stacksize >= min_frame);
+
+            if needs_probestack {
+                insts.extend(
+                    if self.flags.probestack_strategy() == ProbestackStrategy::Inline {
+                        let guard_size = 1 << self.flags.probestack_size_log2();
+                        M::gen_inline_probestack(total_stacksize, guard_size)
+                    } else {
+                        M::gen_probestack(total_stacksize)
+                    },
+                );
             }
         }
 
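Not part of the diff: the guard size handed to gen_inline_probestack is derived from the existing
probestack_size_log2 setting, as in this small sketch:

    // Mirrors `1 << self.flags.probestack_size_log2()` from the hunk above.
    fn guard_size(probestack_size_log2: u8) -> u32 {
        1u32 << probestack_size_log2
    }

    fn main() {
        assert_eq!(guard_size(12), 4096); // the default, a 4 KiB guard page
        assert_eq!(guard_size(16), 65536); // the 64 KiB variant exercised in the filetests
    }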
@@ -525,6 +525,7 @@ opt_level = "none"
 tls_model = "none"
 libcall_call_conv = "isa_default"
 probestack_size_log2 = 12
+probestack_strategy = "outline"
 regalloc_checker = false
 regalloc_verbose_logs = false
 enable_alias_analysis = true
@@ -0,0 +1,67 @@
+test compile precise-output
+set enable_probestack=true
+; Test with the larger size of 64k
+set probestack_size_log2=16
+set probestack_strategy=inline
+target x86_64
+
+
+
+; If the stack size is just one page, we can avoid the stack probe entirely
+function %single_page() -> i64 system_v {
+    ss0 = explicit_slot 8192
+
+block0:
+    v1 = stack_addr.i64 ss0
+    return v1
+}
+
+; pushq %rbp
+; movq %rsp, %rbp
+; subq %rsp, $8192, %rsp
+; block0:
+; lea rsp(0 + virtual offset), %rax
+; addq %rsp, $8192, %rsp
+; movq %rbp, %rsp
+; popq %rbp
+; ret
+
+function %unrolled() -> i64 system_v {
+    ss0 = explicit_slot 196608
+
+block0:
+    v1 = stack_addr.i64 ss0
+    return v1
+}
+
+; pushq %rbp
+; movq %rsp, %rbp
+; movl %esp, -65536(%rsp)
+; movl %esp, -131072(%rsp)
+; movl %esp, -196608(%rsp)
+; subq %rsp, $196608, %rsp
+; block0:
+; lea rsp(0 + virtual offset), %rax
+; addq %rsp, $196608, %rsp
+; movq %rbp, %rsp
+; popq %rbp
+; ret
+
+function %large() -> i64 system_v {
+    ss0 = explicit_slot 2097152
+
+block0:
+    v1 = stack_addr.i64 ss0
+    return v1
+}
+
+; pushq %rbp
+; movq %rsp, %rbp
+; stack_probe_loop %r11, frame_size=2097152, guard_size=65536
+; subq %rsp, $2097152, %rsp
+; block0:
+; lea rsp(0 + virtual offset), %rax
+; addq %rsp, $2097152, %rsp
+; movq %rbp, %rsp
+; popq %rbp
+; ret
new file: cranelift/filetests/filetests/isa/x64/inline-probestack.clif (66 lines)
@@ -0,0 +1,66 @@
+test compile precise-output
+set enable_probestack=true
+set probestack_strategy=inline
+; This is the default and is equivalent to a page size of 4096
+set probestack_size_log2=12
+target x86_64
+
+
+; If the stack size is just one page, we can avoid the stack probe entirely
+function %single_page() -> i64 system_v {
+    ss0 = explicit_slot 2048
+
+block0:
+    v1 = stack_addr.i64 ss0
+    return v1
+}
+
+; pushq %rbp
+; movq %rsp, %rbp
+; subq %rsp, $2048, %rsp
+; block0:
+; lea rsp(0 + virtual offset), %rax
+; addq %rsp, $2048, %rsp
+; movq %rbp, %rsp
+; popq %rbp
+; ret
+
+function %unrolled() -> i64 system_v {
+    ss0 = explicit_slot 12288
+
+block0:
+    v1 = stack_addr.i64 ss0
+    return v1
+}
+
+; pushq %rbp
+; movq %rsp, %rbp
+; movl %esp, -4096(%rsp)
+; movl %esp, -8192(%rsp)
+; movl %esp, -12288(%rsp)
+; subq %rsp, $12288, %rsp
+; block0:
+; lea rsp(0 + virtual offset), %rax
+; addq %rsp, $12288, %rsp
+; movq %rbp, %rsp
+; popq %rbp
+; ret
+
+function %large() -> i64 system_v {
+    ss0 = explicit_slot 100000
+
+block0:
+    v1 = stack_addr.i64 ss0
+    return v1
+}
+
+; pushq %rbp
+; movq %rsp, %rbp
+; stack_probe_loop %r11, frame_size=100000, guard_size=4096
+; subq %rsp, $100000, %rsp
+; block0:
+; lea rsp(0 + virtual offset), %rax
+; addq %rsp, $100000, %rsp
+; movq %rbp, %rsp
+; popq %rbp
+; ret
@@ -1,5 +1,7 @@
 test run
 set enable_llvm_abi_extensions=true
+; Disable stack probes since these tests don't require them
+set enable_probestack=false
 target x86_64
 target aarch64
 target s390x
@@ -0,0 +1,37 @@
+test interpret
+test run
+set enable_probestack=true
+set probestack_strategy=inline
+
+; This is the default and is equivalent to a page size of 4096
+set probestack_size_log2=12
+target x86_64
+; Test also with 64k pages
+set probestack_size_log2=16
+target x86_64
+
+; Create a huge stack slot (1MB), way larger than PAGE_SIZE and touch the end of it.
+; This guarantees that we bypass the guard page, causing a page fault the OS isn't expecting,
+; which turns into a segfault if we haven't correctly implemented stack probing.
+
+function %probe_loop(i64) -> i64 {
+    ss0 = explicit_slot 1048576
+
+block0(v0: i64):
+    stack_store.i64 v0, ss0
+    v1 = stack_load.i64 ss0
+    return v1
+}
+; run: %probe_loop(1) == 1
+
+
+; Tests the unrolled version of the stackprobe
+function %probe_unroll(i64) -> i64 {
+    ss0 = explicit_slot 9000
+
+block0(v0: i64):
+    stack_store.i64 v0, ss0
+    v1 = stack_load.i64 ss0
+    return v1
+}
+; run: %probe_unroll(1) == 1
@@ -1,5 +1,7 @@
 test interpret
 test run
+; Disable stack probes since these tests don't require them
+set enable_probestack=false
 target x86_64
 target s390x
 target aarch64
@@ -43,19 +43,6 @@ fn build_host_isa(
         builder.set(value.name, &value.value_string()).unwrap();
     }
 
-    // We need to force disable stack probing, since we don't support it yet.
-    let flags = {
-        let mut flags_builder = settings::builder();
-
-        // Copy all flags
-        for flag in flags.iter() {
-            flags_builder.set(flag.name, &flag.value_string()).unwrap();
-        }
-
-        flags_builder.set("enable_probestack", "false").unwrap();
-        settings::Flags::new(flags_builder)
-    };
-
     builder.finish(flags).unwrap()
 }
 
@@ -354,6 +354,7 @@ impl Engine {
             "enable_llvm_abi_extensions" => *value == FlagValue::Bool(false),
             "enable_pinned_reg" => *value == FlagValue::Bool(false),
             "enable_probestack" => *value == FlagValue::Bool(false),
+            "probestack_strategy" => *value == FlagValue::Enum("outline".into()),
            "use_colocated_libcalls" => *value == FlagValue::Bool(false),
             "use_pinned_reg_as_heap_base" => *value == FlagValue::Bool(false),