Fix locals not being restored properly (which may cause us to read garbage values from the stack)
This commit is contained in:
180
src/backend.rs
180
src/backend.rs
@@ -1,5 +1,9 @@
|
||||
#![allow(dead_code)] // for now
|
||||
|
||||
// Since we want this to be linear-time, we never want to iterate over a `Vec`. `ArrayVec`s have a hard,
|
||||
// small maximum size and so we can consider iterating over them to be essentially constant-time.
|
||||
use arrayvec::ArrayVec;
|
||||
|
||||
use dynasmrt::x64::Assembler;
|
||||
use dynasmrt::{AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi, ExecutableBuffer};
|
||||
use error::Error;
|
||||
@@ -166,7 +170,7 @@ impl CodeGenSession {
|
||||
asm: &mut self.assembler,
|
||||
func_starts: &self.func_starts,
|
||||
block_state: Default::default(),
|
||||
locals: Default::default(),
|
||||
original_locals: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -222,7 +226,7 @@ impl Value {
|
||||
|
||||
fn location(&self, locals: &Locals) -> ValueLocation {
|
||||
match *self {
|
||||
Value::Local(loc) => local_location(locals, loc),
|
||||
Value::Local(loc) => locals.get(loc),
|
||||
Value::Temp(reg) => ValueLocation::Reg(reg),
|
||||
Value::Immediate(reg) => ValueLocation::Immediate(reg),
|
||||
}
|
||||
@@ -240,7 +244,7 @@ enum StackValue {
|
||||
impl StackValue {
|
||||
fn location(&self, locals: &Locals) -> Option<ValueLocation> {
|
||||
match *self {
|
||||
StackValue::Local(loc) => Some(local_location(locals, loc)),
|
||||
StackValue::Local(loc) => Some(locals.get(loc)),
|
||||
StackValue::Immediate(i) => Some(ValueLocation::Immediate(i)),
|
||||
StackValue::Temp(reg) => Some(ValueLocation::Reg(reg)),
|
||||
StackValue::Pop => None,
|
||||
@@ -248,10 +252,30 @@ impl StackValue {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
#[derive(Default, Clone)]
|
||||
struct Locals {
|
||||
// TODO: Use `ArrayVec` since we have a hard maximum (the number of registers)
|
||||
locs: Vec<ValueLocation>,
|
||||
register_arguments: ArrayVec<[ValueLocation; ARGS_IN_GPRS.len()]>,
|
||||
num_stack_args: u32,
|
||||
num_local_stack_slots: u32,
|
||||
}
|
||||
|
||||
impl Locals {
|
||||
/// Returns the `ValueLocation` for the local with the given `index`.
///
/// Indices below `register_arguments.len()` yield a copy of the entry stored in
/// `register_arguments`. Any higher index is mapped to a `ValueLocation::Stack` offset:
/// the next `num_stack_args` indices use `(i + num_local_stack_slots + 2) * WORD_SIZE`,
/// and every index after those uses `i * WORD_SIZE`, counted from zero.
///
/// NOTE(review): `start_function` stores the raw `locals` count in
/// `num_local_stack_slots`, while the frame it allocates is sized from the
/// even-aligned slot count. Confirm the stack-argument offsets computed here
/// still agree with the frame layout when the number of locals is odd.
fn get(&self, index: u32) -> ValueLocation {
|
||||
self.register_arguments
|
||||
.get(index as usize)
|
||||
.cloned()
|
||||
// The closure only runs when the lookup above returned `None`, i.e. when
// `index >= register_arguments.len()`, so the subtraction below cannot underflow.
.unwrap_or_else(|| {
|
||||
let stack_index = index - self.register_arguments.len() as u32;
|
||||
if stack_index < self.num_stack_args {
|
||||
// Stack-passed argument. The `+ 2` presumably skips the saved frame pointer and
// the return address that sit between the local slots and the caller's
// arguments - TODO confirm against the prologue in `start_function`.
ValueLocation::Stack(
|
||||
((stack_index + self.num_local_stack_slots + 2) * WORD_SIZE) as _,
|
||||
)
|
||||
} else {
|
||||
// Not an argument: rebase the index so that the first such local is slot 0.
let stack_index = stack_index - self.num_stack_args;
|
||||
ValueLocation::Stack((stack_index * WORD_SIZE) as _)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default, Clone)]
|
||||
@@ -259,22 +283,16 @@ pub struct BlockState {
|
||||
stack: Stack,
|
||||
pub depth: StackDepth,
|
||||
regs: Registers,
|
||||
/// This is the _current_ locals, since we can shuffle them about during function calls.
|
||||
/// We will restore this to be the same state as the `Locals` in `Context` at the end
|
||||
/// of a block.
|
||||
locals: Locals,
|
||||
}
|
||||
|
||||
/// Returns `offset` plus the current block's stack depth converted to bytes
/// (`ctx.block_state.depth.0 * WORD_SIZE`).
///
/// `ctx` is taken as `&mut` but is only read here.
fn adjusted_offset(ctx: &mut Context, offset: i32) -> i32 {
|
||||
(ctx.block_state.depth.0 * WORD_SIZE) as i32 + offset
|
||||
}
|
||||
|
||||
/// Returns the location of local `index` according to `locals.locs`.
///
/// If `locs` has an entry at `index`, a copy of it is returned. Otherwise the result is a
/// `ValueLocation::Stack` whose offset is `index` minus `ARGS_IN_GPRS.len()` (saturating at
/// zero), multiplied by `WORD_SIZE`.
fn local_location(locals: &Locals, index: u32) -> ValueLocation {
|
||||
locals
|
||||
.locs
|
||||
.get(index as usize)
|
||||
.cloned()
|
||||
// Fallback for indices that have no recorded entry in `locs`.
.unwrap_or(ValueLocation::Stack(
|
||||
(index.saturating_sub(ARGS_IN_GPRS.len() as u32) * WORD_SIZE) as _,
|
||||
))
|
||||
}
|
||||
|
||||
type Stack = Vec<StackValue>;
|
||||
|
||||
pub struct Context<'a> {
|
||||
@@ -282,7 +300,7 @@ pub struct Context<'a> {
|
||||
func_starts: &'a Vec<(Option<AssemblyOffset>, DynamicLabel)>,
|
||||
/// Each push and pop on the value stack increments or decrements this value by 1 respectively.
|
||||
block_state: BlockState,
|
||||
locals: Locals,
|
||||
original_locals: Locals,
|
||||
}
|
||||
|
||||
impl<'a> Context<'a> {}
|
||||
@@ -323,42 +341,36 @@ pub fn current_block_state(ctx: &Context) -> BlockState {
|
||||
}
|
||||
|
||||
/// Emits the code that moves the value on top of the block's value stack into `RAX`.
///
/// If the top entry has a known location (resolved against `ctx.locals`), a `push` of that
/// register, stack slot or immediate is emitted first. Afterwards `free_return_register` is
/// called and the top value is popped into `RAX` via `pop_i32_into`.
///
/// # Panics
///
/// Panics if the value stack is empty (`last().unwrap()`).
pub fn return_from_block(ctx: &mut Context) {
|
||||
if let Some(loc) = ctx.block_state.stack.last().unwrap().location(&ctx.locals) {
|
||||
match loc {
|
||||
ValueLocation::Reg(r) => {
|
||||
dynasm!(ctx.asm
|
||||
; push Rq(r)
|
||||
);
|
||||
}
|
||||
ValueLocation::Stack(offset) => {
|
||||
// Stack offsets are adjusted by the current stack depth before being used
// as an `rsp`-relative address.
let offset = adjusted_offset(ctx, offset);
|
||||
dynasm!(ctx.asm
|
||||
; push QWORD [rsp + offset]
|
||||
);
|
||||
}
|
||||
ValueLocation::Immediate(imm) => {
|
||||
dynasm!(ctx.asm
|
||||
; push imm
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
// If `location` is `None` then we don't need to do anything.
|
||||
free_return_register(ctx, 1);
|
||||
pop_i32_into(ctx, ValueLocation::Reg(RAX))
|
||||
}
|
||||
|
||||
/// Records a block's return value on the value stack.
///
/// Reserves one slot of stack depth, then pushes a `StackValue::Pop` entry followed by a
/// `StackValue::Temp(RAX)` entry, so `RAX` ends up on top of the value stack.
pub fn push_block_return_value(ctx: &mut Context) {
|
||||
ctx.block_state.depth.reserve(1);
|
||||
ctx.block_state.stack.push(StackValue::Pop);
|
||||
ctx.block_state.stack.push(StackValue::Temp(RAX));
|
||||
}
|
||||
|
||||
pub fn restore_block_state(ctx: &mut Context, block_state: BlockState) {
|
||||
ctx.block_state = block_state;
|
||||
pub fn end_block(ctx: &mut Context, parent_block_state: BlockState) {
|
||||
restore_locals(ctx);
|
||||
ctx.block_state = parent_block_state;
|
||||
}
|
||||
|
||||
/// Pushes `StackValue::Temp(RAX)` onto the value stack, marking `RAX` as holding the
/// topmost value. No machine code is emitted here.
pub fn push_return_value(ctx: &mut Context) {
|
||||
ctx.block_state.stack.push(StackValue::Temp(RAX));
|
||||
}
|
||||
|
||||
/// Copies every register argument from its current location back to its original one.
///
/// Pairs each entry of `ctx.block_state.locals.register_arguments` (the source) with the
/// entry at the same position in `ctx.original_locals.register_arguments` (the destination)
/// and calls `copy_value` for each pair. This function does not itself write anything back
/// into `ctx.block_state.locals`.
fn restore_locals(ctx: &mut Context) {
|
||||
// Both lists are cloned before iterating - presumably so that `ctx` is free to be
// borrowed mutably by `copy_value` inside the loop.
for (src, dst) in ctx
|
||||
.block_state
|
||||
.locals
|
||||
.register_arguments
|
||||
.clone()
|
||||
.iter()
|
||||
.zip(&ctx.original_locals.register_arguments.clone())
|
||||
{
|
||||
copy_value(ctx, *src, *dst);
|
||||
}
|
||||
}
|
||||
|
||||
fn push_i32(ctx: &mut Context, value: Value) {
|
||||
let stack_loc = match value {
|
||||
Value::Local(loc) => StackValue::Local(loc),
|
||||
@@ -421,7 +433,8 @@ fn pop_i32_into(ctx: &mut Context, dst: ValueLocation) {
|
||||
}
|
||||
};
|
||||
|
||||
let src = to_move.location(&ctx.locals);
|
||||
let src = to_move.location(&ctx.block_state.locals);
|
||||
println!("{:?}, {:?}", src, dst);
|
||||
copy_value(ctx, src, dst);
|
||||
free_val(ctx, to_move);
|
||||
}
|
||||
@@ -435,7 +448,7 @@ fn free_val(ctx: &mut Context, val: Value) {
|
||||
|
||||
/// Puts this value into a register so that it can be efficiently read
|
||||
fn into_reg(ctx: &mut Context, val: Value) -> GPR {
|
||||
match val.location(&ctx.locals) {
|
||||
match val.location(&ctx.block_state.locals) {
|
||||
ValueLocation::Stack(offset) => {
|
||||
let offset = adjusted_offset(ctx, offset);
|
||||
let scratch = ctx.block_state.regs.take_scratch_gpr();
|
||||
@@ -462,7 +475,7 @@ fn into_temp_reg(ctx: &mut Context, val: Value) -> GPR {
|
||||
Value::Local(loc) => {
|
||||
let scratch = ctx.block_state.regs.take_scratch_gpr();
|
||||
|
||||
match local_location(&ctx.locals, loc) {
|
||||
match ctx.block_state.locals.get(loc) {
|
||||
ValueLocation::Stack(offset) => {
|
||||
let offset = adjusted_offset(ctx, offset);
|
||||
dynasm!(ctx.asm
|
||||
@@ -512,7 +525,7 @@ macro_rules! commutative_binop {
|
||||
_ => (into_temp_reg(ctx, op0), op1),
|
||||
};
|
||||
|
||||
match op0.location(&ctx.locals) {
|
||||
match op0.location(&ctx.block_state.locals) {
|
||||
ValueLocation::Reg(reg) => {
|
||||
dynasm!(ctx.asm
|
||||
; $instr Rd(op1), Rd(reg)
|
||||
@@ -538,12 +551,14 @@ macro_rules! commutative_binop {
|
||||
}
|
||||
}
|
||||
|
||||
commutative_binop!(i32_add, add, |a, b| a + b);
|
||||
commutative_binop!(i32_add, add, i32::wrapping_add);
|
||||
commutative_binop!(i32_and, and, |a, b| a & b);
|
||||
commutative_binop!(i32_or, or, |a, b| a | b);
|
||||
commutative_binop!(i32_xor, xor, |a, b| a ^ b);
|
||||
commutative_binop!(i32_mul, imul, |a, b| a * b);
|
||||
commutative_binop!(i32_mul, imul, i32::wrapping_mul);
|
||||
|
||||
// `sub` is not commutative, so we have to handle it differently (we _must_ use the `op1`
|
||||
// temp register as the output)
|
||||
pub fn i32_sub(ctx: &mut Context) {
|
||||
let op0 = pop_i32(ctx);
|
||||
let op1 = pop_i32(ctx);
|
||||
@@ -556,7 +571,7 @@ pub fn i32_sub(ctx: &mut Context) {
|
||||
}
|
||||
|
||||
let op1 = into_temp_reg(ctx, op1);
|
||||
match op0.location(&ctx.locals) {
|
||||
match op0.location(&ctx.block_state.locals) {
|
||||
ValueLocation::Reg(reg) => {
|
||||
dynasm!(ctx.asm
|
||||
; sub Rd(op1), Rd(reg)
|
||||
@@ -588,8 +603,18 @@ pub fn get_local_i32(ctx: &mut Context, local_idx: u32) {
|
||||
// back into registers here.
|
||||
pub fn set_local_i32(ctx: &mut Context, local_idx: u32) {
|
||||
let val = pop_i32(ctx);
|
||||
let val_loc = val.location(&ctx.locals);
|
||||
let dst_loc = local_location(&ctx.locals, local_idx);
|
||||
let val_loc = val.location(&ctx.block_state.locals);
|
||||
let dst_loc = ctx.original_locals.get(local_idx);
|
||||
|
||||
if let Some(cur) = ctx
|
||||
.block_state
|
||||
.locals
|
||||
.register_arguments
|
||||
.get_mut(local_idx as usize)
|
||||
{
|
||||
*cur = dst_loc;
|
||||
}
|
||||
|
||||
copy_value(ctx, val_loc, dst_loc);
|
||||
free_val(ctx, val);
|
||||
}
|
||||
@@ -604,7 +629,7 @@ pub fn relop_eq_i32(ctx: &mut Context) {
|
||||
let result = ctx.block_state.regs.take_scratch_gpr();
|
||||
|
||||
if let Some(i) = left.immediate() {
|
||||
match right.location(&ctx.locals) {
|
||||
match right.location(&ctx.block_state.locals) {
|
||||
ValueLocation::Stack(offset) => {
|
||||
let offset = adjusted_offset(ctx, offset);
|
||||
dynasm!(ctx.asm
|
||||
@@ -629,7 +654,7 @@ pub fn relop_eq_i32(ctx: &mut Context) {
|
||||
}
|
||||
} else {
|
||||
let lreg = into_reg(ctx, left);
|
||||
match right.location(&ctx.locals) {
|
||||
match right.location(&ctx.block_state.locals) {
|
||||
ValueLocation::Stack(offset) => {
|
||||
let offset = adjusted_offset(ctx, offset);
|
||||
dynasm!(ctx.asm
|
||||
@@ -733,7 +758,7 @@ fn copy_value(ctx: &mut Context, src: ValueLocation, dst: ValueLocation) {
|
||||
|
||||
#[must_use]
|
||||
pub struct CallCleanup {
|
||||
restore_registers: Vec<GPR>,
|
||||
restore_registers: ArrayVec<[GPR; SCRATCH_REGS.len()]>,
|
||||
stack_depth: i32,
|
||||
}
|
||||
|
||||
@@ -748,15 +773,16 @@ fn free_arg_registers(ctx: &mut Context, count: u32) {
|
||||
return;
|
||||
}
|
||||
|
||||
for i in 0..ctx.locals.locs.len() {
|
||||
match ctx.locals.locs[i] {
|
||||
// This is bounded by the maximum size of the `ArrayVec` and so preserves linear runtime
|
||||
for i in 0..ctx.block_state.locals.register_arguments.len() {
|
||||
match ctx.block_state.locals.register_arguments[i] {
|
||||
ValueLocation::Reg(reg) => {
|
||||
if ARGS_IN_GPRS.contains(®) {
|
||||
let offset = adjusted_offset(ctx, (i as u32 * WORD_SIZE) as _);
|
||||
dynasm!(ctx.asm
|
||||
; mov [rsp + offset], Rq(reg)
|
||||
);
|
||||
ctx.locals.locs[i] = ValueLocation::Stack(offset);
|
||||
ctx.block_state.locals.register_arguments[i] = ValueLocation::Stack(offset);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
@@ -770,7 +796,7 @@ fn free_return_register(ctx: &mut Context, count: u32) {
|
||||
}
|
||||
|
||||
for stack_val in &mut ctx.block_state.stack {
|
||||
match stack_val.location(&ctx.locals) {
|
||||
match stack_val.location(&ctx.block_state.locals) {
|
||||
// For now it's impossible for a local to be in RAX but that might be
|
||||
// possible in the future, so we check both cases.
|
||||
Some(ValueLocation::Reg(RAX)) => {
|
||||
@@ -787,8 +813,8 @@ fn free_return_register(ctx: &mut Context, count: u32) {
|
||||
|
||||
// TODO: Use `ArrayVec`?
|
||||
/// Saves volatile (i.e. caller-saved) registers before a function call, if they are used.
|
||||
fn save_volatile(ctx: &mut Context) -> Vec<GPR> {
|
||||
let mut out = vec![];
|
||||
fn save_volatile(ctx: &mut Context) -> ArrayVec<[GPR; SCRATCH_REGS.len()]> {
|
||||
let mut out = ArrayVec::new();
|
||||
|
||||
// TODO: If there are no `StackValue::Pop`s that need to be popped
|
||||
// before we reach our `Temp` value, we can set the `StackValue`
|
||||
@@ -811,11 +837,6 @@ fn save_volatile(ctx: &mut Context) -> Vec<GPR> {
|
||||
fn pass_outgoing_args(ctx: &mut Context, arity: u32) -> CallCleanup {
|
||||
let num_stack_args = (arity as usize).saturating_sub(ARGS_IN_GPRS.len()) as i32;
|
||||
|
||||
let out = CallCleanup {
|
||||
stack_depth: num_stack_args,
|
||||
restore_registers: save_volatile(ctx),
|
||||
};
|
||||
|
||||
// We pop stack arguments first - arguments are RTL
|
||||
if num_stack_args > 0 {
|
||||
let size = num_stack_args * WORD_SIZE as i32;
|
||||
@@ -847,7 +868,10 @@ fn pass_outgoing_args(ctx: &mut Context, arity: u32) -> CallCleanup {
|
||||
pop_i32_into(ctx, ValueLocation::Reg(*reg));
|
||||
}
|
||||
|
||||
out
|
||||
CallCleanup {
|
||||
stack_depth: num_stack_args,
|
||||
restore_registers: save_volatile(ctx),
|
||||
}
|
||||
}
|
||||
|
||||
/// Frees up the stack space used for stack-passed arguments and restores the value
|
||||
@@ -901,29 +925,23 @@ pub fn start_function(ctx: &mut Context, arguments: u32, locals: u32) {
|
||||
// Align stack slots to the nearest even number. This is required
|
||||
// by x86-64 ABI.
|
||||
let aligned_stack_slots = (locals + 1) & !1;
|
||||
let framesize: i32 = aligned_stack_slots as i32 * WORD_SIZE as i32;
|
||||
let frame_size: i32 = aligned_stack_slots as i32 * WORD_SIZE as i32;
|
||||
|
||||
ctx.locals.locs = reg_args
|
||||
.iter()
|
||||
.cloned()
|
||||
.map(ValueLocation::Reg)
|
||||
.chain(
|
||||
(0..arguments.saturating_sub(ARGS_IN_GPRS.len() as _))
|
||||
// We add 2 here because 1 stack slot is used for the stack pointer and another is
|
||||
// used for the return address. It's a magic number but there's not really a way
|
||||
// around this.
|
||||
.map(|arg_i| ValueLocation::Stack(((arg_i + 2) * WORD_SIZE) as i32 + framesize)),
|
||||
)
|
||||
.collect();
|
||||
ctx.original_locals.register_arguments =
|
||||
reg_args.iter().cloned().map(ValueLocation::Reg).collect();
|
||||
ctx.original_locals.num_stack_args = arguments.saturating_sub(ARGS_IN_GPRS.len() as _);
|
||||
ctx.original_locals.num_local_stack_slots = locals;
|
||||
ctx.block_state.locals = ctx.original_locals.clone();
|
||||
|
||||
dynasm!(ctx.asm
|
||||
; push rbp
|
||||
; mov rbp, rsp
|
||||
);
|
||||
|
||||
if framesize > 0 {
|
||||
// ctx.block_state.depth.reserve(aligned_stack_slots - locals);
|
||||
if frame_size > 0 {
|
||||
dynasm!(ctx.asm
|
||||
; sub rsp, framesize
|
||||
; sub rsp, frame_size
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user