Fix locals not being restored properly (which may cause us to read garbage values from the stack)

This commit is contained in:
Jef
2018-12-17 12:16:40 +01:00
parent bd2ee53c89
commit 23b5a56a7d
5 changed files with 106 additions and 84 deletions

View File

@@ -9,6 +9,7 @@ keywords = ["webassembly", "wasm", "compile", "compiler", "jit"]
publish = false
[dependencies]
arrayvec = "0.4"
dynasm = "0.2.3"
dynasmrt = "0.2.3"
wasmparser = "0.21.6"

View File

@@ -1,5 +1,9 @@
#![allow(dead_code)] // for now
// Since we want this to be linear-time, we never want to iterate over a `Vec`. `ArrayVec`s have a hard,
// small maximum size and so we can consider iterating over them to be essentially constant-time.
use arrayvec::ArrayVec;
use dynasmrt::x64::Assembler;
use dynasmrt::{AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi, ExecutableBuffer};
use error::Error;
@@ -166,7 +170,7 @@ impl CodeGenSession {
asm: &mut self.assembler,
func_starts: &self.func_starts,
block_state: Default::default(),
locals: Default::default(),
original_locals: Default::default(),
}
}
@@ -222,7 +226,7 @@ impl Value {
fn location(&self, locals: &Locals) -> ValueLocation {
match *self {
Value::Local(loc) => local_location(locals, loc),
Value::Local(loc) => locals.get(loc),
Value::Temp(reg) => ValueLocation::Reg(reg),
Value::Immediate(reg) => ValueLocation::Immediate(reg),
}
@@ -240,7 +244,7 @@ enum StackValue {
impl StackValue {
fn location(&self, locals: &Locals) -> Option<ValueLocation> {
match *self {
StackValue::Local(loc) => Some(local_location(locals, loc)),
StackValue::Local(loc) => Some(locals.get(loc)),
StackValue::Immediate(i) => Some(ValueLocation::Immediate(i)),
StackValue::Temp(reg) => Some(ValueLocation::Reg(reg)),
StackValue::Pop => None,
@@ -248,10 +252,30 @@ impl StackValue {
}
}
#[derive(Default)]
#[derive(Default, Clone)]
struct Locals {
// TODO: Use `ArrayVec` since we have a hard maximum (the number of registers)
locs: Vec<ValueLocation>,
register_arguments: ArrayVec<[ValueLocation; ARGS_IN_GPRS.len()]>,
num_stack_args: u32,
num_local_stack_slots: u32,
}
impl Locals {
    /// Resolves the `ValueLocation` of local `index`.
    ///
    /// Locals are laid out in three groups, checked in order:
    /// 1. Register-passed arguments, tracked explicitly in `register_arguments`.
    /// 2. Stack-passed arguments, which live above this function's local slots
    ///    plus two extra words (saved frame pointer and return address — see
    ///    the matching `+ 2` rationale in `start_function`).
    /// 3. Ordinary locals, which occupy this function's own stack slots
    ///    starting at the bottom of the frame.
    fn get(&self, index: u32) -> ValueLocation {
        self.register_arguments
            .get(index as usize)
            .cloned()
            .unwrap_or_else(|| {
                // Not a register argument: index relative to the first
                // non-register local.
                let stack_index = index - self.register_arguments.len() as u32;
                if stack_index < self.num_stack_args {
                    // Caller-pushed stack argument: skip our local slots and
                    // the two saved words (frame pointer + return address).
                    ValueLocation::Stack(
                        ((stack_index + self.num_local_stack_slots + 2) * WORD_SIZE) as _,
                    )
                } else {
                    // Plain local in this function's own frame slots.
                    let stack_index = stack_index - self.num_stack_args;
                    ValueLocation::Stack((stack_index * WORD_SIZE) as _)
                }
            })
    }
}
#[derive(Default, Clone)]
@@ -259,22 +283,16 @@ pub struct BlockState {
stack: Stack,
pub depth: StackDepth,
regs: Registers,
/// This is the _current_ locals, since we can shuffle them about during function calls.
/// We will restore this to be the same state as the `Locals` in `Context` at the end
/// of a block.
locals: Locals,
}
fn adjusted_offset(ctx: &mut Context, offset: i32) -> i32 {
(ctx.block_state.depth.0 * WORD_SIZE) as i32 + offset
}
fn local_location(locals: &Locals, index: u32) -> ValueLocation {
locals
.locs
.get(index as usize)
.cloned()
.unwrap_or(ValueLocation::Stack(
(index.saturating_sub(ARGS_IN_GPRS.len() as u32) * WORD_SIZE) as _,
))
}
type Stack = Vec<StackValue>;
pub struct Context<'a> {
@@ -282,7 +300,7 @@ pub struct Context<'a> {
func_starts: &'a Vec<(Option<AssemblyOffset>, DynamicLabel)>,
/// Each push and pop on the value stack increments or decrements this value by 1 respectively.
block_state: BlockState,
locals: Locals,
original_locals: Locals,
}
impl<'a> Context<'a> {}
@@ -323,42 +341,36 @@ pub fn current_block_state(ctx: &Context) -> BlockState {
}
pub fn return_from_block(ctx: &mut Context) {
if let Some(loc) = ctx.block_state.stack.last().unwrap().location(&ctx.locals) {
match loc {
ValueLocation::Reg(r) => {
dynasm!(ctx.asm
; push Rq(r)
);
}
ValueLocation::Stack(offset) => {
let offset = adjusted_offset(ctx, offset);
dynasm!(ctx.asm
; push QWORD [rsp + offset]
);
}
ValueLocation::Immediate(imm) => {
dynasm!(ctx.asm
; push imm
);
}
}
}
// If `location` is `None` then we don't need to do anything.
free_return_register(ctx, 1);
pop_i32_into(ctx, ValueLocation::Reg(RAX))
}
pub fn push_block_return_value(ctx: &mut Context) {
ctx.block_state.depth.reserve(1);
ctx.block_state.stack.push(StackValue::Pop);
ctx.block_state.stack.push(StackValue::Temp(RAX));
}
pub fn restore_block_state(ctx: &mut Context, block_state: BlockState) {
ctx.block_state = block_state;
/// Ends the current block: copies register-resident locals back to their
/// original (function-entry) locations, then reinstates the parent block's
/// saved state so code generation resumes with a consistent view.
pub fn end_block(ctx: &mut Context, parent_block_state: BlockState) {
    // Restore BEFORE swapping state: `restore_locals` reads the child
    // block's current local locations to know what needs copying back.
    restore_locals(ctx);
    ctx.block_state = parent_block_state;
}
/// Pushes the return value onto the virtual value stack as a temporary in
/// RAX (the register `return_from_block` pops the result into).
pub fn push_return_value(ctx: &mut Context) {
    ctx.block_state.stack.push(StackValue::Temp(RAX));
}
/// Copies each register-resident local from wherever the current block moved
/// it back to the original location recorded at function entry
/// (`ctx.original_locals`). This is the fix for parent blocks reading
/// garbage after a child block shuffled locals (e.g. around a call).
fn restore_locals(ctx: &mut Context) {
    // Both sides are cloned so the iteration does not hold a borrow of
    // `ctx` across the mutable borrow taken by `copy_value` below.
    for (src, dst) in ctx
        .block_state
        .locals
        .register_arguments
        .clone()
        .iter()
        .zip(&ctx.original_locals.register_arguments.clone())
    {
        copy_value(ctx, *src, *dst);
    }
}
fn push_i32(ctx: &mut Context, value: Value) {
let stack_loc = match value {
Value::Local(loc) => StackValue::Local(loc),
@@ -421,7 +433,8 @@ fn pop_i32_into(ctx: &mut Context, dst: ValueLocation) {
}
};
let src = to_move.location(&ctx.locals);
let src = to_move.location(&ctx.block_state.locals);
println!("{:?}, {:?}", src, dst);
copy_value(ctx, src, dst);
free_val(ctx, to_move);
}
@@ -435,7 +448,7 @@ fn free_val(ctx: &mut Context, val: Value) {
/// Puts this value into a register so that it can be efficiently read
fn into_reg(ctx: &mut Context, val: Value) -> GPR {
match val.location(&ctx.locals) {
match val.location(&ctx.block_state.locals) {
ValueLocation::Stack(offset) => {
let offset = adjusted_offset(ctx, offset);
let scratch = ctx.block_state.regs.take_scratch_gpr();
@@ -462,7 +475,7 @@ fn into_temp_reg(ctx: &mut Context, val: Value) -> GPR {
Value::Local(loc) => {
let scratch = ctx.block_state.regs.take_scratch_gpr();
match local_location(&ctx.locals, loc) {
match ctx.block_state.locals.get(loc) {
ValueLocation::Stack(offset) => {
let offset = adjusted_offset(ctx, offset);
dynasm!(ctx.asm
@@ -512,7 +525,7 @@ macro_rules! commutative_binop {
_ => (into_temp_reg(ctx, op0), op1),
};
match op0.location(&ctx.locals) {
match op0.location(&ctx.block_state.locals) {
ValueLocation::Reg(reg) => {
dynasm!(ctx.asm
; $instr Rd(op1), Rd(reg)
@@ -538,12 +551,14 @@ macro_rules! commutative_binop {
}
}
commutative_binop!(i32_add, add, |a, b| a + b);
commutative_binop!(i32_add, add, i32::wrapping_add);
commutative_binop!(i32_and, and, |a, b| a & b);
commutative_binop!(i32_or, or, |a, b| a | b);
commutative_binop!(i32_xor, xor, |a, b| a ^ b);
commutative_binop!(i32_mul, imul, |a, b| a * b);
commutative_binop!(i32_mul, imul, i32::wrapping_mul);
// `sub` is not commutative, so we have to handle it differently (we _must_ use the `op1`
// temp register as the output)
pub fn i32_sub(ctx: &mut Context) {
let op0 = pop_i32(ctx);
let op1 = pop_i32(ctx);
@@ -556,7 +571,7 @@ pub fn i32_sub(ctx: &mut Context) {
}
let op1 = into_temp_reg(ctx, op1);
match op0.location(&ctx.locals) {
match op0.location(&ctx.block_state.locals) {
ValueLocation::Reg(reg) => {
dynasm!(ctx.asm
; sub Rd(op1), Rd(reg)
@@ -588,8 +603,18 @@ pub fn get_local_i32(ctx: &mut Context, local_idx: u32) {
// back into registers here.
pub fn set_local_i32(ctx: &mut Context, local_idx: u32) {
let val = pop_i32(ctx);
let val_loc = val.location(&ctx.locals);
let dst_loc = local_location(&ctx.locals, local_idx);
let val_loc = val.location(&ctx.block_state.locals);
let dst_loc = ctx.original_locals.get(local_idx);
if let Some(cur) = ctx
.block_state
.locals
.register_arguments
.get_mut(local_idx as usize)
{
*cur = dst_loc;
}
copy_value(ctx, val_loc, dst_loc);
free_val(ctx, val);
}
@@ -604,7 +629,7 @@ pub fn relop_eq_i32(ctx: &mut Context) {
let result = ctx.block_state.regs.take_scratch_gpr();
if let Some(i) = left.immediate() {
match right.location(&ctx.locals) {
match right.location(&ctx.block_state.locals) {
ValueLocation::Stack(offset) => {
let offset = adjusted_offset(ctx, offset);
dynasm!(ctx.asm
@@ -629,7 +654,7 @@ pub fn relop_eq_i32(ctx: &mut Context) {
}
} else {
let lreg = into_reg(ctx, left);
match right.location(&ctx.locals) {
match right.location(&ctx.block_state.locals) {
ValueLocation::Stack(offset) => {
let offset = adjusted_offset(ctx, offset);
dynasm!(ctx.asm
@@ -733,7 +758,7 @@ fn copy_value(ctx: &mut Context, src: ValueLocation, dst: ValueLocation) {
#[must_use]
pub struct CallCleanup {
restore_registers: Vec<GPR>,
restore_registers: ArrayVec<[GPR; SCRATCH_REGS.len()]>,
stack_depth: i32,
}
@@ -748,15 +773,16 @@ fn free_arg_registers(ctx: &mut Context, count: u32) {
return;
}
for i in 0..ctx.locals.locs.len() {
match ctx.locals.locs[i] {
// This is bound to the maximum size of the `ArrayVec` amd so preserves linear runtime
for i in 0..ctx.block_state.locals.register_arguments.len() {
match ctx.block_state.locals.register_arguments[i] {
ValueLocation::Reg(reg) => {
if ARGS_IN_GPRS.contains(&reg) {
let offset = adjusted_offset(ctx, (i as u32 * WORD_SIZE) as _);
dynasm!(ctx.asm
; mov [rsp + offset], Rq(reg)
);
ctx.locals.locs[i] = ValueLocation::Stack(offset);
ctx.block_state.locals.register_arguments[i] = ValueLocation::Stack(offset);
}
}
_ => {}
@@ -770,7 +796,7 @@ fn free_return_register(ctx: &mut Context, count: u32) {
}
for stack_val in &mut ctx.block_state.stack {
match stack_val.location(&ctx.locals) {
match stack_val.location(&ctx.block_state.locals) {
// For now it's impossible for a local to be in RAX but that might be
// possible in the future, so we check both cases.
Some(ValueLocation::Reg(RAX)) => {
@@ -787,8 +813,8 @@ fn free_return_register(ctx: &mut Context, count: u32) {
// TODO: Use `ArrayVec`?
/// Saves volatile (i.e. caller-saved) registers before a function call, if they are used.
fn save_volatile(ctx: &mut Context) -> Vec<GPR> {
let mut out = vec![];
fn save_volatile(ctx: &mut Context) -> ArrayVec<[GPR; SCRATCH_REGS.len()]> {
let mut out = ArrayVec::new();
// TODO: If there are no `StackValue::Pop`s that need to be popped
// before we reach our `Temp` value, we can set the `StackValue`
@@ -811,11 +837,6 @@ fn save_volatile(ctx: &mut Context) -> Vec<GPR> {
fn pass_outgoing_args(ctx: &mut Context, arity: u32) -> CallCleanup {
let num_stack_args = (arity as usize).saturating_sub(ARGS_IN_GPRS.len()) as i32;
let out = CallCleanup {
stack_depth: num_stack_args,
restore_registers: save_volatile(ctx),
};
// We pop stack arguments first - arguments are RTL
if num_stack_args > 0 {
let size = num_stack_args * WORD_SIZE as i32;
@@ -847,7 +868,10 @@ fn pass_outgoing_args(ctx: &mut Context, arity: u32) -> CallCleanup {
pop_i32_into(ctx, ValueLocation::Reg(*reg));
}
out
CallCleanup {
stack_depth: num_stack_args,
restore_registers: save_volatile(ctx),
}
}
/// Frees up the stack space used for stack-passed arguments and restores the value
@@ -901,29 +925,23 @@ pub fn start_function(ctx: &mut Context, arguments: u32, locals: u32) {
// Align stack slots to the nearest even number. This is required
// by x86-64 ABI.
let aligned_stack_slots = (locals + 1) & !1;
let framesize: i32 = aligned_stack_slots as i32 * WORD_SIZE as i32;
let frame_size: i32 = aligned_stack_slots as i32 * WORD_SIZE as i32;
ctx.locals.locs = reg_args
.iter()
.cloned()
.map(ValueLocation::Reg)
.chain(
(0..arguments.saturating_sub(ARGS_IN_GPRS.len() as _))
// We add 2 here because 1 stack slot is used for the stack pointer and another is
// used for the return address. It's a magic number but there's not really a way
// around this.
.map(|arg_i| ValueLocation::Stack(((arg_i + 2) * WORD_SIZE) as i32 + framesize)),
)
.collect();
ctx.original_locals.register_arguments =
reg_args.iter().cloned().map(ValueLocation::Reg).collect();
ctx.original_locals.num_stack_args = arguments.saturating_sub(ARGS_IN_GPRS.len() as _);
ctx.original_locals.num_local_stack_slots = locals;
ctx.block_state.locals = ctx.original_locals.clone();
dynasm!(ctx.asm
; push rbp
; mov rbp, rsp
);
if framesize > 0 {
// ctx.block_state.depth.reserve(aligned_stack_slots - locals);
if frame_size > 0 {
dynasm!(ctx.asm
; sub rsp, framesize
; sub rsp, frame_size
);
}
}

View File

@@ -158,7 +158,7 @@ pub fn translate(
// 0 it will branch here.
// After that reset stack depth to the value before entering `if` block.
define_label(&mut ctx, if_not);
restore_block_state(&mut ctx, block_state.clone());
end_block(&mut ctx, block_state.clone());
// Carry over the `end_label`, so it will be resolved when the corresponding `end`
// is encountered.
@@ -199,7 +199,7 @@ pub fn translate(
prepare_return_value(&mut ctx);
}
restore_block_state(&mut ctx, control_frame.block_state);
end_block(&mut ctx, control_frame.block_state);
push_block_return_value(&mut ctx);
}
Operator::I32Eq => relop_eq_i32(&mut ctx),

View File

@@ -1,8 +1,9 @@
#![feature(plugin, test)]
#![feature(plugin, test, const_slice_len)]
#![plugin(dynasm)]
extern crate test;
extern crate arrayvec;
extern crate capstone;
extern crate failure;
extern crate wasmparser;

View File

@@ -250,6 +250,7 @@ fn function_write_args_spill_to_stack() {
assert_eq!(
{
let translated = translate_wat(code);
translated.disassemble();
let out: u32 =
unsafe { translated.execute_func(0, (11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)) };
out
@@ -330,6 +331,7 @@ fn fib() {
}
let translated = translate_wat(FIBONACCI);
translated.disassemble();
for x in 0..30 {
unsafe {