Fix locals not being restored properly (which may cause us to read garbage values from the stack)
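The bug: lowering a function call can shuffle a local out of its argument register (see `free_arg_registers`), and the current location was only tracked in a single `Locals` that blocks never restored, so a later `get_local` could read a stale stack slot. After this commit, `BlockState` carries the current `Locals`, `Context` keeps the `original_locals`, and `end_block` moves everything back. A sketch of a regression check in the style of this repo's tests (`translate_wat` and `execute_func` are the existing test helpers; the WAT and expected values are illustrative, not part of the commit):

    #[test]
    fn local_survives_call() {
        // Calling $id may evict $x from its argument register; after the
        // call, `get_local $x` must still observe the original value.
        let code = r#"
            (module
              (func $id (param i32) (result i32) (get_local 0))
              (func (param $x i32) (result i32)
                (i32.add (call $id (i32.const 5)) (get_local $x))))
        "#;
        let translated = translate_wat(code);
        let out: u32 = unsafe { translated.execute_func(1, (42,)) };
        assert_eq!(out, 47);
    }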

Author: Jef
Date:   2018-12-17 12:16:40 +01:00
Parent: bd2ee53c89
Commit: 23b5a56a7d

5 changed files with 106 additions and 84 deletions

View File

@@ -9,6 +9,7 @@ keywords = ["webassembly", "wasm", "compile", "compiler", "jit"]
 publish = false
 
 [dependencies]
+arrayvec = "0.4"
 dynasm = "0.2.3"
 dynasmrt = "0.2.3"
 wasmparser = "0.21.6"

View File

@@ -1,5 +1,9 @@
 #![allow(dead_code)] // for now
 
+// Since we want this to be linear-time, we never want to iterate over a `Vec`. `ArrayVec`s have a hard,
+// small maximum size and so we can consider iterating over them to be essentially constant-time.
+use arrayvec::ArrayVec;
 use dynasmrt::x64::Assembler;
 use dynasmrt::{AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi, ExecutableBuffer};
 use error::Error;
@@ -166,7 +170,7 @@ impl CodeGenSession {
             asm: &mut self.assembler,
             func_starts: &self.func_starts,
             block_state: Default::default(),
-            locals: Default::default(),
+            original_locals: Default::default(),
         }
     }
@@ -222,7 +226,7 @@ impl Value {
     fn location(&self, locals: &Locals) -> ValueLocation {
         match *self {
-            Value::Local(loc) => local_location(locals, loc),
+            Value::Local(loc) => locals.get(loc),
             Value::Temp(reg) => ValueLocation::Reg(reg),
             Value::Immediate(reg) => ValueLocation::Immediate(reg),
         }
@@ -240,7 +244,7 @@ enum StackValue {
 impl StackValue {
     fn location(&self, locals: &Locals) -> Option<ValueLocation> {
         match *self {
-            StackValue::Local(loc) => Some(local_location(locals, loc)),
+            StackValue::Local(loc) => Some(locals.get(loc)),
             StackValue::Immediate(i) => Some(ValueLocation::Immediate(i)),
             StackValue::Temp(reg) => Some(ValueLocation::Reg(reg)),
             StackValue::Pop => None,
@@ -248,10 +252,30 @@ impl StackValue {
     }
 }
 
-#[derive(Default)]
+#[derive(Default, Clone)]
 struct Locals {
-    // TODO: Use `ArrayVec` since we have a hard maximum (the number of registers)
-    locs: Vec<ValueLocation>,
+    register_arguments: ArrayVec<[ValueLocation; ARGS_IN_GPRS.len()]>,
+    num_stack_args: u32,
+    num_local_stack_slots: u32,
+}
+
+impl Locals {
+    fn get(&self, index: u32) -> ValueLocation {
+        self.register_arguments
+            .get(index as usize)
+            .cloned()
+            .unwrap_or_else(|| {
+                let stack_index = index - self.register_arguments.len() as u32;
+                if stack_index < self.num_stack_args {
+                    ValueLocation::Stack(
+                        ((stack_index + self.num_local_stack_slots + 2) * WORD_SIZE) as _,
+                    )
+                } else {
+                    let stack_index = stack_index - self.num_stack_args;
+                    ValueLocation::Stack((stack_index * WORD_SIZE) as _)
+                }
+            })
+    }
 }
 
 #[derive(Default, Clone)]
@@ -259,22 +283,16 @@ pub struct BlockState {
     stack: Stack,
     pub depth: StackDepth,
     regs: Registers,
+    /// This is the _current_ locals, since we can shuffle them about during function calls.
+    /// We will restore this to be the same state as the `Locals` in `Context` at the end
+    /// of a block.
+    locals: Locals,
 }
 
 fn adjusted_offset(ctx: &mut Context, offset: i32) -> i32 {
     (ctx.block_state.depth.0 * WORD_SIZE) as i32 + offset
 }
 
-fn local_location(locals: &Locals, index: u32) -> ValueLocation {
-    locals
-        .locs
-        .get(index as usize)
-        .cloned()
-        .unwrap_or(ValueLocation::Stack(
-            (index.saturating_sub(ARGS_IN_GPRS.len() as u32) * WORD_SIZE) as _,
-        ))
-}
-
 type Stack = Vec<StackValue>;
 
 pub struct Context<'a> {
@@ -282,7 +300,7 @@ pub struct Context<'a> {
     func_starts: &'a Vec<(Option<AssemblyOffset>, DynamicLabel)>,
     /// Each push and pop on the value stack increments or decrements this value by 1 respectively.
     block_state: BlockState,
-    locals: Locals,
+    original_locals: Locals,
 }
 
 impl<'a> Context<'a> {}
@@ -323,42 +341,36 @@ pub fn current_block_state(ctx: &Context) -> BlockState {
 }
 
 pub fn return_from_block(ctx: &mut Context) {
-    if let Some(loc) = ctx.block_state.stack.last().unwrap().location(&ctx.locals) {
-        match loc {
-            ValueLocation::Reg(r) => {
-                dynasm!(ctx.asm
-                    ; push Rq(r)
-                );
-            }
-            ValueLocation::Stack(offset) => {
-                let offset = adjusted_offset(ctx, offset);
-                dynasm!(ctx.asm
-                    ; push QWORD [rsp + offset]
-                );
-            }
-            ValueLocation::Immediate(imm) => {
-                dynasm!(ctx.asm
-                    ; push imm
-                );
-            }
-        }
-    }
-    // If `location` is `None` then we don't need to do anything.
+    free_return_register(ctx, 1);
+    pop_i32_into(ctx, ValueLocation::Reg(RAX))
 }
 
 pub fn push_block_return_value(ctx: &mut Context) {
-    ctx.block_state.depth.reserve(1);
-    ctx.block_state.stack.push(StackValue::Pop);
+    ctx.block_state.stack.push(StackValue::Temp(RAX));
 }
 
-pub fn restore_block_state(ctx: &mut Context, block_state: BlockState) {
-    ctx.block_state = block_state;
+pub fn end_block(ctx: &mut Context, parent_block_state: BlockState) {
+    restore_locals(ctx);
+    ctx.block_state = parent_block_state;
 }
 
 pub fn push_return_value(ctx: &mut Context) {
     ctx.block_state.stack.push(StackValue::Temp(RAX));
 }
 
+fn restore_locals(ctx: &mut Context) {
+    for (src, dst) in ctx
+        .block_state
+        .locals
+        .register_arguments
+        .clone()
+        .iter()
+        .zip(&ctx.original_locals.register_arguments.clone())
+    {
+        copy_value(ctx, *src, *dst);
+    }
+}
+
 fn push_i32(ctx: &mut Context, value: Value) {
     let stack_loc = match value {
         Value::Local(loc) => StackValue::Local(loc),
@@ -421,7 +433,8 @@ fn pop_i32_into(ctx: &mut Context, dst: ValueLocation) {
         }
     };
 
-    let src = to_move.location(&ctx.locals);
+    let src = to_move.location(&ctx.block_state.locals);
+    println!("{:?}, {:?}", src, dst);
     copy_value(ctx, src, dst);
     free_val(ctx, to_move);
 }
@@ -435,7 +448,7 @@ fn free_val(ctx: &mut Context, val: Value) {
 
 /// Puts this value into a register so that it can be efficiently read
 fn into_reg(ctx: &mut Context, val: Value) -> GPR {
-    match val.location(&ctx.locals) {
+    match val.location(&ctx.block_state.locals) {
         ValueLocation::Stack(offset) => {
             let offset = adjusted_offset(ctx, offset);
             let scratch = ctx.block_state.regs.take_scratch_gpr();
@@ -462,7 +475,7 @@ fn into_temp_reg(ctx: &mut Context, val: Value) -> GPR {
         Value::Local(loc) => {
             let scratch = ctx.block_state.regs.take_scratch_gpr();
 
-            match local_location(&ctx.locals, loc) {
+            match ctx.block_state.locals.get(loc) {
                 ValueLocation::Stack(offset) => {
                     let offset = adjusted_offset(ctx, offset);
                     dynasm!(ctx.asm
@@ -512,7 +525,7 @@ macro_rules! commutative_binop {
                 _ => (into_temp_reg(ctx, op0), op1),
             };
 
-            match op0.location(&ctx.locals) {
+            match op0.location(&ctx.block_state.locals) {
                 ValueLocation::Reg(reg) => {
                     dynasm!(ctx.asm
                         ; $instr Rd(op1), Rd(reg)
@@ -538,12 +551,14 @@ macro_rules! commutative_binop {
     }
 }
 
-commutative_binop!(i32_add, add, |a, b| a + b);
+commutative_binop!(i32_add, add, i32::wrapping_add);
 commutative_binop!(i32_and, and, |a, b| a & b);
 commutative_binop!(i32_or, or, |a, b| a | b);
 commutative_binop!(i32_xor, xor, |a, b| a ^ b);
-commutative_binop!(i32_mul, imul, |a, b| a * b);
+commutative_binop!(i32_mul, imul, i32::wrapping_mul);
 
+// `sub` is not commutative, so we have to handle it differently (we _must_ use the `op1`
+// temp register as the output)
 pub fn i32_sub(ctx: &mut Context) {
     let op0 = pop_i32(ctx);
     let op1 = pop_i32(ctx);
@@ -556,7 +571,7 @@ pub fn i32_sub(ctx: &mut Context) {
     }
 
     let op1 = into_temp_reg(ctx, op1);
-    match op0.location(&ctx.locals) {
+    match op0.location(&ctx.block_state.locals) {
         ValueLocation::Reg(reg) => {
             dynasm!(ctx.asm
                 ; sub Rd(op1), Rd(reg)
@@ -588,8 +603,18 @@ pub fn get_local_i32(ctx: &mut Context, local_idx: u32) {
 // back into registers here.
 pub fn set_local_i32(ctx: &mut Context, local_idx: u32) {
     let val = pop_i32(ctx);
-    let val_loc = val.location(&ctx.locals);
-    let dst_loc = local_location(&ctx.locals, local_idx);
+    let val_loc = val.location(&ctx.block_state.locals);
+    let dst_loc = ctx.original_locals.get(local_idx);
+
+    if let Some(cur) = ctx
+        .block_state
+        .locals
+        .register_arguments
+        .get_mut(local_idx as usize)
+    {
+        *cur = dst_loc;
+    }
+
     copy_value(ctx, val_loc, dst_loc);
     free_val(ctx, val);
 }
@@ -604,7 +629,7 @@ pub fn relop_eq_i32(ctx: &mut Context) {
     let result = ctx.block_state.regs.take_scratch_gpr();
 
     if let Some(i) = left.immediate() {
-        match right.location(&ctx.locals) {
+        match right.location(&ctx.block_state.locals) {
             ValueLocation::Stack(offset) => {
                 let offset = adjusted_offset(ctx, offset);
                 dynasm!(ctx.asm
@@ -629,7 +654,7 @@ pub fn relop_eq_i32(ctx: &mut Context) {
         }
     } else {
         let lreg = into_reg(ctx, left);
-        match right.location(&ctx.locals) {
+        match right.location(&ctx.block_state.locals) {
            ValueLocation::Stack(offset) => {
                 let offset = adjusted_offset(ctx, offset);
                 dynasm!(ctx.asm
@@ -733,7 +758,7 @@ fn copy_value(ctx: &mut Context, src: ValueLocation, dst: ValueLocation) {
 
 #[must_use]
 pub struct CallCleanup {
-    restore_registers: Vec<GPR>,
+    restore_registers: ArrayVec<[GPR; SCRATCH_REGS.len()]>,
     stack_depth: i32,
 }
@@ -748,15 +773,16 @@ fn free_arg_registers(ctx: &mut Context, count: u32) {
         return;
     }
 
-    for i in 0..ctx.locals.locs.len() {
-        match ctx.locals.locs[i] {
+    // This is bound to the maximum size of the `ArrayVec` and so preserves linear runtime
+    for i in 0..ctx.block_state.locals.register_arguments.len() {
+        match ctx.block_state.locals.register_arguments[i] {
             ValueLocation::Reg(reg) => {
                 if ARGS_IN_GPRS.contains(&reg) {
                     let offset = adjusted_offset(ctx, (i as u32 * WORD_SIZE) as _);
                     dynasm!(ctx.asm
                         ; mov [rsp + offset], Rq(reg)
                     );
-                    ctx.locals.locs[i] = ValueLocation::Stack(offset);
+                    ctx.block_state.locals.register_arguments[i] = ValueLocation::Stack(offset);
                 }
             }
             _ => {}
@@ -770,7 +796,7 @@ fn free_return_register(ctx: &mut Context, count: u32) {
     }
 
     for stack_val in &mut ctx.block_state.stack {
-        match stack_val.location(&ctx.locals) {
+        match stack_val.location(&ctx.block_state.locals) {
             // For now it's impossible for a local to be in RAX but that might be
             // possible in the future, so we check both cases.
             Some(ValueLocation::Reg(RAX)) => {
@@ -787,8 +813,8 @@ fn free_return_register(ctx: &mut Context, count: u32) {
 // TODO: Use `ArrayVec`?
 /// Saves volatile (i.e. caller-saved) registers before a function call, if they are used.
-fn save_volatile(ctx: &mut Context) -> Vec<GPR> {
-    let mut out = vec![];
+fn save_volatile(ctx: &mut Context) -> ArrayVec<[GPR; SCRATCH_REGS.len()]> {
+    let mut out = ArrayVec::new();
 
     // TODO: If there are no `StackValue::Pop`s that need to be popped
     // before we reach our `Temp` value, we can set the `StackValue`
@@ -811,11 +837,6 @@ fn save_volatile(ctx: &mut Context) -> ArrayVec<[GPR; SCRATCH_REGS.len()]> {
 fn pass_outgoing_args(ctx: &mut Context, arity: u32) -> CallCleanup {
     let num_stack_args = (arity as usize).saturating_sub(ARGS_IN_GPRS.len()) as i32;
 
-    let out = CallCleanup {
-        stack_depth: num_stack_args,
-        restore_registers: save_volatile(ctx),
-    };
-
     // We pop stack arguments first - arguments are RTL
     if num_stack_args > 0 {
         let size = num_stack_args * WORD_SIZE as i32;
@@ -847,7 +868,10 @@ fn pass_outgoing_args(ctx: &mut Context, arity: u32) -> CallCleanup {
         pop_i32_into(ctx, ValueLocation::Reg(*reg));
     }
 
-    out
+    CallCleanup {
+        stack_depth: num_stack_args,
+        restore_registers: save_volatile(ctx),
+    }
 }
/// Frees up the stack space used for stack-passed arguments and restores the value /// Frees up the stack space used for stack-passed arguments and restores the value
@@ -901,29 +925,23 @@ pub fn start_function(ctx: &mut Context, arguments: u32, locals: u32) {
// Align stack slots to the nearest even number. This is required // Align stack slots to the nearest even number. This is required
// by x86-64 ABI. // by x86-64 ABI.
let aligned_stack_slots = (locals + 1) & !1; let aligned_stack_slots = (locals + 1) & !1;
let framesize: i32 = aligned_stack_slots as i32 * WORD_SIZE as i32; let frame_size: i32 = aligned_stack_slots as i32 * WORD_SIZE as i32;
ctx.locals.locs = reg_args ctx.original_locals.register_arguments =
.iter() reg_args.iter().cloned().map(ValueLocation::Reg).collect();
.cloned() ctx.original_locals.num_stack_args = arguments.saturating_sub(ARGS_IN_GPRS.len() as _);
.map(ValueLocation::Reg) ctx.original_locals.num_local_stack_slots = locals;
.chain( ctx.block_state.locals = ctx.original_locals.clone();
(0..arguments.saturating_sub(ARGS_IN_GPRS.len() as _))
// We add 2 here because 1 stack slot is used for the stack pointer and another is
// used for the return address. It's a magic number but there's not really a way
// around this.
.map(|arg_i| ValueLocation::Stack(((arg_i + 2) * WORD_SIZE) as i32 + framesize)),
)
.collect();
dynasm!(ctx.asm dynasm!(ctx.asm
; push rbp ; push rbp
; mov rbp, rsp ; mov rbp, rsp
); );
if framesize > 0 { // ctx.block_state.depth.reserve(aligned_stack_slots - locals);
if frame_size > 0 {
dynasm!(ctx.asm dynasm!(ctx.asm
; sub rsp, framesize ; sub rsp, frame_size
); );
} }
} }
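To make the new frame layout in `Locals::get` concrete: indices below `register_arguments.len()` are in registers; the next `num_stack_args` indices live above this frame's own slots plus the two extra slots for the saved `rbp` and the return address (the `+ 2`); everything after that is a plain local in one of the frame's own slots. A standalone sketch of the same arithmetic, assuming `WORD_SIZE` is 8 and six register arguments (the real constants are defined elsewhere in this file):

    const WORD_SIZE: u32 = 8; // assumed: one qword slot on x86-64
    const NUM_REG_ARGS: u32 = 6; // assumed length of ARGS_IN_GPRS

    // Mirrors the offset computation in `Locals::get` for non-register locals.
    fn local_offset(index: u32, num_stack_args: u32, num_local_stack_slots: u32) -> i32 {
        let stack_index = index - NUM_REG_ARGS;
        if stack_index < num_stack_args {
            // A stack-passed argument: skip our own slots, saved rbp and return address.
            ((stack_index + num_local_stack_slots + 2) * WORD_SIZE) as i32
        } else {
            // One of this frame's own local slots.
            ((stack_index - num_stack_args) * WORD_SIZE) as i32
        }
    }

    fn main() {
        // One stack argument, two local stack slots:
        assert_eq!(local_offset(6, 1, 2), 32); // stack arg: (0 + 2 + 2) * 8
        assert_eq!(local_offset(7, 1, 2), 0); // first local slot
    }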

View File

@@ -158,7 +158,7 @@ pub fn translate(
             // 0 it will branch here.
             // After that reset stack depth to the value before entering `if` block.
             define_label(&mut ctx, if_not);
-            restore_block_state(&mut ctx, block_state.clone());
+            end_block(&mut ctx, block_state.clone());
 
             // Carry over the `end_label`, so it will be resolved when the corresponding `end`
             // is encountered.
@@ -199,7 +199,7 @@ pub fn translate(
                 prepare_return_value(&mut ctx);
             }
 
-            restore_block_state(&mut ctx, control_frame.block_state);
+            end_block(&mut ctx, control_frame.block_state);
             push_block_return_value(&mut ctx);
         }
         Operator::I32Eq => relop_eq_i32(&mut ctx),
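The resulting protocol in `translate` is: snapshot the block state on entry, then have `end_block` first emit the moves that put locals back where the parent expects them (`restore_locals`) and only then reinstate the parent state. A toy model of that invariant, with made-up types standing in for the real `Context`:

    #[derive(Clone, Copy, PartialEq, Debug)]
    enum Loc {
        Reg(u8),
        Stack(i32),
    }

    #[derive(Clone, Default)]
    struct State {
        locals: Vec<Loc>,
    }

    struct Ctx {
        block_state: State,
        original_locals: Vec<Loc>,
    }

    impl Ctx {
        // Stand-in for `restore_locals` followed by `ctx.block_state = parent`.
        fn end_block(&mut self, parent: State) {
            for (src, dst) in self.block_state.locals.iter().zip(&self.original_locals) {
                if src != dst {
                    println!("emit move {:?} -> {:?}", src, dst); // `copy_value` in the real code
                }
            }
            self.block_state = parent;
        }
    }

    fn main() {
        let original = vec![Loc::Reg(7), Loc::Stack(0)];
        let mut ctx = Ctx {
            block_state: State { locals: original.clone() },
            original_locals: original,
        };
        let parent = ctx.block_state.clone(); // snapshot on block entry
        ctx.block_state.locals[0] = Loc::Stack(8); // a call spilled local 0
        ctx.end_block(parent); // emits the move back into Reg(7)
    }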

View File

@@ -1,8 +1,9 @@
-#![feature(plugin, test)]
+#![feature(plugin, test, const_slice_len)]
 #![plugin(dynasm)]
 
 extern crate test;
+extern crate arrayvec;
 extern crate capstone;
 extern crate failure;
 extern crate wasmparser;

View File

@@ -250,6 +250,7 @@ fn function_write_args_spill_to_stack() {
     assert_eq!(
         {
             let translated = translate_wat(code);
+            translated.disassemble();
             let out: u32 =
                 unsafe { translated.execute_func(0, (11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)) };
             out
@@ -330,6 +331,7 @@ fn fib() {
     }
 
     let translated = translate_wat(FIBONACCI);
+    translated.disassemble();
 
     for x in 0..30 {
         unsafe {