First baby steps to supporting memory accesses

Currently we don't actually sandbox the memory at all, so you can do evil things
like read and write the host's memory. We also don't support growing memory, or a
Cranelift-compatible ABI that passes the memory offset as an argument.

We also always allocate the buffer immediately when encountering a memory section.
There is preliminary support for translating a buffer which can then have the real
offset replaced using relocations (and returning a different type when doing so),
but I haven't written the code that actually does the relocation, so it doesn't work yet.
This commit is contained in:
Jef
2019-01-11 15:20:32 +01:00
parent ddb4c0fd19
commit 1eebc65c9e
7 changed files with 1741 additions and 1237 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -20,6 +20,12 @@ impl From<BinaryReaderError> for Error {
} }
} }
// Conversion from the never type into `Error`.
//
// `!` has no values, so this function can never actually be called; the impl only
// exists to satisfy trait bounds.
// NOTE(review): presumably this is what lets `translate`'s `Error: From<T::Error>`
// bound hold for a `Memory` implementation whose error type is `!` -- confirm
// against the `Memory` impls in the backend.
impl From<!> for Error {
    fn from(other: !) -> Self {
        // An empty match is exhaustive for an uninhabited type.
        match other {}
    }
}
impl From<capstone::Error> for Error { impl From<capstone::Error> for Error {
fn from(e: capstone::Error) -> Self { fn from(e: capstone::Error) -> Self {
Error::Disassembler(e.to_string()) Error::Disassembler(e.to_string())

View File

@@ -90,12 +90,31 @@ impl ControlFrame {
} }
} }
pub fn translate( pub fn translate<T: Memory>(
session: &mut CodeGenSession, session: &mut CodeGenSession<T>,
translation_ctx: &TranslationContext, translation_ctx: &TranslationContext,
func_idx: u32, func_idx: u32,
body: &FunctionBody, body: &FunctionBody,
) -> Result<(), Error> { ) -> Result<(), Error>
where
Error: From<T::Error>,
{
// Emits an unconditional branch out to the control frame stored at index `idx`
// of `control_frames`.
//
// The innermost (last) frame is first marked unreachable, then the block-return
// sequence for the target frame is emitted, followed by a branch to the target's
// branch label.
//
// Panics if `control_frames` is empty or if `idx` is out of range.
fn break_from_control_frame_with_id<T0: Memory>(
    ctx: &mut Context<T0>,
    control_frames: &mut Vec<ControlFrame>,
    idx: usize,
) {
    {
        // Scoped so the mutable borrow ends before the target frame is looked up.
        let innermost = control_frames
            .last_mut()
            .expect("Control stack is empty!");
        innermost.mark_unreachable();
    }

    let target = control_frames.get(idx).expect("wrong depth");

    // Index 0 is the implicit frame for the function body, so branching to it
    // is flagged separately from branching to an inner block.
    let targets_function_frame = idx == 0;
    ctx.return_from_block(target.arity(), targets_function_frame);

    let destination = target.kind.branch_target();
    ctx.br(destination);
}
let locals = body.get_locals_reader()?; let locals = body.get_locals_reader()?;
let func_type = translation_ctx.func_type(func_idx); let func_type = translation_ctx.func_type(func_idx);
@@ -117,14 +136,14 @@ pub fn translate(
let ctx = &mut session.new_context(func_idx); let ctx = &mut session.new_context(func_idx);
let operators = body.get_operators_reader()?; let operators = body.get_operators_reader()?;
let func = start_function(ctx, arg_count, num_locals); let func = ctx.start_function(arg_count, num_locals);
let mut control_frames = Vec::new(); let mut control_frames = Vec::new();
// Upon entering the function implicit frame for function body is pushed. It has the same // Upon entering the function implicit frame for function body is pushed. It has the same
// result type as the function itself. Branching to it is equivalent to returning from the function. // result type as the function itself. Branching to it is equivalent to returning from the function.
let epilogue_label = create_label(ctx); let epilogue_label = ctx.create_label();
let function_block_state = start_block(ctx); let function_block_state = ctx.start_block();
control_frames.push(ControlFrame::new( control_frames.push(ControlFrame::new(
ControlFrameKind::Block { ControlFrameKind::Block {
end_label: epilogue_label, end_label: epilogue_label,
@@ -135,6 +154,8 @@ pub fn translate(
// TODO: We want to make this a state machine (maybe requires 1-element lookahead? Not sure) so that we // TODO: We want to make this a state machine (maybe requires 1-element lookahead? Not sure) so that we
// can coelesce multiple `end`s and optimise break-at-end-of-block into noop. // can coelesce multiple `end`s and optimise break-at-end-of-block into noop.
// TODO: Does coalescing multiple `end`s matter? At worst this really only elides a single move at
// the end of a function, and this is probably a no-op anyway due to register renaming.
for op in operators { for op in operators {
let op = op?; let op = op?;
@@ -157,11 +178,11 @@ pub fn translate(
.last_mut() .last_mut()
.expect("control stack is never empty") .expect("control stack is never empty")
.mark_unreachable(); .mark_unreachable();
trap(ctx); ctx.trap();
} }
Operator::Block { ty } => { Operator::Block { ty } => {
let label = create_label(ctx); let label = ctx.create_label();
let state = start_block(ctx); let state = ctx.start_block();
control_frames.push(ControlFrame::new( control_frames.push(ControlFrame::new(
ControlFrameKind::Block { end_label: label }, ControlFrameKind::Block { end_label: label },
state, state,
@@ -169,49 +190,32 @@ pub fn translate(
)); ));
} }
Operator::Return => { Operator::Return => {
control_frames break_from_control_frame_with_id(ctx, &mut control_frames, 0);
.last_mut()
.expect("control stack is never empty")
.mark_unreachable();
let control_frame = control_frames.get(0).expect("control stack is never empty");
return_from_block(ctx, control_frame.arity(), true);
br(ctx, control_frame.kind.branch_target());
} }
Operator::Br { relative_depth } => { Operator::Br { relative_depth } => {
control_frames
.last_mut()
.expect("control stack is never empty")
.mark_unreachable();
let idx = control_frames.len() - 1 - relative_depth as usize; let idx = control_frames.len() - 1 - relative_depth as usize;
let control_frame = control_frames.get(idx).expect("wrong depth");
return_from_block(ctx, control_frame.arity(), idx == 0); break_from_control_frame_with_id(ctx, &mut control_frames, idx);
br(ctx, control_frame.kind.branch_target());
} }
Operator::BrIf { relative_depth } => { Operator::BrIf { relative_depth } => {
let idx = control_frames.len() - 1 - relative_depth as usize; let idx = control_frames.len() - 1 - relative_depth as usize;
let control_frame = control_frames.get(idx).expect("wrong depth"); let control_frame = control_frames.get(idx).expect("wrong depth");
let if_not = create_label(ctx); let if_not = ctx.create_label();
jump_if_false(ctx, if_not); ctx.jump_if_false(if_not);
return_from_block(ctx, control_frame.arity(), idx == 0); ctx.return_from_block(control_frame.arity(), idx == 0);
br(ctx, control_frame.kind.branch_target()); ctx.br(control_frame.kind.branch_target());
define_label(ctx, if_not); ctx.define_label(if_not);
} }
Operator::If { ty } => { Operator::If { ty } => {
let end_label = create_label(ctx); let end_label = ctx.create_label();
let if_not = create_label(ctx); let if_not = ctx.create_label();
jump_if_false(ctx, if_not); ctx.jump_if_false(if_not);
let state = start_block(ctx); let state = ctx.start_block();
control_frames.push(ControlFrame::new( control_frames.push(ControlFrame::new(
ControlFrameKind::IfTrue { end_label, if_not }, ControlFrameKind::IfTrue { end_label, if_not },
@@ -220,10 +224,10 @@ pub fn translate(
)); ));
} }
Operator::Loop { ty } => { Operator::Loop { ty } => {
let header = create_label(ctx); let header = ctx.create_label();
define_label(ctx, header); ctx.define_label(header);
let state = start_block(ctx); let state = ctx.start_block();
control_frames.push(ControlFrame::new( control_frames.push(ControlFrame::new(
ControlFrameKind::Loop { header }, ControlFrameKind::Loop { header },
@@ -239,16 +243,16 @@ pub fn translate(
block_state, block_state,
.. ..
}) => { }) => {
return_from_block(ctx, arity(ty), false); ctx.return_from_block(arity(ty), false);
reset_block(ctx, block_state.clone()); ctx.reset_block(block_state.clone());
// Finalize `then` block by jumping to the `end_label`. // Finalize `then` block by jumping to the `end_label`.
br(ctx, end_label); ctx.br(end_label);
// Define `if_not` label here, so if the corresponding `if` block receives // Define `if_not` label here, so if the corresponding `if` block receives
// 0 it will branch here. // 0 it will branch here.
// After that reset stack depth to the value before entering `if` block. // After that reset stack depth to the value before entering `if` block.
define_label(ctx, if_not); ctx.define_label(if_not);
// Carry over the `end_label`, so it will be resolved when the corresponding `end` // Carry over the `end_label`, so it will be resolved when the corresponding `end`
// is encountered. // is encountered.
@@ -278,66 +282,71 @@ pub fn translate(
// Don't bother generating this code if we're in unreachable code // Don't bother generating this code if we're in unreachable code
if !control_frame.unreachable { if !control_frame.unreachable {
return_from_block(ctx, arity, control_frames.is_empty()); ctx.return_from_block(arity, control_frames.is_empty());
} }
let block_end = control_frame.kind.block_end();
// TODO: What is the correct order of this and the `define_label`? It's clear for `block`s // TODO: What is the correct order of this and the `define_label`? It's clear for `block`s
// but I'm not certain for `if..then..else..end`. // but I'm not certain for `if..then..else..end`.
end_block(ctx, control_frame.block_state); ctx.end_block(control_frame.block_state, |ctx| {
if let Some(block_end) = block_end {
if let Some(block_end) = control_frame.kind.block_end() { ctx.define_label(block_end);
define_label(ctx, block_end);
} }
});
if let ControlFrameKind::IfTrue { if_not, .. } = control_frame.kind { if let ControlFrameKind::IfTrue { if_not, .. } = control_frame.kind {
// this is `if .. end` construction. Define the `if_not` label here. // this is `if .. end` construction. Define the `if_not` label here.
define_label(ctx, if_not); ctx.define_label(if_not);
} }
} }
Operator::I32Eq => i32_eq(ctx), Operator::I32Eq => ctx.i32_eq(),
Operator::I32Ne => i32_neq(ctx), Operator::I32Ne => ctx.i32_neq(),
Operator::I32LtS => i32_lt_s(ctx), Operator::I32LtS => ctx.i32_lt_s(),
Operator::I32LeS => i32_le_s(ctx), Operator::I32LeS => ctx.i32_le_s(),
Operator::I32GtS => i32_gt_s(ctx), Operator::I32GtS => ctx.i32_gt_s(),
Operator::I32GeS => i32_ge_s(ctx), Operator::I32GeS => ctx.i32_ge_s(),
Operator::I32LtU => i32_lt_u(ctx), Operator::I32LtU => ctx.i32_lt_u(),
Operator::I32LeU => i32_le_u(ctx), Operator::I32LeU => ctx.i32_le_u(),
Operator::I32GtU => i32_gt_u(ctx), Operator::I32GtU => ctx.i32_gt_u(),
Operator::I32GeU => i32_ge_u(ctx), Operator::I32GeU => ctx.i32_ge_u(),
Operator::I32Add => i32_add(ctx), Operator::I32Add => ctx.i32_add(),
Operator::I32Sub => i32_sub(ctx), Operator::I32Sub => ctx.i32_sub(),
Operator::I32And => i32_and(ctx), Operator::I32And => ctx.i32_and(),
Operator::I32Or => i32_or(ctx), Operator::I32Or => ctx.i32_or(),
Operator::I32Xor => i32_xor(ctx), Operator::I32Xor => ctx.i32_xor(),
Operator::I32Mul => i32_mul(ctx), Operator::I32Mul => ctx.i32_mul(),
Operator::I64Eq => i64_eq(ctx), Operator::I64Eq => ctx.i64_eq(),
Operator::I64Ne => i64_neq(ctx), Operator::I64Ne => ctx.i64_neq(),
Operator::I64LtS => i64_lt_s(ctx), Operator::I64LtS => ctx.i64_lt_s(),
Operator::I64LeS => i64_le_s(ctx), Operator::I64LeS => ctx.i64_le_s(),
Operator::I64GtS => i64_gt_s(ctx), Operator::I64GtS => ctx.i64_gt_s(),
Operator::I64GeS => i64_ge_s(ctx), Operator::I64GeS => ctx.i64_ge_s(),
Operator::I64LtU => i64_lt_u(ctx), Operator::I64LtU => ctx.i64_lt_u(),
Operator::I64LeU => i64_le_u(ctx), Operator::I64LeU => ctx.i64_le_u(),
Operator::I64GtU => i64_gt_u(ctx), Operator::I64GtU => ctx.i64_gt_u(),
Operator::I64GeU => i64_ge_u(ctx), Operator::I64GeU => ctx.i64_ge_u(),
Operator::I64Add => i64_add(ctx), Operator::I64Add => ctx.i64_add(),
Operator::I64Sub => i64_sub(ctx), Operator::I64Sub => ctx.i64_sub(),
Operator::I64And => i64_and(ctx), Operator::I64And => ctx.i64_and(),
Operator::I64Or => i64_or(ctx), Operator::I64Or => ctx.i64_or(),
Operator::I64Xor => i64_xor(ctx), Operator::I64Xor => ctx.i64_xor(),
Operator::I64Mul => i64_mul(ctx), Operator::I64Mul => ctx.i64_mul(),
Operator::Drop => drop(ctx), Operator::Drop => ctx.drop(),
Operator::SetLocal { local_index } => set_local_i32(ctx, local_index), Operator::SetLocal { local_index } => ctx.set_local(local_index),
Operator::GetLocal { local_index } => get_local_i32(ctx, local_index), Operator::GetLocal { local_index } => ctx.get_local(local_index),
Operator::I32Const { value } => literal_i32(ctx, value), Operator::TeeLocal { local_index } => ctx.tee_local(local_index),
Operator::I64Const { value } => literal_i64(ctx, value), Operator::I32Const { value } => ctx.i32_literal(value),
Operator::I64Const { value } => ctx.i64_literal(value),
Operator::I32Load { memarg } => ctx.i32_load(memarg.offset)?,
Operator::I64Load { memarg } => ctx.i64_load(memarg.offset)?,
Operator::I32Store { memarg } => ctx.i32_store(memarg.offset)?,
Operator::I64Store { memarg } => ctx.i64_store(memarg.offset)?,
Operator::Call { function_index } => { Operator::Call { function_index } => {
let callee_ty = translation_ctx.func_type(function_index); let callee_ty = translation_ctx.func_type(function_index);
// TODO: this implementation assumes that this function is locally defined. // TODO: this implementation assumes that this function is locally defined.
call_direct( ctx.call_direct(
ctx,
function_index, function_index,
callee_ty.params.len() as u32, callee_ty.params.len() as u32,
callee_ty.returns.len() as u32, callee_ty.returns.len() as u32,
@@ -349,7 +358,7 @@ pub fn translate(
} }
} }
} }
epilogue(ctx, func); ctx.epilogue(func);
Ok(()) Ok(())
} }

View File

@@ -1,4 +1,4 @@
#![feature(plugin, test, const_slice_len)] #![feature(plugin, test, const_slice_len, never_type)]
#![plugin(dynasm)] #![plugin(dynasm)]
extern crate test; extern crate test;

View File

@@ -38,6 +38,7 @@ impl_function_args!(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S);
#[derive(Default)] #[derive(Default)]
pub struct TranslatedModule { pub struct TranslatedModule {
translated_code_section: Option<TranslatedCodeSection>, translated_code_section: Option<TranslatedCodeSection>,
memory: Option<Vec<u8>>,
} }
impl TranslatedModule { impl TranslatedModule {
@@ -136,7 +137,18 @@ pub fn translate(data: &[u8]) -> Result<TranslatedModule, Error> {
if let SectionCode::Memory = section.code { if let SectionCode::Memory = section.code {
let memories = section.get_memory_section_reader()?; let memories = section.get_memory_section_reader()?;
translate_sections::memory(memories)?; let mem = translate_sections::memory(memories)?;
// `mem` holds one entry per memory declared in the memory section; only a
// single memory is supported so far, so anything more is rejected up front.
assert!(
    mem.len() <= 1,
    "Multiple memories not yet implemented"
);
if !mem.is_empty() {
let mem = mem[0];
assert_eq!(Some(mem.limits.initial), mem.limits.maximum);
output.memory = Some(vec![0; mem.limits.initial as usize * 65_536]);
}
reader.skip_custom_sections()?; reader.skip_custom_sections()?;
if reader.eof() { if reader.eof() {
@@ -191,7 +203,11 @@ pub fn translate(data: &[u8]) -> Result<TranslatedModule, Error> {
if let SectionCode::Code = section.code { if let SectionCode::Code = section.code {
let code = section.get_code_section_reader()?; let code = section.get_code_section_reader()?;
output.translated_code_section = Some(translate_sections::code(code, &ctx)?); output.translated_code_section = Some(translate_sections::code(
code,
&ctx,
output.memory.as_mut().map(|m| &mut m[..]),
)?);
reader.skip_custom_sections()?; reader.skip_custom_sections()?;
if reader.eof() { if reader.eof() {

View File

@@ -25,6 +25,7 @@ mod op32 {
($op:ident, $func:expr) => { ($op:ident, $func:expr) => {
mod $op { mod $op {
use super::{translate_wat, TranslatedModule}; use super::{translate_wat, TranslatedModule};
use std::sync::Once;
const OP: &str = stringify!($op); const OP: &str = stringify!($op);
@@ -41,17 +42,18 @@ mod op32 {
} }
fn lit_lit(a: i32, b: i32) -> bool { fn lit_lit(a: i32, b: i32) -> bool {
unsafe { let translated = translate_wat(&format!("
translate_wat(&format!("
(module (func (result i32) (module (func (result i32)
(i32.{op} (i32.const {left}) (i32.const {right})))) (i32.{op} (i32.const {left}) (i32.const {right}))))
", op = OP, left = a, right = b)).execute_func::<(), i32>(0, ()) == $func(a, b) ", op = OP, left = a, right = b));
static ONCE: Once = Once::new();
ONCE.call_once(|| translated.disassemble());
unsafe {
translated.execute_func::<(), i32>(0, ()) == $func(a, b)
} }
} }
fn lit_reg(a: i32, b: i32) -> bool { fn lit_reg(a: i32, b: i32) -> bool {
use std::sync::Once;
let translated = translate_wat(&format!(" let translated = translate_wat(&format!("
(module (func (param i32) (result i32) (module (func (param i32) (result i32)
(i32.{op} (i32.const {left}) (get_local 0)))) (i32.{op} (i32.const {left}) (get_local 0))))
@@ -64,11 +66,14 @@ mod op32 {
} }
fn reg_lit(a: i32, b: i32) -> bool { fn reg_lit(a: i32, b: i32) -> bool {
unsafe { let translated = translate_wat(&format!("
translate_wat(&format!("
(module (func (param i32) (result i32) (module (func (param i32) (result i32)
(i32.{op} (get_local 0) (i32.const {right})))) (i32.{op} (get_local 0) (i32.const {right}))))
", op = OP, right = b)).execute_func::<(i32,), i32>(0, (a,)) == $func(a, b) ", op = OP, right = b));
static ONCE: Once = Once::new();
ONCE.call_once(|| translated.disassemble());
unsafe {
translated.execute_func::<(i32,), i32>(0, (a,)) == $func(a, b)
} }
} }
} }
@@ -452,7 +457,11 @@ fn br_block() {
) )
) )
"#; "#;
assert_eq!(execute_wat(code, 5, 7), 12);
let translated = translate_wat(code);
translated.disassemble();
assert_eq!(unsafe { translated.execute_func::<(i32, i32), i32>(0, (5, 7)) }, 12);
} }
// Tests discarding values on the value stack, while // Tests discarding values on the value stack, while
@@ -724,6 +733,42 @@ fn fib() {
} }
} }
// Round-trips values through linear memory.
//
// The WAT function counts local 0 down from 10. On each iteration it stores the
// counter at byte address `counter * 4`, loads it back from the same address and
// returns 0 immediately if the loaded value differs. When the counter reaches 0
// it breaks out of the enclosing block and the function returns 1.
#[test]
fn storage() {
    const CODE: &str = r#"
(module
(memory 1 1)
(func (result i32)
(local i32 i32 i32)
(set_local 0 (i32.const 10))
(block
(loop
(if
(i32.eq (get_local 0) (i32.const 0))
(then (br 2))
)
(set_local 2 (i32.mul (get_local 0) (i32.const 4)))
(i32.store (get_local 2) (get_local 0))
(set_local 1 (i32.load (get_local 2)))
(if
(i32.ne (get_local 0) (get_local 1))
(then (return (i32.const 0)))
)
(set_local 0 (i32.sub (get_local 0) (i32.const 1)))
(br 0)
)
)
(i32.const 1)
)
)"#;

    let module = translate_wat(CODE);
    // Dump the generated machine code so failures are easier to diagnose.
    module.disassemble();

    // Function 0 is declared above with no parameters and a single i32 result,
    // which matches the `<(), i32>` signature requested here.
    let outcome = unsafe { module.execute_func::<(), i32>(0, ()) };
    assert_eq!(outcome, 1);
}
#[bench] #[bench]
fn bench_fibonacci_compile(b: &mut test::Bencher) { fn bench_fibonacci_compile(b: &mut test::Bencher) {
let wasm = wabt::wat2wasm(FIBONACCI).unwrap(); let wasm = wabt::wat2wasm(FIBONACCI).unwrap();

View File

@@ -45,11 +45,11 @@ pub fn table(tables: TableSectionReader) -> Result<(), Error> {
} }
/// Parses the Memory section of the wasm module. /// Parses the Memory section of the wasm module.
pub fn memory(memories: MemorySectionReader) -> Result<(), Error> { pub fn memory(memories: MemorySectionReader) -> Result<Vec<MemoryType>, Error> {
for entry in memories { memories
entry?; // TODO .into_iter()
} .map(|r| r.map_err(Into::into))
Ok(()) .collect()
} }
/// Parses the Global section of the wasm module. /// Parses the Global section of the wasm module.
@@ -86,13 +86,22 @@ pub fn element(elements: ElementSectionReader) -> Result<(), Error> {
pub fn code( pub fn code(
code: CodeSectionReader, code: CodeSectionReader,
translation_ctx: &TranslationContext, translation_ctx: &TranslationContext,
memory: Option<&mut [u8]>,
) -> Result<TranslatedCodeSection, Error> { ) -> Result<TranslatedCodeSection, Error> {
let func_count = code.get_count(); let func_count = code.get_count();
let mut session = CodeGenSession::new(func_count); if let Some(memory) = memory {
let mut session = CodeGenSession::<::backend::HasMemory>::with_memory(func_count, memory.as_mut_ptr());
for (idx, body) in code.into_iter().enumerate() { for (idx, body) in code.into_iter().enumerate() {
function_body::translate(&mut session, translation_ctx, idx as u32, &body?)?; function_body::translate(&mut session, translation_ctx, idx as u32, &body?)?;
} }
Ok(session.into_translated_code_section()?) Ok(session.into_translated_code_section()?)
} else {
let mut session = CodeGenSession::<::backend::NoMemory>::new(func_count);
for (idx, body) in code.into_iter().enumerate() {
function_body::translate(&mut session, translation_ctx, idx as u32, &body?)?;
}
Ok(session.into_translated_code_section()?)
}
} }
/// Parses the Data section of the wasm module. /// Parses the Data section of the wasm module.