Merge pull request #18 from Vurich/microwasm

Microwasm
Jef
2019-03-01 08:23:21 +00:00
committed by GitHub
13 changed files with 7269 additions and 613 deletions

.gitmodules (new file, 3 lines)

@@ -0,0 +1,3 @@
[submodule "wasmparser.rs"]
path = wasmparser.rs
url = git@github.com:Vurich/wasmparser.rs


@@ -9,13 +9,21 @@ keywords = ["webassembly", "wasm", "compile", "compiler", "jit"]
publish = false
[dependencies]
smallvec = "0.6"
dynasm = "0.2.3"
dynasmrt = "0.2.3"
wasmparser = "0.21.6"
wasmparser = { path = "./wasmparser.rs" }
memoffset = "0.2"
itertools = "0.8"
capstone = "0.5.0"
failure = "0.1.3"
failure_derive = "0.1.3"
cranelift-codegen = "0.28"
multi_mut = "0.1"
either = "1.5"
wabt = "0.7"
lazy_static = "1.2"
quickcheck = "0.7"
[badges]
maintenance = { status = "experimental" }


@@ -22,7 +22,7 @@ fn read_to_end<P: AsRef<Path>>(path: P) -> io::Result<Vec<u8>> {
fn maybe_main() -> Result<(), String> {
let data = read_to_end("test.wasm").map_err(|e| e.to_string())?;
let translated = translate(&data).map_err(|e| e.to_string())?;
let result: u32 = unsafe { translated.execute_func(0, (5u32, 3u32)) };
let result: u32 = translated.execute_func(0, (5u32, 3u32)).unwrap();
println!("f(5, 3) = {}", result);
Ok(())
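For context, this is the checked call path the new API enables — a minimal sketch, assuming the crate is named `lightbeam` and using `ExecutableModule`/`ExecutionError` as defined in module.rs further down:

```rust
use lightbeam::ExecutableModule;

fn run(module: &ExecutableModule) -> Result<(), String> {
    // `execute_func` now checks the argument and return types against the
    // function's signature, so the call site no longer needs `unsafe`.
    let result: u32 = module
        .execute_func(0, (5u32, 3u32))
        .map_err(|e| format!("{:?}", e))?;
    println!("f(5, 3) = {}", result);
    Ok(())
}
```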

File diff suppressed because it is too large.


@@ -20,7 +20,7 @@ pub fn disassemble(mem: &[u8]) -> Result<(), Error> {
for b in i.bytes() {
write!(&mut bytes_str, "{:02x} ", b).unwrap();
}
write!(&mut line, "{:21}\t", bytes_str).unwrap();
write!(&mut line, "{:24}\t", bytes_str).unwrap();
if let Some(s) = i.mnemonic() {
write!(&mut line, "{}\t", s).unwrap();


@@ -20,6 +20,12 @@ impl From<BinaryReaderError> for Error {
}
}
impl From<!> for Error {
fn from(other: !) -> Self {
other
}
}
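// Aside (illustration only, not part of this diff): the `From<!>` impl
// above is what lets `?` propagate from infallible APIs once the
// `never_type` feature is enabled (lib.rs below turns it on).
fn infallible() -> Result<u32, !> {
    Ok(42)
}
fn example() -> Result<u32, Error> {
    // The uninhabited `!` error converts into `Error` via the impl above.
    Ok(infallible()? + 1)
}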
impl From<capstone::Error> for Error {
fn from(e: capstone::Error) -> Self {
Error::Disassembler(e.to_string())


@@ -1,240 +1,487 @@
use backend::*;
use error::Error;
use module::TranslationContext;
use wasmparser::{FunctionBody, Operator, Type};
use crate::backend::*;
use crate::error::Error;
use crate::microwasm::*;
use crate::module::{quickhash, ModuleContext, SigType, Signature};
use either::{Either, Left, Right};
use multi_mut::HashMapMultiMut;
use std::{collections::HashMap, convert::TryInto, hash::Hash};
// TODO: Use our own declared `Type` enum.
/// Type of a control frame.
#[derive(Debug, Copy, Clone, PartialEq)]
enum ControlFrameKind {
/// A regular block frame.
///
/// Can be used for an implicit function block.
Block { end_label: Label },
/// Loop frame (branching to the beginning of block).
#[allow(unused)]
Loop { header: Label },
/// True-subblock of if expression.
IfTrue {
/// If a jump happens inside the if-true block then control will
/// land on this label.
end_label: Label,
/// If the condition of the `if` statement is unsatisfied, control
/// will land on this label. This label might point to the `else` block if it
/// exists. Otherwise it is equal to `end_label`.
if_not: Label,
},
/// False-subblock of if expression.
IfFalse { end_label: Label },
#[derive(Debug)]
struct Block {
label: BrTarget<Label>,
calling_convention: Option<Either<CallingConvention, VirtualCallingConvention>>,
params: u32,
// TODO: Is there a cleaner way to do this? `has_backwards_callers` should always be set if `is_next`
// is false, so we should probably use an `enum` here.
is_next: bool,
num_callers: Option<u32>,
actual_num_callers: u32,
has_backwards_callers: bool,
}
impl ControlFrameKind {
/// Returns a label which should be used as a branch destination.
fn br_destination(&self) -> Label {
match *self {
ControlFrameKind::Block { end_label } => end_label,
ControlFrameKind::Loop { header } => header,
ControlFrameKind::IfTrue { end_label, .. } => end_label,
ControlFrameKind::IfFalse { end_label } => end_label,
}
}
/// Returns `true` if this block is of loop kind.
fn is_loop(&self) -> bool {
match *self {
ControlFrameKind::Loop { .. } => true,
_ => false,
}
impl Block {
fn should_serialize_args(&self) -> bool {
self.calling_convention.is_none()
&& (self.num_callers != Some(1) || self.has_backwards_callers)
}
}
struct ControlFrame {
kind: ControlFrameKind,
/// Boolean which signals whether the value stack became polymorphic. The value stack starts in a non-polymorphic state and
/// becomes polymorphic only after an instruction that never passes control further is executed,
/// e.g. `unreachable`, `br` (but not `br_if`!), etc.
stack_polymorphic: bool,
/// Relative stack depth at the beginning of the frame.
stack_depth: StackDepth,
ty: Type,
}
const DISASSEMBLE: bool = false;
impl ControlFrame {
pub fn new(kind: ControlFrameKind, stack_depth: StackDepth, ty: Type) -> ControlFrame {
ControlFrame {
kind,
stack_depth,
ty,
stack_polymorphic: false,
}
}
pub fn outgoing_stack_depth(&self) -> StackDepth {
let mut outgoing_stack_depth = self.stack_depth;
if self.ty != Type::EmptyBlockType {
// If there is a return value, reserve one slot in the expected outgoing stack depth
// to account for it.
outgoing_stack_depth.reserve(1);
}
outgoing_stack_depth
}
/// Marks this control frame as having reached a stack-polymorphic state.
pub fn mark_stack_polymorphic(&mut self) {
self.stack_polymorphic = true;
}
}
pub fn translate(
session: &mut CodeGenSession,
translation_ctx: &TranslationContext,
pub fn translate_wasm<M: ModuleContext>(
session: &mut CodeGenSession<M>,
func_idx: u32,
body: &FunctionBody,
) -> Result<(), Error> {
let locals = body.get_locals_reader()?;
body: &wasmparser::FunctionBody,
) -> Result<(), Error>
where
for<'any> &'any M::Signature: Into<OpSig>,
{
let ty = session.module_context.func_type(func_idx);
let func_type = translation_ctx.func_type(func_idx);
let arg_count = func_type.params.len() as u32;
let return_ty = if func_type.returns.len() > 0 {
func_type.returns[0]
} else {
Type::EmptyBlockType
};
if DISASSEMBLE {
let mut microwasm = vec![];
let mut framesize = arg_count;
for local in locals {
let (count, _ty) = local?;
framesize += count;
let microwasm_conv = MicrowasmConv::new(
session.module_context,
ty.params().iter().map(SigType::to_microwasm_type),
ty.returns().iter().map(SigType::to_microwasm_type),
body,
);
for ops in microwasm_conv {
microwasm.extend(ops?);
}
println!("{}", crate::microwasm::dis(func_idx, &microwasm));
}
let mut ctx = session.new_context(func_idx);
let operators = body.get_operators_reader()?;
let microwasm_conv = MicrowasmConv::new(
session.module_context,
ty.params().iter().map(SigType::to_microwasm_type),
ty.returns().iter().map(SigType::to_microwasm_type),
body,
);
prologue(&mut ctx, framesize);
translate(
session,
func_idx,
microwasm_conv.flat_map(|i| i.expect("TODO: Make this not panic")),
)
}
for arg_pos in 0..arg_count {
copy_incoming_arg(&mut ctx, arg_pos);
}
pub fn translate<M: ModuleContext, I, L>(
session: &mut CodeGenSession<M>,
func_idx: u32,
body: I,
) -> Result<(), Error>
where
I: IntoIterator<Item = Operator<L>>,
L: Hash + Clone + Eq,
Operator<L>: std::fmt::Display,
{
let func_type = session.module_context.defined_func_type(func_idx);
let mut body = body.into_iter().peekable();
let mut control_frames = Vec::new();
let ctx = &mut session.new_context(func_idx);
// Upon entering the function, an implicit frame for the function body is pushed. It has the same
// result type as the function itself; branching to it is equivalent to returning from the function.
let epilogue_label = create_label(&mut ctx);
control_frames.push(ControlFrame::new(
ControlFrameKind::Block {
end_label: epilogue_label,
let params = func_type
.params()
.iter()
.map(|t| t.to_microwasm_type())
.collect::<Vec<_>>();
ctx.start_function(params.iter().cloned());
let mut blocks = HashMap::<BrTarget<L>, Block>::new();
let num_returns = func_type.returns().len();
blocks.insert(
BrTarget::Return,
Block {
label: BrTarget::Return,
params: num_returns as u32,
// TODO: This only works for integers
calling_convention: Some(Left(CallingConvention::function_start(ret_locs(
func_type.returns().iter().map(|t| t.to_microwasm_type()),
)))),
is_next: false,
has_backwards_callers: false,
actual_num_callers: 0,
num_callers: None,
},
current_stack_depth(&ctx),
return_ty,
));
);
for op in operators {
match op? {
loop {
let op = if let Some(op) = body.next() {
op
} else {
break;
};
if let Some(Operator::Label(label)) = body.peek() {
let block = blocks
.get_mut(&BrTarget::Label(label.clone()))
.expect("Block definition should be before label definition");
block.is_next = true;
}
match op {
Operator::Unreachable => {
control_frames
.last_mut()
.expect("control stack is never empty")
.mark_stack_polymorphic();
trap(&mut ctx);
ctx.trap();
}
Operator::If { ty } => {
let end_label = create_label(&mut ctx);
let if_not = create_label(&mut ctx);
Operator::Label(label) => {
use std::collections::hash_map::Entry;
pop_and_breq(&mut ctx, if_not);
if let Entry::Occupied(mut entry) = blocks.entry(BrTarget::Label(label)) {
let has_backwards_callers = {
let block = entry.get_mut();
control_frames.push(ControlFrame::new(
ControlFrameKind::IfTrue { end_label, if_not },
current_stack_depth(&ctx),
ty,
));
// TODO: Is it possible with arbitrary CFGs that a block will have _only_ backwards callers?
// Certainly for Microwasm generated from Wasm that is currently impossible.
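// If the block has no callers by the time its label is defined, its body
// is dead code: skip every operator up to the next label.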
if block.actual_num_callers == 0 {
loop {
let done = match body.peek() {
Some(Operator::Label(_)) | None => true,
Some(_) => false,
};
if done {
break;
}
body.next();
}
continue;
}
block.is_next = false;
// TODO: We can `take` this if it's a `Right`
match block.calling_convention.as_ref() {
Some(Left(cc)) => {
ctx.apply_cc(cc);
}
Some(Right(virt)) => {
ctx.set_state(virt.clone());
}
_ => {}
}
ctx.define_label(block.label.label().unwrap().clone());
block.has_backwards_callers
};
// To reduce memory overhead
if !has_backwards_callers {
entry.remove_entry();
}
} else {
panic!("Label defined before being declared");
}
}
Operator::Else => {
match control_frames.pop() {
Some(ControlFrame {
kind: ControlFrameKind::IfTrue { if_not, end_label },
ty,
stack_depth,
Operator::Block {
label,
has_backwards_callers,
params,
num_callers,
} => {
let asm_label = ctx.create_label();
blocks.insert(
BrTarget::Label(label),
Block {
label: BrTarget::Label(asm_label),
params: params.len() as _,
calling_convention: None,
is_next: false,
has_backwards_callers,
actual_num_callers: 0,
num_callers,
},
);
}
Operator::Br { target } => {
// TODO: We should add the block to the hashmap if we don't have it already
let block = blocks.get_mut(&target).unwrap();
block.actual_num_callers += 1;
let should_serialize_args = block.should_serialize_args();
match block {
Block {
is_next,
label: BrTarget::Label(l),
calling_convention,
..
}) => {
// Finalize if..else block by jumping to the `end_label`.
br(&mut ctx, end_label);
} => {
let cc = if should_serialize_args {
*calling_convention = Some(Left(ctx.serialize_args(block.params)));
None
} else {
calling_convention
.as_ref()
.map(Either::as_ref)
.and_then(Either::left)
};
// Define the `if_not` label here, so that if the corresponding `if` block receives
// 0 it will branch here.
// After that, reset the stack depth to the value before entering the `if` block.
define_label(&mut ctx, if_not);
restore_stack_depth(&mut ctx, stack_depth);
if let Some(cc) = cc {
ctx.pass_block_args(cc);
}
// Carry over the `end_label`, so it will be resolved when the corresponding `end`
// is encountered.
//
// Also note that we reset `stack_depth` to the value before entering the `if` block.
let mut frame = ControlFrame::new(
ControlFrameKind::IfFalse { end_label },
stack_depth,
ty,
);
control_frames.push(frame);
if !*is_next {
ctx.br(*l);
}
}
Block {
label: BrTarget::Return,
calling_convention: Some(Left(cc)),
..
} => {
ctx.pass_block_args(cc);
ctx.ret();
}
_ => unimplemented!(),
}
}
Operator::BrIf { then, else_ } => {
let (then_block, else_block) = blocks.pair_mut(&then, &else_);
// TODO: If actual_num_callers == num_callers then we can remove this block from the hashmap.
// This frees memory and acts as a kind of verification that `num_callers` is set
// correctly. It doesn't help for loops and block ends generated from Wasm.
then_block.actual_num_callers += 1;
else_block.actual_num_callers += 1;
let then_block_parts = (then_block.is_next, then_block.label);
let else_block_parts = (else_block.is_next, else_block.label);
// TODO: Use "compatible" cc
assert_eq!(then_block.params, else_block.params);
// TODO: The blocks should have compatible (one must be subset of other?) calling
// conventions or else at least one must have no calling convention. This
// should always be true for converting from WebAssembly AIUI.
let f = |ctx: &mut Context<_>| {
let then_block_should_serialize_args = then_block.should_serialize_args();
let else_block_should_serialize_args = else_block.should_serialize_args();
match (
&mut then_block.calling_convention,
&mut else_block.calling_convention,
) {
(Some(Left(ref cc)), ref mut other @ None)
| (ref mut other @ None, Some(Left(ref cc))) => {
**other = Some(Left(cc.clone()));
ctx.pass_block_args(cc);
}
(ref mut then_cc @ None, ref mut else_cc @ None) => {
let cc = if then_block_should_serialize_args {
Some(Left(ctx.serialize_args(then_block.params)))
} else if else_block_should_serialize_args {
Some(Left(ctx.serialize_args(else_block.params)))
} else {
Some(Right(ctx.virtual_calling_convention()))
};
**then_cc = cc.clone();
**else_cc = cc;
}
_ => unimplemented!(
"Can't pass different params to different sides of `br_if` yet"
),
}
Some(_) => panic!("else expects if block"),
None => panic!("control stack is never empty"),
};
}
Operator::End => {
let control_frame = control_frames.pop().expect("control stack is never empty");
if !control_frame.kind.is_loop() {
// Branches to a control frame of loop kind direct control flow to the header of the loop,
// so we don't need to resolve them here. Branches to other control frames always lead
// control flow to the corresponding `end`.
define_label(&mut ctx, control_frame.kind.br_destination());
}
if let ControlFrameKind::IfTrue { if_not, .. } = control_frame.kind {
// This is an `if .. end` construction. Define the `if_not` label here.
define_label(&mut ctx, if_not);
}
restore_stack_depth(&mut ctx, control_frame.outgoing_stack_depth());
if control_frames.len() == 0 {
// This is the last control frame. Perform the implicit return here.
if return_ty != Type::EmptyBlockType {
prepare_return_value(&mut ctx);
match (then_block_parts, else_block_parts) {
((true, _), (false, BrTarget::Label(else_))) => {
ctx.br_if_false(else_, f);
}
((false, BrTarget::Label(then)), (true, _)) => {
ctx.br_if_true(then, f);
}
((false, BrTarget::Label(then)), (false, BrTarget::Label(else_))) => {
ctx.br_if_true(then, f);
ctx.br(else_);
}
other => unimplemented!("{:#?}", other),
}
}
Operator::I32Eq => {
relop_eq_i32(&mut ctx);
Operator::BrTable(BrTable { targets, default }) => {
use itertools::Itertools;
let (def, params) = {
let def = blocks.get(&default).unwrap();
(
if def.is_next {
None
} else {
Some(def.label)
},
def.params.clone()
)
};
let target_labels = targets.iter()
.map(|target| blocks.get(target).unwrap().label)
.collect::<Vec<_>>();
ctx.br_table(target_labels, def, |ctx| {
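// Unify the calling convention across all unique targets (including the
// default): adopt an existing convention if any target already has one,
// otherwise pick one based on the maximum possible caller count.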
let mut cc = None;
let mut max_num_callers = Some(0);
for target in targets.iter().chain(std::iter::once(&default)).unique() {
let block = blocks.get_mut(target).unwrap();
block.actual_num_callers += 1;
if block.calling_convention.is_some() {
assert!(cc.is_none(), "Can't pass different params to different elements of `br_table` yet");
cc = block.calling_convention.clone();
}
if let Some(max) = max_num_callers {
max_num_callers = block.num_callers.map(|n| max.max(n));
}
}
if let Some(Left(cc)) = &cc {
ctx.pass_block_args(cc);
}
let cc = cc.unwrap_or_else(||
if max_num_callers == Some(1) {
Right(ctx.virtual_calling_convention())
} else {
Left(ctx.serialize_args(params))
}
);
for target in targets.iter().chain(std::iter::once(&default)).unique() {
let block = blocks.get_mut(target).unwrap();
block.calling_convention = Some(cc.clone());
}
});
}
Operator::I32Add => {
add_i32(&mut ctx);
}
Operator::GetLocal { local_index } => {
get_local_i32(&mut ctx, local_index);
}
Operator::I32Const { value } => {
literal_i32(&mut ctx, value);
Operator::Swap { depth } => ctx.swap(depth),
Operator::Pick { depth } => ctx.pick(depth),
Operator::Eq(I32) => ctx.i32_eq(),
Operator::Eqz(Size::_32) => ctx.i32_eqz(),
Operator::Ne(I32) => ctx.i32_neq(),
Operator::Lt(SI32) => ctx.i32_lt_s(),
Operator::Le(SI32) => ctx.i32_le_s(),
Operator::Gt(SI32) => ctx.i32_gt_s(),
Operator::Ge(SI32) => ctx.i32_ge_s(),
Operator::Lt(SU32) => ctx.i32_lt_u(),
Operator::Le(SU32) => ctx.i32_le_u(),
Operator::Gt(SU32) => ctx.i32_gt_u(),
Operator::Ge(SU32) => ctx.i32_ge_u(),
Operator::Add(I32) => ctx.i32_add(),
Operator::Sub(I32) => ctx.i32_sub(),
Operator::And(Size::_32) => ctx.i32_and(),
Operator::Or(Size::_32) => ctx.i32_or(),
Operator::Xor(Size::_32) => ctx.i32_xor(),
Operator::Mul(I32) => ctx.i32_mul(),
Operator::Div(SU32) => ctx.i32_div_u(),
Operator::Div(SI32) => ctx.i32_div_s(),
Operator::Rem(sint::I32) => ctx.i32_rem_s(),
Operator::Rem(sint::U32) => ctx.i32_rem_u(),
Operator::Shl(Size::_32) => ctx.i32_shl(),
Operator::Shr(sint::I32) => ctx.i32_shr_s(),
Operator::Shr(sint::U32) => ctx.i32_shr_u(),
Operator::Rotl(Size::_32) => ctx.i32_rotl(),
Operator::Rotr(Size::_32) => ctx.i32_rotr(),
Operator::Clz(Size::_32) => ctx.i32_clz(),
Operator::Ctz(Size::_32) => ctx.i32_ctz(),
Operator::Popcnt(Size::_32) => ctx.i32_popcnt(),
Operator::Eq(I64) => ctx.i64_eq(),
Operator::Eqz(Size::_64) => ctx.i64_eqz(),
Operator::Ne(I64) => ctx.i64_neq(),
Operator::Lt(SI64) => ctx.i64_lt_s(),
Operator::Le(SI64) => ctx.i64_le_s(),
Operator::Gt(SI64) => ctx.i64_gt_s(),
Operator::Ge(SI64) => ctx.i64_ge_s(),
Operator::Lt(SU64) => ctx.i64_lt_u(),
Operator::Le(SU64) => ctx.i64_le_u(),
Operator::Gt(SU64) => ctx.i64_gt_u(),
Operator::Ge(SU64) => ctx.i64_ge_u(),
Operator::Add(I64) => ctx.i64_add(),
Operator::Sub(I64) => ctx.i64_sub(),
Operator::And(Size::_64) => ctx.i64_and(),
Operator::Or(Size::_64) => ctx.i64_or(),
Operator::Xor(Size::_64) => ctx.i64_xor(),
Operator::Mul(I64) => ctx.i64_mul(),
Operator::Shl(Size::_64) => ctx.i64_shl(),
Operator::Shr(sint::I64) => ctx.i64_shr_s(),
Operator::Shr(sint::U64) => ctx.i64_shr_u(),
Operator::Rotl(Size::_64) => ctx.i64_rotl(),
Operator::Rotr(Size::_64) => ctx.i64_rotr(),
Operator::Clz(Size::_64) => ctx.i64_clz(),
Operator::Ctz(Size::_64) => ctx.i64_ctz(),
Operator::Popcnt(Size::_64) => ctx.i64_popcnt(),
Operator::Add(F32) => ctx.f32_add(),
Operator::Mul(F32) => ctx.f32_mul(),
Operator::Sub(F32) => ctx.f32_sub(),
Operator::Neg(Size::_32) => ctx.f32_neg(),
Operator::Gt(SF32) => ctx.f32_gt(),
Operator::Ge(SF32) => ctx.f32_ge(),
Operator::Lt(SF32) => ctx.f32_lt(),
Operator::Le(SF32) => ctx.f32_le(),
Operator::Add(F64) => ctx.f64_add(),
Operator::Mul(F64) => ctx.f64_mul(),
Operator::Sub(F64) => ctx.f64_sub(),
Operator::Neg(Size::_64) => ctx.f64_neg(),
Operator::Gt(SF64) => ctx.f64_gt(),
Operator::Ge(SF64) => ctx.f64_ge(),
Operator::Lt(SF64) => ctx.f64_lt(),
Operator::Le(SF64) => ctx.f64_le(),
Operator::Drop(range) => ctx.drop(range),
Operator::Const(val) => ctx.const_(val),
Operator::Load { ty: I32, memarg } => ctx.i32_load(memarg.offset)?,
Operator::Load { ty: I64, memarg } => ctx.i64_load(memarg.offset)?,
Operator::Store { ty: I32, memarg } => ctx.i32_store(memarg.offset)?,
Operator::Store { ty: I64, memarg } => ctx.i64_store(memarg.offset)?,
Operator::Select => {
ctx.select();
}
Operator::Call { function_index } => {
let callee_ty = translation_ctx.func_type(function_index);
let function_index = session
.module_context
.defined_func_index(function_index)
.expect("We don't support host calls yet");
let callee_ty = session.module_context.func_type(function_index);
// TODO: this implementation assumes that this function is locally defined.
// TODO: guarantee 16-byte alignment for calls as required by x86-64 ABI
pass_outgoing_args(&mut ctx, callee_ty.params.len() as u32);
call_direct(&mut ctx, function_index, callee_ty.returns.len() as u32);
ctx.call_direct(
function_index,
callee_ty.params().iter().map(|t| t.to_microwasm_type()),
callee_ty.returns().len() as u32,
);
}
_ => {
trap(&mut ctx);
Operator::CallIndirect {
type_index,
table_index,
} => {
assert_eq!(table_index, 0);
let callee_ty = session.module_context.signature(type_index);
// TODO: this implementation assumes that this function is locally defined.
ctx.call_indirect(
quickhash(callee_ty) as u32,
callee_ty.params().iter().map(|t| t.to_microwasm_type()),
callee_ty.returns().len() as u32,
);
}
op => {
unimplemented!("{}", op);
}
}
}
epilogue(&mut ctx);
ctx.epilogue();
Ok(())
}
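To make the `call_indirect` path above concrete, here is a hedged plain-Rust sketch of the runtime check the generated code performs, using `RuntimeFunc` and `FuncRef` from module.rs below. The real check is emitted as inline loads at the `offset_of_*` offsets, the fields are private, and the `Trap` type here is hypothetical:

```rust
enum Trap {
    OutOfBounds,
    SignatureMismatch,
}

fn indirect_call_target(
    table: &[RuntimeFunc],
    index: usize,
    expected_sig_hash: u32,
) -> Result<FuncRef, Trap> {
    // Bounds-check the table index.
    let entry = table.get(index).ok_or(Trap::OutOfBounds)?;
    // Compare the caller's signature hash against the table entry's.
    if entry.sig_hash != expected_sig_hash {
        return Err(Trap::SignatureMismatch);
    }
    // The caller then jumps to this code pointer.
    Ok(entry.func_start)
}
```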


@@ -1,24 +1,49 @@
#![feature(plugin)]
#![feature(
plugin,
test,
const_slice_len,
never_type,
alloc_layout_extra,
try_from,
try_trait,
)]
#![plugin(dynasm)]
extern crate test;
#[macro_use]
extern crate smallvec;
extern crate capstone;
extern crate either;
extern crate failure;
extern crate wasmparser;
pub extern crate wasmparser;
#[macro_use]
extern crate failure_derive;
#[macro_use]
extern crate memoffset;
extern crate dynasmrt;
extern crate itertools;
#[cfg(test)]
#[macro_use]
extern crate lazy_static;
#[cfg(test)]
#[macro_use]
extern crate quickcheck;
extern crate wabt;
// Just so we can implement `Signature` for `cranelift_codegen::ir::Signature`
extern crate cranelift_codegen;
extern crate multi_mut;
mod backend;
mod disassemble;
mod error;
mod function_body;
mod microwasm;
mod module;
mod translate_sections;
#[cfg(test)]
mod tests;
pub use module::translate;
pub use module::TranslatedModule;
pub use backend::CodeGenSession;
pub use function_body::translate_wasm as translate_function;
pub use module::{translate, ExecutableModule, ModuleContext, Signature, TranslatedModule};

src/microwasm.rs (new file, 1893 lines)

File diff suppressed because it is too large.


@@ -1,34 +1,98 @@
use crate::microwasm;
use backend::TranslatedCodeSection;
use cranelift_codegen::{
ir::{self, AbiParam, Signature as CraneliftSignature},
isa,
};
use error::Error;
use std::mem;
use std::{
convert::TryInto,
hash::{Hash, Hasher},
mem,
};
use translate_sections;
use wasmparser::{FuncType, ModuleReader, SectionCode};
use wasmparser::{FuncType, MemoryType, ModuleReader, SectionCode, TableType, Type};
pub trait FunctionArgs {
unsafe fn call<T>(self, start: *const u8) -> T;
pub trait AsValueType {
const TYPE: Type;
}
pub trait TypeList {
const TYPE_LIST: &'static [Type];
}
impl<T> TypeList for T
where
T: AsValueType,
{
const TYPE_LIST: &'static [Type] = &[T::TYPE];
}
impl AsValueType for i32 {
const TYPE: Type = Type::I32;
}
impl AsValueType for i64 {
const TYPE: Type = Type::I64;
}
impl AsValueType for u32 {
const TYPE: Type = Type::I32;
}
impl AsValueType for u64 {
const TYPE: Type = Type::I64;
}
impl AsValueType for f32 {
const TYPE: Type = Type::F32;
}
impl AsValueType for f64 {
const TYPE: Type = Type::F64;
}
pub trait FunctionArgs<O> {
type FuncType;
unsafe fn call(self, func: Self::FuncType, vm_ctx: *const u8) -> O;
fn into_func(start: *const u8) -> Self::FuncType;
}
type VmCtxPtr = u64;
macro_rules! impl_function_args {
($first:ident $(, $rest:ident)*) => {
impl<$first, $($rest),*> FunctionArgs for ($first, $($rest),*) {
impl<Output, $first, $($rest),*> FunctionArgs<Output> for ($first, $($rest),*) {
type FuncType = unsafe extern "sysv64" fn(VmCtxPtr, $first $(, $rest)*) -> Output;
#[allow(non_snake_case)]
unsafe fn call<T>(self, start: *const u8) -> T {
let func = mem::transmute::<_, extern "sysv64" fn($first, $($rest),*) -> T>(start);
{
let ($first, $($rest),*) = self;
func($first, $($rest),*)
}
unsafe fn call(self, func: Self::FuncType, vm_ctx: *const u8) -> Output {
let ($first, $($rest),*) = self;
func(vm_ctx as VmCtxPtr, $first $(, $rest)*)
}
fn into_func(start: *const u8) -> Self::FuncType {
unsafe { mem::transmute(start) }
}
}
impl<$first: AsValueType, $($rest: AsValueType),*> TypeList for ($first, $($rest),*) {
const TYPE_LIST: &'static [Type] = &[$first::TYPE, $($rest::TYPE),*];
}
impl_function_args!($($rest),*);
};
() => {
impl FunctionArgs for () {
unsafe fn call<T>(self, start: *const u8) -> T {
let func = mem::transmute::<_, extern "sysv64" fn() -> T>(start);
func()
impl<Output> FunctionArgs<Output> for () {
type FuncType = unsafe extern "sysv64" fn(VmCtxPtr) -> Output;
unsafe fn call(self, func: Self::FuncType, vm_ctx: *const u8) -> Output {
func(vm_ctx as VmCtxPtr)
}
fn into_func(start: *const u8) -> Self::FuncType {
unsafe { mem::transmute(start) }
}
}
impl TypeList for () {
const TYPE_LIST: &'static [Type] = &[];
}
};
}
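As an illustration of what `impl_function_args!` expands to (a sketch, not code from this diff), the tuple impls give `execute_func` a compile-time list of Wasm value types to compare against a function's signature:

```rust
fn type_list_examples() {
    use wasmparser::Type;
    // Each element's `AsValueType::TYPE` is collected into `TYPE_LIST`.
    assert_eq!(<(i32, u64) as TypeList>::TYPE_LIST, &[Type::I32, Type::I64]);
    // The nullary impl covers functions that take no arguments.
    assert_eq!(<() as TypeList>::TYPE_LIST, &[] as &[Type]);
}
```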
@@ -38,42 +102,374 @@ impl_function_args!(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S);
#[derive(Default)]
pub struct TranslatedModule {
translated_code_section: Option<TranslatedCodeSection>,
types: SimpleContext,
// TODO: Should we wrap this in a `Mutex` so that calling functions from multiple
// threads doesn't cause data races?
table: Option<(TableType, Vec<u32>)>,
memory: Option<MemoryType>,
}
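// Hashes a value with the std `DefaultHasher`. The result (truncated to
// u32 at use sites) tags both table entries (`RuntimeFunc::sig_hash`) and
// `call_indirect` call sites, so signatures can be compared with a single
// integer comparison at runtime.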
pub fn quickhash<H: Hash>(h: H) -> u64 {
let mut hasher = std::collections::hash_map::DefaultHasher::new();
h.hash(&mut hasher);
hasher.finish()
}
impl TranslatedModule {
// For testing only.
// TODO: Handle generic signatures.
pub unsafe fn execute_func<Args: FunctionArgs, T>(&self, func_idx: u32, args: Args) -> T {
pub fn instantiate(mut self) -> ExecutableModule {
let table = {
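// Build the indirect-call table: each element-section entry becomes a
// (code pointer, signature hash) pair.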
let code_section = self
.translated_code_section
.as_ref()
.expect("We don't currently support a table section without a code section");
let types = &self.types;
self.table
.as_mut()
.map(|&mut (_, ref mut idxs)| {
let initial = idxs
.iter()
.map(|i| {
let start = code_section.func_start(*i as _);
let ty = types.func_type(*i);
RuntimeFunc {
func_start: start,
sig_hash: quickhash(ty) as u32,
}
})
.collect::<Vec<_>>();
let out = BoxSlice::from(initial.into_boxed_slice());
out
})
.unwrap_or(BoxSlice {
ptr: std::ptr::NonNull::dangling().as_ptr(),
len: 0,
})
};
let mem_size = self.memory.map(|m| m.limits.initial).unwrap_or(0) as usize;
let mem: BoxSlice<_> = vec![0u8; mem_size * WASM_PAGE_SIZE]
.into_boxed_slice()
.into();
let ctx = if mem.len > 0 || table.len > 0 {
Some(Box::new(VmCtx { table, mem }))
} else {
None
};
ExecutableModule {
module: self,
context: ctx,
}
}
pub fn disassemble(&self) {
self.translated_code_section
.as_ref()
.expect("no code section")
.disassemble();
}
}
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ExecutionError {
FuncIndexOutOfBounds,
TypeMismatch,
}
pub struct ExecutableModule {
module: TranslatedModule,
context: Option<Box<VmCtx>>,
}
impl ExecutableModule {
/// Executes the function _without checking types_. This can cause undefined
/// memory to be accessed.
pub unsafe fn execute_func_unchecked<Args: FunctionArgs<T>, T>(
&self,
func_idx: u32,
args: Args,
) -> T {
let code_section = self
.module
.translated_code_section
.as_ref()
.expect("no code section");
let start_buf = code_section.func_start(func_idx as usize);
args.call(start_buf)
args.call(
Args::into_func(start_buf),
self.context
.as_ref()
.map(|ctx| (&**ctx) as *const VmCtx as *const u8)
.unwrap_or(std::ptr::null()),
)
}
pub fn execute_func<Args: FunctionArgs<T> + TypeList, T: TypeList>(
&self,
func_idx: u32,
args: Args,
) -> Result<T, ExecutionError> {
let module = &self.module;
if func_idx as usize >= module.types.func_ty_indicies.len() {
return Err(ExecutionError::FuncIndexOutOfBounds);
}
let type_ = module.types.func_type(func_idx);
// TODO: Handle "compatible" types (i.e. f32 and i32)
if (&type_.params[..], &type_.returns[..]) != (Args::TYPE_LIST, T::TYPE_LIST) {
return Err(ExecutionError::TypeMismatch);
}
Ok(unsafe { self.execute_func_unchecked(func_idx, args) })
}
pub fn disassemble(&self) {
self.module.disassemble();
}
}
#[derive(Default)]
pub struct TranslationContext {
type FuncRef = *const u8;
pub struct RuntimeFunc {
sig_hash: u32,
func_start: FuncRef,
}
unsafe impl Send for RuntimeFunc {}
unsafe impl Sync for RuntimeFunc {}
impl RuntimeFunc {
pub fn offset_of_sig_hash() -> usize {
offset_of!(Self, sig_hash)
}
pub fn offset_of_func_start() -> usize {
offset_of!(Self, func_start)
}
}
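// `BoxSlice` is a `Box<[T]>` decomposed into raw parts so that generated
// code can load `ptr` and `len` at fixed offsets (see `VmCtx::offset_of_*`
// below); `Drop` reassembles the allocation to free it.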
struct BoxSlice<T> {
len: usize,
ptr: *mut T,
}
impl<T> From<Box<[T]>> for BoxSlice<T> {
fn from(mut other: Box<[T]>) -> Self {
let out = BoxSlice {
len: other.len(),
ptr: other.as_mut_ptr(),
};
mem::forget(other);
out
}
}
unsafe impl<T: Send> Send for BoxSlice<T> {}
unsafe impl<T: Sync> Sync for BoxSlice<T> {}
impl<T> Drop for BoxSlice<T> {
fn drop(&mut self) {
unsafe { Vec::from_raw_parts(self.ptr, self.len, self.len) };
}
}
pub struct VmCtx {
table: BoxSlice<RuntimeFunc>,
mem: BoxSlice<u8>,
}
impl VmCtx {
pub fn offset_of_memory_ptr() -> u8 {
offset_of!(Self, mem.ptr)
.try_into()
.expect("Offset exceeded size of u8")
}
pub fn offset_of_memory_len() -> u8 {
offset_of!(Self, mem.len)
.try_into()
.expect("Offset exceeded size of u8")
}
pub fn offset_of_funcs_ptr() -> u8 {
offset_of!(Self, table.ptr)
.try_into()
.expect("Offset exceeded size of u8")
}
pub fn offset_of_funcs_len() -> u8 {
offset_of!(Self, table.len)
.try_into()
.expect("Offset exceeded size of u8")
}
}
#[derive(Default, Debug)]
pub struct SimpleContext {
types: Vec<FuncType>,
func_ty_indicies: Vec<u32>,
}
impl TranslationContext {
pub fn func_type(&self, func_idx: u32) -> &FuncType {
// TODO: This assumes that there are no imported functions.
let func_ty_idx = self.func_ty_indicies[func_idx as usize];
&self.types[func_ty_idx as usize]
const WASM_PAGE_SIZE: usize = 65_536;
pub trait Signature {
type Type: SigType;
fn params(&self) -> &[Self::Type];
fn returns(&self) -> &[Self::Type];
}
pub trait SigType {
fn to_microwasm_type(&self) -> microwasm::SignlessType;
fn is_float(&self) -> bool;
}
impl SigType for AbiParam {
fn to_microwasm_type(&self) -> microwasm::SignlessType {
use microwasm::{Size::*, Type::*};
if self.value_type.is_int() {
match self.value_type.bits() {
32 => Int(_32),
64 => Int(_64),
_ => unimplemented!(),
}
} else if self.value_type.is_float() {
match self.value_type.bits() {
32 => Float(_32),
64 => Float(_64),
_ => unimplemented!(),
}
} else {
unimplemented!()
}
}
fn is_float(&self) -> bool {
self.value_type.is_float()
}
}
impl Signature for CraneliftSignature {
type Type = AbiParam;
fn params(&self) -> &[Self::Type] {
// TODO: We want to instead add the `VMContext` to the signature used by
// cranelift, removing the special-casing from the internals.
assert_eq!(self.params[0].purpose, ir::ArgumentPurpose::VMContext);
assert_eq!(self.call_conv, isa::CallConv::SystemV);
&self.params[1..]
}
fn returns(&self) -> &[Self::Type] {
assert_eq!(self.call_conv, isa::CallConv::SystemV);
&self.returns
}
}
impl SigType for wasmparser::Type {
fn to_microwasm_type(&self) -> microwasm::SignlessType {
microwasm::Type::from_wasm(*self).unwrap()
}
fn is_float(&self) -> bool {
match self {
wasmparser::Type::F32 | wasmparser::Type::F64 => true,
_ => false,
}
}
}
impl Signature for FuncType {
type Type = wasmparser::Type;
fn params(&self) -> &[Self::Type] {
&*self.params
}
fn returns(&self) -> &[Self::Type] {
&*self.returns
}
}
pub trait ModuleContext {
type Signature: Signature + Hash;
fn func_type_index(&self, func_idx: u32) -> u32;
fn signature(&self, index: u32) -> &Self::Signature;
fn offset_of_memory_ptr(&self) -> u8;
fn offset_of_memory_len(&self) -> u8;
fn offset_of_funcs_ptr(&self) -> u8;
fn offset_of_funcs_len(&self) -> u8;
fn func_index(&self, defined_func_index: u32) -> u32;
fn defined_func_index(&self, func_index: u32) -> Option<u32>;
fn defined_func_type(&self, func_idx: u32) -> &Self::Signature {
// TODO: This assumes that there are no imported functions.
self.func_type(self.func_index(func_idx))
}
fn func_type(&self, func_idx: u32) -> &Self::Signature {
// TODO: This assumes that there are no imported functions.
self.signature(self.func_type_index(func_idx))
}
}
impl ModuleContext for SimpleContext {
type Signature = FuncType;
// TODO: We don't support external functions yet
fn func_index(&self, func_idx: u32) -> u32 {
func_idx
}
fn defined_func_index(&self, func_idx: u32) -> Option<u32> {
Some(func_idx)
}
fn func_type_index(&self, func_idx: u32) -> u32 {
self.func_ty_indicies[func_idx as usize]
}
fn signature(&self, index: u32) -> &Self::Signature {
&self.types[index as usize]
}
fn offset_of_memory_ptr(&self) -> u8 {
VmCtx::offset_of_memory_ptr()
}
fn offset_of_memory_len(&self) -> u8 {
VmCtx::offset_of_memory_len()
}
fn offset_of_funcs_ptr(&self) -> u8 {
VmCtx::offset_of_funcs_ptr()
}
fn offset_of_funcs_len(&self) -> u8 {
VmCtx::offset_of_funcs_len()
}
// TODO: type of a global
}
pub fn translate(data: &[u8]) -> Result<ExecutableModule, Error> {
translate_only(data).map(|m| m.instantiate())
}
/// Translate from a slice of bytes holding a wasm module.
pub fn translate(data: &[u8]) -> Result<TranslatedModule, Error> {
pub fn translate_only(data: &[u8]) -> Result<TranslatedModule, Error> {
let mut reader = ModuleReader::new(data)?;
let mut output = TranslatedModule::default();
let mut table = None;
reader.skip_custom_sections()?;
if reader.eof() {
@@ -81,11 +477,9 @@ pub fn translate(data: &[u8]) -> Result<TranslatedModule, Error> {
}
let mut section = reader.read()?;
let mut ctx = TranslationContext::default();
if let SectionCode::Type = section.code {
let types_reader = section.get_type_section_reader()?;
ctx.types = translate_sections::type_(types_reader)?;
output.types.types = translate_sections::type_(types_reader)?;
reader.skip_custom_sections()?;
if reader.eof() {
@@ -107,7 +501,7 @@ pub fn translate(data: &[u8]) -> Result<TranslatedModule, Error> {
if let SectionCode::Function = section.code {
let functions = section.get_function_section_reader()?;
ctx.func_ty_indicies = translate_sections::function(functions)?;
output.types.func_ty_indicies = translate_sections::function(functions)?;
reader.skip_custom_sections()?;
if reader.eof() {
@@ -118,7 +512,11 @@ pub fn translate(data: &[u8]) -> Result<TranslatedModule, Error> {
if let SectionCode::Table = section.code {
let tables = section.get_table_section_reader()?;
translate_sections::table(tables)?;
let mut tables = translate_sections::table(tables)?;
assert!(tables.len() <= 1);
table = tables.drain(..).next();
reader.skip_custom_sections()?;
if reader.eof() {
@@ -129,7 +527,18 @@ pub fn translate(data: &[u8]) -> Result<TranslatedModule, Error> {
if let SectionCode::Memory = section.code {
let memories = section.get_memory_section_reader()?;
translate_sections::memory(memories)?;
let mem = translate_sections::memory(memories)?;
assert!(
mem.len() <= 1,
"Multiple memory sections not yet unimplemented"
);
if !mem.is_empty() {
let mem = mem[0];
assert_eq!(Some(mem.limits.initial), mem.limits.maximum);
output.memory = Some(mem);
}
reader.skip_custom_sections()?;
if reader.eof() {
@@ -173,7 +582,12 @@ pub fn translate(data: &[u8]) -> Result<TranslatedModule, Error> {
if let SectionCode::Element = section.code {
let elements = section.get_element_section_reader()?;
translate_sections::element(elements)?;
let elements = translate_sections::element(elements)?;
output.table = Some((
table.expect("Element section with no table section"),
elements,
));
reader.skip_custom_sections()?;
if reader.eof() {
@@ -184,7 +598,7 @@ pub fn translate(data: &[u8]) -> Result<TranslatedModule, Error> {
if let SectionCode::Code = section.code {
let code = section.get_code_section_reader()?;
output.translated_code_section = Some(translate_sections::code(code, &ctx)?);
output.translated_code_section = Some(translate_sections::code(code, &output.types)?);
reader.skip_custom_sections()?;
if reader.eof() {
@@ -198,5 +612,7 @@ pub fn translate(data: &[u8]) -> Result<TranslatedModule, Error> {
translate_sections::data(data)?;
}
assert!(reader.eof());
Ok(output)
}

File diff suppressed because it is too large.


@@ -1,22 +1,23 @@
use backend::{CodeGenSession, TranslatedCodeSection};
use error::Error;
use function_body;
use module::TranslationContext;
use microwasm::{MicrowasmConv, Type as MWType};
use module::{ModuleContext, SimpleContext};
#[allow(unused_imports)] // for now
use wasmparser::{
CodeSectionReader, Data, DataSectionReader, Element, ElementSectionReader, Export,
ExportSectionReader, ExternalKind, FuncType, FunctionSectionReader, Global,
GlobalSectionReader, GlobalType, Import, ImportSectionEntryType, ImportSectionReader,
MemorySectionReader, MemoryType, Operator, TableSectionReader, Type, TypeSectionReader,
MemorySectionReader, MemoryType, Operator, TableSectionReader, TableType, Type,
TypeSectionReader,
};
/// Parses the Type section of the wasm module.
pub fn type_(types_reader: TypeSectionReader) -> Result<Vec<FuncType>, Error> {
let mut types = vec![];
for entry in types_reader {
types.push(entry?);
}
Ok(types)
types_reader
.into_iter()
.map(|r| r.map_err(Into::into))
.collect()
}
/// Parses the Import section of the wasm module.
@@ -29,27 +30,23 @@ pub fn import(imports: ImportSectionReader) -> Result<(), Error> {
/// Parses the Function section of the wasm module.
pub fn function(functions: FunctionSectionReader) -> Result<Vec<u32>, Error> {
let mut func_ty_indicies = vec![];
for entry in functions {
func_ty_indicies.push(entry?);
}
Ok(func_ty_indicies)
functions
.into_iter()
.map(|r| r.map_err(Into::into))
.collect()
}
/// Parses the Table section of the wasm module.
pub fn table(tables: TableSectionReader) -> Result<(), Error> {
for entry in tables {
entry?; // TODO
}
Ok(())
pub fn table(tables: TableSectionReader) -> Result<Vec<TableType>, Error> {
tables.into_iter().map(|r| r.map_err(Into::into)).collect()
}
/// Parses the Memory section of the wasm module.
pub fn memory(memories: MemorySectionReader) -> Result<(), Error> {
for entry in memories {
entry?; // TODO
}
Ok(())
pub fn memory(memories: MemorySectionReader) -> Result<Vec<MemoryType>, Error> {
memories
.into_iter()
.map(|r| r.map_err(Into::into))
.collect()
}
/// Parses the Global section of the wasm module.
@@ -75,23 +72,57 @@ pub fn start(_index: u32) -> Result<(), Error> {
}
/// Parses the Element section of the wasm module.
pub fn element(elements: ElementSectionReader) -> Result<(), Error> {
pub fn element(elements: ElementSectionReader) -> Result<Vec<u32>, Error> {
let mut out = Vec::new();
for entry in elements {
entry?; // TODO
let entry = entry?;
assert_eq!(entry.table_index, 0);
let offset = {
let mut reader = entry.init_expr.get_operators_reader();
let out = match reader.read() {
Ok(Operator::I32Const { value }) => value,
_ => panic!("We only support i32.const table init expressions right now"),
};
//reader.ensure_end()?;
out
};
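// Element segments must be contiguous and in order: each segment's offset
// must equal the number of entries gathered so far.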
assert_eq!(offset, out.len() as i32);
let elements = entry
.items
.get_items_reader()?
.into_iter()
.collect::<Result<Vec<_>, _>>()?;
out.extend(elements);
}
Ok(())
Ok(out)
}
/// Parses the Code section of the wasm module.
pub fn code(
code: CodeSectionReader,
translation_ctx: &TranslationContext,
translation_ctx: &SimpleContext,
) -> Result<TranslatedCodeSection, Error> {
let func_count = code.get_count();
let mut session = CodeGenSession::new(func_count);
let mut session = CodeGenSession::new(func_count, translation_ctx);
for (idx, body) in code.into_iter().enumerate() {
function_body::translate(&mut session, translation_ctx, idx as u32, &body?)?;
let body = body?;
function_body::translate_wasm(
&mut session,
idx as u32,
&body,
)?;
}
Ok(session.into_translated_code_section()?)
}


Submodule wasmparser.rs added at e8bc42b377