Merge ends, store locals in registers where possible
This commit is contained in:
@@ -13,6 +13,8 @@ arrayvec = "0.4"
|
||||
dynasm = "0.2.3"
|
||||
dynasmrt = "0.2.3"
|
||||
wasmparser = { path = "./wasmparser.rs" }
|
||||
memoffset = "0.2"
|
||||
itertools = "0.8"
|
||||
capstone = "0.5.0"
|
||||
failure = "0.1.3"
|
||||
failure_derive = "0.1.3"
|
||||
|
||||
@@ -384,7 +384,7 @@ pub struct Locals {
|
||||
/// registers that this can contain. If we need to move the argument
|
||||
/// out of a register (for example, because we're calling a function)
|
||||
/// we note that down here, so we don't have to move it back afterwards.
|
||||
register_arguments: ArrayVec<[ArgLoc; ARGS_IN_GPRS.len()]>,
|
||||
register_locals: ArrayVec<[ArgLoc; ARGS_IN_GPRS.len()]>,
|
||||
/// The number of arguments stored on the stack.
|
||||
num_stack_args: u32,
|
||||
/// The number of local stack slots, i.e. the amount of stack space reserved for locals.
|
||||
@@ -393,7 +393,7 @@ pub struct Locals {
|
||||
|
||||
impl Locals {
|
||||
fn register(&self, index: u32) -> Option<GPR> {
|
||||
if index < self.register_arguments.len() as u32 {
|
||||
if index < self.register_locals.len() as u32 {
|
||||
Some(ARGS_IN_GPRS[index as usize])
|
||||
} else {
|
||||
None
|
||||
@@ -401,19 +401,19 @@ impl Locals {
|
||||
}
|
||||
|
||||
fn add_pos(&mut self, index: u32, loc: ValueLocation) {
|
||||
self.register_arguments[index as usize].add_loc(loc);
|
||||
self.register_locals[index as usize].add_loc(loc);
|
||||
}
|
||||
|
||||
fn set_pos(&mut self, index: u32, loc: ValueLocation) {
|
||||
self.register_arguments[index as usize] = ArgLoc::from_loc(loc);
|
||||
self.register_locals[index as usize] = ArgLoc::from_loc(loc);
|
||||
}
|
||||
|
||||
fn get(&self, index: u32) -> ValueLocation {
|
||||
self.register_arguments
|
||||
self.register_locals
|
||||
.get(index as usize)
|
||||
.map(ArgLoc::best_loc)
|
||||
.unwrap_or_else(|| {
|
||||
let stack_index = index - self.register_arguments.len() as u32;
|
||||
let stack_index = index - self.register_locals.len() as u32;
|
||||
if stack_index < self.num_stack_args {
|
||||
ValueLocation::Stack(
|
||||
((stack_index + self.num_local_stack_slots + 2) * WORD_SIZE) as _,
|
||||
@@ -426,7 +426,7 @@ impl Locals {
|
||||
}
|
||||
|
||||
fn num_args(&self) -> u32 {
|
||||
self.register_arguments.len() as u32 + self.num_stack_args
|
||||
self.register_locals.len() as u32 + self.num_stack_args
|
||||
}
|
||||
|
||||
fn vmctx_index(&self) -> u32 {
|
||||
@@ -1245,20 +1245,20 @@ impl Context<'_> {
|
||||
parent_block_state: BlockState,
|
||||
before_push_return: impl FnOnce(&mut Self),
|
||||
) {
|
||||
// TODO: This should currently never be called, but is important for if we want to
|
||||
// have a more complex stack spilling scheme.
|
||||
debug_assert_eq!(
|
||||
self.block_state.depth, parent_block_state.depth,
|
||||
"Imbalanced pushes and pops"
|
||||
);
|
||||
// TODO: This should currently never be called, but is important for if we want to
|
||||
// have a more complex stack spilling scheme.
|
||||
// TODO: This should use an `end_locals`-style system where we only do this when
|
||||
// control flow splits.
|
||||
if self.block_state.depth != parent_block_state.depth {
|
||||
dynasm!(self.asm
|
||||
; add rsp, ((self.block_state.depth.0 - parent_block_state.depth.0) * WORD_SIZE) as i32
|
||||
);
|
||||
}
|
||||
|
||||
self.restore_locals();
|
||||
|
||||
let return_reg = self.block_state.return_register;
|
||||
let locals = mem::replace(&mut self.block_state.locals, Default::default());
|
||||
self.block_state = parent_block_state;
|
||||
@@ -1282,10 +1282,10 @@ impl Context<'_> {
|
||||
for (src, dst) in self
|
||||
.block_state
|
||||
.locals
|
||||
.register_arguments
|
||||
.register_locals
|
||||
.clone()
|
||||
.iter()
|
||||
.zip(&locals.register_arguments)
|
||||
.zip(&locals.register_locals)
|
||||
{
|
||||
self.copy_value(src.best_loc(), dst.best_loc());
|
||||
}
|
||||
@@ -1293,9 +1293,9 @@ impl Context<'_> {
|
||||
for (src, dst) in self
|
||||
.block_state
|
||||
.locals
|
||||
.register_arguments
|
||||
.register_locals
|
||||
.iter_mut()
|
||||
.zip(&locals.register_arguments)
|
||||
.zip(&locals.register_locals)
|
||||
{
|
||||
src.union(*dst);
|
||||
}
|
||||
@@ -1715,7 +1715,7 @@ impl Context<'_> {
|
||||
if let Some(cur) = self
|
||||
.block_state
|
||||
.locals
|
||||
.register_arguments
|
||||
.register_locals
|
||||
.get_mut(local_idx as usize)
|
||||
{
|
||||
*cur = ArgLoc::from_loc(dst_loc);
|
||||
@@ -1736,7 +1736,7 @@ impl Context<'_> {
|
||||
if let Some(cur) = self
|
||||
.block_state
|
||||
.locals
|
||||
.register_arguments
|
||||
.register_locals
|
||||
.get_mut(local_idx as usize)
|
||||
{
|
||||
*cur = ArgLoc::from_loc(dst_loc);
|
||||
@@ -1821,10 +1821,10 @@ impl Context<'_> {
|
||||
fn free_arg_registers(&mut self, exclude: Option<u32>) {
|
||||
// This is bound to the maximum size of the `ArrayVec` and so can be considered to have constant
|
||||
// runtime
|
||||
for i in (0..self.block_state.locals.register_arguments.len())
|
||||
for i in (0..self.block_state.locals.register_locals.len())
|
||||
.filter(|i| exclude != Some(*i as u32))
|
||||
{
|
||||
match self.block_state.locals.register_arguments[i] {
|
||||
match self.block_state.locals.register_locals[i] {
|
||||
ArgLoc::Register(reg) => {
|
||||
if ARGS_IN_GPRS.contains(&reg) {
|
||||
let offset =
|
||||
@@ -1832,7 +1832,7 @@ impl Context<'_> {
|
||||
* WORD_SIZE) as _;
|
||||
let dst = ValueLocation::Stack(offset);
|
||||
self.copy_value(ValueLocation::Reg(reg), dst);
|
||||
self.block_state.locals.register_arguments[i].add_stack(offset);
|
||||
self.block_state.locals.register_locals[i].add_stack(offset);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
@@ -1989,7 +1989,7 @@ impl Context<'_> {
|
||||
|
||||
// If these values were in register they've now been invalidated, since
|
||||
// the callee can use them as scratch.
|
||||
for loc in self.block_state.locals.register_arguments.iter_mut() {
|
||||
for loc in self.block_state.locals.register_locals.iter_mut() {
|
||||
if let Some(offset) = loc.stack() {
|
||||
*loc = ArgLoc::Stack(offset);
|
||||
}
|
||||
@@ -2097,7 +2097,8 @@ impl Context<'_> {
|
||||
pub fn start_function(&mut self, arguments: u32, locals: u32) -> FunctionEnd {
|
||||
// To support `vmctx`
|
||||
let arguments = arguments + 1;
|
||||
let reg_args = &ARGS_IN_GPRS[..(arguments as usize).min(ARGS_IN_GPRS.len())];
|
||||
let (reg_args, locals_in_gprs) = ARGS_IN_GPRS.split_at((arguments as usize).min(ARGS_IN_GPRS.len()));
|
||||
let reg_locals = &locals_in_gprs[..(locals as usize).min(locals_in_gprs.len())];
|
||||
|
||||
// We need space to store the register arguments if we need to call a function
|
||||
// and overwrite these registers so we add `reg_args.len()`
|
||||
@@ -2107,8 +2108,8 @@ impl Context<'_> {
|
||||
let aligned_stack_slots = (stack_slots + 1) & !1;
|
||||
let frame_size: i32 = aligned_stack_slots as i32 * WORD_SIZE as i32;
|
||||
|
||||
self.block_state.locals.register_arguments =
|
||||
reg_args.iter().cloned().map(ArgLoc::Register).collect();
|
||||
self.block_state.locals.register_locals =
|
||||
reg_args.iter().chain(reg_locals).cloned().map(ArgLoc::Register).collect();
|
||||
self.block_state.locals.num_stack_args = arguments.saturating_sub(ARGS_IN_GPRS.len() as _);
|
||||
self.block_state.locals.num_local_stack_slots = stack_slots;
|
||||
self.block_state.return_register = Some(RAX);
|
||||
|
||||
@@ -40,6 +40,17 @@ impl ControlFrameKind {
|
||||
}
|
||||
}
|
||||
|
||||
fn end_labels(&self) -> impl Iterator<Item = Label> {
|
||||
self.block_end()
|
||||
.into_iter()
|
||||
.chain(if let ControlFrameKind::IfTrue { if_not, .. } = self {
|
||||
// this is `if .. end` construction. Define the `if_not` label.
|
||||
Some(*if_not)
|
||||
} else {
|
||||
None
|
||||
})
|
||||
}
|
||||
|
||||
fn is_loop(&self) -> bool {
|
||||
match *self {
|
||||
ControlFrameKind::Loop { .. } => true,
|
||||
@@ -174,12 +185,18 @@ pub fn translate(
|
||||
return_ty,
|
||||
));
|
||||
|
||||
let mut operators = itertools::put_back(operators.into_iter());
|
||||
|
||||
// TODO: We want to make this a state machine (maybe requires 1-element lookahead? Not sure) so that we
|
||||
// can coalesce multiple `end`s and optimise break-at-end-of-block into noop.
|
||||
// TODO: Does coalescing multiple `end`s matter since at worst this really only elides a single move at
|
||||
// the end of a function, and this is probably a no-op anyway due to register renaming.
|
||||
for op in operators {
|
||||
let op = op?;
|
||||
loop {
|
||||
let op = if let Some(op) = operators.next() {
|
||||
op?
|
||||
} else {
|
||||
break;
|
||||
};
|
||||
|
||||
match op {
|
||||
Operator::End | Operator::Else => {}
|
||||
@@ -306,28 +323,60 @@ pub fn translate(
|
||||
//
|
||||
// This doesn't require lookahead but it does require turning this loop into
|
||||
// a kind of state machine.
|
||||
let control_frame = control_frames.pop().expect("control stack is never empty");
|
||||
let mut control_frame = control_frames.pop().expect("control stack is never empty");
|
||||
let mut labels = control_frame
|
||||
.kind
|
||||
.end_labels()
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let mut end = control_frame.block_state.end_locals.take();
|
||||
|
||||
// Fold `End`s together to prevent unnecessary shuffling of locals
|
||||
loop {
|
||||
let op = if let Some(op) = operators.next() {
|
||||
op?
|
||||
} else {
|
||||
break;
|
||||
};
|
||||
|
||||
match op {
|
||||
Operator::End => {
|
||||
control_frame =
|
||||
control_frames.pop().expect("control stack is never empty");
|
||||
|
||||
labels.extend(control_frame.kind.end_labels());
|
||||
|
||||
end = control_frame.block_state.end_locals.take().or(end);
|
||||
}
|
||||
other => {
|
||||
operators.put_back(Ok(other));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let arity = control_frame.arity();
|
||||
|
||||
// Don't bother generating this code if we're in unreachable code
|
||||
if !control_frame.unreachable {
|
||||
ctx.return_from_block(arity);
|
||||
|
||||
// If there are no remaining frames we've hit the end of the function - we don't need to
|
||||
// restore locals since no execution will happen after this point.
|
||||
if !control_frames.is_empty() {
|
||||
if let Some(end) = end {
|
||||
ctx.restore_locals_to(&end);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let block_end = control_frame.kind.block_end();
|
||||
// TODO: What is the correct order of this and the `define_label`? It's clear for `block`s
|
||||
// but I'm not certain for `if..then..else..end`.
|
||||
ctx.end_block(control_frame.block_state, |ctx| {
|
||||
if let Some(block_end) = block_end {
|
||||
ctx.define_label(block_end);
|
||||
for label in labels {
|
||||
ctx.define_label(label);
|
||||
}
|
||||
});
|
||||
|
||||
if let ControlFrameKind::IfTrue { if_not, .. } = control_frame.kind {
|
||||
// this is `if .. end` construction. Define the `if_not` label here.
|
||||
ctx.define_label(if_not);
|
||||
}
|
||||
}
|
||||
Operator::I32Eq => ctx.i32_eq(),
|
||||
Operator::I32Eqz => ctx.i32_eqz(),
|
||||
|
||||
@@ -9,7 +9,10 @@ extern crate failure;
|
||||
extern crate wasmparser;
|
||||
#[macro_use]
|
||||
extern crate failure_derive;
|
||||
#[macro_use]
|
||||
extern crate memoffset;
|
||||
extern crate dynasmrt;
|
||||
extern crate itertools;
|
||||
#[cfg(test)]
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
@@ -28,4 +31,4 @@ mod translate_sections;
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
pub use module::{translate, TranslatedModule, ExecutableModule};
|
||||
pub use module::{translate, ExecutableModule, TranslatedModule};
|
||||
|
||||
@@ -254,6 +254,10 @@ impl VmCtx {
|
||||
pub fn offset_of_memory() -> usize {
|
||||
mem::size_of::<Self>()
|
||||
}
|
||||
|
||||
pub fn offset_of_funcs_ptr() -> usize {
|
||||
offset_of!(Self, table.ptr)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Drop for BoxSlice<T> {
|
||||
|
||||
74
src/tests.rs
74
src/tests.rs
@@ -698,6 +698,18 @@ fn wrong_index() {
|
||||
);
|
||||
}
|
||||
|
||||
fn iterative_fib_baseline(n: u32) -> u32 {
|
||||
let (mut a, mut b) = (1, 1);
|
||||
|
||||
for _ in 0..n {
|
||||
let old_a = a;
|
||||
a = b;
|
||||
b += old_a;
|
||||
}
|
||||
|
||||
a
|
||||
}
|
||||
|
||||
const FIBONACCI: &str = r#"
|
||||
(module
|
||||
(func $fib (param $n i32) (result i32)
|
||||
@@ -745,25 +757,59 @@ const FIBONACCI: &str = r#"
|
||||
|
||||
#[test]
|
||||
fn fib() {
|
||||
fn fib(n: u32) -> u32 {
|
||||
let (mut a, mut b) = (1, 1);
|
||||
|
||||
for _ in 0..n {
|
||||
let old_a = a;
|
||||
a = b;
|
||||
b += old_a;
|
||||
}
|
||||
|
||||
a
|
||||
}
|
||||
|
||||
let translated = translate_wat(FIBONACCI);
|
||||
translated.disassemble();
|
||||
|
||||
for x in 0..30 {
|
||||
assert_eq!(
|
||||
translated.execute_func::<_, u32>(0, (x,)),
|
||||
Ok(fib(x)),
|
||||
Ok(iterative_fib_baseline(x)),
|
||||
"Failed for x={}",
|
||||
x
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Generated by Rust for the `fib` function in `bench_fibonacci_baseline`
|
||||
const FIBONACCI_OPT: &str = r"
|
||||
(module
|
||||
(func $fib (param $p0 i32) (result i32)
|
||||
(local $l1 i32)
|
||||
(set_local $l1
|
||||
(i32.const 1))
|
||||
(block $B0
|
||||
(br_if $B0
|
||||
(i32.lt_u
|
||||
(get_local $p0)
|
||||
(i32.const 2)))
|
||||
(set_local $l1
|
||||
(i32.const 1))
|
||||
(loop $L1
|
||||
(set_local $l1
|
||||
(i32.add
|
||||
(call $fib
|
||||
(i32.add
|
||||
(get_local $p0)
|
||||
(i32.const -1)))
|
||||
(get_local $l1)))
|
||||
(br_if $L1
|
||||
(i32.gt_u
|
||||
(tee_local $p0
|
||||
(i32.add
|
||||
(get_local $p0)
|
||||
(i32.const -2)))
|
||||
(i32.const 1)))))
|
||||
(get_local $l1)))";
|
||||
|
||||
#[test]
|
||||
fn fib_opt() {
|
||||
let translated = translate_wat(FIBONACCI_OPT);
|
||||
translated.disassemble();
|
||||
|
||||
for x in 0..30 {
|
||||
assert_eq!(
|
||||
translated.execute_func::<_, u32>(0, (x,)),
|
||||
Ok(iterative_fib_baseline(x)),
|
||||
"Failed for x={}",
|
||||
x
|
||||
);
|
||||
@@ -940,7 +986,7 @@ fn bench_fibonacci_compile(b: &mut test::Bencher) {
|
||||
|
||||
#[bench]
|
||||
fn bench_fibonacci_run(b: &mut test::Bencher) {
|
||||
let wasm = wabt::wat2wasm(FIBONACCI).unwrap();
|
||||
let wasm = wabt::wat2wasm(FIBONACCI_OPT).unwrap();
|
||||
let module = translate(&wasm).unwrap();
|
||||
|
||||
b.iter(|| module.execute_func::<_, u32>(0, (20,)));
|
||||
|
||||
Reference in New Issue
Block a user