From 52c04433684305227445c91b1b4c1e0bbdaf9581 Mon Sep 17 00:00:00 2001 From: Sergey Pepyakin Date: Thu, 29 Nov 2018 18:55:49 +0100 Subject: [PATCH 01/61] Pass function sig in function_body::translate --- src/function_body.rs | 45 +++++++++++++++++++++++++++++++-------- src/module.rs | 22 ++++++++++++------- src/tests.rs | 5 +++++ src/translate_sections.rs | 31 +++++++++++++++++---------- 4 files changed, 75 insertions(+), 28 deletions(-) diff --git a/src/function_body.rs b/src/function_body.rs index c49699f1b4..213da6ee59 100644 --- a/src/function_body.rs +++ b/src/function_body.rs @@ -1,6 +1,6 @@ use backend::*; use error::Error; -use wasmparser::{FunctionBody, Operator, Type}; +use wasmparser::{FuncType, FunctionBody, Operator, Type}; // TODO: Use own declared `Type` enum. @@ -86,15 +86,21 @@ impl ControlFrame { } } -pub fn translate(session: &mut CodeGenSession, body: &FunctionBody) -> Result<(), Error> { +pub fn translate( + session: &mut CodeGenSession, + func_type: &FuncType, + body: &FunctionBody, +) -> Result<(), Error> { let locals = body.get_locals_reader()?; - // Assume signature is (i32, i32) -> i32 for now. - // TODO: Use a real signature - const ARG_COUNT: u32 = 2; - let return_ty = Type::I32; + let arg_count = func_type.params.len() as u32; + let return_ty = if func_type.returns.len() > 0 { + func_type.returns[0] + } else { + Type::EmptyBlockType + }; - let mut framesize = ARG_COUNT; + let mut framesize = arg_count; for local in locals { let (count, _ty) = local?; framesize += count; @@ -105,7 +111,7 @@ pub fn translate(session: &mut CodeGenSession, body: &FunctionBody) -> Result<() prologue(&mut ctx, framesize); - for arg_pos in 0..ARG_COUNT { + for arg_pos in 0..arg_count { copy_incoming_arg(&mut ctx, arg_pos); } @@ -191,6 +197,13 @@ pub fn translate(session: &mut CodeGenSession, body: &FunctionBody) -> Result<() } restore_stack_depth(&mut ctx, control_frame.outgoing_stack_depth()); + + if control_frames.len() == 0 { + // This is the last control frame. Perform the implicit return here. + if return_ty != Type::EmptyBlockType { + prepare_return_value(&mut ctx); + } + } } Operator::I32Eq => { relop_eq_i32(&mut ctx); @@ -201,12 +214,26 @@ pub fn translate(session: &mut CodeGenSession, body: &FunctionBody) -> Result<() Operator::GetLocal { local_index } => { get_local_i32(&mut ctx, local_index); } + Operator::Call { function_index } => { + // TODO: find out the signature of this function + // this requires to generalize the function types infrasturcture + + // TODO: ensure that this function is locally defined + // We would like to support imported functions at some point + + // TODO: pop arguments and move them in appropriate positions. + // only 6 for now. + + // TODO: jump to the specified position + // this requires us saving function start locations in codegensession. + + panic!() + } _ => { trap(&mut ctx); } } } - prepare_return_value(&mut ctx); epilogue(&mut ctx); Ok(()) diff --git a/src/module.rs b/src/module.rs index 06a6f9bbb5..6a900a2695 100644 --- a/src/module.rs +++ b/src/module.rs @@ -1,7 +1,7 @@ -use std::mem; -use error::Error; -use translate_sections; use backend::TranslatedCodeSection; +use error::Error; +use std::mem; +use translate_sections; use wasmparser::{ModuleReader, SectionCode}; #[derive(Default)] @@ -14,7 +14,10 @@ impl TranslatedModule { // Assume signature is (i32, i32) -> i32 for now. // TODO: Handle generic signatures. 
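    // One way to handle generic signatures, sketched here as a comment (a
    // later patch in this series implements essentially this): let the
    // argument tuple drive the call through a trait,
    //
    //     pub trait FunctionArgs<T> {
    //         unsafe fn call(self, start: *const u8) -> T;
    //     }
    //
    // with one impl per tuple arity that transmutes `start` into the matching
    // `extern "sysv64" fn(...) -> T` pointer and invokes it.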
pub fn execute_func(&self, func_idx: u32, a: usize, b: usize) -> usize { - let code_section = self.translated_code_section.as_ref().expect("no code section"); + let code_section = self + .translated_code_section + .as_ref() + .expect("no code section"); let start_buf = code_section.func_start(func_idx as usize); unsafe { @@ -34,10 +37,12 @@ pub fn translate(data: &[u8]) -> Result { return Ok(output); } let mut section = reader.read()?; + let mut types = vec![]; + let mut func_ty_indicies = vec![]; if let SectionCode::Type = section.code { - let types = section.get_type_section_reader()?; - translate_sections::type_(types)?; + let types_reader = section.get_type_section_reader()?; + types = translate_sections::type_(types_reader)?; reader.skip_custom_sections()?; if reader.eof() { @@ -59,7 +64,7 @@ pub fn translate(data: &[u8]) -> Result { if let SectionCode::Function = section.code { let functions = section.get_function_section_reader()?; - translate_sections::function(functions)?; + func_ty_indicies = translate_sections::function(functions)?; reader.skip_custom_sections()?; if reader.eof() { @@ -136,7 +141,8 @@ pub fn translate(data: &[u8]) -> Result { if let SectionCode::Code = section.code { let code = section.get_code_section_reader()?; - output.translated_code_section = Some(translate_sections::code(code)?); + output.translated_code_section = + Some(translate_sections::code(code, &types, &func_ty_indicies)?); reader.skip_custom_sections()?; if reader.eof() { diff --git a/src/tests.rs b/src/tests.rs index b6cc91faae..4a505d6983 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -13,6 +13,11 @@ fn execute_wat(wat: &str, a: usize, b: usize) -> usize { translated.execute_func(0, a, b) } +#[test] +fn empty() { + let _ = translate_wat("(module (func))"); +} + #[test] fn adds() { const CASES: &[(usize, usize, usize)] = &[ diff --git a/src/translate_sections.rs b/src/translate_sections.rs index df2936fdbe..db97713a7a 100644 --- a/src/translate_sections.rs +++ b/src/translate_sections.rs @@ -1,3 +1,4 @@ +use backend::{CodeGenSession, TranslatedCodeSection}; use error::Error; use function_body; #[allow(unused_imports)] // for now @@ -7,14 +8,14 @@ use wasmparser::{ GlobalSectionReader, GlobalType, Import, ImportSectionEntryType, ImportSectionReader, MemorySectionReader, MemoryType, Operator, TableSectionReader, Type, TypeSectionReader, }; -use backend::{CodeGenSession, TranslatedCodeSection}; /// Parses the Type section of the wasm module. -pub fn type_(types: TypeSectionReader) -> Result<(), Error> { - for entry in types { - entry?; // TODO +pub fn type_(types_reader: TypeSectionReader) -> Result, Error> { + let mut types = vec![]; + for entry in types_reader { + types.push(entry?); } - Ok(()) + Ok(types) } /// Parses the Import section of the wasm module. @@ -26,11 +27,12 @@ pub fn import(imports: ImportSectionReader) -> Result<(), Error> { } /// Parses the Function section of the wasm module. -pub fn function(functions: FunctionSectionReader) -> Result<(), Error> { +pub fn function(functions: FunctionSectionReader) -> Result, Error> { + let mut func_ty_indicies = vec![]; for entry in functions { - entry?; // TODO + func_ty_indicies.push(entry?); } - Ok(()) + Ok(func_ty_indicies) } /// Parses the Table section of the wasm module. @@ -80,10 +82,17 @@ pub fn element(elements: ElementSectionReader) -> Result<(), Error> { } /// Parses the Code section of the wasm module. 
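/// A function's signature is recovered in two steps: the Function section
/// maps a function index to a type index (`func_ty_indicies`), and the Type
/// section maps that to the actual `FuncType`, i.e.
/// `types[func_ty_indicies[func_idx] as usize]`.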
-pub fn code(code: CodeSectionReader) -> Result { +pub fn code( + code: CodeSectionReader, + types: &[FuncType], + func_ty_indicies: &[u32], +) -> Result { let mut session = CodeGenSession::new(); - for body in code { - function_body::translate(&mut session, &body?)?; + for (idx, body) in code.into_iter().enumerate() { + let func_ty_idx = func_ty_indicies[idx]; + let func_ty = &types[func_ty_idx as usize]; + + function_body::translate(&mut session, &func_ty, &body?)?; } Ok(session.into_translated_code_section()?) } From 5eb43f027a3fd6822199146453dea76235a31f49 Mon Sep 17 00:00:00 2001 From: Sergey Pepyakin Date: Thu, 29 Nov 2018 22:28:10 +0100 Subject: [PATCH 02/61] Introduce TranslationContext --- src/function_body.rs | 2 ++ src/module.rs | 26 ++++++++++++++++++++------ src/translate_sections.rs | 10 ++++------ 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/src/function_body.rs b/src/function_body.rs index 213da6ee59..d0f83fab4b 100644 --- a/src/function_body.rs +++ b/src/function_body.rs @@ -1,4 +1,5 @@ use backend::*; +use module::TranslationContext; use error::Error; use wasmparser::{FuncType, FunctionBody, Operator, Type}; @@ -88,6 +89,7 @@ impl ControlFrame { pub fn translate( session: &mut CodeGenSession, + translation_ctx: &TranslationContext, func_type: &FuncType, body: &FunctionBody, ) -> Result<(), Error> { diff --git a/src/module.rs b/src/module.rs index 6a900a2695..e2c71ba989 100644 --- a/src/module.rs +++ b/src/module.rs @@ -2,7 +2,7 @@ use backend::TranslatedCodeSection; use error::Error; use std::mem; use translate_sections; -use wasmparser::{ModuleReader, SectionCode}; +use wasmparser::{FuncType, ModuleReader, SectionCode}; #[derive(Default)] pub struct TranslatedModule { @@ -27,6 +27,20 @@ impl TranslatedModule { } } +#[derive(Default)] +pub struct TranslationContext { + types: Vec, + func_ty_indicies: Vec, +} + +impl TranslationContext { + pub fn func_type(&self, func_idx: u32) -> &FuncType { + // TODO: This assumes that there is no imported functions. + let func_ty_idx = self.func_ty_indicies[func_idx as usize]; + &self.types[func_ty_idx as usize] + } +} + /// Translate from a slice of bytes holding a wasm module. 
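/// Wasm requires the known sections to appear in a fixed order, which is why
/// each section kind below is handled at most once, in sequence, with custom
/// sections skipped in between.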
pub fn translate(data: &[u8]) -> Result { let mut reader = ModuleReader::new(data)?; @@ -37,12 +51,12 @@ pub fn translate(data: &[u8]) -> Result { return Ok(output); } let mut section = reader.read()?; - let mut types = vec![]; - let mut func_ty_indicies = vec![]; + + let mut ctx = TranslationContext::default(); if let SectionCode::Type = section.code { let types_reader = section.get_type_section_reader()?; - types = translate_sections::type_(types_reader)?; + ctx.types = translate_sections::type_(types_reader)?; reader.skip_custom_sections()?; if reader.eof() { @@ -64,7 +78,7 @@ pub fn translate(data: &[u8]) -> Result { if let SectionCode::Function = section.code { let functions = section.get_function_section_reader()?; - func_ty_indicies = translate_sections::function(functions)?; + ctx.func_ty_indicies = translate_sections::function(functions)?; reader.skip_custom_sections()?; if reader.eof() { @@ -142,7 +156,7 @@ pub fn translate(data: &[u8]) -> Result { if let SectionCode::Code = section.code { let code = section.get_code_section_reader()?; output.translated_code_section = - Some(translate_sections::code(code, &types, &func_ty_indicies)?); + Some(translate_sections::code(code, &ctx)?); reader.skip_custom_sections()?; if reader.eof() { diff --git a/src/translate_sections.rs b/src/translate_sections.rs index db97713a7a..431285b42f 100644 --- a/src/translate_sections.rs +++ b/src/translate_sections.rs @@ -1,6 +1,7 @@ use backend::{CodeGenSession, TranslatedCodeSection}; use error::Error; use function_body; +use module::TranslationContext; #[allow(unused_imports)] // for now use wasmparser::{ CodeSectionReader, Data, DataSectionReader, Element, ElementSectionReader, Export, @@ -84,15 +85,12 @@ pub fn element(elements: ElementSectionReader) -> Result<(), Error> { /// Parses the Code section of the wasm module. pub fn code( code: CodeSectionReader, - types: &[FuncType], - func_ty_indicies: &[u32], + translation_ctx: &TranslationContext ) -> Result { let mut session = CodeGenSession::new(); for (idx, body) in code.into_iter().enumerate() { - let func_ty_idx = func_ty_indicies[idx]; - let func_ty = &types[func_ty_idx as usize]; - - function_body::translate(&mut session, &func_ty, &body?)?; + let func_ty = translation_ctx.func_type(idx as u32); + function_body::translate(&mut session, translation_ctx, &func_ty, &body?)?; } Ok(session.into_translated_code_section()?) } From 078486e0805e6354174a12abd5632edacd1f95b1 Mon Sep 17 00:00:00 2001 From: Sergey Pepyakin Date: Fri, 7 Dec 2018 19:16:32 +0100 Subject: [PATCH 03/61] Basic form of a function call --- src/backend.rs | 94 +++++++++++++++++++++++---------------- src/function_body.rs | 19 ++++---- src/module.rs | 5 ++- src/tests.rs | 23 +++++++--- src/translate_sections.rs | 8 ++-- 5 files changed, 89 insertions(+), 60 deletions(-) diff --git a/src/backend.rs b/src/backend.rs index e504df7d1e..b3cf12a91b 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -1,8 +1,9 @@ #![allow(dead_code)] // for now -use error::Error; use dynasmrt::x64::Assembler; -use dynasmrt::{DynasmApi, DynasmLabelApi, AssemblyOffset, ExecutableBuffer, DynamicLabel}; +use dynasmrt::{AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi, ExecutableBuffer}; +use error::Error; +use std::iter; /// Size of a pointer on the target in bytes. 
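/// All slot arithmetic in this backend (frame sizes, `StackDepth`, argument
/// offsets) counts in these 8-byte words.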
const WORD_SIZE: u32 = 8; @@ -45,10 +46,7 @@ impl GPRs { } fn release(&mut self, gpr: GPR) { - assert!( - !self.is_free(gpr), - "released register was already free", - ); + assert!(!self.is_free(gpr), "released register was already free",); self.bits |= 1 << gpr; } @@ -93,22 +91,15 @@ enum ArgLocation { /// Get a location for an argument at the given position. fn abi_loc_for_arg(pos: u32) -> ArgLocation { // TODO: This assumes only system-v calling convention. - // In system-v calling convention the first 6 arguments are passed via registers. + // In system-v calling convention the first 6 arguments are passed via registers. // All rest arguments are passed on the stack. - const ARGS_IN_GPRS: &'static [GPR] = &[ - RDI, - RSI, - RDX, - RCX, - R8, - R9, - ]; + const ARGS_IN_GPRS: &'static [GPR] = &[RDI, RSI, RDX, RCX, R8, R9]; if let Some(®) = ARGS_IN_GPRS.get(pos as usize) { ArgLocation::Reg(reg) } else { let stack_pos = pos - ARGS_IN_GPRS.len() as u32; - // +2 is because the first argument is located right after the saved frame pointer slot + // +2 is because the first argument is located right after the saved frame pointer slot // and the incoming return address. let stack_offset = ((stack_pos + 2) * WORD_SIZE) as i32; ArgLocation::Stack(stack_offset) @@ -117,33 +108,54 @@ fn abi_loc_for_arg(pos: u32) -> ArgLocation { pub struct CodeGenSession { assembler: Assembler, - func_starts: Vec, + func_starts: Vec<(Option, DynamicLabel)>, } impl CodeGenSession { - pub fn new() -> Self { + pub fn new(func_count: u32) -> Self { + let mut assembler = Assembler::new().unwrap(); + let func_starts = iter::repeat_with(|| (None, assembler.new_dynamic_label())) + .take(func_count as usize) + .collect::>(); + CodeGenSession { - assembler: Assembler::new().unwrap(), - func_starts: Vec::new(), + assembler, + func_starts, } } - pub fn new_context(&mut self) -> Context { - let start_offset = self.assembler.offset(); - self.func_starts.push(start_offset); + pub fn new_context(&mut self, func_idx: u32) -> Context { + { + let func_start = &mut self.func_starts[func_idx as usize]; + + // At this point we now the exact start address of this function. Save it + // and define dynamic label at this location. + func_start.0 = Some(self.assembler.offset()); + self.assembler.dynamic_label(func_start.1); + } + Context { asm: &mut self.assembler, - start: start_offset, + func_starts: &self.func_starts, regs: Registers::new(), sp_depth: StackDepth(0), } } - pub fn into_translated_code_section(self) -> Result { - let exec_buf = self.assembler + pub fn into_translated_code_section(self) -> Result { + let exec_buf = self + .assembler .finalize() .map_err(|_asm| Error::Assembler("assembler error".to_owned()))?; - Ok(TranslatedCodeSection { exec_buf, func_starts: self.func_starts }) + let func_starts = self + .func_starts + .iter() + .map(|(offset, _)| offset.unwrap()) + .collect::>(); + Ok(TranslatedCodeSection { + exec_buf, + func_starts, + }) } } @@ -161,19 +173,12 @@ impl TranslatedCodeSection { pub struct Context<'a> { asm: &'a mut Assembler, - start: AssemblyOffset, + func_starts: &'a Vec<(Option, DynamicLabel)>, regs: Registers, /// Each push and pop on the value stack increments or decrements this value by 1 respectively. sp_depth: StackDepth, } -impl<'a> Context<'a> { - /// Returns the offset of the first instruction. - fn start(&self) -> AssemblyOffset { - self.start - } -} - /// Label in code. 
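///
/// Wraps a dynasm `DynamicLabel`, so a label can be used as a jump or call
/// target before `define_label` has pinned it to an offset.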
#[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct Label(DynamicLabel); @@ -184,8 +189,8 @@ pub fn create_label(ctx: &mut Context) -> Label { } /// Define the given label at the current position. -/// -/// Multiple labels can be defined at the same position. However, a label +/// +/// Multiple labels can be defined at the same position. However, a label /// can be defined only once. pub fn define_label(ctx: &mut Context, label: Label) { ctx.asm.dynamic_label(label.0); @@ -327,7 +332,14 @@ pub fn copy_incoming_arg(ctx: &mut Context, arg_pos: u32) { // And then move a value from a register into local variable area on the stack. let offset = sp_relative_offset(ctx, arg_pos); dynasm!(ctx.asm - ; mov [rsp + offset], Rq(reg) + ; mov [rsp + offset], Rq(reg) + ); +} + +pub fn call_direct(ctx: &mut Context, index: u32) { + let label = &ctx.func_starts[index as usize].1; + dynasm!(ctx.asm + ; call =>*label ); } @@ -346,7 +358,11 @@ pub fn prologue(ctx: &mut Context, stack_slots: u32) { } pub fn epilogue(ctx: &mut Context) { - assert_eq!(ctx.sp_depth, StackDepth(0), "imbalanced pushes and pops detected"); + assert_eq!( + ctx.sp_depth, + StackDepth(0), + "imbalanced pushes and pops detected" + ); dynasm!(ctx.asm ; mov rsp, rbp ; pop rbp diff --git a/src/function_body.rs b/src/function_body.rs index d0f83fab4b..18733c644a 100644 --- a/src/function_body.rs +++ b/src/function_body.rs @@ -1,7 +1,7 @@ use backend::*; -use module::TranslationContext; use error::Error; -use wasmparser::{FuncType, FunctionBody, Operator, Type}; +use module::TranslationContext; +use wasmparser::{FunctionBody, Operator, Type}; // TODO: Use own declared `Type` enum. @@ -90,11 +90,12 @@ impl ControlFrame { pub fn translate( session: &mut CodeGenSession, translation_ctx: &TranslationContext, - func_type: &FuncType, + func_idx: u32, body: &FunctionBody, ) -> Result<(), Error> { let locals = body.get_locals_reader()?; + let func_type = translation_ctx.func_type(func_idx); let arg_count = func_type.params.len() as u32; let return_ty = if func_type.returns.len() > 0 { func_type.returns[0] @@ -108,7 +109,7 @@ pub fn translate( framesize += count; } - let mut ctx = session.new_context(); + let mut ctx = session.new_context(func_idx); let operators = body.get_operators_reader()?; prologue(&mut ctx, framesize); @@ -217,8 +218,9 @@ pub fn translate( get_local_i32(&mut ctx, local_index); } Operator::Call { function_index } => { - // TODO: find out the signature of this function - // this requires to generalize the function types infrasturcture + let callee_ty = translation_ctx.func_type(function_index); + assert!(callee_ty.params.len() == 0, "is not supported"); + assert!(callee_ty.returns.len() == 0, "is not supported"); // TODO: ensure that this function is locally defined // We would like to support imported functions at some point @@ -226,10 +228,7 @@ pub fn translate( // TODO: pop arguments and move them in appropriate positions. // only 6 for now. - // TODO: jump to the specified position - // this requires us saving function start locations in codegensession. - - panic!() + call_direct(&mut ctx, function_index); } _ => { trap(&mut ctx); diff --git a/src/module.rs b/src/module.rs index e2c71ba989..52f0d61009 100644 --- a/src/module.rs +++ b/src/module.rs @@ -39,6 +39,8 @@ impl TranslationContext { let func_ty_idx = self.func_ty_indicies[func_idx as usize]; &self.types[func_ty_idx as usize] } + + // TODO: type of a global } /// Translate from a slice of bytes holding a wasm module. 
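(A note on the import TODO above: in the wasm function index space, imported
functions come before locally-defined ones, so once imports are supported the
lookup needs an offset. A sketch, assuming a hypothetical
`imported_func_count` field on `TranslationContext`:

    pub fn func_type(&self, func_idx: u32) -> &FuncType {
        let defined_idx = func_idx - self.imported_func_count;
        let func_ty_idx = self.func_ty_indicies[defined_idx as usize];
        &self.types[func_ty_idx as usize]
    }

An imported function's type would instead come straight from its Import
section entry.)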
@@ -155,8 +157,7 @@ pub fn translate(data: &[u8]) -> Result { if let SectionCode::Code = section.code { let code = section.get_code_section_reader()?; - output.translated_code_section = - Some(translate_sections::code(code, &ctx)?); + output.translated_code_section = Some(translate_sections::code(code, &ctx)?); reader.skip_custom_sections()?; if reader.eof() { diff --git a/src/tests.rs b/src/tests.rs index 4a505d6983..980d100bc4 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -20,11 +20,7 @@ fn empty() { #[test] fn adds() { - const CASES: &[(usize, usize, usize)] = &[ - (5, 3, 8), - (0, 228, 228), - (usize::max_value(), 1, 0), - ]; + const CASES: &[(usize, usize, usize)] = &[(5, 3, 8), (0, 228, 228), (usize::max_value(), 1, 0)]; let code = r#" (module @@ -110,4 +106,21 @@ fn if_without_result() { assert_eq!(execute_wat(code, 2, 3), 2); } +#[test] +fn function_call() { + let code = r#" +(module + (func (param i32) (param i32) (result i32) + (call 1) + (get_local 0) + ) + + (func + ) +) + "#; + + assert_eq!(execute_wat(code, 2, 3), 2); +} + // TODO: Add a test that checks argument passing via the stack. diff --git a/src/translate_sections.rs b/src/translate_sections.rs index 431285b42f..94d248d517 100644 --- a/src/translate_sections.rs +++ b/src/translate_sections.rs @@ -85,12 +85,12 @@ pub fn element(elements: ElementSectionReader) -> Result<(), Error> { /// Parses the Code section of the wasm module. pub fn code( code: CodeSectionReader, - translation_ctx: &TranslationContext + translation_ctx: &TranslationContext, ) -> Result { - let mut session = CodeGenSession::new(); + let func_count = code.get_count(); + let mut session = CodeGenSession::new(func_count); for (idx, body) in code.into_iter().enumerate() { - let func_ty = translation_ctx.func_type(idx as u32); - function_body::translate(&mut session, translation_ctx, &func_ty, &body?)?; + function_body::translate(&mut session, translation_ctx, idx as u32, &body?)?; } Ok(session.into_translated_code_section()?) } From 0e9ba8332f59148cfb1cf8dfdd3815ad1bbdfb47 Mon Sep 17 00:00:00 2001 From: Sergey Pepyakin Date: Tue, 11 Dec 2018 19:45:33 +0100 Subject: [PATCH 04/61] Pass arguments. --- src/backend.rs | 28 +++++++++++++++++++++++----- src/function_body.rs | 5 +---- src/tests.rs | 12 +++++++++--- 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/src/backend.rs b/src/backend.rs index b3cf12a91b..dfcd301c32 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -81,6 +81,7 @@ impl Registers { } /// Describes location of a argument. +#[derive(Debug)] enum ArgLocation { /// Argument is passed via some register. Reg(GPR), @@ -336,6 +337,22 @@ pub fn copy_incoming_arg(ctx: &mut Context, arg_pos: u32) { ); } +pub fn pass_outgoing_args(ctx: &mut Context, arity: u32) { + for arg_pos in (0..arity).rev() { + ctx.sp_depth.free(1); + + let loc = abi_loc_for_arg(arg_pos); + match loc { + ArgLocation::Reg(gpr) => { + dynasm!(ctx.asm + ; pop Rq(gpr) + ); + } + _ => unimplemented!("don't know how to pass argument {} via {:?}", arg_pos, loc), + } + } +} + pub fn call_direct(ctx: &mut Context, index: u32) { let label = &ctx.func_starts[index as usize].1; dynasm!(ctx.asm @@ -358,11 +375,12 @@ pub fn prologue(ctx: &mut Context, stack_slots: u32) { } pub fn epilogue(ctx: &mut Context) { - assert_eq!( - ctx.sp_depth, - StackDepth(0), - "imbalanced pushes and pops detected" - ); + // TODO: This doesn't work with stack alignment. 
+ // assert_eq!( + // ctx.sp_depth, + // StackDepth(0), + // "imbalanced pushes and pops detected" + // ); dynasm!(ctx.asm ; mov rsp, rbp ; pop rbp diff --git a/src/function_body.rs b/src/function_body.rs index 18733c644a..bffca70b98 100644 --- a/src/function_body.rs +++ b/src/function_body.rs @@ -219,15 +219,12 @@ pub fn translate( } Operator::Call { function_index } => { let callee_ty = translation_ctx.func_type(function_index); - assert!(callee_ty.params.len() == 0, "is not supported"); assert!(callee_ty.returns.len() == 0, "is not supported"); // TODO: ensure that this function is locally defined // We would like to support imported functions at some point - // TODO: pop arguments and move them in appropriate positions. - // only 6 for now. - + pass_outgoing_args(&mut ctx, callee_ty.params.len() as u32); call_direct(&mut ctx, function_index); } _ => { diff --git a/src/tests.rs b/src/tests.rs index 980d100bc4..d2e2effb78 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -111,16 +111,22 @@ fn function_call() { let code = r#" (module (func (param i32) (param i32) (result i32) - (call 1) + (call $assert_zero + (get_local 1) + ) (get_local 0) ) - (func + (func $assert_zero (param $v i32) + (local i32) + (if (get_local $v) + (unreachable) + ) ) ) "#; - assert_eq!(execute_wat(code, 2, 3), 2); + assert_eq!(execute_wat(code, 2, 0), 2); } // TODO: Add a test that checks argument passing via the stack. From e02dbf1bc238e6401c8f827a4d44d3896662b548 Mon Sep 17 00:00:00 2001 From: Sergey Pepyakin Date: Tue, 11 Dec 2018 20:12:55 +0100 Subject: [PATCH 05/61] Add i32 literals support. --- src/backend.rs | 10 +++++++++- src/function_body.rs | 3 +++ src/tests.rs | 14 ++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/backend.rs b/src/backend.rs index dfcd301c32..3e90722c00 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -261,7 +261,7 @@ pub fn get_local_i32(ctx: &mut Context, local_idx: u32) { push_i32(ctx, gpr); } -pub fn store_i32(ctx: &mut Context, local_idx: u32) { +pub fn set_local_i32(ctx: &mut Context, local_idx: u32) { let gpr = pop_i32(ctx); let offset = sp_relative_offset(ctx, local_idx); dynasm!(ctx.asm @@ -270,6 +270,14 @@ pub fn store_i32(ctx: &mut Context, local_idx: u32) { ctx.regs.release_scratch_gpr(gpr); } +pub fn literal_i32(ctx: &mut Context, imm: i32) { + let gpr = ctx.regs.take_scratch_gpr(); + dynasm!(ctx.asm + ; mov Rd(gpr), imm + ); + push_i32(ctx, gpr); +} + pub fn relop_eq_i32(ctx: &mut Context) { let right = pop_i32(ctx); let left = pop_i32(ctx); diff --git a/src/function_body.rs b/src/function_body.rs index bffca70b98..fca2a7e666 100644 --- a/src/function_body.rs +++ b/src/function_body.rs @@ -217,6 +217,9 @@ pub fn translate( Operator::GetLocal { local_index } => { get_local_i32(&mut ctx, local_index); } + Operator::I32Const { value } => { + literal_i32(&mut ctx, value); + } Operator::Call { function_index } => { let callee_ty = translation_ctx.func_type(function_index); assert!(callee_ty.returns.len() == 0, "is not supported"); diff --git a/src/tests.rs b/src/tests.rs index d2e2effb78..348c0e772f 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -129,4 +129,18 @@ fn function_call() { assert_eq!(execute_wat(code, 2, 0), 2); } +#[test] +fn literals() { + let code = r#" +(module + (func (param i32) (param i32) (result i32) + (i32.const 228) + ) +) + "#; + + assert_eq!(execute_wat(code, 0, 0), 228); +} + + // TODO: Add a test that checks argument passing via the stack. 
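(A quick end-to-end check of the new literal support, in the style of the
existing tests; a sketch reusing the same execute_wat helper:

    #[test]
    fn add_consts() {
        let code = r#"
    (module
      (func (param i32) (param i32) (result i32)
        (i32.add (i32.const 40) (i32.const 2))
      )
    )
        "#;
        assert_eq!(execute_wat(code, 0, 0), 42);
    }

literal_i32 moves each immediate into a scratch register, so two constants
followed by an i32.add exercise both the literal path and the existing binop
path.)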
From 0cd70c649a631219422fbd097649820a69e6a86e Mon Sep 17 00:00:00 2001 From: Sergey Pepyakin Date: Tue, 11 Dec 2018 20:13:20 +0100 Subject: [PATCH 06/61] Implement returns. --- src/backend.rs | 11 ++++++++++- src/function_body.rs | 6 ++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/backend.rs b/src/backend.rs index 3e90722c00..7829b7bf1f 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -361,11 +361,20 @@ pub fn pass_outgoing_args(ctx: &mut Context, arity: u32) { } } -pub fn call_direct(ctx: &mut Context, index: u32) { +pub fn call_direct(ctx: &mut Context, index: u32, return_arity: u32) { + assert!(return_arity == 0 || return_arity == 1); + let label = &ctx.func_starts[index as usize].1; dynasm!(ctx.asm ; call =>*label ); + + if return_arity == 1 { + dynasm!(ctx.asm + ; push rax + ); + ctx.sp_depth.reserve(1); + } } pub fn prologue(ctx: &mut Context, stack_slots: u32) { diff --git a/src/function_body.rs b/src/function_body.rs index fca2a7e666..0983973f9f 100644 --- a/src/function_body.rs +++ b/src/function_body.rs @@ -222,13 +222,11 @@ pub fn translate( } Operator::Call { function_index } => { let callee_ty = translation_ctx.func_type(function_index); - assert!(callee_ty.returns.len() == 0, "is not supported"); - // TODO: ensure that this function is locally defined - // We would like to support imported functions at some point + // TODO: this implementation assumes that this function is locally defined. pass_outgoing_args(&mut ctx, callee_ty.params.len() as u32); - call_direct(&mut ctx, function_index); + call_direct(&mut ctx, function_index, callee_ty.returns.len() as u32); } _ => { trap(&mut ctx); From b32f77ea0237ce6fe102f499a6275fafbbb9b40d Mon Sep 17 00:00:00 2001 From: Sergey Pepyakin Date: Tue, 11 Dec 2018 20:13:27 +0100 Subject: [PATCH 07/61] Add the fib test. --- src/tests.rs | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/src/tests.rs b/src/tests.rs index 348c0e772f..add6ee96b7 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -142,5 +142,61 @@ fn literals() { assert_eq!(execute_wat(code, 0, 0), 228); } +#[test] +fn fib() { + let code = r#" +(module + (func $fib (param $n i32) (param $_unused i32) (result i32) + (if (result i32) + (i32.eq + (i32.const 0) + (get_local $n) + ) + (then + (i32.const 1) + ) + (else + (if (result i32) + (i32.eq + (i32.const 1) + (get_local $n) + ) + (then + (i32.const 1) + ) + (else + (i32.add + ;; fib(n - 1) + (call $fib + (i32.add + (get_local $n) + (i32.const -1) + ) + (i32.const 0) + ) + ;; fib(n - 2) + (call $fib + (i32.add + (get_local $n) + (i32.const -2) + ) + (i32.const 0) + ) + ) + ) + ) + ) + ) + ) +) + "#; + + // fac(x) = y <=> (x, y) + const FIB_SEQ: &[usize] = &[1, 1, 2, 3, 5, 8, 13, 21, 34, 55]; + + for x in 0..10 { + assert_eq!(execute_wat(code, x, 0), FIB_SEQ[x]); + } +} // TODO: Add a test that checks argument passing via the stack. 
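(For a one-argument call like the recursive `(call $fib ...)` above, the
instruction sequence the backend emits at this point in the series is roughly:

    pop rdi     (pass_outgoing_args: argument 0 into the first System V register)
    call $fib   (call_direct: a direct call through the callee's dynamic label)
    push rax    (call_direct: the single i32 result comes back in RAX)

Arguments beyond the sixth still hit the unimplemented!() arm here; stack
argument passing arrives in a later patch.)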
From 86353cba5ed6c59725392097a59b2ab073ffe130 Mon Sep 17 00:00:00 2001 From: Jef Date: Wed, 12 Dec 2018 11:52:48 +0100 Subject: [PATCH 08/61] Allow calling functions with any signature --- examples/test.rs | 2 +- src/module.rs | 39 +++++++++++++++++++++++++++++++++------ src/tests.rs | 47 ++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 74 insertions(+), 14 deletions(-) diff --git a/examples/test.rs b/examples/test.rs index 3295df8ae6..163dda0142 100644 --- a/examples/test.rs +++ b/examples/test.rs @@ -22,7 +22,7 @@ fn read_to_end>(path: P) -> io::Result> { fn maybe_main() -> Result<(), String> { let data = read_to_end("test.wasm").map_err(|e| e.to_string())?; let translated = translate(&data).map_err(|e| e.to_string())?; - let result = translated.execute_func(0, 5, 3); + let result: u32 = unsafe { translated.execute_func(0, (5u32, 3u32)) }; println!("f(5, 3) = {}", result); Ok(()) diff --git a/src/module.rs b/src/module.rs index 52f0d61009..a38aac0940 100644 --- a/src/module.rs +++ b/src/module.rs @@ -4,6 +4,37 @@ use std::mem; use translate_sections; use wasmparser::{FuncType, ModuleReader, SectionCode}; +pub trait FunctionArgs { + unsafe fn call(self, start: *const u8) -> T; +} + +macro_rules! impl_function_args { + ($first:ident $(, $rest:ident)*) => { + impl<$first, $($rest),*> FunctionArgs for ($first, $($rest),*) { + #[allow(non_snake_case)] + unsafe fn call(self, start: *const u8) -> T { + let func = mem::transmute::<_, extern "sysv64" fn($first, $($rest),*) -> T>(start); + { + let ($first, $($rest),*) = self; + func($first, $($rest),*) + } + } + } + + impl_function_args!($($rest),*); + }; + () => { + impl FunctionArgs for () { + unsafe fn call(self, start: *const u8) -> T { + let func = mem::transmute::<_, extern "sysv64" fn() -> T>(start); + func() + } + } + }; +} + +impl_function_args!(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S); + #[derive(Default)] pub struct TranslatedModule { translated_code_section: Option, @@ -11,19 +42,15 @@ pub struct TranslatedModule { impl TranslatedModule { // For testing only. - // Assume signature is (i32, i32) -> i32 for now. // TODO: Handle generic signatures. - pub fn execute_func(&self, func_idx: u32, a: usize, b: usize) -> usize { + pub unsafe fn execute_func(&self, func_idx: u32, args: Args) -> T { let code_section = self .translated_code_section .as_ref() .expect("no code section"); let start_buf = code_section.func_start(func_idx as usize); - unsafe { - let func = mem::transmute::<_, extern "sysv64" fn(usize, usize) -> usize>(start_buf); - func(a, b) - } + args.call(start_buf) } } diff --git a/src/tests.rs b/src/tests.rs index add6ee96b7..0e434c7233 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -8,9 +8,9 @@ fn translate_wat(wat: &str) -> TranslatedModule { } /// Execute the first function in the module. 
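/// (Still assumes a (u32, u32) -> u32 signature; tests that need a different
/// shape call `execute_func` on the translated module directly.)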
-fn execute_wat(wat: &str, a: usize, b: usize) -> usize { +fn execute_wat(wat: &str, a: u32, b: u32) -> u32 { let translated = translate_wat(wat); - translated.execute_func(0, a, b) + unsafe { translated.execute_func(0, (a, b)) } } #[test] @@ -20,7 +20,7 @@ fn empty() { #[test] fn adds() { - const CASES: &[(usize, usize, usize)] = &[(5, 3, 8), (0, 228, 228), (usize::max_value(), 1, 0)]; + const CASES: &[(u32, u32, u32)] = &[(5, 3, 8), (0, 228, 228), (u32::max_value(), 1, 0)]; let code = r#" (module @@ -34,7 +34,7 @@ fn adds() { #[test] fn relop_eq() { - const CASES: &[(usize, usize, usize)] = &[ + const CASES: &[(u32, u32, u32)] = &[ (0, 0, 1), (0, 1, 0), (1, 0, 0), @@ -56,7 +56,7 @@ fn relop_eq() { #[test] fn if_then_else() { - const CASES: &[(usize, usize, usize)] = &[ + const CASES: &[(u32, u32, u32)] = &[ (0, 1, 1), (0, 0, 0), (1, 0, 0), @@ -129,6 +129,39 @@ fn function_call() { assert_eq!(execute_wat(code, 2, 0), 2); } +#[test] +fn large_function_call() { + let code = r#" +(module + (func (param i32) (param i32) (param i32) (param i32) + (param i32) (param i32) + (result i32) + + (call $assert_zero + (get_local 5) + ) + (get_local 0) + ) + + (func $assert_zero (param $v i32) + (local i32) + (if (get_local $v) + (unreachable) + ) + ) +) + "#; + + assert_eq!( + { + let translated = translate_wat(code); + let out: u32 = unsafe { translated.execute_func(0, (5, 4, 3, 2, 1, 0)) }; + out + }, + 5 + ); +} + #[test] fn literals() { let code = r#" @@ -192,10 +225,10 @@ fn fib() { "#; // fac(x) = y <=> (x, y) - const FIB_SEQ: &[usize] = &[1, 1, 2, 3, 5, 8, 13, 21, 34, 55]; + const FIB_SEQ: &[u32] = &[1, 1, 2, 3, 5, 8, 13, 21, 34, 55]; for x in 0..10 { - assert_eq!(execute_wat(code, x, 0), FIB_SEQ[x]); + assert_eq!(execute_wat(code, x, 0), FIB_SEQ[x as usize]); } } From 189996accd3b83e42bb12df408c687e86df50a3b Mon Sep 17 00:00:00 2001 From: Jef Date: Wed, 12 Dec 2018 13:23:43 +0100 Subject: [PATCH 09/61] Fix receiving more than 6 arguments, allow calling functions with more than 6 arguments --- src/backend.rs | 29 +++++++++++---- src/disassemble.rs | 2 +- src/function_body.rs | 2 +- src/tests.rs | 84 +++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 107 insertions(+), 10 deletions(-) diff --git a/src/backend.rs b/src/backend.rs index 7829b7bf1f..994d0c5e96 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -89,13 +89,13 @@ enum ArgLocation { Stack(i32), } +// TODO: This assumes only system-v calling convention. +// In system-v calling convention the first 6 arguments are passed via registers. +// All rest arguments are passed on the stack. +const ARGS_IN_GPRS: &'static [GPR] = &[RDI, RSI, RDX, RCX, R8, R9]; + /// Get a location for an argument at the given position. fn abi_loc_for_arg(pos: u32) -> ArgLocation { - // TODO: This assumes only system-v calling convention. - // In system-v calling convention the first 6 arguments are passed via registers. - // All rest arguments are passed on the stack. - const ARGS_IN_GPRS: &'static [GPR] = &[RDI, RSI, RDX, RCX, R8, R9]; - if let Some(®) = ARGS_IN_GPRS.get(pos as usize) { ArgLocation::Reg(reg) } else { @@ -320,7 +320,7 @@ pub fn prepare_return_value(ctx: &mut Context) { } } -pub fn copy_incoming_arg(ctx: &mut Context, arg_pos: u32) { +pub fn copy_incoming_arg(ctx: &mut Context, frame_size: u32, arg_pos: u32) { let loc = abi_loc_for_arg(arg_pos); // First, ensure the argument is in a register. 
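    // For reference, the frame at this point (stack grows downward), matching
    // the "+2 words" rule in abi_loc_for_arg:
    //
    //   [rbp + 24]  second stack-passed argument (abi position 7)
    //   [rbp + 16]  first stack-passed argument  (abi position 6)
    //   [rbp +  8]  return address pushed by the caller's call
    //   [rbp +  0]  saved frame pointer
    //   [rbp -  8]  ...local/argument slots reserved by the prologue, which is
    //               why reads relative to rsp below must add frame_size words.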
@@ -331,6 +331,7 @@ pub fn copy_incoming_arg(ctx: &mut Context, arg_pos: u32) { ctx.regs.scratch_gprs.is_free(RAX), "we assume that RAX can be used as a scratch register for now", ); + let offset = offset + (frame_size * WORD_SIZE) as i32; dynasm!(ctx.asm ; mov Rq(RAX), [rsp + offset] ); @@ -346,6 +347,7 @@ pub fn copy_incoming_arg(ctx: &mut Context, arg_pos: u32) { } pub fn pass_outgoing_args(ctx: &mut Context, arity: u32) { + let mut stack_args = vec![]; for arg_pos in (0..arity).rev() { ctx.sp_depth.free(1); @@ -356,9 +358,22 @@ pub fn pass_outgoing_args(ctx: &mut Context, arity: u32) { ; pop Rq(gpr) ); } - _ => unimplemented!("don't know how to pass argument {} via {:?}", arg_pos, loc), + ArgLocation::Stack(_) => { + let gpr = ctx.regs.take_scratch_gpr(); + dynasm!(ctx.asm + ; pop Rq(gpr) + ); + stack_args.push(gpr); + } } } + + for gpr in stack_args { + dynasm!(ctx.asm + ; push Rq(gpr) + ); + ctx.regs.release_scratch_gpr(gpr); + } } pub fn call_direct(ctx: &mut Context, index: u32, return_arity: u32) { diff --git a/src/disassemble.rs b/src/disassemble.rs index d3bb9cafe5..e846d12341 100644 --- a/src/disassemble.rs +++ b/src/disassemble.rs @@ -20,7 +20,7 @@ pub fn disassemble(mem: &[u8]) -> Result<(), Error> { for b in i.bytes() { write!(&mut bytes_str, "{:02x} ", b).unwrap(); } - write!(&mut line, "{:21}\t", bytes_str).unwrap(); + write!(&mut line, "{:24}\t", bytes_str).unwrap(); if let Some(s) = i.mnemonic() { write!(&mut line, "{}\t", s).unwrap(); diff --git a/src/function_body.rs b/src/function_body.rs index 0983973f9f..00e07025c6 100644 --- a/src/function_body.rs +++ b/src/function_body.rs @@ -115,7 +115,7 @@ pub fn translate( prologue(&mut ctx, framesize); for arg_pos in 0..arg_count { - copy_incoming_arg(&mut ctx, arg_pos); + copy_incoming_arg(&mut ctx, framesize, arg_pos); } let mut control_frames = Vec::new(); diff --git a/src/tests.rs b/src/tests.rs index 0e434c7233..14b48c15b9 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -130,7 +130,7 @@ fn function_call() { } #[test] -fn large_function_call() { +fn large_function() { let code = r#" (module (func (param i32) (param i32) (param i32) (param i32) @@ -162,6 +162,88 @@ fn large_function_call() { ); } +#[test] +fn function_read_args_spill_to_stack() { + let code = r#" +(module + (func (param i32) (param i32) (param i32) (param i32) + (param i32) (param i32) (param i32) (param i32) + (result i32) + + (call $assert_zero + (get_local 7) + ) + (get_local 0) + ) + + (func $assert_zero (param $v i32) + (local i32) + (if (get_local $v) + (unreachable) + ) + ) +) + "#; + + assert_eq!( + { + let translated = translate_wat(code); + let out: u32 = unsafe { translated.execute_func(0, (7, 6, 5, 4, 3, 2, 1, 0)) }; + out + }, + 7 + ); +} + +#[test] +fn function_write_args_spill_to_stack() { + let code = r#" +(module + (func (param i32) (param i32) (param i32) (param i32) + (param i32) (param i32) (param i32) (param i32) + (result i32) + + (call $called + (get_local 0) + (get_local 1) + (get_local 2) + (get_local 3) + (get_local 4) + (get_local 5) + (get_local 6) + (get_local 7) + ) + ) + + (func $called + (param i32) (param i32) (param i32) (param i32) + (param i32) (param i32) (param i32) (param i32) + (result i32) + + (call $assert_zero + (get_local 7) + ) + (get_local 0) + ) + + (func $assert_zero (param $v i32) + (local i32) + (if (get_local $v) + (unreachable) + ) + ) +) + "#; + + assert_eq!( + { + let translated = translate_wat(code); + let out: u32 = unsafe { translated.execute_func(0, (7, 6, 5, 4, 3, 2, 1, 0)) }; + out + }, + 7 + ); 
+} #[test] fn literals() { let code = r#" From 5b448ce3c780c934fe211bf07e8e89a77e25ccdf Mon Sep 17 00:00:00 2001 From: Jef Date: Thu, 13 Dec 2018 10:25:17 +0100 Subject: [PATCH 10/61] Stop leaking stack space on function call --- src/backend.rs | 28 +++++++++++++++++++++++----- src/function_body.rs | 8 ++++++-- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/src/backend.rs b/src/backend.rs index 994d0c5e96..f9bc16125c 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -346,8 +346,9 @@ pub fn copy_incoming_arg(ctx: &mut Context, frame_size: u32, arg_pos: u32) { ); } -pub fn pass_outgoing_args(ctx: &mut Context, arity: u32) { - let mut stack_args = vec![]; +#[must_use] +fn pass_outgoing_args(ctx: &mut Context, arity: u32) -> i32 { + let mut stack_args = Vec::with_capacity((arity as usize).saturating_sub(ARGS_IN_GPRS.len())); for arg_pos in (0..arity).rev() { ctx.sp_depth.free(1); @@ -368,22 +369,39 @@ pub fn pass_outgoing_args(ctx: &mut Context, arity: u32) { } } - for gpr in stack_args { + let num_stack_args = stack_args.len() as i32; + dynasm!(ctx.asm + ; sub rsp, num_stack_args + ); + for (stack_slot, gpr) in stack_args.into_iter().rev().enumerate() { + let offset = (stack_slot * WORD_SIZE as usize) as i32; dynasm!(ctx.asm - ; push Rq(gpr) + ; mov [rsp + offset], Rq(gpr) ); ctx.regs.release_scratch_gpr(gpr); } + + num_stack_args } -pub fn call_direct(ctx: &mut Context, index: u32, return_arity: u32) { +fn post_call_cleanup(ctx: &mut Context, num_stack_args: i32) { + dynasm!(ctx.asm + ; add rsp, num_stack_args + ); +} + +pub fn call_direct(ctx: &mut Context, index: u32, arg_arity: u32, return_arity: u32) { assert!(return_arity == 0 || return_arity == 1); + let num_stack_args = pass_outgoing_args(ctx, arg_arity); + let label = &ctx.func_starts[index as usize].1; dynasm!(ctx.asm ; call =>*label ); + post_call_cleanup(ctx, num_stack_args); + if return_arity == 1 { dynasm!(ctx.asm ; push rax diff --git a/src/function_body.rs b/src/function_body.rs index 00e07025c6..925b32e502 100644 --- a/src/function_body.rs +++ b/src/function_body.rs @@ -225,8 +225,12 @@ pub fn translate( // TODO: this implementation assumes that this function is locally defined. 
- pass_outgoing_args(&mut ctx, callee_ty.params.len() as u32); - call_direct(&mut ctx, function_index, callee_ty.returns.len() as u32); + call_direct( + &mut ctx, + function_index, + callee_ty.params.len() as u32, + callee_ty.returns.len() as u32, + ); } _ => { trap(&mut ctx); From 5bb7430976f044c4f333e032308a9afae70a645b Mon Sep 17 00:00:00 2001 From: Jef Date: Wed, 12 Dec 2018 14:02:11 +0100 Subject: [PATCH 11/61] Add quickcheck, implement simple binary operations --- Cargo.toml | 2 ++ src/backend.rs | 64 +++++++++++++++++++++++++++++++++++++++++--- src/function_body.rs | 21 +++++++-------- src/lib.rs | 5 +++- src/module.rs | 7 +++++ src/tests.rs | 35 ++++++++++++++++-------- 6 files changed, 106 insertions(+), 28 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b7801bd8d3..a6e3028e0a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,8 @@ capstone = "0.5.0" failure = "0.1.3" failure_derive = "0.1.3" wabt = "0.7" +lazy_static = "1.2" +quickcheck = "0.7" [badges] maintenance = { status = "experimental" } diff --git a/src/backend.rs b/src/backend.rs index f9bc16125c..9759a6b49f 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -160,6 +160,7 @@ impl CodeGenSession { } } +#[derive(Debug)] pub struct TranslatedCodeSection { exec_buf: ExecutableBuffer, func_starts: Vec, @@ -170,6 +171,10 @@ impl TranslatedCodeSection { let offset = self.func_starts[idx]; self.exec_buf.ptr(offset) } + + pub fn disassemble(&self) { + ::disassemble::disassemble(&*self.exec_buf).unwrap(); + } } pub struct Context<'a> { @@ -238,14 +243,64 @@ fn pop_i32(ctx: &mut Context) -> GPR { gpr } -pub fn add_i32(ctx: &mut Context) { +pub fn i32_add(ctx: &mut Context) { let op0 = pop_i32(ctx); let op1 = pop_i32(ctx); dynasm!(ctx.asm - ; add Rd(op0), Rd(op1) + ; add Rd(op1), Rd(op0) ); - push_i32(ctx, op0); - ctx.regs.release_scratch_gpr(op1); + push_i32(ctx, op1); + ctx.regs.release_scratch_gpr(op0); +} + +pub fn i32_sub(ctx: &mut Context) { + let op0 = pop_i32(ctx); + let op1 = pop_i32(ctx); + dynasm!(ctx.asm + ; sub Rd(op1), Rd(op0) + ); + push_i32(ctx, op1); + ctx.regs.release_scratch_gpr(op0); +} + +pub fn i32_and(ctx: &mut Context) { + let op0 = pop_i32(ctx); + let op1 = pop_i32(ctx); + dynasm!(ctx.asm + ; and Rd(op1), Rd(op0) + ); + push_i32(ctx, op1); + ctx.regs.release_scratch_gpr(op0); +} + +pub fn i32_or(ctx: &mut Context) { + let op0 = pop_i32(ctx); + let op1 = pop_i32(ctx); + dynasm!(ctx.asm + ; or Rd(op1), Rd(op0) + ); + push_i32(ctx, op1); + ctx.regs.release_scratch_gpr(op0); +} + +pub fn i32_xor(ctx: &mut Context) { + let op0 = pop_i32(ctx); + let op1 = pop_i32(ctx); + dynasm!(ctx.asm + ; xor Rd(op1), Rd(op0) + ); + push_i32(ctx, op1); + ctx.regs.release_scratch_gpr(op0); +} + +pub fn i32_mul(ctx: &mut Context) { + let op0 = pop_i32(ctx); + let op1 = pop_i32(ctx); + dynasm!(ctx.asm + ; imul Rd(op1), Rd(op0) + ); + push_i32(ctx, op1); + ctx.regs.release_scratch_gpr(op0); } fn sp_relative_offset(ctx: &mut Context, slot_idx: u32) -> i32 { @@ -411,6 +466,7 @@ pub fn call_direct(ctx: &mut Context, index: u32, arg_arity: u32, return_arity: } pub fn prologue(ctx: &mut Context, stack_slots: u32) { + let stack_slots = stack_slots; // Align stack slots to the nearest even number. This is required // by x86-64 ABI. 
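    // For example, 5 slots round up to (5 + 1) & !1 = 6, while an even count
    // is unchanged: (4 + 1) & !1 = 4.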
let aligned_stack_slots = (stack_slots + 1) & !1; diff --git a/src/function_body.rs b/src/function_body.rs index 925b32e502..e27c8a3bfc 100644 --- a/src/function_body.rs +++ b/src/function_body.rs @@ -208,18 +208,15 @@ pub fn translate( } } } - Operator::I32Eq => { - relop_eq_i32(&mut ctx); - } - Operator::I32Add => { - add_i32(&mut ctx); - } - Operator::GetLocal { local_index } => { - get_local_i32(&mut ctx, local_index); - } - Operator::I32Const { value } => { - literal_i32(&mut ctx, value); - } + Operator::I32Eq => relop_eq_i32(&mut ctx), + Operator::I32Add => i32_add(&mut ctx), + Operator::I32Sub => i32_sub(&mut ctx), + Operator::I32And => i32_and(&mut ctx), + Operator::I32Or => i32_or(&mut ctx), + Operator::I32Xor => i32_xor(&mut ctx), + Operator::I32Mul => i32_mul(&mut ctx), + Operator::GetLocal { local_index } => get_local_i32(&mut ctx, local_index), + Operator::I32Const { value } => literal_i32(&mut ctx, value), Operator::Call { function_index } => { let callee_ty = translation_ctx.func_type(function_index); diff --git a/src/lib.rs b/src/lib.rs index 5b63cfb9cc..0d5f130639 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,7 +7,10 @@ extern crate wasmparser; #[macro_use] extern crate failure_derive; extern crate dynasmrt; - +#[macro_use] +extern crate lazy_static; +#[macro_use] +extern crate quickcheck; extern crate wabt; mod backend; diff --git a/src/module.rs b/src/module.rs index a38aac0940..c3b7f4ddaf 100644 --- a/src/module.rs +++ b/src/module.rs @@ -52,6 +52,13 @@ impl TranslatedModule { args.call(start_buf) } + + pub fn disassemble(&self) { + self.translated_code_section + .as_ref() + .expect("no code section") + .disassemble(); + } } #[derive(Default)] diff --git a/src/tests.rs b/src/tests.rs index 14b48c15b9..5a1d49e085 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -18,20 +18,33 @@ fn empty() { let _ = translate_wat("(module (func))"); } -#[test] -fn adds() { - const CASES: &[(u32, u32, u32)] = &[(5, 3, 8), (0, 228, 228), (u32::max_value(), 1, 0)]; +macro_rules! binop_test { + ($op:ident, $func:path) => { + quickcheck! { + fn $op(a: u32, b: u32) -> bool { + static CODE: &str = concat!( + "(module (func (param i32) (param i32) (result i32) (i32.", + stringify!($op), + " (get_local 0) (get_local 1))))" + ); - let code = r#" -(module - (func (param i32) (param i32) (result i32) (i32.add (get_local 0) (get_local 1))) -) - "#; - for (a, b, expected) in CASES { - assert_eq!(execute_wat(code, *a, *b), *expected); - } + lazy_static! 
{ + static ref TRANSLATED: TranslatedModule = translate_wat(CODE); + } + + unsafe { TRANSLATED.execute_func::<(u32, u32), u32>(0, (a, b)) == $func(a, b) } + } + } + }; } +binop_test!(add, u32::wrapping_add); +binop_test!(sub, u32::wrapping_sub); +binop_test!(and, std::ops::BitAnd::bitand); +binop_test!(or, std::ops::BitOr::bitor); +binop_test!(xor, std::ops::BitXor::bitxor); +binop_test!(mul, u32::wrapping_mul); + #[test] fn relop_eq() { const CASES: &[(u32, u32, u32)] = &[ From 912fa83fff1b04962fa26cf81ce0075ac56d4a5a Mon Sep 17 00:00:00 2001 From: Jef Date: Wed, 12 Dec 2018 16:47:06 +0100 Subject: [PATCH 12/61] Add benchmarks --- src/lib.rs | 4 +++- src/tests.rs | 30 +++++++++++++++++++++++++----- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 0d5f130639..71fecec427 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,8 @@ -#![feature(plugin)] +#![feature(plugin, test)] #![plugin(dynasm)] +extern crate test; + extern crate capstone; extern crate failure; extern crate wasmparser; diff --git a/src/tests.rs b/src/tests.rs index 5a1d49e085..fda8c6d64d 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -270,9 +270,7 @@ fn literals() { assert_eq!(execute_wat(code, 0, 0), 228); } -#[test] -fn fib() { - let code = r#" +const FIBONACCI: &str = r#" (module (func $fib (param $n i32) (param $_unused i32) (result i32) (if (result i32) @@ -319,12 +317,34 @@ fn fib() { ) "#; +#[test] +fn fib() { // fac(x) = y <=> (x, y) const FIB_SEQ: &[u32] = &[1, 1, 2, 3, 5, 8, 13, 21, 34, 55]; + let translated = translate_wat(FIBONACCI); + for x in 0..10 { - assert_eq!(execute_wat(code, x, 0), FIB_SEQ[x as usize]); + unsafe { + assert_eq!( + translated.execute_func::<_, u32>(0, (x, 0u32)), + FIB_SEQ[x as usize] + ); + } } } -// TODO: Add a test that checks argument passing via the stack. +#[bench] +fn bench_compile(b: &mut test::Bencher) { + let wasm = wabt::wat2wasm(FIBONACCI).unwrap(); + + b.iter(|| test::black_box(translate(&wasm).unwrap())); +} + +#[bench] +fn bench_run(b: &mut test::Bencher) { + let wasm = wabt::wat2wasm(FIBONACCI).unwrap(); + let module = translate(&wasm).unwrap(); + + b.iter(|| unsafe { module.execute_func::<_, u32>(0, (20, 0u32)) }); +} From 4994e3671c6c7e86d5b69cf6b2deafa2a3f877f7 Mon Sep 17 00:00:00 2001 From: Jef Date: Thu, 13 Dec 2018 11:08:34 +0100 Subject: [PATCH 13/61] Remove unused argument from fibonacci example --- src/backend.rs | 9 +++------ src/tests.rs | 8 +++----- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/src/backend.rs b/src/backend.rs index 9759a6b49f..e74c85af9b 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -481,12 +481,9 @@ pub fn prologue(ctx: &mut Context, stack_slots: u32) { } pub fn epilogue(ctx: &mut Context) { - // TODO: This doesn't work with stack alignment. - // assert_eq!( - // ctx.sp_depth, - // StackDepth(0), - // "imbalanced pushes and pops detected" - // ); + // We don't need to clean up the stack - `rsp` is restored and + // the calling function has its own register stack and will + // stomp on the registers from our stack if necessary. 
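    // Concretely, the `mov rsp, rbp` below discards any values still left on
    // the machine stack in one step, and `pop rbp` / `ret` then restore the
    // caller's frame.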
dynasm!(ctx.asm ; mov rsp, rbp ; pop rbp diff --git a/src/tests.rs b/src/tests.rs index fda8c6d64d..7cf63eee11 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -272,7 +272,7 @@ fn literals() { const FIBONACCI: &str = r#" (module - (func $fib (param $n i32) (param $_unused i32) (result i32) + (func $fib (param $n i32) (result i32) (if (result i32) (i32.eq (i32.const 0) @@ -298,7 +298,6 @@ const FIBONACCI: &str = r#" (get_local $n) (i32.const -1) ) - (i32.const 0) ) ;; fib(n - 2) (call $fib @@ -306,7 +305,6 @@ const FIBONACCI: &str = r#" (get_local $n) (i32.const -2) ) - (i32.const 0) ) ) ) @@ -327,7 +325,7 @@ fn fib() { for x in 0..10 { unsafe { assert_eq!( - translated.execute_func::<_, u32>(0, (x, 0u32)), + translated.execute_func::<_, u32>(0, (x,)), FIB_SEQ[x as usize] ); } @@ -346,5 +344,5 @@ fn bench_run(b: &mut test::Bencher) { let wasm = wabt::wat2wasm(FIBONACCI).unwrap(); let module = translate(&wasm).unwrap(); - b.iter(|| unsafe { module.execute_func::<_, u32>(0, (20, 0u32)) }); + b.iter(|| unsafe { module.execute_func::<_, u32>(0, (20,)) }); } From 17ecd049a1804dde8832a5cc8bee7e077123ea38 Mon Sep 17 00:00:00 2001 From: Jef Date: Thu, 13 Dec 2018 16:05:24 +0100 Subject: [PATCH 14/61] Register allocation V2 This lays the groundwork for other on-the-fly optimisations, like passing literals through in order to do const folding in linear time, while compiling. --- src/backend.rs | 749 ++++++++++++++++++++++++++++++++----------- src/function_body.rs | 52 ++- src/lib.rs | 2 + src/tests.rs | 16 +- 4 files changed, 590 insertions(+), 229 deletions(-) diff --git a/src/backend.rs b/src/backend.rs index e74c85af9b..bbf1abe663 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -10,6 +10,7 @@ const WORD_SIZE: u32 = 8; type GPR = u8; +#[derive(Copy, Clone)] struct GPRs { bits: u16, } @@ -36,13 +37,19 @@ const R12: u8 = 12; const R13: u8 = 13; const R14: u8 = 14; const R15: u8 = 15; +const NUM_GPRS: u8 = 16; impl GPRs { fn take(&mut self) -> GPR { let lz = self.bits.trailing_zeros(); - assert!(lz < 32, "ran out of free GPRs"); - self.bits &= !(1 << lz); - lz as GPR + assert!(lz < 16, "ran out of free GPRs"); + let gpr = lz as GPR; + self.mark_used(gpr); + gpr + } + + fn mark_used(&mut self, gpr: GPR) { + self.bits &= !(1 << gpr as u16); } fn release(&mut self, gpr: GPR) { @@ -50,62 +57,80 @@ impl GPRs { self.bits |= 1 << gpr; } + fn free_count(&self) -> u32 { + self.bits.count_ones() + } + fn is_free(&self, gpr: GPR) -> bool { (self.bits & (1 << gpr)) != 0 } } +#[derive(Copy, Clone)] pub struct Registers { - scratch_gprs: GPRs, + scratch: GPRs, +} + +impl Default for Registers { + fn default() -> Self { + Self::new() + } } impl Registers { pub fn new() -> Self { let mut result = Self { - scratch_gprs: GPRs::new(), + scratch: GPRs::new(), }; // Give ourselves a few scratch registers to work with, for now. - result.release_scratch_gpr(RAX); - result.release_scratch_gpr(RCX); - result.release_scratch_gpr(RDX); + for &scratch in SCRATCH_REGS { + result.release_scratch_gpr(scratch); + } + result } + // TODO: Add function that takes a scratch register if possible + // but otherwise gives a fresh stack location. 
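    // A sketch of that TODO (hypothetical; not implemented in this patch):
    //
    //     fn take_scratch_or_slot(&mut self, depth: &mut StackDepth) -> ValueLocation {
    //         if self.scratch.free_count() > 0 {
    //             ValueLocation::Reg(self.scratch.take())
    //         } else {
    //             depth.reserve(1);
    //             ValueLocation::Stack(/* next free spill slot */ 0)
    //         }
    //     }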
pub fn take_scratch_gpr(&mut self) -> GPR { - self.scratch_gprs.take() + self.scratch.take() } pub fn release_scratch_gpr(&mut self, gpr: GPR) { - self.scratch_gprs.release(gpr); + self.scratch.release(gpr); + } + + pub fn is_free(&self, gpr: GPR) -> bool { + self.scratch.is_free(gpr) + } + + pub fn free_scratch(&self) -> u32 { + self.scratch.free_count() } } -/// Describes location of a argument. -#[derive(Debug)] -enum ArgLocation { - /// Argument is passed via some register. +/// Describes location of a value. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +enum ValueLocation { + /// Value exists in a register. Reg(GPR), - /// Value is passed thru the stack. + /// Value exists on the stack. This is an offset relative to the + /// first local, and so will have to be adjusted with `adjusted_offset` + /// before reading (as RSP may have been changed by `push`/`pop`). Stack(i32), } // TODO: This assumes only system-v calling convention. // In system-v calling convention the first 6 arguments are passed via registers. // All rest arguments are passed on the stack. -const ARGS_IN_GPRS: &'static [GPR] = &[RDI, RSI, RDX, RCX, R8, R9]; - -/// Get a location for an argument at the given position. -fn abi_loc_for_arg(pos: u32) -> ArgLocation { - if let Some(®) = ARGS_IN_GPRS.get(pos as usize) { - ArgLocation::Reg(reg) - } else { - let stack_pos = pos - ARGS_IN_GPRS.len() as u32; - // +2 is because the first argument is located right after the saved frame pointer slot - // and the incoming return address. - let stack_offset = ((stack_pos + 2) * WORD_SIZE) as i32; - ArgLocation::Stack(stack_offset) - } -} +const ARGS_IN_GPRS: &[GPR] = &[RDI, RSI, RDX, RCX, R8, R9]; +// RAX is reserved for return values. In the future we want a system to allow +// use of specific registers by saving/restoring them. This would allow using +// RAX as a scratch register when we're not calling a function, and would also +// allow us to call instructions that require specific registers. +// +// List of scratch registers taken from https://wiki.osdev.org/System_V_ABI +const SCRATCH_REGS: &[GPR] = &[R10, R11]; pub struct CodeGenSession { assembler: Assembler, @@ -138,8 +163,8 @@ impl CodeGenSession { Context { asm: &mut self.assembler, func_starts: &self.func_starts, - regs: Registers::new(), - sp_depth: StackDepth(0), + block_state: Default::default(), + locals: Default::default(), } } @@ -177,14 +202,78 @@ impl TranslatedCodeSection { } } +// TODO: Immediates? 
We could implement on-the-fly const folding +#[derive(Copy, Clone)] +enum Value { + Local(u32), + Temp(GPR), +} + +impl Value { + fn location(&self, locals: &Locals) -> ValueLocation { + match *self { + Value::Local(loc) => local_location(locals, loc), + Value::Temp(reg) => ValueLocation::Reg(reg), + } + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +enum StackValue { + Local(u32), + Temp(GPR), + Pop, +} + +impl StackValue { + fn location(&self, locals: &Locals) -> Option { + match *self { + StackValue::Local(loc) => Some(local_location(locals, loc)), + StackValue::Temp(reg) => Some(ValueLocation::Reg(reg)), + StackValue::Pop => None, + } + } +} + +#[derive(Default)] +struct Locals { + // TODO: Use `ArrayVec` since we have a hard maximum (the number of registers) + locs: Vec, +} + +#[derive(Default, Clone)] +pub struct BlockState { + stack: Stack, + depth: StackDepth, + regs: Registers, +} + +fn adjusted_offset(ctx: &mut Context, offset: i32) -> i32 { + (ctx.block_state.depth.0 * WORD_SIZE) as i32 + offset +} + +fn local_location(locals: &Locals, index: u32) -> ValueLocation { + locals + .locs + .get(index as usize) + .cloned() + .unwrap_or(ValueLocation::Stack( + (index.saturating_sub(ARGS_IN_GPRS.len() as u32) * WORD_SIZE) as _, + )) +} + +type Stack = Vec; + pub struct Context<'a> { asm: &'a mut Assembler, func_starts: &'a Vec<(Option, DynamicLabel)>, - regs: Registers, /// Each push and pop on the value stack increments or decrements this value by 1 respectively. - sp_depth: StackDepth, + block_state: BlockState, + locals: Locals, } +impl<'a> Context<'a> {} + /// Label in code. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct Label(DynamicLabel); @@ -203,7 +292,7 @@ pub fn define_label(ctx: &mut Context, label: Label) { } /// Offset from starting value of SP counted in words. -#[derive(Debug, Copy, Clone, PartialEq, Eq)] +#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)] pub struct StackDepth(u32); impl StackDepth { @@ -216,146 +305,298 @@ impl StackDepth { } } -pub fn current_stack_depth(ctx: &Context) -> StackDepth { - ctx.sp_depth +pub fn current_block_state(ctx: &Context) -> BlockState { + ctx.block_state.clone() } -pub fn restore_stack_depth(ctx: &mut Context, stack_depth: StackDepth) { - ctx.sp_depth = stack_depth; +pub fn restore_block_state(ctx: &mut Context, block_state: BlockState) { + ctx.block_state = block_state; } -fn push_i32(ctx: &mut Context, gpr: GPR) { - // For now, do an actual push (and pop below). In the future, we could - // do on-the-fly register allocation here. 
- ctx.sp_depth.reserve(1); - dynasm!(ctx.asm - ; push Rq(gpr) - ); - ctx.regs.release_scratch_gpr(gpr); +pub fn push_return_value(ctx: &mut Context) { + ctx.block_state.stack.push(StackValue::Temp(RAX)); } -fn pop_i32(ctx: &mut Context) -> GPR { - ctx.sp_depth.free(1); - let gpr = ctx.regs.take_scratch_gpr(); - dynasm!(ctx.asm - ; pop Rq(gpr) - ); - gpr +fn push_i32(ctx: &mut Context, value: Value) { + let stack_loc = match value { + Value::Local(loc) => StackValue::Local(loc), + Value::Temp(gpr) => { + if ctx.block_state.regs.free_scratch() >= 1 { + StackValue::Temp(gpr) + } else { + ctx.block_state.depth.reserve(1); + dynasm!(ctx.asm + ; push Rq(gpr) + ); + ctx.block_state.regs.release_scratch_gpr(gpr); + StackValue::Pop + } + } + }; + + ctx.block_state.stack.push(stack_loc); } +fn pop_i32(ctx: &mut Context) -> Value { + match ctx.block_state.stack.pop().expect("Stack is empty") { + StackValue::Local(loc) => Value::Local(loc), + StackValue::Temp(reg) => Value::Temp(reg), + StackValue::Pop => { + ctx.block_state.depth.free(1); + let gpr = ctx.block_state.regs.take_scratch_gpr(); + dynasm!(ctx.asm + ; pop Rq(gpr) + ); + Value::Temp(gpr) + } + } +} + +fn pop_i32_into(ctx: &mut Context, dst: ValueLocation) { + let val = pop_i32(ctx); + let val_loc = val.location(&ctx.locals); + copy_value(ctx, val_loc, dst); + free_val(ctx, val); +} + +fn free_val(ctx: &mut Context, val: Value) { + match val { + Value::Temp(reg) => ctx.block_state.regs.release_scratch_gpr(reg), + Value::Local(_) => {} + } +} + +/// Puts this value into a register so that it can be efficiently read +fn into_reg(ctx: &mut Context, val: Value) -> GPR { + match val.location(&ctx.locals) { + ValueLocation::Stack(offset) => { + let offset = adjusted_offset(ctx, offset); + let scratch = ctx.block_state.regs.take_scratch_gpr(); + dynasm!(ctx.asm + ; mov Rq(scratch), [rsp + offset] + ); + scratch + } + ValueLocation::Reg(reg) => reg, + } +} + +/// Puts this value into a temporary register so that operations +/// on that register don't write to a local. +fn into_temp_reg(ctx: &mut Context, val: Value) -> GPR { + match val { + Value::Local(loc) => { + let scratch = ctx.block_state.regs.take_scratch_gpr(); + + match local_location(&ctx.locals, loc) { + ValueLocation::Stack(offset) => { + let offset = adjusted_offset(ctx, offset); + dynasm!(ctx.asm + ; mov Rq(scratch), [rsp + offset] + ); + } + ValueLocation::Reg(reg) => { + dynasm!(ctx.asm + ; mov Rq(scratch), Rq(reg) + ); + } + } + + scratch + } + Value::Temp(reg) => reg, + } +} + +// TODO: For the commutative instructions we can do operands in either +// order, so we can choose the operand order that creates the +// least unnecessary temps. 
pub fn i32_add(ctx: &mut Context) { let op0 = pop_i32(ctx); - let op1 = pop_i32(ctx); - dynasm!(ctx.asm - ; add Rd(op1), Rd(op0) - ); - push_i32(ctx, op1); - ctx.regs.release_scratch_gpr(op0); + let tmp = pop_i32(ctx); + let op1 = into_temp_reg(ctx, tmp); + match op0.location(&ctx.locals) { + ValueLocation::Reg(reg) => { + dynasm!(ctx.asm + ; add Rd(op1), Rd(reg) + ); + } + ValueLocation::Stack(offset) => { + let offset = adjusted_offset(ctx, offset); + dynasm!(ctx.asm + ; add Rd(op1), [rsp + offset] + ); + } + } + ctx.block_state.stack.push(StackValue::Temp(op1)); + free_val(ctx, op0); } pub fn i32_sub(ctx: &mut Context) { let op0 = pop_i32(ctx); - let op1 = pop_i32(ctx); - dynasm!(ctx.asm - ; sub Rd(op1), Rd(op0) - ); - push_i32(ctx, op1); - ctx.regs.release_scratch_gpr(op0); + let tmp = pop_i32(ctx); + let op1 = into_temp_reg(ctx, tmp); + match op0.location(&ctx.locals) { + ValueLocation::Reg(reg) => { + dynasm!(ctx.asm + ; sub Rd(op1), Rd(reg) + ); + } + ValueLocation::Stack(offset) => { + let offset = adjusted_offset(ctx, offset); + dynasm!(ctx.asm + ; sub Rd(op1), [rsp + offset] + ); + } + } + ctx.block_state.stack.push(StackValue::Temp(op1)); + free_val(ctx, op0); } pub fn i32_and(ctx: &mut Context) { let op0 = pop_i32(ctx); - let op1 = pop_i32(ctx); - dynasm!(ctx.asm - ; and Rd(op1), Rd(op0) - ); - push_i32(ctx, op1); - ctx.regs.release_scratch_gpr(op0); + let tmp = pop_i32(ctx); + let op1 = into_temp_reg(ctx, tmp); + match op0.location(&ctx.locals) { + ValueLocation::Reg(reg) => { + dynasm!(ctx.asm + ; and Rd(op1), Rd(reg) + ); + } + ValueLocation::Stack(offset) => { + let offset = adjusted_offset(ctx, offset); + dynasm!(ctx.asm + ; and Rd(op1), [rsp + offset] + ); + } + } + ctx.block_state.stack.push(StackValue::Temp(op1)); + free_val(ctx, op0); } pub fn i32_or(ctx: &mut Context) { let op0 = pop_i32(ctx); - let op1 = pop_i32(ctx); - dynasm!(ctx.asm - ; or Rd(op1), Rd(op0) - ); - push_i32(ctx, op1); - ctx.regs.release_scratch_gpr(op0); + let tmp = pop_i32(ctx); + let op1 = into_temp_reg(ctx, tmp); + match op0.location(&ctx.locals) { + ValueLocation::Reg(reg) => { + dynasm!(ctx.asm + ; or Rd(op1), Rd(reg) + ); + } + ValueLocation::Stack(offset) => { + let offset = adjusted_offset(ctx, offset); + dynasm!(ctx.asm + ; or Rd(op1), [rsp + offset] + ); + } + } + ctx.block_state.stack.push(StackValue::Temp(op1)); + free_val(ctx, op0); } pub fn i32_xor(ctx: &mut Context) { let op0 = pop_i32(ctx); - let op1 = pop_i32(ctx); - dynasm!(ctx.asm - ; xor Rd(op1), Rd(op0) - ); - push_i32(ctx, op1); - ctx.regs.release_scratch_gpr(op0); + let tmp = pop_i32(ctx); + let op1 = into_temp_reg(ctx, tmp); + match op0.location(&ctx.locals) { + ValueLocation::Reg(reg) => { + dynasm!(ctx.asm + ; xor Rd(op1), Rd(reg) + ); + } + ValueLocation::Stack(offset) => { + let offset = adjusted_offset(ctx, offset); + dynasm!(ctx.asm + ; xor Rd(op1), [rsp + offset] + ); + } + } + ctx.block_state.stack.push(StackValue::Temp(op1)); + free_val(ctx, op0); } pub fn i32_mul(ctx: &mut Context) { let op0 = pop_i32(ctx); - let op1 = pop_i32(ctx); - dynasm!(ctx.asm - ; imul Rd(op1), Rd(op0) - ); - push_i32(ctx, op1); - ctx.regs.release_scratch_gpr(op0); -} - -fn sp_relative_offset(ctx: &mut Context, slot_idx: u32) -> i32 { - ((ctx.sp_depth.0 as i32) + slot_idx as i32) * WORD_SIZE as i32 + let tmp = pop_i32(ctx); + let op1 = into_temp_reg(ctx, tmp); + match op0.location(&ctx.locals) { + ValueLocation::Reg(reg) => { + dynasm!(ctx.asm + ; imul Rd(op1), Rd(reg) + ); + } + ValueLocation::Stack(offset) => { + let offset = 
adjusted_offset(ctx, offset); + dynasm!(ctx.asm + ; imul Rd(op1), [rsp + offset] + ); + } + } + ctx.block_state.stack.push(StackValue::Temp(op1)); + free_val(ctx, op0); } pub fn get_local_i32(ctx: &mut Context, local_idx: u32) { - let gpr = ctx.regs.take_scratch_gpr(); - let offset = sp_relative_offset(ctx, local_idx); - dynasm!(ctx.asm - ; mov Rq(gpr), [rsp + offset] - ); - push_i32(ctx, gpr); + push_i32(ctx, Value::Local(local_idx)); } +// TODO: We can put locals that were spilled to the stack +// back into registers here. pub fn set_local_i32(ctx: &mut Context, local_idx: u32) { - let gpr = pop_i32(ctx); - let offset = sp_relative_offset(ctx, local_idx); - dynasm!(ctx.asm - ; mov [rsp + offset], Rq(gpr) - ); - ctx.regs.release_scratch_gpr(gpr); + let val = pop_i32(ctx); + let val_loc = val.location(&ctx.locals); + let dst_loc = local_location(&ctx.locals, local_idx); + copy_value(ctx, val_loc, dst_loc); + free_val(ctx, val); } +// TODO: Don't store literals at all, roll them into `Value` pub fn literal_i32(ctx: &mut Context, imm: i32) { - let gpr = ctx.regs.take_scratch_gpr(); + let gpr = ctx.block_state.regs.take_scratch_gpr(); dynasm!(ctx.asm ; mov Rd(gpr), imm ); - push_i32(ctx, gpr); + push_i32(ctx, Value::Temp(gpr)); } pub fn relop_eq_i32(ctx: &mut Context) { let right = pop_i32(ctx); let left = pop_i32(ctx); - let result = ctx.regs.take_scratch_gpr(); - dynasm!(ctx.asm - ; xor Rq(result), Rq(result) - ; cmp Rd(left), Rd(right) - ; sete Rb(result) - ); - push_i32(ctx, result); - ctx.regs.release_scratch_gpr(left); - ctx.regs.release_scratch_gpr(right); + let result = ctx.block_state.regs.take_scratch_gpr(); + let lreg = into_reg(ctx, left); + match right.location(&ctx.locals) { + ValueLocation::Stack(offset) => { + let offset = adjusted_offset(ctx, offset); + dynasm!(ctx.asm + ; xor Rq(result), Rq(result) + ; cmp Rd(lreg), [rsp + offset] + ; sete Rb(result) + ); + } + ValueLocation::Reg(rreg) => { + dynasm!(ctx.asm + ; xor Rq(result), Rq(result) + ; cmp Rd(lreg), Rd(rreg) + ; sete Rb(result) + ); + } + } + push_i32(ctx, Value::Temp(result)); + free_val(ctx, left); + free_val(ctx, right); } /// Pops i32 predicate and branches to the specified label /// if the predicate is equal to zero. pub fn pop_and_breq(ctx: &mut Context, label: Label) { - let predicate = pop_i32(ctx); + let val = pop_i32(ctx); + let predicate = into_temp_reg(ctx, val); dynasm!(ctx.asm ; test Rd(predicate), Rd(predicate) ; je =>label.0 ); - ctx.regs.release_scratch_gpr(predicate); + ctx.block_state.regs.release_scratch_gpr(predicate); } /// Branch unconditionally to the specified label. @@ -366,122 +607,246 @@ pub fn br(ctx: &mut Context, label: Label) { } pub fn prepare_return_value(ctx: &mut Context) { - let ret_gpr = pop_i32(ctx); - if ret_gpr != RAX { - dynasm!(ctx.asm - ; mov Rq(RAX), Rq(ret_gpr) - ); - ctx.regs.release_scratch_gpr(ret_gpr); - } + pop_i32_into(ctx, ValueLocation::Reg(RAX)); } -pub fn copy_incoming_arg(ctx: &mut Context, frame_size: u32, arg_pos: u32) { - let loc = abi_loc_for_arg(arg_pos); - - // First, ensure the argument is in a register. 
- let reg = match loc { - ArgLocation::Reg(reg) => reg, - ArgLocation::Stack(offset) => { - assert!( - ctx.regs.scratch_gprs.is_free(RAX), - "we assume that RAX can be used as a scratch register for now", - ); - let offset = offset + (frame_size * WORD_SIZE) as i32; - dynasm!(ctx.asm - ; mov Rq(RAX), [rsp + offset] - ); - RAX +fn copy_value(ctx: &mut Context, src: ValueLocation, dst: ValueLocation) { + match (src, dst) { + (ValueLocation::Stack(in_offset), ValueLocation::Stack(out_offset)) => { + let in_offset = adjusted_offset(ctx, in_offset); + let out_offset = adjusted_offset(ctx, out_offset); + if in_offset != out_offset { + let gpr = ctx.block_state.regs.take_scratch_gpr(); + dynasm!(ctx.asm + ; mov Rq(gpr), [rsp + in_offset] + ; mov [rsp + out_offset], Rq(gpr) + ); + ctx.block_state.regs.release_scratch_gpr(gpr); + } } - }; - - // And then move a value from a register into local variable area on the stack. - let offset = sp_relative_offset(ctx, arg_pos); - dynasm!(ctx.asm - ; mov [rsp + offset], Rq(reg) - ); + (ValueLocation::Reg(in_reg), ValueLocation::Stack(out_offset)) => { + let out_offset = adjusted_offset(ctx, out_offset); + dynasm!(ctx.asm + ; mov [rsp + out_offset], Rq(in_reg) + ); + } + (ValueLocation::Stack(in_offset), ValueLocation::Reg(out_reg)) => { + let in_offset = adjusted_offset(ctx, in_offset); + dynasm!(ctx.asm + ; mov Rq(out_reg), [rsp + in_offset] + ); + } + (ValueLocation::Reg(in_reg), ValueLocation::Reg(out_reg)) => { + if in_reg != out_reg { + dynasm!(ctx.asm + ; mov Rq(out_reg), Rq(in_reg) + ); + } + } + } } #[must_use] -fn pass_outgoing_args(ctx: &mut Context, arity: u32) -> i32 { - let mut stack_args = Vec::with_capacity((arity as usize).saturating_sub(ARGS_IN_GPRS.len())); - for arg_pos in (0..arity).rev() { - ctx.sp_depth.free(1); +pub struct CallCleanup { + restore_registers: Vec, + stack_depth: i32, +} - let loc = abi_loc_for_arg(arg_pos); - match loc { - ArgLocation::Reg(gpr) => { - dynasm!(ctx.asm - ; pop Rq(gpr) - ); +/// Make sure that any argument registers that will be used by the call are free +/// by storing them to the stack. +/// +/// Unfortunately, we can't elide this store if we're just passing arguments on +/// because these registers are caller-saved and so the callee can use them as +/// scratch space. +fn free_arg_registers(ctx: &mut Context, count: u32) { + if count == 0 { + return; + } + + for i in 0..ctx.locals.locs.len() { + match ctx.locals.locs[i] { + ValueLocation::Reg(reg) => { + if ARGS_IN_GPRS.contains(®) { + let offset = adjusted_offset(ctx, (i as u32 * WORD_SIZE) as _); + dynasm!(ctx.asm + ; mov [rsp + offset], Rq(reg) + ); + ctx.locals.locs[i] = ValueLocation::Stack(offset); + } } - ArgLocation::Stack(_) => { - let gpr = ctx.regs.take_scratch_gpr(); + _ => {} + } + } +} + +fn free_return_register(ctx: &mut Context, count: u32) { + if count == 0 { + return; + } + + for stack_val in &mut ctx.block_state.stack { + match stack_val.location(&ctx.locals) { + // For now it's impossible for a local to be in RAX but that might be + // possible in the future, so we check both cases. + Some(ValueLocation::Reg(RAX)) => { + let scratch = ctx.block_state.regs.take_scratch_gpr(); dynasm!(ctx.asm - ; pop Rq(gpr) + ; mov Rq(scratch), rax ); - stack_args.push(gpr); + *stack_val = StackValue::Temp(scratch); } + _ => {} + } + } +} + +// TODO: Use `ArrayVec`? +/// Saves volatile (i.e. caller-saved) registers before a function call, if they are used. 
+fn save_volatile(ctx: &mut Context) -> Vec<GPR> {
+    let mut out = vec![];
+
+    // TODO: If there are no `StackValue::Pop`s that need to be popped
+    // before we reach our `Temp` value, we can set the `StackValue`
+    // for the register to be restored to `StackValue::Pop` (and
+    // release the register!) instead of restoring it.
+    for &reg in SCRATCH_REGS.iter() {
+        if !ctx.block_state.regs.is_free(reg) {
+            dynasm!(ctx.asm
+                ; push Rq(reg)
+            );
+            out.push(reg);
         }
     }
-    let num_stack_args = stack_args.len() as i32;
-    dynasm!(ctx.asm
-        ; sub rsp, num_stack_args
-    );
-    for (stack_slot, gpr) in stack_args.into_iter().rev().enumerate() {
-        let offset = (stack_slot * WORD_SIZE as usize) as i32;
+    out
+}
+
+/// Writes the outgoing arguments into the registers and stack slots dictated by the
+/// System V calling convention.
+fn pass_outgoing_args(ctx: &mut Context, arity: u32) -> CallCleanup {
+    let num_stack_args = (arity as usize).saturating_sub(ARGS_IN_GPRS.len()) as i32;
+
+    let out = CallCleanup {
+        stack_depth: num_stack_args,
+        restore_registers: save_volatile(ctx),
+    };
+
+    // We pop stack arguments first, since arguments are passed right-to-left.
+    if num_stack_args > 0 {
+        let size = num_stack_args * WORD_SIZE as i32;
+
+        // Reserve space for the outgoing stack arguments (so we don't
+        // stomp on any locals or the value stack).
         dynasm!(ctx.asm
-            ; mov [rsp + offset], Rq(gpr)
+            ; sub rsp, size
         );
-        ctx.regs.release_scratch_gpr(gpr);
+        ctx.block_state.depth.reserve(num_stack_args as u32);
+
+        for stack_slot in (0..num_stack_args).rev() {
+            // Since the stack offset is from the bottom of the locals
+            // and we want to start from the actual RSP (so `offset = 0`
+            // writes to `[rsp]`), we subtract our current depth.
+            //
+            // We might want to do this in the future by having a separate
+            // `AbsoluteValueLocation` and `RelativeValueLocation`.
+            let offset =
+                stack_slot * WORD_SIZE as i32 - ctx.block_state.depth.0 as i32 * WORD_SIZE as i32;
+            pop_i32_into(ctx, ValueLocation::Stack(offset));
+        }
     }
 
-    num_stack_args
+    for reg in ARGS_IN_GPRS[..(arity as usize).min(ARGS_IN_GPRS.len())]
+        .iter()
+        .rev()
+    {
+        pop_i32_into(ctx, ValueLocation::Reg(*reg));
+    }
+
+    out
 }
 
-fn post_call_cleanup(ctx: &mut Context, num_stack_args: i32) {
-    dynasm!(ctx.asm
-        ; add rsp, num_stack_args
-    );
+/// Frees up the stack space used for stack-passed arguments and restores the value
+/// of volatile (i.e. caller-saved) registers to the state that they were in before
+/// the call.
+fn post_call_cleanup(ctx: &mut Context, mut cleanup: CallCleanup) { + if cleanup.stack_depth > 0 { + let size = cleanup.stack_depth * WORD_SIZE as i32; + dynasm!(ctx.asm + ; add rsp, size + ); + } + + for reg in cleanup.restore_registers.drain(..).rev() { + dynasm!(ctx.asm + ; pop Rq(reg) + ); + } } +/// Call a function with the given index pub fn call_direct(ctx: &mut Context, index: u32, arg_arity: u32, return_arity: u32) { - assert!(return_arity == 0 || return_arity == 1); + assert!( + return_arity == 0 || return_arity == 1, + "We don't support multiple return yet" + ); - let num_stack_args = pass_outgoing_args(ctx, arg_arity); + free_arg_registers(ctx, arg_arity); + free_return_register(ctx, return_arity); + + let cleanup = pass_outgoing_args(ctx, arg_arity); let label = &ctx.func_starts[index as usize].1; dynasm!(ctx.asm ; call =>*label ); - post_call_cleanup(ctx, num_stack_args); - - if return_arity == 1 { - dynasm!(ctx.asm - ; push rax - ); - ctx.sp_depth.reserve(1); - } + post_call_cleanup(ctx, cleanup); } -pub fn prologue(ctx: &mut Context, stack_slots: u32) { - let stack_slots = stack_slots; +// TODO: Reserve space to store RBX, RBP, and R12..R15 so we can use them +// as scratch registers +// TODO: Allow use of unused argument registers as scratch registers. +/// Writes the function prologue and stores the arguments as locals +pub fn start_function(ctx: &mut Context, arguments: u32, locals: u32) { + let reg_args = &ARGS_IN_GPRS[..(arguments as usize).min(ARGS_IN_GPRS.len())]; + + // We need space to store the register arguments if we need to call a function + // and overwrite these registers so we add `reg_args.len()` + let locals = locals + reg_args.len() as u32; // Align stack slots to the nearest even number. This is required // by x86-64 ABI. - let aligned_stack_slots = (stack_slots + 1) & !1; - + let aligned_stack_slots = (locals + 1) & !1; let framesize: i32 = aligned_stack_slots as i32 * WORD_SIZE as i32; + + ctx.locals.locs = reg_args + .iter() + .cloned() + .map(ValueLocation::Reg) + .chain( + (0..arguments.saturating_sub(ARGS_IN_GPRS.len() as _)) + // We add 2 here because 1 stack slot is used for the stack pointer and another is + // used for the return address. It's a magic number but there's not really a way + // around this. + .map(|arg_i| ValueLocation::Stack(((arg_i + 2) * WORD_SIZE) as i32 + framesize)), + ) + .collect(); + dynasm!(ctx.asm ; push rbp ; mov rbp, rsp - ; sub rsp, framesize ); - ctx.sp_depth.reserve(aligned_stack_slots - stack_slots); + + if framesize > 0 { + dynasm!(ctx.asm + ; sub rsp, framesize + ); + } } +/// Writes the function epilogue, restoring the stack pointer and returning to the +/// caller. pub fn epilogue(ctx: &mut Context) { - // We don't need to clean up the stack - `rsp` is restored and + // We don't need to clean up the stack - RSP is restored and // the calling function has its own register stack and will // stomp on the registers from our stack if necessary. dynasm!(ctx.asm diff --git a/src/function_body.rs b/src/function_body.rs index e27c8a3bfc..1ccce16f9c 100644 --- a/src/function_body.rs +++ b/src/function_body.rs @@ -56,31 +56,22 @@ struct ControlFrame { /// becomes polymorphic only after an instruction that never passes control further is executed, /// i.e. `unreachable`, `br` (but not `br_if`!), etc. stack_polymorphic: bool, - /// Relative stack depth at the beginning of the frame. 
- stack_depth: StackDepth, + /// State specific to the block (free temp registers, stack etc) which should be replaced + /// at the end of the block + block_state: BlockState, ty: Type, } impl ControlFrame { - pub fn new(kind: ControlFrameKind, stack_depth: StackDepth, ty: Type) -> ControlFrame { + pub fn new(kind: ControlFrameKind, block_state: BlockState, ty: Type) -> ControlFrame { ControlFrame { kind, - stack_depth, + block_state, ty, stack_polymorphic: false, } } - pub fn outgoing_stack_depth(&self) -> StackDepth { - let mut outgoing_stack_depth = self.stack_depth; - if self.ty != Type::EmptyBlockType { - // If there a return value then reserve expected outgoing stack depth value - // to account for the result value. - outgoing_stack_depth.reserve(1); - } - outgoing_stack_depth - } - /// Marks this control frame as reached stack-polymorphic state. pub fn mark_stack_polymorphic(&mut self) { self.stack_polymorphic = true; @@ -103,20 +94,16 @@ pub fn translate( Type::EmptyBlockType }; - let mut framesize = arg_count; + let mut num_locals = 0; for local in locals { let (count, _ty) = local?; - framesize += count; + num_locals += count; } let mut ctx = session.new_context(func_idx); let operators = body.get_operators_reader()?; - prologue(&mut ctx, framesize); - - for arg_pos in 0..arg_count { - copy_incoming_arg(&mut ctx, framesize, arg_pos); - } + start_function(&mut ctx, arg_count, num_locals); let mut control_frames = Vec::new(); @@ -127,7 +114,7 @@ pub fn translate( ControlFrameKind::Block { end_label: epilogue_label, }, - current_stack_depth(&ctx), + current_block_state(&ctx), return_ty, )); @@ -148,7 +135,7 @@ pub fn translate( control_frames.push(ControlFrame::new( ControlFrameKind::IfTrue { end_label, if_not }, - current_stack_depth(&ctx), + current_block_state(&ctx), ty, )); } @@ -157,7 +144,7 @@ pub fn translate( Some(ControlFrame { kind: ControlFrameKind::IfTrue { if_not, end_label }, ty, - stack_depth, + block_state, .. }) => { // Finalize if..else block by jumping to the `end_label`. @@ -167,7 +154,7 @@ pub fn translate( // 0 it will branch here. // After that reset stack depth to the value before entering `if` block. define_label(&mut ctx, if_not); - restore_stack_depth(&mut ctx, stack_depth); + restore_block_state(&mut ctx, block_state.clone()); // Carry over the `end_label`, so it will be resolved when the corresponding `end` // is encountered. @@ -175,7 +162,7 @@ pub fn translate( // Also note that we reset `stack_depth` to the value before entering `if` block. let mut frame = ControlFrame::new( ControlFrameKind::IfFalse { end_label }, - stack_depth, + block_state, ty, ); control_frames.push(frame); @@ -199,14 +186,12 @@ pub fn translate( define_label(&mut ctx, if_not); } - restore_stack_depth(&mut ctx, control_frame.outgoing_stack_depth()); - - if control_frames.len() == 0 { - // This is the last control frame. Perform the implicit return here. - if return_ty != Type::EmptyBlockType { - prepare_return_value(&mut ctx); - } + // This is the last control frame. Perform the implicit return here. 
+ if control_frames.len() == 0 && return_ty != Type::EmptyBlockType { + prepare_return_value(&mut ctx); } + + // restore_block_state(&mut ctx, control_frame.block_state); } Operator::I32Eq => relop_eq_i32(&mut ctx), Operator::I32Add => i32_add(&mut ctx), @@ -228,6 +213,7 @@ pub fn translate( callee_ty.params.len() as u32, callee_ty.returns.len() as u32, ); + push_return_value(&mut ctx); } _ => { trap(&mut ctx); diff --git a/src/lib.rs b/src/lib.rs index 71fecec427..ea1a4697d1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,8 +9,10 @@ extern crate wasmparser; #[macro_use] extern crate failure_derive; extern crate dynasmrt; +#[cfg(test)] #[macro_use] extern crate lazy_static; +#[cfg(test)] #[macro_use] extern crate quickcheck; extern crate wabt; diff --git a/src/tests.rs b/src/tests.rs index 7cf63eee11..df593a0911 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -201,7 +201,9 @@ fn function_read_args_spill_to_stack() { assert_eq!( { let translated = translate_wat(code); - let out: u32 = unsafe { translated.execute_func(0, (7, 6, 5, 4, 3, 2, 1, 0)) }; + let out: u32 = unsafe { + translated.execute_func(0, (7u32, 6u32, 5u32, 4u32, 3u32, 2u32, 1u32, 0u32)) + }; out }, 7 @@ -213,6 +215,7 @@ fn function_write_args_spill_to_stack() { let code = r#" (module (func (param i32) (param i32) (param i32) (param i32) + (param i32) (param i32) (param i32) (param i32) (param i32) (param i32) (param i32) (param i32) (result i32) @@ -225,16 +228,21 @@ fn function_write_args_spill_to_stack() { (get_local 5) (get_local 6) (get_local 7) + (get_local 8) + (get_local 9) + (get_local 10) + (get_local 11) ) ) (func $called + (param i32) (param i32) (param i32) (param i32) (param i32) (param i32) (param i32) (param i32) (param i32) (param i32) (param i32) (param i32) (result i32) (call $assert_zero - (get_local 7) + (get_local 11) ) (get_local 0) ) @@ -251,10 +259,10 @@ fn function_write_args_spill_to_stack() { assert_eq!( { let translated = translate_wat(code); - let out: u32 = unsafe { translated.execute_func(0, (7, 6, 5, 4, 3, 2, 1, 0)) }; + let out: u32 = unsafe { translated.execute_func(0, (11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)) }; out }, - 7 + 11 ); } #[test] From b832832c76f38cb1aca4ae3a7fe1b36467cbbce1 Mon Sep 17 00:00:00 2001 From: Jef Date: Fri, 14 Dec 2018 16:20:28 +0100 Subject: [PATCH 15/61] Add const folding, fix returning values from blocks --- src/backend.rs | 312 ++++++++++++++++++++++++++----------------- src/function_body.rs | 11 +- src/tests.rs | 3 +- 3 files changed, 200 insertions(+), 126 deletions(-) diff --git a/src/backend.rs b/src/backend.rs index bbf1abe663..e1f4a6ca03 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -118,6 +118,8 @@ enum ValueLocation { /// first local, and so will have to be adjusted with `adjusted_offset` /// before reading (as RSP may have been changed by `push`/`pop`). Stack(i32), + /// Value is a literal (TODO: Support more than just `i32`) + Immediate(i32), } // TODO: This assumes only system-v calling convention. 
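A note on what the new `Immediate` variant buys (this sketch is ours, not code from the series): once literals are tracked symbolically on the value stack, a binary operation whose operands are both literals can be folded at translation time without ever touching a register. A minimal standalone model of that fast path, with `Val` standing in for the patch's `Value` type:

    #[derive(Debug, Copy, Clone, PartialEq)]
    enum Val {
        Immediate(i32), // known at translation time
        Temp,           // only known at run time, lives in a register
    }

    // Mirrors the shape of the `commutative_binop!` fast path: if both
    // operands are immediates, fold them and push another immediate;
    // otherwise real code has to be emitted.
    fn try_fold(op0: Val, op1: Val, f: impl Fn(i32, i32) -> i32) -> Option<Val> {
        match (op0, op1) {
            (Val::Immediate(a), Val::Immediate(b)) => Some(Val::Immediate(f(a, b))),
            _ => None,
        }
    }

    fn main() {
        // `(i32.add (i32.const 2) (i32.const 40))` emits no instructions at all.
        let folded = try_fold(Val::Immediate(2), Val::Immediate(40), i32::wrapping_add);
        assert_eq!(folded, Some(Val::Immediate(42)));
        // A runtime operand defeats folding and falls through to codegen.
        assert_eq!(try_fold(Val::Temp, Val::Immediate(40), i32::wrapping_add), None);
    }

The real implementation does this inside `commutative_binop!` below, falling back to register or stack operands whenever either value is only known at run time.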
@@ -207,13 +209,22 @@ impl TranslatedCodeSection { enum Value { Local(u32), Temp(GPR), + Immediate(i32), } impl Value { + fn immediate(&self) -> Option { + match *self { + Value::Immediate(i) => Some(i), + _ => None, + } + } + fn location(&self, locals: &Locals) -> ValueLocation { match *self { Value::Local(loc) => local_location(locals, loc), Value::Temp(reg) => ValueLocation::Reg(reg), + Value::Immediate(reg) => ValueLocation::Immediate(reg), } } } @@ -222,6 +233,7 @@ impl Value { enum StackValue { Local(u32), Temp(GPR), + Immediate(i32), Pop, } @@ -229,6 +241,7 @@ impl StackValue { fn location(&self, locals: &Locals) -> Option { match *self { StackValue::Local(loc) => Some(local_location(locals, loc)), + StackValue::Immediate(i) => Some(ValueLocation::Immediate(i)), StackValue::Temp(reg) => Some(ValueLocation::Reg(reg)), StackValue::Pop => None, } @@ -244,7 +257,7 @@ struct Locals { #[derive(Default, Clone)] pub struct BlockState { stack: Stack, - depth: StackDepth, + pub depth: StackDepth, regs: Registers, } @@ -309,6 +322,37 @@ pub fn current_block_state(ctx: &Context) -> BlockState { ctx.block_state.clone() } +pub fn return_from_block(ctx: &mut Context, new_depth: StackDepth) { + let diff = ((ctx.block_state.depth.0 - new_depth.0) * WORD_SIZE) as i32; + + if let Some(loc) = ctx.block_state.stack.last().unwrap().location(&ctx.locals) { + match loc { + ValueLocation::Reg(r) => { + dynasm!(ctx.asm + ; push Rq(r) + ); + } + ValueLocation::Stack(offset) => { + let offset = adjusted_offset(ctx, offset); + dynasm!(ctx.asm + ; push QWORD [rsp + offset] + ); + } + ValueLocation::Immediate(imm) => { + dynasm!(ctx.asm + ; push imm + ); + } + } + } + // If `location` is `None` then we don't need to do anything. +} + +pub fn push_block_return_value(ctx: &mut Context) { + ctx.block_state.depth.reserve(1); + ctx.block_state.stack.push(StackValue::Pop); +} + pub fn restore_block_state(ctx: &mut Context, block_state: BlockState) { ctx.block_state = block_state; } @@ -320,6 +364,7 @@ pub fn push_return_value(ctx: &mut Context) { fn push_i32(ctx: &mut Context, value: Value) { let stack_loc = match value { Value::Local(loc) => StackValue::Local(loc), + Value::Immediate(i) => StackValue::Immediate(i), Value::Temp(gpr) => { if ctx.block_state.regs.free_scratch() >= 1 { StackValue::Temp(gpr) @@ -340,6 +385,7 @@ fn push_i32(ctx: &mut Context, value: Value) { fn pop_i32(ctx: &mut Context) -> Value { match ctx.block_state.stack.pop().expect("Stack is empty") { StackValue::Local(loc) => Value::Local(loc), + StackValue::Immediate(i) => Value::Immediate(i), StackValue::Temp(reg) => Value::Temp(reg), StackValue::Pop => { ctx.block_state.depth.free(1); @@ -362,7 +408,7 @@ fn pop_i32_into(ctx: &mut Context, dst: ValueLocation) { fn free_val(ctx: &mut Context, val: Value) { match val { Value::Temp(reg) => ctx.block_state.regs.release_scratch_gpr(reg), - Value::Local(_) => {} + Value::Local(_) | Value::Immediate(_) => {} } } @@ -377,6 +423,13 @@ fn into_reg(ctx: &mut Context, val: Value) -> GPR { ); scratch } + ValueLocation::Immediate(i) => { + let scratch = ctx.block_state.regs.take_scratch_gpr(); + dynasm!(ctx.asm + ; mov Rq(scratch), i + ); + scratch + } ValueLocation::Reg(reg) => reg, } } @@ -400,42 +453,88 @@ fn into_temp_reg(ctx: &mut Context, val: Value) -> GPR { ; mov Rq(scratch), Rq(reg) ); } + ValueLocation::Immediate(_) => { + panic!("We shouldn't be storing immediates in locals for now") + } } scratch } + Value::Immediate(i) => { + let scratch = ctx.block_state.regs.take_scratch_gpr(); + + 
dynasm!(ctx.asm
+                ; mov Rq(scratch), i
+            );
+
+            scratch
+        }
         Value::Temp(reg) => reg,
     }
 }
 
-// TODO: For the commutative instructions we can do operands in either
-//       order, so we can choose the operand order that creates the
-//       least unnecessary temps.
-pub fn i32_add(ctx: &mut Context) {
-    let op0 = pop_i32(ctx);
-    let tmp = pop_i32(ctx);
-    let op1 = into_temp_reg(ctx, tmp);
-    match op0.location(&ctx.locals) {
-        ValueLocation::Reg(reg) => {
-            dynasm!(ctx.asm
-                ; add Rd(op1), Rd(reg)
-            );
-        }
-        ValueLocation::Stack(offset) => {
-            let offset = adjusted_offset(ctx, offset);
-            dynasm!(ctx.asm
-                ; add Rd(op1), [rsp + offset]
-            );
+macro_rules! commutative_binop {
+    ($name:ident, $instr:ident, $const_fallback:expr) => {
+        pub fn $name(ctx: &mut Context) {
+            let op0 = pop_i32(ctx);
+            let op1 = pop_i32(ctx);
+
+            if let Some(i1) = op1.immediate() {
+                if let Some(i0) = op0.immediate() {
+                    ctx.block_state.stack.push(StackValue::Immediate($const_fallback(i1, i0)));
+                    return;
+                }
+            }
+
+            let (op1, op0) = match op1 {
+                Value::Temp(reg) => (reg, op0),
+                _ => (into_temp_reg(ctx, op0), op1),
+            };
+
+            match op0.location(&ctx.locals) {
+                ValueLocation::Reg(reg) => {
+                    dynasm!(ctx.asm
+                        ; $instr Rd(op1), Rd(reg)
+                    );
+                }
+                ValueLocation::Stack(offset) => {
+                    let offset = adjusted_offset(ctx, offset);
+                    dynasm!(ctx.asm
+                        ; $instr Rd(op1), [rsp + offset]
+                    );
+                }
+                ValueLocation::Immediate(i) => {
+                    // The operand is a compile-time constant: go through a
+                    // scratch register, since not every instruction here
+                    // takes an immediate operand (e.g. `imul`).
+                    let scratch = ctx.block_state.regs.take_scratch_gpr();
+                    dynasm!(ctx.asm
+                        ; mov Rd(scratch), i
+                        ; $instr Rd(op1), Rd(scratch)
+                    );
+                    ctx.block_state.regs.release_scratch_gpr(scratch);
+                }
+            }
+
+            ctx.block_state.stack.push(StackValue::Temp(op1));
+            free_val(ctx, op0);
         }
     }
-    ctx.block_state.stack.push(StackValue::Temp(op1));
-    free_val(ctx, op0);
 }
 
+commutative_binop!(i32_add, add, |a, b| a + b);
+commutative_binop!(i32_and, and, |a, b| a & b);
+commutative_binop!(i32_or, or, |a, b| a | b);
+commutative_binop!(i32_xor, xor, |a, b| a ^ b);
+commutative_binop!(i32_mul, imul, |a, b| a * b);
+
 pub fn i32_sub(ctx: &mut Context) {
     let op0 = pop_i32(ctx);
-    let tmp = pop_i32(ctx);
-    let op1 = into_temp_reg(ctx, tmp);
+    let op1 = pop_i32(ctx);
+
+    if let Some(i1) = op1.immediate() {
+        if let Some(i0) = op0.immediate() {
+            ctx.block_state.stack.push(StackValue::Immediate(i1 - i0));
+            return;
+        }
+    }
+
+    let op1 = into_temp_reg(ctx, op1);
     match op0.location(&ctx.locals) {
         ValueLocation::Reg(reg) => {
             dynasm!(ctx.asm
@@ -448,91 +547,14 @@ pub fn i32_sub(ctx: &mut Context) {
             ; sub Rd(op1), [rsp + offset]
             );
         }
-    }
-    ctx.block_state.stack.push(StackValue::Temp(op1));
-    free_val(ctx, op0);
-}
-
-pub fn i32_and(ctx: &mut Context) {
-    let op0 = pop_i32(ctx);
-    let tmp = pop_i32(ctx);
-    let op1 = into_temp_reg(ctx, tmp);
-    match op0.location(&ctx.locals) {
-        ValueLocation::Reg(reg) => {
-            dynasm!(ctx.asm
-                ; and Rd(op1), Rd(reg)
-            );
-        }
-        ValueLocation::Stack(offset) => {
-            let offset = adjusted_offset(ctx, offset);
+        ValueLocation::Immediate(i) => {
+            // Constant operand: emit it as an immediate, not a stack load.
             dynasm!(ctx.asm
-                ; and Rd(op1), [rsp + offset]
+                ; sub Rd(op1), i
             );
         }
     }
-    ctx.block_state.stack.push(StackValue::Temp(op1));
-    free_val(ctx, op0);
-}
-
-pub fn i32_or(ctx: &mut Context) {
-    let op0 = pop_i32(ctx);
-    let tmp = pop_i32(ctx);
-    let op1 = into_temp_reg(ctx, tmp);
-    match op0.location(&ctx.locals) {
-        ValueLocation::Reg(reg) => {
-            dynasm!(ctx.asm
-                ; or Rd(op1), Rd(reg)
-            );
-        }
-        ValueLocation::Stack(offset) => {
-            let offset = adjusted_offset(ctx, offset);
-            dynasm!(ctx.asm
-                ; or Rd(op1), [rsp + offset]
-            );
-        }
-    }
-    ctx.block_state.stack.push(StackValue::Temp(op1));
-    free_val(ctx, op0);
-}
-
-pub fn i32_xor(ctx: &mut Context) {
- let op0 = pop_i32(ctx); - let tmp = pop_i32(ctx); - let op1 = into_temp_reg(ctx, tmp); - match op0.location(&ctx.locals) { - ValueLocation::Reg(reg) => { - dynasm!(ctx.asm - ; xor Rd(op1), Rd(reg) - ); - } - ValueLocation::Stack(offset) => { - let offset = adjusted_offset(ctx, offset); - dynasm!(ctx.asm - ; xor Rd(op1), [rsp + offset] - ); - } - } - ctx.block_state.stack.push(StackValue::Temp(op1)); - free_val(ctx, op0); -} - -pub fn i32_mul(ctx: &mut Context) { - let op0 = pop_i32(ctx); - let tmp = pop_i32(ctx); - let op1 = into_temp_reg(ctx, tmp); - match op0.location(&ctx.locals) { - ValueLocation::Reg(reg) => { - dynasm!(ctx.asm - ; imul Rd(op1), Rd(reg) - ); - } - ValueLocation::Stack(offset) => { - let offset = adjusted_offset(ctx, offset); - dynasm!(ctx.asm - ; imul Rd(op1), [rsp + offset] - ); - } - } ctx.block_state.stack.push(StackValue::Temp(op1)); free_val(ctx, op0); } @@ -551,37 +573,67 @@ pub fn set_local_i32(ctx: &mut Context, local_idx: u32) { free_val(ctx, val); } -// TODO: Don't store literals at all, roll them into `Value` pub fn literal_i32(ctx: &mut Context, imm: i32) { - let gpr = ctx.block_state.regs.take_scratch_gpr(); - dynasm!(ctx.asm - ; mov Rd(gpr), imm - ); - push_i32(ctx, Value::Temp(gpr)); + push_i32(ctx, Value::Immediate(imm)); } pub fn relop_eq_i32(ctx: &mut Context) { let right = pop_i32(ctx); let left = pop_i32(ctx); let result = ctx.block_state.regs.take_scratch_gpr(); - let lreg = into_reg(ctx, left); - match right.location(&ctx.locals) { - ValueLocation::Stack(offset) => { - let offset = adjusted_offset(ctx, offset); - dynasm!(ctx.asm - ; xor Rq(result), Rq(result) - ; cmp Rd(lreg), [rsp + offset] - ; sete Rb(result) - ); + + if let Some(i) = left.immediate() { + match right.location(&ctx.locals) { + ValueLocation::Stack(offset) => { + let offset = adjusted_offset(ctx, offset); + dynasm!(ctx.asm + ; xor Rq(result), Rq(result) + ; cmp DWORD [rsp + offset], i + ; sete Rb(result) + ); + } + ValueLocation::Reg(rreg) => { + dynasm!(ctx.asm + ; xor Rq(result), Rq(result) + ; cmp Rd(rreg), i + ; sete Rb(result) + ); + } + ValueLocation::Immediate(right) => { + let is_equal = if i == right { 1i8 } else { 0 }; + dynasm!(ctx.asm + ; mov Rb(result), is_equal + ); + } } - ValueLocation::Reg(rreg) => { - dynasm!(ctx.asm - ; xor Rq(result), Rq(result) - ; cmp Rd(lreg), Rd(rreg) - ; sete Rb(result) - ); + } else { + let lreg = into_reg(ctx, left); + match right.location(&ctx.locals) { + ValueLocation::Stack(offset) => { + let offset = adjusted_offset(ctx, offset); + dynasm!(ctx.asm + ; xor Rq(result), Rq(result) + ; cmp Rd(lreg), [rsp + offset] + ; sete Rb(result) + ); + } + ValueLocation::Reg(rreg) => { + dynasm!(ctx.asm + ; xor Rq(result), Rq(result) + ; cmp Rd(lreg), Rd(rreg) + ; sete Rb(result) + ); + } + ValueLocation::Immediate(i) => { + dynasm!(ctx.asm + ; xor Rq(result), Rq(result) + ; cmp Rd(lreg), i + ; sete Rb(result) + ); + } } } + push_i32(ctx, Value::Temp(result)); free_val(ctx, left); free_val(ctx, right); @@ -630,6 +682,12 @@ fn copy_value(ctx: &mut Context, src: ValueLocation, dst: ValueLocation) { ; mov [rsp + out_offset], Rq(in_reg) ); } + (ValueLocation::Immediate(i), ValueLocation::Stack(out_offset)) => { + let out_offset = adjusted_offset(ctx, out_offset); + dynasm!(ctx.asm + ; mov DWORD [rsp + out_offset], i + ); + } (ValueLocation::Stack(in_offset), ValueLocation::Reg(out_reg)) => { let in_offset = adjusted_offset(ctx, in_offset); dynasm!(ctx.asm @@ -643,6 +701,12 @@ fn copy_value(ctx: &mut Context, src: ValueLocation, dst: ValueLocation) { 
); } } + (ValueLocation::Immediate(i), ValueLocation::Reg(out_reg)) => { + dynasm!(ctx.asm + ; mov Rq(out_reg), i + ); + } + (_, ValueLocation::Immediate(_)) => panic!("Tried to copy to an immediate value!"), } } diff --git a/src/function_body.rs b/src/function_body.rs index 1ccce16f9c..5e46e0dfba 100644 --- a/src/function_body.rs +++ b/src/function_body.rs @@ -147,6 +147,10 @@ pub fn translate( block_state, .. }) => { + if ty != Type::EmptyBlockType { + return_from_block(&mut ctx, block_state.depth); + } + // Finalize if..else block by jumping to the `end_label`. br(&mut ctx, end_label); @@ -174,6 +178,10 @@ pub fn translate( Operator::End => { let control_frame = control_frames.pop().expect("control stack is never empty"); + if control_frame.ty != Type::EmptyBlockType && !control_frames.is_empty() { + return_from_block(&mut ctx, control_frame.block_state.depth); + } + if !control_frame.kind.is_loop() { // Branches to a control frame with block type directs control flow to the header of the loop // and we don't need to resolve it here. Branching to other control frames always lead @@ -191,7 +199,8 @@ pub fn translate( prepare_return_value(&mut ctx); } - // restore_block_state(&mut ctx, control_frame.block_state); + restore_block_state(&mut ctx, control_frame.block_state); + push_block_return_value(&mut ctx); } Operator::I32Eq => relop_eq_i32(&mut ctx), Operator::I32Add => i32_add(&mut ctx), diff --git a/src/tests.rs b/src/tests.rs index df593a0911..09a18c38e9 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -259,7 +259,8 @@ fn function_write_args_spill_to_stack() { assert_eq!( { let translated = translate_wat(code); - let out: u32 = unsafe { translated.execute_func(0, (11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)) }; + let out: u32 = + unsafe { translated.execute_func(0, (11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)) }; out }, 11 From 1e04dc90b661b26528a19884bd27943017e244e3 Mon Sep 17 00:00:00 2001 From: Jef Date: Fri, 14 Dec 2018 16:35:48 +0100 Subject: [PATCH 16/61] Make more tests quickcheck-compatible, remove unused code --- src/backend.rs | 4 +-- src/function_body.rs | 4 +-- src/tests.rs | 59 +++++++++++++++++++------------------------- 3 files changed, 28 insertions(+), 39 deletions(-) diff --git a/src/backend.rs b/src/backend.rs index e1f4a6ca03..8c15536ff7 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -322,9 +322,7 @@ pub fn current_block_state(ctx: &Context) -> BlockState { ctx.block_state.clone() } -pub fn return_from_block(ctx: &mut Context, new_depth: StackDepth) { - let diff = ((ctx.block_state.depth.0 - new_depth.0) * WORD_SIZE) as i32; - +pub fn return_from_block(ctx: &mut Context) { if let Some(loc) = ctx.block_state.stack.last().unwrap().location(&ctx.locals) { match loc { ValueLocation::Reg(r) => { diff --git a/src/function_body.rs b/src/function_body.rs index 5e46e0dfba..d35bf3d05b 100644 --- a/src/function_body.rs +++ b/src/function_body.rs @@ -148,7 +148,7 @@ pub fn translate( .. }) => { if ty != Type::EmptyBlockType { - return_from_block(&mut ctx, block_state.depth); + return_from_block(&mut ctx); } // Finalize if..else block by jumping to the `end_label`. 
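To picture the block-result protocol this patch settles on (a standalone sketch under our own names; the real code is `return_from_block` and `push_block_return_value` in backend.rs): the result is parked in RAX before the block's register bookkeeping is thrown away, then re-enters the parent's value stack as a temp that lives in RAX.

    // Symbolic model of ending a value-producing block. RAX is a safe home
    // for the result because it is never handed out as a scratch register
    // at this point in the series.
    #[derive(Clone, Debug, PartialEq)]
    struct BlockState {
        stack: Vec<&'static str>, // symbolic value stack
    }

    fn end_value_block(inner: &mut BlockState, parent: &mut BlockState) {
        // `return_from_block`: pop the result into RAX while the inner state
        // (and whatever scratch register the result lived in) still exists.
        let _result = inner.stack.pop().expect("block result");
        // `end_block` then discards `inner`; `push_block_return_value`
        // re-pushes the result as a temp that lives in RAX.
        parent.stack.push("temp(rax)");
    }

    fn main() {
        let mut inner = BlockState { stack: vec!["temp(r10)"] };
        let mut parent = BlockState { stack: vec![] };
        end_value_block(&mut inner, &mut parent);
        assert_eq!(parent.stack, vec!["temp(rax)"]);
    }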
@@ -179,7 +179,7 @@ pub fn translate( let control_frame = control_frames.pop().expect("control stack is never empty"); if control_frame.ty != Type::EmptyBlockType && !control_frames.is_empty() { - return_from_block(&mut ctx, control_frame.block_state.depth); + return_from_block(&mut ctx); } if !control_frame.kind.is_loop() { diff --git a/src/tests.rs b/src/tests.rs index 09a18c38e9..822ebb507a 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -19,7 +19,7 @@ fn empty() { } macro_rules! binop_test { - ($op:ident, $func:path) => { + ($op:ident, $func:expr) => { quickcheck! { fn $op(a: u32, b: u32) -> bool { static CODE: &str = concat!( @@ -45,40 +45,27 @@ binop_test!(or, std::ops::BitOr::bitor); binop_test!(xor, std::ops::BitXor::bitxor); binop_test!(mul, u32::wrapping_mul); -#[test] -fn relop_eq() { - const CASES: &[(u32, u32, u32)] = &[ - (0, 0, 1), - (0, 1, 0), - (1, 0, 0), - (1, 1, 1), - (1312, 1, 0), - (1312, 1312, 1), - ]; +quickcheck! { + fn relop_eq(a: u32, b: u32) -> bool{ + static CODE: &str = r#" + (module + (func (param i32) (param i32) (result i32) (i32.eq (get_local 0) (get_local 1))) + ) + "#; - let code = r#" -(module - (func (param i32) (param i32) (result i32) (i32.eq (get_local 0) (get_local 1))) -) - "#; + lazy_static! { + static ref TRANSLATED: TranslatedModule = translate_wat(CODE); + } - for (a, b, expected) in CASES { - assert_eq!(execute_wat(code, *a, *b), *expected); + let out = unsafe { TRANSLATED.execute_func::<(u32, u32), u32>(0, (a, b)) }; + + (a == b) == (out == 1) } } -#[test] -fn if_then_else() { - const CASES: &[(u32, u32, u32)] = &[ - (0, 1, 1), - (0, 0, 0), - (1, 0, 0), - (1, 1, 1), - (1312, 1, 1), - (1312, 1312, 1312), - ]; - - let code = r#" +quickcheck! { + fn if_then_else(a: u32, b: u32) -> bool { + const CODE: &str = r#" (module (func (param i32) (param i32) (result i32) (if (result i32) @@ -91,13 +78,17 @@ fn if_then_else() { ) ) ) - "#; + "#; - for (a, b, expected) in CASES { - assert_eq!(execute_wat(code, *a, *b), *expected, "{}, {}", a, b); + lazy_static! 
{ + static ref TRANSLATED: TranslatedModule = translate_wat(CODE); + } + + let out = unsafe { TRANSLATED.execute_func::<(u32, u32), u32>(0, (a, b)) }; + + out == (if a == b { a } else { b }) } } - #[test] fn if_without_result() { let code = r#" From bd2ee53c8983043ac3fb0a3746964ab02706f058 Mon Sep 17 00:00:00 2001 From: Jef Date: Sat, 15 Dec 2018 16:39:38 +0100 Subject: [PATCH 17/61] Optimize `pop_i32_into`, check more fib values --- src/backend.rs | 31 +++++++++++++++++++++++++++---- src/tests.rs | 20 +++++++++++++------- 2 files changed, 40 insertions(+), 11 deletions(-) diff --git a/src/backend.rs b/src/backend.rs index 8c15536ff7..1c1dc144ec 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -397,10 +397,33 @@ fn pop_i32(ctx: &mut Context) -> Value { } fn pop_i32_into(ctx: &mut Context, dst: ValueLocation) { - let val = pop_i32(ctx); - let val_loc = val.location(&ctx.locals); - copy_value(ctx, val_loc, dst); - free_val(ctx, val); + let to_move = match ctx.block_state.stack.pop().expect("Stack is empty") { + StackValue::Local(loc) => Value::Local(loc), + StackValue::Immediate(i) => Value::Immediate(i), + StackValue::Temp(reg) => Value::Temp(reg), + StackValue::Pop => { + ctx.block_state.depth.free(1); + match dst { + ValueLocation::Reg(r) => dynasm!(ctx.asm + ; pop Rq(r) + ), + ValueLocation::Stack(offset) => { + let offset = adjusted_offset(ctx, offset); + dynasm!(ctx.asm + ; pop QWORD [rsp + offset] + ) + } + ValueLocation::Immediate(_) => panic!("Tried to write to literal!"), + } + + // DO NOT DO A `copy_val` + return; + } + }; + + let src = to_move.location(&ctx.locals); + copy_value(ctx, src, dst); + free_val(ctx, to_move); } fn free_val(ctx: &mut Context, val: Value) { diff --git a/src/tests.rs b/src/tests.rs index 822ebb507a..d9368f10b7 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -317,17 +317,23 @@ const FIBONACCI: &str = r#" #[test] fn fib() { - // fac(x) = y <=> (x, y) - const FIB_SEQ: &[u32] = &[1, 1, 2, 3, 5, 8, 13, 21, 34, 55]; + fn fib(n: u32) -> u32 { + let (mut a, mut b) = (1, 1); + + for _ in 0..n { + let old_a = a; + a = b; + b += old_a; + } + + a + } let translated = translate_wat(FIBONACCI); - for x in 0..10 { + for x in 0..30 { unsafe { - assert_eq!( - translated.execute_func::<_, u32>(0, (x,)), - FIB_SEQ[x as usize] - ); + assert_eq!(translated.execute_func::<_, u32>(0, (x,)), fib(x)); } } } From 23b5a56a7d202c4dcb6484cc3688d2da01e22300 Mon Sep 17 00:00:00 2001 From: Jef Date: Mon, 17 Dec 2018 12:16:40 +0100 Subject: [PATCH 18/61] Fix locals not being restored properly (which may cause us to read garbage values from the stack) --- Cargo.toml | 1 + src/backend.rs | 180 ++++++++++++++++++++++++------------------- src/function_body.rs | 4 +- src/lib.rs | 3 +- src/tests.rs | 2 + 5 files changed, 106 insertions(+), 84 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a6e3028e0a..2881b13085 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ keywords = ["webassembly", "wasm", "compile", "compiler", "jit"] publish = false [dependencies] +arrayvec = "0.4" dynasm = "0.2.3" dynasmrt = "0.2.3" wasmparser = "0.21.6" diff --git a/src/backend.rs b/src/backend.rs index 1c1dc144ec..f01eeecdf1 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -1,5 +1,9 @@ #![allow(dead_code)] // for now +// Since we want this to be linear-time, we never want to iterate over a `Vec`. `ArrayVec`s have a hard, +// small maximum size and so we can consider iterating over them to be essentially constant-time. 
+use arrayvec::ArrayVec; + use dynasmrt::x64::Assembler; use dynasmrt::{AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi, ExecutableBuffer}; use error::Error; @@ -166,7 +170,7 @@ impl CodeGenSession { asm: &mut self.assembler, func_starts: &self.func_starts, block_state: Default::default(), - locals: Default::default(), + original_locals: Default::default(), } } @@ -222,7 +226,7 @@ impl Value { fn location(&self, locals: &Locals) -> ValueLocation { match *self { - Value::Local(loc) => local_location(locals, loc), + Value::Local(loc) => locals.get(loc), Value::Temp(reg) => ValueLocation::Reg(reg), Value::Immediate(reg) => ValueLocation::Immediate(reg), } @@ -240,7 +244,7 @@ enum StackValue { impl StackValue { fn location(&self, locals: &Locals) -> Option { match *self { - StackValue::Local(loc) => Some(local_location(locals, loc)), + StackValue::Local(loc) => Some(locals.get(loc)), StackValue::Immediate(i) => Some(ValueLocation::Immediate(i)), StackValue::Temp(reg) => Some(ValueLocation::Reg(reg)), StackValue::Pop => None, @@ -248,10 +252,30 @@ impl StackValue { } } -#[derive(Default)] +#[derive(Default, Clone)] struct Locals { - // TODO: Use `ArrayVec` since we have a hard maximum (the number of registers) - locs: Vec, + register_arguments: ArrayVec<[ValueLocation; ARGS_IN_GPRS.len()]>, + num_stack_args: u32, + num_local_stack_slots: u32, +} + +impl Locals { + fn get(&self, index: u32) -> ValueLocation { + self.register_arguments + .get(index as usize) + .cloned() + .unwrap_or_else(|| { + let stack_index = index - self.register_arguments.len() as u32; + if stack_index < self.num_stack_args { + ValueLocation::Stack( + ((stack_index + self.num_local_stack_slots + 2) * WORD_SIZE) as _, + ) + } else { + let stack_index = stack_index - self.num_stack_args; + ValueLocation::Stack((stack_index * WORD_SIZE) as _) + } + }) + } } #[derive(Default, Clone)] @@ -259,22 +283,16 @@ pub struct BlockState { stack: Stack, pub depth: StackDepth, regs: Registers, + /// This is the _current_ locals, since we can shuffle them about during function calls. + /// We will restore this to be the same state as the `Locals` in `Context` at the end + /// of a block. + locals: Locals, } fn adjusted_offset(ctx: &mut Context, offset: i32) -> i32 { (ctx.block_state.depth.0 * WORD_SIZE) as i32 + offset } -fn local_location(locals: &Locals, index: u32) -> ValueLocation { - locals - .locs - .get(index as usize) - .cloned() - .unwrap_or(ValueLocation::Stack( - (index.saturating_sub(ARGS_IN_GPRS.len() as u32) * WORD_SIZE) as _, - )) -} - type Stack = Vec; pub struct Context<'a> { @@ -282,7 +300,7 @@ pub struct Context<'a> { func_starts: &'a Vec<(Option, DynamicLabel)>, /// Each push and pop on the value stack increments or decrements this value by 1 respectively. block_state: BlockState, - locals: Locals, + original_locals: Locals, } impl<'a> Context<'a> {} @@ -323,42 +341,36 @@ pub fn current_block_state(ctx: &Context) -> BlockState { } pub fn return_from_block(ctx: &mut Context) { - if let Some(loc) = ctx.block_state.stack.last().unwrap().location(&ctx.locals) { - match loc { - ValueLocation::Reg(r) => { - dynasm!(ctx.asm - ; push Rq(r) - ); - } - ValueLocation::Stack(offset) => { - let offset = adjusted_offset(ctx, offset); - dynasm!(ctx.asm - ; push QWORD [rsp + offset] - ); - } - ValueLocation::Immediate(imm) => { - dynasm!(ctx.asm - ; push imm - ); - } - } - } - // If `location` is `None` then we don't need to do anything. 
+    free_return_register(ctx, 1);
+    pop_i32_into(ctx, ValueLocation::Reg(RAX))
 }
 
 pub fn push_block_return_value(ctx: &mut Context) {
-    ctx.block_state.depth.reserve(1);
-    ctx.block_state.stack.push(StackValue::Pop);
+    ctx.block_state.stack.push(StackValue::Temp(RAX));
 }
 
-pub fn restore_block_state(ctx: &mut Context, block_state: BlockState) {
-    ctx.block_state = block_state;
+pub fn end_block(ctx: &mut Context, parent_block_state: BlockState) {
+    restore_locals(ctx);
+    ctx.block_state = parent_block_state;
 }
 
 pub fn push_return_value(ctx: &mut Context) {
     ctx.block_state.stack.push(StackValue::Temp(RAX));
 }
 
+/// Restores any register-allocated locals that were shuffled elsewhere (e.g.
+/// spilled for a call) back to the canonical locations in `original_locals`.
+fn restore_locals(ctx: &mut Context) {
+    for (src, dst) in ctx
+        .block_state
+        .locals
+        .register_arguments
+        .clone()
+        .iter()
+        .zip(&ctx.original_locals.register_arguments.clone())
+    {
+        copy_value(ctx, *src, *dst);
+    }
+}
+
 fn push_i32(ctx: &mut Context, value: Value) {
     let stack_loc = match value {
         Value::Local(loc) => StackValue::Local(loc),
@@ -421,7 +433,7 @@ fn pop_i32_into(ctx: &mut Context, dst: ValueLocation) {
         }
     };
 
-    let src = to_move.location(&ctx.locals);
+    let src = to_move.location(&ctx.block_state.locals);
     copy_value(ctx, src, dst);
     free_val(ctx, to_move);
 }
@@ -435,7 +448,7 @@ fn free_val(ctx: &mut Context, val: Value) {
 
 /// Puts this value into a register so that it can be efficiently read
 fn into_reg(ctx: &mut Context, val: Value) -> GPR {
-    match val.location(&ctx.locals) {
+    match val.location(&ctx.block_state.locals) {
         ValueLocation::Stack(offset) => {
            let offset = adjusted_offset(ctx, offset);
            let scratch = ctx.block_state.regs.take_scratch_gpr();
@@ -462,7 +475,7 @@ fn into_temp_reg(ctx: &mut Context, val: Value) -> GPR {
         Value::Local(loc) => {
             let scratch = ctx.block_state.regs.take_scratch_gpr();
 
-            match local_location(&ctx.locals, loc) {
+            match ctx.block_state.locals.get(loc) {
                 ValueLocation::Stack(offset) => {
                     let offset = adjusted_offset(ctx, offset);
                     dynasm!(ctx.asm
@@ -512,7 +525,7 @@ macro_rules! commutative_binop {
                 _ => (into_temp_reg(ctx, op0), op1),
             };
 
-            match op0.location(&ctx.locals) {
+            match op0.location(&ctx.block_state.locals) {
                 ValueLocation::Reg(reg) => {
                     dynasm!(ctx.asm
                         ; $instr Rd(op1), Rd(reg)
@@ -538,12 +551,14 @@ macro_rules! commutative_binop {
     }
 }
 
-commutative_binop!(i32_add, add, |a, b| a + b);
+commutative_binop!(i32_add, add, i32::wrapping_add);
 commutative_binop!(i32_and, and, |a, b| a & b);
 commutative_binop!(i32_or, or, |a, b| a | b);
 commutative_binop!(i32_xor, xor, |a, b| a ^ b);
-commutative_binop!(i32_mul, imul, |a, b| a * b);
+commutative_binop!(i32_mul, imul, i32::wrapping_mul);
 
+// `sub` is not commutative, so we have to handle it differently (we _must_ use the `op1`
+// temp register as the output)
 pub fn i32_sub(ctx: &mut Context) {
     let op0 = pop_i32(ctx);
     let op1 = pop_i32(ctx);
@@ -556,7 +571,7 @@ pub fn i32_sub(ctx: &mut Context) {
     }
 
     let op1 = into_temp_reg(ctx, op1);
-    match op0.location(&ctx.locals) {
+    match op0.location(&ctx.block_state.locals) {
         ValueLocation::Reg(reg) => {
             dynasm!(ctx.asm
                 ; sub Rd(op1), Rd(reg)
@@ -588,8 +603,18 @@ pub fn get_local_i32(ctx: &mut Context, local_idx: u32) {
 
 // TODO: We can put locals that were spilled to the stack
 // back into registers here.
 pub fn set_local_i32(ctx: &mut Context, local_idx: u32) {
     let val = pop_i32(ctx);
-    let val_loc = val.location(&ctx.locals);
-    let dst_loc = local_location(&ctx.locals, local_idx);
+    let val_loc = val.location(&ctx.block_state.locals);
+    let dst_loc = ctx.original_locals.get(local_idx);
+
+    if let Some(cur) = ctx
+        .block_state
+        .locals
+        .register_arguments
+        .get_mut(local_idx as usize)
+    {
+        *cur = dst_loc;
+    }
+
     copy_value(ctx, val_loc, dst_loc);
     free_val(ctx, val);
 }
@@ -604,7 +629,7 @@ pub fn relop_eq_i32(ctx: &mut Context) {
     let result = ctx.block_state.regs.take_scratch_gpr();
 
     if let Some(i) = left.immediate() {
-        match right.location(&ctx.locals) {
+        match right.location(&ctx.block_state.locals) {
             ValueLocation::Stack(offset) => {
                 let offset = adjusted_offset(ctx, offset);
                 dynasm!(ctx.asm
@@ -629,7 +654,7 @@ pub fn relop_eq_i32(ctx: &mut Context) {
         }
     } else {
         let lreg = into_reg(ctx, left);
-        match right.location(&ctx.locals) {
+        match right.location(&ctx.block_state.locals) {
             ValueLocation::Stack(offset) => {
                 let offset = adjusted_offset(ctx, offset);
                 dynasm!(ctx.asm
@@ -733,7 +758,7 @@ fn copy_value(ctx: &mut Context, src: ValueLocation, dst: ValueLocation) {
 
 #[must_use]
 pub struct CallCleanup {
-    restore_registers: Vec<GPR>,
+    restore_registers: ArrayVec<[GPR; SCRATCH_REGS.len()]>,
     stack_depth: i32,
 }
 
@@ -748,15 +773,16 @@ fn free_arg_registers(ctx: &mut Context, count: u32) {
         return;
     }
 
-    for i in 0..ctx.locals.locs.len() {
-        match ctx.locals.locs[i] {
+    // This is bound to the maximum size of the `ArrayVec` and so preserves linear runtime
+    for i in 0..ctx.block_state.locals.register_arguments.len() {
+        match ctx.block_state.locals.register_arguments[i] {
             ValueLocation::Reg(reg) => {
                 if ARGS_IN_GPRS.contains(&reg) {
                     let offset = adjusted_offset(ctx, (i as u32 * WORD_SIZE) as _);
                     dynasm!(ctx.asm
                         ; mov [rsp + offset], Rq(reg)
                     );
-                    ctx.locals.locs[i] = ValueLocation::Stack(offset);
+                    ctx.block_state.locals.register_arguments[i] = ValueLocation::Stack(offset);
                 }
             }
             _ => {}
@@ -770,7 +796,7 @@ fn free_return_register(ctx: &mut Context, count: u32) {
     }
 
     for stack_val in &mut ctx.block_state.stack {
-        match stack_val.location(&ctx.locals) {
+        match stack_val.location(&ctx.block_state.locals) {
             // For now it's impossible for a local to be in RAX but that might be
             // possible in the future, so we check both cases.
             Some(ValueLocation::Reg(RAX)) => {
@@ -787,8 +813,7 @@ fn free_return_register(ctx: &mut Context, count: u32) {
 
-// TODO: Use `ArrayVec`?
 /// Saves volatile (i.e. caller-saved) registers before a function call, if they are used.
-fn save_volatile(ctx: &mut Context) -> Vec { - let mut out = vec![]; +fn save_volatile(ctx: &mut Context) -> ArrayVec<[GPR; SCRATCH_REGS.len()]> { + let mut out = ArrayVec::new(); // TODO: If there are no `StackValue::Pop`s that need to be popped // before we reach our `Temp` value, we can set the `StackValue` @@ -811,11 +837,6 @@ fn save_volatile(ctx: &mut Context) -> Vec { fn pass_outgoing_args(ctx: &mut Context, arity: u32) -> CallCleanup { let num_stack_args = (arity as usize).saturating_sub(ARGS_IN_GPRS.len()) as i32; - let out = CallCleanup { - stack_depth: num_stack_args, - restore_registers: save_volatile(ctx), - }; - // We pop stack arguments first - arguments are RTL if num_stack_args > 0 { let size = num_stack_args * WORD_SIZE as i32; @@ -847,7 +868,10 @@ fn pass_outgoing_args(ctx: &mut Context, arity: u32) -> CallCleanup { pop_i32_into(ctx, ValueLocation::Reg(*reg)); } - out + CallCleanup { + stack_depth: num_stack_args, + restore_registers: save_volatile(ctx), + } } /// Frees up the stack space used for stack-passed arguments and restores the value @@ -901,29 +925,23 @@ pub fn start_function(ctx: &mut Context, arguments: u32, locals: u32) { // Align stack slots to the nearest even number. This is required // by x86-64 ABI. let aligned_stack_slots = (locals + 1) & !1; - let framesize: i32 = aligned_stack_slots as i32 * WORD_SIZE as i32; + let frame_size: i32 = aligned_stack_slots as i32 * WORD_SIZE as i32; - ctx.locals.locs = reg_args - .iter() - .cloned() - .map(ValueLocation::Reg) - .chain( - (0..arguments.saturating_sub(ARGS_IN_GPRS.len() as _)) - // We add 2 here because 1 stack slot is used for the stack pointer and another is - // used for the return address. It's a magic number but there's not really a way - // around this. - .map(|arg_i| ValueLocation::Stack(((arg_i + 2) * WORD_SIZE) as i32 + framesize)), - ) - .collect(); + ctx.original_locals.register_arguments = + reg_args.iter().cloned().map(ValueLocation::Reg).collect(); + ctx.original_locals.num_stack_args = arguments.saturating_sub(ARGS_IN_GPRS.len() as _); + ctx.original_locals.num_local_stack_slots = locals; + ctx.block_state.locals = ctx.original_locals.clone(); dynasm!(ctx.asm ; push rbp ; mov rbp, rsp ); - if framesize > 0 { + // ctx.block_state.depth.reserve(aligned_stack_slots - locals); + if frame_size > 0 { dynasm!(ctx.asm - ; sub rsp, framesize + ; sub rsp, frame_size ); } } diff --git a/src/function_body.rs b/src/function_body.rs index d35bf3d05b..7a835fe4e2 100644 --- a/src/function_body.rs +++ b/src/function_body.rs @@ -158,7 +158,7 @@ pub fn translate( // 0 it will branch here. // After that reset stack depth to the value before entering `if` block. define_label(&mut ctx, if_not); - restore_block_state(&mut ctx, block_state.clone()); + end_block(&mut ctx, block_state.clone()); // Carry over the `end_label`, so it will be resolved when the corresponding `end` // is encountered. 
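For orientation, here is a worked example of the frame layout that `start_function` establishes and `Locals::get` walks (our sketch, assuming `WORD_SIZE` of 8 and the six System V argument registers; the `+ 2` reaches past the saved RBP and the return address pushed by `call`):

    const WORD_SIZE: u32 = 8;
    const NUM_ARG_REGS: u32 = 6; // RDI, RSI, RDX, RCX, R8, R9

    // Mirrors the three cases of `Locals::get`: register arguments,
    // caller-pushed stack arguments, and this frame's own local slots.
    fn locate(index: u32, num_stack_args: u32, num_local_stack_slots: u32) -> String {
        if index < NUM_ARG_REGS {
            format!("argument register #{}", index)
        } else if index - NUM_ARG_REGS < num_stack_args {
            // Reach past our own slots, the saved RBP and the return address.
            let off = (index - NUM_ARG_REGS + num_local_stack_slots + 2) * WORD_SIZE;
            format!("caller's frame: [rsp + {}]", off)
        } else {
            let off = (index - NUM_ARG_REGS - num_stack_args) * WORD_SIZE;
            format!("our frame: [rsp + {}]", off)
        }
    }

    fn main() {
        // E.g. 8 parameters plus a local: 2 arguments arrive on the stack.
        // (The slot count depends on `start_function`'s alignment; 7 is assumed.)
        assert_eq!(locate(0, 2, 7), "argument register #0");
        assert_eq!(locate(6, 2, 7), "caller's frame: [rsp + 72]");
        assert_eq!(locate(8, 2, 7), "our frame: [rsp + 0]");
    }

These are the pre-adjustment offsets; at each use site `adjusted_offset` further corrects them for any pushes or pops that have moved RSP since function entry.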
@@ -199,7 +199,7 @@ pub fn translate( prepare_return_value(&mut ctx); } - restore_block_state(&mut ctx, control_frame.block_state); + end_block(&mut ctx, control_frame.block_state); push_block_return_value(&mut ctx); } Operator::I32Eq => relop_eq_i32(&mut ctx), diff --git a/src/lib.rs b/src/lib.rs index ea1a4697d1..b725112611 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,8 +1,9 @@ -#![feature(plugin, test)] +#![feature(plugin, test, const_slice_len)] #![plugin(dynasm)] extern crate test; +extern crate arrayvec; extern crate capstone; extern crate failure; extern crate wasmparser; diff --git a/src/tests.rs b/src/tests.rs index d9368f10b7..5c17de5008 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -250,6 +250,7 @@ fn function_write_args_spill_to_stack() { assert_eq!( { let translated = translate_wat(code); + translated.disassemble(); let out: u32 = unsafe { translated.execute_func(0, (11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)) }; out @@ -330,6 +331,7 @@ fn fib() { } let translated = translate_wat(FIBONACCI); + translated.disassemble(); for x in 0..30 { unsafe { From 53841cdb07994ad07277a2603f4bacd539706c34 Mon Sep 17 00:00:00 2001 From: Jef Date: Mon, 17 Dec 2018 12:50:29 +0100 Subject: [PATCH 19/61] Add loops --- src/function_body.rs | 87 +++++++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 38 deletions(-) diff --git a/src/function_body.rs b/src/function_body.rs index 7a835fe4e2..7d92c95896 100644 --- a/src/function_body.rs +++ b/src/function_body.rs @@ -13,8 +13,7 @@ enum ControlFrameKind { /// Can be used for an implicit function block. Block { end_label: Label }, /// Loop frame (branching to the beginning of block). - #[allow(unused)] - Loop { header: Label }, + Loop { header: Label, break_: Label }, /// True-subblock of if expression. IfTrue { /// If jump happens inside the if-true block then control will @@ -32,10 +31,10 @@ enum ControlFrameKind { impl ControlFrameKind { /// Returns a label which should be used as a branch destination. - fn br_destination(&self) -> Label { + fn block_end(&self) -> Label { match *self { ControlFrameKind::Block { end_label } => end_label, - ControlFrameKind::Loop { header } => header, + ControlFrameKind::Loop { break_, .. } => break_, ControlFrameKind::IfTrue { end_label, .. } => end_label, ControlFrameKind::IfFalse { end_label } => end_label, } @@ -100,16 +99,16 @@ pub fn translate( num_locals += count; } - let mut ctx = session.new_context(func_idx); + let ctx = &mut session.new_context(func_idx); let operators = body.get_operators_reader()?; - start_function(&mut ctx, arg_count, num_locals); + start_function(ctx, arg_count, num_locals); let mut control_frames = Vec::new(); // Upon entering the function implicit frame for function body is pushed. It has the same // result type as the function itself. Branching to it is equivalent to returning from the function. 
- let epilogue_label = create_label(&mut ctx); + let epilogue_label = create_label(ctx); control_frames.push(ControlFrame::new( ControlFrameKind::Block { end_label: epilogue_label, @@ -125,13 +124,13 @@ pub fn translate( .last_mut() .expect("control stack is never empty") .mark_stack_polymorphic(); - trap(&mut ctx); + trap(ctx); } Operator::If { ty } => { - let end_label = create_label(&mut ctx); - let if_not = create_label(&mut ctx); + let end_label = create_label(ctx); + let if_not = create_label(ctx); - pop_and_breq(&mut ctx, if_not); + pop_and_breq(ctx, if_not); control_frames.push(ControlFrame::new( ControlFrameKind::IfTrue { end_label, if_not }, @@ -139,6 +138,19 @@ pub fn translate( ty, )); } + Operator::Loop { ty } => { + let header = create_label(ctx); + let break_ = create_label(ctx); + + define_label(ctx, header); + pop_and_breq(ctx, break_); + + control_frames.push(ControlFrame::new( + ControlFrameKind::Loop { header, break_ }, + current_block_state(&ctx), + ty, + )); + } Operator::Else => { match control_frames.pop() { Some(ControlFrame { @@ -148,17 +160,17 @@ pub fn translate( .. }) => { if ty != Type::EmptyBlockType { - return_from_block(&mut ctx); + return_from_block(ctx); } // Finalize if..else block by jumping to the `end_label`. - br(&mut ctx, end_label); + br(ctx, end_label); // Define `if_not` label here, so if the corresponding `if` block receives // 0 it will branch here. // After that reset stack depth to the value before entering `if` block. - define_label(&mut ctx, if_not); - end_block(&mut ctx, block_state.clone()); + define_label(ctx, if_not); + end_block(ctx, block_state.clone()); // Carry over the `end_label`, so it will be resolved when the corresponding `end` // is encountered. @@ -179,57 +191,56 @@ pub fn translate( let control_frame = control_frames.pop().expect("control stack is never empty"); if control_frame.ty != Type::EmptyBlockType && !control_frames.is_empty() { - return_from_block(&mut ctx); + return_from_block(ctx); } - if !control_frame.kind.is_loop() { - // Branches to a control frame with block type directs control flow to the header of the loop - // and we don't need to resolve it here. Branching to other control frames always lead - // control flow to the corresponding `end`. - define_label(&mut ctx, control_frame.kind.br_destination()); + if let ControlFrameKind::Loop { header, .. } = control_frame.kind { + br(ctx, header); } + define_label(ctx, control_frame.kind.block_end()); + if let ControlFrameKind::IfTrue { if_not, .. } = control_frame.kind { // this is `if .. end` construction. Define the `if_not` label here. - define_label(&mut ctx, if_not); + define_label(ctx, if_not); } // This is the last control frame. Perform the implicit return here. 
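                        // For context: `prepare_return_value` is simply
                        // `pop_i32_into(ctx, ValueLocation::Reg(RAX))`, and the System V
                        // ABI returns an i32 in EAX/RAX, so once the value is there the
                        // `ret` emitted by the epilogue needs no further work.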
if control_frames.len() == 0 && return_ty != Type::EmptyBlockType { - prepare_return_value(&mut ctx); + prepare_return_value(ctx); } - end_block(&mut ctx, control_frame.block_state); - push_block_return_value(&mut ctx); + end_block(ctx, control_frame.block_state); + push_block_return_value(ctx); } - Operator::I32Eq => relop_eq_i32(&mut ctx), - Operator::I32Add => i32_add(&mut ctx), - Operator::I32Sub => i32_sub(&mut ctx), - Operator::I32And => i32_and(&mut ctx), - Operator::I32Or => i32_or(&mut ctx), - Operator::I32Xor => i32_xor(&mut ctx), - Operator::I32Mul => i32_mul(&mut ctx), - Operator::GetLocal { local_index } => get_local_i32(&mut ctx, local_index), - Operator::I32Const { value } => literal_i32(&mut ctx, value), + Operator::I32Eq => relop_eq_i32(ctx), + Operator::I32Add => i32_add(ctx), + Operator::I32Sub => i32_sub(ctx), + Operator::I32And => i32_and(ctx), + Operator::I32Or => i32_or(ctx), + Operator::I32Xor => i32_xor(ctx), + Operator::I32Mul => i32_mul(ctx), + Operator::GetLocal { local_index } => get_local_i32(ctx, local_index), + Operator::I32Const { value } => literal_i32(ctx, value), Operator::Call { function_index } => { let callee_ty = translation_ctx.func_type(function_index); // TODO: this implementation assumes that this function is locally defined. call_direct( - &mut ctx, + ctx, function_index, callee_ty.params.len() as u32, callee_ty.returns.len() as u32, ); - push_return_value(&mut ctx); + push_return_value(ctx); } _ => { - trap(&mut ctx); + trap(ctx); } } } - epilogue(&mut ctx); + epilogue(ctx); Ok(()) } From 74ffb8560ca2db6cb66cfb1f120f52e83a2e7741 Mon Sep 17 00:00:00 2001 From: Jef Date: Tue, 18 Dec 2018 12:12:17 +0100 Subject: [PATCH 20/61] Fix use of locals --- src/backend.rs | 310 ++++++++++++++++++++++++++++++++++--------- src/function_body.rs | 157 +++++++++++++++------- src/tests.rs | 136 +++++++++++++++++++ 3 files changed, 490 insertions(+), 113 deletions(-) diff --git a/src/backend.rs b/src/backend.rs index f01eeecdf1..0b25b5eb69 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -14,7 +14,7 @@ const WORD_SIZE: u32 = 8; type GPR = u8; -#[derive(Copy, Clone)] +#[derive(Debug, Copy, Clone)] struct GPRs { bits: u16, } @@ -70,7 +70,7 @@ impl GPRs { } } -#[derive(Copy, Clone)] +#[derive(Debug, Copy, Clone)] pub struct Registers { scratch: GPRs, } @@ -94,6 +94,10 @@ impl Registers { result } + pub fn mark_used(&mut self, gpr: GPR) { + self.scratch.mark_used(gpr); + } + // TODO: Add function that takes a scratch register if possible // but otherwise gives a fresh stack location. pub fn take_scratch_gpr(&mut self) -> GPR { @@ -136,7 +140,7 @@ const ARGS_IN_GPRS: &[GPR] = &[RDI, RSI, RDX, RCX, R8, R9]; // allow us to call instructions that require specific registers. // // List of scratch registers taken from https://wiki.osdev.org/System_V_ABI -const SCRATCH_REGS: &[GPR] = &[R10, R11]; +const SCRATCH_REGS: &[GPR] = &[RAX, R10, R11]; pub struct CodeGenSession { assembler: Assembler, @@ -170,7 +174,6 @@ impl CodeGenSession { asm: &mut self.assembler, func_starts: &self.func_starts, block_state: Default::default(), - original_locals: Default::default(), } } @@ -209,7 +212,7 @@ impl TranslatedCodeSection { } // TODO: Immediates? 
We could implement on-the-fly const folding
-#[derive(Copy, Clone)]
+#[derive(Debug, Copy, Clone, PartialEq)]
 enum Value {
     Local(u32),
     Temp(GPR),
@@ -252,7 +255,7 @@ impl StackValue {
     }
 }
 
-#[derive(Default, Clone)]
+#[derive(Debug, Default, Clone)]
 struct Locals {
     register_arguments: ArrayVec<[ValueLocation; ARGS_IN_GPRS.len()]>,
     num_stack_args: u32,
@@ -278,15 +281,18 @@ impl Locals {
     }
 }
 
-#[derive(Default, Clone)]
+#[derive(Debug, Default, Clone)]
 pub struct BlockState {
     stack: Stack,
-    pub depth: StackDepth,
+    // TODO: `BitVec`
+    stack_map: Vec<bool>,
+    depth: StackDepth,
     regs: Registers,
     /// This is the _current_ locals, since we can shuffle them about during function calls.
     /// We will restore this to be the same state as the `Locals` in `Context` at the end
     /// of a block.
     locals: Locals,
+    parent_locals: Locals,
 }
 
 fn adjusted_offset(ctx: &mut Context, offset: i32) -> i32 {
@@ -300,7 +306,6 @@ pub struct Context<'a> {
     func_starts: &'a Vec<(Option<AssemblyOffset>, DynamicLabel)>,
     /// Each push and pop on the value stack increments or decrements this value by 1 respectively.
     block_state: BlockState,
-    original_locals: Locals,
 }
 
 impl<'a> Context<'a> {}
@@ -336,25 +341,124 @@ impl StackDepth {
     }
 }
 
-pub fn current_block_state(ctx: &Context) -> BlockState {
-    ctx.block_state.clone()
+fn expand_stack(ctx: &mut Context, by: u32) {
+    use std::iter;
+
+    if by == 0 {
+        return;
+    }
+
+    let new_stack_size = (ctx.block_state.stack_map.len() + by as usize).next_power_of_two();
+    let additional_elements = new_stack_size - ctx.block_state.stack_map.len();
+    ctx.block_state
+        .stack_map
+        .extend(iter::repeat(false).take(additional_elements));
+
+    dynasm!(ctx.asm
+        ; sub rsp, additional_elements as i32
+    );
 }
 
-pub fn return_from_block(ctx: &mut Context) {
-    free_return_register(ctx, 1);
-    pop_i32_into(ctx, ValueLocation::Reg(RAX))
+// TODO: Make this generic over `Vec` or `ArrayVec`?
+fn stack_slots(ctx: &mut Context, count: u32) -> Vec<i32> {
+    let mut out = Vec::with_capacity(count as usize);
+
+    let offset_if_taken = |(i, is_taken): (usize, bool)| {
+        if !is_taken {
+            Some(i as i32 * WORD_SIZE as i32)
+        } else {
+            None
+        }
+    };
+
+    out.extend(
+        ctx.block_state
+            .stack_map
+            .iter()
+            .cloned()
+            .enumerate()
+            .filter_map(offset_if_taken),
+    );
+
+    let remaining = count as usize - out.len();
+
+    if remaining > 0 {
+        expand_stack(ctx, remaining as u32);
+        out.extend(
+            ctx.block_state
+                .stack_map
+                .iter()
+                .cloned()
+                .enumerate()
+                .filter_map(offset_if_taken),
+        );
+    }
+
+    out
 }
 
-pub fn push_block_return_value(ctx: &mut Context) {
-    ctx.block_state.stack.push(StackValue::Temp(RAX));
+fn stack_slot(ctx: &mut Context) -> i32 {
+    if let Some(pos) = ctx
+        .block_state
+        .stack_map
+        .iter()
+        .position(|is_taken| !is_taken)
+    {
+        ctx.block_state.stack_map[pos] = true;
+        pos as i32 * WORD_SIZE as i32
+    } else {
+        expand_stack(ctx, 1);
+        stack_slot(ctx)
+    }
 }
 
-pub fn end_block(ctx: &mut Context, parent_block_state: BlockState) {
-    restore_locals(ctx);
+// We use `put` instead of `pop` since with `BrIf` it's possible
+// that the block will continue after returning.
+pub fn return_from_block(ctx: &mut Context, arity: u32, is_function_end: bool) {
+    // This should just be an optimisation; passing `false` should always
+    // result in correct code.
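    // Illustration (not part of the patch) of why this function "puts" the
    // top of stack rather than popping it: a `BrIf` may fall through, in
    // which case the block body keeps executing against the same
    // compile-time stack, so the value must stay on it. In standard WAT
    // (`BrIf` is not handled by this backend yet):
    //
    //     (module
    //       (func (param i32) (result i32)
    //         (block (result i32)
    //           i32.const 1
    //           get_local 0
    //           br_if 0       ;; taken: the block yields 1
    //           i32.const 41
    //           i32.add)))    ;; not taken: execution continues, yields 42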
+ if !is_function_end { + restore_locals(ctx); + } + + if arity == 0 { + return; + } + + let stack_top = *ctx.block_state.stack.last().expect("Stack is empty"); + put_stack_val_into(ctx, stack_top, ValueLocation::Reg(RAX)) +} + +pub fn start_block(ctx: &mut Context, arity: u32) -> BlockState { + free_return_register(ctx, arity); + let current_state = ctx.block_state.clone(); + ctx.block_state.parent_locals = ctx.block_state.locals.clone(); + current_state +} + +pub fn end_block(ctx: &mut Context, parent_block_state: BlockState, arity: u32) { + // TODO: This is currently never called, but is important for if we want to + // have a more complex stack spilling scheme. + if ctx.block_state.depth != parent_block_state.depth { + dynasm!(ctx.asm + ; add rsp, (ctx.block_state.depth.0 - parent_block_state.depth.0) as i32 + ); + } + ctx.block_state = parent_block_state; + + if arity > 0 { + push_return_value(ctx); + } } +// TODO: We should be able to have arbitrary return registers. For blocks with multiple +// return points we can just choose the first one that we encounter and then always +// use that one. This will mean that `(block ...)` is no less efficient than `...` +// alone, and you only pay for the shuffling of registers in the case that you use +// `BrIf` or similar. pub fn push_return_value(ctx: &mut Context) { + ctx.block_state.regs.mark_used(RAX); ctx.block_state.stack.push(StackValue::Temp(RAX)); } @@ -365,7 +469,7 @@ fn restore_locals(ctx: &mut Context) { .register_arguments .clone() .iter() - .zip(&ctx.original_locals.register_arguments.clone()) + .zip(&ctx.block_state.parent_locals.register_arguments.clone()) { copy_value(ctx, *src, *dst); } @@ -380,6 +484,7 @@ fn push_i32(ctx: &mut Context, value: Value) { StackValue::Temp(gpr) } else { ctx.block_state.depth.reserve(1); + // TODO: Proper stack allocation scheme dynasm!(ctx.asm ; push Rq(gpr) ); @@ -408,8 +513,10 @@ fn pop_i32(ctx: &mut Context) -> Value { } } -fn pop_i32_into(ctx: &mut Context, dst: ValueLocation) { - let to_move = match ctx.block_state.stack.pop().expect("Stack is empty") { +/// Warning: this _will_ pop the runtime stack, but will _not_ pop the compile-time +/// stack. It's specifically for mid-block breaks like `Br` and `BrIf`. +fn put_stack_val_into(ctx: &mut Context, val: StackValue, dst: ValueLocation) { + let to_move = match val { StackValue::Local(loc) => Value::Local(loc), StackValue::Immediate(i) => Value::Immediate(i), StackValue::Temp(reg) => Value::Temp(reg), @@ -434,12 +541,30 @@ fn pop_i32_into(ctx: &mut Context, dst: ValueLocation) { }; let src = to_move.location(&ctx.block_state.locals); - println!("{:?}, {:?}", src, dst); copy_value(ctx, src, dst); - free_val(ctx, to_move); + if src != dst { + free_value(ctx, to_move); + } } -fn free_val(ctx: &mut Context, val: Value) { +pub fn drop(ctx: &mut Context) { + match ctx.block_state.stack.pop().expect("Stack is empty") { + StackValue::Pop => { + dynasm!(ctx.asm + ; add rsp, WORD_SIZE as i32 + ); + } + StackValue::Temp(gpr) => free_value(ctx, Value::Temp(gpr)), + _ => {} + } +} + +fn pop_i32_into(ctx: &mut Context, dst: ValueLocation) { + let val = ctx.block_state.stack.pop().expect("Stack is empty"); + put_stack_val_into(ctx, val, dst); +} + +fn free_value(ctx: &mut Context, val: Value) { match val { Value::Temp(reg) => ctx.block_state.regs.release_scratch_gpr(reg), Value::Local(_) | Value::Immediate(_) => {} @@ -546,12 +671,13 @@ macro_rules! 
commutative_binop {
             }
 
             ctx.block_state.stack.push(StackValue::Temp(op1));
-            free_val(ctx, op0);
+            free_value(ctx, op0);
         }
     }
 }
 
 commutative_binop!(i32_add, add, i32::wrapping_add);
+
 commutative_binop!(i32_and, and, |a, b| a & b);
 commutative_binop!(i32_or, or, |a, b| a | b);
 commutative_binop!(i32_xor, xor, |a, b| a ^ b);
@@ -592,7 +718,7 @@ pub fn i32_sub(ctx: &mut Context) {
     }
 
     ctx.block_state.stack.push(StackValue::Temp(op1));
-    free_val(ctx, op0);
+    free_value(ctx, op0);
 }
 
 pub fn get_local_i32(ctx: &mut Context, local_idx: u32) {
@@ -604,7 +730,7 @@ pub fn get_local_i32(ctx: &mut Context, local_idx: u32) {
 pub fn set_local_i32(ctx: &mut Context, local_idx: u32) {
     let val = pop_i32(ctx);
     let val_loc = val.location(&ctx.block_state.locals);
-    let dst_loc = ctx.original_locals.get(local_idx);
+    let dst_loc = ctx.block_state.parent_locals.get(local_idx);
 
     if let Some(cur) = ctx
         .block_state
@@ -616,7 +742,7 @@ pub fn set_local_i32(ctx: &mut Context, local_idx: u32) {
     }
 
     copy_value(ctx, val_loc, dst_loc);
-    free_val(ctx, val);
+    free_value(ctx, val);
 }
 
 pub fn literal_i32(ctx: &mut Context, imm: i32) {
@@ -681,13 +807,13 @@ pub fn relop_eq_i32(ctx: &mut Context) {
     }
 
     push_i32(ctx, Value::Temp(result));
-    free_val(ctx, left);
-    free_val(ctx, right);
+    free_value(ctx, left);
+    free_value(ctx, right);
 }
 
 /// Pops i32 predicate and branches to the specified label
 /// if the predicate is equal to zero.
-pub fn pop_and_breq(ctx: &mut Context, label: Label) {
+pub fn jump_if_equal_zero(ctx: &mut Context, label: Label) {
     let val = pop_i32(ctx);
     let predicate = into_temp_reg(ctx, val);
     dynasm!(ctx.asm
@@ -704,10 +830,6 @@ pub fn br(ctx: &mut Context, label: Label) {
     );
 }
 
-pub fn prepare_return_value(ctx: &mut Context) {
-    pop_i32_into(ctx, ValueLocation::Reg(RAX));
-}
-
 fn copy_value(ctx: &mut Context, src: ValueLocation, dst: ValueLocation) {
     match (src, dst) {
         (ValueLocation::Stack(in_offset), ValueLocation::Stack(out_offset)) => {
@@ -773,16 +895,15 @@ fn free_arg_registers(ctx: &mut Context, count: u32) {
         return;
     }
 
-    // This is bound to the maximum size of the `ArrayVec` amd so preserves linear runtime
+    // This is bound to the maximum size of the `ArrayVec` and so can be considered to have constant
+    // runtime
     for i in 0..ctx.block_state.locals.register_arguments.len() {
         match ctx.block_state.locals.register_arguments[i] {
             ValueLocation::Reg(reg) => {
                 if ARGS_IN_GPRS.contains(&reg) {
-                    let offset = adjusted_offset(ctx, (i as u32 * WORD_SIZE) as _);
-                    dynasm!(ctx.asm
-                        ; mov [rsp + offset], Rq(reg)
-                    );
-                    ctx.block_state.locals.register_arguments[i] = ValueLocation::Stack(offset);
+                    let dst = ValueLocation::Stack((i as u32 * WORD_SIZE) as _);
+                    copy_value(ctx, ValueLocation::Reg(reg), dst);
+                    ctx.block_state.locals.register_arguments[i] = dst;
                 }
             }
             _ => {}
@@ -795,20 +916,63 @@ fn free_return_register(ctx: &mut Context, count: u32) {
         return;
     }
 
-    for stack_val in &mut ctx.block_state.stack {
+    free_register(ctx, RAX);
+}
+
+fn free_register(ctx: &mut Context, reg: GPR) {
+    let mut to_repush = 0;
+    let mut out = None;
+
+    if ctx.block_state.regs.is_free(reg) {
+        return;
+    }
+
+    for stack_val in ctx.block_state.stack.iter_mut().rev() {
         match stack_val.location(&ctx.block_state.locals) {
             // For now it's impossible for a local to be in RAX but that might be
             // possible in the future, so we check both cases.
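            // Note, for illustration: the register being freed is about to be
            // clobbered (for `free_return_register` it is RAX, which the
            // upcoming `call` overwrites with the callee's return value), so
            // any stack value currently held in it must be relocated: into
            // another scratch register if one is free, otherwise spilled to
            // the physical stack with a real `push`.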
- Some(ValueLocation::Reg(RAX)) => { - let scratch = ctx.block_state.regs.take_scratch_gpr(); - dynasm!(ctx.asm - ; mov Rq(scratch), rax - ); - *stack_val = StackValue::Temp(scratch); + Some(ValueLocation::Reg(r)) if r == reg => { + *stack_val = if ctx.block_state.regs.free_scratch() > 1 { + let gpr = ctx.block_state.regs.take_scratch_gpr(); + assert!(gpr != RAX, "RAX in stack but marked as free"); + StackValue::Temp(gpr) + } else { + ctx.block_state.depth.reserve(1); + StackValue::Pop + }; + + out = Some(*stack_val); + + break; + } + Some(_) => {} + None => { + to_repush += 1; } - _ => {} } } + + if let Some(out) = out { + match out { + StackValue::Temp(gpr) => { + dynasm!(ctx.asm + ; mov Rq(gpr), rax + ); + } + StackValue::Pop => { + // TODO: Ideally we should do proper stack allocation so we + // don't have to check this at all (i.e. order on the + // physical stack and order on the logical stack should + // be independent). + assert_eq!(to_repush, 0); + dynasm!(ctx.asm + ; push rax + ); + } + _ => unreachable!(), + } + ctx.block_state.regs.release_scratch_gpr(RAX); + } } // TODO: Use `ArrayVec`? @@ -900,7 +1064,9 @@ pub fn call_direct(ctx: &mut Context, index: u32, arg_arity: u32, return_arity: ); free_arg_registers(ctx, arg_arity); - free_return_register(ctx, return_arity); + if return_arity > 0 { + free_return_register(ctx, return_arity); + } let cleanup = pass_outgoing_args(ctx, arg_arity); @@ -910,13 +1076,22 @@ pub fn call_direct(ctx: &mut Context, index: u32, arg_arity: u32, return_arity: ); post_call_cleanup(ctx, cleanup); + + if return_arity > 0 { + push_return_value(ctx); + } +} + +#[must_use] +pub struct Function { + should_generate_epilogue: bool, } // TODO: Reserve space to store RBX, RBP, and R12..R15 so we can use them // as scratch registers // TODO: Allow use of unused argument registers as scratch registers. /// Writes the function prologue and stores the arguments as locals -pub fn start_function(ctx: &mut Context, arguments: u32, locals: u32) { +pub fn start_function(ctx: &mut Context, arguments: u32, locals: u32) -> Function { let reg_args = &ARGS_IN_GPRS[..(arguments as usize).min(ARGS_IN_GPRS.len())]; // We need space to store the register arguments if we need to call a function @@ -927,34 +1102,41 @@ pub fn start_function(ctx: &mut Context, arguments: u32, locals: u32) { let aligned_stack_slots = (locals + 1) & !1; let frame_size: i32 = aligned_stack_slots as i32 * WORD_SIZE as i32; - ctx.original_locals.register_arguments = + ctx.block_state.locals.register_arguments = reg_args.iter().cloned().map(ValueLocation::Reg).collect(); - ctx.original_locals.num_stack_args = arguments.saturating_sub(ARGS_IN_GPRS.len() as _); - ctx.original_locals.num_local_stack_slots = locals; - ctx.block_state.locals = ctx.original_locals.clone(); - - dynasm!(ctx.asm - ; push rbp - ; mov rbp, rsp - ); + ctx.block_state.locals.num_stack_args = arguments.saturating_sub(ARGS_IN_GPRS.len() as _); + ctx.block_state.locals.num_local_stack_slots = locals; + ctx.block_state.parent_locals = ctx.block_state.locals.clone(); // ctx.block_state.depth.reserve(aligned_stack_slots - locals); - if frame_size > 0 { + let should_generate_epilogue = frame_size > 0; + if should_generate_epilogue { dynasm!(ctx.asm + ; push rbp + ; mov rbp, rsp ; sub rsp, frame_size ); } + + Function { + should_generate_epilogue, + } } /// Writes the function epilogue, restoring the stack pointer and returning to the /// caller. 
-pub fn epilogue(ctx: &mut Context) { +pub fn epilogue(ctx: &mut Context, func: Function) { // We don't need to clean up the stack - RSP is restored and // the calling function has its own register stack and will // stomp on the registers from our stack if necessary. + if func.should_generate_epilogue { + dynasm!(ctx.asm + ; mov rsp, rbp + ; pop rbp + ); + } + dynasm!(ctx.asm - ; mov rsp, rbp - ; pop rbp ; ret ); } diff --git a/src/function_body.rs b/src/function_body.rs index 7d92c95896..c302e6a86e 100644 --- a/src/function_body.rs +++ b/src/function_body.rs @@ -13,7 +13,7 @@ enum ControlFrameKind { /// Can be used for an implicit function block. Block { end_label: Label }, /// Loop frame (branching to the beginning of block). - Loop { header: Label, break_: Label }, + Loop { header: Label }, /// True-subblock of if expression. IfTrue { /// If jump happens inside the if-true block then control will @@ -31,20 +31,21 @@ enum ControlFrameKind { impl ControlFrameKind { /// Returns a label which should be used as a branch destination. - fn block_end(&self) -> Label { + fn block_end(&self) -> Option