diff --git a/README.md b/README.md index c35fc40e67..24fca0633e 100644 --- a/README.md +++ b/README.md @@ -142,18 +142,16 @@ fib: push rax push rax mov rsi, rcx - call fib - add eax, dword ptr [rsp + 8] - mov rcx, qword ptr [rsp + 0x10] + call 0 + add eax, [rsp + 8] + mov rcx, [rsp + 0x10] add ecx, 0xfffffffe cmp ecx, 1 mov rsi, rcx - pop rcx - pop rcx - pop rcx + lea rsp, [rsp + 0x18] ja .Lloop .Lreturn: - ret + ret ``` Now obviously I'm not advocating for replacing Firefox's optimising compiler with Lightbeam since the latter can only really produce better code when receiving optimised WebAssembly (and so debug-mode or hand-written WebAssembly may produce much worse output). However, this shows that even with the restrictions of a streaming compiler it's absolutely possible to produce high-quality assembly output. For the assembly above, the Lightbeam output runs within 15% of native speed. This is paramount for one of Lightbeam's intended use cases: real-time systems that want good runtime performance but cannot tolerate compiler bombs. 
diff --git a/src/backend.rs b/src/backend.rs index bfa71f33c8..9857ffabc5 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -2262,38 +2262,32 @@ impl<'this, M: ModuleContext> Context<'this, M> { self.free_value(selector); } - fn set_stack_depth_preserve_flags(&mut self, depth: StackDepth) { - if self.block_state.depth.0 < depth.0 { - for _ in 0..depth.0 - self.block_state.depth.0 { - dynasm!(self.asm - ; push rax - ); - } - } else if self.block_state.depth.0 > depth.0 { - let trash = self.take_reg(I64); - for _ in 0..self.block_state.depth.0 - depth.0 { - dynasm!(self.asm - ; pop Rq(trash.rq().unwrap()) - ); - } - self.block_state.regs.release(trash); - } - - self.block_state.depth = depth; - } - fn set_stack_depth(&mut self, depth: StackDepth) { if self.block_state.depth.0 != depth.0 { let diff = depth.0 as i32 - self.block_state.depth.0 as i32; if diff.abs() == 1 { - self.set_stack_depth_preserve_flags(depth); + if self.block_state.depth.0 < depth.0 { + for _ in 0..depth.0 - self.block_state.depth.0 { + dynasm!(self.asm + ; push rax + ); + } + } else if self.block_state.depth.0 > depth.0 { + let trash = self.take_reg(I64); + for _ in 0..self.block_state.depth.0 - depth.0 { + dynasm!(self.asm + ; pop Rq(trash.rq().unwrap()) + ); + } + self.block_state.regs.release(trash); + } } else { dynasm!(self.asm - ; add rsp, (self.block_state.depth.0 as i32 - depth.0 as i32) * WORD_SIZE as i32 + ; lea rsp, [rsp + (self.block_state.depth.0 as i32 - depth.0 as i32) * WORD_SIZE as i32] ); - - self.block_state.depth = depth; } + + self.block_state.depth = depth; } } @@ -2325,43 +2319,6 @@ impl<'this, M: ModuleContext> Context<'this, M> { self.set_stack_depth(cc.stack_depth); } - pub fn pass_block_args_preserve_flags(&mut self, cc: &BlockCallingConvention) { - self.do_pass_block_args(cc); - self.set_stack_depth_preserve_flags(cc.stack_depth); - } - - pub fn serialize_block_args_preserve_flags( - &mut self, - cc: &BlockCallingConvention, - other_to_drop: Option>, - ) -> 
BlockCallingConvention { - self.do_pass_block_args(cc); - - let mut out_args = cc.arguments.clone(); - - out_args.reverse(); - - if let Some(to_drop) = other_to_drop { - for _ in to_drop { - let val = self.pop(); - // TODO: We can use stack slots for values already on the stack but we - // don't refcount stack slots right now - let loc = CCLoc::Reg(self.into_temp_reg(None, val)); - - out_args.push(loc); - } - } - - out_args.reverse(); - - self.set_stack_depth_preserve_flags(cc.stack_depth); - - BlockCallingConvention { - stack_depth: cc.stack_depth, - arguments: out_args, - } - } - pub fn serialize_block_args( &mut self, cc: &BlockCallingConvention, @@ -5054,3 +5011,4 @@ impl IntoLabel for (LabelValue, LabelValue) { Box::new(const_values(self.0, self.1)) } } + diff --git a/src/function_body.rs b/src/function_body.rs index e2fc3cb4d7..9904c77e3b 100644 --- a/src/function_body.rs +++ b/src/function_body.rs @@ -314,7 +314,7 @@ where ((Some(Left(ref cc)), to_drop), ref mut other @ (None, _)) | (ref mut other @ (None, _), (Some(Left(ref cc)), to_drop)) => { let mut cc = - ctx.serialize_block_args_preserve_flags(cc, to_drop.clone()); + ctx.serialize_block_args(cc, to_drop.clone()); if let Some(to_drop) = other.1 { drop_elements(&mut cc.arguments, to_drop.clone()); }