Improve efficiency of resetting stack pointer
This commit is contained in:
12
README.md
12
README.md
@@ -142,18 +142,16 @@ fib:
|
||||
push rax
|
||||
push rax
|
||||
mov rsi, rcx
|
||||
call fib
|
||||
add eax, dword ptr [rsp + 8]
|
||||
mov rcx, qword ptr [rsp + 0x10]
|
||||
call 0
|
||||
add eax, [rsp + 8]
|
||||
mov rcx, [rsp + 0x10]
|
||||
add ecx, 0xfffffffe
|
||||
cmp ecx, 1
|
||||
mov rsi, rcx
|
||||
pop rcx
|
||||
pop rcx
|
||||
pop rcx
|
||||
lea rsp, [rsp + 0x18]
|
||||
ja .Lloop
|
||||
.Lreturn:
|
||||
ret
|
||||
ret
|
||||
```
|
||||
|
||||
Now, obviously, I'm not advocating for replacing Firefox's optimising compiler with Lightbeam, since the latter can only really produce better code when it receives already-optimised WebAssembly (and so debug-mode or hand-written WebAssembly may produce much worse output). However, this shows that even with the restrictions of a streaming compiler it's absolutely possible to produce high-quality assembly output. For the assembly above, the Lightbeam output runs within 15% of native speed. This is paramount for one of Lightbeam's intended use cases: real-time systems that want good runtime performance but cannot tolerate compiler bombs.
|
||||
|
||||
@@ -2262,38 +2262,32 @@ impl<'this, M: ModuleContext> Context<'this, M> {
|
||||
self.free_value(selector);
|
||||
}
|
||||
|
||||
fn set_stack_depth_preserve_flags(&mut self, depth: StackDepth) {
|
||||
if self.block_state.depth.0 < depth.0 {
|
||||
for _ in 0..depth.0 - self.block_state.depth.0 {
|
||||
dynasm!(self.asm
|
||||
; push rax
|
||||
);
|
||||
}
|
||||
} else if self.block_state.depth.0 > depth.0 {
|
||||
let trash = self.take_reg(I64);
|
||||
for _ in 0..self.block_state.depth.0 - depth.0 {
|
||||
dynasm!(self.asm
|
||||
; pop Rq(trash.rq().unwrap())
|
||||
);
|
||||
}
|
||||
self.block_state.regs.release(trash);
|
||||
}
|
||||
|
||||
self.block_state.depth = depth;
|
||||
}
|
||||
|
||||
fn set_stack_depth(&mut self, depth: StackDepth) {
|
||||
if self.block_state.depth.0 != depth.0 {
|
||||
let diff = depth.0 as i32 - self.block_state.depth.0 as i32;
|
||||
if diff.abs() == 1 {
|
||||
self.set_stack_depth_preserve_flags(depth);
|
||||
if self.block_state.depth.0 < depth.0 {
|
||||
for _ in 0..depth.0 - self.block_state.depth.0 {
|
||||
dynasm!(self.asm
|
||||
; push rax
|
||||
);
|
||||
}
|
||||
} else if self.block_state.depth.0 > depth.0 {
|
||||
let trash = self.take_reg(I64);
|
||||
for _ in 0..self.block_state.depth.0 - depth.0 {
|
||||
dynasm!(self.asm
|
||||
; pop Rq(trash.rq().unwrap())
|
||||
);
|
||||
}
|
||||
self.block_state.regs.release(trash);
|
||||
}
|
||||
} else {
|
||||
dynasm!(self.asm
|
||||
; add rsp, (self.block_state.depth.0 as i32 - depth.0 as i32) * WORD_SIZE as i32
|
||||
; lea rsp, [rsp + (self.block_state.depth.0 as i32 - depth.0 as i32) * WORD_SIZE as i32]
|
||||
);
|
||||
|
||||
self.block_state.depth = depth;
|
||||
}
|
||||
|
||||
self.block_state.depth = depth;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2325,43 +2319,6 @@ impl<'this, M: ModuleContext> Context<'this, M> {
|
||||
self.set_stack_depth(cc.stack_depth);
|
||||
}
|
||||
|
||||
pub fn pass_block_args_preserve_flags(&mut self, cc: &BlockCallingConvention) {
|
||||
self.do_pass_block_args(cc);
|
||||
self.set_stack_depth_preserve_flags(cc.stack_depth);
|
||||
}
|
||||
|
||||
pub fn serialize_block_args_preserve_flags(
|
||||
&mut self,
|
||||
cc: &BlockCallingConvention,
|
||||
other_to_drop: Option<RangeInclusive<u32>>,
|
||||
) -> BlockCallingConvention {
|
||||
self.do_pass_block_args(cc);
|
||||
|
||||
let mut out_args = cc.arguments.clone();
|
||||
|
||||
out_args.reverse();
|
||||
|
||||
if let Some(to_drop) = other_to_drop {
|
||||
for _ in to_drop {
|
||||
let val = self.pop();
|
||||
// TODO: We can use stack slots for values already on the stack but we
|
||||
// don't refcount stack slots right now
|
||||
let loc = CCLoc::Reg(self.into_temp_reg(None, val));
|
||||
|
||||
out_args.push(loc);
|
||||
}
|
||||
}
|
||||
|
||||
out_args.reverse();
|
||||
|
||||
self.set_stack_depth_preserve_flags(cc.stack_depth);
|
||||
|
||||
BlockCallingConvention {
|
||||
stack_depth: cc.stack_depth,
|
||||
arguments: out_args,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn serialize_block_args(
|
||||
&mut self,
|
||||
cc: &BlockCallingConvention,
|
||||
@@ -5054,3 +5011,4 @@ impl IntoLabel for (LabelValue, LabelValue) {
|
||||
Box::new(const_values(self.0, self.1))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -314,7 +314,7 @@ where
|
||||
((Some(Left(ref cc)), to_drop), ref mut other @ (None, _))
|
||||
| (ref mut other @ (None, _), (Some(Left(ref cc)), to_drop)) => {
|
||||
let mut cc =
|
||||
ctx.serialize_block_args_preserve_flags(cc, to_drop.clone());
|
||||
ctx.serialize_block_args(cc, to_drop.clone());
|
||||
if let Some(to_drop) = other.1 {
|
||||
drop_elements(&mut cc.arguments, to_drop.clone());
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user