Update README
This commit is contained in:
159
README.md
159
README.md
@@ -4,6 +4,165 @@ Lightbeam is an optimising one-pass streaming compiler for WebAssembly, intended
|
|||||||
|
|
||||||
[wasmtime]: https://github.com/CraneStation/wasmtime
|
[wasmtime]: https://github.com/CraneStation/wasmtime
|
||||||
|
|
||||||
|
## Quality of output
|
||||||
|
|
||||||
|
Already - with a very small number of relatively simple optimisation rules - Lightbeam produces surprisingly high-quality output considering how restricted it is. It even produces better code than Cranelift, Firefox or both for some workloads. Here's a very simple example: this recursive Fibonacci function in Rust:
|
||||||
|
|
||||||
|
```rust
|
||||||
|
fn fib(n: i32) -> i32 {
|
||||||
|
if n == 0 || n == 1 {
|
||||||
|
1
|
||||||
|
} else {
|
||||||
|
fib(n - 1) + fib(n - 2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
When compiled with optimisations enabled, rustc will produce the following WebAssembly:
|
||||||
|
|
||||||
|
```wat
|
||||||
|
(module
|
||||||
|
(func $fib (param $p0 i32) (result i32)
|
||||||
|
(local $l1 i32)
|
||||||
|
(set_local $l1
|
||||||
|
(i32.const 1))
|
||||||
|
(block $B0
|
||||||
|
(br_if $B0
|
||||||
|
(i32.lt_u
|
||||||
|
(get_local $p0)
|
||||||
|
(i32.const 2)))
|
||||||
|
(set_local $l1
|
||||||
|
(i32.const 1))
|
||||||
|
(loop $L1
|
||||||
|
(set_local $l1
|
||||||
|
(i32.add
|
||||||
|
(call $fib
|
||||||
|
(i32.add
|
||||||
|
(get_local $p0)
|
||||||
|
(i32.const -1)))
|
||||||
|
(get_local $l1)))
|
||||||
|
(br_if $L1
|
||||||
|
(i32.gt_u
|
||||||
|
(tee_local $p0
|
||||||
|
(i32.add
|
||||||
|
(get_local $p0)
|
||||||
|
(i32.const -2)))
|
||||||
|
(i32.const 1)))))
|
||||||
|
(get_local $l1)))
|
||||||
|
```
|
||||||
|
|
||||||
|
Firefox's optimising compiler produces the following assembly (labels cleaned up somewhat):
|
||||||
|
|
||||||
|
```asm
|
||||||
|
fib:
|
||||||
|
sub rsp, 0x18
|
||||||
|
cmp qword ptr [r14 + 0x28], rsp
|
||||||
|
jae stack_overflow
|
||||||
|
mov dword ptr [rsp + 0xc], edi
|
||||||
|
cmp edi, 2
|
||||||
|
jae .Lelse
|
||||||
|
mov eax, 1
|
||||||
|
mov dword ptr [rsp + 8], eax
|
||||||
|
jmp .Lreturn
|
||||||
|
.Lelse:
|
||||||
|
mov dword ptr [rsp + 0xc], edi
|
||||||
|
mov eax, 1
|
||||||
|
mov dword ptr [rsp + 8], eax
|
||||||
|
.Lloop:
|
||||||
|
mov edi, dword ptr [rsp + 0xc]
|
||||||
|
add edi, -1
|
||||||
|
call 0
|
||||||
|
mov ecx, dword ptr [rsp + 8]
|
||||||
|
add ecx, eax
|
||||||
|
mov dword ptr [rsp + 8], ecx
|
||||||
|
mov ecx, dword ptr [rsp + 0xc]
|
||||||
|
add ecx, -2
|
||||||
|
mov dword ptr [rsp + 0xc], ecx
|
||||||
|
cmp ecx, 1
|
||||||
|
ja .Lloop
|
||||||
|
.Lreturn:
|
||||||
|
mov eax, dword ptr [rsp + 8]
|
||||||
|
nop
|
||||||
|
add rsp, 0x18
|
||||||
|
ret
|
||||||
|
```
|
||||||
|
|
||||||
|
Cranelift with optimisations enabled produces similar:
|
||||||
|
|
||||||
|
```asm
|
||||||
|
fib:
|
||||||
|
push rbp
|
||||||
|
mov rbp, rsp
|
||||||
|
sub rsp, 0x20
|
||||||
|
mov qword ptr [rsp + 0x10], rdi
|
||||||
|
mov dword ptr [rsp + 0x1c], esi
|
||||||
|
mov eax, 1
|
||||||
|
mov dword ptr [rsp + 0x18], eax
|
||||||
|
mov eax, dword ptr [rsp + 0x1c]
|
||||||
|
cmp eax, 2
|
||||||
|
jb .Lreturn
|
||||||
|
movabs rax, 0
|
||||||
|
mov qword ptr [rsp + 8], rax
|
||||||
|
.Lloop:
|
||||||
|
mov eax, dword ptr [rsp + 0x1c]
|
||||||
|
add eax, -1
|
||||||
|
mov rcx, qword ptr [rsp + 8]
|
||||||
|
mov rdx, qword ptr [rsp + 0x10]
|
||||||
|
mov rdi, rdx
|
||||||
|
mov esi, eax
|
||||||
|
call rcx
|
||||||
|
mov ecx, dword ptr [rsp + 0x18]
|
||||||
|
add eax, ecx
|
||||||
|
mov dword ptr [rsp + 0x18], eax
|
||||||
|
mov eax, dword ptr [rsp + 0x1c]
|
||||||
|
add eax, -2
|
||||||
|
mov dword ptr [rsp + 0x1c], eax
|
||||||
|
mov eax, dword ptr [rsp + 0x1c]
|
||||||
|
cmp eax, 1
|
||||||
|
ja .Lloop
|
||||||
|
.Lreturn:
|
||||||
|
mov eax, dword ptr [rsp + 0x18]
|
||||||
|
add rsp, 0x20
|
||||||
|
pop rbp
|
||||||
|
ret
|
||||||
|
```
|
||||||
|
|
||||||
|
Whereas Lightbeam produces code with far fewer memory accesses than both (and fewer blocks than Firefox's output):
|
||||||
|
|
||||||
|
```asm
|
||||||
|
fib:
|
||||||
|
xor eax, eax
|
||||||
|
cmp esi, 2
|
||||||
|
setb al
|
||||||
|
mov ecx, 1
|
||||||
|
test eax, eax
|
||||||
|
jne .Lreturn
|
||||||
|
mov eax, 1
|
||||||
|
.Lloop:
|
||||||
|
mov rcx, rsi
|
||||||
|
add ecx, 0xffffffff
|
||||||
|
push rsi
|
||||||
|
push rax
|
||||||
|
mov rsi, rcx
|
||||||
|
call 0
|
||||||
|
add eax, dword ptr [rsp]
|
||||||
|
mov rcx, qword ptr [rsp + 8]
|
||||||
|
add ecx, 0xfffffffe
|
||||||
|
xor edx, edx
|
||||||
|
cmp ecx, 1
|
||||||
|
seta dl
|
||||||
|
mov rsi, rcx
|
||||||
|
add rsp, 0x10
|
||||||
|
test edx, edx
|
||||||
|
jne .Lloop
|
||||||
|
mov rcx, rax
|
||||||
|
.Lreturn:
|
||||||
|
mov rax, rcx
|
||||||
|
ret
|
||||||
|
```
|
||||||
|
|
||||||
|
Now obviously I'm not advocating for replacing Firefox's optimising compiler with Lightbeam, since the latter can only really produce better code when receiving optimised WebAssembly (and so debug-mode or hand-written WebAssembly may produce much worse output). However, this shows that even with the restrictions of a streaming compiler it's absolutely possible to produce high-quality assembly output. For the assembly above, the Lightbeam output runs within 15% of native speed. This is paramount for one of Lightbeam's intended use cases: real-time systems that want good runtime performance but cannot tolerate compiler bombs.
|
||||||
|
|
||||||
## Specification compliance
|
## Specification compliance
|
||||||
|
|
||||||
It's hard to judge, since each test in the spec testsuite covers a wide range of features (to check their interactions), but currently 31 out of 77 of the spec suite tests pass when run in Wasmtime with Lightbeam as a backend. Here's the full test output:
|
It's hard to judge, since each test in the spec testsuite covers a wide range of features (to check their interactions), but currently 31 out of 77 of the spec suite tests pass when run in Wasmtime with Lightbeam as a backend. Here's the full test output:
|
||||||
|
|||||||
Reference in New Issue
Block a user