Update README

2019-03-07 13:59:50 +01:00
parent 45ccc591cd
commit 3f3de16bc4
1 changed files with 159 additions and 0 deletions
--- a/README.md
+++ b/README.md
@@ -4,6 +4,165 @@ Lightbeam is an optimising one-pass streaming compiler for WebAssembly, intended

 [wasmtime]: https://github.com/CraneStation/wasmtime

+## Quality of output
+
+Already - with a very small number of relatively simple optimisation rules - Lightbeam produces surprisingly high-quality output considering how restricted it is. It even produces better code than Cranelift, Firefox or both for some workloads. Here's a very simple example: this recursive Fibonacci function in Rust:
+
+```rust
+fn fib(n: i32) -> i32 {
+    if n == 0 || n == 1 {
+        1
+    } else {
+        fib(n - 1) + fib(n - 2)
+    }
+}
+```
+
+When compiled with optimisations enabled, rustc will produce the following WebAssembly:
+
+```wasm
+(module
+  (func $fib (param $p0 i32) (result i32)
+    (local $l1 i32)
+    (set_local $l1
+      (i32.const 1))
+    (block $B0
+      (br_if $B0
+        (i32.lt_u
+          (get_local $p0)
+          (i32.const 2)))
+      (set_local $l1
+        (i32.const 1))
+      (loop $L1
+        (set_local $l1
+          (i32.add
+            (call $fib
+              (i32.add
+                (get_local $p0)
+                (i32.const -1)))
+            (get_local $l1)))
+        (br_if $L1
+          (i32.gt_u
+            (tee_local $p0
+              (i32.add
+                (get_local $p0)
+                (i32.const -2)))
+            (i32.const 1)))))
+    (get_local $l1)))
+```
+
+Firefox's optimising compiler produces the following assembly (labels cleaned up somewhat):
+
+```asm
+fib:
+  sub rsp, 0x18
+  cmp qword ptr [r14 + 0x28], rsp
+  jae stack_overflow
+  mov dword ptr [rsp + 0xc], edi
+  cmp edi, 2
+  jae .Lelse
+  mov eax, 1
+  mov dword ptr [rsp + 8], eax
+  jmp .Lreturn
+.Lelse:
+  mov dword ptr [rsp + 0xc], edi
+  mov eax, 1
+  mov dword ptr [rsp + 8], eax
+.Lloop:
+  mov edi, dword ptr [rsp + 0xc]
+  add edi, -1
+  call 0
+  mov ecx, dword ptr [rsp + 8]
+  add ecx, eax
+  mov dword ptr [rsp + 8], ecx
+  mov ecx, dword ptr [rsp + 0xc]
+  add ecx, -2
+  mov dword ptr [rsp + 0xc], ecx
+  cmp ecx, 1
+  ja .Lloop
+.Lreturn:
+  mov eax, dword ptr [rsp + 8]
+  nop
+  add rsp, 0x18
+  ret
+```
+
+Cranelift with optimisations enabled produces similar output:
+
+```asm
+fib:
+  push   rbp
+  mov    rbp, rsp
+  sub    rsp, 0x20
+  mov    qword ptr [rsp + 0x10], rdi
+  mov    dword ptr [rsp + 0x1c], esi
+  mov    eax, 1
+  mov    dword ptr [rsp + 0x18], eax
+  mov    eax, dword ptr [rsp + 0x1c]
+  cmp    eax, 2
+  jb     .Lreturn
+  movabs rax, 0
+  mov    qword ptr [rsp + 8], rax
+.Lloop:
+  mov    eax, dword ptr [rsp + 0x1c]
+  add    eax, -1
+  mov    rcx, qword ptr [rsp + 8]
+  mov    rdx, qword ptr [rsp + 0x10]
+  mov    rdi, rdx
+  mov    esi, eax
+  call   rcx
+  mov    ecx, dword ptr [rsp + 0x18]
+  add    eax, ecx
+  mov    dword ptr [rsp + 0x18], eax
+  mov    eax, dword ptr [rsp + 0x1c]
+  add    eax, -2
+  mov    dword ptr [rsp + 0x1c], eax
+  mov    eax, dword ptr [rsp + 0x1c]
+  cmp    eax, 1
+  ja     .Lloop
+.Lreturn:
+  mov    eax, dword ptr [rsp + 0x18]
+  add    rsp, 0x20
+  pop    rbp
+  ret
+```
+
+Whereas Lightbeam produces code with far fewer memory accesses than both (and fewer blocks than Firefox's output):
+
+```asm
+fib:
+  xor  eax, eax
+  cmp  esi, 2
+  setb al
+  mov  ecx, 1
+  test eax, eax
+  jne  .Lreturn
+  mov  eax, 1
+.Lloop:
+  mov  rcx, rsi
+  add  ecx, 0xffffffff
+  push rsi
+  push rax
+  mov  rsi, rcx
+  call 0
+  add  eax, dword ptr [rsp]
+  mov  rcx, qword ptr [rsp + 8]
+  add  ecx, 0xfffffffe
+  xor  edx, edx
+  cmp  ecx, 1
+  seta dl
+  mov  rsi, rcx
+  add  rsp, 0x10
+  test edx, edx
+  jne  .Lloop
+  mov  rcx, rax
+.Lreturn:
+  mov  rax, rcx
+  ret
+```
+
+Now obviously I'm not advocating for replacing Firefox's optimising compiler with Lightbeam, since the latter can only really produce better code when receiving optimised WebAssembly (and so debug-mode or hand-written WebAssembly may produce much worse output). However, this shows that even with the restrictions of a streaming compiler it's absolutely possible to produce high-quality assembly output. For the assembly above, the Lightbeam output runs within 15% of native speed. This is paramount for one of Lightbeam's intended use cases: real-time systems that want good runtime performance but cannot tolerate compiler bombs.
+
 ## Specification compliance

 It's hard to judge, since each test in the spec testsuite covers a wide range of features (to check their interactions), but currently 31 out of 77 of the spec suite tests pass when run in Wasmtime with Lightbeam as a backend. Here's the full test output: