Many multi-value returns (#1147)

* Add x86 encodings for `bint` converting to `i8` and `i16`

* Introduce tests for many multi-value returns

* Support arbitrary numbers of return values

This commit implements support for returning an arbitrary number of values
from a function. During legalization we transform multi-value signatures to
take a struct return ("sret") pointer instead of returning their values in
registers. Callers allocate the sret space in their own stack frame and pass a
pointer to it into the callee; once the callee returns, they load the return
values back out of the sret stack slot. The callee's return operations are
legalized to store the return values through the given sret pointer.
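
In rough terms (a hypothetical Rust analogy for illustration, not the
legalizer's actual code or output), a four-value return becomes something
like:

    // Sketch of the calling convention this transform implements. All names
    // and types here are made up for illustration.
    #[repr(C)]
    struct Sret4 {
        r0: u64,
        r1: u64,
        r2: u64,
        r3: u64,
    }

    // Legalized callee: instead of returning four values in registers, it
    // stores them through the sret pointer it was given, then returns that
    // same pointer.
    fn callee(sret: &mut Sret4) -> &mut Sret4 {
        sret.r0 = 0;
        sret.r1 = 1;
        sret.r2 = 2;
        sret.r3 = 3;
        sret
    }

    // Legalized caller: allocates the sret slot in its own stack frame,
    // passes a pointer to it into the callee, and loads the return values
    // back out after the call.
    fn caller() -> u64 {
        let mut slot = Sret4 { r0: 0, r1: 0, r2: 0, r3: 0 }; // the sret slot
        let ret = callee(&mut slot);
        ret.r0 + ret.r1 + ret.r2 + ret.r3
    }

    fn main() {
        println!("{}", caller());
    }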

* Keep track of old, pre-legalized signatures

When legalizing a call or return to match its new, legalized signature, we may
need to consult the old signature to figure out how to rewrite the
instruction.
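
A minimal sketch of the idea, with hypothetical names (`Signature` here is a
stand-in for Cranelift's signature type, and the commit's actual
representation may differ):

    // Keep the old, pre-legalized signature alongside the legalized one, so
    // that legalizing a call or return can still consult the original.
    #[derive(Clone)]
    struct Signature {
        params: Vec<String>,
        returns: Vec<String>,
    }

    struct LegalizedSignature {
        legalized: Signature,
        // `None` when legalization left the signature unchanged.
        old: Option<Signature>,
    }

    impl LegalizedSignature {
        // Example query that needs the old signature: how many values did
        // the function originally return?
        fn original_return_count(&self) -> usize {
            self.old.as_ref().unwrap_or(&self.legalized).returns.len()
        }
    }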

* Add test for multi-value returns and `call_indirect`

* Encode bool -> int x86 instructions in a loop

* Rename `Signature::uses_sret` to `Signature::uses_struct_return_param`

* Rename `p` to `param`

* Add a clarifying comment in `num_registers_required`

* Rename `num_registers_required` to `num_return_registers_required`

* Re-add newline

* Handle already-assigned parameters in `num_return_registers_required`

* Document what some debug assertions are checking for

* Make "illegalizing" closure's control flow simpler

* Add unit tests and comments for our rounding-up-to-the-next-multiple-of-a-power-of-2 function
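
Assuming the helper is the standard bit trick for power-of-two rounding, a
sketch (the actual name and signature in the commit may differ):

    /// Round `n` up to the next multiple of `to`, which must be a power of
    /// two. Because `to` is a power of two, `!(to - 1)` masks off the low
    /// bits after adding `to - 1`.
    fn round_up_to_next_multiple_of(n: u32, to: u32) -> u32 {
        debug_assert!(to.is_power_of_two());
        (n + to - 1) & !(to - 1)
    }

    #[test]
    fn rounds_up_to_power_of_two_multiples() {
        assert_eq!(round_up_to_next_multiple_of(0, 4), 0);
        assert_eq!(round_up_to_next_multiple_of(1, 4), 4);
        assert_eq!(round_up_to_next_multiple_of(4, 4), 4);
        assert_eq!(round_up_to_next_multiple_of(5, 8), 8);
        assert_eq!(round_up_to_next_multiple_of(9, 8), 16);
    }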

* Use `append_inst_arg` instead of doing the same thing manually

* Fix grammar in comment

* Add `Signature::uses_special_{param,return}` helper functions
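
A sketch of what such helpers plausibly look like, assuming an
`ArgumentPurpose`-style tag on each parameter (details may differ from the
commit):

    // Hypothetical stand-ins for Cranelift's types, for illustration only.
    #[derive(PartialEq, Eq, Clone, Copy)]
    enum ArgumentPurpose {
        Normal,
        StructReturn,
        FramePointer,
    }

    struct AbiParam {
        purpose: ArgumentPurpose,
    }

    struct Signature {
        params: Vec<AbiParam>,
        returns: Vec<AbiParam>,
    }

    impl Signature {
        /// Does any parameter have the given special purpose?
        fn uses_special_param(&self, purpose: ArgumentPurpose) -> bool {
            self.params.iter().any(|p| p.purpose == purpose)
        }

        /// Does any return value have the given special purpose?
        fn uses_special_return(&self, purpose: ArgumentPurpose) -> bool {
            self.returns.iter().any(|r| r.purpose == purpose)
        }
    }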

* Inline the definition of `legalize_type_for_sret_load` for readability

* Move sret legalization debug assertions out into their own function

* Add `round_up_to_multiple_of_type_align` helper for readability

* Add a debug assertion that we aren't removing the wrong return value

* Rename `RetPtr` stack slots to `StructReturnSlot`

* Make `legalize_type_for_sret_store` more symmetrical to `legalized_type_for_sret`

* rustfmt

* Remove unnecessary loop labels

* Do not pre-assign offsets to struct return stack slots

Instead, let the existing frame layout algorithm decide where they should go.

* Expand "sret" into explicit "struct return" in doc comment

* typo: "than" -> "then" in comment

* Fold test's debug message into the assertion itself

Author: Nick Fitzgerald
Date: 2019-11-05 14:36:03 -08:00
Committed by: GitHub
Parent: 45fb377457
Commit: a49483408c

29 changed files with 3206 additions and 69 deletions

@@ -0,0 +1,68 @@
test compile
target x86_64 haswell
;; `b1` return values need to be legalized into bytes so that they can be stored
;; in memory.
function %return_4_b1s(b1, b1, b1, b1) -> b1, b1, b1, b1 {
;; check: function %return_4_b1s(b1 [%rsi], b1 [%rdx], b1 [%rcx], b1 [%r8], i64 sret [%rdi], i64 fp [%rbp]) -> i64 sret [%rax], i64 fp [%rbp] fast {
ebb0(v0: b1, v1: b1, v2: b1, v3: b1):
; check: ebb0(v0: b1 [%rsi], v1: b1 [%rdx], v2: b1 [%rcx], v3: b1 [%r8], v4: i64 [%rdi], v13: i64 [%rbp]):
return v0, v1, v2, v3
; check: v5 = bint.i8 v0
; nextln: v9 = uextend.i32 v5
; nextln: istore8 notrap aligned v9, v4
; nextln: v6 = bint.i8 v1
; nextln: v10 = uextend.i32 v6
; nextln: istore8 notrap aligned v10, v4+1
; nextln: v7 = bint.i8 v2
; nextln: v11 = uextend.i32 v7
; nextln: istore8 notrap aligned v11, v4+2
; nextln: v8 = bint.i8 v3
; nextln: v12 = uextend.i32 v8
; nextln: istore8 notrap aligned v12, v4+3
}
function %call_4_b1s() {
; check: function %call_4_b1s(i64 fp [%rbp], i64 csr [%rbx]) -> i64 fp [%rbp], i64 csr [%rbx] fast {
; nextln: ss0 = sret_slot 4, offset -28
fn0 = colocated %return_4_b1s(b1, b1, b1, b1) -> b1, b1, b1, b1
; check: sig0 = (b1 [%rsi], b1 [%rdx], b1 [%rcx], b1 [%r8], i64 sret [%rdi]) -> i64 sret [%rax] fast
ebb0:
; check: ebb0(v26: i64 [%rbp], v27: i64 [%rbx]):
v0 = bconst.b1 true
v1 = bconst.b1 false
v2 = bconst.b1 true
v3 = bconst.b1 false
; check: v8 = stack_addr.i64 ss0
v4, v5, v6, v7 = call fn0(v0, v1, v2, v3)
; check: v9 = call fn0(v0, v1, v2, v3, v8)
; nextln: v22 = uload8.i32 notrap aligned v9
; nextln: v10 = ireduce.i8 v22
; nextln: v11 = raw_bitcast.b8 v10
; nextln: v12 = breduce.b1 v11
; nextln: v4 -> v12
; nextln: v23 = uload8.i32 notrap aligned v9+1
; nextln: v13 = ireduce.i8 v23
; nextln: v14 = raw_bitcast.b8 v13
; nextln: v15 = breduce.b1 v14
; nextln: v5 -> v15
; nextln: v24 = uload8.i32 notrap aligned v9+2
; nextln: v16 = ireduce.i8 v24
; nextln: v17 = raw_bitcast.b8 v16
; nextln: v18 = breduce.b1 v17
; nextln: v6 -> v18
; nextln: v25 = uload8.i32 notrap aligned v9+3
; nextln: v19 = ireduce.i8 v25
; nextln: v20 = raw_bitcast.b8 v19
; nextln: v21 = breduce.b1 v20
; nextln: v7 -> v21
return
}

@@ -0,0 +1,26 @@
test legalizer
target x86_64 haswell
;; Indirect calls with many returns.
function %call_indirect_many_rets(i64) {
; check: ss0 = sret_slot 32
sig0 = () -> i64, i64, i64, i64
; check: sig0 = (i64 sret [%rdi]) -> i64 sret [%rax] fast
ebb0(v0: i64):
v1, v2, v3, v4 = call_indirect sig0, v0()
; check: v5 = stack_addr.i64 ss0
; nextln: v6 = call_indirect sig0, v0(v5)
; nextln: v7 = load.i64 notrap aligned v6
; nextln: v1 -> v7
; nextln: v8 = load.i64 notrap aligned v6+8
; nextln: v2 -> v8
; nextln: v9 = load.i64 notrap aligned v6+16
; nextln: v3 -> v9
; nextln: v10 = load.i64 notrap aligned v6+24
; nextln: v4 -> v10
return
}

@@ -0,0 +1,44 @@
test compile
target x86_64 haswell
;; Returning many f32s
function %return_2_f32s() -> f32, f32 {
ebb0:
v0 = f32const 0x0.0
v1 = f32const 0x1.0
return v0, v1
}
function %return_3_f32s() -> f32, f32, f32 {
ebb0:
v0 = f32const 0x0.0
v1 = f32const 0x1.0
v2 = f32const 0x2.0
return v0, v1, v2
}
function %return_4_f32s() -> f32, f32, f32, f32 {
ebb0:
v0 = f32const 0x0.0
v1 = f32const 0x1.0
v2 = f32const 0x2.0
v3 = f32const 0x3.0
return v0, v1, v2, v3
}
;; Calling functions that return many f32s
function %call() -> f32 {
fn0 = %a() -> f32, f32
fn1 = %b(f32, f32) -> f32, f32, f32
fn2 = %c(f32, f32, f32) -> f32, f32, f32, f32
ebb0:
v0, v1 = call fn0()
v2, v3, v4 = call fn1(v0, v1)
v5, v6, v7, v8 = call fn2(v2, v3, v4)
v9 = fadd v5, v6
v10 = fadd v7, v8
v11 = fadd v9, v10
return v11
}

@@ -0,0 +1,44 @@
test compile
target x86_64 haswell
;; Returning many f64s
function %return_2_f64s() -> f64, f64 {
ebb0:
v0 = f64const 0x0.0
v1 = f64const 0x1.0
return v0, v1
}
function %return_3_f64s() -> f64, f64, f64 {
ebb0:
v0 = f64const 0x0.0
v1 = f64const 0x1.0
v2 = f64const 0x2.0
return v0, v1, v2
}
function %return_4_f64s() -> f64, f64, f64, f64 {
ebb0:
v0 = f64const 0x0.0
v1 = f64const 0x1.0
v2 = f64const 0x2.0
v3 = f64const 0x3.0
return v0, v1, v2, v3
}
;; Calling functions that return many f64s
function %call() -> f64 {
fn0 = %a() -> f64, f64
fn1 = %b(f64, f64) -> f64, f64, f64
fn2 = %c(f64, f64, f64) -> f64, f64, f64, f64
ebb0:
v0, v1 = call fn0()
v2, v3, v4 = call fn1(v0, v1)
v5, v6, v7, v8 = call fn2(v2, v3, v4)
v9 = fadd v5, v6
v10 = fadd v7, v8
v11 = fadd v9, v10
return v11
}

@@ -0,0 +1,44 @@
test compile
target x86_64 haswell
;; Returning many i32s
function %return_2_i32s() -> i32, i32 {
ebb0:
v0 = iconst.i32 0
v1 = iconst.i32 1
return v0, v1
}
function %return_3_i32s() -> i32, i32, i32 {
ebb0:
v0 = iconst.i32 0
v1 = iconst.i32 1
v2 = iconst.i32 2
return v0, v1, v2
}
function %return_4_i32s() -> i32, i32, i32, i32 {
ebb0:
v0 = iconst.i32 0
v1 = iconst.i32 1
v2 = iconst.i32 2
v3 = iconst.i32 3
return v0, v1, v2, v3
}
;; Calling functions that return many i32s
function %call() -> i32 {
fn0 = %a() -> i32, i32
fn1 = %b(i32, i32) -> i32, i32, i32
fn2 = %c(i32, i32, i32) -> i32, i32, i32, i32
ebb0:
v0, v1 = call fn0()
v2, v3, v4 = call fn1(v0, v1)
v5, v6, v7, v8 = call fn2(v2, v3, v4)
v9 = iadd v5, v6
v10 = iadd v7, v8
v11 = iadd v9, v10
return v11
}

@@ -0,0 +1,44 @@
test compile
target x86_64 haswell
;; Returning many i64s
function %return_2_i64s() -> i64, i64 {
ebb0:
v0 = iconst.i64 0
v1 = iconst.i64 1
return v0, v1
}
function %return_3_i64s() -> i64, i64, i64 {
ebb0:
v0 = iconst.i64 0
v1 = iconst.i64 1
v2 = iconst.i64 2
return v0, v1, v2
}
function %return_4_i64s() -> i64, i64, i64, i64 {
ebb0:
v0 = iconst.i64 0
v1 = iconst.i64 1
v2 = iconst.i64 2
v3 = iconst.i64 3
return v0, v1, v2, v3
}
;; Calling functions that return many i64s
function %call() -> i64 {
fn0 = %a() -> i64, i64
fn1 = %b(i64, i64) -> i64, i64, i64
fn2 = %c(i64, i64, i64) -> i64, i64, i64, i64
ebb0:
v0, v1 = call fn0()
v2, v3, v4 = call fn1(v0, v1)
v5, v6, v7, v8 = call fn2(v2, v3, v4)
v9 = iadd v5, v6
v10 = iadd v7, v8
v11 = iadd v9, v10
return v11
}

(File diff suppressed because it is too large.)

@@ -0,0 +1,61 @@
test legalizer
target x86_64 haswell
;; Test that we don't reuse `sret` stack slots for multiple calls. We could do
;; this one day, but it would require some care to ensure that subsequent
;; calls don't overwrite the results of previous calls.
function %foo() -> i32, f32 {
; check: ss0 = sret_slot 20
; nextln: ss1 = sret_slot 20
fn0 = %f() -> i32, i32, i32, i32, i32
fn1 = %g() -> f32, f32, f32, f32, f32
; check: sig0 = (i64 sret [%rdi]) -> i64 sret [%rax] fast
; nextln: sig1 = (i64 sret [%rdi]) -> i64 sret [%rax] fast
; nextln: fn0 = %f sig0
; nextln: fn1 = %g sig1
ebb0:
v0, v1, v2, v3, v4 = call fn0()
; check: v18 = stack_addr.i64 ss0
; nextln: v25 = func_addr.i64 fn0
; nextln: v19 = call_indirect sig0, v25(v18)
; nextln: v20 = load.i32 notrap aligned v19
; nextln: v0 -> v20
; nextln: v21 = load.i32 notrap aligned v19+4
; nextln: v1 -> v21
; nextln: v22 = load.i32 notrap aligned v19+8
; nextln: v2 -> v22
; nextln: v23 = load.i32 notrap aligned v19+12
; nextln: v3 -> v23
; nextln: v24 = load.i32 notrap aligned v19+16
; nextln: v4 -> v24
v5, v6, v7, v8, v9 = call fn1()
; check: v26 = stack_addr.i64 ss1
; nextln: v33 = func_addr.i64 fn1
; nextln: v27 = call_indirect sig1, v33(v26)
; nextln: v28 = load.f32 notrap aligned v27
; nextln: v5 -> v28
; nextln: v29 = load.f32 notrap aligned v27+4
; nextln: v6 -> v29
; nextln: v30 = load.f32 notrap aligned v27+8
; nextln: v7 -> v30
; nextln: v31 = load.f32 notrap aligned v27+12
; nextln: v8 -> v31
; nextln: v32 = load.f32 notrap aligned v27+16
; nextln: v9 -> v32
v10 = iadd v0, v1
v11 = iadd v2, v3
v12 = iadd v10, v11
v13 = iadd v12, v4
v14 = fadd v5, v6
v15 = fadd v7, v8
v16 = fadd v14, v15
v17 = fadd v16, v9
return v13, v17
}

@@ -0,0 +1,51 @@
test legalizer
target x86_64 haswell
;; Need to insert padding after the `i8`s so that the `i32` and `i64` are
;; aligned.
function %returner() -> i8, i32, i8, i64 {
; check: function %returner(i64 sret [%rdi]) -> i64 sret [%rax] fast {
ebb0:
; check: ebb0(v4: i64):
v0 = iconst.i8 0
v1 = iconst.i32 1
v2 = iconst.i8 2
v3 = iconst.i64 3
return v0, v1, v2, v3
; check: v6 = uextend.i32 v0
; nextln: istore8 notrap aligned v6, v4
; nextln: store notrap aligned v1, v4+4
; nextln: v7 = uextend.i32 v2
; nextln: istore8 notrap aligned v7, v4+8
; nextln: store notrap aligned v3, v4+16
; nextln: return v4
}
function %caller() {
; check: ss0 = sret_slot 24
fn0 = %returner() -> i8, i32, i8, i64
; check: sig0 = (i64 sret [%rdi]) -> i64 sret [%rax] fast
; nextln: fn0 = %returner sig0
ebb0:
v0, v1, v2, v3 = call fn0()
; check: v4 = stack_addr.i64 ss0
; nextln: v10 = func_addr.i64 fn0
; nextln: v5 = call_indirect sig0, v10(v4)
; nextln: v11 = uload8.i32 notrap aligned v5
; nextln: v6 = ireduce.i8 v11
; nextln: v0 -> v6
; nextln: v7 = load.i32 notrap aligned v5+4
; nextln: v1 -> v7
; nextln: v12 = uload8.i32 notrap aligned v5+8
; nextln: v8 = ireduce.i8 v12
; nextln: v2 -> v8
; nextln: v9 = load.i64 notrap aligned v5+16
; nextln: v3 -> v9
return
}

@@ -0,0 +1,18 @@
test compile
target x86_64 haswell
function %returner(i32, i64, f32, f64) -> i32, i64, f32, f64 {
ebb0(v0: i32, v1: i64, v2: f32, v3: f64):
return v0, v1, v2, v3
}
function %caller() {
fn0 = %returner(i32, i64, f32, f64) -> i32, i64, f32, f64
ebb0:
v0 = iconst.i32 0
v1 = iconst.i64 1
v2 = f32const 0x2.0
v3 = f64const 0x3.0
v4, v5, v6, v7 = call fn0(v0, v1, v2, v3)
return
}

@@ -0,0 +1,34 @@
test compile
target x86_64 haswell
function %return_20_i32s() -> i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 {
ebb0:
v0 = iconst.i32 0
v1 = iconst.i32 1
v2 = iconst.i32 2
v3 = iconst.i32 3
v4 = iconst.i32 4
v5 = iconst.i32 5
v6 = iconst.i32 6
v7 = iconst.i32 7
v8 = iconst.i32 8
v9 = iconst.i32 9
v10 = iconst.i32 10
v11 = iconst.i32 11
v12 = iconst.i32 12
v13 = iconst.i32 13
v14 = iconst.i32 14
v15 = iconst.i32 15
v16 = iconst.i32 16
v17 = iconst.i32 17
v18 = iconst.i32 18
v19 = iconst.i32 19
return v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19
}
function %call_20_i32s() {
fn0 = %return_20_i32s() -> i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32
ebb0:
v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19 = call fn0()
return
}