Many multi-value returns (#1147)

* Add x86 encodings for `bint` converting to `i8` and `i16`

* Introduce tests for many multi-value returns

* Support arbitrary numbers of return values

This commit implements support for returning an arbitrary number of values
from a function. During legalization we transform multi-value signatures to
take a struct return ("sret") pointer instead of returning their values in
registers. Callers allocate the sret space in their own stack frame and pass a
pointer to it into the callee; once the callee returns, they load the return
values back out of the sret stack slot. The callee's return operations are
legalized to store the return values through the given sret pointer.
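
In rough terms (a hypothetical Rust analogy for illustration, not the
legalizer's actual code or output), a four-value return becomes something
like:

    // Sketch of the calling convention this transform implements. All names
    // and types here are made up for illustration.
    #[repr(C)]
    struct Sret4 {
        r0: u64,
        r1: u64,
        r2: u64,
        r3: u64,
    }

    // Legalized callee: instead of returning four values in registers, it
    // stores them through the sret pointer it was given, then returns that
    // same pointer.
    fn callee(sret: &mut Sret4) -> &mut Sret4 {
        sret.r0 = 0;
        sret.r1 = 1;
        sret.r2 = 2;
        sret.r3 = 3;
        sret
    }

    // Legalized caller: allocates the sret slot in its own stack frame,
    // passes a pointer to it into the callee, and loads the return values
    // back out after the call.
    fn caller() -> u64 {
        let mut slot = Sret4 { r0: 0, r1: 0, r2: 0, r3: 0 }; // the sret slot
        let ret = callee(&mut slot);
        ret.r0 + ret.r1 + ret.r2 + ret.r3
    }

    fn main() {
        println!("{}", caller());
    }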

* Keep track of old, pre-legalized signatures

When legalizing a call or return to match its new, legalized signature, we may
need to consult the old signature to figure out how to rewrite the
instruction.
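
A minimal sketch of the idea, with hypothetical names (`Signature` here is a
stand-in for Cranelift's signature type, and the commit's actual
representation may differ):

    // Keep the old, pre-legalized signature alongside the legalized one, so
    // that legalizing a call or return can still consult the original.
    #[derive(Clone)]
    struct Signature {
        params: Vec<String>,
        returns: Vec<String>,
    }

    struct LegalizedSignature {
        legalized: Signature,
        // `None` when legalization left the signature unchanged.
        old: Option<Signature>,
    }

    impl LegalizedSignature {
        // Example query that needs the old signature: how many values did
        // the function originally return?
        fn original_return_count(&self) -> usize {
            self.old.as_ref().unwrap_or(&self.legalized).returns.len()
        }
    }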

* Add test for multi-value returns and `call_indirect`

* Encode bool -> int x86 instructions in a loop

* Rename `Signature::uses_sret` to `Signature::uses_struct_return_param`

* Rename `p` to `param`

* Add a clarifying comment in `num_registers_required`

* Rename `num_registers_required` to `num_return_registers_required`

* Re-add newline

* Handle already-assigned parameters in `num_return_registers_required`

* Document what some debug assertions are checking for

* Make "illegalizing" closure's control flow simpler

* Add unit tests and comments for our rounding-up-to-the-next-multiple-of-a-power-of-2 function
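
Assuming the helper is the standard bit trick for power-of-two rounding, a
sketch (the actual name and signature in the commit may differ):

    /// Round `n` up to the next multiple of `to`, which must be a power of
    /// two. Because `to` is a power of two, `!(to - 1)` masks off the low
    /// bits after adding `to - 1`.
    fn round_up_to_next_multiple_of(n: u32, to: u32) -> u32 {
        debug_assert!(to.is_power_of_two());
        (n + to - 1) & !(to - 1)
    }

    #[test]
    fn rounds_up_to_power_of_two_multiples() {
        assert_eq!(round_up_to_next_multiple_of(0, 4), 0);
        assert_eq!(round_up_to_next_multiple_of(1, 4), 4);
        assert_eq!(round_up_to_next_multiple_of(4, 4), 4);
        assert_eq!(round_up_to_next_multiple_of(5, 8), 8);
        assert_eq!(round_up_to_next_multiple_of(9, 8), 16);
    }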

* Use `append_inst_arg` instead of doing the same thing manually

* Fix grammar in comment

* Add `Signature::uses_special_{param,return}` helper functions
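
A sketch of what such helpers plausibly look like, assuming an
`ArgumentPurpose`-style tag on each parameter (details may differ from the
commit):

    // Hypothetical stand-ins for Cranelift's types, for illustration only.
    #[derive(PartialEq, Eq, Clone, Copy)]
    enum ArgumentPurpose {
        Normal,
        StructReturn,
        FramePointer,
    }

    struct AbiParam {
        purpose: ArgumentPurpose,
    }

    struct Signature {
        params: Vec<AbiParam>,
        returns: Vec<AbiParam>,
    }

    impl Signature {
        /// Does any parameter have the given special purpose?
        fn uses_special_param(&self, purpose: ArgumentPurpose) -> bool {
            self.params.iter().any(|p| p.purpose == purpose)
        }

        /// Does any return value have the given special purpose?
        fn uses_special_return(&self, purpose: ArgumentPurpose) -> bool {
            self.returns.iter().any(|r| r.purpose == purpose)
        }
    }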

* Inline the definition of `legalize_type_for_sret_load` for readability

* Move sret legalization debug assertions out into their own function

* Add `round_up_to_multiple_of_type_align` helper for readability

* Add a debug assertion that we aren't removing the wrong return value

* Rename `RetPtr` stack slots to `StructReturnSlot`

* Make `legalize_type_for_sret_store` more symmetrical to `legalized_type_for_sret`

* rustfmt

* Remove unnecessary loop labels

* Do not pre-assign offsets to struct return stack slots

Instead, let the existing frame layout algorithm decide where they should go.

* Expand "sret" into explicit "struct return" in doc comment

* typo: "than" -> "then" in comment

* Fold test's debug message into the assertion itself

Author: Nick Fitzgerald
Date: 2019-11-05 14:36:03 -08:00
Committed by: GitHub
Parent: 45fb377457
Commit: a49483408c

29 changed files with 3206 additions and 69 deletions

@@ -0,0 +1,68 @@
test compile
target x86_64 haswell
;; `b1` return values need to be legalized into bytes so that they can be stored
;; in memory.
function %return_4_b1s(b1, b1, b1, b1) -> b1, b1, b1, b1 {
;; check: function %return_4_b1s(b1 [%rsi], b1 [%rdx], b1 [%rcx], b1 [%r8], i64 sret [%rdi], i64 fp [%rbp]) -> i64 sret [%rax], i64 fp [%rbp] fast {
ebb0(v0: b1, v1: b1, v2: b1, v3: b1):
; check: ebb0(v0: b1 [%rsi], v1: b1 [%rdx], v2: b1 [%rcx], v3: b1 [%r8], v4: i64 [%rdi], v13: i64 [%rbp]):
return v0, v1, v2, v3
; check: v5 = bint.i8 v0
; nextln: v9 = uextend.i32 v5
; nextln: istore8 notrap aligned v9, v4
; nextln: v6 = bint.i8 v1
; nextln: v10 = uextend.i32 v6
; nextln: istore8 notrap aligned v10, v4+1
; nextln: v7 = bint.i8 v2
; nextln: v11 = uextend.i32 v7
; nextln: istore8 notrap aligned v11, v4+2
; nextln: v8 = bint.i8 v3
; nextln: v12 = uextend.i32 v8
; nextln: istore8 notrap aligned v12, v4+3
}
function %call_4_b1s() {
; check: function %call_4_b1s(i64 fp [%rbp], i64 csr [%rbx]) -> i64 fp [%rbp], i64 csr [%rbx] fast {
; nextln: ss0 = sret_slot 4, offset -28
fn0 = colocated %return_4_b1s(b1, b1, b1, b1) -> b1, b1, b1, b1
; check: sig0 = (b1 [%rsi], b1 [%rdx], b1 [%rcx], b1 [%r8], i64 sret [%rdi]) -> i64 sret [%rax] fast
ebb0:
; check: ebb0(v26: i64 [%rbp], v27: i64 [%rbx]):
v0 = bconst.b1 true
v1 = bconst.b1 false
v2 = bconst.b1 true
v3 = bconst.b1 false
; check: v8 = stack_addr.i64 ss0
v4, v5, v6, v7 = call fn0(v0, v1, v2, v3)
; check: v9 = call fn0(v0, v1, v2, v3, v8)
; nextln: v22 = uload8.i32 notrap aligned v9
; nextln: v10 = ireduce.i8 v22
; nextln: v11 = raw_bitcast.b8 v10
; nextln: v12 = breduce.b1 v11
; nextln: v4 -> v12
; nextln: v23 = uload8.i32 notrap aligned v9+1
; nextln: v13 = ireduce.i8 v23
; nextln: v14 = raw_bitcast.b8 v13
; nextln: v15 = breduce.b1 v14
; nextln: v5 -> v15
; nextln: v24 = uload8.i32 notrap aligned v9+2
; nextln: v16 = ireduce.i8 v24
; nextln: v17 = raw_bitcast.b8 v16
; nextln: v18 = breduce.b1 v17
; nextln: v6 -> v18
; nextln: v25 = uload8.i32 notrap aligned v9+3
; nextln: v19 = ireduce.i8 v25
; nextln: v20 = raw_bitcast.b8 v19
; nextln: v21 = breduce.b1 v20
; nextln: v7 -> v21
return
}

@@ -0,0 +1,26 @@
test legalizer
target x86_64 haswell
;; Indirect calls with many returns.
function %call_indirect_many_rets(i64) {
; check: ss0 = sret_slot 32
sig0 = () -> i64, i64, i64, i64
; check: sig0 = (i64 sret [%rdi]) -> i64 sret [%rax] fast
ebb0(v0: i64):
v1, v2, v3, v4 = call_indirect sig0, v0()
; check: v5 = stack_addr.i64 ss0
; nextln: v6 = call_indirect sig0, v0(v5)
; nextln: v7 = load.i64 notrap aligned v6
; nextln: v1 -> v7
; nextln: v8 = load.i64 notrap aligned v6+8
; nextln: v2 -> v8
; nextln: v9 = load.i64 notrap aligned v6+16
; nextln: v3 -> v9
; nextln: v10 = load.i64 notrap aligned v6+24
; nextln: v4 -> v10
return
}

@@ -0,0 +1,44 @@
test compile
target x86_64 haswell
;; Returning many f32s
function %return_2_f32s() -> f32, f32 {
ebb0:
v0 = f32const 0x0.0
v1 = f32const 0x1.0
return v0, v1
}
function %return_3_f32s() -> f32, f32, f32 {
ebb0:
v0 = f32const 0x0.0
v1 = f32const 0x1.0
v2 = f32const 0x2.0
return v0, v1, v2
}
function %return_4_f32s() -> f32, f32, f32, f32 {
ebb0:
v0 = f32const 0x0.0
v1 = f32const 0x1.0
v2 = f32const 0x2.0
v3 = f32const 0x3.0
return v0, v1, v2, v3
}
;; Calling functions that return many f32s
function %call() -> f32 {
fn0 = %a() -> f32, f32
fn1 = %b(f32, f32) -> f32, f32, f32
fn2 = %c(f32, f32, f32) -> f32, f32, f32, f32
ebb0:
v0, v1 = call fn0()
v2, v3, v4 = call fn1(v0, v1)
v5, v6, v7, v8 = call fn2(v2, v3, v4)
v9 = fadd v5, v6
v10 = fadd v7, v8
v11 = fadd v9, v10
return v11
}

@@ -0,0 +1,44 @@
test compile
target x86_64 haswell
;; Returning many f64s
function %return_2_f64s() -> f64, f64 {
ebb0:
v0 = f64const 0x0.0
v1 = f64const 0x1.0
return v0, v1
}
function %return_3_f64s() -> f64, f64, f64 {
ebb0:
v0 = f64const 0x0.0
v1 = f64const 0x1.0
v2 = f64const 0x2.0
return v0, v1, v2
}
function %return_4_f64s() -> f64, f64, f64, f64 {
ebb0:
v0 = f64const 0x0.0
v1 = f64const 0x1.0
v2 = f64const 0x2.0
v3 = f64const 0x3.0
return v0, v1, v2, v3
}
;; Calling functions that return many f64s
function %call() -> f64 {
fn0 = %a() -> f64, f64
fn1 = %b(f64, f64) -> f64, f64, f64
fn2 = %c(f64, f64, f64) -> f64, f64, f64, f64
ebb0:
v0, v1 = call fn0()
v2, v3, v4 = call fn1(v0, v1)
v5, v6, v7, v8 = call fn2(v2, v3, v4)
v9 = fadd v5, v6
v10 = fadd v7, v8
v11 = fadd v9, v10
return v11
}

@@ -0,0 +1,44 @@
test compile
target x86_64 haswell
;; Returning many i32s
function %return_2_i32s() -> i32, i32 {
ebb0:
v0 = iconst.i32 0
v1 = iconst.i32 1
return v0, v1
}
function %return_3_i32s() -> i32, i32, i32 {
ebb0:
v0 = iconst.i32 0
v1 = iconst.i32 1
v2 = iconst.i32 2
return v0, v1, v2
}
function %return_4_i32s() -> i32, i32, i32, i32 {
ebb0:
v0 = iconst.i32 0
v1 = iconst.i32 1
v2 = iconst.i32 2
v3 = iconst.i32 3
return v0, v1, v2, v3
}
;; Calling functions that return many i32s
function %call() -> i32 {
fn0 = %a() -> i32, i32
fn1 = %b(i32, i32) -> i32, i32, i32
fn2 = %c(i32, i32, i32) -> i32, i32, i32, i32
ebb0:
v0, v1 = call fn0()
v2, v3, v4 = call fn1(v0, v1)
v5, v6, v7, v8 = call fn2(v2, v3, v4)
v9 = iadd v5, v6
v10 = iadd v7, v8
v11 = iadd v9, v10
return v11
}

@@ -0,0 +1,44 @@
test compile
target x86_64 haswell
;; Returning many i64s
function %return_2_i64s() -> i64, i64 {
ebb0:
v0 = iconst.i64 0
v1 = iconst.i64 1
return v0, v1
}
function %return_3_i64s() -> i64, i64, i64 {
ebb0:
v0 = iconst.i64 0
v1 = iconst.i64 1
v2 = iconst.i64 2
return v0, v1, v2
}
function %return_4_i64s() -> i64, i64, i64, i64 {
ebb0:
v0 = iconst.i64 0
v1 = iconst.i64 1
v2 = iconst.i64 2
v3 = iconst.i64 3
return v0, v1, v2, v3
}
;; Calling functions that return many i64s
function %call() -> i64 {
fn0 = %a() -> i64, i64
fn1 = %b(i64, i64) -> i64, i64, i64
fn2 = %c(i64, i64, i64) -> i64, i64, i64, i64
ebb0:
v0, v1 = call fn0()
v2, v3, v4 = call fn1(v0, v1)
v5, v6, v7, v8 = call fn2(v2, v3, v4)
v9 = iadd v5, v6
v10 = iadd v7, v8
v11 = iadd v9, v10
return v11
}

(File diff suppressed because it is too large.)

@@ -0,0 +1,61 @@
test legalizer
target x86_64 haswell
;; Test that we don't reuse `sret` stack slots for multiple calls. We could do
;; this one day, but it would require some care to ensure that subsequent
;; calls don't overwrite the results of previous calls.
function %foo() -> i32, f32 {
; check: ss0 = sret_slot 20
; nextln: ss1 = sret_slot 20
fn0 = %f() -> i32, i32, i32, i32, i32
fn1 = %g() -> f32, f32, f32, f32, f32
; check: sig0 = (i64 sret [%rdi]) -> i64 sret [%rax] fast
; nextln: sig1 = (i64 sret [%rdi]) -> i64 sret [%rax] fast
; nextln: fn0 = %f sig0
; nextln: fn1 = %g sig1
ebb0:
v0, v1, v2, v3, v4 = call fn0()
; check: v18 = stack_addr.i64 ss0
; nextln: v25 = func_addr.i64 fn0
; nextln: v19 = call_indirect sig0, v25(v18)
; nextln: v20 = load.i32 notrap aligned v19
; nextln: v0 -> v20
; nextln: v21 = load.i32 notrap aligned v19+4
; nextln: v1 -> v21
; nextln: v22 = load.i32 notrap aligned v19+8
; nextln: v2 -> v22
; nextln: v23 = load.i32 notrap aligned v19+12
; nextln: v3 -> v23
; nextln: v24 = load.i32 notrap aligned v19+16
; nextln: v4 -> v24
v5, v6, v7, v8, v9 = call fn1()
; check: v26 = stack_addr.i64 ss1
; nextln: v33 = func_addr.i64 fn1
; nextln: v27 = call_indirect sig1, v33(v26)
; nextln: v28 = load.f32 notrap aligned v27
; nextln: v5 -> v28
; nextln: v29 = load.f32 notrap aligned v27+4
; nextln: v6 -> v29
; nextln: v30 = load.f32 notrap aligned v27+8
; nextln: v7 -> v30
; nextln: v31 = load.f32 notrap aligned v27+12
; nextln: v8 -> v31
; nextln: v32 = load.f32 notrap aligned v27+16
; nextln: v9 -> v32
v10 = iadd v0, v1
v11 = iadd v2, v3
v12 = iadd v10, v11
v13 = iadd v12, v4
v14 = fadd v5, v6
v15 = fadd v7, v8
v16 = fadd v14, v15
v17 = fadd v16, v9
return v13, v17
}

@@ -0,0 +1,51 @@
test legalizer
target x86_64 haswell
;; Need to insert padding after the `i8`s so that the `i32` and `i64` are
;; aligned.
function %returner() -> i8, i32, i8, i64 {
; check: function %returner(i64 sret [%rdi]) -> i64 sret [%rax] fast {
ebb0:
; check: ebb0(v4: i64):
v0 = iconst.i8 0
v1 = iconst.i32 1
v2 = iconst.i8 2
v3 = iconst.i64 3
return v0, v1, v2, v3
; check: v6 = uextend.i32 v0
; nextln: istore8 notrap aligned v6, v4
; nextln: store notrap aligned v1, v4+4
; nextln: v7 = uextend.i32 v2
; nextln: istore8 notrap aligned v7, v4+8
; nextln: store notrap aligned v3, v4+16
; nextln: return v4
}
function %caller() {
; check: ss0 = sret_slot 24
fn0 = %returner() -> i8, i32, i8, i64
; check: sig0 = (i64 sret [%rdi]) -> i64 sret [%rax] fast
; nextln: fn0 = %returner sig0
ebb0:
v0, v1, v2, v3 = call fn0()
; check: v4 = stack_addr.i64 ss0
; nextln: v10 = func_addr.i64 fn0
; nextln: v5 = call_indirect sig0, v10(v4)
; nextln: v11 = uload8.i32 notrap aligned v5
; nextln: v6 = ireduce.i8 v11
; nextln: v0 -> v6
; nextln: v7 = load.i32 notrap aligned v5+4
; nextln: v1 -> v7
; nextln: v12 = uload8.i32 notrap aligned v5+8
; nextln: v8 = ireduce.i8 v12
; nextln: v2 -> v8
; nextln: v9 = load.i64 notrap aligned v5+16
; nextln: v3 -> v9
return
}

@@ -0,0 +1,18 @@
test compile
target x86_64 haswell
function %returner(i32, i64, f32, f64) -> i32, i64, f32, f64 {
ebb0(v0: i32, v1: i64, v2: f32, v3: f64):
return v0, v1, v2, v3
}
function %caller() {
fn0 = %returner(i32, i64, f32, f64) -> i32, i64, f32, f64
ebb0:
v0 = iconst.i32 0
v1 = iconst.i64 1
v2 = f32const 0x2.0
v3 = f64const 0x3.0
v4, v5, v6, v7 = call fn0(v0, v1, v2, v3)
return
}

@@ -0,0 +1,34 @@
test compile
target x86_64 haswell
function %return_20_i32s() -> i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 {
ebb0:
v0 = iconst.i32 0
v1 = iconst.i32 1
v2 = iconst.i32 2
v3 = iconst.i32 3
v4 = iconst.i32 4
v5 = iconst.i32 5
v6 = iconst.i32 6
v7 = iconst.i32 7
v8 = iconst.i32 8
v9 = iconst.i32 9
v10 = iconst.i32 10
v11 = iconst.i32 11
v12 = iconst.i32 12
v13 = iconst.i32 13
v14 = iconst.i32 14
v15 = iconst.i32 15
v16 = iconst.i32 16
v17 = iconst.i32 17
v18 = iconst.i32 18
v19 = iconst.i32 19
return v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19
}
function %call_20_i32s() {
fn0 = %return_20_i32s() -> i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32
ebb0:
v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19 = call fn0()
return
}