Windows FPRs preservation (#1216)

Preserve FPRs as required by the Windows fastcall calling convention.

This exposes an implementation limit caused by Cranelift's approach to stack layout, which conflicts with the expectations Windows has of SEH layout. A function now produces an error when compiled if all of the following hold: the Cranelift user requests fastcall unwind information, the function requires preservation of an ABI-reserved FPR, and the function has a stack frame of 240 bytes or larger. Several wasm spec tests were disabled because they would trip this limit. This is a temporary constraint that should be fixed promptly.

Co-authored-by: bjorn3 <bjorn3@users.noreply.github.com>
This commit is contained in:
iximeow
2020-04-10 13:27:20 -07:00
committed by GitHub
parent 7eea5d8d43
commit 4cca510085
15 changed files with 610 additions and 76 deletions

View File

@@ -32,6 +32,31 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64):
}
; check: function %five_args(i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9], i64 [32], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
; check that we preserve xmm6 and above if we're using them locally
function %float_callee_saves(f64, f64, f64, f64) windows_fastcall {
block0(v0: f64, v1: f64, v2: f64, v3: f64):
; explicitly use a callee-save register
[-, %xmm6] v4 = fadd v0, v1
[-, %xmm7] v5 = fadd v0, v1
return
}
; check: function %float_callee_sav(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 fp [%rbp], f64 csr [%xmm6], f64 csr [%xmm7]) -> i64 fp [%rbp], f64 csr [%xmm6], f64 csr [%xmm7] windows_fastcall {
; nextln: ss0 = explicit_slot 32, offset -80
; nextln: ss1 = incoming_arg 16, offset -48
; check: block0(v0: f64 [%xmm0], v1: f64 [%xmm1], v2: f64 [%xmm2], v3: f64 [%xmm3], v6: i64 [%rbp], v8: f64 [%xmm6], v9: f64 [%xmm7]):
; nextln: x86_push v6
; nextln: copy_special %rsp -> %rbp
; nextln: adjust_sp_down_imm 64
; nextln: v7 = stack_addr.i64 ss0
; nextln: store notrap aligned v8, v7
; nextln: store notrap aligned v9, v7+16
; check: v10 = stack_addr.i64 ss0
; nextln: v11 = load.f64 notrap aligned v10
; nextln: v12 = load.f64 notrap aligned v10+16
; nextln: adjust_sp_up_imm 64
; nextln: v13 = x86_pop.i64
; nextln: v13, v11, v12
function %mixed_int_float(i64, f64, i64, f32) windows_fastcall {
block0(v0: i64, v1: f64, v2: i64, v3: f32):
return
@@ -43,3 +68,29 @@ block0(v0: f32, v1: f64, v2: i64, v3: i64):
return v1
}
; check: function %ret_val_float(f32 [%xmm0], f64 [%xmm1], i64 [%r8], i64 [%r9], i64 fp [%rbp]) -> f64 [%xmm0], i64 fp [%rbp] windows_fastcall {
function %internal_stack_arg_function_call(i64) -> i64 windows_fastcall {
fn0 = %foo(i64, i64, i64, i64) -> i64
fn1 = %foo2(i64, i64, i64, i64) -> i64
block0(v0: i64):
v1 = load.i64 v0+0
v2 = load.i64 v0+8
v3 = load.i64 v0+16
v4 = load.i64 v0+24
v5 = load.i64 v0+32
v6 = load.i64 v0+40
v7 = load.i64 v0+48
v8 = load.i64 v0+56
v9 = load.i64 v0+64
v10 = call fn0(v1, v2, v3, v4)
store.i64 v1, v0+8
store.i64 v2, v0+16
store.i64 v3, v0+24
store.i64 v4, v0+32
store.i64 v5, v0+40
store.i64 v6, v0+48
store.i64 v7, v0+56
store.i64 v8, v0+64
store.i64 v9, v0+72
return v10
}

View File

@@ -118,6 +118,53 @@ block0:
; nextln: ],
; nextln: }
function %fpr_with_function_call(i64, i64) windows_fastcall {
fn0 = %foo(f64, f64, i64, i64, i64) windows_fastcall;
block0(v0: i64, v1: i64):
v2 = load.f64 v0+0
v3 = load.f64 v0+8
v4 = load.i64 v0+16
v15 = load.f64 v0+104
v16 = load.f64 v0+112
v17 = load.f64 v0+120
v18 = load.f64 v0+128
v19 = load.f64 v0+136
v20 = load.f64 v0+144
v21 = load.f64 v0+152
v22 = load.f64 v0+160
v23 = load.f64 v0+168
call fn0(v2, v3, v4, v1, v1)
store.f64 v15, v1+104
store.f64 v16, v1+112
store.f64 v17, v1+120
store.f64 v18, v1+128
store.f64 v19, v1+136
store.f64 v20, v1+144
store.f64 v21, v1+152
store.f64 v22, v1+160
store.f64 v23, v1+168
return
}
; Only check the first unwind code here because this test specifically looks to
; see that in a function that is not a leaf, a callee-save FPR is stored in an
; area that does not overlap either the callee's shadow space or stack argument
; space.
;
; sameln: UnwindInfo {
; nextln: version: 1,
; nextln: flags: 0,
; nextln: prologue_size: 26,
; nextln: unwind_code_count_raw: 7,
; nextln: frame_register: 5,
; nextln: frame_register_offset: 12,
; nextln: unwind_codes: [
; nextln: UnwindCode {
; nextln: offset: 26,
; nextln: op: SaveXmm128,
; nextln: info: 15,
; nextln: value: U16(
; nextln: 3,
; check a function that has CSRs
function %lots_of_registers(i64, i64) windows_fastcall {
block0(v0: i64, v1: i64):
@@ -134,6 +181,15 @@ block0(v0: i64, v1: i64):
v12 = load.i32 v0+80
v13 = load.i32 v0+88
v14 = load.i32 v0+96
v15 = load.f64 v0+104
v16 = load.f64 v0+112
v17 = load.f64 v0+120
v18 = load.f64 v0+128
v19 = load.f64 v0+136
v20 = load.f64 v0+144
v21 = load.f64 v0+152
v22 = load.f64 v0+160
v23 = load.f64 v0+168
store.i32 v2, v1+0
store.i32 v3, v1+8
store.i32 v4, v1+16
@@ -147,20 +203,53 @@ block0(v0: i64, v1: i64):
store.i32 v12, v1+80
store.i32 v13, v1+88
store.i32 v14, v1+96
store.f64 v15, v1+104
store.f64 v16, v1+112
store.f64 v17, v1+120
store.f64 v18, v1+128
store.f64 v19, v1+136
store.f64 v20, v1+144
store.f64 v21, v1+152
store.f64 v22, v1+160
store.f64 v23, v1+168
return
}
; sameln: UnwindInfo {
; nextln: version: 1,
; nextln: flags: 0,
; nextln: prologue_size: 19,
; nextln: unwind_code_count_raw: 10,
; nextln: prologue_size: 44,
; nextln: unwind_code_count_raw: 16,
; nextln: frame_register: 5,
; nextln: frame_register_offset: 0,
; nextln: frame_register_offset: 10,
; nextln: unwind_codes: [
; nextln: UnwindCode {
; nextln: offset: 44,
; nextln: op: SaveXmm128,
; nextln: info: 8,
; nextln: value: U16(
; nextln: 2,
; nextln: ),
; nextln: },
; nextln: UnwindCode {
; nextln: offset: 38,
; nextln: op: SaveXmm128,
; nextln: info: 7,
; nextln: value: U16(
; nextln: 1,
; nextln: ),
; nextln: },
; nextln: UnwindCode {
; nextln: offset: 32,
; nextln: op: SaveXmm128,
; nextln: info: 6,
; nextln: value: U16(
; nextln: 0,
; nextln: ),
; nextln: },
; nextln: UnwindCode {
; nextln: offset: 19,
; nextln: op: SmallStackAlloc,
; nextln: info: 3,
; nextln: info: 12,
; nextln: value: None,
; nextln: },
; nextln: UnwindCode {

View File

@@ -64,7 +64,9 @@ impl SubTest for TestUnwind {
}
let mut sink = SimpleUnwindSink(Vec::new(), 0, Vec::new());
comp_ctx.emit_unwind_info(isa, FrameUnwindKind::Libunwind, &mut sink);
comp_ctx
.emit_unwind_info(isa, FrameUnwindKind::Libunwind, &mut sink)
.expect("can emit unwind info");
let mut text = String::new();
if sink.0.is_empty() {

View File

@@ -59,7 +59,9 @@ impl SubTest for TestUnwind {
}
let mut sink = Sink(Vec::new());
comp_ctx.emit_unwind_info(isa, FrameUnwindKind::Fastcall, &mut sink);
comp_ctx
.emit_unwind_info(isa, FrameUnwindKind::Fastcall, &mut sink)
.expect("can emit unwind info");
let mut text = String::new();
if sink.0.is_empty() {
@@ -177,15 +179,15 @@ impl UnwindCode {
#[derive(Debug)]
enum UnwindOperation {
PushNonvolatileRegister,
LargeStackAlloc,
SmallStackAlloc,
SetFramePointer,
SaveNonvolatileRegister,
SaveNonvolatileRegisterFar,
SaveXmm128,
SaveXmm128Far,
PushMachineFrame,
PushNonvolatileRegister = 0,
LargeStackAlloc = 1,
SmallStackAlloc = 2,
SetFramePointer = 3,
SaveNonvolatileRegister = 4,
SaveNonvolatileRegisterFar = 5,
SaveXmm128 = 8,
SaveXmm128Far = 9,
PushMachineFrame = 10,
}
impl From<u8> for UnwindOperation {
@@ -198,9 +200,9 @@ impl From<u8> for UnwindOperation {
3 => Self::SetFramePointer,
4 => Self::SaveNonvolatileRegister,
5 => Self::SaveNonvolatileRegisterFar,
6 => Self::SaveXmm128,
7 => Self::SaveXmm128Far,
8 => Self::PushMachineFrame,
8 => Self::SaveXmm128,
9 => Self::SaveXmm128Far,
10 => Self::PushMachineFrame,
_ => panic!("unsupported unwind operation"),
}
}