Use F64X2 as type when saving and restoring XMM registers

When adding floating-point registers as callee-saved registers to
block and function parameter lists, add them as `F64X2` arguments.
This commit is contained in:
Samrat Man Singh
2020-04-11 23:16:28 +05:30
committed by iximeow
parent 63c97e365e
commit 4d34c22a1c
3 changed files with 18 additions and 19 deletions

View File

@@ -620,12 +620,8 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
// The calling convention described in
// https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention only requires
// preserving the low 128 bits of XMM6-XMM15.
//
// TODO: For now, add just an `F64` rather than `F64X2` because `F64X2` would require
// encoding a fstDisp8 with REX bits set, and we currently can't encode that. F64 causes a
// whole XMM register to be preserved anyway.
let csr_arg =
ir::AbiParam::special_reg(types::F64, ir::ArgumentPurpose::CalleeSaved, fp_csr);
ir::AbiParam::special_reg(types::F64X2, ir::ArgumentPurpose::CalleeSaved, fp_csr);
func.signature.params.push(csr_arg);
func.signature.returns.push(csr_arg);
}
@@ -910,7 +906,7 @@ fn insert_common_prologue(
for reg in csrs.iter(FPR) {
// Append param to entry Block
let csr_arg = pos.func.dfg.append_block_param(block, types::F64);
let csr_arg = pos.func.dfg.append_block_param(block, types::F64X2);
// Since regalloc has already run, we must assign a location.
pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
@@ -1035,9 +1031,12 @@ fn insert_common_epilogue(
let mut fpr_offset = 0;
for reg in csrs.iter(FPR) {
let value = pos
.ins()
.load(types::F64, ir::MemFlags::trusted(), stack_addr, fpr_offset);
let value = pos.ins().load(
types::F64X2,
ir::MemFlags::trusted(),
stack_addr,
fpr_offset,
);
fpr_offset += types::F64X2.bytes() as i32;
if let Some(ref mut cfa_state) = cfa_state.as_mut() {

View File

@@ -40,10 +40,10 @@ block0(v0: f64, v1: f64, v2: f64, v3: f64):
[-, %xmm7] v5 = fadd v0, v1
return
}
; check: function %float_callee_sav(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 fp [%rbp], f64 csr [%xmm6], f64 csr [%xmm7]) -> i64 fp [%rbp], f64 csr [%xmm6], f64 csr [%xmm7] windows_fastcall {
; check: function %float_callee_sav(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 fp [%rbp], f64x2 csr [%xmm6], f64x2 csr [%xmm7]) -> i64 fp [%rbp], f64x2 csr [%xmm6], f64x2 csr [%xmm7] windows_fastcall {
; nextln: ss0 = explicit_slot 32, offset -80
; nextln: ss1 = incoming_arg 16, offset -48
; check: block0(v0: f64 [%xmm0], v1: f64 [%xmm1], v2: f64 [%xmm2], v3: f64 [%xmm3], v6: i64 [%rbp], v8: f64 [%xmm6], v9: f64 [%xmm7]):
; check: block0(v0: f64 [%xmm0], v1: f64 [%xmm1], v2: f64 [%xmm2], v3: f64 [%xmm3], v6: i64 [%rbp], v8: f64x2 [%xmm6], v9: f64x2 [%xmm7]):
; nextln: x86_push v6
; nextln: copy_special %rsp -> %rbp
; nextln: adjust_sp_down_imm 64
@@ -51,8 +51,8 @@ block0(v0: f64, v1: f64, v2: f64, v3: f64):
; nextln: store notrap aligned v8, v7
; nextln: store notrap aligned v9, v7+16
; check: v10 = stack_addr.i64 ss0
; nextln: v11 = load.f64 notrap aligned v10
; nextln: v12 = load.f64 notrap aligned v10+16
; nextln: v11 = load.f64x2 notrap aligned v10
; nextln: v12 = load.f64x2 notrap aligned v10+16
; nextln: adjust_sp_up_imm 64
; nextln: v13 = x86_pop.i64
; nextln: v13, v11, v12

View File

@@ -153,13 +153,13 @@ block0(v0: i64, v1: i64):
; sameln: UnwindInfo {
; nextln: version: 1,
; nextln: flags: 0,
; nextln: prologue_size: 26,
; nextln: prologue_size: 25,
; nextln: unwind_code_count_raw: 7,
; nextln: frame_register: 5,
; nextln: frame_register_offset: 12,
; nextln: unwind_codes: [
; nextln: UnwindCode {
; nextln: offset: 26,
; nextln: offset: 25,
; nextln: op: SaveXmm128,
; nextln: info: 15,
; nextln: value: U16(
@@ -217,13 +217,13 @@ block0(v0: i64, v1: i64):
; sameln: UnwindInfo {
; nextln: version: 1,
; nextln: flags: 0,
; nextln: prologue_size: 44,
; nextln: prologue_size: 41,
; nextln: unwind_code_count_raw: 16,
; nextln: frame_register: 5,
; nextln: frame_register_offset: 10,
; nextln: unwind_codes: [
; nextln: UnwindCode {
; nextln: offset: 44,
; nextln: offset: 41,
; nextln: op: SaveXmm128,
; nextln: info: 8,
; nextln: value: U16(
@@ -231,7 +231,7 @@ block0(v0: i64, v1: i64):
; nextln: ),
; nextln: },
; nextln: UnwindCode {
; nextln: offset: 38,
; nextln: offset: 36,
; nextln: op: SaveXmm128,
; nextln: info: 7,
; nextln: value: U16(
@@ -239,7 +239,7 @@ block0(v0: i64, v1: i64):
; nextln: ),
; nextln: },
; nextln: UnwindCode {
; nextln: offset: 32,
; nextln: offset: 31,
; nextln: op: SaveXmm128,
; nextln: info: 6,
; nextln: value: U16(