Use F64X2 as type when saving and restoring XMM registers
When adding floating-point registers as callee-saved registers to block and function parameter lists, add them as `F64X2` arguments.
This commit is contained in:
committed by
iximeow
parent
63c97e365e
commit
4d34c22a1c
@@ -620,12 +620,8 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
|
|||||||
// The calling convention described in
|
// The calling convention described in
|
||||||
// https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention only requires
|
// https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention only requires
|
||||||
// preserving the low 128 bits of XMM6-XMM15.
|
// preserving the low 128 bits of XMM6-XMM15.
|
||||||
//
|
|
||||||
// TODO: For now, add just an `F64` rather than `F64X2` because `F64X2` would require
|
|
||||||
// encoding a fstDisp8 with REX bits set, and we currently can't encode that. F64 causes a
|
|
||||||
// whole XMM register to be preserved anyway.
|
|
||||||
let csr_arg =
|
let csr_arg =
|
||||||
ir::AbiParam::special_reg(types::F64, ir::ArgumentPurpose::CalleeSaved, fp_csr);
|
ir::AbiParam::special_reg(types::F64X2, ir::ArgumentPurpose::CalleeSaved, fp_csr);
|
||||||
func.signature.params.push(csr_arg);
|
func.signature.params.push(csr_arg);
|
||||||
func.signature.returns.push(csr_arg);
|
func.signature.returns.push(csr_arg);
|
||||||
}
|
}
|
||||||
@@ -910,7 +906,7 @@ fn insert_common_prologue(
|
|||||||
|
|
||||||
for reg in csrs.iter(FPR) {
|
for reg in csrs.iter(FPR) {
|
||||||
// Append param to entry Block
|
// Append param to entry Block
|
||||||
let csr_arg = pos.func.dfg.append_block_param(block, types::F64);
|
let csr_arg = pos.func.dfg.append_block_param(block, types::F64X2);
|
||||||
|
|
||||||
// Since regalloc has already run, we must assign a location.
|
// Since regalloc has already run, we must assign a location.
|
||||||
pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
|
pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
|
||||||
@@ -1035,9 +1031,12 @@ fn insert_common_epilogue(
|
|||||||
let mut fpr_offset = 0;
|
let mut fpr_offset = 0;
|
||||||
|
|
||||||
for reg in csrs.iter(FPR) {
|
for reg in csrs.iter(FPR) {
|
||||||
let value = pos
|
let value = pos.ins().load(
|
||||||
.ins()
|
types::F64X2,
|
||||||
.load(types::F64, ir::MemFlags::trusted(), stack_addr, fpr_offset);
|
ir::MemFlags::trusted(),
|
||||||
|
stack_addr,
|
||||||
|
fpr_offset,
|
||||||
|
);
|
||||||
fpr_offset += types::F64X2.bytes() as i32;
|
fpr_offset += types::F64X2.bytes() as i32;
|
||||||
|
|
||||||
if let Some(ref mut cfa_state) = cfa_state.as_mut() {
|
if let Some(ref mut cfa_state) = cfa_state.as_mut() {
|
||||||
|
|||||||
@@ -40,10 +40,10 @@ block0(v0: f64, v1: f64, v2: f64, v3: f64):
|
|||||||
[-, %xmm7] v5 = fadd v0, v1
|
[-, %xmm7] v5 = fadd v0, v1
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
; check: function %float_callee_sav(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 fp [%rbp], f64 csr [%xmm6], f64 csr [%xmm7]) -> i64 fp [%rbp], f64 csr [%xmm6], f64 csr [%xmm7] windows_fastcall {
|
; check: function %float_callee_sav(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 fp [%rbp], f64x2 csr [%xmm6], f64x2 csr [%xmm7]) -> i64 fp [%rbp], f64x2 csr [%xmm6], f64x2 csr [%xmm7] windows_fastcall {
|
||||||
; nextln: ss0 = explicit_slot 32, offset -80
|
; nextln: ss0 = explicit_slot 32, offset -80
|
||||||
; nextln: ss1 = incoming_arg 16, offset -48
|
; nextln: ss1 = incoming_arg 16, offset -48
|
||||||
; check: block0(v0: f64 [%xmm0], v1: f64 [%xmm1], v2: f64 [%xmm2], v3: f64 [%xmm3], v6: i64 [%rbp], v8: f64 [%xmm6], v9: f64 [%xmm7]):
|
; check: block0(v0: f64 [%xmm0], v1: f64 [%xmm1], v2: f64 [%xmm2], v3: f64 [%xmm3], v6: i64 [%rbp], v8: f64x2 [%xmm6], v9: f64x2 [%xmm7]):
|
||||||
; nextln: x86_push v6
|
; nextln: x86_push v6
|
||||||
; nextln: copy_special %rsp -> %rbp
|
; nextln: copy_special %rsp -> %rbp
|
||||||
; nextln: adjust_sp_down_imm 64
|
; nextln: adjust_sp_down_imm 64
|
||||||
@@ -51,8 +51,8 @@ block0(v0: f64, v1: f64, v2: f64, v3: f64):
|
|||||||
; nextln: store notrap aligned v8, v7
|
; nextln: store notrap aligned v8, v7
|
||||||
; nextln: store notrap aligned v9, v7+16
|
; nextln: store notrap aligned v9, v7+16
|
||||||
; check: v10 = stack_addr.i64 ss0
|
; check: v10 = stack_addr.i64 ss0
|
||||||
; nextln: v11 = load.f64 notrap aligned v10
|
; nextln: v11 = load.f64x2 notrap aligned v10
|
||||||
; nextln: v12 = load.f64 notrap aligned v10+16
|
; nextln: v12 = load.f64x2 notrap aligned v10+16
|
||||||
; nextln: adjust_sp_up_imm 64
|
; nextln: adjust_sp_up_imm 64
|
||||||
; nextln: v13 = x86_pop.i64
|
; nextln: v13 = x86_pop.i64
|
||||||
; nextln: v13, v11, v12
|
; nextln: v13, v11, v12
|
||||||
|
|||||||
@@ -153,13 +153,13 @@ block0(v0: i64, v1: i64):
|
|||||||
; sameln: UnwindInfo {
|
; sameln: UnwindInfo {
|
||||||
; nextln: version: 1,
|
; nextln: version: 1,
|
||||||
; nextln: flags: 0,
|
; nextln: flags: 0,
|
||||||
; nextln: prologue_size: 26,
|
; nextln: prologue_size: 25,
|
||||||
; nextln: unwind_code_count_raw: 7,
|
; nextln: unwind_code_count_raw: 7,
|
||||||
; nextln: frame_register: 5,
|
; nextln: frame_register: 5,
|
||||||
; nextln: frame_register_offset: 12,
|
; nextln: frame_register_offset: 12,
|
||||||
; nextln: unwind_codes: [
|
; nextln: unwind_codes: [
|
||||||
; nextln: UnwindCode {
|
; nextln: UnwindCode {
|
||||||
; nextln: offset: 26,
|
; nextln: offset: 25,
|
||||||
; nextln: op: SaveXmm128,
|
; nextln: op: SaveXmm128,
|
||||||
; nextln: info: 15,
|
; nextln: info: 15,
|
||||||
; nextln: value: U16(
|
; nextln: value: U16(
|
||||||
@@ -217,13 +217,13 @@ block0(v0: i64, v1: i64):
|
|||||||
; sameln: UnwindInfo {
|
; sameln: UnwindInfo {
|
||||||
; nextln: version: 1,
|
; nextln: version: 1,
|
||||||
; nextln: flags: 0,
|
; nextln: flags: 0,
|
||||||
; nextln: prologue_size: 44,
|
; nextln: prologue_size: 41,
|
||||||
; nextln: unwind_code_count_raw: 16,
|
; nextln: unwind_code_count_raw: 16,
|
||||||
; nextln: frame_register: 5,
|
; nextln: frame_register: 5,
|
||||||
; nextln: frame_register_offset: 10,
|
; nextln: frame_register_offset: 10,
|
||||||
; nextln: unwind_codes: [
|
; nextln: unwind_codes: [
|
||||||
; nextln: UnwindCode {
|
; nextln: UnwindCode {
|
||||||
; nextln: offset: 44,
|
; nextln: offset: 41,
|
||||||
; nextln: op: SaveXmm128,
|
; nextln: op: SaveXmm128,
|
||||||
; nextln: info: 8,
|
; nextln: info: 8,
|
||||||
; nextln: value: U16(
|
; nextln: value: U16(
|
||||||
@@ -231,7 +231,7 @@ block0(v0: i64, v1: i64):
|
|||||||
; nextln: ),
|
; nextln: ),
|
||||||
; nextln: },
|
; nextln: },
|
||||||
; nextln: UnwindCode {
|
; nextln: UnwindCode {
|
||||||
; nextln: offset: 38,
|
; nextln: offset: 36,
|
||||||
; nextln: op: SaveXmm128,
|
; nextln: op: SaveXmm128,
|
||||||
; nextln: info: 7,
|
; nextln: info: 7,
|
||||||
; nextln: value: U16(
|
; nextln: value: U16(
|
||||||
@@ -239,7 +239,7 @@ block0(v0: i64, v1: i64):
|
|||||||
; nextln: ),
|
; nextln: ),
|
||||||
; nextln: },
|
; nextln: },
|
||||||
; nextln: UnwindCode {
|
; nextln: UnwindCode {
|
||||||
; nextln: offset: 32,
|
; nextln: offset: 31,
|
||||||
; nextln: op: SaveXmm128,
|
; nextln: op: SaveXmm128,
|
||||||
; nextln: info: 6,
|
; nextln: info: 6,
|
||||||
; nextln: value: U16(
|
; nextln: value: U16(
|
||||||
|
|||||||
Reference in New Issue
Block a user