diff --git a/cranelift/codegen/src/isa/x86/abi.rs b/cranelift/codegen/src/isa/x86/abi.rs index c683f101ed..71fdddd393 100644 --- a/cranelift/codegen/src/isa/x86/abi.rs +++ b/cranelift/codegen/src/isa/x86/abi.rs @@ -620,12 +620,8 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C // The calling convention described in // https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention only requires // preserving the low 128 bits of XMM6-XMM15. - // - // TODO: For now, add just an `F64` rather than `F64X2` because `F64X2` would require - // encoding a fstDisp8 with REX bits set, and we currently can't encode that. F64 causes a - // whole XMM register to be preserved anyway. let csr_arg = - ir::AbiParam::special_reg(types::F64, ir::ArgumentPurpose::CalleeSaved, fp_csr); + ir::AbiParam::special_reg(types::F64X2, ir::ArgumentPurpose::CalleeSaved, fp_csr); func.signature.params.push(csr_arg); func.signature.returns.push(csr_arg); } @@ -910,7 +906,7 @@ fn insert_common_prologue( for reg in csrs.iter(FPR) { // Append param to entry Block - let csr_arg = pos.func.dfg.append_block_param(block, types::F64); + let csr_arg = pos.func.dfg.append_block_param(block, types::F64X2); // Since regalloc has already run, we must assign a location. pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg); @@ -1035,9 +1031,12 @@ fn insert_common_epilogue( let mut fpr_offset = 0; for reg in csrs.iter(FPR) { - let value = pos - .ins() - .load(types::F64, ir::MemFlags::trusted(), stack_addr, fpr_offset); + let value = pos.ins().load( + types::F64X2, + ir::MemFlags::trusted(), + stack_addr, + fpr_offset, + ); fpr_offset += types::F64X2.bytes() as i32; if let Some(ref mut cfa_state) = cfa_state.as_mut() { diff --git a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif index 917d179ccf..bf77c0baef 100644 --- a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif +++ b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif @@ -40,10 +40,10 @@ block0(v0: f64, v1: f64, v2: f64, v3: f64): [-, %xmm7] v5 = fadd v0, v1 return } -; check: function %float_callee_sav(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 fp [%rbp], f64 csr [%xmm6], f64 csr [%xmm7]) -> i64 fp [%rbp], f64 csr [%xmm6], f64 csr [%xmm7] windows_fastcall { +; check: function %float_callee_sav(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 fp [%rbp], f64x2 csr [%xmm6], f64x2 csr [%xmm7]) -> i64 fp [%rbp], f64x2 csr [%xmm6], f64x2 csr [%xmm7] windows_fastcall { ; nextln: ss0 = explicit_slot 32, offset -80 ; nextln: ss1 = incoming_arg 16, offset -48 -; check: block0(v0: f64 [%xmm0], v1: f64 [%xmm1], v2: f64 [%xmm2], v3: f64 [%xmm3], v6: i64 [%rbp], v8: f64 [%xmm6], v9: f64 [%xmm7]): +; check: block0(v0: f64 [%xmm0], v1: f64 [%xmm1], v2: f64 [%xmm2], v3: f64 [%xmm3], v6: i64 [%rbp], v8: f64x2 [%xmm6], v9: f64x2 [%xmm7]): ; nextln: x86_push v6 ; nextln: copy_special %rsp -> %rbp ; nextln: adjust_sp_down_imm 64 @@ -51,8 +51,8 @@ block0(v0: f64, v1: f64, v2: f64, v3: f64): ; nextln: store notrap aligned v8, v7 ; nextln: store notrap aligned v9, v7+16 ; check: v10 = stack_addr.i64 ss0 -; nextln: v11 = load.f64 notrap aligned v10 -; nextln: v12 = load.f64 notrap aligned v10+16 +; nextln: v11 = load.f64x2 notrap aligned v10 +; nextln: v12 = load.f64x2 notrap aligned v10+16 ; nextln: adjust_sp_up_imm 64 ; nextln: v13 = x86_pop.i64 ; nextln: v13, v11, v12 diff --git a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif index 1119c55012..eeb89aa4ae 100644 --- a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif +++ b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif @@ -153,13 +153,13 @@ block0(v0: i64, v1: i64): ; sameln: UnwindInfo { ; nextln: version: 1, ; nextln: flags: 0, -; nextln: prologue_size: 26, +; nextln: prologue_size: 25, ; nextln: unwind_code_count_raw: 7, ; nextln: frame_register: 5, ; nextln: frame_register_offset: 12, ; nextln: unwind_codes: [ ; nextln: UnwindCode { -; nextln: offset: 26, +; nextln: offset: 25, ; nextln: op: SaveXmm128, ; nextln: info: 15, ; nextln: value: U16( @@ -217,13 +217,13 @@ block0(v0: i64, v1: i64): ; sameln: UnwindInfo { ; nextln: version: 1, ; nextln: flags: 0, -; nextln: prologue_size: 44, +; nextln: prologue_size: 41, ; nextln: unwind_code_count_raw: 16, ; nextln: frame_register: 5, ; nextln: frame_register_offset: 10, ; nextln: unwind_codes: [ ; nextln: UnwindCode { -; nextln: offset: 44, +; nextln: offset: 41, ; nextln: op: SaveXmm128, ; nextln: info: 8, ; nextln: value: U16( @@ -231,7 +231,7 @@ block0(v0: i64, v1: i64): ; nextln: ), ; nextln: }, ; nextln: UnwindCode { -; nextln: offset: 38, +; nextln: offset: 36, ; nextln: op: SaveXmm128, ; nextln: info: 7, ; nextln: value: U16( @@ -239,7 +239,7 @@ block0(v0: i64, v1: i64): ; nextln: ), ; nextln: }, ; nextln: UnwindCode { -; nextln: offset: 32, +; nextln: offset: 31, ; nextln: op: SaveXmm128, ; nextln: info: 6, ; nextln: value: U16(