From ea33ce9116a4d77a3517e6d4f6139ce4b8678f2a Mon Sep 17 00:00:00 2001
From: Benjamin Bouvier
Date: Mon, 6 Jul 2020 18:43:04 +0200
Subject: [PATCH] machinst x64: basic support for baldrdash + fix multi-value support

---
 cranelift/codegen/src/isa/x64/abi.rs       | 147 ++++++++++++++-----
 cranelift/codegen/src/isa/x64/inst/regs.rs | 160 ++++++++++++---------
 cranelift/codegen/src/isa/x64/lower.rs     |  18 +++
 3 files changed, 220 insertions(+), 105 deletions(-)

diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs
index f1fe5ec213..4adbf966cc 100644
--- a/cranelift/codegen/src/isa/x64/abi.rs
+++ b/cranelift/codegen/src/isa/x64/abi.rs
@@ -7,7 +7,7 @@ use std::mem;
 
 use crate::binemit::Stackmap;
 use crate::ir::{self, types, types::*, ArgumentExtension, StackSlot, Type};
-use crate::isa::{self, x64::inst::*};
+use crate::isa::{x64::inst::*, CallConv};
 use crate::machinst::*;
 use crate::settings;
 use crate::{CodegenError, CodegenResult};
@@ -40,7 +40,7 @@ struct ABISig {
     /// Index in `args` of the stack-return-value-area argument.
     stack_ret_arg: Option<usize>,
     /// Calling convention used.
-    call_conv: isa::CallConv,
+    call_conv: CallConv,
 }
 
 pub(crate) struct X64ABIBody {
@@ -65,7 +65,7 @@ pub(crate) struct X64ABIBody {
     /// which RSP is adjusted downwards to allocate the spill area.
     frame_size_bytes: Option<usize>,
 
-    call_conv: isa::CallConv,
+    call_conv: CallConv,
 
     /// The settings controlling this function's compilation.
     flags: settings::Flags,
@@ -93,7 +93,11 @@ fn in_vec_reg(ty: types::Type) -> bool {
     }
 }
 
-fn get_intreg_for_arg_systemv(idx: usize) -> Option<Reg> {
+fn get_intreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
+    assert!(match call_conv {
+        CallConv::SystemV | CallConv::BaldrdashSystemV => true,
+        _ => false,
+    });
     match idx {
         0 => Some(regs::rdi()),
         1 => Some(regs::rsi()),
@@ -105,7 +109,11 @@ fn get_intreg_for_arg_systemv(idx: usize) -> Option<Reg> {
     }
 }
 
-fn get_fltreg_for_arg_systemv(idx: usize) -> Option<Reg> {
+fn get_fltreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
+    assert!(match call_conv {
+        CallConv::SystemV | CallConv::BaldrdashSystemV => true,
+        _ => false,
+    });
     match idx {
         0 => Some(regs::xmm0()),
         1 => Some(regs::xmm1()),
@@ -119,19 +127,39 @@ fn get_fltreg_for_arg_systemv(idx: usize) -> Option<Reg> {
     }
 }
 
-fn get_intreg_for_retval_systemv(idx: usize) -> Option<Reg> {
-    match idx {
-        0 => Some(regs::rax()),
-        1 => Some(regs::rdx()),
-        _ => None,
+fn get_intreg_for_retval_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
+    match call_conv {
+        CallConv::Fast | CallConv::Cold | CallConv::SystemV => match idx {
+            0 => Some(regs::rax()),
+            1 => Some(regs::rdx()),
+            _ => None,
+        },
+        CallConv::BaldrdashSystemV => {
+            if idx == 0 {
+                Some(regs::rax())
+            } else {
+                None
+            }
+        }
+        CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
     }
 }
 
-fn get_fltreg_for_retval_systemv(idx: usize) -> Option<Reg> {
-    match idx {
-        0 => Some(regs::xmm0()),
-        1 => Some(regs::xmm1()),
-        _ => None,
+fn get_fltreg_for_retval_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
+    match call_conv {
+        CallConv::Fast | CallConv::Cold | CallConv::SystemV => match idx {
+            0 => Some(regs::xmm0()),
+            1 => Some(regs::xmm1()),
+            _ => None,
+        },
+        CallConv::BaldrdashSystemV => {
+            if idx == 0 {
+                Some(regs::xmm0())
+            } else {
+                None
+            }
+        }
+        CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
     }
 }
 
@@ -147,10 +175,39 @@ fn is_callee_save_systemv(r: RealReg) -> bool {
     }
 }
 
-fn get_callee_saves(regs: Vec<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
-    regs.into_iter()
-        .filter(|r| is_callee_save_systemv(r.to_reg()))
-        .collect()
+fn is_callee_save_baldrdash(r: RealReg) -> bool {
+    use regs::*;
+    match r.get_class() {
+        RegClass::I64 => {
+            if r.get_hw_encoding() as u8 == ENC_R14 {
+                // r14 is the WasmTlsReg and is preserved implicitly.
+                false
+            } else {
+                // Defer to native for the other ones.
+                is_callee_save_systemv(r)
+            }
+        }
+        RegClass::V128 => false,
+        _ => unimplemented!(),
+    }
+}
+
+fn get_callee_saves(call_conv: &CallConv, regs: Vec<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
+    match call_conv {
+        CallConv::BaldrdashSystemV => regs
+            .into_iter()
+            .filter(|r| is_callee_save_baldrdash(r.to_reg()))
+            .collect(),
+        CallConv::BaldrdashWindows => {
+            todo!("baldrdash windows");
+        }
+        CallConv::Fast | CallConv::Cold | CallConv::SystemV => regs
+            .into_iter()
+            .filter(|r| is_callee_save_systemv(r.to_reg()))
+            .collect(),
+        CallConv::WindowsFastcall => todo!("windows fastcall"),
+        CallConv::Probestack => todo!("probestack?"),
+    }
 }
 
 impl X64ABIBody {
@@ -160,7 +217,7 @@ impl X64ABIBody {
         let call_conv = f.signature.call_conv;
 
         debug_assert!(
-            call_conv == isa::CallConv::SystemV || call_conv.extends_baldrdash(),
+            call_conv == CallConv::SystemV || call_conv.extends_baldrdash(),
             "unsupported or unimplemented calling convention {}",
             call_conv
         );
@@ -268,7 +325,18 @@ impl ABIBody for X64ABIBody {
     }
 
     fn gen_retval_area_setup(&self) -> Option<Inst> {
-        None
+        if let Some(i) = self.sig.stack_ret_arg {
+            let inst = self.gen_copy_arg_to_reg(i, self.ret_area_ptr.unwrap());
+            trace!(
+                "gen_retval_area_setup: inst {:?}; ptr reg is {:?}",
+                inst,
+                self.ret_area_ptr.unwrap().to_reg()
+            );
+            Some(inst)
+        } else {
+            trace!("gen_retval_area_setup: not needed");
+            None
+        }
     }
 
     fn gen_copy_reg_to_retval(
@@ -436,7 +504,7 @@ impl ABIBody for X64ABIBody {
             insts.push(Inst::mov_r_r(true, r_rsp, w_rbp));
         }
 
-        let clobbered = get_callee_saves(self.clobbered.to_vec());
+        let clobbered = get_callee_saves(&self.call_conv, self.clobbered.to_vec());
         let callee_saved_used: usize = clobbered
             .iter()
             .map(|reg| match reg.to_reg().get_class() {
@@ -480,7 +548,7 @@ impl ABIBody for X64ABIBody {
 
         // Save callee saved registers that we trash. Keep track of how much space we've used, so
        // as to know what we have to do to get the base of the spill area 0 % 16.
-        let clobbered = get_callee_saves(self.clobbered.to_vec());
+        let clobbered = get_callee_saves(&self.call_conv, self.clobbered.to_vec());
         for reg in clobbered {
             let r_reg = reg.to_reg();
             match r_reg.get_class() {
@@ -510,7 +578,7 @@ impl ABIBody for X64ABIBody {
         // Undo what we did in the prologue.
 
         // Restore regs.
-        let clobbered = get_callee_saves(self.clobbered.to_vec());
+        let clobbered = get_callee_saves(&self.call_conv, self.clobbered.to_vec());
         for wreg in clobbered.into_iter().rev() {
             let rreg = wreg.to_reg();
             match rreg.get_class() {
@@ -607,7 +675,7 @@ fn ty_from_ty_hint_or_reg_class(r: Reg, ty: Option<Type>) -> Type {
     }
 }
 
-fn get_caller_saves(call_conv: isa::CallConv) -> Vec<Writable<Reg>> {
+fn get_caller_saves(call_conv: CallConv) -> Vec<Writable<Reg>> {
     let mut caller_saved = Vec::new();
 
     // Systemv calling convention:
@@ -622,6 +690,14 @@ fn get_caller_saves(call_conv: CallConv) -> Vec<Writable<Reg>> {
     caller_saved.push(Writable::from_reg(regs::r10()));
     caller_saved.push(Writable::from_reg(regs::r11()));
 
+    if call_conv.extends_baldrdash() {
+        caller_saved.push(Writable::from_reg(regs::r12()));
+        caller_saved.push(Writable::from_reg(regs::r13()));
+        // Not r14; implicitly preserved in the entry.
+        caller_saved.push(Writable::from_reg(regs::r15()));
+        caller_saved.push(Writable::from_reg(regs::rbx()));
+    }
+
     // - XMM: all the registers!
     caller_saved.push(Writable::from_reg(regs::xmm0()));
     caller_saved.push(Writable::from_reg(regs::xmm1()));
@@ -640,10 +716,6 @@ fn get_caller_saves(call_conv: isa::CallConv) -> Vec<Writable<Reg>> {
     caller_saved.push(Writable::from_reg(regs::xmm14()));
     caller_saved.push(Writable::from_reg(regs::xmm15()));
 
-    if call_conv.extends_baldrdash() {
-        todo!("add the baldrdash caller saved")
-    }
-
     caller_saved
 }
 
@@ -670,7 +742,7 @@ fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec<Reg>, Vec<Writable<Reg>>) {
 }
 
 /// Try to fill a Baldrdash register, returning it if it was found.
-fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Option<ABIArg> {
+fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option<ABIArg> {
     if call_conv.extends_baldrdash() {
         match &param.purpose {
             &ir::ArgumentPurpose::VMContext => {
@@ -704,16 +776,13 @@ enum ArgsOrRets {
 /// to a 16-byte-aligned boundary), and if `add_ret_area_ptr` was passed, the
 /// index of the extra synthetic arg that was added.
 fn compute_arg_locs(
-    call_conv: isa::CallConv,
+    call_conv: CallConv,
     params: &[ir::AbiParam],
     args_or_rets: ArgsOrRets,
     add_ret_area_ptr: bool,
 ) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
     let is_baldrdash = call_conv.extends_baldrdash();
 
-    // XXX assume SystemV at the moment.
-    debug_assert!(!is_baldrdash, "baldrdash nyi");
-
     let mut next_gpr = 0;
     let mut next_vreg = 0;
     let mut next_stack: u64 = 0;
@@ -747,8 +816,8 @@ fn compute_arg_locs(
 
         let (next_reg, candidate) = if intreg {
             let candidate = match args_or_rets {
-                ArgsOrRets::Args => get_intreg_for_arg_systemv(next_gpr),
-                ArgsOrRets::Rets => get_intreg_for_retval_systemv(next_gpr),
+                ArgsOrRets::Args => get_intreg_for_arg_systemv(&call_conv, next_gpr),
+                ArgsOrRets::Rets => get_intreg_for_retval_systemv(&call_conv, next_gpr),
             };
             debug_assert!(candidate
                 .map(|r| r.get_class() == RegClass::I64)
@@ -756,8 +825,8 @@ fn compute_arg_locs(
             (&mut next_gpr, candidate)
         } else {
             let candidate = match args_or_rets {
-                ArgsOrRets::Args => get_fltreg_for_arg_systemv(next_vreg),
-                ArgsOrRets::Rets => get_fltreg_for_retval_systemv(next_vreg),
+                ArgsOrRets::Args => get_fltreg_for_arg_systemv(&call_conv, next_vreg),
+                ArgsOrRets::Rets => get_fltreg_for_retval_systemv(&call_conv, next_vreg),
             };
             debug_assert!(candidate
                 .map(|r| r.get_class() == RegClass::V128)
@@ -790,7 +859,7 @@ fn compute_arg_locs(
 
     let extra_arg = if add_ret_area_ptr {
         debug_assert!(args_or_rets == ArgsOrRets::Args);
-        if let Some(reg) = get_intreg_for_arg_systemv(next_gpr) {
+        if let Some(reg) = get_intreg_for_arg_systemv(&call_conv, next_gpr) {
            ret.push(ABIArg::Reg(reg.to_real_reg(), ir::types::I64));
         } else {
             ret.push(ABIArg::Stack(next_stack as i64, ir::types::I64));
diff --git a/cranelift/codegen/src/isa/x64/inst/regs.rs b/cranelift/codegen/src/isa/x64/inst/regs.rs
index 087103b4ac..4f23ab11b0 100644
--- a/cranelift/codegen/src/isa/x64/inst/regs.rs
+++ b/cranelift/codegen/src/isa/x64/inst/regs.rs
@@ -33,46 +33,55 @@ fn gpr(enc: u8, index: u8) -> Reg {
 }
 
 pub(crate) fn r12() -> Reg {
-    gpr(ENC_R12, 0)
+    gpr(ENC_R12, 16)
 }
 pub(crate) fn r13() -> Reg {
-    gpr(ENC_R13, 1)
+    gpr(ENC_R13, 17)
 }
 pub(crate) fn r14() -> Reg {
-    gpr(ENC_R14, 2)
-}
-pub(crate) fn r15() -> Reg {
-    gpr(ENC_R15, 3)
+    gpr(ENC_R14, 18)
 }
 pub(crate) fn rbx() -> Reg {
-    gpr(ENC_RBX, 4)
+    gpr(ENC_RBX, 19)
 }
 pub(crate) fn rsi() -> Reg {
-    gpr(6, 5)
+    gpr(6, 20)
 }
 pub(crate) fn rdi() -> Reg {
-    gpr(7, 6)
+    gpr(7, 21)
 }
 pub(crate) fn rax() -> Reg {
-    gpr(0, 7)
+    gpr(0, 22)
 }
 pub(crate) fn rcx() -> Reg {
-    gpr(1, 8)
+    gpr(1, 23)
 }
 pub(crate) fn rdx() -> Reg {
-    gpr(2, 9)
+    gpr(2, 24)
 }
 pub(crate) fn r8() -> Reg {
-    gpr(8, 10)
+    gpr(8, 25)
 }
 pub(crate) fn r9() -> Reg {
-    gpr(9, 11)
+    gpr(9, 26)
 }
 pub(crate) fn r10() -> Reg {
-    gpr(10, 12)
+    gpr(10, 27)
 }
 pub(crate) fn r11() -> Reg {
-    gpr(11, 13)
+    gpr(11, 28)
+}
+
+pub(crate) fn r15() -> Reg {
+    // r15 is put aside since this is the pinned register.
+    gpr(ENC_R15, 29)
+}
+
+/// The pinned register on this architecture.
+/// It must be the same as Spidermonkey's HeapReg, as found in this file.
+/// https://searchfox.org/mozilla-central/source/js/src/jit/x64/Assembler-x64.h#99
+pub(crate) fn pinned_reg() -> Reg {
+    r15()
 }
 
 fn fpr(enc: u8, index: u8) -> Reg {
@@ -80,52 +89,52 @@ fn fpr(enc: u8, index: u8) -> Reg {
 }
 
 pub(crate) fn xmm0() -> Reg {
-    fpr(0, 14)
+    fpr(0, 0)
 }
 pub(crate) fn xmm1() -> Reg {
-    fpr(1, 15)
+    fpr(1, 1)
 }
 pub(crate) fn xmm2() -> Reg {
-    fpr(2, 16)
+    fpr(2, 2)
 }
 pub(crate) fn xmm3() -> Reg {
-    fpr(3, 17)
+    fpr(3, 3)
 }
 pub(crate) fn xmm4() -> Reg {
-    fpr(4, 18)
+    fpr(4, 4)
 }
 pub(crate) fn xmm5() -> Reg {
-    fpr(5, 19)
+    fpr(5, 5)
 }
 pub(crate) fn xmm6() -> Reg {
-    fpr(6, 20)
+    fpr(6, 6)
 }
 pub(crate) fn xmm7() -> Reg {
-    fpr(7, 21)
+    fpr(7, 7)
 }
 pub(crate) fn xmm8() -> Reg {
-    fpr(8, 22)
+    fpr(8, 8)
 }
 pub(crate) fn xmm9() -> Reg {
-    fpr(9, 23)
+    fpr(9, 9)
 }
 pub(crate) fn xmm10() -> Reg {
-    fpr(10, 24)
+    fpr(10, 10)
 }
 pub(crate) fn xmm11() -> Reg {
-    fpr(11, 25)
+    fpr(11, 11)
 }
 pub(crate) fn xmm12() -> Reg {
-    fpr(12, 26)
+    fpr(12, 12)
 }
 pub(crate) fn xmm13() -> Reg {
-    fpr(13, 27)
+    fpr(13, 13)
 }
 pub(crate) fn xmm14() -> Reg {
-    fpr(14, 28)
+    fpr(14, 14)
 }
 pub(crate) fn xmm15() -> Reg {
-    fpr(15, 29)
+    fpr(15, 15)
 }
 
 pub(crate) fn rsp() -> Reg {
@@ -139,39 +148,14 @@ pub(crate) fn rbp() -> Reg {
 ///
 /// The ordering of registers matters, as commented in the file doc comment: assumes the
 /// calling-convention is SystemV, at the moment.
-pub(crate) fn create_reg_universe_systemv(_flags: &settings::Flags) -> RealRegUniverse {
+pub(crate) fn create_reg_universe_systemv(flags: &settings::Flags) -> RealRegUniverse {
     let mut regs = Vec::<(RealReg, String)>::new();
     let mut allocable_by_class = [None; NUM_REG_CLASSES];
 
-    // Integer regs.
-    let mut base = regs.len();
-
-    // Callee-saved, in the SystemV x86_64 ABI.
-    regs.push((r12().to_real_reg(), "%r12".into()));
-    regs.push((r13().to_real_reg(), "%r13".into()));
-    regs.push((r14().to_real_reg(), "%r14".into()));
-    regs.push((r15().to_real_reg(), "%r15".into()));
-    regs.push((rbx().to_real_reg(), "%rbx".into()));
-
-    // Caller-saved, in the SystemV x86_64 ABI.
-    regs.push((rsi().to_real_reg(), "%rsi".into()));
-    regs.push((rdi().to_real_reg(), "%rdi".into()));
-    regs.push((rax().to_real_reg(), "%rax".into()));
-    regs.push((rcx().to_real_reg(), "%rcx".into()));
-    regs.push((rdx().to_real_reg(), "%rdx".into()));
-    regs.push((r8().to_real_reg(), "%r8".into()));
-    regs.push((r9().to_real_reg(), "%r9".into()));
-    regs.push((r10().to_real_reg(), "%r10".into()));
-    regs.push((r11().to_real_reg(), "%r11".into()));
-
-    allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
-        first: base,
-        last: regs.len() - 1,
-        suggested_scratch: Some(r12().get_index()),
-    });
+    let use_pinned_reg = flags.enable_pinned_reg();
 
     // XMM registers
-    base = regs.len();
+    let first_fpr = regs.len();
     regs.push((xmm0().to_real_reg(), "%xmm0".into()));
     regs.push((xmm1().to_real_reg(), "%xmm1".into()));
     regs.push((xmm2().to_real_reg(), "%xmm2".into()));
@@ -188,17 +172,61 @@ pub(crate) fn create_reg_universe_systemv(_flags: &settings::Flags) -> RealRegUn
     regs.push((xmm13().to_real_reg(), "%xmm13".into()));
     regs.push((xmm14().to_real_reg(), "%xmm14".into()));
     regs.push((xmm15().to_real_reg(), "%xmm15".into()));
+    let last_fpr = regs.len() - 1;
 
+    // Integer regs.
+    let first_gpr = regs.len();
+
+    // Callee-saved, in the SystemV x86_64 ABI.
+    regs.push((r12().to_real_reg(), "%r12".into()));
+    regs.push((r13().to_real_reg(), "%r13".into()));
+    regs.push((r14().to_real_reg(), "%r14".into()));
+
+    regs.push((rbx().to_real_reg(), "%rbx".into()));
+
+    // Caller-saved, in the SystemV x86_64 ABI.
+    regs.push((rsi().to_real_reg(), "%rsi".into()));
+    regs.push((rdi().to_real_reg(), "%rdi".into()));
+    regs.push((rax().to_real_reg(), "%rax".into()));
+    regs.push((rcx().to_real_reg(), "%rcx".into()));
+    regs.push((rdx().to_real_reg(), "%rdx".into()));
+    regs.push((r8().to_real_reg(), "%r8".into()));
+    regs.push((r9().to_real_reg(), "%r9".into()));
+    regs.push((r10().to_real_reg(), "%r10".into()));
+    regs.push((r11().to_real_reg(), "%r11".into()));
+
+    // Other regs, not available to the allocator.
+    debug_assert_eq!(r15(), pinned_reg());
+    let allocable = if use_pinned_reg {
+        // The pinned register is not allocatable in this case, so record the length before adding
+        // it.
+        let len = regs.len();
+        regs.push((r15().to_real_reg(), "%r15/pinned".into()));
+        len
+    } else {
+        regs.push((r15().to_real_reg(), "%r15".into()));
+        regs.len()
+    };
+    let last_gpr = allocable - 1;
+
+    regs.push((rsp().to_real_reg(), "%rsp".into()));
+    regs.push((rbp().to_real_reg(), "%rbp".into()));
+
+    allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
+        first: first_gpr,
+        last: last_gpr,
+        suggested_scratch: Some(r12().get_index()),
+    });
     allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
-        first: base,
-        last: regs.len() - 1,
+        first: first_fpr,
+        last: last_fpr,
         suggested_scratch: Some(xmm15().get_index()),
     });
 
-    // Other regs, not available to the allocator.
-    let allocable = regs.len();
-    regs.push((rsp().to_real_reg(), "%rsp".into()));
-    regs.push((rbp().to_real_reg(), "%rbp".into()));
+    // Sanity-check: the index passed to the Reg ctor must match the order in the register list.
+    for (i, reg) in regs.iter().enumerate() {
+        assert_eq!(i, reg.0.get_index());
+    }
 
     RealRegUniverse {
         regs,
diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs
index 97d06bf3d7..bb6eadb7b2 100644
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -1151,6 +1151,20 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             ctx.emit(Inst::gen_move(dst, regs::rdx(), input_ty));
         }
 
+        Opcode::GetPinnedReg => {
+            let dst = output_to_reg(ctx, outputs[0]);
+            ctx.emit(Inst::gen_move(dst, regs::pinned_reg(), I64));
+        }
+
+        Opcode::SetPinnedReg => {
+            let src = input_to_reg(ctx, inputs[0]);
+            ctx.emit(Inst::gen_move(
+                Writable::from_reg(regs::pinned_reg()),
+                src,
+                I64,
+            ));
+        }
+
         Opcode::IaddImm
         | Opcode::ImulImm
        | Opcode::UdivImm
@@ -1354,4 +1368,8 @@ impl LowerBackend for X64Backend {
 
         Ok(())
     }
+
+    fn maybe_pinned_reg(&self) -> Option<Reg> {
+        Some(regs::pinned_reg())
+    }
 }