From 71768bb6cf4f39f76d64950eb8bbd7254f4301f5 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 5 Oct 2020 18:43:26 -0700 Subject: [PATCH] Fix AArch64 ABI to respect half-caller-save, half-callee-save vec regs. This PR updates the AArch64 ABI implementation so that it properly respects that v8-v15 inclusive have callee-save lower halves, and caller-save upper halves, by conservatively approximating (to full registers) in the appropriate directions when generating prologue callee-saves and when informing the regalloc of clobbered regs across callsites. In order to prevent saving all of these vector registers in the prologue of every non-leaf function due to the above approximation, this also makes use of a new regalloc.rs feature to exclude call instructions' writes from the clobber set returned by register allocation. This is safe whenever the caller and callee have the same ABI (because anything the callee could clobber, the caller is allowed to clobber as well without saving it in the prologue). Fixes #2254.
--- Cargo.lock | 4 +- cranelift/codegen/Cargo.toml | 2 +- cranelift/codegen/src/isa/aarch64/abi.rs | 50 +++++++--- .../codegen/src/isa/aarch64/inst/args.rs | 16 +-- .../src/isa/aarch64/inst/emit_tests.rs | 5 + .../codegen/src/isa/aarch64/inst/imms.rs | 25 +++-- cranelift/codegen/src/isa/aarch64/inst/mod.rs | 31 +++++- .../codegen/src/isa/aarch64/inst/regs.rs | 5 +- .../codegen/src/isa/aarch64/lower_inst.rs | 8 +- cranelift/codegen/src/isa/aarch64/mod.rs | 6 +- cranelift/codegen/src/isa/arm32/abi.rs | 8 +- cranelift/codegen/src/isa/arm32/inst/args.rs | 14 +-- cranelift/codegen/src/isa/arm32/inst/mod.rs | 4 +- cranelift/codegen/src/isa/arm32/lower_inst.rs | 8 +- cranelift/codegen/src/isa/arm32/mod.rs | 6 +- cranelift/codegen/src/isa/x64/abi.rs | 6 +- cranelift/codegen/src/isa/x64/inst/args.rs | 19 ++-- cranelift/codegen/src/isa/x64/inst/mod.rs | 6 +- cranelift/codegen/src/isa/x64/inst/regs.rs | 6 +- cranelift/codegen/src/isa/x64/lower.rs | 11 ++- cranelift/codegen/src/isa/x64/mod.rs | 6 +- cranelift/codegen/src/machinst/abi.rs | 4 + cranelift/codegen/src/machinst/abi_impl.rs | 34 +++++-- cranelift/codegen/src/machinst/compile.rs | 4 +- cranelift/codegen/src/machinst/mod.rs | 7 +- .../codegen/src/machinst/pretty_print.rs | 66 ------------- cranelift/codegen/src/machinst/vcode.rs | 10 +- .../filetests/filetests/isa/aarch64/call.clif | 61 ++++-------- .../filetests/isa/aarch64/prologue.clif | 99 +++++++++++++++++++ 29 files changed, 325 insertions(+), 206 deletions(-) delete mode 100644 cranelift/codegen/src/machinst/pretty_print.rs create mode 100644 cranelift/filetests/filetests/isa/aarch64/prologue.clif diff --git a/Cargo.lock b/Cargo.lock index 5f6b37c2fc..388927fd8a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1692,9 +1692,9 @@ dependencies = [ [[package]] name = "regalloc" -version = "0.0.30" +version = "0.0.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2041c2d34f6ff346d6f428974f03d8bf12679b0c816bb640dc5eb1d48848d8d1" +checksum 
= "571f7f397d61c4755285cd37853fe8e03271c243424a907415909379659381c5" dependencies = [ "log", "rustc-hash", diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 3ac7f27171..2be771738c 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -28,7 +28,7 @@ byteorder = { version = "1.3.2", default-features = false } peepmatic = { path = "../peepmatic", optional = true, version = "0.67.0" } peepmatic-traits = { path = "../peepmatic/crates/traits", optional = true, version = "0.67.0" } peepmatic-runtime = { path = "../peepmatic/crates/runtime", optional = true, version = "0.67.0" } -regalloc = "0.0.30" +regalloc = { version = "0.0.31" } souper-ir = { version = "1", optional = true } wast = { version = "25.0.0", optional = true } # It is a goal of the cranelift-codegen crate to have minimal external dependencies. diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 03c6622f12..b897352ebe 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -510,7 +510,7 @@ impl ABIMachineSpec for AArch64MachineDeps { fixed_frame_storage_size: u32, ) -> (u64, SmallVec<[Inst; 16]>) { let mut insts = SmallVec::new(); - let (clobbered_int, clobbered_vec) = get_callee_saves(call_conv, clobbers); + let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers); let (int_save_bytes, vec_save_bytes) = saved_reg_stack_size(&clobbered_int, &clobbered_vec); let total_save_bytes = (vec_save_bytes + int_save_bytes) as i32; @@ -561,7 +561,7 @@ impl ABIMachineSpec for AArch64MachineDeps { clobbers: &Set>, ) -> SmallVec<[Inst; 16]> { let mut insts = SmallVec::new(); - let (clobbered_int, clobbered_vec) = get_callee_saves(call_conv, clobbers); + let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers); let (int_save_bytes, vec_save_bytes) = saved_reg_stack_size(&clobbered_int, &clobbered_vec); for (i, 
reg_pair) in clobbered_int.chunks(2).enumerate() { @@ -629,6 +629,8 @@ impl ABIMachineSpec for AArch64MachineDeps { loc: SourceLoc, opcode: ir::Opcode, tmp: Writable, + callee_conv: isa::CallConv, + caller_conv: isa::CallConv, ) -> SmallVec<[(InstIsSafepoint, Inst); 2]> { let mut insts = SmallVec::new(); match &dest { @@ -641,6 +643,8 @@ impl ABIMachineSpec for AArch64MachineDeps { defs, loc, opcode, + caller_callconv: caller_conv, + callee_callconv: callee_conv, }), }, )), @@ -663,6 +667,8 @@ impl ABIMachineSpec for AArch64MachineDeps { defs, loc, opcode, + caller_callconv: caller_conv, + callee_callconv: callee_conv, }), }, )); @@ -676,6 +682,8 @@ impl ABIMachineSpec for AArch64MachineDeps { defs, loc, opcode, + caller_callconv: caller_conv, + callee_callconv: callee_conv, }), }, )), @@ -704,17 +712,17 @@ impl ABIMachineSpec for AArch64MachineDeps { s.nominal_sp_to_fp } - fn get_caller_saves(call_conv: isa::CallConv) -> Vec> { + fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec> { let mut caller_saved = Vec::new(); for i in 0..29 { let x = writable_xreg(i); - if is_caller_save_reg(call_conv, x.to_reg().to_real_reg()) { + if is_reg_clobbered_by_call(call_conv_of_callee, x.to_reg().to_real_reg()) { caller_saved.push(x); } } for i in 0..32 { let v = writable_vreg(i); - if is_caller_save_reg(call_conv, v.to_reg().to_real_reg()) { + if is_reg_clobbered_by_call(call_conv_of_callee, v.to_reg().to_real_reg()) { caller_saved.push(v); } } @@ -731,7 +739,9 @@ fn legal_type_for_machine(ty: Type) -> bool { } } -fn is_callee_save_reg(call_conv: isa::CallConv, r: RealReg) -> bool { +/// Is the given register saved in the prologue if clobbered, i.e., is it a +/// callee-save? 
+fn is_reg_saved_in_prologue(call_conv: isa::CallConv, r: RealReg) -> bool { if call_conv.extends_baldrdash() { match r.get_class() { RegClass::I64 => { @@ -759,14 +769,17 @@ fn is_callee_save_reg(call_conv: isa::CallConv, r: RealReg) -> bool { } } -fn get_callee_saves( +/// Return the set of all integer and vector registers that must be saved in the +/// prologue and restored in the epilogue, given the set of all registers +/// written by the function's body. +fn get_regs_saved_in_prologue( call_conv: isa::CallConv, regs: &Set>, ) -> (Vec>, Vec>) { let mut int_saves = vec![]; let mut vec_saves = vec![]; for &reg in regs.iter() { - if is_callee_save_reg(call_conv, reg.to_reg()) { + if is_reg_saved_in_prologue(call_conv, reg.to_reg()) { match reg.to_reg().get_class() { RegClass::I64 => int_saves.push(reg), RegClass::V128 => vec_saves.push(reg), @@ -781,8 +794,8 @@ fn get_callee_saves( (int_saves, vec_saves) } -fn is_caller_save_reg(call_conv: isa::CallConv, r: RealReg) -> bool { - if call_conv.extends_baldrdash() { +fn is_reg_clobbered_by_call(call_conv_of_callee: isa::CallConv, r: RealReg) -> bool { + if call_conv_of_callee.extends_baldrdash() { match r.get_class() { RegClass::I64 => { let enc = r.get_hw_encoding(); @@ -808,8 +821,21 @@ fn is_caller_save_reg(call_conv: isa::CallConv, r: RealReg) -> bool { r.get_hw_encoding() <= 17 } RegClass::V128 => { - // v0 - v7 inclusive and v16 - v31 inclusive are caller-saves. - r.get_hw_encoding() <= 7 || (r.get_hw_encoding() >= 16 && r.get_hw_encoding() <= 31) + // v0 - v7 inclusive and v16 - v31 inclusive are caller-saves. The + // upper 64 bits of v8 - v15 inclusive are also caller-saves. + // However, because we cannot currently represent partial registers + // to regalloc.rs, we indicate here that every vector register is + // caller-save. Because this function is used at *callsites*, + // approximating in this direction (save more than necessary) is + // conservative and thus safe.
+ // + // Note that we set the 'not included in clobber set' flag in the + // regalloc.rs API when a call instruction's callee has the same ABI + // as the caller (the current function body); this is safe (anything + // clobbered by callee can be clobbered by caller as well) and + // avoids unnecessary saves of v8-v15 in the prologue even though we + // include them as defs here. + true } _ => panic!("Unexpected RegClass"), } diff --git a/cranelift/codegen/src/isa/aarch64/inst/args.rs b/cranelift/codegen/src/isa/aarch64/inst/args.rs index 2ac9d064a1..f85c1028ff 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/args.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs @@ -8,7 +8,7 @@ use crate::ir::Type; use crate::isa::aarch64::inst::*; use crate::machinst::{ty_bits, MachLabel}; -use regalloc::{RealRegUniverse, Reg, Writable}; +use regalloc::{PrettyPrint, RealRegUniverse, Reg, Writable}; use core::convert::Into; use std::string::String; @@ -348,19 +348,19 @@ impl BranchTarget { } } -impl ShowWithRRU for ShiftOpAndAmt { +impl PrettyPrint for ShiftOpAndAmt { fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { format!("{:?} {}", self.op(), self.amt().value()) } } -impl ShowWithRRU for ExtendOp { +impl PrettyPrint for ExtendOp { fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { format!("{:?}", self) } } -impl ShowWithRRU for MemLabel { +impl PrettyPrint for MemLabel { fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { match self { &MemLabel::PCRel(off) => format!("pc+{}", off), @@ -379,7 +379,7 @@ fn shift_for_type(ty: Type) -> usize { } } -impl ShowWithRRU for AMode { +impl PrettyPrint for AMode { fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { match self { &AMode::Unscaled(reg, simm9) => { @@ -458,7 +458,7 @@ impl ShowWithRRU for AMode { } } -impl ShowWithRRU for PairAMode { +impl PrettyPrint for PairAMode { fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { match self { 
&PairAMode::SignedOffset(reg, simm7) => { @@ -482,7 +482,7 @@ impl ShowWithRRU for PairAMode { } } -impl ShowWithRRU for Cond { +impl PrettyPrint for Cond { fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { let mut s = format!("{:?}", self); s.make_ascii_lowercase(); @@ -490,7 +490,7 @@ impl ShowWithRRU for Cond { } } -impl ShowWithRRU for BranchTarget { +impl PrettyPrint for BranchTarget { fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { match self { &BranchTarget::Label(label) => format!("label{:?}", label.get()), diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index 6d68e2b65e..1a29ba9659 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -1,6 +1,7 @@ use crate::ir::types::*; use crate::isa::aarch64::inst::*; use crate::isa::test_utils; +use crate::isa::CallConv; use crate::settings; use alloc::boxed::Box; @@ -3789,6 +3790,8 @@ fn test_aarch64_binemit() { defs: Vec::new(), loc: SourceLoc::default(), opcode: Opcode::Call, + caller_callconv: CallConv::SystemV, + callee_callconv: CallConv::SystemV, }), }, "00000094", @@ -3803,6 +3806,8 @@ fn test_aarch64_binemit() { defs: Vec::new(), loc: SourceLoc::default(), opcode: Opcode::CallIndirect, + caller_callconv: CallConv::SystemV, + callee_callconv: CallConv::SystemV, }), }, "40013FD6", diff --git a/cranelift/codegen/src/isa/aarch64/inst/imms.rs b/cranelift/codegen/src/isa/aarch64/inst/imms.rs index f1a98ab66c..d8dd45afca 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/imms.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/imms.rs @@ -5,9 +5,8 @@ use crate::ir::types::*; use crate::ir::Type; use crate::isa::aarch64::inst::OperandSize; -use crate::machinst::*; -use regalloc::RealRegUniverse; +use regalloc::{PrettyPrint, RealRegUniverse}; use core::convert::TryFrom; use std::string::String; @@ -668,7 +667,7 @@ impl MoveWideConst { } } 
-impl ShowWithRRU for NZCV { +impl PrettyPrint for NZCV { fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { let fmt = |c: char, v| if v { c.to_ascii_uppercase() } else { c }; format!( @@ -681,13 +680,13 @@ impl ShowWithRRU for NZCV { } } -impl ShowWithRRU for UImm5 { +impl PrettyPrint for UImm5 { fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { format!("#{}", self.value) } } -impl ShowWithRRU for Imm12 { +impl PrettyPrint for Imm12 { fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { let shift = if self.shift12 { 12 } else { 0 }; let value = u32::from(self.bits) << shift; @@ -695,49 +694,49 @@ impl ShowWithRRU for Imm12 { } } -impl ShowWithRRU for SImm7Scaled { +impl PrettyPrint for SImm7Scaled { fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { format!("#{}", self.value) } } -impl ShowWithRRU for FPULeftShiftImm { +impl PrettyPrint for FPULeftShiftImm { fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { format!("#{}", self.amount) } } -impl ShowWithRRU for FPURightShiftImm { +impl PrettyPrint for FPURightShiftImm { fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { format!("#{}", self.amount) } } -impl ShowWithRRU for SImm9 { +impl PrettyPrint for SImm9 { fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { format!("#{}", self.value) } } -impl ShowWithRRU for UImm12Scaled { +impl PrettyPrint for UImm12Scaled { fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { format!("#{}", self.value) } } -impl ShowWithRRU for ImmLogic { +impl PrettyPrint for ImmLogic { fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { format!("#{}", self.value()) } } -impl ShowWithRRU for ImmShift { +impl PrettyPrint for ImmShift { fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { format!("#{}", self.imm) } } -impl ShowWithRRU for MoveWideConst { +impl PrettyPrint for MoveWideConst { fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> 
String { if self.shift == 0 { format!("#{}", self.bits) diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index bb71058c5f..544d04c23c 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -9,10 +9,11 @@ use crate::ir::types::{ I32, I32X4, I64, I64X2, I8, I8X16, IFLAGS, R32, R64, }; use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type}; +use crate::isa::CallConv; use crate::machinst::*; use crate::{settings, CodegenError, CodegenResult}; -use regalloc::{RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable}; +use regalloc::{PrettyPrint, RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable}; use regalloc::{RegUsageCollector, RegUsageMapper}; use alloc::boxed::Box; @@ -392,6 +393,8 @@ pub struct CallInfo { pub defs: Vec>, pub loc: SourceLoc, pub opcode: Opcode, + pub caller_callconv: CallConv, + pub callee_callconv: CallConv, } /// Additional information for CallInd instructions, left out of line to lower the size of the Inst @@ -403,6 +406,8 @@ pub struct CallIndInfo { pub defs: Vec>, pub loc: SourceLoc, pub opcode: Opcode, + pub caller_callconv: CallConv, + pub callee_callconv: CallConv, } /// Additional information for JTSequence instructions, left out of line to lower the size of the Inst @@ -2491,6 +2496,24 @@ impl MachInst for Inst { } } + fn is_included_in_clobbers(&self) -> bool { + // We exclude call instructions from the clobber-set when they are calls + // from caller to callee with the same ABI. Such calls cannot possibly + // force any new registers to be saved in the prologue, because anything + // that the callee clobbers, the caller is also allowed to clobber. This + // both saves work and enables us to more precisely follow the + // half-caller-save, half-callee-save SysV ABI for some vector + // registers. 
+ // + // See the note in [crate::isa::aarch64::abi::is_reg_clobbered_by_call] for + // more information on this ABI-implementation hack. + match self { + &Inst::Call { ref info } => info.caller_callconv != info.callee_callconv, + &Inst::CallInd { ref info } => info.caller_callconv != info.callee_callconv, + _ => true, + } + } + fn is_term<'a>(&'a self) -> MachTerminator<'a> { match self { &Inst::Ret | &Inst::EpiloguePlaceholder => MachTerminator::Ret, @@ -2623,7 +2646,7 @@ fn mem_finalize_for_show( (mem_str, mem) } -impl ShowWithRRU for Inst { +impl PrettyPrint for Inst { fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { self.pretty_print(mb_rru, &mut EmitState::default()) } @@ -2883,13 +2906,13 @@ impl Inst { &Inst::StoreP64 { rt, rt2, ref mem } => { let rt = rt.show_rru(mb_rru); let rt2 = rt2.show_rru(mb_rru); - let mem = mem.show_rru_sized(mb_rru, /* size = */ 8); + let mem = mem.show_rru(mb_rru); format!("stp {}, {}, {}", rt, rt2, mem) } &Inst::LoadP64 { rt, rt2, ref mem } => { let rt = rt.to_reg().show_rru(mb_rru); let rt2 = rt2.to_reg().show_rru(mb_rru); - let mem = mem.show_rru_sized(mb_rru, /* size = */ 8); + let mem = mem.show_rru(mb_rru); format!("ldp {}, {}, {}", rt, rt2, mem) } &Inst::Mov64 { rd, rm } => { diff --git a/cranelift/codegen/src/isa/aarch64/inst/regs.rs b/cranelift/codegen/src/isa/aarch64/inst/regs.rs index cbf1440927..0b4babe04a 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/regs.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/regs.rs @@ -3,10 +3,11 @@ use crate::isa::aarch64::inst::OperandSize; use crate::isa::aarch64::inst::ScalarSize; use crate::isa::aarch64::inst::VectorSize; -use crate::machinst::*; use crate::settings; -use regalloc::{RealRegUniverse, Reg, RegClass, RegClassInfo, Writable, NUM_REG_CLASSES}; +use regalloc::{ + PrettyPrint, RealRegUniverse, Reg, RegClass, RegClassInfo, Writable, NUM_REG_CLASSES, +}; use std::string::{String, ToString}; diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 5e2fed9064..fc28cb3581 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1857,6 +1857,7 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Call | Opcode::CallIndirect => { let loc = ctx.srcloc(insn); + let caller_conv = ctx.abi().call_conv(); let (mut abi, inputs) = match op { Opcode::Call => { let (extname, dist) = ctx.call_target(insn).unwrap(); @@ -1865,7 +1866,7 @@ pub(crate) fn lower_insn_to_regs>( assert!(inputs.len() == sig.params.len()); assert!(outputs.len() == sig.returns.len()); ( - AArch64ABICaller::from_func(sig, &extname, dist, loc)?, + AArch64ABICaller::from_func(sig, &extname, dist, loc, caller_conv)?, &inputs[..], ) } @@ -1874,7 +1875,10 @@ pub(crate) fn lower_insn_to_regs>( let sig = ctx.call_sig(insn).unwrap(); assert!(inputs.len() - 1 == sig.params.len()); assert!(outputs.len() == sig.returns.len()); - (AArch64ABICaller::from_ptr(sig, ptr, loc, op)?, &inputs[1..]) + ( + AArch64ABICaller::from_ptr(sig, ptr, loc, op, caller_conv)?, + &inputs[1..], + ) } _ => unreachable!(), }; diff --git a/cranelift/codegen/src/isa/aarch64/mod.rs b/cranelift/codegen/src/isa/aarch64/mod.rs index bd34a58cf5..9cbbd633f5 100644 --- a/cranelift/codegen/src/isa/aarch64/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/mod.rs @@ -3,15 +3,13 @@ use crate::ir::condcodes::IntCC; use crate::ir::Function; use crate::isa::Builder as IsaBuilder; -use crate::machinst::{ - compile, MachBackend, MachCompileResult, ShowWithRRU, TargetIsaAdapter, VCode, -}; +use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode}; use crate::result::CodegenResult; use crate::settings; use alloc::boxed::Box; -use regalloc::RealRegUniverse; +use regalloc::{PrettyPrint, RealRegUniverse}; use target_lexicon::{Aarch64Architecture, Architecture, Triple}; // New backend: diff --git a/cranelift/codegen/src/isa/arm32/abi.rs 
b/cranelift/codegen/src/isa/arm32/abi.rs index 54f0d3e6a8..8cd2df1396 100644 --- a/cranelift/codegen/src/isa/arm32/abi.rs +++ b/cranelift/codegen/src/isa/arm32/abi.rs @@ -361,6 +361,8 @@ impl ABIMachineSpec for Arm32MachineDeps { loc: SourceLoc, opcode: ir::Opcode, tmp: Writable, + _callee_conv: isa::CallConv, + _caller_conv: isa::CallConv, ) -> SmallVec<[(InstIsSafepoint, Inst); 2]> { let mut insts = SmallVec::new(); match &dest { @@ -431,11 +433,11 @@ impl ABIMachineSpec for Arm32MachineDeps { s.nominal_sp_to_fp } - fn get_caller_saves(_call_conv: isa::CallConv) -> Vec> { + fn get_regs_clobbered_by_call(_: isa::CallConv) -> Vec> { let mut caller_saved = Vec::new(); for i in 0..15 { let r = writable_rreg(i); - if is_caller_save(r.to_reg().to_real_reg()) { + if is_reg_clobbered_by_call(r.to_reg().to_real_reg()) { caller_saved.push(r); } } @@ -461,7 +463,7 @@ fn get_callee_saves(regs: &Set>) -> Vec> { ret } -fn is_caller_save(r: RealReg) -> bool { +fn is_reg_clobbered_by_call(r: RealReg) -> bool { let enc = r.get_hw_encoding(); enc <= 3 } diff --git a/cranelift/codegen/src/isa/arm32/inst/args.rs b/cranelift/codegen/src/isa/arm32/inst/args.rs index 252581a2a1..2c1b8e97d6 100644 --- a/cranelift/codegen/src/isa/arm32/inst/args.rs +++ b/cranelift/codegen/src/isa/arm32/inst/args.rs @@ -2,7 +2,7 @@ use crate::isa::arm32::inst::*; -use regalloc::{RealRegUniverse, Reg}; +use regalloc::{PrettyPrint, RealRegUniverse, Reg}; use std::string::String; @@ -265,7 +265,7 @@ impl BranchTarget { } } -impl ShowWithRRU for ShiftOpAndAmt { +impl PrettyPrint for ShiftOpAndAmt { fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { let op = match self.op() { ShiftOp::LSL => "lsl", @@ -277,19 +277,19 @@ impl ShowWithRRU for ShiftOpAndAmt { } } -impl ShowWithRRU for UImm8 { +impl PrettyPrint for UImm8 { fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { format!("#{}", self.value) } } -impl ShowWithRRU for UImm12 { +impl PrettyPrint for UImm12 { fn show_rru(&self, 
_mb_rru: Option<&RealRegUniverse>) -> String { format!("#{}", self.value) } } -impl ShowWithRRU for AMode { +impl PrettyPrint for AMode { fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { match self { &AMode::RegReg(rn, rm, imm2) => { @@ -317,7 +317,7 @@ impl ShowWithRRU for AMode { } } -impl ShowWithRRU for Cond { +impl PrettyPrint for Cond { fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { let mut s = format!("{:?}", self); s.make_ascii_lowercase(); @@ -325,7 +325,7 @@ impl ShowWithRRU for Cond { } } -impl ShowWithRRU for BranchTarget { +impl PrettyPrint for BranchTarget { fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { match self { &BranchTarget::Label(label) => format!("label{:?}", label.get()), diff --git a/cranelift/codegen/src/isa/arm32/inst/mod.rs b/cranelift/codegen/src/isa/arm32/inst/mod.rs index dfd4906f05..78aa0d784f 100644 --- a/cranelift/codegen/src/isa/arm32/inst/mod.rs +++ b/cranelift/codegen/src/isa/arm32/inst/mod.rs @@ -8,7 +8,7 @@ use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type}; use crate::machinst::*; use crate::{settings, CodegenError, CodegenResult}; -use regalloc::{RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable}; +use regalloc::{PrettyPrint, RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable}; use regalloc::{RegUsageCollector, RegUsageMapper}; use alloc::boxed::Box; @@ -897,7 +897,7 @@ fn mem_finalize_for_show( (mem_str, mem) } -impl ShowWithRRU for Inst { +impl PrettyPrint for Inst { fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { self.pretty_print(mb_rru, &mut EmitState::default()) } diff --git a/cranelift/codegen/src/isa/arm32/lower_inst.rs b/cranelift/codegen/src/isa/arm32/lower_inst.rs index 7a584fe103..1cff717da3 100644 --- a/cranelift/codegen/src/isa/arm32/lower_inst.rs +++ b/cranelift/codegen/src/isa/arm32/lower_inst.rs @@ -513,6 +513,7 @@ pub(crate) fn lower_insn_to_regs>( } Opcode::Call | Opcode::CallIndirect => { let 
loc = ctx.srcloc(insn); + let caller_conv = ctx.abi().call_conv(); let (mut abi, inputs) = match op { Opcode::Call => { let (extname, dist) = ctx.call_target(insn).unwrap(); @@ -521,7 +522,7 @@ pub(crate) fn lower_insn_to_regs>( assert_eq!(inputs.len(), sig.params.len()); assert_eq!(outputs.len(), sig.returns.len()); ( - Arm32ABICaller::from_func(sig, &extname, dist, loc)?, + Arm32ABICaller::from_func(sig, &extname, dist, loc, caller_conv)?, &inputs[..], ) } @@ -530,7 +531,10 @@ pub(crate) fn lower_insn_to_regs>( let sig = ctx.call_sig(insn).unwrap(); assert_eq!(inputs.len() - 1, sig.params.len()); assert_eq!(outputs.len(), sig.returns.len()); - (Arm32ABICaller::from_ptr(sig, ptr, loc, op)?, &inputs[1..]) + ( + Arm32ABICaller::from_ptr(sig, ptr, loc, op, caller_conv)?, + &inputs[1..], + ) } _ => unreachable!(), }; diff --git a/cranelift/codegen/src/isa/arm32/mod.rs b/cranelift/codegen/src/isa/arm32/mod.rs index c61d3f27ad..2a278a233d 100644 --- a/cranelift/codegen/src/isa/arm32/mod.rs +++ b/cranelift/codegen/src/isa/arm32/mod.rs @@ -3,14 +3,12 @@ use crate::ir::condcodes::IntCC; use crate::ir::Function; use crate::isa::Builder as IsaBuilder; -use crate::machinst::{ - compile, MachBackend, MachCompileResult, ShowWithRRU, TargetIsaAdapter, VCode, -}; +use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode}; use crate::result::CodegenResult; use crate::settings; use alloc::boxed::Box; -use regalloc::RealRegUniverse; +use regalloc::{PrettyPrint, RealRegUniverse}; use target_lexicon::{Architecture, ArmArchitecture, Triple}; // New backend: diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 2c1404937c..a253a92bae 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -493,6 +493,8 @@ impl ABIMachineSpec for X64ABIMachineSpec { loc: SourceLoc, opcode: ir::Opcode, tmp: Writable, + _callee_conv: isa::CallConv, + _caller_conv: isa::CallConv, ) -> 
SmallVec<[(InstIsSafepoint, Self::I); 2]> { let mut insts = SmallVec::new(); match dest { @@ -545,7 +547,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { s.nominal_sp_to_fp } - fn get_caller_saves(call_conv: isa::CallConv) -> Vec> { + fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec> { let mut caller_saved = vec![ // Systemv calling convention: // - GPR: all except RBX, RBP, R12 to R15 (which are callee-saved). @@ -577,7 +579,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { Writable::from_reg(regs::xmm15()), ]; - if call_conv.extends_baldrdash() { + if call_conv_of_callee.extends_baldrdash() { caller_saved.push(Writable::from_reg(regs::r12())); caller_saved.push(Writable::from_reg(regs::r13())); // Not r14; implicitly preserved in the entry. diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 29cf01c71a..3eeec52bf0 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -5,7 +5,10 @@ use super::EmitState; use crate::ir::condcodes::{FloatCC, IntCC}; use crate::machinst::*; use core::fmt::Debug; -use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageCollector, RegUsageMapper, Writable}; +use regalloc::{ + PrettyPrint, PrettyPrintSized, RealRegUniverse, Reg, RegClass, RegUsageCollector, + RegUsageMapper, Writable, +}; use std::fmt; use std::string::{String, ToString}; @@ -68,7 +71,7 @@ impl Amode { } } -impl ShowWithRRU for Amode { +impl PrettyPrint for Amode { fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { match self { Amode::ImmReg { simm32, base } => { @@ -156,7 +159,7 @@ impl Into for Amode { } } -impl ShowWithRRU for SyntheticAmode { +impl PrettyPrint for SyntheticAmode { fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { match self { SyntheticAmode::Real(addr) => addr.show_rru(mb_rru), @@ -214,11 +217,13 @@ impl RegMemImm { } } -impl ShowWithRRU for RegMemImm { +impl PrettyPrint for RegMemImm { fn 
show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { self.show_rru_sized(mb_rru, 8) } +} +impl PrettyPrintSized for RegMemImm { fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String { match self { Self::Reg { reg } => show_ireg_sized(*reg, mb_rru, size), @@ -271,11 +276,13 @@ impl From> for RegMem { } } -impl ShowWithRRU for RegMem { +impl PrettyPrint for RegMem { fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { self.show_rru_sized(mb_rru, 8) } +} +impl PrettyPrintSized for RegMem { fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String { match self { RegMem::Reg { reg } => show_ireg_sized(*reg, mb_rru, size), @@ -1098,7 +1105,7 @@ pub enum BranchTarget { ResolvedOffset(isize), } -impl ShowWithRRU for BranchTarget { +impl PrettyPrint for BranchTarget { fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { match self { BranchTarget::Label(l) => format!("{:?}", l), diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 4818022398..b370c97f44 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -8,8 +8,8 @@ use crate::{settings, settings::Flags, CodegenError, CodegenResult}; use alloc::boxed::Box; use alloc::vec::Vec; use regalloc::{ - RealRegUniverse, Reg, RegClass, RegUsageCollector, RegUsageMapper, SpillSlot, VirtualReg, - Writable, + PrettyPrint, PrettyPrintSized, RealRegUniverse, Reg, RegClass, RegUsageCollector, + RegUsageMapper, SpillSlot, VirtualReg, Writable, }; use smallvec::SmallVec; use std::fmt; @@ -1165,7 +1165,7 @@ impl Inst { //============================================================================= // Instructions: printing -impl ShowWithRRU for Inst { +impl PrettyPrint for Inst { fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { fn ljustify(s: String) -> String { let w = 7; diff --git a/cranelift/codegen/src/isa/x64/inst/regs.rs 
b/cranelift/codegen/src/isa/x64/inst/regs.rs index 5cc9a83295..04bc1f09bf 100644 --- a/cranelift/codegen/src/isa/x64/inst/regs.rs +++ b/cranelift/codegen/src/isa/x64/inst/regs.rs @@ -10,9 +10,11 @@ //! Also, they will have to be ABI dependent. Need to find a way to avoid constructing a universe //! for each function we compile. -use crate::{machinst::pretty_print::ShowWithRRU, settings}; +use crate::settings; use alloc::vec::Vec; -use regalloc::{RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, NUM_REG_CLASSES}; +use regalloc::{ + PrettyPrint, RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, NUM_REG_CLASSES, +}; use std::string::String; // Hardware encodings for a few registers. diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 6eac09618a..4fd1da9f3b 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -384,9 +384,10 @@ fn emit_vm_call>( // TODO avoid recreating signatures for every single Libcall function. 
let call_conv = CallConv::for_libcall(flags, CallConv::triple_default(triple)); let sig = make_libcall_sig(ctx, insn, call_conv, types::I64); + let caller_conv = ctx.abi().call_conv(); let loc = ctx.srcloc(insn); - let mut abi = X64ABICaller::from_func(&sig, &extname, dist, loc)?; + let mut abi = X64ABICaller::from_func(&sig, &extname, dist, loc, caller_conv)?; abi.emit_stack_pre_adjust(ctx); @@ -1558,6 +1559,7 @@ fn lower_insn_to_regs>( Opcode::Call | Opcode::CallIndirect => { let loc = ctx.srcloc(insn); + let caller_conv = ctx.abi().call_conv(); let (mut abi, inputs) = match op { Opcode::Call => { let (extname, dist) = ctx.call_target(insn).unwrap(); @@ -1565,7 +1567,7 @@ fn lower_insn_to_regs>( assert_eq!(inputs.len(), sig.params.len()); assert_eq!(outputs.len(), sig.returns.len()); ( - X64ABICaller::from_func(sig, &extname, dist, loc)?, + X64ABICaller::from_func(sig, &extname, dist, loc, caller_conv)?, &inputs[..], ) } @@ -1575,7 +1577,10 @@ fn lower_insn_to_regs>( let sig = ctx.call_sig(insn).unwrap(); assert_eq!(inputs.len() - 1, sig.params.len()); assert_eq!(outputs.len(), sig.returns.len()); - (X64ABICaller::from_ptr(sig, ptr, loc, op)?, &inputs[1..]) + ( + X64ABICaller::from_ptr(sig, ptr, loc, op, caller_conv)?, + &inputs[1..], + ) } _ => unreachable!(), diff --git a/cranelift/codegen/src/isa/x64/mod.rs b/cranelift/codegen/src/isa/x64/mod.rs index 5dfc078a75..211bc50081 100644 --- a/cranelift/codegen/src/isa/x64/mod.rs +++ b/cranelift/codegen/src/isa/x64/mod.rs @@ -4,13 +4,11 @@ use super::TargetIsa; use crate::ir::{condcodes::IntCC, Function}; use crate::isa::x64::{inst::regs::create_reg_universe_systemv, settings as x64_settings}; use crate::isa::Builder as IsaBuilder; -use crate::machinst::{ - compile, pretty_print::ShowWithRRU, MachBackend, MachCompileResult, TargetIsaAdapter, VCode, -}; +use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode}; use crate::result::CodegenResult; use crate::settings::{self as 
shared_settings, Flags}; use alloc::boxed::Box; -use regalloc::RealRegUniverse; +use regalloc::{PrettyPrint, RealRegUniverse}; use target_lexicon::Triple; mod abi; diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index c00b58b955..8308cfd49d 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -2,6 +2,7 @@ use crate::binemit::StackMap; use crate::ir::StackSlot; +use crate::isa::CallConv; use crate::machinst::*; use crate::settings; @@ -25,6 +26,9 @@ pub trait ABICallee { /// Get the settings controlling this function's compilation. fn flags(&self) -> &settings::Flags; + /// Get the calling convention implemented by this ABI object. + fn call_conv(&self) -> CallConv; + /// Get the liveins of the function. fn liveins(&self) -> Set; diff --git a/cranelift/codegen/src/machinst/abi_impl.rs b/cranelift/codegen/src/machinst/abi_impl.rs index ed7a8f1448..2f218fd3a8 100644 --- a/cranelift/codegen/src/machinst/abi_impl.rs +++ b/cranelift/codegen/src/machinst/abi_impl.rs @@ -347,6 +347,8 @@ pub trait ABIMachineSpec { loc: SourceLoc, opcode: ir::Opcode, tmp: Writable, + callee_conv: isa::CallConv, + caller_conv: isa::CallConv, ) -> SmallVec<[(InstIsSafepoint, Self::I); 2]>; /// Get the number of spillslots required for the given register-class and @@ -359,8 +361,9 @@ pub trait ABIMachineSpec { /// Get the "nominal SP to FP" offset from an instruction-emission state. fn get_nominal_sp_to_fp(s: &::State) -> i64; - /// Get all caller-save registers. - fn get_caller_saves(call_conv: isa::CallConv) -> Vec>; + /// Get all caller-save registers, that is, registers that we expect + /// not to be saved across a call to a callee with the given ABI. + fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec>; } /// ABI information shared between body (callee) and caller.
@@ -682,6 +685,10 @@ impl ABICallee for ABICalleeImpl { &self.flags } + fn call_conv(&self) -> isa::CallConv { + self.sig.call_conv + } + fn liveins(&self) -> Set { let mut set: Set = Set::empty(); for &arg in &self.sig.args { @@ -1040,7 +1047,7 @@ fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec, Vec defs.push(Writable::from_reg(reg.to_reg())), @@ -1063,8 +1070,10 @@ pub struct ABICallerImpl { dest: CallDest, /// Location of callsite. loc: ir::SourceLoc, - /// Actuall call opcode; used to distinguish various types of calls. + /// Actual call opcode; used to distinguish various types of calls. opcode: ir::Opcode, + /// Caller's calling convention. + caller_conv: isa::CallConv, _mach: PhantomData, } @@ -1085,6 +1094,7 @@ impl ABICallerImpl { extname: &ir::ExternalName, dist: RelocDistance, loc: ir::SourceLoc, + caller_conv: isa::CallConv, ) -> CodegenResult> { let sig = ABISig::from_func_sig::(sig)?; let (uses, defs) = abisig_to_uses_and_defs::(&sig); @@ -1095,6 +1105,7 @@ impl ABICallerImpl { dest: CallDest::ExtName(extname.clone(), dist), loc, opcode: ir::Opcode::Call, + caller_conv, _mach: PhantomData, }) } @@ -1106,6 +1117,7 @@ impl ABICallerImpl { ptr: Reg, loc: ir::SourceLoc, opcode: ir::Opcode, + caller_conv: isa::CallConv, ) -> CodegenResult> { let sig = ABISig::from_func_sig::(sig)?; let (uses, defs) = abisig_to_uses_and_defs::(&sig); @@ -1116,6 +1128,7 @@ impl ABICallerImpl { dest: CallDest::Reg(ptr), loc, opcode, + caller_conv, _mach: PhantomData, }) } @@ -1255,8 +1268,17 @@ impl ABICaller for ABICallerImpl { self.emit_copy_reg_to_arg(ctx, i, rd.to_reg()); } let tmp = ctx.alloc_tmp(word_rc, word_type); - for (is_safepoint, inst) in - M::gen_call(&self.dest, uses, defs, self.loc, self.opcode, tmp).into_iter() + for (is_safepoint, inst) in M::gen_call( + &self.dest, + uses, + defs, + self.loc, + self.opcode, + tmp, + self.sig.call_conv, + self.caller_conv, + ) + .into_iter() { match is_safepoint { InstIsSafepoint::Yes => ctx.emit_safepoint(inst), diff --git 
a/cranelift/codegen/src/machinst/compile.rs b/cranelift/codegen/src/machinst/compile.rs index 67db2dbafa..1264340a30 100644 --- a/cranelift/codegen/src/machinst/compile.rs +++ b/cranelift/codegen/src/machinst/compile.rs @@ -6,7 +6,7 @@ use crate::settings; use crate::timing; use log::debug; -use regalloc::{allocate_registers_with_opts, Algorithm, Options}; +use regalloc::{allocate_registers_with_opts, Algorithm, Options, PrettyPrint}; /// Compile the given function down to VCode with allocated registers, ready /// for binary emission. @@ -16,7 +16,7 @@ pub fn compile( abi: Box>, ) -> CodegenResult> where - B::MInst: ShowWithRRU, + B::MInst: PrettyPrint, { // Compute lowered block order. let block_order = BlockLoweringOrder::new(f); diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index 72f351aa88..3a470d852c 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -125,8 +125,6 @@ pub mod abi; pub use abi::*; pub mod abi_impl; pub use abi_impl::*; -pub mod pretty_print; -pub use pretty_print::*; pub mod buffer; pub use buffer::*; pub mod adapter; @@ -156,6 +154,11 @@ pub trait MachInst: Clone + Debug { /// Returns true if the instruction is an epilogue placeholder. fn is_epilogue_placeholder(&self) -> bool; + /// Should this instruction be included in the clobber-set? + fn is_included_in_clobbers(&self) -> bool { + true + } + /// Generate a move. fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Self; diff --git a/cranelift/codegen/src/machinst/pretty_print.rs b/cranelift/codegen/src/machinst/pretty_print.rs deleted file mode 100644 index 40e7c1b842..0000000000 --- a/cranelift/codegen/src/machinst/pretty_print.rs +++ /dev/null @@ -1,66 +0,0 @@ -//! Pretty-printing for machine code (virtual-registerized or final). 
- -use regalloc::{RealRegUniverse, Reg, Writable}; - -use std::fmt::Debug; -use std::hash::Hash; -use std::string::{String, ToString}; - -// FIXME: Should this go into regalloc.rs instead? - -/// A trait for printing instruction bits and pieces, with the the ability to -/// take a contextualising RealRegUniverse that is used to give proper names to -/// registers. -pub trait ShowWithRRU { - /// Return a string that shows the implementing object in context of the - /// given `RealRegUniverse`, if provided. - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String; - - /// The same as |show_rru|, but with an optional hint giving a size in - /// bytes. Its interpretation is object-dependent, and it is intended to - /// pass around enough information to facilitate printing sub-parts of - /// real registers correctly. Objects may ignore size hints that are - /// irrelevant to them. - fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, _size: u8) -> String { - // Default implementation is to ignore the hint. - self.show_rru(mb_rru) - } -} - -impl ShowWithRRU for Reg { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { - if self.is_real() { - if let Some(rru) = mb_rru { - let reg_ix = self.get_index(); - if reg_ix < rru.regs.len() { - return rru.regs[reg_ix].1.to_string(); - } else { - // We have a real reg which isn't listed in the universe. - // Per the regalloc.rs interface requirements, this is - // Totally Not Allowed. Print it generically anyway, so - // we have something to debug. - return format!("!!{:?}!!", self); - } - } - } - // The reg is virtual, or we have no universe. Be generic. - format!("%{:?}", self) - } - - fn show_rru_sized(&self, _mb_rru: Option<&RealRegUniverse>, _size: u8) -> String { - // For the specific case of Reg, we demand not to have a size hint, - // since interpretation of the size is target specific, but this code - // is used by all targets. 
- panic!("Reg::show_rru_sized: impossible to implement"); - } -} - -impl ShowWithRRU for Writable { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { - self.to_reg().show_rru(mb_rru) - } - - fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String { - self.to_reg().show_rru_sized(mb_rru, size) - } -} diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 6733e89da4..26176a7411 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -25,8 +25,8 @@ use crate::timing; use regalloc::Function as RegallocFunction; use regalloc::Set as RegallocSet; use regalloc::{ - BlockIx, InstIx, Range, RegAllocResult, RegClass, RegUsageCollector, RegUsageMapper, SpillSlot, - StackmapRequestInfo, + BlockIx, InstIx, PrettyPrint, Range, RegAllocResult, RegClass, RegUsageCollector, + RegUsageMapper, SpillSlot, StackmapRequestInfo, }; use alloc::boxed::Box; @@ -543,6 +543,10 @@ impl RegallocFunction for VCode { } } + fn is_included_in_clobbers(&self, insn: &I) -> bool { + insn.is_included_in_clobbers() + } + fn get_regs(insn: &I, collector: &mut RegUsageCollector) { insn.get_regs(collector) } @@ -624,7 +628,7 @@ impl fmt::Debug for VCode { } /// Pretty-printing with `RealRegUniverse` context. -impl ShowWithRRU for VCode { +impl PrettyPrint for VCode { fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { use std::fmt::Write; diff --git a/cranelift/filetests/filetests/isa/aarch64/call.clif b/cranelift/filetests/filetests/isa/aarch64/call.clif index f08b7c23fc..96ec0ff04b 100644 --- a/cranelift/filetests/filetests/isa/aarch64/call.clif +++ b/cranelift/filetests/filetests/isa/aarch64/call.clif @@ -151,34 +151,27 @@ block0: ; check: stp fp, lr, [sp, #-16]! 
; nextln: mov fp, sp -; nextln: sub sp, sp, #48 -; nextln: str q8, [sp] -; nextln: str q9, [sp, #16] -; nextln: str q10, [sp, #32] -; nextln: virtual_sp_offset_adjust 48 +; nextln: sub sp, sp, #32 ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: mov v8.16b, v0.16b +; nextln: str s0, [sp] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: mov v9.16b, v0.16b +; nextln: str d0, [sp, #8] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: mov v10.16b, v0.16b +; nextln: str d0, [sp, #16] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: mov v0.16b, v8.16b +; nextln: ldr s0, [sp] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: mov v0.16b, v9.16b +; nextln: ldr d0, [sp, #8] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: mov v0.16b, v10.16b +; nextln: ldr d0, [sp, #16] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: ldr q8, [sp] -; nextln: ldr q9, [sp, #16] -; nextln: ldr q10, [sp, #32] ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret @@ -202,33 +195,26 @@ block0: ; check: stp fp, lr, [sp, #-16]! 
; nextln: mov fp, sp ; nextln: sub sp, sp, #48 -; nextln: str q8, [sp] -; nextln: str q9, [sp, #16] -; nextln: str q10, [sp, #32] -; nextln: virtual_sp_offset_adjust 48 ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: mov v8.16b, v0.16b +; nextln: str q0, [sp] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: mov v9.16b, v0.16b +; nextln: str q0, [sp, #16] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: mov v10.16b, v0.16b +; nextln: str q0, [sp, #32] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: mov v0.16b, v8.16b +; nextln: ldr q0, [sp] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: mov v0.16b, v9.16b +; nextln: ldr q0, [sp, #16] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: mov v0.16b, v10.16b +; nextln: ldr q0, [sp, #32] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: ldr q8, [sp] -; nextln: ldr q9, [sp, #16] -; nextln: ldr q10, [sp, #32] ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret @@ -255,34 +241,27 @@ block0: ; check: stp fp, lr, [sp, #-16]! 
; nextln: mov fp, sp -; nextln: sub sp, sp, #48 -; nextln: str q8, [sp] -; nextln: str q9, [sp, #16] -; nextln: str q10, [sp, #32] -; nextln: virtual_sp_offset_adjust 48 +; nextln: sub sp, sp, #32 ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: mov v8.16b, v0.16b +; nextln: str s0, [sp] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: mov v9.16b, v0.16b +; nextln: str d0, [sp, #8] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: mov v10.16b, v0.16b +; nextln: str q0, [sp, #16] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: mov v0.16b, v8.16b +; nextln: ldr s0, [sp] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: mov v0.16b, v9.16b +; nextln: ldr d0, [sp, #8] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: mov v0.16b, v10.16b +; nextln: ldr q0, [sp, #16] ; nextln: ldr x0, 8 ; b 12 ; data ; nextln: blr x0 -; nextln: ldr q8, [sp] -; nextln: ldr q9, [sp, #16] -; nextln: ldr q10, [sp, #32] ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret diff --git a/cranelift/filetests/filetests/isa/aarch64/prologue.clif b/cranelift/filetests/filetests/isa/aarch64/prologue.clif new file mode 100644 index 0000000000..c172f9bee3 --- /dev/null +++ b/cranelift/filetests/filetests/isa/aarch64/prologue.clif @@ -0,0 +1,99 @@ +test compile +target aarch64 + +function %f(f64) -> f64 { +block0(v0: f64): + v1 = fadd.f64 v0, v0 + v2 = fadd.f64 v0, v0 + v3 = fadd.f64 v0, v0 + v4 = fadd.f64 v0, v0 + v5 = fadd.f64 v0, v0 + v6 = fadd.f64 v0, v0 + v7 = fadd.f64 v0, v0 + v8 = fadd.f64 v0, v0 + v9 = fadd.f64 v0, v0 + v10 = fadd.f64 v0, v0 + v11 = fadd.f64 v0, v0 + v12 = fadd.f64 v0, v0 + v13 = fadd.f64 v0, v0 + v14 = fadd.f64 v0, v0 + v15 = fadd.f64 v0, v0 + v16 = fadd.f64 v0, v0 + v17 = fadd.f64 v0, v0 + v18 = fadd.f64 v0, v0 + v19 = fadd.f64 v0, v0 + v20 = fadd.f64 v0, v0 + v21 = fadd.f64 v0, v0 + v22 = fadd.f64 v0, v0 + v23 = fadd.f64 v0, v0 + v24 = fadd.f64 v0, v0 + v25 = fadd.f64 v0, v0 + v26 = 
fadd.f64 v0, v0 + v27 = fadd.f64 v0, v0 + v28 = fadd.f64 v0, v0 + v29 = fadd.f64 v0, v0 + v30 = fadd.f64 v0, v0 + v31 = fadd.f64 v0, v0 + + v32 = fadd.f64 v0, v1 + v33 = fadd.f64 v2, v3 + v34 = fadd.f64 v4, v5 + v35 = fadd.f64 v6, v7 + v36 = fadd.f64 v8, v9 + v37 = fadd.f64 v10, v11 + v38 = fadd.f64 v12, v13 + v39 = fadd.f64 v14, v15 + v40 = fadd.f64 v16, v17 + v41 = fadd.f64 v18, v19 + v42 = fadd.f64 v20, v21 + v43 = fadd.f64 v22, v23 + v44 = fadd.f64 v24, v25 + v45 = fadd.f64 v26, v27 + v46 = fadd.f64 v28, v29 + v47 = fadd.f64 v30, v31 + + v48 = fadd.f64 v32, v33 + v49 = fadd.f64 v34, v35 + v50 = fadd.f64 v36, v37 + v51 = fadd.f64 v38, v39 + v52 = fadd.f64 v40, v41 + v53 = fadd.f64 v42, v43 + v54 = fadd.f64 v44, v45 + v55 = fadd.f64 v46, v47 + + v56 = fadd.f64 v48, v49 + v57 = fadd.f64 v50, v51 + v58 = fadd.f64 v52, v53 + v59 = fadd.f64 v54, v55 + + v60 = fadd.f64 v56, v57 + v61 = fadd.f64 v58, v59 + + v62 = fadd.f64 v60, v61 + + return v62 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: sub sp, sp, #128 +; nextln: str q8, [sp] +; nextln: str q9, [sp, #16] +; nextln: str q10, [sp, #32] +; nextln: str q11, [sp, #48] +; nextln: str q12, [sp, #64] +; nextln: str q13, [sp, #80] +; nextln: str q14, [sp, #96] +; nextln: str q15, [sp, #112] + +; check: ldr q8, [sp] +; nextln: ldr q9, [sp, #16] +; nextln: ldr q10, [sp, #32] +; nextln: ldr q11, [sp, #48] +; nextln: ldr q12, [sp, #64] +; nextln: ldr q13, [sp, #80] +; nextln: ldr q14, [sp, #96] +; nextln: ldr q15, [sp, #112] +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret