From b93e8c296d53fa11331ed7f62db4f725df81fe78 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 9 Jun 2020 16:32:46 -0700 Subject: [PATCH 1/3] Initial reftype support in aarch64, modulo safepoints. This commit adds the inital support to allow reftypes to flow through the program when targetting aarch64. It also adds a fix to the `ModuleTranslationState` needed to send R32/R64 types over from the SpiderMonkey embedding. This commit does not include any support for safepoints in aarch64 or the `MachInst` infrastructure; that is in the next commit. This commit also makes a drive-by improvement to `Bint`, avoiding an unneeded zero-extension op when the extended value comes directly from a conditional-set (which produces a full-width 0 or 1). --- cranelift/codegen/src/isa/aarch64/abi.rs | 9 +-- cranelift/codegen/src/isa/aarch64/inst/mod.rs | 6 +- cranelift/codegen/src/isa/aarch64/lower.rs | 6 +- .../codegen/src/isa/aarch64/lower_inst.rs | 38 ++++++++++- .../filetests/vcode/aarch64/reftypes.clif | 65 +++++++++++++++++++ cranelift/wasm/src/state/module_state.rs | 1 + 6 files changed, 114 insertions(+), 11 deletions(-) create mode 100644 cranelift/filetests/filetests/vcode/aarch64/reftypes.clif diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 3037e801e2..f89e60b175 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -400,6 +400,7 @@ fn in_int_reg(ty: ir::Type) -> bool { match ty { types::I8 | types::I16 | types::I32 | types::I64 => true, types::B1 | types::B8 | types::B16 | types::B32 | types::B64 => true, + types::R32 | types::R64 => true, _ => false, } } @@ -653,12 +654,12 @@ fn load_stack(mem: MemArg, into_reg: Writable, ty: Type) -> Inst { mem, srcloc: None, }, - types::B32 | types::I32 => Inst::ULoad32 { + types::B32 | types::I32 | types::R32 => Inst::ULoad32 { rd: into_reg, mem, srcloc: None, }, - types::B64 | types::I64 => Inst::ULoad64 { + types::B64 | types::I64 | types::R64 => Inst::ULoad64 { rd: into_reg, mem, srcloc: None, @@ -689,12 +690,12 @@ fn store_stack(mem: MemArg, from_reg: Reg, ty: Type) -> Inst { mem, srcloc: None, }, - types::B32 | types::I32 => Inst::Store32 { + types::B32 | types::I32 | types::R32 => Inst::Store32 { rd: from_reg, mem, srcloc: None, }, - types::B64 | types::I64 => Inst::Store64 { + types::B64 | types::I64 | types::R64 => Inst::Store64 { rd: from_reg, mem, srcloc: None, diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 3039fa2f50..d7c2efd544 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -6,7 +6,7 @@ use crate::binemit::CodeOffset; use crate::ir::types::{ B1, B16, B16X8, B32, B32X4, B64, B64X2, B8, B8X16, F32, F32X2, F32X4, F64, F64X2, FFLAGS, I16, - I16X4, I16X8, I32, I32X2, I32X4, I64, I64X2, I8, I8X16, I8X8, IFLAGS, + I16X4, I16X8, I32, I32X2, I32X4, I64, I64X2, I8, I8X16, I8X8, IFLAGS, R32, R64, }; use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type}; use crate::machinst::*; @@ -2081,6 +2081,8 @@ impl MachInst for Inst { || ty == B32 || ty == I64 || ty == B64 + || ty == R32 + || ty == R64 ); Inst::load_constant(to_reg, value) } @@ -2102,7 +2104,7 @@ impl MachInst for Inst { fn rc_for_type(ty: Type) -> CodegenResult { match ty { - I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => Ok(RegClass::I64), + I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 | R32 | R64 => Ok(RegClass::I64), F32 | F64 => Ok(RegClass::V128), IFLAGS | FFLAGS => Ok(RegClass::I64), B8X16 | I8X16 | B16X8 | I16X8 | B32X4 | I32X4 | B64X2 | I64X2 | F32X4 | F64X2 => { diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 71f257abf3..03a464be9a 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -829,8 +829,8 @@ pub fn ty_bits(ty: Type) -> usize { B1 => 1, B8 | I8 => 8, B16 | I16 => 16, - B32 | I32 | F32 => 32, - B64 | I64 | F64 => 64, + B32 | I32 | F32 | R32 => 32, + B64 | I64 | F64 | R64 => 64, B128 | I128 => 128, IFLAGS | FFLAGS => 32, B8X8 | I8X8 | B16X4 | I16X4 | B32X2 | I32X2 => 64, @@ -842,7 +842,7 @@ pub fn ty_bits(ty: Type) -> usize { pub(crate) fn ty_is_int(ty: Type) -> bool { match ty { - B1 | B8 | I8 | B16 | I16 | B32 | I32 | B64 | I64 => true, + B1 | B8 | I8 | B16 | I16 | B32 | I32 | B64 | I64 | R32 | R64 => true, F32 | F64 | B128 | I128 | I8X8 | I8X16 | I16X4 | I16X8 | I32X2 | I32X4 | I64X2 => false, IFLAGS | FFLAGS => panic!("Unexpected flags type"), _ => panic!("ty_is_int() on unknown type: {:?}", ty), diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index a24a6eec3d..676f206959 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1204,7 +1204,26 @@ pub(crate) fn lower_insn_to_regs>( } Opcode::IsNull | Opcode::IsInvalid => { - panic!("Reference types not supported"); + // Null references are represented by the constant value 0; invalid references are + // represented by the constant value -1. See `define_reftypes()` in + // `meta/src/isa/x86/encodings.rs` to confirm. + let rd = get_output_reg(ctx, outputs[0]); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let ty = ctx.input_ty(insn, 0); + let (alu_op, const_value) = match op { + Opcode::IsNull => { + // cmp rn, #0 + (choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64), 0) + } + Opcode::IsInvalid => { + // cmn rn, #1 + (choose_32_64(ty, ALUOp::AddS32, ALUOp::AddS64), 1) + } + _ => unreachable!(), + }; + let const_value = ResultRSEImm12::Imm12(Imm12::maybe_from_u64(const_value).unwrap()); + ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, const_value)); + ctx.emit(Inst::CSet { rd, cond: Cond::Eq }); } Opcode::Copy => { @@ -1215,6 +1234,21 @@ pub(crate) fn lower_insn_to_regs>( } Opcode::Bint | Opcode::Breduce | Opcode::Bextend | Opcode::Ireduce => { + // If this is a Bint from a Trueif/Trueff/IsNull/IsInvalid, then the result is already + // 64-bit-zero-extended, even if the CLIF type doesn't say so, because it was produced + // by a CSet. In this case, we do not need to do any zero-extension. + let input_info = ctx.get_input(insn, 0); + let src_op = input_info + .inst + .map(|(src_inst, _)| ctx.data(src_inst).opcode()); + let narrow_mode = match (src_op, op) { + (Some(Opcode::Trueif), Opcode::Bint) + | (Some(Opcode::Trueff), Opcode::Bint) + | (Some(Opcode::IsNull), Opcode::Bint) + | (Some(Opcode::IsInvalid), Opcode::Bint) => NarrowValueMode::None, + _ => NarrowValueMode::ZeroExtend64, + }; + // All of these ops are simply a move from a zero-extended source. // Here is why this works, in each case: // @@ -1227,7 +1261,7 @@ pub(crate) fn lower_insn_to_regs>( // - Ireduce: changing width of an integer. Smaller ints are stored // with undefined high-order bits, so we can simply do a copy. - let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64); + let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); let rd = get_output_reg(ctx, outputs[0]); let ty = ctx.input_ty(insn, 0); ctx.emit(Inst::gen_move(rd, rn, ty)); diff --git a/cranelift/filetests/filetests/vcode/aarch64/reftypes.clif b/cranelift/filetests/filetests/vcode/aarch64/reftypes.clif new file mode 100644 index 0000000000..9956b6f4ad --- /dev/null +++ b/cranelift/filetests/filetests/vcode/aarch64/reftypes.clif @@ -0,0 +1,65 @@ +test compile +target aarch64 + +function %f0(r64) -> r64 { +block0(v0: r64): + return v0 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret + +function %f1(r32) -> r32 { +block0(v0: r32): + return v0 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret + +function %f2(r64) -> b1 { +block0(v0: r64): + v1 = is_null v0 + return v1 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: subs xzr, x0, #0 +; nextln: cset x0, eq +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret + +function %f3(r64) -> b1 { +block0(v0: r64): + v1 = is_invalid v0 + return v1 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: adds xzr, x0, #1 +; nextln: cset x0, eq +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret + +function %f4() -> r64 { +block0: + v0 = null.r64 + return v0 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: movz x0, #0 +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret diff --git a/cranelift/wasm/src/state/module_state.rs b/cranelift/wasm/src/state/module_state.rs index 4f6ce35394..e1b96b8c82 100644 --- a/cranelift/wasm/src/state/module_state.rs +++ b/cranelift/wasm/src/state/module_state.rs @@ -30,6 +30,7 @@ fn cranelift_to_wasmparser_type(ty: Type) -> WasmResult { types::I64 => wasmparser::Type::I64, types::F32 => wasmparser::Type::F32, types::F64 => wasmparser::Type::F64, + types::R32 | types::R64 => wasmparser::Type::ExternRef, _ => { return Err(WasmError::Unsupported(format!( "Cannot convert Cranelift type to Wasm signature: {:?}", From 08353fcc14b82c9c3728cc521ca6dab615daf930 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 29 Jun 2020 15:49:18 -0700 Subject: [PATCH 2/3] Reftypes part two: add support for stackmaps. This commit adds support for generating stackmaps at safepoints to the new backend framework and to the AArch64 backend in particular. It has been tested to work with SpiderMonkey. --- Cargo.lock | 4 +- cranelift/codegen/Cargo.toml | 2 +- cranelift/codegen/src/inst_predicates.rs | 7 + cranelift/codegen/src/isa/aarch64/abi.rs | 165 +++++++++++------- .../codegen/src/isa/aarch64/inst/emit.rs | 51 +++++- cranelift/codegen/src/isa/aarch64/inst/mod.rs | 45 +++-- .../codegen/src/isa/aarch64/lower_inst.rs | 5 +- cranelift/codegen/src/isa/x64/abi.rs | 28 ++- cranelift/codegen/src/isa/x64/inst/mod.rs | 16 ++ cranelift/codegen/src/machinst/abi.rs | 41 ++++- cranelift/codegen/src/machinst/buffer.rs | 41 ++++- cranelift/codegen/src/machinst/compile.rs | 14 +- cranelift/codegen/src/machinst/lower.rs | 72 ++++++-- cranelift/codegen/src/machinst/mod.rs | 20 ++- cranelift/codegen/src/machinst/vcode.rs | 158 ++++++++++++++--- cranelift/codegen/src/regalloc/safepoint.rs | 9 +- .../filetests/vcode/aarch64/reftypes.clif | 62 +++++++ 17 files changed, 597 insertions(+), 143 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b181fb965d..f7eb191f46 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1680,9 +1680,9 @@ dependencies = [ [[package]] name = "regalloc" -version = "0.0.26" +version = "0.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c03092d79e0fd610932d89ed53895a38c0dd3bcd317a0046e69940de32f1d95" +checksum = "b9ba8aaf5fe7cf307c6dbdaeed85478961d29e25e3bee5169e11b92fa9f027a8" dependencies = [ "log", "rustc-hash", diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 578acf0e19..cdafe049e2 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -26,7 +26,7 @@ smallvec = { version = "1.0.0" } thiserror = "1.0.4" byteorder = { version = "1.3.2", default-features = false } peepmatic-runtime = { path = "../peepmatic/crates/runtime", optional = true, version = "0.2.0" } -regalloc = "0.0.26" +regalloc = { version = "0.0.27" } # It is a goal of the cranelift-codegen crate to have minimal external dependencies. # Please don't add any unless they are essential to the task of creating binary # machine code. Integration tests that need external dependencies can be diff --git a/cranelift/codegen/src/inst_predicates.rs b/cranelift/codegen/src/inst_predicates.rs index f0d6fdf6b5..0ee2eb7c1f 100644 --- a/cranelift/codegen/src/inst_predicates.rs +++ b/cranelift/codegen/src/inst_predicates.rs @@ -61,3 +61,10 @@ pub fn is_constant_64bit(func: &Function, inst: Inst) -> Option { _ => None, } } + +/// Is the given instruction a safepoint (i.e., potentially causes a GC, depending on the +/// embedding, and so requires reftyped values to be enumerated with a stackmap)? +pub fn is_safepoint(func: &Function, inst: Inst) -> bool { + let op = func.dfg[inst].opcode(); + op.is_resumable_trap() || op.is_call() +} diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index f89e60b175..fba104ef43 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -90,12 +90,13 @@ //! - Return v1 in memory at `[P+8]`. //! - Return v0 in memory at `[P+16]`. +use crate::binemit::Stackmap; use crate::ir; use crate::ir::types; use crate::ir::types::*; use crate::ir::{ArgumentExtension, StackSlot}; use crate::isa; -use crate::isa::aarch64::{inst::*, lower::ty_bits}; +use crate::isa::aarch64::{inst::EmitState, inst::*, lower::ty_bits}; use crate::machinst::*; use crate::settings; use crate::{CodegenError, CodegenResult}; @@ -372,7 +373,10 @@ pub struct AArch64ABIBody { clobbered: Set>, /// Total number of spillslots, from regalloc. spillslots: Option, - /// Total frame size. + /// "Total frame size", as defined by "distance between FP and nominal-SP". + /// Some items are pushed below nominal SP, so the function may actually use + /// more stack than this would otherwise imply. It is simply the initial + /// frame/allocation size needed for stackslots and spillslots. total_frame_size: Option, /// The register holding the return-area pointer, if needed. ret_area_ptr: Option>, @@ -811,6 +815,35 @@ fn get_caller_saves(call_conv: isa::CallConv) -> Vec> { caller_saved } +fn gen_sp_adjust_insts(adj: u64, is_sub: bool, mut f: F) { + let alu_op = if is_sub { ALUOp::Sub64 } else { ALUOp::Add64 }; + + if let Some(imm12) = Imm12::maybe_from_u64(adj) { + let adj_inst = Inst::AluRRImm12 { + alu_op, + rd: writable_stack_reg(), + rn: stack_reg(), + imm12, + }; + f(adj_inst); + } else { + let tmp = writable_spilltmp_reg(); + let const_inst = Inst::LoadConst64 { + rd: tmp, + const_data: adj, + }; + let adj_inst = Inst::AluRRRExtend { + alu_op, + rd: writable_stack_reg(), + rn: stack_reg(), + rm: tmp.to_reg(), + extendop: ExtendOp::UXTX, + }; + f(const_inst); + f(adj_inst); + } +} + impl ABIBody for AArch64ABIBody { type I = Inst; @@ -1025,6 +1058,29 @@ impl ABIBody for AArch64ABIBody { store_stack(MemArg::NominalSPOffset(sp_off, ty), from_reg, ty) } + fn spillslots_to_stackmap(&self, slots: &[SpillSlot], state: &EmitState) -> Stackmap { + assert!(state.virtual_sp_offset >= 0); + trace!( + "spillslots_to_stackmap: slots = {:?}, state = {:?}", + slots, + state + ); + let map_size = (state.virtual_sp_offset + state.nominal_sp_to_fp) as u32; + let map_words = (map_size + 7) / 8; + let mut bits = std::iter::repeat(false) + .take(map_words as usize) + .collect::>(); + + let first_spillslot_word = + ((self.stackslots_size + state.virtual_sp_offset as u32) / 8) as usize; + for &slot in slots { + let slot = slot.get() as usize; + bits[first_spillslot_word + slot] = true; + } + + Stackmap::from_slice(&bits[..]) + } + fn gen_prologue(&mut self) -> Vec { let mut insts = vec![]; if !self.call_conv.extends_baldrdash() { @@ -1060,6 +1116,9 @@ impl ABIBody for AArch64ABIBody { } let total_stacksize = (total_stacksize + 15) & !15; // 16-align the stack. + let mut total_sp_adjust = 0; + let mut nominal_sp_to_real_sp = 0; + if !self.call_conv.extends_baldrdash() { // Leaf functions with zero stack don't need a stack check if one's // specified, otherwise always insert the stack check. @@ -1070,42 +1129,28 @@ impl ABIBody for AArch64ABIBody { } } if total_stacksize > 0 { - // sub sp, sp, #total_stacksize - if let Some(imm12) = Imm12::maybe_from_u64(total_stacksize as u64) { - let sub_inst = Inst::AluRRImm12 { - alu_op: ALUOp::Sub64, - rd: writable_stack_reg(), - rn: stack_reg(), - imm12, - }; - insts.push(sub_inst); - } else { - let tmp = writable_spilltmp_reg(); - let const_inst = Inst::LoadConst64 { - rd: tmp, - const_data: total_stacksize as u64, - }; - let sub_inst = Inst::AluRRRExtend { - alu_op: ALUOp::Sub64, - rd: writable_stack_reg(), - rn: stack_reg(), - rm: tmp.to_reg(), - extendop: ExtendOp::UXTX, - }; - insts.push(const_inst); - insts.push(sub_inst); - } + total_sp_adjust += total_stacksize as u64; } } - // N.B.: "nominal SP", which we use to refer to stackslots - // and spillslots, is *here* (the value of SP at this program point). + // N.B.: "nominal SP", which we use to refer to stackslots and + // spillslots, is right here. + // // If we push any clobbers below, we emit a virtual-SP adjustment // meta-instruction so that the nominal-SP references behave as if SP // were still at this point. See documentation for // [crate::isa::aarch64::abi](this module) for more details on // stackframe layout and nominal-SP maintenance. + if total_sp_adjust > 0 { + // sub sp, sp, #total_stacksize + gen_sp_adjust_insts( + total_sp_adjust, + /* is_sub = */ true, + |inst| insts.push(inst), + ); + } + // Save clobbered registers. let (clobbered_int, clobbered_vec) = get_callee_saves(self.call_conv, self.clobbered.to_vec()); @@ -1149,10 +1194,11 @@ impl ABIBody for AArch64ABIBody { srcloc: None, }); } + nominal_sp_to_real_sp += clobber_size as i64; if clobber_size > 0 { insts.push(Inst::VirtualSPOffsetAdj { - offset: clobber_size as i64, + offset: nominal_sp_to_real_sp, }); } @@ -1246,6 +1292,10 @@ impl ABIBody for AArch64ABIBody { .expect("frame size not computed before prologue generation") } + fn stack_args_size(&self) -> u32 { + self.sig.stack_arg_space as u32 + } + fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 { // We allocate in terms of 8-byte slots. match (rc, ty) { @@ -1256,15 +1306,30 @@ impl ABIBody for AArch64ABIBody { } } - fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Type) -> Inst { + fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option) -> Inst { + let ty = ty_from_ty_hint_or_reg_class(from_reg.to_reg(), ty); self.store_spillslot(to_slot, ty, from_reg.to_reg()) } - fn gen_reload(&self, to_reg: Writable, from_slot: SpillSlot, ty: Type) -> Inst { + fn gen_reload( + &self, + to_reg: Writable, + from_slot: SpillSlot, + ty: Option, + ) -> Inst { + let ty = ty_from_ty_hint_or_reg_class(to_reg.to_reg().to_reg(), ty); self.load_spillslot(from_slot, ty, to_reg.map(|r| r.to_reg())) } } +fn ty_from_ty_hint_or_reg_class(r: Reg, ty: Option) -> Type { + match (ty, r.get_class()) { + (Some(t), _) => t, + (None, RegClass::I64) => I64, + _ => panic!("Unexpected register class!"), + } +} + enum CallDest { ExtName(ir::ExternalName, RelocDistance), Reg(Reg), @@ -1343,7 +1408,7 @@ impl AArch64ABICall { } } -fn adjust_stack>(ctx: &mut C, amount: u64, is_sub: bool) { +fn adjust_stack_and_nominal_sp>(ctx: &mut C, amount: u64, is_sub: bool) { if amount == 0 { return; } @@ -1357,27 +1422,9 @@ fn adjust_stack>(ctx: &mut C, amount: u64, is_sub: bool) { offset: sp_adjustment, }); - let alu_op = if is_sub { ALUOp::Sub64 } else { ALUOp::Add64 }; - if let Some(imm12) = Imm12::maybe_from_u64(amount) { - ctx.emit(Inst::AluRRImm12 { - alu_op, - rd: writable_stack_reg(), - rn: stack_reg(), - imm12, - }) - } else { - ctx.emit(Inst::LoadConst64 { - rd: writable_spilltmp_reg(), - const_data: amount, - }); - ctx.emit(Inst::AluRRRExtend { - alu_op, - rd: writable_stack_reg(), - rn: stack_reg(), - rm: spilltmp_reg(), - extendop: ExtendOp::UXTX, - }); - } + gen_sp_adjust_insts(amount, is_sub, |inst| { + ctx.emit(inst); + }); } impl ABICall for AArch64ABICall { @@ -1393,12 +1440,12 @@ impl ABICall for AArch64ABICall { fn emit_stack_pre_adjust>(&self, ctx: &mut C) { let off = self.sig.stack_arg_space + self.sig.stack_ret_space; - adjust_stack(ctx, off as u64, /* is_sub = */ true) + adjust_stack_and_nominal_sp(ctx, off as u64, /* is_sub = */ true) } fn emit_stack_post_adjust>(&self, ctx: &mut C) { let off = self.sig.stack_arg_space + self.sig.stack_ret_space; - adjust_stack(ctx, off as u64, /* is_sub = */ false) + adjust_stack_and_nominal_sp(ctx, off as u64, /* is_sub = */ false) } fn emit_copy_reg_to_arg>( @@ -1453,7 +1500,7 @@ impl ABICall for AArch64ABICall { self.emit_copy_reg_to_arg(ctx, i, rd.to_reg()); } match &self.dest { - &CallDest::ExtName(ref name, RelocDistance::Near) => ctx.emit(Inst::Call { + &CallDest::ExtName(ref name, RelocDistance::Near) => ctx.emit_safepoint(Inst::Call { info: Box::new(CallInfo { dest: name.clone(), uses, @@ -1469,7 +1516,7 @@ impl ABICall for AArch64ABICall { offset: 0, srcloc: self.loc, }); - ctx.emit(Inst::CallInd { + ctx.emit_safepoint(Inst::CallInd { info: Box::new(CallIndInfo { rn: spilltmp_reg(), uses, @@ -1479,7 +1526,7 @@ impl ABICall for AArch64ABICall { }), }); } - &CallDest::Reg(reg) => ctx.emit(Inst::CallInd { + &CallDest::Reg(reg) => ctx.emit_safepoint(Inst::CallInd { info: Box::new(CallIndInfo { rn: reg, uses, diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 79f7624bcc..26cedde3b1 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -1,6 +1,6 @@ //! AArch64 ISA: binary code emission. -use crate::binemit::{CodeOffset, Reloc}; +use crate::binemit::{CodeOffset, Reloc, Stackmap}; use crate::ir::constant::ConstantData; use crate::ir::types::*; use crate::ir::TrapCode; @@ -376,7 +376,37 @@ fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable, rn: /// State carried between emissions of a sequence of instructions. #[derive(Default, Clone, Debug)] pub struct EmitState { - virtual_sp_offset: i64, + /// Addend to convert nominal-SP offsets to real-SP offsets at the current + /// program point. + pub(crate) virtual_sp_offset: i64, + /// Offset of FP from nominal-SP. + pub(crate) nominal_sp_to_fp: i64, + /// Safepoint stackmap for upcoming instruction, as provided to `pre_safepoint()`. + stackmap: Option, +} + +impl MachInstEmitState for EmitState { + fn new(abi: &dyn ABIBody) -> Self { + EmitState { + virtual_sp_offset: 0, + nominal_sp_to_fp: abi.frame_size() as i64, + stackmap: None, + } + } + + fn pre_safepoint(&mut self, stackmap: Stackmap) { + self.stackmap = Some(stackmap); + } +} + +impl EmitState { + fn take_stackmap(&mut self) -> Option { + self.stackmap.take() + } + + fn clear_post_insn(&mut self) { + self.stackmap = None; + } } impl MachInstEmit for Inst { @@ -1463,6 +1493,9 @@ impl MachInstEmit for Inst { // Noop; this is just a placeholder for epilogues. } &Inst::Call { ref info } => { + if let Some(s) = state.take_stackmap() { + sink.add_stackmap(4, s); + } sink.add_reloc(info.loc, Reloc::Arm64Call, &info.dest, 0); sink.put4(enc_jump26(0b100101, 0)); if info.opcode.is_call() { @@ -1470,6 +1503,9 @@ impl MachInstEmit for Inst { } } &Inst::CallInd { ref info } => { + if let Some(s) = state.take_stackmap() { + sink.add_stackmap(4, s); + } sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.rn) << 5)); if info.opcode.is_call() { sink.add_call_site(info.loc, info.opcode); @@ -1525,6 +1561,9 @@ impl MachInstEmit for Inst { &Inst::Udf { trap_info } => { let (srcloc, code) = trap_info; sink.add_trap(srcloc, code); + if let Some(s) = state.take_stackmap() { + sink.add_stackmap(4, s); + } sink.put4(0xd4a00000); } &Inst::Adr { rd, off } => { @@ -1709,7 +1748,7 @@ impl MachInstEmit for Inst { debug!( "virtual sp offset adjusted by {} -> {}", offset, - state.virtual_sp_offset + offset + state.virtual_sp_offset + offset, ); state.virtual_sp_offset += offset; } @@ -1728,5 +1767,11 @@ impl MachInstEmit for Inst { let end_off = sink.cur_offset(); debug_assert!((end_off - start_off) <= Inst::worst_case_size()); + + state.clear_post_insn(); + } + + fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String { + self.print_with_state(mb_rru, state) } } diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index d7c2efd544..775fd083d5 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -1346,11 +1346,11 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { collector.add_use(rn); } &Inst::Jump { .. } | &Inst::Ret | &Inst::EpiloguePlaceholder => {} - &Inst::Call { ref info } => { + &Inst::Call { ref info, .. } => { collector.add_uses(&*info.uses); collector.add_defs(&*info.defs); } - &Inst::CallInd { ref info } => { + &Inst::CallInd { ref info, .. } => { collector.add_uses(&*info.uses); collector.add_defs(&*info.defs); collector.add_use(info.rn); @@ -2137,13 +2137,21 @@ impl MachInst for Inst { // feasible for other reasons). 44 } + + fn ref_type_rc(_: &settings::Flags) -> RegClass { + RegClass::I64 + } } //============================================================================= // Pretty-printing of instructions. -fn mem_finalize_for_show(mem: &MemArg, mb_rru: Option<&RealRegUniverse>) -> (String, MemArg) { - let (mem_insts, mem) = mem_finalize(0, mem, &mut Default::default()); +fn mem_finalize_for_show( + mem: &MemArg, + mb_rru: Option<&RealRegUniverse>, + state: &EmitState, +) -> (String, MemArg) { + let (mem_insts, mem) = mem_finalize(0, mem, state); let mut mem_str = mem_insts .into_iter() .map(|inst| inst.show_rru(mb_rru)) @@ -2158,6 +2166,12 @@ fn mem_finalize_for_show(mem: &MemArg, mb_rru: Option<&RealRegUniverse>) -> (Str impl ShowWithRRU for Inst { fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + self.pretty_print(mb_rru, &mut EmitState::default()) + } +} + +impl Inst { + fn print_with_state(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String { fn op_name_size(alu_op: ALUOp) -> (&'static str, OperandSize) { match alu_op { ALUOp::Add32 => ("add", OperandSize::Size32), @@ -2344,7 +2358,7 @@ impl ShowWithRRU for Inst { srcloc: _srcloc, .. } => { - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru); + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); let is_unscaled = match &mem { &MemArg::Unscaled(..) => true, @@ -2392,7 +2406,7 @@ impl ShowWithRRU for Inst { srcloc: _srcloc, .. } => { - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru); + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); let is_unscaled = match &mem { &MemArg::Unscaled(..) => true, @@ -2576,39 +2590,39 @@ impl ShowWithRRU for Inst { } &Inst::FpuLoad32 { rd, ref mem, .. } => { let rd = show_freg_sized(rd.to_reg(), mb_rru, ScalarSize::Size32); - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru); + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); let mem = mem.show_rru(mb_rru); format!("{}ldr {}, {}", mem_str, rd, mem) } &Inst::FpuLoad64 { rd, ref mem, .. } => { let rd = show_freg_sized(rd.to_reg(), mb_rru, ScalarSize::Size64); - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru); + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); let mem = mem.show_rru(mb_rru); format!("{}ldr {}, {}", mem_str, rd, mem) } &Inst::FpuLoad128 { rd, ref mem, .. } => { let rd = rd.to_reg().show_rru(mb_rru); let rd = "q".to_string() + &rd[1..]; - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru); + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); let mem = mem.show_rru(mb_rru); format!("{}ldr {}, {}", mem_str, rd, mem) } &Inst::FpuStore32 { rd, ref mem, .. } => { let rd = show_freg_sized(rd, mb_rru, ScalarSize::Size32); - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru); + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); let mem = mem.show_rru(mb_rru); format!("{}str {}, {}", mem_str, rd, mem) } &Inst::FpuStore64 { rd, ref mem, .. } => { let rd = show_freg_sized(rd, mb_rru, ScalarSize::Size64); - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru); + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); let mem = mem.show_rru(mb_rru); format!("{}str {}, {}", mem_str, rd, mem) } &Inst::FpuStore128 { rd, ref mem, .. } => { let rd = rd.show_rru(mb_rru); let rd = "q".to_string() + &rd[1..]; - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru); + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); let mem = mem.show_rru(mb_rru); format!("{}str {}, {}", mem_str, rd, mem) } @@ -2978,7 +2992,7 @@ impl ShowWithRRU for Inst { // this logic between `emit()` and `show_rru()` -- a separate 1-to-N // expansion stage (i.e., legalization, but without the slow edit-in-place // of the existing legalization framework). - let (mem_insts, mem) = mem_finalize(0, mem, &EmitState::default()); + let (mem_insts, mem) = mem_finalize(0, mem, state); let mut ret = String::new(); for inst in mem_insts.into_iter() { ret.push_str(&inst.show_rru(mb_rru)); @@ -3025,7 +3039,10 @@ impl ShowWithRRU for Inst { } ret } - &Inst::VirtualSPOffsetAdj { offset } => format!("virtual_sp_offset_adjust {}", offset), + &Inst::VirtualSPOffsetAdj { offset } => { + state.virtual_sp_offset += offset; + format!("virtual_sp_offset_adjust {}", offset) + } &Inst::EmitIsland { needed_space } => format!("emit_island {}", needed_space), } } diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 676f206959..d7ddaabf95 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1394,7 +1394,7 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Trap | Opcode::ResumableTrap => { let trap_info = (ctx.srcloc(insn), inst_trapcode(ctx.data(insn)).unwrap()); - ctx.emit(Inst::Udf { trap_info }) + ctx.emit_safepoint(Inst::Udf { trap_info }); } Opcode::Trapif | Opcode::Trapff => { @@ -1432,10 +1432,11 @@ pub(crate) fn lower_insn_to_regs>( trap_info, kind: CondBrKind::Cond(cond), }); + ctx.emit_safepoint(Inst::Udf { trap_info }) } Opcode::Safepoint => { - panic!("safepoint support not implemented!"); + panic!("safepoint instructions not used by new backend's safepoints!"); } Opcode::Trapz | Opcode::Trapnz | Opcode::ResumableTrapnz => { diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 385bedad8c..d19895b675 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -5,6 +5,7 @@ use log::trace; use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable}; use std::mem; +use crate::binemit::Stackmap; use crate::ir::{self, types, types::*, ArgumentExtension, StackSlot, Type}; use crate::isa::{self, x64::inst::*}; use crate::machinst::*; @@ -415,6 +416,10 @@ impl ABIBody for X64ABIBody { ) } + fn spillslots_to_stackmap(&self, _slots: &[SpillSlot], _state: &EmitState) -> Stackmap { + unimplemented!("spillslots_to_stackmap") + } + fn gen_prologue(&mut self) -> Vec { let r_rsp = regs::rsp(); @@ -553,6 +558,10 @@ impl ABIBody for X64ABIBody { .expect("frame size not computed before prologue generation") as u32 } + fn stack_args_size(&self) -> u32 { + unimplemented!("I need to be computed!") + } + fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 { // We allocate in terms of 8-byte slots. match (rc, ty) { @@ -563,15 +572,30 @@ impl ABIBody for X64ABIBody { } } - fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Type) -> Inst { + fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option) -> Inst { + let ty = ty_from_ty_hint_or_reg_class(from_reg.to_reg(), ty); self.store_spillslot(to_slot, ty, from_reg.to_reg()) } - fn gen_reload(&self, to_reg: Writable, from_slot: SpillSlot, ty: Type) -> Inst { + fn gen_reload( + &self, + to_reg: Writable, + from_slot: SpillSlot, + ty: Option, + ) -> Inst { + let ty = ty_from_ty_hint_or_reg_class(to_reg.to_reg().to_reg(), ty); self.load_spillslot(from_slot, ty, to_reg.map(|r| r.to_reg())) } } +fn ty_from_ty_hint_or_reg_class(r: Reg, ty: Option) -> Type { + match (ty, r.get_class()) { + (Some(t), _) => t, + (None, RegClass::I64) => I64, + _ => panic!("Unexpected register class!"), + } +} + fn get_caller_saves(call_conv: isa::CallConv) -> Vec> { let mut caller_saved = Vec::new(); diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index b682046956..9c917f0bc9 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -1258,6 +1258,10 @@ impl MachInst for Inst { 15 } + fn ref_type_rc(_: &settings::Flags) -> RegClass { + RegClass::I64 + } + type LabelUse = LabelUse; } @@ -1273,6 +1277,18 @@ impl MachInstEmit for Inst { fn emit(&self, sink: &mut MachBuffer, flags: &settings::Flags, state: &mut Self::State) { emit::emit(self, sink, flags, state); } + + fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, _: &mut Self::State) -> String { + self.show_rru(mb_rru) + } +} + +impl MachInstEmitState for EmitState { + fn new(_: &dyn ABIBody) -> Self { + EmitState { + virtual_sp_offset: 0, + } + } } /// A label-use (internal relocation) in generated code. diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index 6e5170c3f6..84f574317b 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -1,5 +1,6 @@ //! ABI definitions. +use crate::binemit::Stackmap; use crate::ir::{ArgumentExtension, StackSlot}; use crate::machinst::*; use crate::settings; @@ -100,6 +101,15 @@ pub trait ABIBody { /// Store to a spillslot. fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> Self::I; + /// Generate a stackmap, given a list of spillslots and the emission state + /// at a given program point (prior to emission fo the safepointing + /// instruction). + fn spillslots_to_stackmap( + &self, + slots: &[SpillSlot], + state: &::State, + ) -> Stackmap; + /// Generate a prologue, post-regalloc. This should include any stack /// frame or other setup necessary to use the other methods (`load_arg`, /// `store_retval`, and spillslot accesses.) `self` is mutable so that we @@ -113,21 +123,34 @@ pub trait ABIBody { /// likely closely related. fn gen_epilogue(&self) -> Vec; - /// Returns the full frame size for the given function, after prologue emission has run. This - /// comprises the spill slots and stack-storage slots (but not storage for clobbered callee-save - /// registers, arguments pushed at callsites within this function, or other ephemeral pushes). - /// This is used for ABI variants where the client generates prologue/epilogue code, as in - /// Baldrdash (SpiderMonkey integration). + /// Returns the full frame size for the given function, after prologue + /// emission has run. This comprises the spill slots and stack-storage slots + /// (but not storage for clobbered callee-save registers, arguments pushed + /// at callsites within this function, or other ephemeral pushes). This is + /// used for ABI variants where the client generates prologue/epilogue code, + /// as in Baldrdash (SpiderMonkey integration). fn frame_size(&self) -> u32; + /// Returns the size of arguments expected on the stack. + fn stack_args_size(&self) -> u32; + /// Get the spill-slot size. fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32; - /// Generate a spill. - fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Type) -> Self::I; + /// Generate a spill. The type, if known, is given; this can be used to + /// generate a store instruction optimized for the particular type rather + /// than the RegClass (e.g., only F64 that resides in a V128 register). If + /// no type is given, the implementation should spill the whole register. + fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option) -> Self::I; - /// Generate a reload (fill). - fn gen_reload(&self, to_reg: Writable, from_slot: SpillSlot, ty: Type) -> Self::I; + /// Generate a reload (fill). As for spills, the type may be given to allow + /// a more optimized load instruction to be generated. + fn gen_reload( + &self, + to_reg: Writable, + from_slot: SpillSlot, + ty: Option, + ) -> Self::I; } /// Trait implemented by an object that tracks ABI-related state and can diff --git a/cranelift/codegen/src/machinst/buffer.rs b/cranelift/codegen/src/machinst/buffer.rs index 3a3578ceb5..b12cd9f98c 100644 --- a/cranelift/codegen/src/machinst/buffer.rs +++ b/cranelift/codegen/src/machinst/buffer.rs @@ -140,7 +140,7 @@ //! Given these invariants, we argue why each optimization preserves execution //! semantics below (grep for "Preserves execution semantics"). -use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc}; +use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc, Stackmap}; use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode}; use crate::machinst::{BlockIndex, MachInstLabelUse, VCodeInst}; @@ -168,6 +168,8 @@ pub struct MachBuffer { call_sites: SmallVec<[MachCallSite; 16]>, /// Any source location mappings referring to this code. srclocs: SmallVec<[MachSrcLoc; 64]>, + /// Any stackmaps referring to this code. + stackmaps: SmallVec<[MachStackMap; 8]>, /// The current source location in progress (after `start_srcloc()` and /// before `end_srcloc()`). This is a (start_offset, src_loc) tuple. cur_srcloc: Option<(CodeOffset, SourceLoc)>, @@ -228,6 +230,8 @@ pub struct MachBufferFinalized { call_sites: SmallVec<[MachCallSite; 16]>, /// Any source location mappings referring to this code. srclocs: SmallVec<[MachSrcLoc; 64]>, + /// Any stackmaps referring to this code. + stackmaps: SmallVec<[MachStackMap; 8]>, } static UNKNOWN_LABEL_OFFSET: CodeOffset = 0xffff_ffff; @@ -262,6 +266,7 @@ impl MachBuffer { traps: SmallVec::new(), call_sites: SmallVec::new(), srclocs: SmallVec::new(), + stackmaps: SmallVec::new(), cur_srcloc: None, label_offsets: SmallVec::new(), label_aliases: SmallVec::new(), @@ -1090,6 +1095,7 @@ impl MachBuffer { traps: self.traps, call_sites: self.call_sites, srclocs: self.srclocs, + stackmaps: self.stackmaps, } } @@ -1149,6 +1155,22 @@ impl MachBuffer { self.srclocs.push(MachSrcLoc { start, end, loc }); } } + + /// Add stackmap metadata for this program point: a set of stack offsets + /// (from SP upward) that contain live references. + /// + /// The `offset_to_fp` value is the offset from the nominal SP (at which the + /// `stack_offsets` are based) and the FP value. By subtracting + /// `offset_to_fp` from each `stack_offsets` element, one can obtain + /// live-reference offsets from FP instead. + pub fn add_stackmap(&mut self, insn_len: CodeOffset, stackmap: Stackmap) { + let offset = self.cur_offset(); + self.stackmaps.push(MachStackMap { + offset, + offset_end: offset + insn_len, + stackmap, + }); + } } impl MachBufferFinalized { @@ -1207,6 +1229,11 @@ impl MachBufferFinalized { sink.begin_rodata(); sink.end_codegen(); } + + /// Get the stackmap metadata for this code. + pub fn stackmaps(&self) -> &[MachStackMap] { + &self.stackmaps[..] + } } /// A constant that is deferred to the next constant-pool opportunity. @@ -1286,6 +1313,18 @@ pub struct MachSrcLoc { pub loc: SourceLoc, } +/// Record of stackmap metadata: stack offsets containing references. +#[derive(Clone, Debug)] +pub struct MachStackMap { + /// The code offset at which this stackmap applies. + pub offset: CodeOffset, + /// The code offset at the *end* of the instruction at which this stackmap + /// applies. + pub offset_end: CodeOffset, + /// The Stackmap itself. + pub stackmap: Stackmap, +} + /// Record of branch instruction in the buffer, to facilitate editing. #[derive(Clone, Debug)] struct MachBranch { diff --git a/cranelift/codegen/src/machinst/compile.rs b/cranelift/codegen/src/machinst/compile.rs index 8254fff77b..38e42fd9f3 100644 --- a/cranelift/codegen/src/machinst/compile.rs +++ b/cranelift/codegen/src/machinst/compile.rs @@ -23,7 +23,7 @@ where // Build the lowering context. let lower = Lower::new(f, abi, block_order)?; // Lower the IR. - let mut vcode = lower.lower(b)?; + let (mut vcode, stackmap_request_info) = lower.lower(b)?; debug!( "vcode from lowering: \n{}", @@ -57,11 +57,23 @@ where } } + // If either there are no reference-typed values, or else there are + // but there are no safepoints at which we need to know about them, + // then we don't need stackmaps. + let sri = if stackmap_request_info.reftyped_vregs.len() > 0 + && stackmap_request_info.safepoint_insns.len() > 0 + { + Some(&stackmap_request_info) + } else { + None + }; + let result = { let _tt = timing::regalloc(); allocate_registers_with_opts( &mut vcode, b.reg_universe(), + sri, Options { run_checker, algorithm, diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index ea97607dfe..fbc6c2ee8a 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -4,6 +4,7 @@ use crate::entity::SecondaryMap; use crate::fx::{FxHashMap, FxHashSet}; +use crate::inst_predicates::is_safepoint; use crate::inst_predicates::{has_side_effect_or_load, is_constant_64bit}; use crate::ir::instructions::BranchInfo; use crate::ir::types::I64; @@ -17,7 +18,7 @@ use crate::machinst::{ }; use crate::CodegenResult; -use regalloc::{Reg, RegClass, VirtualReg, Writable}; +use regalloc::{Reg, RegClass, StackmapRequestInfo, VirtualReg, Writable}; use alloc::boxed::Box; use alloc::vec::Vec; @@ -93,6 +94,8 @@ pub trait LowerCtx { /// every side-effecting op; the backend should not try to merge across /// side-effect colors unless the op being merged is known to be pure. fn inst_color(&self, ir_inst: Inst) -> InstColor; + /// Determine whether an instruction is a safepoint. + fn is_safepoint(&self, ir_inst: Inst) -> bool; // Instruction input/output queries: @@ -146,6 +149,8 @@ pub trait LowerCtx { fn alloc_tmp(&mut self, rc: RegClass, ty: Type) -> Writable; /// Emit a machine instruction. fn emit(&mut self, mach_inst: Self::I); + /// Emit a machine instruction that is a safepoint. + fn emit_safepoint(&mut self, mach_inst: Self::I); /// Indicate that the given input uses the register returned by /// `get_input()`. Codegen may not happen otherwise for the producing /// instruction if it has no side effects and no uses. @@ -206,6 +211,14 @@ pub trait LowerBackend { } } +/// A pending instruction to insert and auxiliary information about it: its source location and +/// whether it is a safepoint. +struct InstTuple { + loc: SourceLoc, + is_safepoint: bool, + inst: I, +} + /// Machine-independent lowering driver / machine-instruction container. Maintains a correspondence /// from original Inst to MachInsts. pub struct Lower<'func, I: VCodeInst> { @@ -237,17 +250,17 @@ pub struct Lower<'func, I: VCodeInst> { next_vreg: u32, /// Insts in reverse block order, before final copy to vcode. - block_insts: Vec<(SourceLoc, I)>, + block_insts: Vec>, /// Ranges in `block_insts` constituting BBs. block_ranges: Vec<(usize, usize)>, /// Instructions collected for the BB in progress, in reverse order, with /// source-locs attached. - bb_insts: Vec<(SourceLoc, I)>, + bb_insts: Vec>, /// Instructions collected for the CLIF inst in progress, in forward order. - ir_insts: Vec, + ir_insts: Vec>, /// The register to use for GetPinnedReg, if any, on this architecture. pinned_reg: Option, @@ -276,6 +289,7 @@ fn alloc_vreg( let v = *next_vreg; *next_vreg += 1; value_regs[value] = Reg::new_virtual(regclass, v); + debug!("value {} gets vreg {:?}", value, v); } value_regs[value].as_virtual_reg().unwrap() } @@ -579,15 +593,18 @@ impl<'func, I: VCodeInst> Lower<'func, I> { } fn finish_ir_inst(&mut self, loc: SourceLoc) { - for inst in self.ir_insts.drain(..).rev() { - self.bb_insts.push((loc, inst)); + // `bb_insts` is kept in reverse order, so emit the instructions in + // reverse order. + for mut tuple in self.ir_insts.drain(..).rev() { + tuple.loc = loc; + self.bb_insts.push(tuple); } } fn finish_bb(&mut self) { let start = self.block_insts.len(); - for pair in self.bb_insts.drain(..).rev() { - self.block_insts.push(pair); + for tuple in self.bb_insts.drain(..).rev() { + self.block_insts.push(tuple); } let end = self.block_insts.len(); self.block_ranges.push((start, end)); @@ -595,9 +612,14 @@ impl<'func, I: VCodeInst> Lower<'func, I> { fn copy_bbs_to_vcode(&mut self) { for &(start, end) in self.block_ranges.iter().rev() { - for &(loc, ref inst) in &self.block_insts[start..end] { + for &InstTuple { + loc, + is_safepoint, + ref inst, + } in &self.block_insts[start..end] + { self.vcode.set_srcloc(loc); - self.vcode.push(inst.clone()); + self.vcode.push(inst.clone(), is_safepoint); } self.vcode.end_bb(); } @@ -645,7 +667,10 @@ impl<'func, I: VCodeInst> Lower<'func, I> { } /// Lower the function. - pub fn lower>(mut self, backend: &B) -> CodegenResult> { + pub fn lower>( + mut self, + backend: &B, + ) -> CodegenResult<(VCode, StackmapRequestInfo)> { debug!("about to lower function: {:?}", self.f); // Initialize the ABI object, giving it a temp if requested. @@ -730,10 +755,10 @@ impl<'func, I: VCodeInst> Lower<'func, I> { self.copy_bbs_to_vcode(); // Now that we've emitted all instructions into the VCodeBuilder, let's build the VCode. - let vcode = self.vcode.build(); + let (vcode, stackmap_info) = self.vcode.build(); debug!("built vcode: {:?}", vcode); - Ok(vcode) + Ok((vcode, stackmap_info)) } /// Get the actual inputs for a value. This is the implementation for @@ -874,6 +899,13 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> { self.inst_colors[ir_inst] } + fn is_safepoint(&self, ir_inst: Inst) -> bool { + // There is no safepoint metadata at all if we have no reftyped values + // in this function; lack of metadata implies "nothing to trace", and + // avoids overhead. + self.vcode.have_ref_values() && is_safepoint(self.f, ir_inst) + } + fn num_inputs(&self, ir_inst: Inst) -> usize { self.f.dfg.inst_args(ir_inst).len() } @@ -916,7 +948,19 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> { } fn emit(&mut self, mach_inst: I) { - self.ir_insts.push(mach_inst); + self.ir_insts.push(InstTuple { + loc: SourceLoc::default(), + is_safepoint: false, + inst: mach_inst, + }); + } + + fn emit_safepoint(&mut self, mach_inst: I) { + self.ir_insts.push(InstTuple { + loc: SourceLoc::default(), + is_safepoint: true, + inst: mach_inst, + }); } fn use_input_reg(&mut self, input: LowerInput) { diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index 380a6fe37c..e7605cff7b 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -96,7 +96,7 @@ //! //! ``` -use crate::binemit::{CodeInfo, CodeOffset}; +use crate::binemit::{CodeInfo, CodeOffset, Stackmap}; use crate::ir::condcodes::IntCC; use crate::ir::{Function, Type}; use crate::result::CodegenResult; @@ -191,6 +191,10 @@ pub trait MachInst: Clone + Debug { /// What is the worst-case instruction size emitted by this instruction type? fn worst_case_size() -> CodeOffset; + /// What is the register class used for reference types (GC-observable pointers)? Can + /// be dependent on compilation flags. + fn ref_type_rc(_flags: &Flags) -> RegClass; + /// A label-use kind: a type that describes the types of label references that /// can occur in an instruction. type LabelUse: MachInstLabelUse; @@ -256,9 +260,21 @@ pub enum MachTerminator<'a> { /// A trait describing the ability to encode a MachInst into binary machine code. pub trait MachInstEmit: MachInst { /// Persistent state carried across `emit` invocations. - type State: Default + Clone + Debug; + type State: MachInstEmitState; /// Emit the instruction. fn emit(&self, code: &mut MachBuffer, flags: &Flags, state: &mut Self::State); + /// Pretty-print the instruction. + fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut Self::State) -> String; +} + +/// A trait describing the emission state carried between MachInsts when +/// emitting a function body. +pub trait MachInstEmitState: Default + Clone + Debug { + /// Create a new emission state given the ABI object. + fn new(abi: &dyn ABIBody) -> Self; + /// Update the emission state before emitting an instruction that is a + /// safepoint. + fn pre_safepoint(&mut self, _stackmap: Stackmap) {} } /// The result of a `MachBackend::compile_function()` call. Contains machine diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 74c99b3af1..42bc0a16bd 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -17,14 +17,15 @@ //! See the main module comment in `mod.rs` for more details on the VCode-based //! backend pipeline. -use crate::ir::{self, SourceLoc}; +use crate::ir::{self, types, SourceLoc}; use crate::machinst::*; use crate::settings; use regalloc::Function as RegallocFunction; use regalloc::Set as RegallocSet; use regalloc::{ - BlockIx, InstIx, Range, RegAllocResult, RegClass, RegUsageCollector, RegUsageMapper, + BlockIx, InstIx, Range, RegAllocResult, RegClass, RegUsageCollector, RegUsageMapper, SpillSlot, + StackmapRequestInfo, }; use alloc::boxed::Box; @@ -56,6 +57,9 @@ pub struct VCode { /// VReg IR-level types. vreg_types: Vec, + /// Do we have any ref values among our vregs? + have_ref_values: bool, + /// Lowered machine instructions in order corresponding to the original IR. insts: Vec, @@ -82,6 +86,16 @@ pub struct VCode { /// ABI object. abi: Box>, + + /// Safepoint instruction indices. Filled in post-regalloc. (Prior to + /// regalloc, the safepoint instructions are listed in the separate + /// `StackmapRequestInfo` held separate from the `VCode`.) + safepoint_insns: Vec, + + /// For each safepoint entry in `safepoint_insns`, a list of `SpillSlot`s. + /// These are used to generate actual stackmaps at emission. Filled in + /// post-regalloc. + safepoint_slots: Vec>, } /// A builder for a VCode function body. This builder is designed for the @@ -102,6 +116,9 @@ pub struct VCodeBuilder { /// In-progress VCode. vcode: VCode, + /// In-progress stackmap-request info. + stackmap_info: StackmapRequestInfo, + /// Index of the last block-start in the vcode. block_start: InsnIndex, @@ -115,9 +132,17 @@ pub struct VCodeBuilder { impl VCodeBuilder { /// Create a new VCodeBuilder. pub fn new(abi: Box>, block_order: BlockLoweringOrder) -> VCodeBuilder { + let reftype_class = I::ref_type_rc(abi.flags()); let vcode = VCode::new(abi, block_order); + let stackmap_info = StackmapRequestInfo { + reftype_class, + reftyped_vregs: vec![], + safepoint_insns: vec![], + }; + VCodeBuilder { vcode, + stackmap_info, block_start: 0, succ_start: 0, cur_srcloc: SourceLoc::default(), @@ -142,6 +167,15 @@ impl VCodeBuilder { .resize(vreg.get_index() + 1, ir::types::I8); } self.vcode.vreg_types[vreg.get_index()] = ty; + if is_reftype(ty) { + self.stackmap_info.reftyped_vregs.push(vreg); + self.vcode.have_ref_values = true; + } + } + + /// Are there any reference-typed values at all among the vregs? + pub fn have_ref_values(&self) -> bool { + self.vcode.have_ref_values() } /// Set the current block as the entry block. @@ -166,7 +200,7 @@ impl VCodeBuilder { } /// Push an instruction for the current BB and current IR inst within the BB. - pub fn push(&mut self, insn: I) { + pub fn push(&mut self, insn: I, is_safepoint: bool) { match insn.is_term() { MachTerminator::None | MachTerminator::Ret => {} MachTerminator::Uncond(target) => { @@ -186,6 +220,11 @@ impl VCodeBuilder { } self.vcode.insts.push(insn); self.vcode.srclocs.push(self.cur_srcloc); + if is_safepoint { + self.stackmap_info + .safepoint_insns + .push(InstIx::new((self.vcode.insts.len() - 1) as u32)); + } } /// Get the current source location. @@ -198,21 +237,16 @@ impl VCodeBuilder { self.cur_srcloc = srcloc; } - /// Build the final VCode. - pub fn build(self) -> VCode { - self.vcode + /// Build the final VCode, returning the vcode itself as well as auxiliary + /// information, such as the stackmap request information. + pub fn build(self) -> (VCode, StackmapRequestInfo) { + // TODO: come up with an abstraction for "vcode and auxiliary data". The + // auxiliary data needs to be separate from the vcode so that it can be + // referenced as the vcode is mutated (e.g. by the register allocator). + (self.vcode, self.stackmap_info) } } -fn block_ranges(indices: &[InstIx], len: usize) -> Vec<(usize, usize)> { - let v = indices - .iter() - .map(|iix| iix.get() as usize) - .chain(iter::once(len)) - .collect::>(); - v.windows(2).map(|p| (p[0], p[1])).collect() -} - fn is_redundant_move(insn: &I) -> bool { if let Some((to, from)) = insn.is_move() { to.to_reg() == from @@ -221,6 +255,11 @@ fn is_redundant_move(insn: &I) -> bool { } } +/// Is this type a reference type? +fn is_reftype(ty: Type) -> bool { + ty == types::R32 || ty == types::R64 +} + impl VCode { /// New empty VCode. fn new(abi: Box>, block_order: BlockLoweringOrder) -> VCode { @@ -228,6 +267,7 @@ impl VCode { liveins: abi.liveins(), liveouts: abi.liveouts(), vreg_types: vec![], + have_ref_values: false, insts: vec![], srclocs: vec![], entry: 0, @@ -236,6 +276,8 @@ impl VCode { block_succs: vec![], block_order, abi, + safepoint_insns: vec![], + safepoint_slots: vec![], } } @@ -249,6 +291,11 @@ impl VCode { self.vreg_types[vreg.get_index()] } + /// Are there any reference-typed values at all among the vregs? + pub fn have_ref_values(&self) -> bool { + self.have_ref_values + } + /// Get the entry block. pub fn entry(&self) -> BlockIndex { self.entry @@ -265,6 +312,11 @@ impl VCode { self.abi.frame_size() } + /// Inbound stack-args size. + pub fn stack_args_size(&self) -> u32 { + self.abi.stack_args_size() + } + /// Get the successors for a block. pub fn succs(&self, block: BlockIndex) -> &[BlockIx] { let (start, end) = self.block_succ_range[block as usize]; @@ -281,17 +333,21 @@ impl VCode { self.abi .set_clobbered(result.clobbered_registers.map(|r| Writable::from_reg(*r))); - // We want to move instructions over in final block order, using the new - // block-start map given by the regalloc. - let block_ranges: Vec<(usize, usize)> = - block_ranges(result.target_map.elems(), result.insns.len()); let mut final_insns = vec![]; let mut final_block_ranges = vec![(0, 0); self.num_blocks()]; let mut final_srclocs = vec![]; + let mut final_safepoint_insns = vec![]; + let mut safept_idx = 0; + assert!(result.target_map.elems().len() == self.num_blocks()); for block in 0..self.num_blocks() { + let start = result.target_map.elems()[block].get() as usize; + let end = if block == self.num_blocks() - 1 { + result.insns.len() + } else { + result.target_map.elems()[block + 1].get() as usize + }; let block = block as BlockIndex; - let (start, end) = block_ranges[block as usize]; let final_start = final_insns.len() as InsnIndex; if block == self.entry { @@ -333,6 +389,16 @@ impl VCode { final_insns.push(insn.clone()); final_srclocs.push(srcloc); } + + // Was this instruction a safepoint instruction? Add its final + // index to the safepoint insn-index list if so. + if safept_idx < result.new_safepoint_insns.len() + && (result.new_safepoint_insns[safept_idx].get() as usize) == i + { + let idx = final_insns.len() - 1; + final_safepoint_insns.push(idx as InsnIndex); + safept_idx += 1; + } } let final_end = final_insns.len() as InsnIndex; @@ -344,6 +410,12 @@ impl VCode { self.insts = final_insns; self.srclocs = final_srclocs; self.block_ranges = final_block_ranges; + self.safepoint_insns = final_safepoint_insns; + + // Save safepoint slot-lists. These will be passed to the `EmitState` + // for the machine backend during emission so that it can do + // target-specific translations of slot numbers to stack offsets. + self.safepoint_slots = result.stackmaps; } /// Emit the instructions to a `MachBuffer`, containing fixed-up code and external @@ -353,11 +425,12 @@ impl VCode { I: MachInstEmit, { let mut buffer = MachBuffer::new(); - let mut state = Default::default(); + let mut state = I::State::new(&*self.abi); buffer.reserve_labels_for_blocks(self.num_blocks() as BlockIndex); // first N MachLabels are simply block indices. let flags = self.abi.flags(); + let mut safepoint_idx = 0; let mut cur_srcloc = None; for block in 0..self.num_blocks() { let block = block as BlockIndex; @@ -381,6 +454,19 @@ impl VCode { cur_srcloc = Some(srcloc); } + if safepoint_idx < self.safepoint_insns.len() + && self.safepoint_insns[safepoint_idx] == iix + { + if self.safepoint_slots[safepoint_idx].len() > 0 { + let stackmap = self.abi.spillslots_to_stackmap( + &self.safepoint_slots[safepoint_idx][..], + &state, + ); + state.pre_safepoint(stackmap); + } + safepoint_idx += 1; + } + self.insts[iix as usize].emit(&mut buffer, flags, &mut state); } @@ -476,13 +562,18 @@ impl RegallocFunction for VCode { self.abi.get_spillslot_size(regclass, ty) } - fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, vreg: VirtualReg) -> I { - let ty = self.vreg_type(vreg); + fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, vreg: Option) -> I { + let ty = vreg.map(|v| self.vreg_type(v)); self.abi.gen_spill(to_slot, from_reg, ty) } - fn gen_reload(&self, to_reg: Writable, from_slot: SpillSlot, vreg: VirtualReg) -> I { - let ty = self.vreg_type(vreg); + fn gen_reload( + &self, + to_reg: Writable, + from_slot: SpillSlot, + vreg: Option, + ) -> I { + let ty = vreg.map(|v| self.vreg_type(v)); self.abi.gen_reload(to_reg, from_slot, ty) } @@ -531,7 +622,7 @@ impl fmt::Debug for VCode { } /// Pretty-printing with `RealRegUniverse` context. -impl ShowWithRRU for VCode { +impl ShowWithRRU for VCode { fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { use std::fmt::Write; @@ -539,6 +630,8 @@ impl ShowWithRRU for VCode { write!(&mut s, "VCode_ShowWithRRU {{{{\n").unwrap(); write!(&mut s, " Entry block: {}\n", self.entry).unwrap(); + let mut state = Default::default(); + let mut safepoint_idx = 0; for i in 0..self.num_blocks() { let block = i as BlockIndex; @@ -552,11 +645,22 @@ impl ShowWithRRU for VCode { let (start, end) = self.block_ranges[block as usize]; write!(&mut s, " (instruction range: {} .. {})\n", start, end).unwrap(); for inst in start..end { + if safepoint_idx < self.safepoint_insns.len() + && self.safepoint_insns[safepoint_idx] == inst + { + write!( + &mut s, + " (safepoint: slots {:?} with EmitState {:?})\n", + self.safepoint_slots[safepoint_idx], state, + ) + .unwrap(); + safepoint_idx += 1; + } write!( &mut s, " Inst {}: {}\n", inst, - self.insts[inst as usize].show_rru(mb_rru) + self.insts[inst as usize].pretty_print(mb_rru, &mut state) ) .unwrap(); } diff --git a/cranelift/codegen/src/regalloc/safepoint.rs b/cranelift/codegen/src/regalloc/safepoint.rs index 0700735611..18fe20ffac 100644 --- a/cranelift/codegen/src/regalloc/safepoint.rs +++ b/cranelift/codegen/src/regalloc/safepoint.rs @@ -1,6 +1,7 @@ use crate::cursor::{Cursor, FuncCursor}; use crate::dominator_tree::DominatorTree; -use crate::ir::{Function, InstBuilder, Opcode}; +use crate::inst_predicates::is_safepoint; +use crate::ir::{Function, InstBuilder}; use crate::isa::TargetIsa; use crate::regalloc::live_value_tracker::LiveValueTracker; use crate::regalloc::liveness::Liveness; @@ -51,12 +52,8 @@ pub fn emit_stackmaps( pos.goto_top(block); while let Some(inst) = pos.next_inst() { - if pos.func.dfg[inst].opcode().is_resumable_trap() { + if is_safepoint(&pos.func, inst) { insert_and_encode_safepoint(&mut pos, tracker, isa); - } else if pos.func.dfg[inst].opcode().is_call() { - insert_and_encode_safepoint(&mut pos, tracker, isa); - } else if pos.func.dfg[inst].opcode() == Opcode::Safepoint { - panic!("safepoint instruction can only be used by the compiler!"); } // Process the instruction and get rid of dead values. diff --git a/cranelift/filetests/filetests/vcode/aarch64/reftypes.clif b/cranelift/filetests/filetests/vcode/aarch64/reftypes.clif index 9956b6f4ad..c4e8067b9f 100644 --- a/cranelift/filetests/filetests/vcode/aarch64/reftypes.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/reftypes.clif @@ -63,3 +63,65 @@ block0: ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret + +function %f5(r64, r64) -> r64, r64, r64 { + fn0 = %f(r64) -> b1 + ss0 = explicit_slot 8 + +block0(v0: r64, v1: r64): + v2 = call fn0(v0) + stack_store.r64 v0, ss0 + brz v2, block1(v1, v0) + jump block2(v0, v1) + +block1(v3: r64, v4: r64): + jump block3(v3, v4) + +block2(v5: r64, v6: r64): + jump block3(v5, v6) + +block3(v7: r64, v8: r64): + v9 = stack_load.r64 ss0 + return v7, v8, v9 +} + +; check: Block 0: +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: sub sp, sp, #32 +; nextln: stp x19, x20, [sp, #-16]! +; nextln: virtual_sp_offset_adjust 16 +; nextln: mov x19, x0 +; nextln: mov x20, x1 +; nextln: mov x0, x19 +; nextln: ldr x16, 8 ; b 12 ; data +; nextln: stur x19, [sp, #24] +; nextln: stur x20, [sp, #32] +; nextln: (safepoint: slots [S0, S1] +; nextln: blr x16 +; nextln: ldur x19, [sp, #24] +; nextln: ldur x20, [sp, #32] +; nextln: add x1, sp, #16 +; nextln: stur x19, [x1] +; nextln: and w0, w0, #1 +; nextln: cbz x0, label1 ; b label3 +; check: Block 1: +; check: b label2 +; check: Block 2: +; check: mov x0, x20 +; nextln: b label5 +; check: Block 3: +; check: b label4 +; check: Block 4: +; check: mov x0, x19 +; nextln: mov x19, x20 +; nextln: b label5 +; check: Block 5: +; check: add x1, sp, #16 +; nextln: ldur x1, [x1] +; nextln: mov x2, x1 +; nextln: mov x1, x19 +; nextln: ldp x19, x20, [sp], #16 +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret From 26529006e0e58dd8aa912d938b36730c9d2ad0c2 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 14 Jul 2020 10:12:40 -0700 Subject: [PATCH 3/3] Address review comments. --- cranelift/codegen/src/isa/aarch64/abi.rs | 18 ++++++++++++++++-- cranelift/codegen/src/isa/aarch64/inst/mod.rs | 2 +- cranelift/codegen/src/isa/x64/abi.rs | 12 ++++++++++++ cranelift/codegen/src/isa/x64/inst/mod.rs | 2 +- cranelift/codegen/src/machinst/buffer.rs | 5 +++-- cranelift/codegen/src/machinst/lower.rs | 10 ---------- cranelift/codegen/src/machinst/mod.rs | 2 +- cranelift/codegen/src/machinst/vcode.rs | 4 ++-- .../filetests/vcode/aarch64/reftypes.clif | 19 ++++--------------- 9 files changed, 40 insertions(+), 34 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index fba104ef43..f642326172 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -404,7 +404,8 @@ fn in_int_reg(ty: ir::Type) -> bool { match ty { types::I8 | types::I16 | types::I32 | types::I64 => true, types::B1 | types::B8 | types::B16 | types::B32 | types::B64 => true, - types::R32 | types::R64 => true, + types::R64 => true, + types::R32 => panic!("Unexpected 32-bit reference on a 64-bit platform!"), _ => false, } } @@ -1134,7 +1135,8 @@ impl ABIBody for AArch64ABIBody { } // N.B.: "nominal SP", which we use to refer to stackslots and - // spillslots, is right here. + // spillslots, is defined to be equal to the stack pointer at this point + // in the prologue. // // If we push any clobbers below, we emit a virtual-SP adjustment // meta-instruction so that the nominal-SP references behave as if SP @@ -1322,9 +1324,21 @@ impl ABIBody for AArch64ABIBody { } } +/// Return a type either from an optional type hint, or if not, from the default +/// type associated with the given register's class. This is used to generate +/// loads/spills appropriately given the type of value loaded/stored (which may +/// be narrower than the spillslot). We usually have the type because the +/// regalloc usually provides the vreg being spilled/reloaded, and we know every +/// vreg's type. However, the regalloc *can* request a spill/reload without an +/// associated vreg when needed to satisfy a safepoint (which requires all +/// ref-typed values, even those in real registers in the original vcode, to be +/// in spillslots). fn ty_from_ty_hint_or_reg_class(r: Reg, ty: Option) -> Type { match (ty, r.get_class()) { + // If the type is provided (Some(t), _) => t, + // If no type is provided, this should be a register spill for a + // safepoint, so we only expect I64 (integer) registers. (None, RegClass::I64) => I64, _ => panic!("Unexpected register class!"), } diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 775fd083d5..22ace912aa 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -2138,7 +2138,7 @@ impl MachInst for Inst { 44 } - fn ref_type_rc(_: &settings::Flags) -> RegClass { + fn ref_type_regclass(_: &settings::Flags) -> RegClass { RegClass::I64 } } diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index d19895b675..4444edafc3 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -588,9 +588,21 @@ impl ABIBody for X64ABIBody { } } +/// Return a type either from an optional type hint, or if not, from the default +/// type associated with the given register's class. This is used to generate +/// loads/spills appropriately given the type of value loaded/stored (which may +/// be narrower than the spillslot). We usually have the type because the +/// regalloc usually provides the vreg being spilled/reloaded, and we know every +/// vreg's type. However, the regalloc *can* request a spill/reload without an +/// associated vreg when needed to satisfy a safepoint (which requires all +/// ref-typed values, even those in real registers in the original vcode, to be +/// in spillslots). fn ty_from_ty_hint_or_reg_class(r: Reg, ty: Option) -> Type { match (ty, r.get_class()) { + // If the type is provided (Some(t), _) => t, + // If no type is provided, this should be a register spill for a + // safepoint, so we only expect I64 (integer) registers. (None, RegClass::I64) => I64, _ => panic!("Unexpected register class!"), } diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 9c917f0bc9..20b7037ba4 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -1258,7 +1258,7 @@ impl MachInst for Inst { 15 } - fn ref_type_rc(_: &settings::Flags) -> RegClass { + fn ref_type_regclass(_: &settings::Flags) -> RegClass { RegClass::I64 } diff --git a/cranelift/codegen/src/machinst/buffer.rs b/cranelift/codegen/src/machinst/buffer.rs index b12cd9f98c..edbb2a2a5d 100644 --- a/cranelift/codegen/src/machinst/buffer.rs +++ b/cranelift/codegen/src/machinst/buffer.rs @@ -1318,8 +1318,9 @@ pub struct MachSrcLoc { pub struct MachStackMap { /// The code offset at which this stackmap applies. pub offset: CodeOffset, - /// The code offset at the *end* of the instruction at which this stackmap - /// applies. + /// The code offset just past the "end" of the instruction: that is, the + /// offset of the first byte of the following instruction, or equivalently, + /// the start offset plus the instruction length. pub offset_end: CodeOffset, /// The Stackmap itself. pub stackmap: Stackmap, diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index fbc6c2ee8a..608e8b4896 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -4,7 +4,6 @@ use crate::entity::SecondaryMap; use crate::fx::{FxHashMap, FxHashSet}; -use crate::inst_predicates::is_safepoint; use crate::inst_predicates::{has_side_effect_or_load, is_constant_64bit}; use crate::ir::instructions::BranchInfo; use crate::ir::types::I64; @@ -94,8 +93,6 @@ pub trait LowerCtx { /// every side-effecting op; the backend should not try to merge across /// side-effect colors unless the op being merged is known to be pure. fn inst_color(&self, ir_inst: Inst) -> InstColor; - /// Determine whether an instruction is a safepoint. - fn is_safepoint(&self, ir_inst: Inst) -> bool; // Instruction input/output queries: @@ -899,13 +896,6 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> { self.inst_colors[ir_inst] } - fn is_safepoint(&self, ir_inst: Inst) -> bool { - // There is no safepoint metadata at all if we have no reftyped values - // in this function; lack of metadata implies "nothing to trace", and - // avoids overhead. - self.vcode.have_ref_values() && is_safepoint(self.f, ir_inst) - } - fn num_inputs(&self, ir_inst: Inst) -> usize { self.f.dfg.inst_args(ir_inst).len() } diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index e7605cff7b..9a0ae1e820 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -193,7 +193,7 @@ pub trait MachInst: Clone + Debug { /// What is the register class used for reference types (GC-observable pointers)? Can /// be dependent on compilation flags. - fn ref_type_rc(_flags: &Flags) -> RegClass; + fn ref_type_regclass(_flags: &Flags) -> RegClass; /// A label-use kind: a type that describes the types of label references that /// can occur in an instruction. diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 42bc0a16bd..e7e9267569 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -132,7 +132,7 @@ pub struct VCodeBuilder { impl VCodeBuilder { /// Create a new VCodeBuilder. pub fn new(abi: Box>, block_order: BlockLoweringOrder) -> VCodeBuilder { - let reftype_class = I::ref_type_rc(abi.flags()); + let reftype_class = I::ref_type_regclass(abi.flags()); let vcode = VCode::new(abi, block_order); let stackmap_info = StackmapRequestInfo { reftype_class, @@ -257,7 +257,7 @@ fn is_redundant_move(insn: &I) -> bool { /// Is this type a reference type? fn is_reftype(ty: Type) -> bool { - ty == types::R32 || ty == types::R64 + ty == types::R64 || ty == types::R32 } impl VCode { diff --git a/cranelift/filetests/filetests/vcode/aarch64/reftypes.clif b/cranelift/filetests/filetests/vcode/aarch64/reftypes.clif index c4e8067b9f..3e24795676 100644 --- a/cranelift/filetests/filetests/vcode/aarch64/reftypes.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/reftypes.clif @@ -12,18 +12,7 @@ block0(v0: r64): ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret -function %f1(r32) -> r32 { -block0(v0: r32): - return v0 -} - -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: mov sp, fp -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret - -function %f2(r64) -> b1 { +function %f1(r64) -> b1 { block0(v0: r64): v1 = is_null v0 return v1 @@ -37,7 +26,7 @@ block0(v0: r64): ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret -function %f3(r64) -> b1 { +function %f2(r64) -> b1 { block0(v0: r64): v1 = is_invalid v0 return v1 @@ -51,7 +40,7 @@ block0(v0: r64): ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret -function %f4() -> r64 { +function %f3() -> r64 { block0: v0 = null.r64 return v0 @@ -64,7 +53,7 @@ block0: ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret -function %f5(r64, r64) -> r64, r64, r64 { +function %f4(r64, r64) -> r64, r64, r64 { fn0 = %f(r64) -> b1 ss0 = explicit_slot 8