From e39b4aba1c006e23af3a233cbe85e308d5806e66 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 21 Apr 2020 12:23:10 -0700 Subject: [PATCH] Fix long-range (non-colocated) aarch64 calls to not use Arm64Call reloc, and fix simplejit to use it. Previously, every call was lowered on AArch64 to a `call` instruction, which takes a signed 26-bit PC-relative offset. Including the 2-bit left shift, this gives a range of +/- 128 MB. Longer-distance offsets would cause an impossible relocation record to be emitted (or rather, a record that a more sophisticated linker would fix up by inserting a shim/veneer). This commit adds a notion of "relocation distance" in the MachInst backends, and provides this information for every call target and symbol reference. The intent is that backends on architectures like AArch64, where there are different offset sizes / addressing strategies to choose from, can either emit a regular call or a load-64-bit-constant / call-indirect sequence, as necessary. This avoids the need to implement complex linking behavior. The MachInst driver code provides this information based on the "colocated" bit in the CLIF symbol references, which appears to have been designed for this purpose, or at least a similar one. Combined with the `use_colocated_libcalls` setting, this allows client code to ensure that library calls can link to library code at any location in the address space. Separately, the `simplejit` example did not handle `Arm64Call`; rather than doing so, it appears all that is necessary to get its tests to pass is to set the `use_colocated_libcalls` flag to false, to make use of the above change. This fixes the `libcall_function` unit-test in this crate. --- cranelift/codegen/src/ir/extfunc.rs | 22 ++++++++++ cranelift/codegen/src/ir/globalvalue.rs | 19 +++++++++ cranelift/codegen/src/isa/aarch64/abi.rs | 22 ++++++++-- cranelift/codegen/src/isa/aarch64/inst/mod.rs | 5 ++- .../codegen/src/isa/aarch64/lower_inst.rs | 12 ++++-- cranelift/codegen/src/machinst/lower.rs | 40 ++++++++++++++----- .../filetests/vcode/aarch64/call.clif | 3 +- .../filetests/vcode/aarch64/stack-limit.clif | 6 ++- cranelift/simplejit/src/backend.rs | 7 +++- cranelift/simplejit/tests/basic.rs | 1 - 10 files changed, 114 insertions(+), 23 deletions(-) diff --git a/cranelift/codegen/src/ir/extfunc.rs b/cranelift/codegen/src/ir/extfunc.rs index 9274efe9b9..0a34974c9d 100644 --- a/cranelift/codegen/src/ir/extfunc.rs +++ b/cranelift/codegen/src/ir/extfunc.rs @@ -7,6 +7,7 @@ use crate::ir::{ArgumentLoc, ExternalName, SigRef, Type}; use crate::isa::{CallConv, RegInfo, RegUnit}; +use crate::machinst::RelocDistance; use alloc::vec::Vec; use core::fmt; use core::str::FromStr; @@ -366,6 +367,16 @@ pub struct ExtFuncData { /// Will this function be defined nearby, such that it will always be a certain distance away, /// after linking? If so, references to it can avoid going through a GOT or PLT. Note that /// symbols meant to be preemptible cannot be considered colocated. + /// + /// If `true`, some backends may use relocation forms that have limited range. The exact + /// distance depends on the code model in use. Currently on AArch64, for example, Cranelift + /// uses a custom code model supporting up to +/- 128MB displacements. If it is unknown how + /// far away the target will be, it is best not to set the `colocated` flag; in general, this + /// flag is best used when the target is known to be in the same unit of code generation, such + /// as a Wasm module. + /// + /// See the documentation for [`RelocDistance`](machinst::RelocDistance) for more details. A + /// `colocated` flag value of `true` implies `RelocDistance::Near`. pub colocated: bool, } @@ -378,6 +389,17 @@ impl fmt::Display for ExtFuncData { } } +impl ExtFuncData { + /// Return an estimate of the distance to the referred-to function symbol. + pub fn reloc_distance(&self) -> RelocDistance { + if self.colocated { + RelocDistance::Near + } else { + RelocDistance::Far + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/cranelift/codegen/src/ir/globalvalue.rs b/cranelift/codegen/src/ir/globalvalue.rs index 305654a95f..dae869445c 100644 --- a/cranelift/codegen/src/ir/globalvalue.rs +++ b/cranelift/codegen/src/ir/globalvalue.rs @@ -3,6 +3,7 @@ use crate::ir::immediates::{Imm64, Offset32}; use crate::ir::{ExternalName, GlobalValue, Type}; use crate::isa::TargetIsa; +use crate::machinst::RelocDistance; use core::fmt; /// Information about a global value declaration. @@ -62,6 +63,10 @@ pub enum GlobalValueData { /// Will this symbol be defined nearby, such that it will always be a certain distance /// away, after linking? If so, references to it can avoid going through a GOT. Note that /// symbols meant to be preemptible cannot be colocated. + /// + /// If `true`, some backends may use relocation forms that have limited range: for example, + /// a +/- 2^27-byte range on AArch64. See the documentation for + /// [`RelocDistance`](machinst::RelocDistance) for more details. colocated: bool, /// Does this symbol refer to a thread local storage value? @@ -85,6 +90,20 @@ impl GlobalValueData { Self::IAddImm { global_type, .. } | Self::Load { global_type, .. } => global_type, } } + + /// If this global references a symbol, return an estimate of the relocation distance, + /// based on the `colocated` flag. + pub fn maybe_reloc_distance(&self) -> Option { + match self { + &GlobalValueData::Symbol { + colocated: true, .. + } => Some(RelocDistance::Near), + &GlobalValueData::Symbol { + colocated: false, .. + } => Some(RelocDistance::Far), + _ => None, + } + } } impl fmt::Display for GlobalValueData { diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 8fb085725f..a2d2552d86 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -1061,7 +1061,7 @@ impl ABIBody for AArch64ABIBody { } enum CallDest { - ExtName(ir::ExternalName), + ExtName(ir::ExternalName, RelocDistance), Reg(Reg), } @@ -1102,6 +1102,7 @@ impl AArch64ABICall { pub fn from_func( sig: &ir::Signature, extname: &ir::ExternalName, + dist: RelocDistance, loc: ir::SourceLoc, ) -> AArch64ABICall { let sig = ABISig::from_func_sig(sig); @@ -1110,7 +1111,7 @@ impl AArch64ABICall { sig, uses, defs, - dest: CallDest::ExtName(extname.clone()), + dest: CallDest::ExtName(extname.clone(), dist), loc, opcode: ir::Opcode::Call, } @@ -1207,13 +1208,28 @@ impl ABICall for AArch64ABICall { fn gen_call(&self) -> Vec { let (uses, defs) = (self.uses.clone(), self.defs.clone()); match &self.dest { - &CallDest::ExtName(ref name) => vec![Inst::Call { + &CallDest::ExtName(ref name, RelocDistance::Near) => vec![Inst::Call { dest: name.clone(), uses, defs, loc: self.loc, opcode: self.opcode, }], + &CallDest::ExtName(ref name, RelocDistance::Far) => vec![ + Inst::LoadExtName { + rd: writable_spilltmp_reg(), + name: name.clone(), + offset: 0, + srcloc: self.loc, + }, + Inst::CallInd { + rn: spilltmp_reg(), + uses, + defs, + loc: self.loc, + opcode: self.opcode, + }, + ], &CallDest::Reg(reg) => vec![Inst::CallInd { rn: reg, uses, diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 4221d1e3b8..436c0f4b78 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -612,7 +612,10 @@ pub enum Inst { cond: Cond, }, - /// A machine call instruction. + /// A machine call instruction. N.B.: this allows only a +/- 128MB offset (it uses a relocation + /// of type `Reloc::Arm64Call`); if the destination distance is not `RelocDistance::Near`, the + /// code should use a `LoadExtName` / `CallInd` sequence instead, allowing an arbitrary 64-bit + /// target. Call { dest: ExternalName, uses: Set, diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 46fab7aadb..f8741212a9 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1233,7 +1233,8 @@ pub(crate) fn lower_insn_to_regs>(ctx: &mut C, insn: IRIns Opcode::FuncAddr => { let rd = output_to_reg(ctx, outputs[0]); - let extname = ctx.call_target(insn).unwrap().clone(); + let (extname, _) = ctx.call_target(insn).unwrap(); + let extname = extname.clone(); let loc = ctx.srcloc(insn); ctx.emit(Inst::LoadExtName { rd, @@ -1249,7 +1250,7 @@ pub(crate) fn lower_insn_to_regs>(ctx: &mut C, insn: IRIns Opcode::SymbolValue => { let rd = output_to_reg(ctx, outputs[0]); - let (extname, offset) = ctx.symbol_value(insn).unwrap(); + let (extname, _, offset) = ctx.symbol_value(insn).unwrap(); let extname = extname.clone(); let loc = ctx.srcloc(insn); ctx.emit(Inst::LoadExtName { @@ -1264,12 +1265,15 @@ pub(crate) fn lower_insn_to_regs>(ctx: &mut C, insn: IRIns let loc = ctx.srcloc(insn); let (abi, inputs) = match op { Opcode::Call => { - let extname = ctx.call_target(insn).unwrap(); + let (extname, dist) = ctx.call_target(insn).unwrap(); let extname = extname.clone(); let sig = ctx.call_sig(insn).unwrap(); assert!(inputs.len() == sig.params.len()); assert!(outputs.len() == sig.returns.len()); - (AArch64ABICall::from_func(sig, &extname, loc), &inputs[..]) + ( + AArch64ABICall::from_func(sig, &extname, dist, loc), + &inputs[..], + ) } Opcode::CallIndirect => { let ptr = input_to_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64); diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index b90b08b3be..c9078fbc7c 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -67,12 +67,15 @@ pub trait LowerCtx { fn bb_param(&self, bb: Block, idx: usize) -> Reg; /// Get the register for a return value. fn retval(&self, idx: usize) -> Writable; - /// Get the target for a call instruction, as an `ExternalName`. - fn call_target<'b>(&'b self, ir_inst: Inst) -> Option<&'b ExternalName>; + /// Get the target for a call instruction, as an `ExternalName`. Returns a tuple + /// providing this name and the "relocation distance", i.e., whether the backend + /// can assume the target will be "nearby" (within some small offset) or an + /// arbitrary address. (This comes from the `colocated` bit in the CLIF.) + fn call_target<'b>(&'b self, ir_inst: Inst) -> Option<(&'b ExternalName, RelocDistance)>; /// Get the signature for a call or call-indirect instruction. fn call_sig<'b>(&'b self, ir_inst: Inst) -> Option<&'b Signature>; - /// Get the symbol name and offset for a symbol_value instruction. - fn symbol_value<'b>(&'b self, ir_inst: Inst) -> Option<(&'b ExternalName, i64)>; + /// Get the symbol name, relocation distance estimate, and offset for a symbol_value instruction. + fn symbol_value<'b>(&'b self, ir_inst: Inst) -> Option<(&'b ExternalName, RelocDistance, i64)>; /// Returns the memory flags of a given memory access. fn memflags(&self, ir_inst: Inst) -> Option; /// Get the source location for a given instruction. @@ -122,6 +125,18 @@ pub struct Lower<'func, I: VCodeInst> { next_vreg: u32, } +/// Notion of "relocation distance". This gives an estimate of how far away a symbol will be from a +/// reference. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum RelocDistance { + /// Target of relocation is "nearby". The threshold for this is fuzzy but should be interpreted + /// as approximately "within the compiled output of one module"; e.g., within AArch64's +/- + /// 128MB offset. If unsure, use `Far` instead. + Near, + /// Target of relocation could be anywhere in the address space. + Far, +} + fn alloc_vreg( value_regs: &mut SecondaryMap, regclass: RegClass, @@ -647,13 +662,17 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> { Writable::from_reg(self.retval_regs[idx].0) } - /// Get the target for a call instruction, as an `ExternalName`. - fn call_target<'b>(&'b self, ir_inst: Inst) -> Option<&'b ExternalName> { + /// Get the target for a call instruction, as an `ExternalName`. Returns a tuple + /// providing this name and the "relocation distance", i.e., whether the backend + /// can assume the target will be "nearby" (within some small offset) or an + /// arbitrary address. (This comes from the `colocated` bit in the CLIF.) + fn call_target<'b>(&'b self, ir_inst: Inst) -> Option<(&'b ExternalName, RelocDistance)> { match &self.f.dfg[ir_inst] { &InstructionData::Call { func_ref, .. } | &InstructionData::FuncAddr { func_ref, .. } => { let funcdata = &self.f.dfg.ext_funcs[func_ref]; - Some(&funcdata.name) + let dist = funcdata.reloc_distance(); + Some((&funcdata.name, dist)) } _ => None, } @@ -670,8 +689,8 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> { } } - /// Get the symbol name and offset for a symbol_value instruction. - fn symbol_value<'b>(&'b self, ir_inst: Inst) -> Option<(&'b ExternalName, i64)> { + /// Get the symbol name, relocation distance estimate, and offset for a symbol_value instruction. + fn symbol_value<'b>(&'b self, ir_inst: Inst) -> Option<(&'b ExternalName, RelocDistance, i64)> { match &self.f.dfg[ir_inst] { &InstructionData::UnaryGlobalValue { global_value, .. } => { let gvdata = &self.f.global_values[global_value]; @@ -682,7 +701,8 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> { .. } => { let offset = offset.bits(); - Some((name, offset)) + let dist = gvdata.maybe_reloc_distance().unwrap(); + Some((name, dist, offset)) } _ => None, } diff --git a/cranelift/filetests/filetests/vcode/aarch64/call.clif b/cranelift/filetests/filetests/vcode/aarch64/call.clif index 1429dceed6..d88a20ceab 100644 --- a/cranelift/filetests/filetests/vcode/aarch64/call.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/call.clif @@ -11,7 +11,8 @@ block0(v0: i64): ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp -; nextln: bl 0 +; nextln: ldr x15, 8 ; b 12 ; data +; nextln: blr x15 ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret diff --git a/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif b/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif index 4b39a81b6f..13b431867e 100644 --- a/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif @@ -45,7 +45,8 @@ block0(v0: i64): ; nextln: subs xzr, sp, x0 ; nextln: b.hs 8 ; nextln: udf -; nextln: bl 0 +; nextln: ldr x15 +; nextln: blr x15 ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret @@ -68,7 +69,8 @@ block0(v0: i64): ; nextln: subs xzr, sp, x15 ; nextln: b.hs 8 ; nextln: udf -; nextln: bl 0 +; nextln: ldr x15 +; nextln: blr x15 ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret diff --git a/cranelift/simplejit/src/backend.rs b/cranelift/simplejit/src/backend.rs index c69cf11b49..fc22d62f92 100644 --- a/cranelift/simplejit/src/backend.rs +++ b/cranelift/simplejit/src/backend.rs @@ -5,6 +5,7 @@ use cranelift_codegen::binemit::{ Addend, CodeOffset, Reloc, RelocSink, Stackmap, StackmapSink, TrapSink, }; use cranelift_codegen::isa::TargetIsa; +use cranelift_codegen::settings::Configurable; use cranelift_codegen::{self, ir, settings}; use cranelift_module::{ Backend, DataContext, DataDescription, DataId, FuncId, Init, Linkage, ModuleNamespace, @@ -40,7 +41,11 @@ impl SimpleJITBuilder { /// floating point instructions, and for stack probes. If you don't know what to use for this /// argument, use `cranelift_module::default_libcall_names()`. pub fn new(libcall_names: Box String>) -> Self { - let flag_builder = settings::builder(); + let mut flag_builder = settings::builder(); + // On at least AArch64, "colocated" calls use shorter-range relocations, + // which might not reach all definitions; we can't handle that here, so + // we require long-range relocation types. + flag_builder.set("use_colocated_libcalls", "false").unwrap(); let isa_builder = cranelift_native::builder().unwrap_or_else(|msg| { panic!("host machine is not supported: {}", msg); }); diff --git a/cranelift/simplejit/tests/basic.rs b/cranelift/simplejit/tests/basic.rs index 30a8784aba..31d19fa97a 100644 --- a/cranelift/simplejit/tests/basic.rs +++ b/cranelift/simplejit/tests/basic.rs @@ -153,7 +153,6 @@ fn switch_error() { } #[test] -#[cfg_attr(target_arch = "aarch64", should_panic)] // FIXME(#1521) fn libcall_function() { let mut module: Module = Module::new(SimpleJITBuilder::new(default_libcall_names()));