diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index ce669459e1..74ec299bed 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -110,7 +110,13 @@ fn machreg_to_gpr_or_vec(m: Reg) -> u32 { u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap() } -fn enc_arith_rrr(bits_31_21: u32, bits_15_10: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 { +pub(crate) fn enc_arith_rrr( + bits_31_21: u32, + bits_15_10: u32, + rd: Writable<Reg>, + rn: Reg, + rm: Reg, +) -> u32 { (bits_31_21 << 21) | (bits_15_10 << 10) | machreg_to_gpr(rd.to_reg()) @@ -243,7 +249,7 @@ fn enc_ldst_reg( | machreg_to_gpr_or_vec(rd) } -fn enc_ldst_imm19(op_31_24: u32, imm19: u32, rd: Reg) -> u32 { +pub(crate) fn enc_ldst_imm19(op_31_24: u32, imm19: u32, rd: Reg) -> u32 { (op_31_24 << 24) | (imm19 << 5) | machreg_to_gpr_or_vec(rd) } @@ -320,11 +326,11 @@ fn enc_bit_rr(size: u32, opcode2: u32, opcode1: u32, rn: Reg, rd: Writable<Reg>) | machreg_to_gpr(rd.to_reg()) } -fn enc_br(rn: Reg) -> u32 { +pub(crate) fn enc_br(rn: Reg) -> u32 { 0b1101011_0000_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5) } -fn enc_adr(off: i32, rd: Writable<Reg>) -> u32 { +pub(crate) fn enc_adr(off: i32, rd: Writable<Reg>) -> u32 { let off = u32::try_from(off).unwrap(); let immlo = off & 3; let immhi = (off >> 2) & ((1 << 19) - 1); @@ -2694,7 +2700,7 @@ impl MachInstEmit for Inst { dest: BranchTarget::Label(jump_around_label), }; jmp.emit(sink, emit_info, state); - sink.emit_island(); + sink.emit_island(needed_space + 4); sink.bind_label(jump_around_label); } } diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index ce1b520429..ac4d958bb1 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -3,7 +3,7 @@ // Some variants are not constructed, but we still want them as options in the future. #![allow(dead_code)] -use crate::binemit::CodeOffset; +use crate::binemit::{Addend, CodeOffset, Reloc}; use crate::ir::types::{ B1, B128, B16, B32, B64, B8, F32, F64, FFLAGS, I128, I16, I32, I64, I8, I8X16, IFLAGS, R32, R64, }; @@ -4786,13 +4786,18 @@ impl MachInstLabelUse for LabelUse { fn supports_veneer(self) -> bool { match self { LabelUse::Branch19 => true, // veneer is a Branch26 + LabelUse::Branch26 => true, // veneer is a PCRel32 _ => false, } } /// How large is the veneer, if supported? fn veneer_size(self) -> CodeOffset { - 4 + match self { + LabelUse::Branch19 => 4, + LabelUse::Branch26 => 20, + _ => unreachable!(), + } } /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return @@ -4810,7 +4815,47 @@ impl MachInstLabelUse for LabelUse { buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word)); (veneer_offset, LabelUse::Branch26) } + + // This is promoting a 26-bit call/jump to a 32-bit call/jump to + // get a longer range. This jump translates to a jump to a + // relative location based on the address of the constant loaded + // from here. + // + // If this path is taken from a call instruction then caller-saved + // registers are available (minus arguments), so x16/x17 are + // available. Otherwise for intra-function jumps we also reserve + // x16/x17 as spill-style registers. In both cases these are + // available for us to use.
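+ // + // Concretely, the 20-byte veneer laid down below is: + // + // ldrsw x16, 16 ; load the 32-bit offset stored 16 bytes ahead + // adr x17, 12 ; x17 = address of that stored offset + // add x16, x16, x17 + // br x16 + // <4-byte signed offset> ; patched later via the `PCRel32` fixup + // + // so the jump target is the stored offset plus the address at which it + // is stored, which is exactly the quantity `LabelUse::PCRel32` patches in.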
+ LabelUse::Branch26 => { + let tmp1 = regs::spilltmp_reg(); + let tmp1_w = regs::writable_spilltmp_reg(); + let tmp2 = regs::tmp2_reg(); + let tmp2_w = regs::writable_tmp2_reg(); + // ldrsw x16, 16 + let ldr = emit::enc_ldst_imm19(0b1001_1000, 16 / 4, tmp1); + // adr x17, 12 + let adr = emit::enc_adr(12, tmp2_w); + // add x16, x16, x17 + let add = emit::enc_arith_rrr(0b10001011_000, 0, tmp1_w, tmp1, tmp2); + // br x16 + let br = emit::enc_br(tmp1); + buffer[0..4].clone_from_slice(&u32::to_le_bytes(ldr)); + buffer[4..8].clone_from_slice(&u32::to_le_bytes(adr)); + buffer[8..12].clone_from_slice(&u32::to_le_bytes(add)); + buffer[12..16].clone_from_slice(&u32::to_le_bytes(br)); + // the 4-byte signed immediate we'll load is after these + // instructions, 16 bytes in. + (veneer_offset + 16, LabelUse::PCRel32) + } + _ => panic!("Unsupported label-reference type for veneer generation!"), } } + + fn from_reloc(reloc: Reloc, addend: Addend) -> Option<LabelUse> { + match (reloc, addend) { + (Reloc::Arm64Call, 0) => Some(LabelUse::Branch26), + _ => None, + } + } } diff --git a/cranelift/codegen/src/isa/aarch64/mod.rs b/cranelift/codegen/src/isa/aarch64/mod.rs index 3243d0f7b4..c239beef05 100644 --- a/cranelift/codegen/src/isa/aarch64/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/mod.rs @@ -4,7 +4,10 @@ use crate::ir::condcodes::IntCC; use crate::ir::Function; use crate::isa::aarch64::settings as aarch64_settings; use crate::isa::Builder as IsaBuilder; -use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode}; +use crate::machinst::{ + compile, MachBackend, MachCompileResult, MachTextSectionBuilder, TargetIsaAdapter, + TextSectionBuilder, VCode, +}; use crate::result::CodegenResult; use crate::settings as shared_settings; use alloc::{boxed::Box, vec::Vec}; @@ -161,6 +164,10 @@ impl MachBackend for AArch64Backend { fn create_systemv_cie(&self) -> Option<systemv::CIE> { Some(inst::unwind::systemv::create_cie()) } + + fn text_section_builder(&self, num_funcs: u32) -> Box<dyn TextSectionBuilder> { + Box::new(MachTextSectionBuilder::<inst::Inst>::new(num_funcs)) + } } /// Create a new `isa::Builder`. diff --git a/cranelift/codegen/src/isa/arm32/inst/mod.rs b/cranelift/codegen/src/isa/arm32/inst/mod.rs index b57ec5e0f2..8151eab818 100644 --- a/cranelift/codegen/src/isa/arm32/inst/mod.rs +++ b/cranelift/codegen/src/isa/arm32/inst/mod.rs @@ -2,7 +2,7 @@ #![allow(dead_code)] -use crate::binemit::CodeOffset; +use crate::binemit::{Addend, CodeOffset, Reloc}; use crate::ir::types::{B1, B16, B32, B8, I16, I32, I8, IFLAGS}; use crate::ir::{ExternalName, Opcode, TrapCode, Type}; use crate::machinst::*; @@ -1317,6 +1317,10 @@ impl MachInstLabelUse for LabelUse { ) -> (CodeOffset, LabelUse) { panic!("Veneer not supported yet.") } + + fn from_reloc(_reloc: Reloc, _addend: Addend) -> Option<LabelUse> { + None + } } #[cfg(test)] diff --git a/cranelift/codegen/src/isa/arm32/mod.rs b/cranelift/codegen/src/isa/arm32/mod.rs index 477dc6ec46..e4280894a1 100644 --- a/cranelift/codegen/src/isa/arm32/mod.rs +++ b/cranelift/codegen/src/isa/arm32/mod.rs @@ -3,7 +3,10 @@ use crate::ir::condcodes::IntCC; use crate::ir::Function; use crate::isa::Builder as IsaBuilder; -use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode}; +use crate::machinst::{ + compile, MachBackend, MachCompileResult, MachTextSectionBuilder, TargetIsaAdapter, + TextSectionBuilder, VCode, +}; use crate::result::CodegenResult; use crate::settings; @@ -115,6 +118,10 @@ impl MachBackend for Arm32Backend { // Carry flag clear.
IntCC::UnsignedLessThan } + + fn text_section_builder(&self, num_funcs: u32) -> Box<dyn TextSectionBuilder> { + Box::new(MachTextSectionBuilder::<inst::Inst>::new(num_funcs)) + } } /// Create a new `isa::Builder`. diff --git a/cranelift/codegen/src/isa/s390x/inst/mod.rs b/cranelift/codegen/src/isa/s390x/inst/mod.rs index c84294906e..5f1404ed4b 100644 --- a/cranelift/codegen/src/isa/s390x/inst/mod.rs +++ b/cranelift/codegen/src/isa/s390x/inst/mod.rs @@ -3,7 +3,7 @@ // Some variants are not constructed, but we still want them as options in the future. #![allow(dead_code)] -use crate::binemit::CodeOffset; +use crate::binemit::{Addend, CodeOffset, Reloc}; use crate::ir::{types, ExternalName, Opcode, TrapCode, Type, ValueLabel}; use crate::isa::unwind::UnwindInst; use crate::machinst::*; @@ -3686,4 +3686,8 @@ impl MachInstLabelUse for LabelUse { ) -> (CodeOffset, LabelUse) { unreachable!(); } + + fn from_reloc(_reloc: Reloc, _addend: Addend) -> Option<LabelUse> { + None + } } diff --git a/cranelift/codegen/src/isa/s390x/mod.rs b/cranelift/codegen/src/isa/s390x/mod.rs index d83791cb98..5c1b0b32da 100644 --- a/cranelift/codegen/src/isa/s390x/mod.rs +++ b/cranelift/codegen/src/isa/s390x/mod.rs @@ -6,7 +6,10 @@ use crate::isa::s390x::settings as s390x_settings; #[cfg(feature = "unwind")] use crate::isa::unwind::systemv::RegisterMappingError; use crate::isa::Builder as IsaBuilder; -use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode}; +use crate::machinst::{ + compile, MachBackend, MachCompileResult, MachTextSectionBuilder, TargetIsaAdapter, + TextSectionBuilder, VCode, +}; use crate::result::CodegenResult; use crate::settings as shared_settings; @@ -165,6 +168,10 @@ impl MachBackend for S390xBackend { fn map_reg_to_dwarf(&self, reg: Reg) -> Result<u16, RegisterMappingError> { inst::unwind::systemv::map_reg(reg).map(|reg| reg.0) } + + fn text_section_builder(&self, num_funcs: u32) -> Box<dyn TextSectionBuilder> { + Box::new(MachTextSectionBuilder::<inst::Inst>::new(num_funcs)) + } } /// Create a new `isa::Builder`. diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index cb5b27dfbc..e682c6f51c 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -1,6 +1,6 @@ //! This module defines x86_64-specific machine instruction types.
-use crate::binemit::{CodeOffset, StackMap}; +use crate::binemit::{Addend, CodeOffset, Reloc, StackMap}; use crate::ir::{types, ExternalName, Opcode, SourceLoc, TrapCode, Type, ValueLabel}; use crate::isa::unwind::UnwindInst; use crate::isa::x64::abi::X64ABIMachineSpec; @@ -3005,4 +3005,11 @@ impl MachInstLabelUse for LabelUse { } } } + + fn from_reloc(reloc: Reloc, addend: Addend) -> Option<LabelUse> { + match (reloc, addend) { + (Reloc::X86CallPCRel4, -4) => Some(LabelUse::JmpRel32), + _ => None, + } + } } diff --git a/cranelift/codegen/src/isa/x64/mod.rs b/cranelift/codegen/src/isa/x64/mod.rs index 381898e485..43c7fb74a6 100644 --- a/cranelift/codegen/src/isa/x64/mod.rs +++ b/cranelift/codegen/src/isa/x64/mod.rs @@ -8,7 +8,10 @@ use crate::ir::{condcodes::IntCC, Function}; use crate::isa::unwind::systemv; use crate::isa::x64::{inst::regs::create_reg_universe_systemv, settings as x64_settings}; use crate::isa::Builder as IsaBuilder; -use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode}; +use crate::machinst::{ + compile, MachBackend, MachCompileResult, MachTextSectionBuilder, TargetIsaAdapter, + TextSectionBuilder, VCode, +}; use crate::result::CodegenResult; use crate::settings::{self as shared_settings, Flags}; use alloc::{boxed::Box, vec::Vec}; @@ -158,6 +161,10 @@ impl MachBackend for X64Backend { fn map_reg_to_dwarf(&self, reg: Reg) -> Result<u16, RegisterMappingError> { inst::unwind::systemv::map_reg(reg).map(|reg| reg.0) } + + fn text_section_builder(&self, num_funcs: u32) -> Box<dyn TextSectionBuilder> { + Box::new(MachTextSectionBuilder::<inst::Inst>::new(num_funcs)) + } } /// Create a new `isa::Builder`. diff --git a/cranelift/codegen/src/lib.rs b/cranelift/codegen/src/lib.rs index 2f7d4d268b..25f1e6902d 100644 --- a/cranelift/codegen/src/lib.rs +++ b/cranelift/codegen/src/lib.rs @@ -85,6 +85,7 @@ pub mod write; pub use crate::entity::packed_option; pub use crate::machinst::buffer::MachSrcLoc; +pub use crate::machinst::TextSectionBuilder; mod abi; mod bitset; diff --git a/cranelift/codegen/src/machinst/buffer.rs b/cranelift/codegen/src/machinst/buffer.rs index be9bf468e1..d4f840092d 100644 --- a/cranelift/codegen/src/machinst/buffer.rs +++ b/cranelift/codegen/src/machinst/buffer.rs @@ -143,14 +143,17 @@ use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc, StackMap}; use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode}; use crate::isa::unwind::UnwindInst; -use crate::machinst::{BlockIndex, MachInstLabelUse, VCodeConstant, VCodeConstants, VCodeInst}; +use crate::machinst::{ + BlockIndex, MachInstLabelUse, TextSectionBuilder, VCodeConstant, VCodeConstants, VCodeInst, +}; use crate::timing; use cranelift_entity::{entity_impl, SecondaryMap}; - use log::trace; use smallvec::SmallVec; +use std::convert::TryFrom; use std::mem; use std::string::String; +use std::vec::Vec; /// A buffer of output to be produced, fixed up, and then emitted to a CodeSink /// in bulk. @@ -1067,140 +1070,187 @@ impl<I: VCodeInst> MachBuffer<I> { /// Is an island needed within the next N bytes? pub fn island_needed(&self, distance: CodeOffset) -> bool { - let worst_case_end_of_island = self.cur_offset() + distance + self.island_worst_case_size; - worst_case_end_of_island > self.island_deadline + self.worst_case_end_of_island(distance) > self.island_deadline } - /// Emit all pending constants and veneers. Should only be called if - /// `island_needed()` returns true, i.e., if we actually reach a deadline: - /// otherwise, unnecessary veneers may be inserted.
- pub fn emit_island(&mut self) { + /// Returns the maximal offset that islands can reach if `distance` more + /// bytes are appended. + /// + /// This is used to determine if veneers need insertions since jumps that + /// can't reach past this point must get a veneer of some form. + fn worst_case_end_of_island(&self, distance: CodeOffset) -> CodeOffset { + self.cur_offset() + .saturating_add(distance) + .saturating_add(self.island_worst_case_size) + } + + /// Emit all pending constants and required pending veneers. + /// + /// Should only be called if `island_needed()` returns true, i.e., if we + /// actually reach a deadline. It's not necessarily a problem to do so + /// otherwise but it may result in unnecessary work during emission. + pub fn emit_island(&mut self, distance: CodeOffset) { + self.emit_island_maybe_forced(false, distance); + } + + /// Same as `emit_island`, but an internal API with a `force_veneers` + /// argument to force all veneers to always get emitted for debugging. + fn emit_island_maybe_forced(&mut self, force_veneers: bool, distance: CodeOffset) { // We're going to purge fixups, so no latest-branch editing can happen // anymore. self.latest_branches.clear(); - let pending_constants = mem::replace(&mut self.pending_constants, SmallVec::new()); - for MachLabelConstant { label, align, data } in pending_constants.into_iter() { + // Reset internal calculations about islands since we're going to + // change the calculus as we apply fixups. The `forced_threshold` is + // used here to determine whether jumps to unknown labels will require + // a veneer or not. + let forced_threshold = self.worst_case_end_of_island(distance); + self.island_deadline = UNKNOWN_LABEL_OFFSET; + self.island_worst_case_size = 0; + + // First flush out all constants so we have more labels in case fixups + // are applied against these labels. + for MachLabelConstant { label, align, data } in mem::take(&mut self.pending_constants) { self.align_to(align); self.bind_label(label); self.put_data(&data[..]); } - let fixup_records = mem::replace(&mut self.fixup_records, SmallVec::new()); - let mut new_fixups = SmallVec::new(); - for MachLabelFixup { - label, - offset, - kind, - } in fixup_records.into_iter() - { - trace!( - "emit_island: fixup for label {:?} at offset {} kind {:?}", + for fixup in mem::take(&mut self.fixup_records) { + trace!("emit_island: fixup {:?}", fixup); + let MachLabelFixup { label, offset, - kind - ); - // We eagerly perform fixups whose label targets are known, if not out - // of range, to avoid unnecessary veneers. + kind, + } = fixup; let label_offset = self.resolve_label_offset(label); - let known = label_offset != UNKNOWN_LABEL_OFFSET; - let in_range = if known { - if label_offset >= offset { - (label_offset - offset) <= kind.max_pos_range() - } else { - (offset - label_offset) <= kind.max_neg_range() - } - } else { - false - }; - - trace!( - " -> label_offset = {}, known = {}, in_range = {} (pos {} neg {})", - label_offset, - known, - in_range, - kind.max_pos_range(), - kind.max_neg_range() - ); - let start = offset as usize; let end = (offset + kind.patch_size()) as usize; - if in_range { - debug_assert!(known); // implied by in_range. - let slice = &mut self.data[start..end]; - trace!("patching in-range!"); - kind.patch(slice, offset, label_offset); - } else if !known && !kind.supports_veneer() { - // Nothing for now. Keep it for next round. 
- new_fixups.push(MachLabelFixup { - label, - offset, - kind, - }); - } else if !in_range && kind.supports_veneer() { - // Allocate space for a veneer in the island. - self.align_to(I::LabelUse::ALIGN); - let veneer_offset = self.cur_offset(); - trace!("making a veneer at {}", veneer_offset); - let slice = &mut self.data[start..end]; - // Patch the original label use to refer to the veneer. - trace!( - "patching original at offset {} to veneer offset {}", - offset, - veneer_offset - ); - kind.patch(slice, offset, veneer_offset); - // Generate the veneer. - let veneer_slice = self.get_appended_space(kind.veneer_size() as usize); - let (veneer_fixup_off, veneer_label_use) = - kind.generate_veneer(veneer_slice, veneer_offset); - trace!( - "generated veneer; fixup offset {}, label_use {:?}", - veneer_fixup_off, - veneer_label_use - ); - // If the label is known (but was just out of range), do the - // veneer label-use fixup now too; otherwise, save it for later. - if known { - let start = veneer_fixup_off as usize; - let end = (veneer_fixup_off + veneer_label_use.patch_size()) as usize; - let veneer_slice = &mut self.data[start..end]; - trace!("doing veneer fixup right away too"); - veneer_label_use.patch(veneer_slice, veneer_fixup_off, label_offset); + + if label_offset != UNKNOWN_LABEL_OFFSET { + // If the offset of the label for this fixup is known then + // we're going to do something here-and-now. We're either going + // to patch the original offset because it's an in-bounds jump, + // or we're going to generate a veneer, patch the fixup to jump + // to the veneer, and then keep going. + // + // If the label comes after the original fixup, then we should + // be guaranteed that the jump is in-bounds. Otherwise there's + // a bug somewhere because this method wasn't called soon + // enough. All forward-jumps are tracked and should get veneers + // before their deadline comes and they're unable to jump + // further. + // + // Otherwise if the label is before the fixup, then that's a + // backwards jump. If it's past the maximum negative range + // then we'll emit a veneer to jump forward to, which can + // then jump backwards. + let veneer_required = if label_offset >= offset { + assert!((label_offset - offset) <= kind.max_pos_range()); + false } else { - new_fixups.push(MachLabelFixup { - label, - offset: veneer_fixup_off, - kind: veneer_label_use, - }); + (offset - label_offset) > kind.max_neg_range() + }; + trace!( + " -> label_offset = {}, known, required = {} (pos {} neg {})", + label_offset, + veneer_required, + kind.max_pos_range(), + kind.max_neg_range() + ); + + if (force_veneers && kind.supports_veneer()) || veneer_required { + self.emit_veneer(label, offset, kind); + } else { + let slice = &mut self.data[start..end]; + trace!("patching in-range!"); + kind.patch(slice, offset, label_offset); + } } else { - panic!( - "Cannot support label-use {:?} (known = {}, in-range = {})", - kind, known, in_range - ); + // If the offset of this label is not known at this time then + // there's one of two possibilities: + // + // * First we may be about to exceed the maximum jump range of + // this fixup. In that case a veneer is inserted to buy some + // more budget for the forward-jump. It's guaranteed that the + // label will eventually come after where we're at, so we know + // that the forward jump is necessary. + // + // * Otherwise we're still within range of the forward jump but + // the precise target isn't known yet. In that case we + // enqueue the fixup to get processed later.
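+ // + // As a concrete example (using rough AArch64 numbers, not a new + // invariant): a `Branch19` fixup can only reach about 1MB forward, so + // if the worst-case end of this island is already more than + // `max_pos_range()` past the fixup's offset, waiting for a later + // island could strand the jump, and the veneer is emitted right away.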
+ if forced_threshold - offset > kind.max_pos_range() { + self.emit_veneer(label, offset, kind); + } else { + self.use_label_at_offset(offset, label, kind); + } } } - - self.fixup_records = new_fixups; - self.island_deadline = UNKNOWN_LABEL_OFFSET; } - /// Finish any deferred emissions and/or fixups. - pub fn finish(mut self) -> MachBufferFinalized { - let _tt = timing::vcode_emit_finish(); + /// Emits a "veneer" so that the `kind` code at `offset` can jump to `label`. + /// + /// This will generate extra machine code, using `kind`, to get a + /// larger-jump-kind than `kind` allows. The code at `offset` is then + /// patched to jump to our new code, and then the new code is enqueued for + /// a fixup to get processed at some later time. + fn emit_veneer(&mut self, label: MachLabel, offset: CodeOffset, kind: I::LabelUse) { + // If this `kind` doesn't support a veneer then that's a bug in the + // backend because we need to implement support for such a veneer. + assert!( + kind.supports_veneer(), + "jump beyond the range of {:?} but a veneer isn't supported", + kind, + ); + // Allocate space for a veneer in the island. + self.align_to(I::LabelUse::ALIGN); + let veneer_offset = self.cur_offset(); + trace!("making a veneer at {}", veneer_offset); + let start = offset as usize; + let end = (offset + kind.patch_size()) as usize; + let slice = &mut self.data[start..end]; + // Patch the original label use to refer to the veneer. + trace!( + "patching original at offset {} to veneer offset {}", + offset, + veneer_offset ); + kind.patch(slice, offset, veneer_offset); + // Generate the veneer. + let veneer_slice = self.get_appended_space(kind.veneer_size() as usize); + let (veneer_fixup_off, veneer_label_use) = + kind.generate_veneer(veneer_slice, veneer_offset); + trace!( + "generated veneer; fixup offset {}, label_use {:?}", + veneer_fixup_off, + veneer_label_use ); + // Register a new use of `label` with our new veneer fixup and offset. + // This'll recalculate deadlines accordingly and enqueue this fixup to + // get processed at some later time. + self.use_label_at_offset(veneer_fixup_off, label, veneer_label_use); + } + + fn finish_emission_maybe_forcing_veneers(&mut self, force_veneers: bool) { while !self.pending_constants.is_empty() || !self.fixup_records.is_empty() { // `emit_island()` will emit any pending veneers and constants, and // as a side-effect, will also take care of any fixups with resolved // labels eagerly. - self.emit_island(); + self.emit_island_maybe_forced(force_veneers, u32::MAX); } // Ensure that all labels have been fixed up after the last island is emitted. This is a // full (release-mode) assert because an unresolved label means the emitted code is // incorrect. assert!(self.fixup_records.is_empty()); + } + + /// Finish any deferred emissions and/or fixups. + pub fn finish(mut self) -> MachBufferFinalized { + let _tt = timing::vcode_emit_finish(); + + self.finish_emission_maybe_forcing_veneers(false); let mut srclocs = self.srclocs; srclocs.sort_by_key(|entry| entry.start); @@ -1225,6 +1275,39 @@ impl<I: VCodeInst> MachBuffer<I> { addend: Addend, ) { let name = name.clone(); + // FIXME(#3277): This should use `I::LabelUse::from_reloc` to optionally + // generate a label-use statement to track whether an island is possibly + // needed to escape this function to actually get to the external name. + // This is most likely to come up on AArch64 where calls between + // functions use a 26-bit signed offset which gives +/- 64MB.
This means + // that if a function is 128MB in size and there's a call in the middle + // it's impossible to reach the actual target. Also, while it's + // technically possible to jump to the start of a function and then jump + // further, island insertion below always inserts islands after + // previously appended code so for Cranelift's own implementation this + // is also a problem for 64MB functions on AArch64 which start with a + // call instruction, those won't be able to escape. + // + // Ideally what needs to happen here is that a `LabelUse` is + // transparently generated (or call-sites of this function are audited + // to generate a `LabelUse` instead) and tracked internally. The actual + // relocation would then change over time if and when a veneer is + // inserted, where the relocation here would be patched by this + // `MachBuffer` to jump to the veneer. The problem, though, is that all + // this still needs to end up, in the case of a singular function, + // generating a final relocation pointing either to this particular + // relocation or to the veneer inserted. Additionally + // `MachBuffer` needs the concept of a label which will never be + // resolved, so `emit_island` doesn't trip over not actually ever + // knowing what some labels are. Currently the loop in + // `finish_emission_maybe_forcing_veneers` would otherwise infinitely + // loop. + // + // For now this means that, because relocs aren't tracked at all, + // AArch64 functions have a rough size limit of 64MB. For now that's + // somewhat reasonable and the failure mode is a panic in `MachBuffer` + // when a relocation can't otherwise be resolved later, so it shouldn't + // actually result in any memory unsafety or anything like that. self.relocs.push(MachReloc { offset: self.data.len() as CodeOffset, srcloc, @@ -1481,6 +1564,79 @@ impl MachBranch { } } +/// Implementation of the `TextSectionBuilder` trait backed by `MachBuffer`. +/// +/// Note that `MachBuffer` was primarily written for intra-function references +/// of jumps between basic blocks, but it's also quite usable for entire text +/// sections and resolving references between functions themselves. This +/// builder interprets "blocks" as labeled functions for the purposes of +/// resolving labels internally in the buffer. +pub struct MachTextSectionBuilder<I: VCodeInst> { + buf: MachBuffer<I>, + next_func: u32, + force_veneers: bool, +} + +impl<I: VCodeInst> MachTextSectionBuilder<I> { + pub fn new(num_funcs: u32) -> MachTextSectionBuilder<I> { + let mut buf = MachBuffer::new(); + buf.reserve_labels_for_blocks(num_funcs); + MachTextSectionBuilder { + buf, + next_func: 0, + force_veneers: false, + } + } +} + +impl<I: VCodeInst> TextSectionBuilder for MachTextSectionBuilder<I> { + fn append(&mut self, named: bool, func: &[u8], align: u32) -> u64 { + // Conditionally emit an island if it's necessary to resolve jumps + // between functions which are too far away.
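+ // + // Note that the size of the function we're about to append is used as + // the lookahead distance: once the body is appended it's too late to + // place an island in the middle of it, so any pending fixup whose + // deadline could expire while the body is written out must get its + // veneer emitted first.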
+ let size = func.len() as u32; + if self.force_veneers || self.buf.island_needed(size) { + self.buf.emit_island_maybe_forced(self.force_veneers, size); + } + + self.buf.align_to(align); + let pos = self.buf.cur_offset(); + if named { + self.buf.bind_label(MachLabel::from_block(self.next_func)); + self.next_func += 1; + } + self.buf.put_data(func); + u64::from(pos) + } + + fn resolve_reloc(&mut self, offset: u64, reloc: Reloc, addend: Addend, target: u32) -> bool { + let label = MachLabel::from_block(target); + let offset = u32::try_from(offset).unwrap(); + match I::LabelUse::from_reloc(reloc, addend) { + Some(label_use) => { + self.buf.use_label_at_offset(offset, label, label_use); + true + } + None => false, + } + } + + fn force_veneers(&mut self) { + self.force_veneers = true; + } + + fn finish(&mut self) -> Vec<u8> { + // Double-check all functions were pushed. + assert_eq!(self.next_func, self.buf.label_offsets.len() as u32); + + // Finish up any veneers, if necessary. + self.buf + .finish_emission_maybe_forcing_veneers(self.force_veneers); + + // We don't need the data any more, so return it to the caller. + mem::take(&mut self.buf.data).into_vec() + } +} + // We use an actual instruction definition to do tests, so we depend on the `arm64` feature here. #[cfg(all(test, feature = "arm64"))] mod test { @@ -1610,7 +1766,7 @@ mod test { buf.bind_label(label(1)); while buf.cur_offset() < 2000000 { if buf.island_needed(0) { - buf.emit_island(); + buf.emit_island(0); } let inst = Inst::Nop4; inst.emit(&mut buf, &info, &mut state); @@ -1632,7 +1788,23 @@ mod test { let mut state = Default::default(); let inst = Inst::CondBr { kind: CondBrKind::NotZero(xreg(0)), - taken: BranchTarget::ResolvedOffset(1048576 - 4), + + // This conditionally taken branch has a 19-bit constant, shifted + // to the left by two, giving us a 21-bit range in total. Half of + // this range is positive, so we should be around 1 << 20 bytes + // away from our jump target. + // + // There are two pending fixups by the time we reach this point, + // one for this 19-bit jump and one for the unconditional 26-bit + // jump below. A 19-bit veneer is 4 bytes large and the 26-bit + // veneer is 20 bytes large, which means that pessimistically + // assuming we'll need two veneers we need 24 bytes of extra + // space, meaning that the actual island should come 24 bytes + // before the deadline. + taken: BranchTarget::ResolvedOffset((1 << 20) - 4 - 20), + + // This branch is in-range so no veneers should be needed; it should + // go directly to the target. not_taken: BranchTarget::ResolvedOffset(2000000 + 4 - 4), }; inst.emit(&mut buf2, &info, &mut state); diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index 41d6e05bed..554e31ad4e 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -60,7 +60,7 @@ //! //! ``` -use crate::binemit::{CodeInfo, CodeOffset, StackMap}; +use crate::binemit::{Addend, CodeInfo, CodeOffset, Reloc, StackMap}; use crate::ir::condcodes::IntCC; use crate::ir::{Function, SourceLoc, StackSlot, Type, ValueLabel}; use crate::result::CodegenResult; @@ -248,6 +248,12 @@ pub trait MachInstLabelUse: Clone + Copy + Debug + Eq { /// "long-range jump" (e.g., on ARM, the 26-bit form), or if already at that /// stage, a jump that supports a full 32-bit range, for example. fn generate_veneer(self, buffer: &mut [u8], veneer_offset: CodeOffset) -> (CodeOffset, Self); + + /// Returns the corresponding label-use for the relocation specified.
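+ /// + /// For example, on x86_64 a `Reloc::X86CallPCRel4` with addend `-4` + /// corresponds to `LabelUse::JmpRel32`, and on AArch64 a `Reloc::Arm64Call` + /// with addend `0` corresponds to `LabelUse::Branch26`.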
+ /// + /// This returns `None` if the relocation doesn't have a corresponding + /// representation for the target architecture. + fn from_reloc(reloc: Reloc, addend: Addend) -> Option<Self>; } /// Describes a block terminator (not call) in the vcode, when its branches @@ -425,6 +431,57 @@ pub trait MachBackend { fn map_reg_to_dwarf(&self, _: Reg) -> Result<u16, RegisterMappingError> { Err(RegisterMappingError::UnsupportedArchitecture) } + + /// Returns an object that can be used to build the text section of an + /// executable. + /// + /// This object will internally attempt to handle as many relocations as + /// possible using relative calls/jumps/etc between functions. + /// + /// The `num_labeled_funcs` argument here is the number of functions which + /// will be "labeled" or might have calls between them, typically the number + /// of defined functions in the object file. + fn text_section_builder(&self, num_labeled_funcs: u32) -> Box<dyn TextSectionBuilder>; +} + +/// An object that can be used to create the text section of an executable. +/// +/// This primarily handles resolving relative relocations at +/// text-section-assembly time rather than at load/link time. This +/// architecture-specific logic is sort of like a linker, but only for one +/// object file at a time. pub trait TextSectionBuilder { /// Appends `data` to the text section with the `align` specified. /// /// If `labeled` is `true` then the offset of the final data is used to /// resolve relocations in `resolve_reloc` in the future. /// /// This function returns the offset at which the data was placed in the /// text section. fn append(&mut self, labeled: bool, data: &[u8], align: u32) -> u64; /// Attempts to resolve a relocation for this function. /// /// The `offset` is the offset of the relocation, within the text section. /// The `reloc` is the kind of relocation. /// The `addend` is the value to add to the relocation. /// The `target` is the labeled function that is the target of this /// relocation. /// /// Labeled functions are created with the `append` function above by /// setting the `labeled` parameter to `true`. /// /// If this builder does not know how to handle `reloc` then this function /// will return `false`. Otherwise this function will return `true` and this /// relocation will be resolved in the final bytes returned by `finish`. fn resolve_reloc(&mut self, offset: u64, reloc: Reloc, addend: Addend, target: u32) -> bool; /// A debug-only option which is used to force the insertion of jump veneers + /// for all supported relocations, even when every target would otherwise be + /// in range, so that the veneer paths can be exercised in tests. fn force_veneers(&mut self); /// Completes this text section, filling out any final details, and returns /// the bytes of the text section. fn finish(&mut self) -> Vec<u8>; } /// Expected unwind info type.
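To make the intended calling sequence of the new trait concrete, here is a minimal sketch of how a client might drive it (the backend value, function bodies, and relocation constants are illustrative assumptions, not code from this change; only the `TextSectionBuilder` calls mirror the trait above):

```rust
use cranelift_codegen::binemit::Reloc;
use cranelift_codegen::machinst::{MachBackend, TextSectionBuilder};

// Lay out two functions in one text section, resolving the call in the
// first function at assembly time instead of leaving a relocation for
// load time.
fn assemble(backend: &dyn MachBackend, body_a: &[u8], body_b: &[u8]) -> Vec<u8> {
    // Two functions will be "labeled", i.e. usable as relocation targets.
    let mut text = backend.text_section_builder(2);

    let off_a = text.append(true, body_a, 16);
    let _off_b = text.append(true, body_b, 16);

    // Hypothetical x86_64 call whose 4-byte displacement sits at the end
    // of `body_a`, targeting labeled function 1 (`body_b`). If the builder
    // understands the relocation it patches it in-place (inserting a jump
    // veneer if the target ends up out of range); otherwise the caller
    // must record a normal object-file relocation instead.
    let call_disp = off_a + body_a.len() as u64 - 4;
    if !text.resolve_reloc(call_disp, Reloc::X86CallPCRel4, -4, 1) {
        // fall back to emitting `Reloc::X86CallPCRel4` into the object file
    }

    // Flush any pending veneers and take the final bytes.
    text.finish()
}
```

Note how this reuses the `MachBuffer` island machinery above: an out-of-range `resolve_reloc` target is handled with the same veneers used for intra-function branches.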
diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index bd3bfbab85..96a1acc909 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -557,7 +557,7 @@ impl<I: VCodeInst> VCode<I> { let next_block_size = next_block_range.1 - next_block_range.0; let worst_case_next_bb = I::worst_case_size() * next_block_size; if buffer.island_needed(worst_case_next_bb) { - buffer.emit_island(); + buffer.emit_island(worst_case_next_bb); } } } diff --git a/crates/cranelift/src/builder.rs b/crates/cranelift/src/builder.rs index f7f6c1dba2..905b3814bd 100644 --- a/crates/cranelift/src/builder.rs +++ b/crates/cranelift/src/builder.rs @@ -13,6 +13,20 @@ use wasmtime_environ::{CompilerBuilder, Setting, SettingKind}; struct Builder { flags: settings::Builder, isa_flags: isa::Builder, + linkopts: LinkOptions, } +#[derive(Clone, Default)] +pub struct LinkOptions { + /// A debug-only setting used to synthetically insert zero-filled padding + /// between compiled functions to simulate huge compiled artifacts and + /// exercise logic related to jump veneers. + pub padding_between_functions: usize, + + /// A debug-only setting used to force inter-function calls in a wasm module + /// to always go through "jump veneers" which are typically only generated + /// when functions are very far from each other. + pub force_jump_veneers: bool, } pub fn builder() -> Box<dyn CompilerBuilder> { @@ -32,6 +46,7 @@ Box::new(Builder { flags, isa_flags: cranelift_native::builder().expect("host machine is not a supported target"), + linkopts: LinkOptions::default(), }) } @@ -50,6 +65,17 @@ impl CompilerBuilder for Builder { } fn set(&mut self, name: &str, value: &str) -> Result<()> { + // Special wasmtime-cranelift-only settings first + if name == "wasmtime_linkopt_padding_between_functions" { + self.linkopts.padding_between_functions = value.parse()?; + return Ok(()); + } + if name == "wasmtime_linkopt_force_jump_veneer" { + self.linkopts.force_jump_veneers = value.parse()?; + return Ok(()); + } + + // ...
then forward this to Cranelift if let Err(err) = self.flags.set(name, value) { match err { SetError::BadName(_) => { @@ -80,7 +106,7 @@ .isa_flags .clone() .finish(settings::Flags::new(self.flags.clone())); - Box::new(crate::compiler::Compiler::new(isa)) + Box::new(crate::compiler::Compiler::new(isa, self.linkopts.clone())) } fn settings(&self) -> Vec<Setting> { diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs index 9fc86f8ac3..444f994a7d 100644 --- a/crates/cranelift/src/compiler.rs +++ b/crates/cranelift/src/compiler.rs @@ -1,3 +1,4 @@ +use crate::builder::LinkOptions; use crate::debug::ModuleMemoryOffset; use crate::func_environ::{get_func_name, FuncEnvironment}; use crate::obj::ObjectBuilder; @@ -36,13 +37,15 @@ use wasmtime_environ::{ pub(crate) struct Compiler { translators: Mutex<Vec<FuncTranslator>>, isa: Box<dyn TargetIsa>, + linkopts: LinkOptions, } impl Compiler { - pub(crate) fn new(isa: Box<dyn TargetIsa>) -> Compiler { + pub(crate) fn new(isa: Box<dyn TargetIsa>, linkopts: LinkOptions) -> Compiler { Compiler { translators: Default::default(), isa, + linkopts, } } @@ -170,7 +173,7 @@ impl wasmtime_environ::Compiler for Compiler { self.save_translator(func_translator); let mut code_buf: Vec<u8> = Vec::new(); - let mut reloc_sink = RelocSink::new(func_index); + let mut reloc_sink = RelocSink::new(); let mut trap_sink = TrapSink::new(); let mut stack_map_sink = StackMapSink::default(); context @@ -228,13 +231,15 @@ impl wasmtime_environ::Compiler for Compiler { emit_dwarf: bool, obj: &mut Object, ) -> Result<(PrimaryMap<DefinedFuncIndex, FunctionInfo>, Vec<Trampoline>)> { - const CODE_SECTION_ALIGNMENT: u64 = 0x1000; let funcs: crate::CompiledFunctions = funcs .into_iter() .map(|(_i, f)| *f.downcast().unwrap()) .collect(); - let mut builder = ObjectBuilder::new(obj, &translation.module); + let mut builder = ObjectBuilder::new(obj, &translation.module, &*self.isa); + if self.linkopts.force_jump_veneers { + builder.text.force_veneers(); + } let mut addrs = AddressMapSection::default(); let mut traps = TrapEncodingBuilder::default(); let compiled_trampolines = translation @@ -249,6 +254,11 @@ { addrs.push(range.clone(), &func.address_map.instructions); traps.push(range.clone(), &func.traps); func_starts.push(range.start); + if self.linkopts.padding_between_functions > 0 { + builder + .text + .append(false, &vec![0; self.linkopts.padding_between_functions], 1); + } } // Build trampolines for every signature that can be used by this module.
@@ -260,7 +270,6 @@ impl wasmtime_environ::Compiler for Compiler { { trampolines.push(builder.trampoline(*i, &func)); } - builder.align_text_to(CODE_SECTION_ALIGNMENT); if emit_dwarf && funcs.len() > 0 { let ofs = VMOffsets::new( @@ -292,7 +301,7 @@ impl wasmtime_environ::Compiler for Compiler { builder.dwarf_sections(&dwarf_sections)?; } - builder.finish(&*self.isa)?; + builder.finish()?; addrs.append_to(obj); traps.append_to(obj); @@ -318,10 +327,10 @@ impl wasmtime_environ::Compiler for Compiler { let host_to_wasm = self.host_to_wasm_trampoline(ty)?; let wasm_to_host = self.wasm_to_host_trampoline(ty, host_fn)?; let module = Module::new(); - let mut builder = ObjectBuilder::new(obj, &module); + let mut builder = ObjectBuilder::new(obj, &module, &*self.isa); let a = builder.trampoline(SignatureIndex::new(0), &host_to_wasm); let b = builder.trampoline(SignatureIndex::new(1), &wasm_to_host); - builder.finish(&*self.isa)?; + builder.finish()?; Ok((a, b)) } @@ -617,9 +626,6 @@ fn collect_address_maps( /// Implementation of a relocation sink that just saves all the information for later struct RelocSink { - /// Current function index. - func_index: FuncIndex, - /// Relocations recorded for the function. func_relocs: Vec, } @@ -662,7 +668,7 @@ impl binemit::RelocSink for RelocSink { fn reloc_jt(&mut self, offset: binemit::CodeOffset, reloc: binemit::Reloc, jt: ir::JumpTable) { self.func_relocs.push(Relocation { reloc, - reloc_target: RelocationTarget::JumpTable(self.func_index, jt), + reloc_target: RelocationTarget::JumpTable(jt), offset, addend: 0, }); @@ -671,9 +677,8 @@ impl binemit::RelocSink for RelocSink { impl RelocSink { /// Return a new `RelocSink` instance. - fn new(func_index: FuncIndex) -> Self { + fn new() -> Self { Self { - func_index, func_relocs: Vec::new(), } } diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs index e98bdacf23..6f7bcb7fe2 100644 --- a/crates/cranelift/src/func_environ.rs +++ b/crates/cranelift/src/func_environ.rs @@ -1297,9 +1297,22 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m Ok(func.import_function(ir::ExtFuncData { name, signature, - // We currently allocate all code segments independently, so nothing - // is colocated. - colocated: false, + + // The value of this flag determines the codegen for calls to this + // function. If this flag is `false` then absolute relocations will + // be generated for references to the function, which requires + // load-time relocation resolution. If this flag is set to `true` + // then relative relocations are emitted which can be resolved at + // object-link-time, just after all functions are compiled. + // + // This flag is set to `true` for functions defined in the object + // we'll be defining in this compilation unit, or everything local + // to the wasm module. This means that between functions in a wasm + // module there's relative calls encoded. All calls external to a + // wasm module (e.g. imports or libcalls) are either encoded through + // the `VMContext` as relative jumps (hence no relocations) or + // they're libcalls with absolute relocations. + colocated: self.module.defined_func_index(index).is_some(), })) } diff --git a/crates/cranelift/src/lib.rs b/crates/cranelift/src/lib.rs index df846574a2..dccaa383ea 100644 --- a/crates/cranelift/src/lib.rs +++ b/crates/cranelift/src/lib.rs @@ -182,7 +182,7 @@ enum RelocationTarget { /// A compiler-generated libcall. LibCall(ir::LibCall), /// Jump table index. 
- JumpTable(FuncIndex, ir::JumpTable), + JumpTable(ir::JumpTable), } /// Creates a new cranelift `Signature` with no wasm params/results for the diff --git a/crates/cranelift/src/obj.rs b/crates/cranelift/src/obj.rs index 3789c51b88..edd462e1e2 100644 --- a/crates/cranelift/src/obj.rs +++ b/crates/cranelift/src/obj.rs @@ -16,12 +16,13 @@ use crate::debug::{DwarfSection, DwarfSectionRelocTarget}; use crate::{CompiledFunction, Relocation, RelocationTarget}; use anyhow::Result; -use cranelift_codegen::binemit::Reloc; -use cranelift_codegen::ir::{JumpTableOffsets, LibCall}; +use cranelift_codegen::binemit::{Addend, Reloc}; +use cranelift_codegen::ir::LibCall; use cranelift_codegen::isa::{ unwind::{systemv, UnwindInfo}, TargetIsa, }; +use cranelift_codegen::TextSectionBuilder; use gimli::write::{Address, EhFrame, EndianVec, FrameTable, Writer}; use gimli::RunTimeEndian; use object::write::{ @@ -29,11 +30,12 @@ use object::write::{ SymbolSection, }; use object::{ - elf, Architecture, RelocationEncoding, RelocationKind, SectionKind, SymbolFlags, SymbolKind, + Architecture, RelocationEncoding, RelocationKind, SectionKind, SymbolFlags, SymbolKind, SymbolScope, }; use std::collections::HashMap; use std::convert::TryFrom; +use std::mem; use std::ops::Range; use wasmtime_environ::obj; use wasmtime_environ::{ @@ -91,16 +93,52 @@ fn write_libcall_symbols(obj: &mut Object) -> HashMap<LibCall, SymbolId> { libcalls } +/// A helper structure used to assemble the final text section of an executable, +/// plus unwinding information and other related details. +/// +/// This builder relies on Cranelift-specific internals but assembles into a +/// generic `Object` which will get further appended to in a compiler-agnostic +/// fashion later. pub struct ObjectBuilder<'a> { + /// The target that we're compiling for, used to query target-specific + /// information as necessary. + isa: &'a dyn TargetIsa, + + /// The object file that we're generating code into. obj: &'a mut Object, + + /// The WebAssembly module we're generating code for. module: &'a Module, - text_section: SectionId, - func_symbols: PrimaryMap<FuncIndex, SymbolId>, - jump_tables: PrimaryMap<DefinedFuncIndex, &'a JumpTableOffsets>, + + /// Map of injected symbols for all possible libcalls, used whenever there's + /// a relocation against a libcall. libcalls: HashMap<LibCall, SymbolId>, - pending_relocations: Vec<(u64, &'a [Relocation])>, + + /// Packed form of windows unwind tables which, if present, will get emitted + /// to a windows-specific unwind info section. windows_unwind_info: Vec<RUNTIME_FUNCTION>, + + /// Pending unwinding information for DWARF-based platforms. This is used to + /// build a `.eh_frame` lookalike at the very end of object building. systemv_unwind_info: Vec<(u64, &'a systemv::UnwindInfo)>, + + /// The corresponding symbol for each function, inserted as they're defined. + /// + /// If an index isn't here yet then it hasn't been defined yet. + func_symbols: PrimaryMap<FuncIndex, SymbolId>, + + /// `object`-crate identifier for the text section. + text_section: SectionId, + + /// Relocations to be added once we've got all function symbols available to + /// us. The first entry is the relocation that we're applying, relative + /// within a function, and the second entry here is the offset of the + /// function that contains this relocation. + relocations: Vec<(&'a Relocation, u64)>, + + /// In-progress text section that we're building with cranelift's + /// `MachBuffer`, used to resolve relocations (calls) between functions.
+ pub text: Box<dyn TextSectionBuilder>, } // This is a mirror of `RUNTIME_FUNCTION` in the Windows API, but defined here @@ -116,7 +154,7 @@ struct RUNTIME_FUNCTION { } impl<'a> ObjectBuilder<'a> { - pub fn new(obj: &'a mut Object, module: &'a Module) -> Self { + pub fn new(obj: &'a mut Object, module: &'a Module, isa: &'a dyn TargetIsa) -> Self { // Entire code (functions and trampolines) will be placed // in the ".text" section. let text_section = obj.add_section( @@ -146,15 +184,21 @@ impl<'a> ObjectBuilder<'a> { let libcalls = write_libcall_symbols(obj); Self { + isa, obj, module, text_section, func_symbols, libcalls, - pending_relocations: Vec::new(), - jump_tables: PrimaryMap::with_capacity(module.functions.len()), windows_unwind_info: Vec::new(), systemv_unwind_info: Vec::new(), + relocations: Vec::new(), + text: match isa.get_mach_backend() { + Some(backend) => backend.text_section_builder( + (module.functions.len() - module.num_imported_funcs) as u32, + ), + None => Box::new(DummyBuilder::default()), + }, } } /// /// Returns the symbol associated with the function as well as the range /// that the function resides within the text section. - fn append_func(&mut self, name: Vec<u8>, func: &'a CompiledFunction) -> (SymbolId, Range<u64>) { - let off = self - .obj - .append_section_data(self.text_section, &func.body, 1); + fn append_func( + &mut self, + wat: bool, + name: Vec<u8>, + func: &'a CompiledFunction, + ) -> (SymbolId, Range<u64>) { + let body_len = func.body.len() as u64; + let off = self.text.append(wat, &func.body, 1); + let symbol_id = self.obj.add_symbol(Symbol { name, value: off, - size: func.body.len() as u64, + size: body_len, kind: SymbolKind::Text, scope: SymbolScope::Compilation, weak: false, @@ -190,12 +239,10 @@ impl<'a> ObjectBuilder<'a> { let unwind_size = info.emit_size(); let mut unwind_info = vec![0; unwind_size]; info.emit(&mut unwind_info); - let unwind_off = self - .obj - .append_section_data(self.text_section, &unwind_info, 4); + let unwind_off = self.text.append(false, &unwind_info, 4); self.windows_unwind_info.push(RUNTIME_FUNCTION { begin: u32::try_from(off).unwrap(), - end: u32::try_from(off + func.body.len() as u64).unwrap(), + end: u32::try_from(off + body_len).unwrap(), unwind_address: u32::try_from(unwind_off).unwrap(), }); } @@ -209,30 +256,92 @@ impl<'a> ObjectBuilder<'a> { Some(_) => panic!("some unwind info isn't handled here"), None => {} } - if !func.relocations.is_empty() { - self.pending_relocations.push((off, &func.relocations)); + + for r in func.relocations.iter() { + let (symbol, symbol_offset) = match r.reloc_target { + // Relocations against user-defined functions means that this is + // a relocation against a module-local function, typically a + // call between functions. The `text` field is given priority to + // resolve this relocation before we actually emit an object + // file, but if it can't handle it then we pass through the + // relocation. + RelocationTarget::UserFunc(index) => { + let defined_index = self.module.defined_func_index(index).unwrap(); + if self.text.resolve_reloc( + off + u64::from(r.offset), + r.reloc, + r.addend, + defined_index.as_u32(), + ) { + continue; + } + + // FIXME(#3009) once the old backend is removed all + // inter-function relocations should be handled by + // `self.text`. This can become `unreachable!()` in that + // case.
+ self.relocations.push((r, off)); + continue; + } + + // These relocations, unlike against user funcs above, typically + // involve absolute addresses and need to get resolved at load + // time. These are persisted immediately into the object file. + // + // FIXME: these, like user-defined-functions, should probably + // use relative jumps and avoid absolute relocations. They don't + // seem too common though so aren't necessarily that important + // to optimize. + RelocationTarget::LibCall(call) => (self.libcalls[&call], 0), + RelocationTarget::JumpTable(jt) => (symbol_id, func.jt_offsets[jt]), + }; + let (kind, encoding, size) = match r.reloc { + Reloc::Abs4 => (RelocationKind::Absolute, RelocationEncoding::Generic, 32), + Reloc::Abs8 => (RelocationKind::Absolute, RelocationEncoding::Generic, 64), + + // This is emitted by the old x86 backend and is only present + // for when the constant rodata is separated from the code + // itself. We don't do that, though, so we ignore these + // relocations since the offsets already listed here are already + // correct. + // + // FIXME(#3009): when the old backend is removed delete this + // case. + Reloc::X86PCRelRodata4 => continue, + + other => unimplemented!("Unimplemented relocation {:?}", other), + }; + self.obj + .add_relocation( + self.text_section, + ObjectRelocation { + offset: off + r.offset as u64, + size, + kind, + encoding, + symbol, + addend: r.addend.wrapping_add(symbol_offset as i64), + }, + ) + .unwrap(); } - (symbol_id, off..off + func.body.len() as u64) + (symbol_id, off..off + body_len) } - /// Pushes a new defined function from the a wasm module into this object, - /// returning the range that the compiled code will live at relative in the - /// text section of the final executable. + /// Appends a function to this object file. /// - /// Note that functions must be pushed in the order of their - /// `DefinedFuncIndex`. + /// This is expected to be called in-order for ascending `index` values. pub fn func(&mut self, index: DefinedFuncIndex, func: &'a CompiledFunction) -> Range { - assert_eq!(self.jump_tables.push(&func.jt_offsets), index); let index = self.module.func_index(index); let name = obj::func_symbol_name(index); - let (symbol_id, range) = self.append_func(name.into_bytes(), func); + let (symbol_id, range) = self.append_func(true, name.into_bytes(), func); assert_eq!(self.func_symbols.push(symbol_id), index); range } pub fn trampoline(&mut self, sig: SignatureIndex, func: &'a CompiledFunction) -> Trampoline { let name = obj::trampoline_symbol_name(sig); - let (_, range) = self.append_func(name.into_bytes(), func); + let (_, range) = self.append_func(false, name.into_bytes(), func); Trampoline { signature: sig, start: range.start, @@ -240,10 +349,6 @@ impl<'a> ObjectBuilder<'a> { } } - pub fn align_text_to(&mut self, align: u64) { - self.obj.append_section_data(self.text_section, &[], align); - } - pub fn dwarf_sections(&mut self, sections: &[DwarfSection]) -> Result<()> { // If we have DWARF data, write it in the object file. let (debug_bodies, debug_relocs): (Vec<_>, Vec<_>) = sections @@ -289,80 +394,50 @@ impl<'a> ObjectBuilder<'a> { Ok(()) } - pub fn finish(&mut self, isa: &dyn TargetIsa) -> Result<()> { - self.append_relocations()?; + pub fn finish(&mut self) -> Result<()> { + // Now that all function symbols are available register all final + // relocations between functions. + // + // FIXME(#3009) once the old backend is removed this loop should be + // deleted since there won't be any relocations here. 
+ for (r, off) in mem::take(&mut self.relocations) { + let symbol = match r.reloc_target { + RelocationTarget::UserFunc(index) => self.func_symbols[index], + _ => unreachable!("should be handled in `append_func`"), + }; + let (kind, encoding, size) = match r.reloc { + Reloc::X86CallPCRel4 => { + (RelocationKind::Relative, RelocationEncoding::X86Branch, 32) + } + other => unimplemented!("Unimplemented relocation {:?}", other), + }; + self.obj.add_relocation( + self.text_section, + ObjectRelocation { + offset: off + u64::from(r.offset), + size, + kind, + encoding, + symbol, + addend: r.addend, + }, + )?; + } + + // Finish up the text section now that we're done adding functions. + const CODE_SECTION_ALIGNMENT: u64 = 0x1000; + let text = self.text.finish(); + self.obj + .section_mut(self.text_section) + .set_data(text, CODE_SECTION_ALIGNMENT); + + // With all functions added we can also emit the fully-formed unwinding + // information sections. if self.windows_unwind_info.len() > 0 { self.append_windows_unwind_info(); } if self.systemv_unwind_info.len() > 0 { - self.append_systemv_unwind_info(isa); - } - Ok(()) - } - - fn append_relocations(&mut self) -> Result<()> { - for (off, relocations) in self.pending_relocations.iter() { - for r in relocations.iter() { - let (symbol, symbol_offset) = match r.reloc_target { - RelocationTarget::UserFunc(index) => (self.func_symbols[index], 0), - RelocationTarget::LibCall(call) => (self.libcalls[&call], 0), - RelocationTarget::JumpTable(f, jt) => { - let df = self.module.defined_func_index(f).unwrap(); - let offset = *self - .jump_tables - .get(df) - .and_then(|t| t.get(jt)) - .expect("func jump table"); - (self.func_symbols[f], offset) - } - }; - let (kind, encoding, size) = match r.reloc { - Reloc::Abs4 => (RelocationKind::Absolute, RelocationEncoding::Generic, 32), - Reloc::Abs8 => (RelocationKind::Absolute, RelocationEncoding::Generic, 64), - Reloc::X86PCRel4 => (RelocationKind::Relative, RelocationEncoding::Generic, 32), - Reloc::X86CallPCRel4 => { - (RelocationKind::Relative, RelocationEncoding::X86Branch, 32) - } - // TODO: Get Cranelift to tell us when we can use - // R_X86_64_GOTPCRELX/R_X86_64_REX_GOTPCRELX. - Reloc::X86CallPLTRel4 => ( - RelocationKind::PltRelative, - RelocationEncoding::X86Branch, - 32, - ), - Reloc::X86GOTPCRel4 => { - (RelocationKind::GotRelative, RelocationEncoding::Generic, 32) - } - Reloc::ElfX86_64TlsGd => ( - RelocationKind::Elf(elf::R_X86_64_TLSGD), - RelocationEncoding::Generic, - 32, - ), - Reloc::X86PCRelRodata4 => { - continue; - } - Reloc::Arm64Call => ( - RelocationKind::Elf(elf::R_AARCH64_CALL26), - RelocationEncoding::Generic, - 32, - ), - Reloc::S390xPCRel32Dbl => { - (RelocationKind::Relative, RelocationEncoding::S390xDbl, 32) - } - other => unimplemented!("Unimplemented relocation {:?}", other), - }; - self.obj.add_relocation( - self.text_section, - ObjectRelocation { - offset: off + r.offset as u64, - size, - kind, - encoding, - symbol, - addend: r.addend.wrapping_add(symbol_offset as i64), - }, - )?; - } + self.append_systemv_unwind_info(); } Ok(()) } @@ -437,14 +512,15 @@ impl<'a> ObjectBuilder<'a> { /// This allows `.eh_frame` to have different virtual memory permissions, /// such as being purely read-only instead of read/execute like the code /// bits. 
- fn append_systemv_unwind_info(&mut self, isa: &dyn TargetIsa) { + fn append_systemv_unwind_info(&mut self) { let segment = self.obj.segment_name(StandardSegment::Data).to_vec(); let section_id = self.obj.add_section( segment, b"_wasmtime_eh_frame".to_vec(), SectionKind::ReadOnlyData, ); - let mut cie = isa + let mut cie = self + .isa .create_systemv_cie() .expect("must be able to create a CIE for system-v unwind info"); let mut table = FrameTable::default(); @@ -465,7 +541,7 @@ impl<'a> ObjectBuilder<'a> { let fde = unwind_info.to_fde(Address::Constant(actual_offset as u64)); table.add_fde(cie_id, fde); } - let endian = match isa.triple().endianness().unwrap() { + let endian = match self.isa.triple().endianness().unwrap() { target_lexicon::Endianness::Little => RunTimeEndian::Little, target_lexicon::Endianness::Big => RunTimeEndian::Big, }; @@ -526,3 +602,37 @@ impl<'a> ObjectBuilder<'a> { } } } + +#[derive(Default)] +struct DummyBuilder { + data: Vec, +} + +impl TextSectionBuilder for DummyBuilder { + fn append(&mut self, _named: bool, func: &[u8], align: u32) -> u64 { + while self.data.len() % align as usize != 0 { + self.data.push(0); + } + let pos = self.data.len() as u64; + self.data.extend_from_slice(func); + pos + } + + fn resolve_reloc( + &mut self, + _offset: u64, + _reloc: Reloc, + _addend: Addend, + _target: u32, + ) -> bool { + false + } + + fn force_veneers(&mut self) { + // not implemented + } + + fn finish(&mut self) -> Vec { + mem::take(&mut self.data) + } +} diff --git a/crates/fuzzing/src/generators.rs b/crates/fuzzing/src/generators.rs index efe9302ce8..361298ca3c 100644 --- a/crates/fuzzing/src/generators.rs +++ b/crates/fuzzing/src/generators.rs @@ -20,6 +20,7 @@ use arbitrary::{Arbitrary, Unstructured}; pub struct DifferentialConfig { strategy: DifferentialStrategy, opt_level: OptLevel, + force_jump_veneers: bool, } impl DifferentialConfig { @@ -30,6 +31,11 @@ impl DifferentialConfig { DifferentialStrategy::Lightbeam => wasmtime::Strategy::Lightbeam, })?; config.cranelift_opt_level(self.opt_level.to_wasmtime()); + if self.force_jump_veneers { + unsafe { + config.cranelift_flag_set("wasmtime_linkopt_force_jump_veneer", "true")?; + } + } Ok(config) } } diff --git a/crates/jit/src/link.rs b/crates/jit/src/link.rs index 0b52f13fc9..08f0cbf735 100644 --- a/crates/jit/src/link.rs +++ b/crates/jit/src/link.rs @@ -1,11 +1,10 @@ //! Linking for JIT-compiled code. 
use object::read::{Object, Relocation, RelocationTarget}; -use object::{elf, File, NativeEndian as NE, ObjectSymbol, RelocationEncoding, RelocationKind}; +use object::{File, NativeEndian as NE, ObjectSymbol, RelocationEncoding, RelocationKind}; use std::convert::TryFrom; use wasmtime_runtime::libcalls; -type U32 = object::U32Bytes; type I32 = object::I32Bytes; type U64 = object::U64Bytes; @@ -36,11 +35,10 @@ pub fn apply_reloc(obj: &File, code: &mut [u8], offset: u64, r: Relocation) { } } } - _ => panic!("unexpected relocation target"), + _ => panic!("unexpected relocation target: not a symbol"), }; match (r.kind(), r.encoding(), r.size()) { - #[cfg(target_pointer_width = "64")] (RelocationKind::Absolute, RelocationEncoding::Generic, 64) => { let reloc_address = reloc_address::(code, offset); let reloc_abs = (target_func_address as u64) @@ -48,63 +46,17 @@ pub fn apply_reloc(obj: &File, code: &mut [u8], offset: u64, r: Relocation) { .unwrap(); reloc_address.set(NE, reloc_abs); } - #[cfg(target_pointer_width = "32")] - (RelocationKind::Relative, RelocationEncoding::Generic, 32) => { - let reloc_address = reloc_address::(code, offset); - let reloc_delta_u32 = (target_func_address as u32) - .wrapping_sub(reloc_address as *const _ as u32) - .checked_add(r.addend() as u32) - .unwrap(); - reloc_address.set(NE, reloc_delta_u32); - } - #[cfg(target_pointer_width = "32")] - (RelocationKind::Relative, RelocationEncoding::X86Branch, 32) => { - let reloc_address = reloc_address::(code, offset); - let reloc_delta_u32 = (target_func_address as u32) - .wrapping_sub(reloc_address as *const _ as u32) - .wrapping_add(r.addend() as u32); - reloc_address.set(NE, reloc_delta_u32); - } - #[cfg(target_pointer_width = "64")] + + // FIXME(#3009) after the old backend is removed this won't ever show up + // again so it can be removed. (RelocationKind::Relative, RelocationEncoding::Generic, 32) => { let reloc_address = reloc_address::(code, offset); - let reloc_delta_i64 = (target_func_address as i64) - .wrapping_sub(reloc_address as *const _ as i64) - .wrapping_add(r.addend()); - // TODO implement far calls mode in x64 new backend. - reloc_address.set( - NE, - i32::try_from(reloc_delta_i64).expect("relocation too large to fit in i32"), - ); - } - #[cfg(target_pointer_width = "64")] - (RelocationKind::Relative, RelocationEncoding::S390xDbl, 32) => { - let reloc_address = reloc_address::(code, offset); - let reloc_delta_i64 = (target_func_address as i64) - .wrapping_sub(reloc_address as *const _ as i64) + let val = (target_func_address as i64) .wrapping_add(r.addend()) - >> 1; - reloc_address.set( - NE, - i32::try_from(reloc_delta_i64).expect("relocation too large to fit in i32"), - ); - } - (RelocationKind::Elf(elf::R_AARCH64_CALL26), RelocationEncoding::Generic, 32) => { - let reloc_address = reloc_address::(code, offset); - let reloc_delta = (target_func_address as u64).wrapping_sub(r.addend() as u64); - // TODO: come up with a PLT-like solution for longer calls. We can't extend the - // code segment at this point, but we could conservatively allocate space at the - // end of the function during codegen, a fixed amount per call, to allow for - // potential branch islands. 
- assert!((reloc_delta as i64) < (1 << 27)); - assert!((reloc_delta as i64) >= -(1 << 27)); - let reloc_delta = reloc_delta as u32; - let reloc_delta = reloc_delta.wrapping_add(r.addend() as u32); - let delta_bits = reloc_delta >> 2; - let insn = reloc_address.get(NE); - let new_insn = (insn & 0xfc00_0000) | (delta_bits & 0x03ff_ffff); - reloc_address.set(NE, new_insn); + .wrapping_sub(reloc_address as *const _ as i64); + reloc_address.set(NE, i32::try_from(val).expect("relocation out-of-bounds")); } + other => panic!("unsupported reloc kind: {:?}", other), } } diff --git a/tests/all/main.rs b/tests/all/main.rs index a98cc91467..345cb6562f 100644 --- a/tests/all/main.rs +++ b/tests/all/main.rs @@ -25,6 +25,7 @@ mod module_serialize; mod name; mod native_hooks; mod pooling_allocator; +mod relocs; mod stack_overflow; mod store; mod table; diff --git a/tests/all/relocs.rs b/tests/all/relocs.rs new file mode 100644 index 0000000000..6dab73cd74 --- /dev/null +++ b/tests/all/relocs.rs @@ -0,0 +1,119 @@ +//! These tests are intended to exercise various relocation-based logic of +//! Wasmtime, especially the "jump veneer" insertion in the object-file-assembly +//! for when platform-specific relative call instructions can't always reach +//! their destination within the platform-specific limits. +//! +//! Note that the limits of AArch64 are primarily what's being stressed here, +//! where the jump target for a call is 26 bits. On x86_64 the jump target is +//! 32 bits, and right now object files aren't supported larger than 4GB anyway +//! so we would need a lot of other support necessary to exercise that. + +#![cfg(not(feature = "old-x86-backend"))] // multi-value not supported here + +use anyhow::Result; +use wasmtime::*; + +const MB: usize = 1 << 20; + +fn store_with_padding(padding: usize) -> Result<Store<()>> { + let mut config = Config::new(); + // This is an internal debug-only setting specifically recognized for + // basically just this set of tests. + unsafe { + config.cranelift_flag_set( + "wasmtime_linkopt_padding_between_functions", + &padding.to_string(), + )?; + } + let engine = Engine::new(&config)?; + Ok(Store::new(&engine, ())) } + +#[test] +fn forward_call_works() -> Result<()> { + let mut store = store_with_padding(128 * MB)?; + let module = Module::new( + store.engine(), + r#" + (module + (func (export "foo") (result i32) + call 1) + (func (result i32) + i32.const 4) + ) + "#, + )?; + + let i = Instance::new(&mut store, &module, &[])?; + let foo = i.get_typed_func::<(), i32, _>(&mut store, "foo")?; + assert_eq!(foo.call(&mut store, ())?, 4); + Ok(()) } + +#[test] +fn backwards_call_works() -> Result<()> { + let mut store = store_with_padding(128 * MB)?; + let module = Module::new( + store.engine(), + r#" + (module + (func (result i32) + i32.const 4) + (func (export "foo") (result i32) + call 0) + ) + "#, + )?; + + let i = Instance::new(&mut store, &module, &[])?; + let foo = i.get_typed_func::<(), i32, _>(&mut store, "foo")?; + assert_eq!(foo.call(&mut store, ())?, 4); + Ok(()) } + +#[test] +fn mixed() -> Result<()> { + test_many_call_module(store_with_padding(MB)?)
+} + +#[test] +fn mixed_forced() -> Result<()> { + let mut config = Config::new(); + unsafe { + config.cranelift_flag_set("wasmtime_linkopt_force_jump_veneer", "true")?; + } + let engine = Engine::new(&config)?; + test_many_call_module(Store::new(&engine, ())) +} + +fn test_many_call_module(mut store: Store<()>) -> Result<()> { + const N: i32 = 200; + + let mut wat = String::new(); + wat.push_str("(module\n"); + wat.push_str("(func $first (result i32) (i32.const 1))\n"); + for i in 0..N { + wat.push_str(&format!("(func (export \"{}\") (result i32 i32)\n", i)); + wat.push_str("call $first\n"); + wat.push_str(&format!("i32.const {}\n", i)); + wat.push_str("i32.add\n"); + wat.push_str("call $last\n"); + wat.push_str(&format!("i32.const {}\n", i)); + wat.push_str("i32.add)\n"); + } + wat.push_str("(func $last (result i32) (i32.const 2))\n"); + wat.push_str(")\n"); + + let module = Module::new(store.engine(), &wat)?; + + let instance = Instance::new(&mut store, &module, &[])?; + + for i in 0..N { + let name = i.to_string(); + let func = instance.get_typed_func::<(), (i32, i32), _>(&mut store, &name)?; + let (a, b) = func.call(&mut store, ())?; + assert_eq!(a, i + 1); + assert_eq!(b, i + 2); + } + Ok(()) +}