moved crates in lib/ to src/, renamed crates, modified some files' text (#660)

This commit is contained in:
lazypassion
2019-01-28 18:56:54 -05:00
committed by Dan Gohman
parent 54959cf5bb
commit 747ad3c4c5
508 changed files with 94 additions and 92 deletions


@@ -0,0 +1,145 @@
//! Code sink that writes binary machine code into contiguous memory.
//!
//! The `CodeSink` trait is the most general way of extracting binary machine code from Cranelift,
//! and it is implemented by things like the `test binemit` file test driver to generate
//! hexadecimal machine code. The `CodeSink` has some undesirable performance properties because of
//! the dual abstraction: `TargetIsa` is a trait object implemented by each supported ISA, so it
//! can't have any generic functions that could be specialized for each `CodeSink` implementation.
//! This results in many virtual function callbacks (one per `put*` call) when
//! `TargetIsa::emit_inst()` is used.
//!
//! The `MemoryCodeSink` type fixes the performance problem because it is a type known to
//! `TargetIsa` so it can specialize its machine code generation for the type. The trade-off is
//! that a `MemoryCodeSink` will always write binary machine code to raw memory. It forwards any
//! relocations to a `RelocSink` trait object. Relocations are less frequent than the
//! `CodeSink::put*` methods, so the performance impact of the virtual callbacks is less severe.
use super::{Addend, CodeOffset, CodeSink, Reloc};
use crate::ir::{ExternalName, JumpTable, SourceLoc, TrapCode};
use core::ptr::write_unaligned;
/// A `CodeSink` that writes binary machine code directly into memory.
///
/// A `MemoryCodeSink` object should be used when emitting a Cranelift IR function into executable
/// memory. It writes machine code directly to a raw pointer without any bounds checking, so make
/// sure to allocate enough memory for the whole function. The number of bytes required is returned
/// by the `Context::compile()` function.
///
/// Any relocations in the function are forwarded to the `RelocSink` trait object.
///
/// Note that `MemoryCodeSink` writes multi-byte values in the native byte order of the host. This
/// is not the right thing to do for cross compilation.
pub struct MemoryCodeSink<'a> {
data: *mut u8,
offset: isize,
/// Size of the machine code portion of the output.
pub code_size: isize,
relocs: &'a mut RelocSink,
traps: &'a mut TrapSink,
}
impl<'a> MemoryCodeSink<'a> {
/// Create a new memory code sink that writes a function to the memory pointed to by `data`.
///
/// This function is unsafe since `MemoryCodeSink` does not perform bounds checking on the
/// memory buffer, and it can't guarantee that the `data` pointer is valid.
pub unsafe fn new(data: *mut u8, relocs: &'a mut RelocSink, traps: &'a mut TrapSink) -> Self {
Self {
data,
offset: 0,
code_size: 0,
relocs,
traps,
}
}
}
/// A trait for receiving relocations for code that is emitted directly into memory.
pub trait RelocSink {
/// Add a relocation referencing an EBB at the current offset.
fn reloc_ebb(&mut self, _: CodeOffset, _: Reloc, _: CodeOffset);
/// Add a relocation referencing an external symbol at the current offset.
fn reloc_external(&mut self, _: CodeOffset, _: Reloc, _: &ExternalName, _: Addend);
/// Add a relocation referencing a jump table.
fn reloc_jt(&mut self, _: CodeOffset, _: Reloc, _: JumpTable);
}
/// A trait for receiving trap codes and offsets.
///
/// If you don't need information about possible traps, you can use the
/// [`NullTrapSink`](binemit/struct.NullTrapSink.html) implementation.
pub trait TrapSink {
/// Add trap information for a specific offset.
fn trap(&mut self, _: CodeOffset, _: SourceLoc, _: TrapCode);
}
impl<'a> CodeSink for MemoryCodeSink<'a> {
fn offset(&self) -> CodeOffset {
self.offset as CodeOffset
}
fn put1(&mut self, x: u8) {
unsafe {
write_unaligned(self.data.offset(self.offset), x);
}
self.offset += 1;
}
fn put2(&mut self, x: u16) {
unsafe {
#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))]
write_unaligned(self.data.offset(self.offset) as *mut u16, x);
}
self.offset += 2;
}
fn put4(&mut self, x: u32) {
unsafe {
#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))]
write_unaligned(self.data.offset(self.offset) as *mut u32, x);
}
self.offset += 4;
}
fn put8(&mut self, x: u64) {
unsafe {
#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))]
write_unaligned(self.data.offset(self.offset) as *mut u64, x);
}
self.offset += 8;
}
fn reloc_ebb(&mut self, rel: Reloc, ebb_offset: CodeOffset) {
let ofs = self.offset();
self.relocs.reloc_ebb(ofs, rel, ebb_offset);
}
fn reloc_external(&mut self, rel: Reloc, name: &ExternalName, addend: Addend) {
let ofs = self.offset();
self.relocs.reloc_external(ofs, rel, name, addend);
}
fn reloc_jt(&mut self, rel: Reloc, jt: JumpTable) {
let ofs = self.offset();
self.relocs.reloc_jt(ofs, rel, jt);
}
fn trap(&mut self, code: TrapCode, srcloc: SourceLoc) {
let ofs = self.offset();
self.traps.trap(ofs, srcloc, code);
}
fn begin_rodata(&mut self) {
self.code_size = self.offset;
}
}
/// A `TrapSink` implementation that does nothing, which is convenient when
/// compiling code that does not rely on trapping semantics.
pub struct NullTrapSink {}
impl TrapSink for NullTrapSink {
fn trap(&mut self, _offset: CodeOffset, _srcloc: SourceLoc, _code: TrapCode) {}
}
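
As a usage illustration (not part of this file): the sketch below allocates a buffer of the size reported by `Context::compile()`, supplies a small recording `RelocSink` plus the `NullTrapSink` defined above, and constructs the sink with the unsafe `new`. `CollectRelocs`, `emit_into_buffer`, and the `cranelift_codegen::` import paths are assumptions for the sketch, not names taken from this commit.

```rust
// Illustrative sketch only; the helper names below are hypothetical.
use cranelift_codegen::binemit::{
    Addend, CodeOffset, MemoryCodeSink, NullTrapSink, Reloc, RelocSink,
};
use cranelift_codegen::ir::{ExternalName, JumpTable};

/// A `RelocSink` that just records where each relocation was requested.
struct CollectRelocs(Vec<(CodeOffset, Reloc)>);

impl RelocSink for CollectRelocs {
    fn reloc_ebb(&mut self, at: CodeOffset, reloc: Reloc, _ebb_offset: CodeOffset) {
        self.0.push((at, reloc));
    }
    fn reloc_external(&mut self, at: CodeOffset, reloc: Reloc, _name: &ExternalName, _addend: Addend) {
        self.0.push((at, reloc));
    }
    fn reloc_jt(&mut self, at: CodeOffset, reloc: Reloc, _jt: JumpTable) {
        self.0.push((at, reloc));
    }
}

/// Hypothetical helper: `code_size` stands in for the value reported by `Context::compile()`.
fn emit_into_buffer(code_size: usize) -> Vec<u8> {
    // The buffer must be at least `code_size` bytes; `MemoryCodeSink` does no bounds checking.
    let mut buffer = vec![0u8; code_size];
    let mut relocs = CollectRelocs(Vec::new());
    let mut traps = NullTrapSink {};
    let _sink = unsafe { MemoryCodeSink::new(buffer.as_mut_ptr(), &mut relocs, &mut traps) };
    // In real use, `_sink` would now be driven by the emission entry point
    // (e.g. `TargetIsa::emit_function()`) to fill `buffer` with machine code.
    buffer
}
```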


@@ -0,0 +1,140 @@
//! Binary machine code emission.
//!
//! The `binemit` module contains code for translating Cranelift's intermediate representation into
//! binary machine code.
mod memorysink;
mod relaxation;
mod shrink;
pub use self::memorysink::{MemoryCodeSink, NullTrapSink, RelocSink, TrapSink};
pub use self::relaxation::relax_branches;
pub use self::shrink::shrink_instructions;
pub use crate::regalloc::RegDiversions;
use crate::ir::{ExternalName, Function, Inst, JumpTable, SourceLoc, TrapCode};
use core::fmt;
/// Offset in bytes from the beginning of the function.
///
/// Cranelift can be used as a cross compiler, so we don't want to use a type like `usize` which
/// depends on the *host* platform, not the *target* platform.
pub type CodeOffset = u32;
/// Addend to add to the symbol value.
pub type Addend = i64;
/// Relocation kinds for every ISA
#[derive(Copy, Clone, Debug)]
pub enum Reloc {
/// absolute 4-byte
Abs4,
/// absolute 8-byte
Abs8,
/// x86 PC-relative 4-byte
X86PCRel4,
/// x86 call to PC-relative 4-byte
X86CallPCRel4,
/// x86 call to PLT-relative 4-byte
X86CallPLTRel4,
/// x86 GOT PC-relative 4-byte
X86GOTPCRel4,
/// Arm32 call target
Arm32Call,
/// Arm64 call target
Arm64Call,
/// RISC-V call target
RiscvCall,
}
impl fmt::Display for Reloc {
/// The `Display` implementation drops the architecture prefix, since it's used in contexts where
/// the architecture is already unambiguous, e.g. CLIF syntax with the ISA specified. In other
/// contexts, use `Debug`.
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Reloc::Abs4 => write!(f, "Abs4"),
Reloc::Abs8 => write!(f, "Abs8"),
Reloc::X86PCRel4 => write!(f, "PCRel4"),
Reloc::X86CallPCRel4 => write!(f, "CallPCRel4"),
Reloc::X86CallPLTRel4 => write!(f, "CallPLTRel4"),
Reloc::X86GOTPCRel4 => write!(f, "GOTPCRel4"),
Reloc::Arm32Call | Reloc::Arm64Call | Reloc::RiscvCall => write!(f, "Call"),
}
}
}
/// Abstract interface for adding bytes to the code segment.
///
/// A `CodeSink` will receive all of the machine code for a function. It also accepts relocations
/// which are locations in the code section that need to be fixed up when linking.
pub trait CodeSink {
/// Get the current position.
fn offset(&self) -> CodeOffset;
/// Add 1 byte to the code section.
fn put1(&mut self, _: u8);
/// Add 2 bytes to the code section.
fn put2(&mut self, _: u16);
/// Add 4 bytes to the code section.
fn put4(&mut self, _: u32);
/// Add 8 bytes to the code section.
fn put8(&mut self, _: u64);
/// Add a relocation referencing an EBB at the current offset.
fn reloc_ebb(&mut self, _: Reloc, _: CodeOffset);
/// Add a relocation referencing an external symbol plus the addend at the current offset.
fn reloc_external(&mut self, _: Reloc, _: &ExternalName, _: Addend);
/// Add a relocation referencing a jump table.
fn reloc_jt(&mut self, _: Reloc, _: JumpTable);
/// Add trap information for the current offset.
fn trap(&mut self, _: TrapCode, _: SourceLoc);
/// Code output is complete; read-only data may follow.
fn begin_rodata(&mut self);
}
/// Report a bad encoding error.
#[cold]
pub fn bad_encoding(func: &Function, inst: Inst) -> ! {
panic!(
"Bad encoding {} for {}",
func.encodings[inst],
func.dfg.display_inst(inst, None)
);
}
/// Emit a function to `sink`, given an instruction emitter function.
///
/// This function is called from the `TargetIsa::emit_function()` implementations with the
/// appropriate instruction emitter.
pub fn emit_function<CS, EI>(func: &Function, emit_inst: EI, sink: &mut CS)
where
CS: CodeSink,
EI: Fn(&Function, Inst, &mut RegDiversions, &mut CS),
{
let mut divert = RegDiversions::new();
for ebb in func.layout.ebbs() {
divert.clear();
debug_assert_eq!(func.offsets[ebb], sink.offset());
for inst in func.layout.ebb_insts(ebb) {
emit_inst(func, inst, &mut divert, sink);
}
}
sink.begin_rodata();
// output jump tables
for (jt, jt_data) in func.jump_tables.iter() {
let jt_offset = func.jt_offsets[jt];
for ebb in jt_data.iter() {
let rel_offset: i32 = func.offsets[*ebb] as i32 - jt_offset as i32;
sink.put4(rel_offset as u32)
}
}
}
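
To make the consumer side of `CodeSink` concrete, here is a minimal sketch of an implementation that appends bytes to a `Vec` and ignores relocations and traps, roughly the role played by the hexadecimal sink in the `test binemit` driver mentioned earlier. `VecSink` is a hypothetical name, the little-endian byte order is an arbitrary choice for the sketch (the real `MemoryCodeSink` uses host byte order), and the `cranelift_codegen::` paths are assumed.

```rust
// Hypothetical sketch of a byte-collecting `CodeSink`; not part of the crate.
use cranelift_codegen::binemit::{Addend, CodeOffset, CodeSink, Reloc};
use cranelift_codegen::ir::{ExternalName, JumpTable, SourceLoc, TrapCode};

#[derive(Default)]
struct VecSink {
    bytes: Vec<u8>,
    rodata_start: Option<CodeOffset>,
}

impl CodeSink for VecSink {
    fn offset(&self) -> CodeOffset {
        self.bytes.len() as CodeOffset
    }
    fn put1(&mut self, x: u8) {
        self.bytes.push(x);
    }
    fn put2(&mut self, x: u16) {
        self.bytes.extend_from_slice(&x.to_le_bytes());
    }
    fn put4(&mut self, x: u32) {
        self.bytes.extend_from_slice(&x.to_le_bytes());
    }
    fn put8(&mut self, x: u64) {
        self.bytes.extend_from_slice(&x.to_le_bytes());
    }
    // This sketch drops relocation and trap information on the floor.
    fn reloc_ebb(&mut self, _: Reloc, _: CodeOffset) {}
    fn reloc_external(&mut self, _: Reloc, _: &ExternalName, _: Addend) {}
    fn reloc_jt(&mut self, _: Reloc, _: JumpTable) {}
    fn trap(&mut self, _: TrapCode, _: SourceLoc) {}
    fn begin_rodata(&mut self) {
        // Everything emitted after this point is read-only data (e.g. jump tables).
        self.rodata_start = Some(self.offset());
    }
}
```

Such a sink could then be driven by `emit_function` together with a per-ISA instruction emitter, in the way the `test binemit` driver is described above, and its bytes dumped as hex for inspection.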


@@ -0,0 +1,216 @@
//! Branch relaxation and offset computation.
//!
//! # EBB header offsets
//!
//! Before we can generate binary machine code for branch instructions, we need to know the final
//! offsets of all the EBB headers in the function. This information is encoded in the
//! `func.offsets` table.
//!
//! # Branch relaxation
//!
//! Branch relaxation is the process of ensuring that all branches in the function have enough
//! range to encode their destination. It is common to have multiple branch encodings in an ISA.
//! For example, x86 branches can have either an 8-bit or a 32-bit displacement.
//!
//! On RISC architectures, it can happen that conditional branches have a shorter range than
//! unconditional branches:
//!
//! ```clif
//! brz v1, ebb17
//! ```
//!
//! can be transformed into:
//!
//! ```clif
//! brnz v1, ebb23
//! jump ebb17
//! ebb23:
//! ```
use crate::binemit::CodeOffset;
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::{Function, InstructionData, Opcode};
use crate::isa::{EncInfo, TargetIsa};
use crate::iterators::IteratorExtras;
use crate::regalloc::RegDiversions;
use crate::timing;
use crate::CodegenResult;
use log::debug;
/// Relax branches and compute the final layout of EBB headers in `func`.
///
/// Fill in the `func.offsets` table so the function is ready for binary emission.
pub fn relax_branches(func: &mut Function, isa: &TargetIsa) -> CodegenResult<CodeOffset> {
let _tt = timing::relax_branches();
let encinfo = isa.encoding_info();
// Clear all offsets so we can recognize EBBs that haven't been visited yet.
func.offsets.clear();
func.offsets.resize(func.dfg.num_ebbs());
// Start by inserting fall-through instructions.
fallthroughs(func);
let mut offset = 0;
let mut divert = RegDiversions::new();
// The relaxation algorithm iterates to convergence.
let mut go_again = true;
while go_again {
go_again = false;
offset = 0;
// Visit all instructions in layout order
let mut cur = FuncCursor::new(func);
while let Some(ebb) = cur.next_ebb() {
divert.clear();
// Record the offset for `ebb` and make sure we iterate until offsets are stable.
if cur.func.offsets[ebb] != offset {
debug_assert!(
cur.func.offsets[ebb] < offset,
"Code shrinking during relaxation"
);
cur.func.offsets[ebb] = offset;
go_again = true;
}
while let Some(inst) = cur.next_inst() {
divert.apply(&cur.func.dfg[inst]);
let enc = cur.func.encodings[inst];
// See if this might be a branch that is out of range.
if let Some(range) = encinfo.branch_range(enc) {
if let Some(dest) = cur.func.dfg[inst].branch_destination() {
let dest_offset = cur.func.offsets[dest];
// This could be an out-of-range branch.
// Relax it, unless the destination offset hasn't been computed yet: an offset of 0 means
// the EBB hasn't been visited, except for the entry block, whose offset really is 0.
if !range.contains(offset, dest_offset)
&& (dest_offset != 0 || Some(dest) == cur.func.layout.entry_block())
{
offset +=
relax_branch(&mut cur, &divert, offset, dest_offset, &encinfo, isa);
continue;
}
}
}
offset += encinfo.byte_size(enc, inst, &divert, &cur.func);
}
}
}
for (jt, jt_data) in func.jump_tables.iter() {
func.jt_offsets[jt] = offset;
// TODO: this should be computed based on the min size needed to hold
// the furthest branch.
offset += jt_data.len() as u32 * 4;
}
Ok(offset)
}
/// Convert `jump` instructions to `fallthrough` instructions where possible and verify that any
/// existing `fallthrough` instructions are correct.
fn fallthroughs(func: &mut Function) {
for (ebb, succ) in func.layout.ebbs().adjacent_pairs() {
let term = func.layout.last_inst(ebb).expect("EBB has no terminator.");
if let InstructionData::Jump {
ref mut opcode,
destination,
..
} = func.dfg[term]
{
match *opcode {
Opcode::Fallthrough => {
// Somebody used a fall-through instruction before the branch relaxation pass.
// Make sure it is correct, i.e. the destination is the layout successor.
debug_assert_eq!(destination, succ, "Illegal fall-through in {}", ebb)
}
Opcode::Jump => {
// If this is a jump to the successor EBB, change it to a fall-through.
if destination == succ {
*opcode = Opcode::Fallthrough;
func.encodings[term] = Default::default();
}
}
_ => {}
}
}
}
}
/// Relax the branch instruction at `cur` so it can cover the range `offset - dest_offset`.
///
/// Return the size of the replacement instructions up to and including the location where `cur` is
/// left.
fn relax_branch(
cur: &mut FuncCursor,
divert: &RegDiversions,
offset: CodeOffset,
dest_offset: CodeOffset,
encinfo: &EncInfo,
isa: &TargetIsa,
) -> CodeOffset {
let inst = cur.current_inst().unwrap();
debug!(
"Relaxing [{}] {} for {:#x}-{:#x} range",
encinfo.display(cur.func.encodings[inst]),
cur.func.dfg.display_inst(inst, isa),
offset,
dest_offset
);
// Pick the first encoding that can handle the branch range.
let dfg = &cur.func.dfg;
let ctrl_type = dfg.ctrl_typevar(inst);
if let Some(enc) = isa
.legal_encodings(cur.func, &dfg[inst], ctrl_type)
.find(|&enc| {
let range = encinfo.branch_range(enc).expect("Branch with no range");
if !range.contains(offset, dest_offset) {
debug!(" trying [{}]: out of range", encinfo.display(enc));
false
} else if encinfo.operand_constraints(enc)
!= encinfo.operand_constraints(cur.func.encodings[inst])
{
// Conservatively give up if the encoding has different constraints
// than the original, so that we don't risk picking a new encoding
// which the existing operands don't satisfy. We can't check for
// validity directly because we don't have a RegDiversions active so
// we don't know which registers are actually in use.
debug!(" trying [{}]: constraints differ", encinfo.display(enc));
false
} else {
debug!(" trying [{}]: OK", encinfo.display(enc));
true
}
})
{
cur.func.encodings[inst] = enc;
return encinfo.byte_size(enc, inst, &divert, &cur.func);
}
// Note: On some RISC ISAs, conditional branches have shorter range than unconditional
// branches, so one way of extending the range of a conditional branch is to invert its
// condition and make it branch over an unconditional jump which has the larger range.
//
// Splitting the EBB is problematic this late because there may be register diversions in
// effect across the conditional branch, and they can't survive the control flow edge to a new
// EBB. We have two options for handling that:
//
// 1. Set a flag on the new EBB that indicates it wants to preserve the register diversions of
// its layout predecessor, or
// 2. Use an encoding macro for the branch-over-jump pattern so we don't need to split the EBB.
//
// It seems that 1. would allow us to share code among RISC ISAs that need this.
//
// We can't allow register diversions to survive from the layout predecessor because the layout
// predecessor could contain kill points for some values that are live in this EBB, and
// diversions are not automatically cancelled when the live range of a value ends.
// This assumes solution 2. above:
panic!("No branch in range for {:#x}-{:#x}", offset, dest_offset);
}
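
The core question relaxation keeps asking is whether the signed displacement to the destination fits in the bits the current encoding offers. The self-contained sketch below models that check; `BranchRange` here is a made-up stand-in, not the `isa` type returned by `EncInfo::branch_range()`, and the `origin`/two's-complement-window semantics are assumptions for the sketch.

```rust
// Illustrative model of a branch-range check; not the Cranelift `BranchRange` API.
struct BranchRange {
    /// Displacements are measured from `origin` bytes past the branch's own offset
    /// (e.g. from the end of the instruction on x86).
    origin: u32,
    /// Number of bits in the signed displacement field.
    bits: u8,
}

impl BranchRange {
    fn contains(&self, branch_offset: u32, dest_offset: u32) -> bool {
        let disp = dest_offset as i64 - (branch_offset + self.origin) as i64;
        let half = 1i64 << (self.bits - 1);
        -half <= disp && disp < half
    }
}

fn main() {
    // An 8-bit displacement (like a short x86 jump) reaches about +/-128 bytes...
    let short = BranchRange { origin: 2, bits: 8 };
    assert!(short.contains(0x100, 0x120));
    // ...but not a destination 4 KiB away; `relax_branch` would then have to pick a
    // wider encoding, or the ISA would need the branch-over-jump pattern described above.
    assert!(!short.contains(0x100, 0x1100));
    println!("range checks behave as expected");
}
```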


@@ -0,0 +1,70 @@
//! Instruction shrinking.
//!
//! Sometimes there are multiple valid encodings for a given instruction. Cranelift often initially
//! chooses the largest one, because this typically provides the register allocator the most
//! flexibility. However, once register allocation is done, this is no longer important, and we
//! can switch to smaller encodings when possible.
use crate::ir::instructions::InstructionData;
use crate::ir::Function;
use crate::isa::TargetIsa;
use crate::regalloc::RegDiversions;
use crate::timing;
use log::debug;
/// Pick the smallest valid encodings for instructions.
pub fn shrink_instructions(func: &mut Function, isa: &TargetIsa) {
let _tt = timing::shrink_instructions();
let encinfo = isa.encoding_info();
let mut divert = RegDiversions::new();
for ebb in func.layout.ebbs() {
divert.clear();
for inst in func.layout.ebb_insts(ebb) {
let enc = func.encodings[inst];
if enc.is_legal() {
// regmove/regfill/regspill are special instructions with register immediates
// that are represented as normal operands, so the normal predicates below don't
// handle them correctly.
//
// Also, they need to be presented to the `RegDiversions` to update the
// location tracking.
//
// TODO: Eventually, we want the register allocator to avoid leaving these special
// instructions behind, but for now, just temporarily avoid trying to shrink them.
match func.dfg[inst] {
InstructionData::RegMove { .. }
| InstructionData::RegFill { .. }
| InstructionData::RegSpill { .. } => {
divert.apply(&func.dfg[inst]);
continue;
}
_ => (),
}
let ctrl_type = func.dfg.ctrl_typevar(inst);
// Pick the smallest encoding whose constraints are satisfied.
let best_enc = isa
.legal_encodings(func, &func.dfg[inst], ctrl_type)
.filter(|e| encinfo.constraints[e.recipe()].satisfied(inst, &divert, &func))
.min_by_key(|e| encinfo.byte_size(*e, inst, &divert, &func))
.unwrap();
if best_enc != enc {
func.encodings[inst] = best_enc;
debug!(
"Shrunk [{}] to [{}] in {}, reducing the size from {} to {}",
encinfo.display(enc),
encinfo.display(best_enc),
func.dfg.display_inst(inst, isa),
encinfo.byte_size(enc, inst, &divert, &func),
encinfo.byte_size(best_enc, inst, &divert, &func)
);
}
}
}
}
}
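
The selection above boils down to "filter by satisfied constraints, then take the minimum byte size". The standalone sketch below shows that pattern with made-up `Candidate` data; the real code iterates `legal_encodings()` and queries `encinfo` for constraints and sizes.

```rust
// Hypothetical stand-in for the encoding-selection pattern used above.
#[derive(Clone, Copy, Debug)]
struct Candidate {
    bytes: u8,
    constraints_ok: bool,
}

fn pick_smallest(candidates: &[Candidate]) -> Option<Candidate> {
    candidates
        .iter()
        .copied()
        .filter(|c| c.constraints_ok) // analogous to `constraints[...].satisfied(...)`
        .min_by_key(|c| c.bytes)      // analogous to `encinfo.byte_size(...)`
}

fn main() {
    let candidates = [
        Candidate { bytes: 6, constraints_ok: true },  // the wide form chosen before regalloc
        Candidate { bytes: 2, constraints_ok: false }, // shorter, but its register constraints fail
        Candidate { bytes: 3, constraints_ok: true },  // shortest form that is still satisfied
    ];
    assert_eq!(pick_smallest(&candidates).unwrap().bytes, 3);
}
```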