moved crates in lib/ to src/, renamed crates, modified some files' text (#660)

This commit is contained in:
lazypassion
2019-01-28 18:56:54 -05:00
committed by Dan Gohman
parent 54959cf5bb
commit 747ad3c4c5
508 changed files with 94 additions and 92 deletions

View File

@@ -0,0 +1,222 @@
//! Common helper code for ABI lowering.
//!
//! This module provides functions and data structures that are useful for implementing the
//! `TargetIsa::legalize_signature()` method.
use crate::ir::{AbiParam, ArgumentExtension, ArgumentLoc, Type};
use core::cmp::Ordering;
use std::vec::Vec;
/// Legalization action to perform on a single argument or return value when converting a
/// signature.
///
/// An argument may go through a sequence of legalization steps before it reaches the final
/// `Assign` action.
#[derive(Clone, Copy, Debug)]
pub enum ArgAction {
/// Assign the argument to the given location.
Assign(ArgumentLoc),
/// Convert the argument, then call again.
///
/// This action can split an integer type into two smaller integer arguments, or it can split a
/// SIMD vector into halves.
Convert(ValueConversion),
}
impl From<ArgumentLoc> for ArgAction {
fn from(x: ArgumentLoc) -> Self {
ArgAction::Assign(x)
}
}
impl From<ValueConversion> for ArgAction {
fn from(x: ValueConversion) -> Self {
ArgAction::Convert(x)
}
}
/// Legalization action to be applied to a value that is being passed to or from a legalized ABI.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ValueConversion {
/// Split an integer type into low and high parts, using `isplit`.
IntSplit,
/// Split a vector type into halves with identical lane types, using `vsplit`.
VectorSplit,
/// Bit-cast to an integer type of the same size.
IntBits,
/// Sign-extend integer value to the required type.
Sext(Type),
/// Unsigned zero-extend value to the required type.
Uext(Type),
}
impl ValueConversion {
/// Apply this conversion to a type, returning the converted type.
pub fn apply(self, ty: Type) -> Type {
match self {
ValueConversion::IntSplit => ty.half_width().expect("Integer type too small to split"),
ValueConversion::VectorSplit => ty.half_vector().expect("Not a vector"),
ValueConversion::IntBits => Type::int(ty.bits()).expect("Bad integer size"),
ValueConversion::Sext(nty) | ValueConversion::Uext(nty) => nty,
}
}
/// Is this a split conversion that results in two arguments?
pub fn is_split(self) -> bool {
match self {
ValueConversion::IntSplit | ValueConversion::VectorSplit => true,
_ => false,
}
}
}
/// Common trait for assigning arguments to registers or stack locations.
///
/// This will be implemented by individual ISAs.
pub trait ArgAssigner {
/// Pick an assignment action for function argument (or return value) `arg`.
fn assign(&mut self, arg: &AbiParam) -> ArgAction;
}
/// Legalize the arguments in `args` using the given argument assigner.
///
/// This function can be used for both arguments and return values.
pub fn legalize_args<AA: ArgAssigner>(args: &mut Vec<AbiParam>, aa: &mut AA) {
// Iterate over the arguments.
// We may need to mutate the vector in place, so don't use a normal iterator, and clone the
// argument to avoid holding a reference.
let mut argno = 0;
while let Some(arg) = args.get(argno).cloned() {
// Leave the pre-assigned arguments alone.
// We'll assume that they don't interfere with our assignments.
if arg.location.is_assigned() {
argno += 1;
continue;
}
match aa.assign(&arg) {
// Assign argument to a location and move on to the next one.
ArgAction::Assign(loc) => {
args[argno].location = loc;
argno += 1;
}
// Convert the argument, possibly splitting it into two smaller ones, then revisit.
ArgAction::Convert(conv) => {
let value_type = conv.apply(arg.value_type);
let new_arg = AbiParam { value_type, ..arg };
args[argno].value_type = value_type;
if conv.is_split() {
args.insert(argno + 1, new_arg);
}
}
}
}
}
/// Determine the right action to take when passing a `have` value type to a call signature where
/// the next argument is `arg`, which has a different value type.
///
/// The signature legalization process in `legalize_args` above can replace a single argument value
/// with multiple arguments of smaller types. It can also change the type of an integer argument to
/// a larger integer type, requiring the smaller value to be sign- or zero-extended.
///
/// The legalizer needs to repair the values at all ABI boundaries:
///
/// - Incoming function arguments to the entry EBB.
/// - Function arguments passed to a call.
/// - Return values from a call.
/// - Return values passed to a return instruction.
///
/// The `legalize_abi_value` function helps the legalizer with the process. When the legalizer
/// needs to pass a pre-legalized `have` argument, but the ABI argument `arg` has a different value
/// type, `legalize_abi_value(have, arg)` tells the legalizer how to create the needed value type
/// for the argument.
///
/// It may be necessary to call `legalize_abi_value` more than once for a given argument before the
/// desired argument type appears. This will happen when a vector or integer type needs to be split
/// more than once, for example.
pub fn legalize_abi_value(have: Type, arg: &AbiParam) -> ValueConversion {
let have_bits = have.bits();
let arg_bits = arg.value_type.bits();
match have_bits.cmp(&arg_bits) {
// We have fewer bits than the ABI argument.
Ordering::Less => {
debug_assert!(
have.is_int() && arg.value_type.is_int(),
"Can only extend integer values"
);
match arg.extension {
ArgumentExtension::Uext => ValueConversion::Uext(arg.value_type),
ArgumentExtension::Sext => ValueConversion::Sext(arg.value_type),
_ => panic!("No argument extension specified"),
}
}
// We have the same number of bits as the argument.
Ordering::Equal => {
// This must be an integer vector that is split and then extended.
debug_assert!(arg.value_type.is_int());
debug_assert!(have.is_vector(), "expected vector type, got {}", have);
ValueConversion::VectorSplit
}
// We have more bits than the argument.
Ordering::Greater => {
if have.is_vector() {
ValueConversion::VectorSplit
} else if have.is_float() {
// Convert a float to int so it can be split the next time.
// ARM would do this to pass an `f64` in two registers.
ValueConversion::IntBits
} else {
ValueConversion::IntSplit
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::ir::types;
use crate::ir::AbiParam;
#[test]
fn legalize() {
let mut arg = AbiParam::new(types::I32);
assert_eq!(
legalize_abi_value(types::I64X2, &arg),
ValueConversion::VectorSplit
);
assert_eq!(
legalize_abi_value(types::I64, &arg),
ValueConversion::IntSplit
);
// Vector of integers is broken down, then sign-extended.
arg.extension = ArgumentExtension::Sext;
assert_eq!(
legalize_abi_value(types::I16X4, &arg),
ValueConversion::VectorSplit
);
assert_eq!(
legalize_abi_value(types::I16.by(2).unwrap(), &arg),
ValueConversion::VectorSplit
);
assert_eq!(
legalize_abi_value(types::I16, &arg),
ValueConversion::Sext(types::I32)
);
// 64-bit float is split as an integer.
assert_eq!(
legalize_abi_value(types::F64, &arg),
ValueConversion::IntBits
);
}
}
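To make the `ArgAssigner` contract concrete, here is a minimal sketch using the items defined above; `ToyAssigner`, its register numbering, and the missing stack fallback are illustrative assumptions, not taken from any real ISA backend:

```rust
/// Hypothetical assigner: hands out consecutive register units and asks
/// for any integer wider than 32 bits to be split and revisited.
struct ToyAssigner {
    next_reg: u16,
}

impl ArgAssigner for ToyAssigner {
    fn assign(&mut self, arg: &AbiParam) -> ArgAction {
        if arg.value_type.is_int() && arg.value_type.bits() > 32 {
            // `legalize_args` applies the split, then calls us again.
            return ValueConversion::IntSplit.into();
        }
        let loc = ArgumentLoc::Reg(self.next_reg);
        self.next_reg += 1;
        loc.into()
    }
}

#[test]
fn toy_assigner_splits_i64() {
    use crate::ir::types;
    let mut args = vec![AbiParam::new(types::I64)];
    legalize_args(&mut args, &mut ToyAssigner { next_reg: 0 });
    // The `i64` was split into two `i32` halves, each in its own register.
    assert_eq!(args.len(), 2);
    assert!(args.iter().all(|a| a.location.is_assigned()));
}
```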

View File

@@ -0,0 +1,145 @@
//! Code sink that writes binary machine code into contiguous memory.
//!
//! The `CodeSink` trait is the most general way of extracting binary machine code from Cranelift,
//! and it is implemented by things like the `test binemit` file test driver to generate
//! hexadecimal machine code. The `CodeSink` has some undesirable performance properties because of
//! the dual abstraction: `TargetIsa` is a trait object implemented by each supported ISA, so it
//! can't have any generic functions that could be specialized for each `CodeSink` implementation.
//! This results in many virtual function callbacks (one per `put*` call) when
//! `TargetIsa::emit_inst()` is used.
//!
//! The `MemoryCodeSink` type fixes the performance problem because it is a type known to
//! `TargetIsa` so it can specialize its machine code generation for the type. The trade-off is
//! that a `MemoryCodeSink` will always write binary machine code to raw memory. It forwards any
//! relocations to a `RelocSink` trait object. Relocations are less frequent than the
//! `CodeSink::put*` methods, so the performance impact of the virtual callbacks is less severe.
use super::{Addend, CodeOffset, CodeSink, Reloc};
use crate::ir::{ExternalName, JumpTable, SourceLoc, TrapCode};
use core::ptr::write_unaligned;
/// A `CodeSink` that writes binary machine code directly into memory.
///
/// A `MemoryCodeSink` object should be used when emitting a Cranelift IR function into executable
/// memory. It writes machine code directly to a raw pointer without any bounds checking, so make
/// sure to allocate enough memory for the whole function. The number of bytes required is returned
/// by the `Context::compile()` function.
///
/// Any relocations in the function are forwarded to the `RelocSink` trait object.
///
/// Note that `MemoryCodeSink` writes multi-byte values in the native byte order of the host. This
/// is not the right thing to do for cross compilation.
pub struct MemoryCodeSink<'a> {
data: *mut u8,
offset: isize,
/// Size of the machine code portion of the output.
pub code_size: isize,
relocs: &'a mut RelocSink,
traps: &'a mut TrapSink,
}
impl<'a> MemoryCodeSink<'a> {
/// Create a new memory code sink that writes a function to the memory pointed to by `data`.
///
/// This function is unsafe since `MemoryCodeSink` does not perform bounds checking on the
/// memory buffer, and it can't guarantee that the `data` pointer is valid.
pub unsafe fn new(data: *mut u8, relocs: &'a mut RelocSink, traps: &'a mut TrapSink) -> Self {
Self {
data,
offset: 0,
code_size: 0,
relocs,
traps,
}
}
}
/// A trait for receiving relocations for code that is emitted directly into memory.
pub trait RelocSink {
/// Add a relocation referencing an EBB at the current offset.
fn reloc_ebb(&mut self, _: CodeOffset, _: Reloc, _: CodeOffset);
/// Add a relocation referencing an external symbol at the current offset.
fn reloc_external(&mut self, _: CodeOffset, _: Reloc, _: &ExternalName, _: Addend);
/// Add a relocation referencing a jump table.
fn reloc_jt(&mut self, _: CodeOffset, _: Reloc, _: JumpTable);
}
/// A trait for receiving trap codes and offsets.
///
/// If you don't need information about possible traps, you can use the
/// [`NullTrapSink`](binemit/struct.NullTrapSink.html) implementation.
pub trait TrapSink {
/// Add trap information for a specific offset.
fn trap(&mut self, _: CodeOffset, _: SourceLoc, _: TrapCode);
}
impl<'a> CodeSink for MemoryCodeSink<'a> {
fn offset(&self) -> CodeOffset {
self.offset as CodeOffset
}
fn put1(&mut self, x: u8) {
unsafe {
write_unaligned(self.data.offset(self.offset), x);
}
self.offset += 1;
}
fn put2(&mut self, x: u16) {
unsafe {
#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))]
write_unaligned(self.data.offset(self.offset) as *mut u16, x);
}
self.offset += 2;
}
fn put4(&mut self, x: u32) {
unsafe {
#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))]
write_unaligned(self.data.offset(self.offset) as *mut u32, x);
}
self.offset += 4;
}
fn put8(&mut self, x: u64) {
unsafe {
#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))]
write_unaligned(self.data.offset(self.offset) as *mut u64, x);
}
self.offset += 8;
}
fn reloc_ebb(&mut self, rel: Reloc, ebb_offset: CodeOffset) {
let ofs = self.offset();
self.relocs.reloc_ebb(ofs, rel, ebb_offset);
}
fn reloc_external(&mut self, rel: Reloc, name: &ExternalName, addend: Addend) {
let ofs = self.offset();
self.relocs.reloc_external(ofs, rel, name, addend);
}
fn reloc_jt(&mut self, rel: Reloc, jt: JumpTable) {
let ofs = self.offset();
self.relocs.reloc_jt(ofs, rel, jt);
}
fn trap(&mut self, code: TrapCode, srcloc: SourceLoc) {
let ofs = self.offset();
self.traps.trap(ofs, srcloc, code);
}
fn begin_rodata(&mut self) {
self.code_size = self.offset;
}
}
/// A `TrapSink` implementation that does nothing, which is convenient when
/// compiling code that does not rely on trapping semantics.
pub struct NullTrapSink {}
impl TrapSink for NullTrapSink {
fn trap(&mut self, _offset: CodeOffset, _srcloc: SourceLoc, _code: TrapCode) {}
}
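As a usage sketch (not part of this commit), the two sink traits can be backed by simple collectors, and the unsafe constructor is paired with an up-front allocation whose size would come from an earlier `Context::compile` call:

```rust
/// Records every relocation it is handed; purely illustrative.
struct CollectRelocs(Vec<(CodeOffset, Reloc)>);

impl RelocSink for CollectRelocs {
    fn reloc_ebb(&mut self, at: CodeOffset, rel: Reloc, _ebb: CodeOffset) {
        self.0.push((at, rel));
    }
    fn reloc_external(&mut self, at: CodeOffset, rel: Reloc, _: &ExternalName, _: Addend) {
        self.0.push((at, rel));
    }
    fn reloc_jt(&mut self, at: CodeOffset, rel: Reloc, _: JumpTable) {
        self.0.push((at, rel));
    }
}

fn make_sink(code_size: usize) {
    // Allocate the whole buffer first; `MemoryCodeSink` does no bounds
    // checking, which is why its constructor is `unsafe`.
    let mut code = vec![0u8; code_size];
    let mut relocs = CollectRelocs(Vec::new());
    let mut traps = NullTrapSink {};
    let _sink = unsafe { MemoryCodeSink::new(code.as_mut_ptr(), &mut relocs, &mut traps) };
    // `_sink` would now be handed to `TargetIsa::emit_function_to_memory`.
}
```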

View File

@@ -0,0 +1,140 @@
//! Binary machine code emission.
//!
//! The `binemit` module contains code for translating Cranelift's intermediate representation into
//! binary machine code.
mod memorysink;
mod relaxation;
mod shrink;
pub use self::memorysink::{MemoryCodeSink, NullTrapSink, RelocSink, TrapSink};
pub use self::relaxation::relax_branches;
pub use self::shrink::shrink_instructions;
pub use crate::regalloc::RegDiversions;
use crate::ir::{ExternalName, Function, Inst, JumpTable, SourceLoc, TrapCode};
use core::fmt;
/// Offset in bytes from the beginning of the function.
///
/// Cranelift can be used as a cross compiler, so we don't want to use a type like `usize` which
/// depends on the *host* platform, not the *target* platform.
pub type CodeOffset = u32;
/// Addend to add to the symbol value.
pub type Addend = i64;
/// Relocation kinds for every ISA.
#[derive(Copy, Clone, Debug)]
pub enum Reloc {
/// absolute 4-byte
Abs4,
/// absolute 8-byte
Abs8,
/// x86 PC-relative 4-byte
X86PCRel4,
/// x86 call to PC-relative 4-byte
X86CallPCRel4,
/// x86 call to PLT-relative 4-byte
X86CallPLTRel4,
/// x86 GOT PC-relative 4-byte
X86GOTPCRel4,
/// Arm32 call target
Arm32Call,
/// Arm64 call target
Arm64Call,
/// RISC-V call target
RiscvCall,
}
impl fmt::Display for Reloc {
/// The `Display` implementation drops the arch, since it's used in contexts where the arch is
/// already unambiguous, e.g. clif syntax with the ISA specified. In other contexts, use `Debug`.
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Reloc::Abs4 => write!(f, "Abs4"),
Reloc::Abs8 => write!(f, "Abs8"),
Reloc::X86PCRel4 => write!(f, "PCRel4"),
Reloc::X86CallPCRel4 => write!(f, "CallPCRel4"),
Reloc::X86CallPLTRel4 => write!(f, "CallPLTRel4"),
Reloc::X86GOTPCRel4 => write!(f, "GOTPCRel4"),
Reloc::Arm32Call | Reloc::Arm64Call | Reloc::RiscvCall => write!(f, "Call"),
}
}
}
/// Abstract interface for adding bytes to the code segment.
///
/// A `CodeSink` will receive all of the machine code for a function. It also accepts relocations
/// which are locations in the code section that need to be fixed up when linking.
pub trait CodeSink {
/// Get the current position.
fn offset(&self) -> CodeOffset;
/// Add 1 byte to the code section.
fn put1(&mut self, _: u8);
/// Add 2 bytes to the code section.
fn put2(&mut self, _: u16);
/// Add 4 bytes to the code section.
fn put4(&mut self, _: u32);
/// Add 8 bytes to the code section.
fn put8(&mut self, _: u64);
/// Add a relocation referencing an EBB at the current offset.
fn reloc_ebb(&mut self, _: Reloc, _: CodeOffset);
/// Add a relocation referencing an external symbol plus the addend at the current offset.
fn reloc_external(&mut self, _: Reloc, _: &ExternalName, _: Addend);
/// Add a relocation referencing a jump table.
fn reloc_jt(&mut self, _: Reloc, _: JumpTable);
/// Add trap information for the current offset.
fn trap(&mut self, _: TrapCode, _: SourceLoc);
/// Code output is complete, read-only data may follow.
fn begin_rodata(&mut self);
}
/// Report a bad encoding error.
#[cold]
pub fn bad_encoding(func: &Function, inst: Inst) -> ! {
panic!(
"Bad encoding {} for {}",
func.encodings[inst],
func.dfg.display_inst(inst, None)
);
}
/// Emit a function to `sink`, given an instruction emitter function.
///
/// This function is called from the `TargetIsa::emit_function()` implementations with the
/// appropriate instruction emitter.
pub fn emit_function<CS, EI>(func: &Function, emit_inst: EI, sink: &mut CS)
where
CS: CodeSink,
EI: Fn(&Function, Inst, &mut RegDiversions, &mut CS),
{
let mut divert = RegDiversions::new();
for ebb in func.layout.ebbs() {
divert.clear();
debug_assert_eq!(func.offsets[ebb], sink.offset());
for inst in func.layout.ebb_insts(ebb) {
emit_inst(func, inst, &mut divert, sink);
}
}
sink.begin_rodata();
// output jump tables
for (jt, jt_data) in func.jump_tables.iter() {
let jt_offset = func.jt_offsets[jt];
for ebb in jt_data.iter() {
let rel_offset: i32 = func.offsets[*ebb] as i32 - jt_offset as i32;
sink.put4(rel_offset as u32)
}
}
}
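For contrast with `MemoryCodeSink`, here is a rough sketch of a text-producing `CodeSink` in the spirit of the `test binemit` driver mentioned above; emitting little-endian bytes and discarding relocations and traps are simplifying assumptions:

```rust
struct HexSink {
    offset: CodeOffset,
    text: String,
}

impl CodeSink for HexSink {
    fn offset(&self) -> CodeOffset {
        self.offset
    }
    fn put1(&mut self, x: u8) {
        self.text.push_str(&format!("{:02x}", x));
        self.offset += 1;
    }
    fn put2(&mut self, x: u16) {
        for b in &x.to_le_bytes() {
            self.put1(*b);
        }
    }
    fn put4(&mut self, x: u32) {
        for b in &x.to_le_bytes() {
            self.put1(*b);
        }
    }
    fn put8(&mut self, x: u64) {
        for b in &x.to_le_bytes() {
            self.put1(*b);
        }
    }
    // This sketch keeps only the bytes and drops all side-channel data.
    fn reloc_ebb(&mut self, _: Reloc, _: CodeOffset) {}
    fn reloc_external(&mut self, _: Reloc, _: &ExternalName, _: Addend) {}
    fn reloc_jt(&mut self, _: Reloc, _: JumpTable) {}
    fn trap(&mut self, _: TrapCode, _: SourceLoc) {}
    fn begin_rodata(&mut self) {}
}
```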

View File

@@ -0,0 +1,216 @@
//! Branch relaxation and offset computation.
//!
//! # EBB header offsets
//!
//! Before we can generate binary machine code for branch instructions, we need to know the final
//! offsets of all the EBB headers in the function. This information is encoded in the
//! `func.offsets` table.
//!
//! # Branch relaxation
//!
//! Branch relaxation is the process of ensuring that all branches in the function have enough
//! range to encode their destination. It is common to have multiple branch encodings in an ISA.
//! For example, x86 branches can have either an 8-bit or a 32-bit displacement.
//!
//! On RISC architectures, it can happen that conditional branches have a shorter range than
//! unconditional branches:
//!
//! ```clif
//! brz v1, ebb17
//! ```
//!
//! can be transformed into:
//!
//! ```clif
//! brnz v1, ebb23
//! jump ebb17
//! ebb23:
//! ```
use crate::binemit::CodeOffset;
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::{Function, InstructionData, Opcode};
use crate::isa::{EncInfo, TargetIsa};
use crate::iterators::IteratorExtras;
use crate::regalloc::RegDiversions;
use crate::timing;
use crate::CodegenResult;
use log::debug;
/// Relax branches and compute the final layout of EBB headers in `func`.
///
/// Fill in the `func.offsets` table so the function is ready for binary emission.
pub fn relax_branches(func: &mut Function, isa: &TargetIsa) -> CodegenResult<CodeOffset> {
let _tt = timing::relax_branches();
let encinfo = isa.encoding_info();
// Clear all offsets so we can recognize EBBs that haven't been visited yet.
func.offsets.clear();
func.offsets.resize(func.dfg.num_ebbs());
// Start by converting eligible jumps into fallthrough instructions.
fallthroughs(func);
let mut offset = 0;
let mut divert = RegDiversions::new();
// The relaxation algorithm iterates to convergence.
let mut go_again = true;
while go_again {
go_again = false;
offset = 0;
// Visit all instructions in layout order
let mut cur = FuncCursor::new(func);
while let Some(ebb) = cur.next_ebb() {
divert.clear();
// Record the offset for `ebb` and make sure we iterate until offsets are stable.
if cur.func.offsets[ebb] != offset {
debug_assert!(
cur.func.offsets[ebb] < offset,
"Code shrinking during relaxation"
);
cur.func.offsets[ebb] = offset;
go_again = true;
}
while let Some(inst) = cur.next_inst() {
divert.apply(&cur.func.dfg[inst]);
let enc = cur.func.encodings[inst];
// See if this might be a branch that is out of range.
if let Some(range) = encinfo.branch_range(enc) {
if let Some(dest) = cur.func.dfg[inst].branch_destination() {
let dest_offset = cur.func.offsets[dest];
// This could be an out-of-range branch.
// Relax it unless the destination offset has not been computed yet.
if !range.contains(offset, dest_offset)
&& (dest_offset != 0 || Some(dest) == cur.func.layout.entry_block())
{
offset +=
relax_branch(&mut cur, &divert, offset, dest_offset, &encinfo, isa);
continue;
}
}
}
offset += encinfo.byte_size(enc, inst, &divert, &cur.func);
}
}
}
for (jt, jt_data) in func.jump_tables.iter() {
func.jt_offsets[jt] = offset;
// TODO: this should be computed based on the min size needed to hold
// the furthest branch.
offset += jt_data.len() as u32 * 4;
}
Ok(offset)
}
/// Convert `jump` instructions to `fallthrough` instructions where possible and verify that any
/// existing `fallthrough` instructions are correct.
fn fallthroughs(func: &mut Function) {
for (ebb, succ) in func.layout.ebbs().adjacent_pairs() {
let term = func.layout.last_inst(ebb).expect("EBB has no terminator.");
if let InstructionData::Jump {
ref mut opcode,
destination,
..
} = func.dfg[term]
{
match *opcode {
Opcode::Fallthrough => {
// Somebody used a fall-through instruction before the branch relaxation pass.
// Make sure it is correct, i.e. the destination is the layout successor.
debug_assert_eq!(destination, succ, "Illegal fall-through in {}", ebb)
}
Opcode::Jump => {
// If this is a jump to the successor EBB, change it to a fall-through.
if destination == succ {
*opcode = Opcode::Fallthrough;
func.encodings[term] = Default::default();
}
}
_ => {}
}
}
}
}
/// Relax the branch instruction at `cur` so it can cover the range from `offset` to `dest_offset`.
///
/// Return the size of the replacement instructions up to and including the location where `cur` is
/// left.
fn relax_branch(
cur: &mut FuncCursor,
divert: &RegDiversions,
offset: CodeOffset,
dest_offset: CodeOffset,
encinfo: &EncInfo,
isa: &TargetIsa,
) -> CodeOffset {
let inst = cur.current_inst().unwrap();
debug!(
"Relaxing [{}] {} for {:#x}-{:#x} range",
encinfo.display(cur.func.encodings[inst]),
cur.func.dfg.display_inst(inst, isa),
offset,
dest_offset
);
// Pick the first encoding that can handle the branch range.
let dfg = &cur.func.dfg;
let ctrl_type = dfg.ctrl_typevar(inst);
if let Some(enc) = isa
.legal_encodings(cur.func, &dfg[inst], ctrl_type)
.find(|&enc| {
let range = encinfo.branch_range(enc).expect("Branch with no range");
if !range.contains(offset, dest_offset) {
debug!(" trying [{}]: out of range", encinfo.display(enc));
false
} else if encinfo.operand_constraints(enc)
!= encinfo.operand_constraints(cur.func.encodings[inst])
{
// Conservatively give up if the encoding has different constraints
// than the original, so that we don't risk picking a new encoding
// which the existing operands don't satisfy. We can't check for
// validity directly because we don't have a RegDiversions active so
// we don't know which registers are actually in use.
debug!(" trying [{}]: constraints differ", encinfo.display(enc));
false
} else {
debug!(" trying [{}]: OK", encinfo.display(enc));
true
}
})
{
cur.func.encodings[inst] = enc;
return encinfo.byte_size(enc, inst, &divert, &cur.func);
}
// Note: On some RISC ISAs, conditional branches have shorter range than unconditional
// branches, so one way of extending the range of a conditional branch is to invert its
// condition and make it branch over an unconditional jump which has the larger range.
//
// Splitting the EBB is problematic this late because there may be register diversions in
// effect across the conditional branch, and they can't survive the control flow edge to a new
// EBB. We have two options for handling that:
//
// 1. Set a flag on the new EBB that indicates it wants to preserve the register diversions of
// its layout predecessor, or
// 2. Use an encoding macro for the branch-over-jump pattern so we don't need to split the EBB.
//
// It seems that 1. would allow us to share code among RISC ISAs that need this.
//
// We can't allow register diversions to survive from the layout predecessor because the layout
// predecessor could contain kill points for some values that are live in this EBB, and
// diversions are not automatically cancelled when the live range of a value ends.
// This assumes solution 2. above:
panic!("No branch in range for {:#x}-{:#x}", offset, dest_offset);
}

View File

@@ -0,0 +1,70 @@
//! Instruction shrinking.
//!
//! Sometimes there are multiple valid encodings for a given instruction. Cranelift often initially
//! chooses the largest one, because this typically provides the register allocator the most
//! flexibility. However, once register allocation is done, this is no longer important, and we
//! can switch to smaller encodings when possible.
use crate::ir::instructions::InstructionData;
use crate::ir::Function;
use crate::isa::TargetIsa;
use crate::regalloc::RegDiversions;
use crate::timing;
use log::debug;
/// Pick the smallest valid encodings for instructions.
pub fn shrink_instructions(func: &mut Function, isa: &TargetIsa) {
let _tt = timing::shrink_instructions();
let encinfo = isa.encoding_info();
let mut divert = RegDiversions::new();
for ebb in func.layout.ebbs() {
divert.clear();
for inst in func.layout.ebb_insts(ebb) {
let enc = func.encodings[inst];
if enc.is_legal() {
// regmove/regfill/regspill are special instructions with register immediates
// that are represented as normal operands, so the normal predicates below don't
// handle them correctly.
//
// Also, they need to be presented to the `RegDiversions` to update the
// location tracking.
//
// TODO: Eventually, we want the register allocator to avoid leaving these special
// instructions behind, but for now, just temporarily avoid trying to shrink them.
match func.dfg[inst] {
InstructionData::RegMove { .. }
| InstructionData::RegFill { .. }
| InstructionData::RegSpill { .. } => {
divert.apply(&func.dfg[inst]);
continue;
}
_ => (),
}
let ctrl_type = func.dfg.ctrl_typevar(inst);
// Pick the smallest encoding whose constraints are satisfied.
let best_enc = isa
.legal_encodings(func, &func.dfg[inst], ctrl_type)
.filter(|e| encinfo.constraints[e.recipe()].satisfied(inst, &divert, &func))
.min_by_key(|e| encinfo.byte_size(*e, inst, &divert, &func))
.unwrap();
if best_enc != enc {
func.encodings[inst] = best_enc;
debug!(
"Shrunk [{}] to [{}] in {}, reducing the size from {} to {}",
encinfo.display(enc),
encinfo.display(best_enc),
func.dfg.display_inst(inst, isa),
encinfo.byte_size(enc, inst, &divert, &func),
encinfo.byte_size(best_enc, inst, &divert, &func)
);
}
}
}
}
}

View File

@@ -0,0 +1,161 @@
//! Small Bitset
//!
//! This module defines a struct `BitSet<T>` encapsulating a bitset built over the type `T`.
//! `T` is intended to be a primitive unsigned integer type; currently it can be any type from
//! `u8` to `u32`.
//!
//! If you would like to add support for larger bitsets in the future, you need to change the trait
//! bound Into<u32> and the u32 in the implementation of `max_bits()`.
use core::convert::{From, Into};
use core::mem::size_of;
use core::ops::{Add, BitOr, Shl, Sub};
/// A small bitset built on a single primitive integer type
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct BitSet<T>(pub T);
impl<T> BitSet<T>
where
T: Into<u32>
+ From<u8>
+ BitOr<T, Output = T>
+ Shl<u8, Output = T>
+ Sub<T, Output = T>
+ Add<T, Output = T>
+ PartialEq
+ Copy,
{
/// Maximum number of bits supported by this BitSet instance
pub fn bits() -> usize {
size_of::<T>() * 8
}
/// Maximum number of bits supported by any bitset instance at the moment.
pub fn max_bits() -> usize {
size_of::<u32>() * 8
}
/// Check if this BitSet contains the number num
pub fn contains(&self, num: u8) -> bool {
debug_assert!((num as usize) < Self::bits());
debug_assert!((num as usize) < Self::max_bits());
self.0.into() & (1 << num) != 0
}
/// Return the smallest number contained in the bitset or None if empty
pub fn min(&self) -> Option<u8> {
if self.0.into() == 0 {
None
} else {
Some(self.0.into().trailing_zeros() as u8)
}
}
/// Return the largest number contained in the bitset or None if empty
pub fn max(&self) -> Option<u8> {
if self.0.into() == 0 {
None
} else {
let leading_zeroes = self.0.into().leading_zeros() as usize;
Some((Self::max_bits() - leading_zeroes - 1) as u8)
}
}
/// Construct a BitSet with the half-open range [lo,hi) filled in
pub fn from_range(lo: u8, hi: u8) -> Self {
debug_assert!(lo <= hi);
debug_assert!((hi as usize) <= Self::bits());
let one: T = T::from(1);
// I can't just do (one << hi) - one here as the shift may overflow
let hi_rng = if hi >= 1 {
(one << (hi - 1)) + ((one << (hi - 1)) - one)
} else {
T::from(0)
};
let lo_rng = (one << lo) - one;
BitSet(hi_rng - lo_rng)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn contains() {
let s = BitSet::<u8>(255);
for i in 0..8 {
assert!(s.contains(i));
}
let s1 = BitSet::<u8>(0);
for i in 0..8 {
assert!(!s1.contains(i));
}
let s2 = BitSet::<u8>(127);
for i in 0..7 {
assert!(s2.contains(i));
}
assert!(!s2.contains(7));
let s3 = BitSet::<u8>(2 | 4 | 64);
assert!(!s3.contains(0) && !s3.contains(3) && !s3.contains(4));
assert!(!s3.contains(5) && !s3.contains(7));
assert!(s3.contains(1) && s3.contains(2) && s3.contains(6));
let s4 = BitSet::<u16>(4 | 8 | 256 | 1024);
assert!(
!s4.contains(0)
&& !s4.contains(1)
&& !s4.contains(4)
&& !s4.contains(5)
&& !s4.contains(6)
&& !s4.contains(7)
&& !s4.contains(9)
&& !s4.contains(11)
);
assert!(s4.contains(2) && s4.contains(3) && s4.contains(8) && s4.contains(10));
}
#[test]
fn minmax() {
let s = BitSet::<u8>(255);
assert_eq!(s.min(), Some(0));
assert_eq!(s.max(), Some(7));
let s1 = BitSet::<u8>(0);
assert!(s1.min() == None && s1.max() == None);
let s2 = BitSet::<u8>(127);
assert!(s2.min() == Some(0) && s2.max() == Some(6));
let s3 = BitSet::<u8>(2 | 4 | 64);
assert!(s3.min() == Some(1) && s3.max() == Some(6));
let s4 = BitSet::<u16>(4 | 8 | 256 | 1024);
assert!(s4.min() == Some(2) && s4.max() == Some(10));
}
#[test]
fn from_range() {
let s = BitSet::<u8>::from_range(5, 5);
assert!(s.0 == 0);
let s = BitSet::<u8>::from_range(0, 8);
assert!(s.0 == 255);
let s = BitSet::<u16>::from_range(0, 8);
assert!(s.0 == 255u16);
let s = BitSet::<u16>::from_range(0, 16);
assert!(s.0 == 65535u16);
let s = BitSet::<u8>::from_range(5, 6);
assert!(s.0 == 32u8);
let s = BitSet::<u8>::from_range(3, 7);
assert!(s.0 == 8 | 16 | 32 | 64);
let s = BitSet::<u16>::from_range(5, 11);
assert!(s.0 == 32 | 64 | 128 | 256 | 512 | 1024);
}
}
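One worked example of the overflow-avoiding arithmetic in `from_range`: for `T = u8` and `hi = 8`, the naive `(one << hi) - one` would shift past the width of `u8`, while the rewritten form stays in range:

```rust
#[test]
fn from_range_top_bit() {
    // hi_rng = (1 << 7) + ((1 << 7) - 1) = 128 + 127 = 255; no 8-bit shift needed.
    // lo_rng = (1 << 3) - 1 = 7, so [3, 8) maps to 255 - 7 = 0b1111_1000.
    assert_eq!(BitSet::<u8>::from_range(3, 8).0, 0b1111_1000u8);
}
```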

View File

@@ -0,0 +1,79 @@
//! The `CFGPrinter` utility.
use core::fmt::{Display, Formatter, Result, Write};
use crate::flowgraph::{BasicBlock, ControlFlowGraph};
use crate::ir::instructions::BranchInfo;
use crate::ir::Function;
/// A utility for pretty-printing the CFG of a `Function`.
pub struct CFGPrinter<'a> {
func: &'a Function,
cfg: ControlFlowGraph,
}
impl<'a> CFGPrinter<'a> {
/// Create a new CFGPrinter.
pub fn new(func: &'a Function) -> Self {
Self {
func,
cfg: ControlFlowGraph::with_function(func),
}
}
/// Write the CFG for this function to `w`.
pub fn write(&self, w: &mut Write) -> Result {
self.header(w)?;
self.ebb_nodes(w)?;
self.cfg_connections(w)?;
writeln!(w, "}}")
}
fn header(&self, w: &mut Write) -> Result {
writeln!(w, "digraph \"{}\" {{", self.func.name)?;
if let Some(entry) = self.func.layout.entry_block() {
writeln!(w, " {{rank=min; {}}}", entry)?;
}
Ok(())
}
fn ebb_nodes(&self, w: &mut Write) -> Result {
for ebb in &self.func.layout {
write!(w, " {} [shape=record, label=\"{{{}", ebb, ebb)?;
// Add all outgoing branch instructions to the label.
for inst in self.func.layout.ebb_insts(ebb) {
let idata = &self.func.dfg[inst];
match idata.analyze_branch(&self.func.dfg.value_lists) {
BranchInfo::SingleDest(dest, _) => {
write!(w, " | <{}>{} {}", inst, idata.opcode(), dest)?
}
BranchInfo::Table(table, dest) => {
write!(w, " | <{}>{} {}", inst, idata.opcode(), table)?;
if let Some(dest) = dest {
write!(w, " {}", dest)?
}
}
BranchInfo::NotABranch => {}
}
}
writeln!(w, "}}\"]")?
}
Ok(())
}
fn cfg_connections(&self, w: &mut Write) -> Result {
for ebb in &self.func.layout {
for BasicBlock { ebb: parent, inst } in self.cfg.pred_iter(ebb) {
writeln!(w, " {}:{} -> {}", parent, inst, ebb)?;
}
}
Ok(())
}
}
impl<'a> Display for CFGPrinter<'a> {
fn fmt(&self, f: &mut Formatter) -> Result {
self.write(f)
}
}
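A usage sketch: thanks to the `Display` impl, dumping a function's CFG as Graphviz DOT text is a single `write!` (the `func` argument is assumed to be an existing `Function`):

```rust
use core::fmt::Write;

fn dump_cfg(func: &Function) -> String {
    let mut dot = String::new();
    // `CFGPrinter` computes the CFG itself and renders a `digraph`.
    write!(dot, "{}", CFGPrinter::new(func)).unwrap();
    dot
}
```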

View File

@@ -0,0 +1,78 @@
//! Runtime support for precomputed constant hash tables.
//!
//! The `cranelift-codegen/meta-python/constant_hash.py` Python module can generate constant hash tables
//! using open addressing and quadratic probing. The hash tables are arrays that are guaranteed to:
//!
//! - Have a power-of-two size.
//! - Contain at least one empty slot.
//!
//! This module provides runtime support for lookups in these tables.
/// Trait that must be implemented by the entries in a constant hash table.
pub trait Table<K: Copy + Eq> {
/// Get the number of entries in this table, which must be a power of two.
fn len(&self) -> usize;
/// Get the key corresponding to the entry at `idx`, or `None` if the entry is empty.
/// The `idx` must be in range.
fn key(&self, idx: usize) -> Option<K>;
}
/// Look for `key` in `table`.
///
/// The provided `hash` value must have been computed from `key` using the same hash function that
/// was used to construct the table.
///
/// Returns `Ok(idx)` with the table index containing the found entry, or `Err(idx)` with the empty
/// sentinel entry if no entry could be found.
pub fn probe<K: Copy + Eq, T: Table<K> + ?Sized>(
table: &T,
key: K,
hash: usize,
) -> Result<usize, usize> {
debug_assert!(table.len().is_power_of_two());
let mask = table.len() - 1;
let mut idx = hash;
let mut step = 0;
loop {
idx &= mask;
match table.key(idx) {
None => return Err(idx),
Some(k) if k == key => return Ok(idx),
_ => {}
}
// Quadratic probing.
step += 1;
// When `table.len()` is a power of two, it can be proven that `idx` will visit all
// entries. This means that this loop will always terminate if the hash table has even
// one unused entry.
debug_assert!(step < table.len());
idx += step;
}
}
/// A primitive hash function for matching opcodes.
/// Must match `cranelift-codegen/meta-python/constant_hash.py` and `cranelift-codegen/meta/constant_hash.rs`.
pub fn simple_hash(s: &str) -> usize {
let mut h: u32 = 5381;
for c in s.chars() {
h = (h ^ c as u32).wrapping_add(h.rotate_right(6));
}
h as usize
}
#[cfg(test)]
mod tests {
use super::simple_hash;
#[test]
fn basic() {
// cf. `meta-python/constant_hash.py` tests.
assert_eq!(simple_hash("Hello"), 0x2fa70c01);
assert_eq!(simple_hash("world"), 0x5b0c31d5);
}
}
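A sketch of the lookup side: any array of optional keys can implement `Table`. The slot placement below is arbitrary, so this particular probe may legitimately return `Err`; tables emitted by the Python generator always place each key on its own probe path:

```rust
struct StrTable<'a>(&'a [Option<&'a str>]);

impl<'a> Table<&'a str> for StrTable<'a> {
    fn len(&self) -> usize {
        self.0.len()
    }
    fn key(&self, idx: usize) -> Option<&'a str> {
        self.0[idx]
    }
}

fn lookup(name: &str) -> Option<usize> {
    // Power-of-two size with at least one empty slot, as required above.
    let table = StrTable(&[Some("iadd"), None, Some("isub"), None]);
    probe(&table, name, simple_hash(name)).ok()
}
```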

View File

@@ -0,0 +1,327 @@
//! Cranelift compilation context and main entry point.
//!
//! When compiling many small functions, it is important to avoid repeatedly allocating and
//! deallocating the data structures needed for compilation. The `Context` struct is used to hold
//! on to memory allocations between function compilations.
//!
//! The context does not hold a `TargetIsa` instance which has to be provided as an argument
//! instead. This is because an ISA instance is immutable and can be used by multiple compilation
//! contexts concurrently. Typically, you would have one context per compilation thread and only a
//! single ISA instance.
use crate::binemit::{
relax_branches, shrink_instructions, CodeOffset, MemoryCodeSink, RelocSink, TrapSink,
};
use crate::dce::do_dce;
use crate::dominator_tree::DominatorTree;
use crate::flowgraph::ControlFlowGraph;
use crate::ir::Function;
use crate::isa::TargetIsa;
use crate::legalize_function;
use crate::licm::do_licm;
use crate::loop_analysis::LoopAnalysis;
use crate::nan_canonicalization::do_nan_canonicalization;
use crate::postopt::do_postopt;
use crate::regalloc;
use crate::result::CodegenResult;
use crate::settings::{FlagsOrIsa, OptLevel};
use crate::simple_gvn::do_simple_gvn;
use crate::simple_preopt::do_preopt;
use crate::timing;
use crate::unreachable_code::eliminate_unreachable_code;
use crate::verifier::{verify_context, verify_locations, VerifierErrors, VerifierResult};
use std::vec::Vec;
/// Persistent data structures and compilation pipeline.
pub struct Context {
/// The function we're compiling.
pub func: Function,
/// The control flow graph of `func`.
pub cfg: ControlFlowGraph,
/// Dominator tree for `func`.
pub domtree: DominatorTree,
/// Register allocation context.
pub regalloc: regalloc::Context,
/// Loop analysis of `func`.
pub loop_analysis: LoopAnalysis,
}
impl Context {
/// Allocate a new compilation context.
///
/// The returned instance should be reused for compiling multiple functions in order to avoid
/// needless allocator thrashing.
pub fn new() -> Self {
Self::for_function(Function::new())
}
/// Allocate a new compilation context with an existing Function.
///
/// The returned instance should be reused for compiling multiple functions in order to avoid
/// needless allocator thrashing.
pub fn for_function(func: Function) -> Self {
Self {
func,
cfg: ControlFlowGraph::new(),
domtree: DominatorTree::new(),
regalloc: regalloc::Context::new(),
loop_analysis: LoopAnalysis::new(),
}
}
/// Clear all data structures in this context.
pub fn clear(&mut self) {
self.func.clear();
self.cfg.clear();
self.domtree.clear();
self.regalloc.clear();
self.loop_analysis.clear();
}
/// Compile the function, and emit machine code into a `Vec<u8>`.
///
/// Run the function through all the passes necessary to generate code for the target ISA
/// represented by `isa`, as well as the final step of emitting machine code into a
/// `Vec<u8>`. The machine code is not relocated. Instead, any relocations are emitted
/// into `relocs`.
///
/// This function calls `compile` and `emit_to_memory`, taking care to resize `mem` as
/// needed, so it provides a safe interface.
pub fn compile_and_emit(
&mut self,
isa: &TargetIsa,
mem: &mut Vec<u8>,
relocs: &mut RelocSink,
traps: &mut TrapSink,
) -> CodegenResult<()> {
let code_size = self.compile(isa)?;
let old_len = mem.len();
mem.resize(old_len + code_size as usize, 0);
unsafe { self.emit_to_memory(isa, mem.as_mut_ptr().add(old_len), relocs, traps) };
Ok(())
}
/// Compile the function.
///
/// Run the function through all the passes necessary to generate code for the target ISA
/// represented by `isa`. This does not include the final step of emitting machine code into a
/// code sink.
///
/// Returns the size of the function's code.
pub fn compile(&mut self, isa: &TargetIsa) -> CodegenResult<CodeOffset> {
let _tt = timing::compile();
self.verify_if(isa)?;
self.compute_cfg();
if isa.flags().opt_level() != OptLevel::Fastest {
self.preopt(isa)?;
}
if isa.flags().enable_nan_canonicalization() {
self.canonicalize_nans(isa)?;
}
self.legalize(isa)?;
if isa.flags().opt_level() != OptLevel::Fastest {
self.postopt(isa)?;
}
if isa.flags().opt_level() == OptLevel::Best {
self.compute_domtree();
self.compute_loop_analysis();
self.licm(isa)?;
self.simple_gvn(isa)?;
}
self.compute_domtree();
self.eliminate_unreachable_code(isa)?;
if isa.flags().opt_level() != OptLevel::Fastest {
self.dce(isa)?;
}
self.regalloc(isa)?;
self.prologue_epilogue(isa)?;
if isa.flags().opt_level() == OptLevel::Best {
self.shrink_instructions(isa)?;
}
self.relax_branches(isa)
}
/// Emit machine code directly into raw memory.
///
/// Write all of the function's machine code to the memory at `mem`. The size of the machine
/// code is returned by `compile` above.
///
/// The machine code is not relocated. Instead, any relocations are emitted into `relocs`.
///
/// This function is unsafe since it does not perform bounds checking on the memory buffer,
/// and it can't guarantee that the `mem` pointer is valid.
pub unsafe fn emit_to_memory(
&self,
isa: &TargetIsa,
mem: *mut u8,
relocs: &mut RelocSink,
traps: &mut TrapSink,
) {
let _tt = timing::binemit();
isa.emit_function_to_memory(&self.func, &mut MemoryCodeSink::new(mem, relocs, traps));
}
/// Run the verifier on the function.
///
/// Also check that the dominator tree and control flow graph are consistent with the function.
pub fn verify<'a, FOI: Into<FlagsOrIsa<'a>>>(&self, fisa: FOI) -> VerifierResult<()> {
let mut errors = VerifierErrors::default();
let _ = verify_context(&self.func, &self.cfg, &self.domtree, fisa, &mut errors);
if errors.is_empty() {
Ok(())
} else {
Err(errors)
}
}
/// Run the verifier only if the `enable_verifier` setting is true.
pub fn verify_if<'a, FOI: Into<FlagsOrIsa<'a>>>(&self, fisa: FOI) -> CodegenResult<()> {
let fisa = fisa.into();
if fisa.flags.enable_verifier() {
self.verify(fisa)?;
}
Ok(())
}
/// Run the locations verifier on the function.
pub fn verify_locations(&self, isa: &TargetIsa) -> VerifierResult<()> {
let mut errors = VerifierErrors::default();
let _ = verify_locations(isa, &self.func, None, &mut errors);
if errors.is_empty() {
Ok(())
} else {
Err(errors)
}
}
/// Run the locations verifier only if the `enable_verifier` setting is true.
pub fn verify_locations_if(&self, isa: &TargetIsa) -> CodegenResult<()> {
if isa.flags().enable_verifier() {
self.verify_locations(isa)?;
}
Ok(())
}
/// Perform dead-code elimination on the function.
pub fn dce<'a, FOI: Into<FlagsOrIsa<'a>>>(&mut self, fisa: FOI) -> CodegenResult<()> {
do_dce(&mut self.func, &mut self.domtree);
self.verify_if(fisa)?;
Ok(())
}
/// Perform pre-legalization rewrites on the function.
pub fn preopt(&mut self, isa: &TargetIsa) -> CodegenResult<()> {
do_preopt(&mut self.func);
self.verify_if(isa)?;
Ok(())
}
/// Perform NaN canonicalizing rewrites on the function.
pub fn canonicalize_nans(&mut self, isa: &TargetIsa) -> CodegenResult<()> {
do_nan_canonicalization(&mut self.func);
self.verify_if(isa)
}
/// Run the legalizer for `isa` on the function.
pub fn legalize(&mut self, isa: &TargetIsa) -> CodegenResult<()> {
// Legalization invalidates the domtree and loop_analysis by mutating the CFG.
// TODO: Avoid doing this when legalization doesn't actually mutate the CFG.
self.domtree.clear();
self.loop_analysis.clear();
legalize_function(&mut self.func, &mut self.cfg, isa);
self.verify_if(isa)
}
/// Perform post-legalization rewrites on the function.
pub fn postopt(&mut self, isa: &TargetIsa) -> CodegenResult<()> {
do_postopt(&mut self.func, isa);
self.verify_if(isa)?;
Ok(())
}
/// Compute the control flow graph.
pub fn compute_cfg(&mut self) {
self.cfg.compute(&self.func)
}
/// Compute dominator tree.
pub fn compute_domtree(&mut self) {
self.domtree.compute(&self.func, &self.cfg)
}
/// Compute the loop analysis.
pub fn compute_loop_analysis(&mut self) {
self.loop_analysis
.compute(&self.func, &self.cfg, &self.domtree)
}
/// Compute the control flow graph and dominator tree.
pub fn flowgraph(&mut self) {
self.compute_cfg();
self.compute_domtree()
}
/// Perform simple GVN on the function.
pub fn simple_gvn<'a, FOI: Into<FlagsOrIsa<'a>>>(&mut self, fisa: FOI) -> CodegenResult<()> {
do_simple_gvn(&mut self.func, &mut self.domtree);
self.verify_if(fisa)
}
/// Perform LICM on the function.
pub fn licm(&mut self, isa: &TargetIsa) -> CodegenResult<()> {
do_licm(
isa,
&mut self.func,
&mut self.cfg,
&mut self.domtree,
&mut self.loop_analysis,
);
self.verify_if(isa)
}
/// Perform unreachable code elimination.
pub fn eliminate_unreachable_code<'a, FOI>(&mut self, fisa: FOI) -> CodegenResult<()>
where
FOI: Into<FlagsOrIsa<'a>>,
{
eliminate_unreachable_code(&mut self.func, &mut self.cfg, &self.domtree);
self.verify_if(fisa)
}
/// Run the register allocator.
pub fn regalloc(&mut self, isa: &TargetIsa) -> CodegenResult<()> {
self.regalloc
.run(isa, &mut self.func, &self.cfg, &mut self.domtree)
}
/// Insert prologue and epilogues after computing the stack frame layout.
pub fn prologue_epilogue(&mut self, isa: &TargetIsa) -> CodegenResult<()> {
isa.prologue_epilogue(&mut self.func)?;
self.verify_if(isa)?;
self.verify_locations_if(isa)?;
Ok(())
}
/// Run the instruction shrinking pass.
pub fn shrink_instructions(&mut self, isa: &TargetIsa) -> CodegenResult<()> {
shrink_instructions(&mut self.func, isa);
self.verify_if(isa)?;
self.verify_locations_if(isa)?;
Ok(())
}
/// Run the branch relaxation pass and return the final code size.
pub fn relax_branches(&mut self, isa: &TargetIsa) -> CodegenResult<CodeOffset> {
let code_size = relax_branches(&mut self.func, isa)?;
self.verify_if(isa)?;
self.verify_locations_if(isa)?;
Ok(code_size)
}
}
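A sketch of driving the pipeline end to end; `isa` and `func` are assumed to exist, `CollectRelocs` stands in for any `RelocSink` implementation, and `NullTrapSink` would need to be imported from `binemit`:

```rust
fn compile_one(isa: &TargetIsa, func: Function) -> CodegenResult<Vec<u8>> {
    let mut ctx = Context::for_function(func);
    let mut code = Vec::new();
    let mut relocs = CollectRelocs(Vec::new()); // hypothetical RelocSink impl
    let mut traps = NullTrapSink {};
    // Runs every pass in `compile`, then resizes `code` and emits into it.
    ctx.compile_and_emit(isa, &mut code, &mut relocs, &mut traps)?;
    Ok(code)
}
```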

View File

@@ -0,0 +1,765 @@
//! Cursor library.
//!
//! This module defines cursor data types that can be used for inserting instructions.
use crate::ir;
use crate::isa::TargetIsa;
/// The possible positions of a cursor.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum CursorPosition {
/// Cursor is not pointing anywhere. No instructions can be inserted.
Nowhere,
/// Cursor is pointing at an existing instruction.
/// New instructions will be inserted *before* the current instruction.
At(ir::Inst),
/// Cursor is before the beginning of an EBB. No instructions can be inserted. Calling
/// `next_inst()` will move to the first instruction in the EBB.
Before(ir::Ebb),
/// Cursor is pointing after the end of an EBB.
/// New instructions will be appended to the EBB.
After(ir::Ebb),
}
/// All cursor types implement the `Cursor` trait, which provides common navigation operations.
pub trait Cursor {
/// Get the current cursor position.
fn position(&self) -> CursorPosition;
/// Set the current position.
fn set_position(&mut self, pos: CursorPosition);
/// Get the source location that should be assigned to new instructions.
fn srcloc(&self) -> ir::SourceLoc;
/// Set the source location that should be assigned to new instructions.
fn set_srcloc(&mut self, srcloc: ir::SourceLoc);
/// Borrow a reference to the function layout that this cursor is navigating.
fn layout(&self) -> &ir::Layout;
/// Borrow a mutable reference to the function layout that this cursor is navigating.
fn layout_mut(&mut self) -> &mut ir::Layout;
/// Exchange this cursor for one with a set source location.
///
/// This is intended to be used as a builder method:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb, SourceLoc};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn edit_func(func: &mut Function, srcloc: SourceLoc) {
/// let mut pos = FuncCursor::new(func).with_srcloc(srcloc);
///
/// // Use `pos`...
/// }
/// ```
fn with_srcloc(mut self, srcloc: ir::SourceLoc) -> Self
where
Self: Sized,
{
self.set_srcloc(srcloc);
self
}
/// Rebuild this cursor positioned at `pos`.
fn at_position(mut self, pos: CursorPosition) -> Self
where
Self: Sized,
{
self.set_position(pos);
self
}
/// Rebuild this cursor positioned at `inst`.
///
/// This is intended to be used as a builder method:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb, Inst};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn edit_func(func: &mut Function, inst: Inst) {
/// let mut pos = FuncCursor::new(func).at_inst(inst);
///
/// // Use `pos`...
/// }
/// ```
fn at_inst(mut self, inst: ir::Inst) -> Self
where
Self: Sized,
{
self.goto_inst(inst);
self
}
/// Rebuild this cursor positioned at the first insertion point for `ebb`.
/// This differs from `at_first_inst` in that it doesn't assume that any
/// instructions have been inserted into `ebb` yet.
///
/// This is intended to be used as a builder method:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb, Inst};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn edit_func(func: &mut Function, ebb: Ebb) {
/// let mut pos = FuncCursor::new(func).at_first_insertion_point(ebb);
///
/// // Use `pos`...
/// }
/// ```
fn at_first_insertion_point(mut self, ebb: ir::Ebb) -> Self
where
Self: Sized,
{
self.goto_first_insertion_point(ebb);
self
}
/// Rebuild this cursor positioned at the first instruction in `ebb`.
///
/// This is intended to be used as a builder method:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb, Inst};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn edit_func(func: &mut Function, ebb: Ebb) {
/// let mut pos = FuncCursor::new(func).at_first_inst(ebb);
///
/// // Use `pos`...
/// }
/// ```
fn at_first_inst(mut self, ebb: ir::Ebb) -> Self
where
Self: Sized,
{
self.goto_first_inst(ebb);
self
}
/// Rebuild this cursor positioned at the last instruction in `ebb`.
///
/// This is intended to be used as a builder method:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb, Inst};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn edit_func(func: &mut Function, ebb: Ebb) {
/// let mut pos = FuncCursor::new(func).at_last_inst(ebb);
///
/// // Use `pos`...
/// }
/// ```
fn at_last_inst(mut self, ebb: ir::Ebb) -> Self
where
Self: Sized,
{
self.goto_last_inst(ebb);
self
}
/// Rebuild this cursor positioned after `inst`.
///
/// This is intended to be used as a builder method:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb, Inst};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn edit_func(func: &mut Function, inst: Inst) {
/// let mut pos = FuncCursor::new(func).after_inst(inst);
///
/// // Use `pos`...
/// }
/// ```
fn after_inst(mut self, inst: ir::Inst) -> Self
where
Self: Sized,
{
self.goto_after_inst(inst);
self
}
/// Rebuild this cursor positioned at the top of `ebb`.
///
/// This is intended to be used as a builder method:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb, Inst};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn edit_func(func: &mut Function, ebb: Ebb) {
/// let mut pos = FuncCursor::new(func).at_top(ebb);
///
/// // Use `pos`...
/// }
/// ```
fn at_top(mut self, ebb: ir::Ebb) -> Self
where
Self: Sized,
{
self.goto_top(ebb);
self
}
/// Rebuild this cursor positioned at the bottom of `ebb`.
///
/// This is intended to be used as a builder method:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb, Inst};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn edit_func(func: &mut Function, ebb: Ebb) {
/// let mut pos = FuncCursor::new(func).at_bottom(ebb);
///
/// // Use `pos`...
/// }
/// ```
fn at_bottom(mut self, ebb: ir::Ebb) -> Self
where
Self: Sized,
{
self.goto_bottom(ebb);
self
}
/// Get the EBB corresponding to the current position.
fn current_ebb(&self) -> Option<ir::Ebb> {
use self::CursorPosition::*;
match self.position() {
Nowhere => None,
At(inst) => self.layout().inst_ebb(inst),
Before(ebb) | After(ebb) => Some(ebb),
}
}
/// Get the instruction corresponding to the current position, if any.
fn current_inst(&self) -> Option<ir::Inst> {
use self::CursorPosition::*;
match self.position() {
At(inst) => Some(inst),
_ => None,
}
}
/// Go to the position after a specific instruction, which must be inserted
/// in the layout. New instructions will be inserted after `inst`.
fn goto_after_inst(&mut self, inst: ir::Inst) {
debug_assert!(self.layout().inst_ebb(inst).is_some());
let new_pos = if let Some(next) = self.layout().next_inst(inst) {
CursorPosition::At(next)
} else {
CursorPosition::After(
self.layout()
.inst_ebb(inst)
.expect("current instruction removed?"),
)
};
self.set_position(new_pos);
}
/// Go to a specific instruction which must be inserted in the layout.
/// New instructions will be inserted before `inst`.
fn goto_inst(&mut self, inst: ir::Inst) {
debug_assert!(self.layout().inst_ebb(inst).is_some());
self.set_position(CursorPosition::At(inst));
}
/// Go to the position for inserting instructions at the beginning of `ebb`,
/// which unlike `goto_first_inst` doesn't assume that any instructions have
/// been inserted into `ebb` yet.
fn goto_first_insertion_point(&mut self, ebb: ir::Ebb) {
if let Some(inst) = self.layout().first_inst(ebb) {
self.goto_inst(inst);
} else {
self.goto_bottom(ebb);
}
}
/// Go to the first instruction in `ebb`.
fn goto_first_inst(&mut self, ebb: ir::Ebb) {
let inst = self.layout().first_inst(ebb).expect("Empty EBB");
self.goto_inst(inst);
}
/// Go to the last instruction in `ebb`.
fn goto_last_inst(&mut self, ebb: ir::Ebb) {
let inst = self.layout().last_inst(ebb).expect("Empty EBB");
self.goto_inst(inst);
}
/// Go to the top of `ebb` which must be inserted into the layout.
/// At this position, instructions cannot be inserted, but `next_inst()` will move to the first
/// instruction in `ebb`.
fn goto_top(&mut self, ebb: ir::Ebb) {
debug_assert!(self.layout().is_ebb_inserted(ebb));
self.set_position(CursorPosition::Before(ebb));
}
/// Go to the bottom of `ebb` which must be inserted into the layout.
/// At this position, inserted instructions will be appended to `ebb`.
fn goto_bottom(&mut self, ebb: ir::Ebb) {
debug_assert!(self.layout().is_ebb_inserted(ebb));
self.set_position(CursorPosition::After(ebb));
}
/// Go to the top of the next EBB in layout order and return it.
///
/// - If the cursor wasn't pointing at anything, go to the top of the first EBB in the
/// function.
/// - If there are no more EBBs, leave the cursor pointing at nothing and return `None`.
///
/// # Examples
///
/// The `next_ebb()` method is intended for iterating over the EBBs in layout order:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn edit_func(func: &mut Function) {
/// let mut cursor = FuncCursor::new(func);
/// while let Some(ebb) = cursor.next_ebb() {
/// // Edit ebb.
/// }
/// }
/// ```
fn next_ebb(&mut self) -> Option<ir::Ebb> {
let next = if let Some(ebb) = self.current_ebb() {
self.layout().next_ebb(ebb)
} else {
self.layout().entry_block()
};
self.set_position(match next {
Some(ebb) => CursorPosition::Before(ebb),
None => CursorPosition::Nowhere,
});
next
}
/// Go to the bottom of the previous EBB in layout order and return it.
///
/// - If the cursor wasn't pointing at anything, go to the bottom of the last EBB in the
/// function.
/// - If there are no more EBBs, leave the cursor pointing at nothing and return `None`.
///
/// # Examples
///
/// The `prev_ebb()` method is intended for iterating over the EBBs in backwards layout order:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn edit_func(func: &mut Function) {
/// let mut cursor = FuncCursor::new(func);
/// while let Some(ebb) = cursor.prev_ebb() {
/// // Edit ebb.
/// }
/// }
/// ```
fn prev_ebb(&mut self) -> Option<ir::Ebb> {
let prev = if let Some(ebb) = self.current_ebb() {
self.layout().prev_ebb(ebb)
} else {
self.layout().last_ebb()
};
self.set_position(match prev {
Some(ebb) => CursorPosition::After(ebb),
None => CursorPosition::Nowhere,
});
prev
}
/// Move to the next instruction in the same EBB and return it.
///
/// - If the cursor was positioned before an EBB, go to the first instruction in that EBB.
/// - If there are no more instructions in the EBB, go to the `After(ebb)` position and return
/// `None`.
/// - If the cursor wasn't pointing anywhere, keep doing that.
///
/// This method will never move the cursor to a different EBB.
///
/// # Examples
///
/// The `next_inst()` method is intended for iterating over the instructions in an EBB like
/// this:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn edit_ebb(func: &mut Function, ebb: Ebb) {
/// let mut cursor = FuncCursor::new(func).at_top(ebb);
/// while let Some(inst) = cursor.next_inst() {
/// // Edit instructions...
/// }
/// }
/// ```
/// The loop body can insert and remove instructions via the cursor.
///
/// Iterating over all the instructions in a function looks like this:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn edit_func(func: &mut Function) {
/// let mut cursor = FuncCursor::new(func);
/// while let Some(ebb) = cursor.next_ebb() {
/// while let Some(inst) = cursor.next_inst() {
/// // Edit instructions...
/// }
/// }
/// }
/// ```
fn next_inst(&mut self) -> Option<ir::Inst> {
use self::CursorPosition::*;
match self.position() {
Nowhere | After(..) => None,
At(inst) => {
if let Some(next) = self.layout().next_inst(inst) {
self.set_position(At(next));
Some(next)
} else {
let pos = After(
self.layout()
.inst_ebb(inst)
.expect("current instruction removed?"),
);
self.set_position(pos);
None
}
}
Before(ebb) => {
if let Some(next) = self.layout().first_inst(ebb) {
self.set_position(At(next));
Some(next)
} else {
self.set_position(After(ebb));
None
}
}
}
}
/// Move to the previous instruction in the same EBB and return it.
///
/// - If the cursor was positioned after an EBB, go to the last instruction in that EBB.
/// - If there are no more instructions in the EBB, go to the `Before(ebb)` position and return
/// `None`.
/// - If the cursor wasn't pointing anywhere, keep doing that.
///
/// This method will never move the cursor to a different EBB.
///
/// # Examples
///
/// The `prev_inst()` method is intended for iterating backwards over the instructions in an
/// EBB like this:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn edit_ebb(func: &mut Function, ebb: Ebb) {
/// let mut cursor = FuncCursor::new(func).at_bottom(ebb);
/// while let Some(inst) = cursor.prev_inst() {
/// // Edit instructions...
/// }
/// }
/// ```
fn prev_inst(&mut self) -> Option<ir::Inst> {
use self::CursorPosition::*;
match self.position() {
Nowhere | Before(..) => None,
At(inst) => {
if let Some(prev) = self.layout().prev_inst(inst) {
self.set_position(At(prev));
Some(prev)
} else {
let pos = Before(
self.layout()
.inst_ebb(inst)
.expect("current instruction removed?"),
);
self.set_position(pos);
None
}
}
After(ebb) => {
if let Some(prev) = self.layout().last_inst(ebb) {
self.set_position(At(prev));
Some(prev)
} else {
self.set_position(Before(ebb));
None
}
}
}
}
/// Insert an instruction at the current position.
///
/// - If pointing at an instruction, the new instruction is inserted before the current
/// instruction.
/// - If pointing at the bottom of an EBB, the new instruction is appended to the EBB.
/// - Otherwise panic.
///
/// In either case, the cursor is not moved, so repeated calls to `insert_inst()` cause
/// instructions to appear in insertion order in the EBB.
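///
/// # Examples
///
/// Most code builds and inserts instructions through a concrete cursor's `ins()` method,
/// which hands the finished instruction to `insert_inst()`. A minimal sketch (the `iconst`
/// is only for illustration):
///
/// ```
/// # use cranelift_codegen::ir::{Ebb, Function, InstBuilder};
/// # use cranelift_codegen::ir::types::I32;
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn append_zero(func: &mut Function, ebb: Ebb) {
///     // At the bottom of `ebb`, the built instruction is appended.
///     let mut cursor = FuncCursor::new(func).at_bottom(ebb);
///     cursor.ins().iconst(I32, 0);
/// }
/// ```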
fn insert_inst(&mut self, inst: ir::Inst) {
use self::CursorPosition::*;
match self.position() {
Nowhere | Before(..) => panic!("Invalid insert_inst position"),
At(cur) => self.layout_mut().insert_inst(inst, cur),
After(ebb) => self.layout_mut().append_inst(inst, ebb),
}
}
/// Remove the instruction under the cursor.
///
/// The cursor is left pointing at the position following the current instruction.
///
/// Return the instruction that was removed.
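///
/// # Examples
///
/// A sketch that deletes every instruction in an EBB by walking it backwards, following the
/// `prev_inst()` iteration pattern:
///
/// ```
/// # use cranelift_codegen::ir::{Function, Ebb};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn clear_ebb(func: &mut Function, ebb: Ebb) {
///     let mut cursor = FuncCursor::new(func).at_bottom(ebb);
///     while let Some(_inst) = cursor.prev_inst() {
///         // The cursor is left after the removed instruction, so the
///         // `prev_inst()` above resumes at the preceding one.
///         cursor.remove_inst();
///     }
/// }
/// ```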
fn remove_inst(&mut self) -> ir::Inst {
let inst = self.current_inst().expect("No instruction to remove");
self.next_inst();
self.layout_mut().remove_inst(inst);
inst
}
/// Remove the instruction under the cursor.
///
/// The cursor is left pointing at the position preceding the current instruction.
///
/// Return the instruction that was removed.
fn remove_inst_and_step_back(&mut self) -> ir::Inst {
let inst = self.current_inst().expect("No instruction to remove");
self.prev_inst();
self.layout_mut().remove_inst(inst);
inst
}
/// Insert an EBB at the current position and switch to it.
///
/// As far as possible, this method behaves as if the EBB header were an instruction inserted
/// at the current position.
///
/// - If the cursor is pointing at an existing instruction, *the current EBB is split in two*
/// and the current instruction becomes the first instruction in the inserted EBB.
/// - If the cursor points at the bottom of an EBB, the new EBB is inserted after the current
/// one, and the cursor is moved to the bottom of the new EBB where instructions can be
/// appended.
/// - If the cursor points to the top of an EBB, the new EBB is inserted above the current one.
/// - If the cursor is not pointing at anything, the new EBB is placed last in the layout.
///
/// This means that it is always valid to call this method, and it always leaves the cursor in
/// a state that will insert instructions into the new EBB.
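///
/// # Examples
///
/// A sketch that appends a fresh EBB to the layout and leaves the cursor ready to insert
/// instructions into it:
///
/// ```
/// # use cranelift_codegen::ir::{Ebb, Function};
/// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
/// fn append_ebb(func: &mut Function) -> Ebb {
///     let new_ebb = func.dfg.make_ebb();
///     // With the cursor pointing nowhere, the new EBB is placed last in the
///     // layout and the cursor moves to its bottom, ready for insertions.
///     let mut cursor = FuncCursor::new(func);
///     cursor.insert_ebb(new_ebb);
///     new_ebb
/// }
/// ```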
fn insert_ebb(&mut self, new_ebb: ir::Ebb) {
use self::CursorPosition::*;
match self.position() {
At(inst) => {
self.layout_mut().split_ebb(new_ebb, inst);
// All other cases move to `After(ebb)`, but in this case we'll stay `At(inst)`.
return;
}
Nowhere => self.layout_mut().append_ebb(new_ebb),
Before(ebb) => self.layout_mut().insert_ebb(new_ebb, ebb),
After(ebb) => self.layout_mut().insert_ebb_after(new_ebb, ebb),
}
// For everything but `At(inst)` we end up appending to the new EBB.
self.set_position(After(new_ebb));
}
}
/// Function cursor.
///
/// A `FuncCursor` holds a mutable reference to a whole `ir::Function` while keeping a position
/// too. The function can be re-borrowed by accessing the public `cur.func` member.
///
/// This cursor is for use before legalization. The inserted instructions are not given an
/// encoding.
pub struct FuncCursor<'f> {
pos: CursorPosition,
srcloc: ir::SourceLoc,
/// The referenced function.
pub func: &'f mut ir::Function,
}
impl<'f> FuncCursor<'f> {
/// Create a new `FuncCursor` pointing nowhere.
pub fn new(func: &'f mut ir::Function) -> Self {
Self {
pos: CursorPosition::Nowhere,
srcloc: Default::default(),
func,
}
}
/// Use the source location of `inst` for future instructions.
pub fn use_srcloc(&mut self, inst: ir::Inst) {
self.srcloc = self.func.srclocs[inst];
}
/// Create an instruction builder that inserts an instruction at the current position.
pub fn ins(&mut self) -> ir::InsertBuilder<&mut FuncCursor<'f>> {
ir::InsertBuilder::new(self)
}
}
impl<'f> Cursor for FuncCursor<'f> {
fn position(&self) -> CursorPosition {
self.pos
}
fn set_position(&mut self, pos: CursorPosition) {
self.pos = pos
}
fn srcloc(&self) -> ir::SourceLoc {
self.srcloc
}
fn set_srcloc(&mut self, srcloc: ir::SourceLoc) {
self.srcloc = srcloc;
}
fn layout(&self) -> &ir::Layout {
&self.func.layout
}
fn layout_mut(&mut self) -> &mut ir::Layout {
&mut self.func.layout
}
}
impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut FuncCursor<'f> {
fn data_flow_graph(&self) -> &ir::DataFlowGraph {
&self.func.dfg
}
fn data_flow_graph_mut(&mut self) -> &mut ir::DataFlowGraph {
&mut self.func.dfg
}
fn insert_built_inst(self, inst: ir::Inst, _: ir::Type) -> &'c mut ir::DataFlowGraph {
self.insert_inst(inst);
if !self.srcloc.is_default() {
self.func.srclocs[inst] = self.srcloc;
}
&mut self.func.dfg
}
}
/// Encoding cursor.
///
/// An `EncCursor` can be used to insert instructions that are immediately assigned an encoding.
/// The cursor holds a mutable reference to the whole function which can be re-borrowed from the
/// public `pos.func` member.
pub struct EncCursor<'f> {
pos: CursorPosition,
srcloc: ir::SourceLoc,
built_inst: Option<ir::Inst>,
/// The referenced function.
pub func: &'f mut ir::Function,
/// The target ISA that will be used to encode instructions.
pub isa: &'f TargetIsa,
}
impl<'f> EncCursor<'f> {
/// Create a new `EncCursor` pointing nowhere.
pub fn new(func: &'f mut ir::Function, isa: &'f TargetIsa) -> Self {
Self {
pos: CursorPosition::Nowhere,
srcloc: Default::default(),
built_inst: None,
func,
isa,
}
}
/// Use the source location of `inst` for future instructions.
pub fn use_srcloc(&mut self, inst: ir::Inst) {
self.srcloc = self.func.srclocs[inst];
}
/// Create an instruction builder that will insert an encoded instruction at the current
/// position.
///
/// The builder will panic if it is used to insert an instruction that can't be encoded for
/// `self.isa`.
pub fn ins(&mut self) -> ir::InsertBuilder<&mut EncCursor<'f>> {
ir::InsertBuilder::new(self)
}
/// Get the last built instruction.
///
/// This returns the last instruction that was built using the `ins()` method on this cursor.
/// Panics if no instruction was built.
pub fn built_inst(&self) -> ir::Inst {
self.built_inst.expect("No instruction was inserted")
}
/// Return an object that can display `inst`.
///
/// This is a convenience wrapper for the DFG equivalent.
pub fn display_inst(&self, inst: ir::Inst) -> ir::dfg::DisplayInst {
self.func.dfg.display_inst(inst, self.isa)
}
}
impl<'f> Cursor for EncCursor<'f> {
fn position(&self) -> CursorPosition {
self.pos
}
fn set_position(&mut self, pos: CursorPosition) {
self.pos = pos
}
fn srcloc(&self) -> ir::SourceLoc {
self.srcloc
}
fn set_srcloc(&mut self, srcloc: ir::SourceLoc) {
self.srcloc = srcloc;
}
fn layout(&self) -> &ir::Layout {
&self.func.layout
}
fn layout_mut(&mut self) -> &mut ir::Layout {
&mut self.func.layout
}
}
impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut EncCursor<'f> {
fn data_flow_graph(&self) -> &ir::DataFlowGraph {
&self.func.dfg
}
fn data_flow_graph_mut(&mut self) -> &mut ir::DataFlowGraph {
&mut self.func.dfg
}
fn insert_built_inst(
self,
inst: ir::Inst,
ctrl_typevar: ir::Type,
) -> &'c mut ir::DataFlowGraph {
// Insert the instruction and remember the reference.
self.insert_inst(inst);
self.built_inst = Some(inst);
if !self.srcloc.is_default() {
self.func.srclocs[inst] = self.srcloc;
}
// Assign an encoding.
// XXX Is there a way to describe this error to the user?
#[cfg_attr(feature = "cargo-clippy", allow(clippy::match_wild_err_arm))]
match self
.isa
.encode(&self.func, &self.func.dfg[inst], ctrl_typevar)
{
Ok(e) => self.func.encodings[inst] = e,
Err(_) => panic!("can't encode {}", self.display_inst(inst)),
}
&mut self.func.dfg
}
}

View File

@@ -0,0 +1,28 @@
//! Debug tracing helpers.
use core::fmt;
/// Prefix added to the log file names, just before the thread name or id.
pub static LOG_FILENAME_PREFIX: &str = "cranelift.dbg.";
/// Helper for printing lists.
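///
/// For example, `DisplayList(&[1, 2, 3])` formats as `[1, 2, 3]`, and an empty slice
/// formats as `[]`.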
pub struct DisplayList<'a, T>(pub &'a [T])
where
T: 'a + fmt::Display;
impl<'a, T> fmt::Display for DisplayList<'a, T>
where
T: 'a + fmt::Display,
{
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.0.split_first() {
None => write!(f, "[]"),
Some((first, rest)) => {
write!(f, "[{}", first)?;
for x in rest {
write!(f, ", {}", x)?;
}
write!(f, "]")
}
}
}
}

View File

@@ -0,0 +1,69 @@
//! A Dead-Code Elimination (DCE) pass.
//!
//! Dead code here means instructions that have no side effects and whose
//! result values are not used by other instructions.
use crate::cursor::{Cursor, FuncCursor};
use crate::dominator_tree::DominatorTree;
use crate::entity::EntityRef;
use crate::ir::instructions::InstructionData;
use crate::ir::{DataFlowGraph, Function, Inst, Opcode};
use crate::timing;
/// Test whether the given opcode is unsafe to even consider for DCE.
fn trivially_unsafe_for_dce(opcode: Opcode) -> bool {
opcode.is_call()
|| opcode.is_branch()
|| opcode.is_terminator()
|| opcode.is_return()
|| opcode.can_trap()
|| opcode.other_side_effects()
|| opcode.can_store()
}
/// Preserve instructions with used result values.
fn any_inst_results_used(inst: Inst, live: &[bool], dfg: &DataFlowGraph) -> bool {
dfg.inst_results(inst).iter().any(|v| live[v.index()])
}
/// Load instructions without the `notrap` flag are defined to trap when
/// operating on inaccessible memory, so we can't DCE them even if the
/// loaded value is unused.
fn is_load_with_defined_trapping(opcode: Opcode, data: &InstructionData) -> bool {
if !opcode.can_load() {
return false;
}
match *data {
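// Stack loads can't trap: stack slots are always accessible.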
InstructionData::StackLoad { .. } => false,
InstructionData::Load { flags, .. } => !flags.notrap(),
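// Be conservative about any other kind of load instruction.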
_ => true,
}
}
/// Perform DCE on `func`.
pub fn do_dce(func: &mut Function, domtree: &mut DominatorTree) {
let _tt = timing::dce();
debug_assert!(domtree.is_valid());
let mut live = vec![false; func.dfg.num_values()];
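// Visit EBBs in CFG post-order and each EBB's instructions bottom-up, so every use of a
// value is processed before the instruction that defines it.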
for &ebb in domtree.cfg_postorder() {
let mut pos = FuncCursor::new(func).at_bottom(ebb);
while let Some(inst) = pos.prev_inst() {
{
let data = &pos.func.dfg[inst];
let opcode = data.opcode();
if trivially_unsafe_for_dce(opcode)
|| is_load_with_defined_trapping(opcode, &data)
|| any_inst_results_used(inst, &live, &pos.func.dfg)
{
for arg in pos.func.dfg.inst_args(inst) {
let v = pos.func.dfg.resolve_aliases(*arg);
live[v.index()] = true;
}
continue;
}
}
pos.remove_inst();
}
}
}

View File

@@ -0,0 +1,590 @@
//! Compute "magic numbers" for division-by-constants transformations.
//!
//! Math helpers for division by (non-power-of-2) constants. This is based
//! on the presentation in "Hacker's Delight" by Henry Warren, 2003. There
//! are four cases: {unsigned, signed} x {32 bit, 64 bit}. The word size
//! makes little difference, but the signed-vs-unsigned aspect has a large
//! effect. Therefore everything is presented in the order U32 U64 S32 S64
//! so as to emphasise the similarity of the U32 and U64 cases and the S32
//! and S64 cases.
// Structures to hold the "magic numbers" computed.
#[derive(PartialEq, Debug)]
pub struct MU32 {
pub mul_by: u32,
pub do_add: bool,
pub shift_by: i32,
}
#[derive(PartialEq, Debug)]
pub struct MU64 {
pub mul_by: u64,
pub do_add: bool,
pub shift_by: i32,
}
#[derive(PartialEq, Debug)]
pub struct MS32 {
pub mul_by: i32,
pub shift_by: i32,
}
#[derive(PartialEq, Debug)]
pub struct MS64 {
pub mul_by: i64,
pub shift_by: i32,
}
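// In all four result structs, `mul_by` is the magic multiplier and `shift_by` is the final
// right-shift amount; in the unsigned cases, `do_add` requests the extra add fixup step
// described in "Hacker's Delight".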
// The actual "magic number" generators follow.
pub fn magic_u32(d: u32) -> MU32 {
debug_assert_ne!(d, 0);
debug_assert_ne!(d, 1); // d==1 generates out of range shifts.
let mut do_add: bool = false;
let mut p: i32 = 31;
let nc: u32 = 0xFFFFFFFFu32 - u32::wrapping_neg(d) % d;
let mut q1: u32 = 0x80000000u32 / nc;
let mut r1: u32 = 0x80000000u32 - q1 * nc;
let mut q2: u32 = 0x7FFFFFFFu32 / d;
let mut r2: u32 = 0x7FFFFFFFu32 - q2 * d;
loop {
p = p + 1;
if r1 >= nc - r1 {
q1 = u32::wrapping_add(u32::wrapping_mul(2, q1), 1);
r1 = u32::wrapping_sub(u32::wrapping_mul(2, r1), nc);
} else {
q1 = 2 * q1;
r1 = 2 * r1;
}
if r2 + 1 >= d - r2 {
if q2 >= 0x7FFFFFFFu32 {
do_add = true;
}
q2 = 2 * q2 + 1;
r2 = u32::wrapping_sub(u32::wrapping_add(u32::wrapping_mul(2, r2), 1), d);
} else {
if q2 >= 0x80000000u32 {
do_add = true;
}
q2 = u32::wrapping_mul(2, q2);
r2 = 2 * r2 + 1;
}
let delta: u32 = d - 1 - r2;
if !(p < 64 && (q1 < delta || (q1 == delta && r1 == 0))) {
break;
}
}
MU32 {
mul_by: q2 + 1,
do_add,
shift_by: p - 32,
}
}
pub fn magic_u64(d: u64) -> MU64 {
debug_assert_ne!(d, 0);
debug_assert_ne!(d, 1); // d==1 generates out of range shifts.
let mut do_add: bool = false;
let mut p: i32 = 63;
let nc: u64 = 0xFFFFFFFFFFFFFFFFu64 - u64::wrapping_neg(d) % d;
let mut q1: u64 = 0x8000000000000000u64 / nc;
let mut r1: u64 = 0x8000000000000000u64 - q1 * nc;
let mut q2: u64 = 0x7FFFFFFFFFFFFFFFu64 / d;
let mut r2: u64 = 0x7FFFFFFFFFFFFFFFu64 - q2 * d;
loop {
p = p + 1;
if r1 >= nc - r1 {
q1 = u64::wrapping_add(u64::wrapping_mul(2, q1), 1);
r1 = u64::wrapping_sub(u64::wrapping_mul(2, r1), nc);
} else {
q1 = 2 * q1;
r1 = 2 * r1;
}
if r2 + 1 >= d - r2 {
if q2 >= 0x7FFFFFFFFFFFFFFFu64 {
do_add = true;
}
q2 = 2 * q2 + 1;
r2 = u64::wrapping_sub(u64::wrapping_add(u64::wrapping_mul(2, r2), 1), d);
} else {
if q2 >= 0x8000000000000000u64 {
do_add = true;
}
q2 = u64::wrapping_mul(2, q2);
r2 = 2 * r2 + 1;
}
let delta: u64 = d - 1 - r2;
if !(p < 128 && (q1 < delta || (q1 == delta && r1 == 0))) {
break;
}
}
MU64 {
mul_by: q2 + 1,
do_add,
shift_by: p - 64,
}
}
pub fn magic_s32(d: i32) -> MS32 {
debug_assert_ne!(d, -1);
debug_assert_ne!(d, 0);
debug_assert_ne!(d, 1);
let two31: u32 = 0x80000000u32;
let mut p: i32 = 31;
let ad: u32 = i32::wrapping_abs(d) as u32;
let t: u32 = two31 + ((d as u32) >> 31);
let anc: u32 = u32::wrapping_sub(t - 1, t % ad);
let mut q1: u32 = two31 / anc;
let mut r1: u32 = two31 - q1 * anc;
let mut q2: u32 = two31 / ad;
let mut r2: u32 = two31 - q2 * ad;
loop {
p = p + 1;
q1 = 2 * q1;
r1 = 2 * r1;
if r1 >= anc {
q1 = q1 + 1;
r1 = r1 - anc;
}
q2 = 2 * q2;
r2 = 2 * r2;
if r2 >= ad {
q2 = q2 + 1;
r2 = r2 - ad;
}
let delta: u32 = ad - r2;
if !(q1 < delta || (q1 == delta && r1 == 0)) {
break;
}
}
MS32 {
mul_by: (if d < 0 {
u32::wrapping_neg(q2 + 1)
} else {
q2 + 1
}) as i32,
shift_by: p - 32,
}
}
pub fn magic_s64(d: i64) -> MS64 {
debug_assert_ne!(d, -1);
debug_assert_ne!(d, 0);
debug_assert_ne!(d, 1);
let two63: u64 = 0x8000000000000000u64;
let mut p: i32 = 63;
let ad: u64 = i64::wrapping_abs(d) as u64;
let t: u64 = two63 + ((d as u64) >> 63);
let anc: u64 = u64::wrapping_sub(t - 1, t % ad);
let mut q1: u64 = two63 / anc;
let mut r1: u64 = two63 - q1 * anc;
let mut q2: u64 = two63 / ad;
let mut r2: u64 = two63 - q2 * ad;
loop {
p = p + 1;
q1 = 2 * q1;
r1 = 2 * r1;
if r1 >= anc {
q1 = q1 + 1;
r1 = r1 - anc;
}
q2 = 2 * q2;
r2 = 2 * r2;
if r2 >= ad {
q2 = q2 + 1;
r2 = r2 - ad;
}
let delta: u64 = ad - r2;
if !(q1 < delta || (q1 == delta && r1 == 0)) {
break;
}
}
MS64 {
mul_by: (if d < 0 {
u64::wrapping_neg(q2 + 1)
} else {
q2 + 1
}) as i64,
shift_by: p - 64,
}
}
#[cfg(test)]
mod tests {
use super::{magic_s32, magic_s64, magic_u32, magic_u64};
use super::{MS32, MS64, MU32, MU64};
fn make_mu32(mul_by: u32, do_add: bool, shift_by: i32) -> MU32 {
MU32 {
mul_by,
do_add,
shift_by,
}
}
fn make_mu64(mul_by: u64, do_add: bool, shift_by: i32) -> MU64 {
MU64 {
mul_by,
do_add,
shift_by,
}
}
fn make_ms32(mul_by: i32, shift_by: i32) -> MS32 {
MS32 { mul_by, shift_by }
}
fn make_ms64(mul_by: i64, shift_by: i32) -> MS64 {
MS64 { mul_by, shift_by }
}
#[test]
fn test_magic_u32() {
assert_eq!(magic_u32(2u32), make_mu32(0x80000000u32, false, 0));
assert_eq!(magic_u32(3u32), make_mu32(0xaaaaaaabu32, false, 1));
assert_eq!(magic_u32(4u32), make_mu32(0x40000000u32, false, 0));
assert_eq!(magic_u32(5u32), make_mu32(0xcccccccdu32, false, 2));
assert_eq!(magic_u32(6u32), make_mu32(0xaaaaaaabu32, false, 2));
assert_eq!(magic_u32(7u32), make_mu32(0x24924925u32, true, 3));
assert_eq!(magic_u32(9u32), make_mu32(0x38e38e39u32, false, 1));
assert_eq!(magic_u32(10u32), make_mu32(0xcccccccdu32, false, 3));
assert_eq!(magic_u32(11u32), make_mu32(0xba2e8ba3u32, false, 3));
assert_eq!(magic_u32(12u32), make_mu32(0xaaaaaaabu32, false, 3));
assert_eq!(magic_u32(25u32), make_mu32(0x51eb851fu32, false, 3));
assert_eq!(magic_u32(125u32), make_mu32(0x10624dd3u32, false, 3));
assert_eq!(magic_u32(625u32), make_mu32(0xd1b71759u32, false, 9));
assert_eq!(magic_u32(1337u32), make_mu32(0x88233b2bu32, true, 11));
assert_eq!(magic_u32(65535u32), make_mu32(0x80008001u32, false, 15));
assert_eq!(magic_u32(65536u32), make_mu32(0x00010000u32, false, 0));
assert_eq!(magic_u32(65537u32), make_mu32(0xffff0001u32, false, 16));
assert_eq!(magic_u32(31415927u32), make_mu32(0x445b4553u32, false, 23));
assert_eq!(
magic_u32(0xdeadbeefu32),
make_mu32(0x93275ab3u32, false, 31)
);
assert_eq!(
magic_u32(0xfffffffdu32),
make_mu32(0x40000001u32, false, 30)
);
assert_eq!(magic_u32(0xfffffffeu32), make_mu32(0x00000003u32, true, 32));
assert_eq!(
magic_u32(0xffffffffu32),
make_mu32(0x80000001u32, false, 31)
);
}
#[test]
fn test_magic_u64() {
assert_eq!(magic_u64(2u64), make_mu64(0x8000000000000000u64, false, 0));
assert_eq!(magic_u64(3u64), make_mu64(0xaaaaaaaaaaaaaaabu64, false, 1));
assert_eq!(magic_u64(4u64), make_mu64(0x4000000000000000u64, false, 0));
assert_eq!(magic_u64(5u64), make_mu64(0xcccccccccccccccdu64, false, 2));
assert_eq!(magic_u64(6u64), make_mu64(0xaaaaaaaaaaaaaaabu64, false, 2));
assert_eq!(magic_u64(7u64), make_mu64(0x2492492492492493u64, true, 3));
assert_eq!(magic_u64(9u64), make_mu64(0xe38e38e38e38e38fu64, false, 3));
assert_eq!(magic_u64(10u64), make_mu64(0xcccccccccccccccdu64, false, 3));
assert_eq!(magic_u64(11u64), make_mu64(0x2e8ba2e8ba2e8ba3u64, false, 1));
assert_eq!(magic_u64(12u64), make_mu64(0xaaaaaaaaaaaaaaabu64, false, 3));
assert_eq!(magic_u64(25u64), make_mu64(0x47ae147ae147ae15u64, true, 5));
assert_eq!(magic_u64(125u64), make_mu64(0x0624dd2f1a9fbe77u64, true, 7));
assert_eq!(
magic_u64(625u64),
make_mu64(0x346dc5d63886594bu64, false, 7)
);
assert_eq!(
magic_u64(1337u64),
make_mu64(0xc4119d952866a139u64, false, 10)
);
assert_eq!(
magic_u64(31415927u64),
make_mu64(0x116d154b9c3d2f85u64, true, 25)
);
assert_eq!(
magic_u64(0x00000000deadbeefu64),
make_mu64(0x93275ab2dfc9094bu64, false, 31)
);
assert_eq!(
magic_u64(0x00000000fffffffdu64),
make_mu64(0x8000000180000005u64, false, 31)
);
assert_eq!(
magic_u64(0x00000000fffffffeu64),
make_mu64(0x0000000200000005u64, true, 32)
);
assert_eq!(
magic_u64(0x00000000ffffffffu64),
make_mu64(0x8000000080000001u64, false, 31)
);
assert_eq!(
magic_u64(0x0000000100000000u64),
make_mu64(0x0000000100000000u64, false, 0)
);
assert_eq!(
magic_u64(0x0000000100000001u64),
make_mu64(0xffffffff00000001u64, false, 32)
);
assert_eq!(
magic_u64(0x0ddc0ffeebadf00du64),
make_mu64(0x2788e9d394b77da1u64, true, 60)
);
assert_eq!(
magic_u64(0xfffffffffffffffdu64),
make_mu64(0x4000000000000001u64, false, 62)
);
assert_eq!(
magic_u64(0xfffffffffffffffeu64),
make_mu64(0x0000000000000003u64, true, 64)
);
assert_eq!(
magic_u64(0xffffffffffffffffu64),
make_mu64(0x8000000000000001u64, false, 63)
);
}
#[test]
fn test_magic_s32() {
assert_eq!(
magic_s32(-0x80000000i32),
make_ms32(0x7fffffffu32 as i32, 30)
);
assert_eq!(
magic_s32(-0x7FFFFFFFi32),
make_ms32(0xbfffffffu32 as i32, 29)
);
assert_eq!(
magic_s32(-0x7FFFFFFEi32),
make_ms32(0x7ffffffdu32 as i32, 30)
);
assert_eq!(magic_s32(-31415927i32), make_ms32(0xbba4baadu32 as i32, 23));
assert_eq!(magic_s32(-1337i32), make_ms32(0x9df73135u32 as i32, 9));
assert_eq!(magic_s32(-256i32), make_ms32(0x7fffffffu32 as i32, 7));
assert_eq!(magic_s32(-5i32), make_ms32(0x99999999u32 as i32, 1));
assert_eq!(magic_s32(-3i32), make_ms32(0x55555555u32 as i32, 1));
assert_eq!(magic_s32(-2i32), make_ms32(0x7fffffffu32 as i32, 0));
assert_eq!(magic_s32(2i32), make_ms32(0x80000001u32 as i32, 0));
assert_eq!(magic_s32(3i32), make_ms32(0x55555556u32 as i32, 0));
assert_eq!(magic_s32(4i32), make_ms32(0x80000001u32 as i32, 1));
assert_eq!(magic_s32(5i32), make_ms32(0x66666667u32 as i32, 1));
assert_eq!(magic_s32(6i32), make_ms32(0x2aaaaaabu32 as i32, 0));
assert_eq!(magic_s32(7i32), make_ms32(0x92492493u32 as i32, 2));
assert_eq!(magic_s32(9i32), make_ms32(0x38e38e39u32 as i32, 1));
assert_eq!(magic_s32(10i32), make_ms32(0x66666667u32 as i32, 2));
assert_eq!(magic_s32(11i32), make_ms32(0x2e8ba2e9u32 as i32, 1));
assert_eq!(magic_s32(12i32), make_ms32(0x2aaaaaabu32 as i32, 1));
assert_eq!(magic_s32(25i32), make_ms32(0x51eb851fu32 as i32, 3));
assert_eq!(magic_s32(125i32), make_ms32(0x10624dd3u32 as i32, 3));
assert_eq!(magic_s32(625i32), make_ms32(0x68db8badu32 as i32, 8));
assert_eq!(magic_s32(1337i32), make_ms32(0x6208cecbu32 as i32, 9));
assert_eq!(magic_s32(31415927i32), make_ms32(0x445b4553u32 as i32, 23));
assert_eq!(
magic_s32(0x7ffffffei32),
make_ms32(0x80000003u32 as i32, 30)
);
assert_eq!(
magic_s32(0x7fffffffi32),
make_ms32(0x40000001u32 as i32, 29)
);
}
#[test]
fn test_magic_s64() {
assert_eq!(
magic_s64(-0x8000000000000000i64),
make_ms64(0x7fffffffffffffffu64 as i64, 62)
);
assert_eq!(
magic_s64(-0x7FFFFFFFFFFFFFFFi64),
make_ms64(0xbfffffffffffffffu64 as i64, 61)
);
assert_eq!(
magic_s64(-0x7FFFFFFFFFFFFFFEi64),
make_ms64(0x7ffffffffffffffdu64 as i64, 62)
);
assert_eq!(
magic_s64(-0x0ddC0ffeeBadF00di64),
make_ms64(0x6c3b8b1635a4412fu64 as i64, 59)
);
assert_eq!(
magic_s64(-0x100000001i64),
make_ms64(0x800000007fffffffu64 as i64, 31)
);
assert_eq!(
magic_s64(-0x100000000i64),
make_ms64(0x7fffffffffffffffu64 as i64, 31)
);
assert_eq!(
magic_s64(-0xFFFFFFFFi64),
make_ms64(0x7fffffff7fffffffu64 as i64, 31)
);
assert_eq!(
magic_s64(-0xFFFFFFFEi64),
make_ms64(0x7ffffffefffffffdu64 as i64, 31)
);
assert_eq!(
magic_s64(-0xFFFFFFFDi64),
make_ms64(0x7ffffffe7ffffffbu64 as i64, 31)
);
assert_eq!(
magic_s64(-0xDeadBeefi64),
make_ms64(0x6cd8a54d2036f6b5u64 as i64, 31)
);
assert_eq!(
magic_s64(-31415927i64),
make_ms64(0x7749755a31e1683du64 as i64, 24)
);
assert_eq!(
magic_s64(-1337i64),
make_ms64(0x9df731356bccaf63u64 as i64, 9)
);
assert_eq!(
magic_s64(-256i64),
make_ms64(0x7fffffffffffffffu64 as i64, 7)
);
assert_eq!(magic_s64(-5i64), make_ms64(0x9999999999999999u64 as i64, 1));
assert_eq!(magic_s64(-3i64), make_ms64(0x5555555555555555u64 as i64, 1));
assert_eq!(magic_s64(-2i64), make_ms64(0x7fffffffffffffffu64 as i64, 0));
assert_eq!(magic_s64(2i64), make_ms64(0x8000000000000001u64 as i64, 0));
assert_eq!(magic_s64(3i64), make_ms64(0x5555555555555556u64 as i64, 0));
assert_eq!(magic_s64(4i64), make_ms64(0x8000000000000001u64 as i64, 1));
assert_eq!(magic_s64(5i64), make_ms64(0x6666666666666667u64 as i64, 1));
assert_eq!(magic_s64(6i64), make_ms64(0x2aaaaaaaaaaaaaabu64 as i64, 0));
assert_eq!(magic_s64(7i64), make_ms64(0x4924924924924925u64 as i64, 1));
assert_eq!(magic_s64(9i64), make_ms64(0x1c71c71c71c71c72u64 as i64, 0));
assert_eq!(magic_s64(10i64), make_ms64(0x6666666666666667u64 as i64, 2));
assert_eq!(magic_s64(11i64), make_ms64(0x2e8ba2e8ba2e8ba3u64 as i64, 1));
assert_eq!(magic_s64(12i64), make_ms64(0x2aaaaaaaaaaaaaabu64 as i64, 1));
assert_eq!(magic_s64(25i64), make_ms64(0xa3d70a3d70a3d70bu64 as i64, 4));
assert_eq!(
magic_s64(125i64),
make_ms64(0x20c49ba5e353f7cfu64 as i64, 4)
);
assert_eq!(
magic_s64(625i64),
make_ms64(0x346dc5d63886594bu64 as i64, 7)
);
assert_eq!(
magic_s64(1337i64),
make_ms64(0x6208ceca9433509du64 as i64, 9)
);
assert_eq!(
magic_s64(31415927i64),
make_ms64(0x88b68aa5ce1e97c3u64 as i64, 24)
);
assert_eq!(
magic_s64(0x00000000deadbeefi64),
make_ms64(0x93275ab2dfc9094bu64 as i64, 31)
);
assert_eq!(
magic_s64(0x00000000fffffffdi64),
make_ms64(0x8000000180000005u64 as i64, 31)
);
assert_eq!(
magic_s64(0x00000000fffffffei64),
make_ms64(0x8000000100000003u64 as i64, 31)
);
assert_eq!(
magic_s64(0x00000000ffffffffi64),
make_ms64(0x8000000080000001u64 as i64, 31)
);
assert_eq!(
magic_s64(0x0000000100000000i64),
make_ms64(0x8000000000000001u64 as i64, 31)
);
assert_eq!(
magic_s64(0x0000000100000001i64),
make_ms64(0x7fffffff80000001u64 as i64, 31)
);
assert_eq!(
magic_s64(0x0ddc0ffeebadf00di64),
make_ms64(0x93c474e9ca5bbed1u64 as i64, 59)
);
assert_eq!(
magic_s64(0x7ffffffffffffffdi64),
make_ms64(0x2000000000000001u64 as i64, 60)
);
assert_eq!(
magic_s64(0x7ffffffffffffffei64),
make_ms64(0x8000000000000003u64 as i64, 62)
);
assert_eq!(
magic_s64(0x7fffffffffffffffi64),
make_ms64(0x4000000000000001u64 as i64, 61)
);
}
#[test]
fn test_magic_generators_dont_panic() {
// The point of this is to check that the magic number generators
// don't panic with integer wraparounds, especially at boundary
// cases for their arguments. The actual results are thrown away.
let mut total: u64 = 0;
// Testing UP magic_u32
for x in 2..(200 * 1000u32) {
let m = magic_u32(x);
total = total ^ (m.mul_by as u64);
total = total + (m.shift_by as u64);
total = total - (if m.do_add { 123 } else { 456 });
}
assert_eq!(total, 1747815691);
// Testing DOWN magic_u32
for x in 0..(200 * 1000u32) {
let m = magic_u32(0xFFFF_FFFFu32 - x);
total = total ^ (m.mul_by as u64);
total = total + (m.shift_by as u64);
total = total - (if m.do_add { 123 } else { 456 });
}
assert_eq!(total, 2210292772);
// Testing UP magic_u64
for x in 2..(200 * 1000u64) {
let m = magic_u64(x);
total = total ^ m.mul_by;
total = total + (m.shift_by as u64);
total = total - (if m.do_add { 123 } else { 456 });
}
assert_eq!(total, 7430004084791260605);
// Testing DOWN magic_u64
for x in 0..(200 * 1000u64) {
let m = magic_u64(0xFFFF_FFFF_FFFF_FFFFu64 - x);
total = total ^ m.mul_by;
total = total + (m.shift_by as u64);
total = total - (if m.do_add { 123 } else { 456 });
}
assert_eq!(total, 7547519887519825919);
// Testing UP magic_s32
for x in 0..(200 * 1000i32) {
let m = magic_s32(-0x8000_0000i32 + x);
total = total ^ (m.mul_by as u64);
total = total + (m.shift_by as u64);
}
assert_eq!(total, 10899224186731671235);
// Testing DOWN magic_s32
for x in 0..(200 * 1000i32) {
let m = magic_s32(0x7FFF_FFFFi32 - x);
total = total ^ (m.mul_by as u64);
total = total + (m.shift_by as u64);
}
assert_eq!(total, 7547519887517897369);
// Testing UP magic_s64
for x in 0..(200 * 1000i64) {
let m = magic_s64(-0x8000_0000_0000_0000i64 + x);
total = total ^ (m.mul_by as u64);
total = total + (m.shift_by as u64);
}
assert_eq!(total, 8029756891368555163);
// Testing DOWN magic_s64
for x in 0..(200 * 1000i64) {
let m = magic_s64(0x7FFF_FFFF_FFFF_FFFFi64 - x);
total = total ^ (m.mul_by as u64);
total = total + (m.shift_by as u64);
}
// Force `total` -- and hence, the entire computation -- to
// be used, so that rustc can't optimise it out.
assert_eq!(total, 7547519887532559585u64);
}
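// Spot-check, as a sketch, that the unsigned magic numbers reproduce plain
// division. The evaluation scheme (high multiply, optional add fixup, then
// shift) is our reading of "Hacker's Delight", not code taken from the
// generators above.
#[test]
fn test_magic_u32_matches_division() {
    fn div_via_magic(n: u32, d: u32) -> u32 {
        let m = magic_u32(d);
        // q is the high 32 bits of n * mul_by.
        let q = ((u64::from(n) * u64::from(m.mul_by)) >> 32) as u32;
        if m.do_add {
            // Add fixup: fold the difference back in, then shift one bit less.
            (((n - q) >> 1) + q) >> (m.shift_by - 1)
        } else {
            q >> m.shift_by
        }
    }
    for &d in &[3u32, 7, 10, 125, 1337, 0xdead_beef] {
        for &n in &[0u32, 1, 2, 100, 0x7fff_ffff, 0xffff_ffff] {
            assert_eq!(div_via_magic(n, d), n / d);
        }
    }
}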
}

View File

@@ -0,0 +1,943 @@
//! A Dominator Tree represented as mappings of Ebbs to their immediate dominator.
use crate::entity::SecondaryMap;
use crate::flowgraph::{BasicBlock, ControlFlowGraph};
use crate::ir::instructions::BranchInfo;
use crate::ir::{Ebb, ExpandedProgramPoint, Function, Inst, Layout, ProgramOrder, Value};
use crate::packed_option::PackedOption;
use crate::timing;
use core::cmp;
use core::cmp::Ordering;
use core::mem;
use std::vec::Vec;
/// RPO numbers are initially assigned in multiples of STRIDE rather than contiguously, to
/// leave room for later localized modifications of the dominator tree.
const STRIDE: u32 = 4;
/// Special RPO numbers used during `compute_postorder`.
const DONE: u32 = 1;
const SEEN: u32 = 2;
/// Dominator tree node. We keep one of these per EBB.
#[derive(Clone, Default)]
struct DomNode {
/// Number of this node in a reverse post-order traversal of the CFG, starting from 1.
/// This number is monotonic in the reverse postorder but not contiguous, since we leave
/// holes for later localized modifications of the dominator tree.
/// Unreachable nodes get number 0, all others are positive.
rpo_number: u32,
/// The immediate dominator of this EBB, represented as the branch or jump instruction at the
/// end of the dominating basic block.
///
/// This is `None` for unreachable blocks and the entry block which doesn't have an immediate
/// dominator.
idom: PackedOption<Inst>,
}
/// The dominator tree for a single function.
pub struct DominatorTree {
nodes: SecondaryMap<Ebb, DomNode>,
/// CFG post-order of all reachable EBBs.
postorder: Vec<Ebb>,
/// Scratch memory used by `compute_postorder()`.
stack: Vec<Ebb>,
valid: bool,
}
/// Methods for querying the dominator tree.
impl DominatorTree {
/// Is `ebb` reachable from the entry block?
pub fn is_reachable(&self, ebb: Ebb) -> bool {
self.nodes[ebb].rpo_number != 0
}
/// Get the CFG post-order of EBBs that was used to compute the dominator tree.
///
/// Note that this post-order is not updated automatically when the CFG is modified. It is
/// computed from scratch and cached by `compute()`.
pub fn cfg_postorder(&self) -> &[Ebb] {
debug_assert!(self.is_valid());
&self.postorder
}
/// Returns the immediate dominator of `ebb`.
///
/// The immediate dominator of an extended basic block is a basic block which we represent by
/// the branch or jump instruction at the end of the basic block. This does not have to be the
/// terminator of its EBB.
///
/// A branch or jump is said to *dominate* `ebb` if all control flow paths from the function
/// entry to `ebb` must go through the branch.
///
/// The *immediate dominator* is the dominator that is closest to `ebb`. All other dominators
/// also dominate the immediate dominator.
///
/// This returns `None` if `ebb` is not reachable from the entry EBB, or if it is the entry EBB
/// which has no dominators.
pub fn idom(&self, ebb: Ebb) -> Option<Inst> {
self.nodes[ebb].idom.into()
}
/// Compare two EBBs relative to the reverse post-order.
fn rpo_cmp_ebb(&self, a: Ebb, b: Ebb) -> Ordering {
self.nodes[a].rpo_number.cmp(&self.nodes[b].rpo_number)
}
/// Compare two program points relative to a reverse post-order traversal of the control-flow
/// graph.
///
/// Return `Ordering::Less` if `a` comes before `b` in the RPO.
///
/// If `a` and `b` belong to the same EBB, compare their relative position in the EBB.
pub fn rpo_cmp<A, B>(&self, a: A, b: B, layout: &Layout) -> Ordering
where
A: Into<ExpandedProgramPoint>,
B: Into<ExpandedProgramPoint>,
{
let a = a.into();
let b = b.into();
self.rpo_cmp_ebb(layout.pp_ebb(a), layout.pp_ebb(b))
.then(layout.cmp(a, b))
}
/// Returns `true` if `a` dominates `b`.
///
/// This means that every control-flow path from the function entry to `b` must go through `a`.
///
/// Dominance is ill defined for unreachable blocks. This function can always determine
/// dominance for instructions in the same EBB, but otherwise returns `false` if either block
/// is unreachable.
///
/// An instruction is considered to dominate itself.
pub fn dominates<A, B>(&self, a: A, b: B, layout: &Layout) -> bool
where
A: Into<ExpandedProgramPoint>,
B: Into<ExpandedProgramPoint>,
{
let a = a.into();
let b = b.into();
match a {
ExpandedProgramPoint::Ebb(ebb_a) => {
a == b || self.last_dominator(ebb_a, b, layout).is_some()
}
ExpandedProgramPoint::Inst(inst_a) => {
let ebb_a = layout.inst_ebb(inst_a).expect("Instruction not in layout.");
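// `inst_a` dominates `b` iff it is at or before the last instruction in
// `ebb_a` that dominates `b`.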
match self.last_dominator(ebb_a, b, layout) {
Some(last) => layout.cmp(inst_a, last) != Ordering::Greater,
None => false,
}
}
}
}
/// Find the last instruction in `a` that dominates `b`.
/// If no instructions in `a` dominate `b`, return `None`.
pub fn last_dominator<B>(&self, a: Ebb, b: B, layout: &Layout) -> Option<Inst>
where
B: Into<ExpandedProgramPoint>,
{
let (mut ebb_b, mut inst_b) = match b.into() {
ExpandedProgramPoint::Ebb(ebb) => (ebb, None),
ExpandedProgramPoint::Inst(inst) => (
layout.inst_ebb(inst).expect("Instruction not in layout."),
Some(inst),
),
};
let rpo_a = self.nodes[a].rpo_number;
// Run a finger up the dominator tree from b until we see a.
// Do nothing if b is unreachable.
while rpo_a < self.nodes[ebb_b].rpo_number {
let idom = match self.idom(ebb_b) {
Some(idom) => idom,
None => return None, // a is unreachable, so we climbed past the entry
};
ebb_b = layout.inst_ebb(idom).expect("Dominator got removed.");
inst_b = Some(idom);
}
if a == ebb_b {
inst_b
} else {
None
}
}
/// Compute the common dominator of two basic blocks.
///
/// Both basic blocks are assumed to be reachable.
pub fn common_dominator(
&self,
mut a: BasicBlock,
mut b: BasicBlock,
layout: &Layout,
) -> BasicBlock {
loop {
match self.rpo_cmp_ebb(a.ebb, b.ebb) {
Ordering::Less => {
// `a` comes before `b` in the RPO. Move `b` up.
let idom = self.nodes[b.ebb].idom.expect("Unreachable basic block?");
b = BasicBlock::new(
layout.inst_ebb(idom).expect("Dangling idom instruction"),
idom,
);
}
Ordering::Greater => {
// `b` comes before `a` in the RPO. Move `a` up.
let idom = self.nodes[a.ebb].idom.expect("Unreachable basic block?");
a = BasicBlock::new(
layout.inst_ebb(idom).expect("Dangling idom instruction"),
idom,
);
}
Ordering::Equal => break,
}
}
debug_assert_eq!(
a.ebb, b.ebb,
"Unreachable block passed to common_dominator?"
);
// We're in the same EBB. The common dominator is the earlier instruction.
if layout.cmp(a.inst, b.inst) == Ordering::Less {
a
} else {
b
}
}
}
impl DominatorTree {
/// Allocate a new blank dominator tree. Use `compute` to compute the dominator tree for a
/// function.
pub fn new() -> Self {
Self {
nodes: SecondaryMap::new(),
postorder: Vec::new(),
stack: Vec::new(),
valid: false,
}
}
/// Allocate and compute a dominator tree.
pub fn with_function(func: &Function, cfg: &ControlFlowGraph) -> Self {
let mut domtree = Self::new();
domtree.compute(func, cfg);
domtree
}
/// Reset and compute a CFG post-order and dominator tree.
pub fn compute(&mut self, func: &Function, cfg: &ControlFlowGraph) {
let _tt = timing::domtree();
debug_assert!(cfg.is_valid());
self.compute_postorder(func);
self.compute_domtree(func, cfg);
self.valid = true;
}
/// Clear the data structures used to represent the dominator tree. This will leave the tree in
/// a state where `is_valid()` returns false.
pub fn clear(&mut self) {
self.nodes.clear();
self.postorder.clear();
debug_assert!(self.stack.is_empty());
self.valid = false;
}
/// Check if the dominator tree is in a valid state.
///
/// Note that this doesn't perform any kind of validity checks. It simply checks if the
/// `compute()` method has been called since the last `clear()`. It does not check that the
/// dominator tree is consistent with the CFG.
pub fn is_valid(&self) -> bool {
self.valid
}
/// Reset all internal data structures and compute a post-order of the control flow graph.
///
/// This leaves `rpo_number == 1` for all reachable EBBs, 0 for unreachable ones.
fn compute_postorder(&mut self, func: &Function) {
self.clear();
self.nodes.resize(func.dfg.num_ebbs());
// This algorithm is a depth first traversal (DFT) of the control flow graph, computing a
// post-order of the EBBs that are reachable from the entry block. A DFT post-order is not
// unique. The specific order we get is controlled by two factors:
//
// 1. The order each node's children are visited, and
// 2. The method used for pruning graph edges to get a tree.
//
// There are two ways of viewing the CFG as a graph:
//
// 1. Each EBB is a node, with outgoing edges for all the branches in the EBB.
// 2. Each basic block is a node, with outgoing edges for the single branch at the end of
// the BB. (An EBB is a linear sequence of basic blocks).
//
// The first graph is a contraction of the second one. We want to compute an EBB post-order
// that is compatible with both graph interpretations. That is, if you compute a BB post-order
// and then remove those BBs that do not correspond to EBB headers, you get a post-order of
// the EBB graph.
//
// Node child order:
//
// In the BB graph, we always go down the fall-through path first and follow the branch
// destination second.
//
// In the EBB graph, this is equivalent to visiting EBB successors in a bottom-up
// order, starting from the destination of the EBB's terminating jump, ending at the
// destination of the first branch in the EBB.
//
// Edge pruning:
//
// In the BB graph, we keep an edge to an EBB the first time we visit the *source* side
// of the edge. Any subsequent edges to the same EBB are pruned.
//
// The equivalent tree is reached in the EBB graph by keeping the first edge to an EBB
// in a top-down traversal of the successors. (And then visiting edges in a bottom-up
// order).
//
// This pruning method makes it possible to compute the DFT without storing lots of
// information about the progress through an EBB.
// During this algorithm only, use `rpo_number` to hold the following state:
//
// 0: EBB has not yet been reached in the pre-order.
// SEEN: EBB has been pushed on the stack but successors not yet pushed.
// DONE: Successors pushed.
match func.layout.entry_block() {
Some(ebb) => {
self.stack.push(ebb);
self.nodes[ebb].rpo_number = SEEN;
}
None => return,
}
while let Some(ebb) = self.stack.pop() {
match self.nodes[ebb].rpo_number {
SEEN => {
// This is the first time we pop the EBB, so we need to scan its successors and
// then revisit it.
self.nodes[ebb].rpo_number = DONE;
self.stack.push(ebb);
self.push_successors(func, ebb);
}
DONE => {
// This is the second time we pop the EBB, so all successors have been
// processed.
self.postorder.push(ebb);
}
_ => unreachable!(),
}
}
}
/// Push `ebb` successors onto `self.stack`, filtering out those that have already been seen.
///
/// The successors are pushed in program order which is important to get a split-invariant
/// post-order. Split-invariant means that if an EBB is split in two, we get the same
/// post-order except for the insertion of the new EBB header at the split point.
fn push_successors(&mut self, func: &Function, ebb: Ebb) {
for inst in func.layout.ebb_insts(ebb) {
match func.dfg.analyze_branch(inst) {
BranchInfo::SingleDest(succ, _) => self.push_if_unseen(succ),
BranchInfo::Table(jt, dest) => {
for succ in func.jump_tables[jt].iter() {
self.push_if_unseen(*succ);
}
if let Some(dest) = dest {
self.push_if_unseen(dest);
}
}
BranchInfo::NotABranch => {}
}
}
}
/// Push `ebb` onto `self.stack` if it has not already been seen.
fn push_if_unseen(&mut self, ebb: Ebb) {
if self.nodes[ebb].rpo_number == 0 {
self.nodes[ebb].rpo_number = SEEN;
self.stack.push(ebb);
}
}
/// Build a dominator tree from a control flow graph using Keith D. Cooper's
/// "Simple, Fast Dominator Algorithm."
fn compute_domtree(&mut self, func: &Function, cfg: &ControlFlowGraph) {
// During this algorithm, `rpo_number` has the following values:
//
// 0: EBB is not reachable.
// 1: EBB is reachable, but has not yet been visited during the first pass. This is set by
// `compute_postorder`.
// 2+: EBB is reachable and has an assigned RPO number.
// We'll be iterating over a reverse post-order of the CFG, skipping the entry block.
let (entry_block, postorder) = match self.postorder.as_slice().split_last() {
Some((&eb, rest)) => (eb, rest),
None => return,
};
debug_assert_eq!(Some(entry_block), func.layout.entry_block());
// Do a first pass where we assign RPO numbers to all reachable nodes.
self.nodes[entry_block].rpo_number = 2 * STRIDE;
for (rpo_idx, &ebb) in postorder.iter().rev().enumerate() {
// Update the current node and give it an RPO number.
// The entry block was given `2 * STRIDE`; the rest start at `3 * STRIDE`, in multiples
// of STRIDE, to leave room for future dominator tree modifications.
//
// Since `compute_idom` will only look at nodes with an assigned RPO number, the
// function will never see an uninitialized predecessor.
//
// Due to the nature of the post-order traversal, every node we visit will have at
// least one predecessor that has previously been visited during this RPO.
self.nodes[ebb] = DomNode {
idom: self.compute_idom(ebb, cfg, &func.layout).into(),
rpo_number: (rpo_idx as u32 + 3) * STRIDE,
}
}
// Now that we have RPO numbers for everything and initial immediate dominator estimates,
// iterate until convergence.
//
// If the function is free of irreducible control flow, this will exit after one iteration.
let mut changed = true;
while changed {
changed = false;
for &ebb in postorder.iter().rev() {
let idom = self.compute_idom(ebb, cfg, &func.layout).into();
if self.nodes[ebb].idom != idom {
self.nodes[ebb].idom = idom;
changed = true;
}
}
}
}
// Compute the immediate dominator for `ebb` using the current `idom` states for the reachable
// nodes.
fn compute_idom(&self, ebb: Ebb, cfg: &ControlFlowGraph, layout: &Layout) -> Inst {
// Get an iterator with just the reachable, already visited predecessors to `ebb`.
// Note that during the first pass, `rpo_number` is 1 for reachable blocks that haven't
// been visited yet, 0 for unreachable blocks.
let mut reachable_preds = cfg
.pred_iter(ebb)
.filter(|&BasicBlock { ebb: pred, .. }| self.nodes[pred].rpo_number > 1);
// The RPO must visit at least one predecessor before this node.
let mut idom = reachable_preds
.next()
.expect("EBB node must have one reachable predecessor");
for pred in reachable_preds {
idom = self.common_dominator(idom, pred, layout);
}
idom.inst
}
}
impl DominatorTree {
/// When splitting an `Ebb` using `Layout::split_ebb`, you can use this method to update
/// the dominator tree locally rather than recomputing it.
///
/// `old_ebb` is the `Ebb` before splitting, and `new_ebb` is the `Ebb` which now contains
/// the second half of `old_ebb`. `split_jump_inst` is the terminator jump instruction of
/// `old_ebb` that points to `new_ebb`.
pub fn recompute_split_ebb(&mut self, old_ebb: Ebb, new_ebb: Ebb, split_jump_inst: Inst) {
if !self.is_reachable(old_ebb) {
// old_ebb is unreachable, it stays so and new_ebb is unreachable too
self.nodes[new_ebb] = Default::default();
return;
}
// The postorder list is ordered by decreasing RPO number, so we invert the operands of
// the comparison for the binary search.
let old_ebb_postorder_index = self
.postorder
.as_slice()
.binary_search_by(|probe| self.rpo_cmp_ebb(old_ebb, *probe))
.expect("the old ebb is not declared to the dominator tree");
let new_ebb_rpo = self.insert_after_rpo(old_ebb, old_ebb_postorder_index, new_ebb);
self.nodes[new_ebb] = DomNode {
rpo_number: new_ebb_rpo,
idom: Some(split_jump_inst).into(),
};
}
// Insert `new_ebb` just after `ebb` in the RPO. If there is a gap in the RPO numbers,
// this function returns a number from the gap; otherwise it renumbers the following EBBs
// to make room.
fn insert_after_rpo(&mut self, ebb: Ebb, ebb_postorder_index: usize, new_ebb: Ebb) -> u32 {
let ebb_rpo_number = self.nodes[ebb].rpo_number;
let inserted_rpo_number = ebb_rpo_number + 1;
// If there is no gap in the RPO numbers for the new number, we iterate forward in RPO
// numbers and backwards in the postorder list of EBBs, renumbering the EBBs until we
// find a gap.
for (&current_ebb, current_rpo) in self.postorder[0..ebb_postorder_index]
.iter()
.rev()
.zip(inserted_rpo_number + 1..)
{
if self.nodes[current_ebb].rpo_number < current_rpo {
// There is no gap, we renumber
self.nodes[current_ebb].rpo_number = current_rpo;
} else {
// There is a gap, we stop the renumbering and exit
break;
}
}
// TODO: insert in constant time?
self.postorder.insert(ebb_postorder_index, new_ebb);
inserted_rpo_number
}
}
/// Optional pre-order information that can be computed for a dominator tree.
///
/// This data structure is computed from a `DominatorTree` and provides:
///
/// - A forward traversable dominator tree through the `children()` iterator.
/// - An ordering of EBBs according to a dominator tree pre-order.
/// - Constant time dominance checks at the EBB granularity.
///
/// The information in this auxiliary data structure is not easy to update when the control flow
/// graph changes, which is why it is kept separate.
pub struct DominatorTreePreorder {
nodes: SecondaryMap<Ebb, ExtraNode>,
// Scratch memory used by `compute_postorder()`.
stack: Vec<Ebb>,
}
#[derive(Default, Clone)]
struct ExtraNode {
/// First child node in the domtree.
child: PackedOption<Ebb>,
/// Next sibling node in the domtree. This linked list is ordered according to the CFG RPO.
sibling: PackedOption<Ebb>,
/// Sequence number for this node in a pre-order traversal of the dominator tree.
/// Unreachable blocks have number 0, the entry block is 1.
pre_number: u32,
/// Maximum `pre_number` for the sub-tree of the dominator tree that is rooted at this node.
/// This is always >= `pre_number`.
pre_max: u32,
}
/// Creating and computing the dominator tree pre-order.
impl DominatorTreePreorder {
/// Create a new blank `DominatorTreePreorder`.
pub fn new() -> Self {
Self {
nodes: SecondaryMap::new(),
stack: Vec::new(),
}
}
/// Recompute this data structure to match `domtree`.
pub fn compute(&mut self, domtree: &DominatorTree, layout: &Layout) {
self.nodes.clear();
debug_assert_eq!(self.stack.len(), 0);
// Step 1: Populate the child and sibling links.
//
// By following the CFG post-order and pushing to the front of the lists, we make sure that
// sibling lists are ordered according to the CFG reverse post-order.
for &ebb in domtree.cfg_postorder() {
if let Some(idom_inst) = domtree.idom(ebb) {
let idom = layout.pp_ebb(idom_inst);
let sib = mem::replace(&mut self.nodes[idom].child, ebb.into());
self.nodes[ebb].sibling = sib;
} else {
// The only EBB without an immediate dominator is the entry.
self.stack.push(ebb);
}
}
// Step 2. Assign pre-order numbers from a DFS of the dominator tree.
debug_assert!(self.stack.len() <= 1);
let mut n = 0;
while let Some(ebb) = self.stack.pop() {
n += 1;
let node = &mut self.nodes[ebb];
node.pre_number = n;
node.pre_max = n;
if let Some(n) = node.sibling.expand() {
self.stack.push(n);
}
if let Some(n) = node.child.expand() {
self.stack.push(n);
}
}
// Step 3. Propagate the `pre_max` numbers up the tree.
// The CFG post-order is topologically ordered w.r.t. dominance so a node comes after all
// its dominator tree children.
for &ebb in domtree.cfg_postorder() {
if let Some(idom_inst) = domtree.idom(ebb) {
let idom = layout.pp_ebb(idom_inst);
let pre_max = cmp::max(self.nodes[ebb].pre_max, self.nodes[idom].pre_max);
self.nodes[idom].pre_max = pre_max;
}
}
}
}
/// An iterator that enumerates the direct children of an EBB in the dominator tree.
pub struct ChildIter<'a> {
dtpo: &'a DominatorTreePreorder,
next: PackedOption<Ebb>,
}
impl<'a> Iterator for ChildIter<'a> {
type Item = Ebb;
fn next(&mut self) -> Option<Ebb> {
let n = self.next.expand();
if let Some(ebb) = n {
self.next = self.dtpo.nodes[ebb].sibling;
}
n
}
}
/// Query interface for the dominator tree pre-order.
impl DominatorTreePreorder {
/// Get an iterator over the direct children of `ebb` in the dominator tree.
///
/// These are the EBBs whose immediate dominator is an instruction in `ebb`, ordered according
/// to the CFG reverse post-order.
pub fn children(&self, ebb: Ebb) -> ChildIter {
ChildIter {
dtpo: self,
next: self.nodes[ebb].child,
}
}
/// Fast, constant time dominance check with EBB granularity.
///
/// This computes the same result as `domtree.dominates(a, b)`, but in guaranteed fast constant
/// time. This is less general than the `DominatorTree` method because it only works with EBB
/// program points.
///
/// An EBB is considered to dominate itself.
pub fn dominates(&self, a: Ebb, b: Ebb) -> bool {
let na = &self.nodes[a];
let nb = &self.nodes[b];
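// A node's dominator-tree descendants occupy the pre-order interval [pre_number, pre_max],
// and these intervals nest, so `a` dominates `b` exactly when `b`'s interval is contained
// in `a`'s.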
na.pre_number <= nb.pre_number && na.pre_max >= nb.pre_max
}
/// Compare two EBBs according to the dominator pre-order.
pub fn pre_cmp_ebb(&self, a: Ebb, b: Ebb) -> Ordering {
self.nodes[a].pre_number.cmp(&self.nodes[b].pre_number)
}
/// Compare two program points according to the dominator tree pre-order.
///
/// This ordering of program points has the property that, given a program point `pp`, all
/// program points dominated by `pp` follow immediately and contiguously after `pp` in the order.
pub fn pre_cmp<A, B>(&self, a: A, b: B, layout: &Layout) -> Ordering
where
A: Into<ExpandedProgramPoint>,
B: Into<ExpandedProgramPoint>,
{
let a = a.into();
let b = b.into();
self.pre_cmp_ebb(layout.pp_ebb(a), layout.pp_ebb(b))
.then(layout.cmp(a, b))
}
/// Compare two value defs according to the dominator tree pre-order.
///
/// Two values defined at the same program point are compared according to their parameter or
/// result order.
///
/// This is a total ordering of the values in the function.
pub fn pre_cmp_def(&self, a: Value, b: Value, func: &Function) -> Ordering {
let da = func.dfg.value_def(a);
let db = func.dfg.value_def(b);
self.pre_cmp(da, db, &func.layout)
.then_with(|| da.num().cmp(&db.num()))
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::cursor::{Cursor, FuncCursor};
use crate::flowgraph::ControlFlowGraph;
use crate::ir::types::*;
use crate::ir::{Function, InstBuilder, TrapCode};
use crate::settings;
use crate::verifier::{verify_context, VerifierErrors};
#[test]
fn empty() {
let func = Function::new();
let cfg = ControlFlowGraph::with_function(&func);
debug_assert!(cfg.is_valid());
let dtree = DominatorTree::with_function(&func, &cfg);
assert_eq!(0, dtree.nodes.keys().count());
assert_eq!(dtree.cfg_postorder(), &[]);
let mut dtpo = DominatorTreePreorder::new();
dtpo.compute(&dtree, &func.layout);
}
#[test]
fn unreachable_node() {
let mut func = Function::new();
let ebb0 = func.dfg.make_ebb();
let v0 = func.dfg.append_ebb_param(ebb0, I32);
let ebb1 = func.dfg.make_ebb();
let ebb2 = func.dfg.make_ebb();
let mut cur = FuncCursor::new(&mut func);
cur.insert_ebb(ebb0);
cur.ins().brnz(v0, ebb2, &[]);
cur.ins().trap(TrapCode::User(0));
cur.insert_ebb(ebb1);
let v1 = cur.ins().iconst(I32, 1);
let v2 = cur.ins().iadd(v0, v1);
cur.ins().jump(ebb0, &[v2]);
cur.insert_ebb(ebb2);
cur.ins().return_(&[v0]);
let cfg = ControlFlowGraph::with_function(cur.func);
let dt = DominatorTree::with_function(cur.func, &cfg);
// Fall-through-first, prune-at-source DFT:
//
// ebb0 {
// brnz ebb2 {
// trap
// ebb2 {
// return
// } ebb2
// } ebb0
assert_eq!(dt.cfg_postorder(), &[ebb2, ebb0]);
let v2_def = cur.func.dfg.value_def(v2).unwrap_inst();
assert!(!dt.dominates(v2_def, ebb0, &cur.func.layout));
assert!(!dt.dominates(ebb0, v2_def, &cur.func.layout));
let mut dtpo = DominatorTreePreorder::new();
dtpo.compute(&dt, &cur.func.layout);
assert!(dtpo.dominates(ebb0, ebb0));
assert!(!dtpo.dominates(ebb0, ebb1));
assert!(dtpo.dominates(ebb0, ebb2));
assert!(!dtpo.dominates(ebb1, ebb0));
assert!(dtpo.dominates(ebb1, ebb1));
assert!(!dtpo.dominates(ebb1, ebb2));
assert!(!dtpo.dominates(ebb2, ebb0));
assert!(!dtpo.dominates(ebb2, ebb1));
assert!(dtpo.dominates(ebb2, ebb2));
}
#[test]
fn non_zero_entry_block() {
let mut func = Function::new();
let ebb0 = func.dfg.make_ebb();
let ebb1 = func.dfg.make_ebb();
let ebb2 = func.dfg.make_ebb();
let ebb3 = func.dfg.make_ebb();
let cond = func.dfg.append_ebb_param(ebb3, I32);
let mut cur = FuncCursor::new(&mut func);
cur.insert_ebb(ebb3);
let jmp_ebb3_ebb1 = cur.ins().jump(ebb1, &[]);
cur.insert_ebb(ebb1);
let br_ebb1_ebb0 = cur.ins().brnz(cond, ebb0, &[]);
let jmp_ebb1_ebb2 = cur.ins().jump(ebb2, &[]);
cur.insert_ebb(ebb2);
cur.ins().jump(ebb0, &[]);
cur.insert_ebb(ebb0);
let cfg = ControlFlowGraph::with_function(cur.func);
let dt = DominatorTree::with_function(cur.func, &cfg);
// Fall-through-first, prune-at-source DFT:
//
// ebb3 {
// ebb3:jump ebb1 {
// ebb1 {
// ebb1:brnz ebb0 {
// ebb1:jump ebb2 {
// ebb2 {
// ebb2:jump ebb0 (seen)
// } ebb2
// } ebb1:jump ebb2
// ebb0 {
// } ebb0
// } ebb1:brnz ebb0
// } ebb1
// } ebb3:jump ebb1
// } ebb3
assert_eq!(dt.cfg_postorder(), &[ebb2, ebb0, ebb1, ebb3]);
assert_eq!(cur.func.layout.entry_block().unwrap(), ebb3);
assert_eq!(dt.idom(ebb3), None);
assert_eq!(dt.idom(ebb1).unwrap(), jmp_ebb3_ebb1);
assert_eq!(dt.idom(ebb2).unwrap(), jmp_ebb1_ebb2);
assert_eq!(dt.idom(ebb0).unwrap(), br_ebb1_ebb0);
assert!(dt.dominates(br_ebb1_ebb0, br_ebb1_ebb0, &cur.func.layout));
assert!(!dt.dominates(br_ebb1_ebb0, jmp_ebb3_ebb1, &cur.func.layout));
assert!(dt.dominates(jmp_ebb3_ebb1, br_ebb1_ebb0, &cur.func.layout));
assert_eq!(dt.rpo_cmp(ebb3, ebb3, &cur.func.layout), Ordering::Equal);
assert_eq!(dt.rpo_cmp(ebb3, ebb1, &cur.func.layout), Ordering::Less);
assert_eq!(
dt.rpo_cmp(ebb3, jmp_ebb3_ebb1, &cur.func.layout),
Ordering::Less
);
assert_eq!(
dt.rpo_cmp(jmp_ebb3_ebb1, jmp_ebb1_ebb2, &cur.func.layout),
Ordering::Less
);
}
#[test]
fn backwards_layout() {
let mut func = Function::new();
let ebb0 = func.dfg.make_ebb();
let ebb1 = func.dfg.make_ebb();
let ebb2 = func.dfg.make_ebb();
let mut cur = FuncCursor::new(&mut func);
cur.insert_ebb(ebb0);
let jmp02 = cur.ins().jump(ebb2, &[]);
cur.insert_ebb(ebb1);
let trap = cur.ins().trap(TrapCode::User(5));
cur.insert_ebb(ebb2);
let jmp21 = cur.ins().jump(ebb1, &[]);
let cfg = ControlFlowGraph::with_function(cur.func);
let dt = DominatorTree::with_function(cur.func, &cfg);
assert_eq!(cur.func.layout.entry_block(), Some(ebb0));
assert_eq!(dt.idom(ebb0), None);
assert_eq!(dt.idom(ebb1), Some(jmp21));
assert_eq!(dt.idom(ebb2), Some(jmp02));
assert!(dt.dominates(ebb0, ebb0, &cur.func.layout));
assert!(dt.dominates(ebb0, jmp02, &cur.func.layout));
assert!(dt.dominates(ebb0, ebb1, &cur.func.layout));
assert!(dt.dominates(ebb0, trap, &cur.func.layout));
assert!(dt.dominates(ebb0, ebb2, &cur.func.layout));
assert!(dt.dominates(ebb0, jmp21, &cur.func.layout));
assert!(!dt.dominates(jmp02, ebb0, &cur.func.layout));
assert!(dt.dominates(jmp02, jmp02, &cur.func.layout));
assert!(dt.dominates(jmp02, ebb1, &cur.func.layout));
assert!(dt.dominates(jmp02, trap, &cur.func.layout));
assert!(dt.dominates(jmp02, ebb2, &cur.func.layout));
assert!(dt.dominates(jmp02, jmp21, &cur.func.layout));
assert!(!dt.dominates(ebb1, ebb0, &cur.func.layout));
assert!(!dt.dominates(ebb1, jmp02, &cur.func.layout));
assert!(dt.dominates(ebb1, ebb1, &cur.func.layout));
assert!(dt.dominates(ebb1, trap, &cur.func.layout));
assert!(!dt.dominates(ebb1, ebb2, &cur.func.layout));
assert!(!dt.dominates(ebb1, jmp21, &cur.func.layout));
assert!(!dt.dominates(trap, ebb0, &cur.func.layout));
assert!(!dt.dominates(trap, jmp02, &cur.func.layout));
assert!(!dt.dominates(trap, ebb1, &cur.func.layout));
assert!(dt.dominates(trap, trap, &cur.func.layout));
assert!(!dt.dominates(trap, ebb2, &cur.func.layout));
assert!(!dt.dominates(trap, jmp21, &cur.func.layout));
assert!(!dt.dominates(ebb2, ebb0, &cur.func.layout));
assert!(!dt.dominates(ebb2, jmp02, &cur.func.layout));
assert!(dt.dominates(ebb2, ebb1, &cur.func.layout));
assert!(dt.dominates(ebb2, trap, &cur.func.layout));
assert!(dt.dominates(ebb2, ebb2, &cur.func.layout));
assert!(dt.dominates(ebb2, jmp21, &cur.func.layout));
assert!(!dt.dominates(jmp21, ebb0, &cur.func.layout));
assert!(!dt.dominates(jmp21, jmp02, &cur.func.layout));
assert!(dt.dominates(jmp21, ebb1, &cur.func.layout));
assert!(dt.dominates(jmp21, trap, &cur.func.layout));
assert!(!dt.dominates(jmp21, ebb2, &cur.func.layout));
assert!(dt.dominates(jmp21, jmp21, &cur.func.layout));
}
#[test]
fn renumbering() {
let mut func = Function::new();
let entry = func.dfg.make_ebb();
let ebb0 = func.dfg.make_ebb();
let ebb100 = func.dfg.make_ebb();
let mut cur = FuncCursor::new(&mut func);
cur.insert_ebb(entry);
cur.ins().jump(ebb0, &[]);
cur.insert_ebb(ebb0);
let cond = cur.ins().iconst(I32, 0);
let inst2 = cur.ins().brz(cond, ebb0, &[]);
let inst3 = cur.ins().brz(cond, ebb0, &[]);
let inst4 = cur.ins().brz(cond, ebb0, &[]);
let inst5 = cur.ins().brz(cond, ebb0, &[]);
cur.ins().jump(ebb100, &[]);
cur.insert_ebb(ebb100);
cur.ins().return_(&[]);
let mut cfg = ControlFlowGraph::with_function(cur.func);
let mut dt = DominatorTree::with_function(cur.func, &cfg);
let ebb1 = cur.func.dfg.make_ebb();
cur.func.layout.split_ebb(ebb1, inst2);
cur.goto_bottom(ebb0);
let middle_jump_inst = cur.ins().jump(ebb1, &[]);
dt.recompute_split_ebb(ebb0, ebb1, middle_jump_inst);
let ebb2 = cur.func.dfg.make_ebb();
cur.func.layout.split_ebb(ebb2, inst3);
cur.goto_bottom(ebb1);
let middle_jump_inst = cur.ins().jump(ebb2, &[]);
dt.recompute_split_ebb(ebb1, ebb2, middle_jump_inst);
let ebb3 = cur.func.dfg.make_ebb();
cur.func.layout.split_ebb(ebb3, inst4);
cur.goto_bottom(ebb2);
let middle_jump_inst = cur.ins().jump(ebb3, &[]);
dt.recompute_split_ebb(ebb2, ebb3, middle_jump_inst);
let ebb4 = cur.func.dfg.make_ebb();
cur.func.layout.split_ebb(ebb4, inst5);
cur.goto_bottom(ebb3);
let middle_jump_inst = cur.ins().jump(ebb4, &[]);
dt.recompute_split_ebb(ebb3, ebb4, middle_jump_inst);
cfg.compute(cur.func);
let flags = settings::Flags::new(settings::builder());
let mut errors = VerifierErrors::default();
verify_context(cur.func, &cfg, &dt, &flags, &mut errors).unwrap();
assert!(errors.0.is_empty());
}
}

View File

@@ -0,0 +1,350 @@
//! A control flow graph represented as mappings of extended basic blocks to their predecessors
//! and successors.
//!
//! Successors are represented as extended basic blocks while predecessors are represented by basic
//! blocks. Basic blocks are denoted by tuples of EBB and branch/jump instructions. Each
//! predecessor tuple corresponds to the end of a basic block.
//!
//! ```c
//! Ebb0:
//! ... ; beginning of basic block
//!
//! ...
//!
//! brz vx, Ebb1 ; end of basic block
//!
//! ... ; beginning of basic block
//!
//! ...
//!
//! jmp Ebb2 ; end of basic block
//! ```
//!
//! Here `Ebb1` and `Ebb2` would each have a single predecessor denoted as `(Ebb0, brz)`
//! and `(Ebb0, jmp Ebb2)` respectively.
use crate::bforest;
use crate::entity::SecondaryMap;
use crate::ir::instructions::BranchInfo;
use crate::ir::{Ebb, Function, Inst};
use crate::timing;
use core::mem;
/// A basic block denoted by its enclosing Ebb and last instruction.
#[derive(PartialEq, Eq)]
pub struct BasicBlock {
/// Enclosing Ebb key.
pub ebb: Ebb,
/// Last instruction in the basic block.
pub inst: Inst,
}
impl BasicBlock {
    /// Convenience method for constructing a new `BasicBlock`.
pub fn new(ebb: Ebb, inst: Inst) -> Self {
Self { ebb, inst }
}
}
/// A container for the successors and predecessors of some Ebb.
#[derive(Clone, Default)]
struct CFGNode {
/// Instructions that can branch or jump to this EBB.
///
    /// This maps branch instruction -> predecessor EBB, which is redundant since the EBB containing
/// the branch instruction is available from the `layout.inst_ebb()` method. We store the
/// redundant information because:
///
/// 1. Many `pred_iter()` consumers want the EBB anyway, so it is handily available.
    /// 2. `invalidate_ebb_successors()` may be called *after* branches have been removed from
    ///    their EBB, but we still need to remove them from the old EBB's predecessor map.
///
/// The redundant EBB stored here is always consistent with the CFG successor lists, even after
/// the IR has been edited.
pub predecessors: bforest::Map<Inst, Ebb>,
/// Set of EBBs that are the targets of branches and jumps in this EBB.
/// The set is ordered by EBB number, indicated by the `()` comparator type.
pub successors: bforest::Set<Ebb>,
}
/// The Control Flow Graph maintains a mapping of ebbs to their predecessors
/// and successors where predecessors are basic blocks and successors are
/// extended basic blocks.
pub struct ControlFlowGraph {
data: SecondaryMap<Ebb, CFGNode>,
pred_forest: bforest::MapForest<Inst, Ebb>,
succ_forest: bforest::SetForest<Ebb>,
valid: bool,
}
impl ControlFlowGraph {
/// Allocate a new blank control flow graph.
pub fn new() -> Self {
Self {
data: SecondaryMap::new(),
valid: false,
pred_forest: bforest::MapForest::new(),
succ_forest: bforest::SetForest::new(),
}
}
/// Clear all data structures in this control flow graph.
pub fn clear(&mut self) {
self.data.clear();
self.pred_forest.clear();
self.succ_forest.clear();
self.valid = false;
}
/// Allocate and compute the control flow graph for `func`.
pub fn with_function(func: &Function) -> Self {
let mut cfg = Self::new();
cfg.compute(func);
cfg
}
/// Compute the control flow graph of `func`.
///
/// This will clear and overwrite any information already stored in this data structure.
pub fn compute(&mut self, func: &Function) {
let _tt = timing::flowgraph();
self.clear();
self.data.resize(func.dfg.num_ebbs());
for ebb in &func.layout {
self.compute_ebb(func, ebb);
}
self.valid = true;
}
fn compute_ebb(&mut self, func: &Function, ebb: Ebb) {
for inst in func.layout.ebb_insts(ebb) {
match func.dfg.analyze_branch(inst) {
BranchInfo::SingleDest(dest, _) => {
self.add_edge(ebb, inst, dest);
}
BranchInfo::Table(jt, dest) => {
if let Some(dest) = dest {
self.add_edge(ebb, inst, dest);
}
for dest in func.jump_tables[jt].iter() {
self.add_edge(ebb, inst, *dest);
}
}
BranchInfo::NotABranch => {}
}
}
}
fn invalidate_ebb_successors(&mut self, ebb: Ebb) {
// Temporarily take ownership because we need mutable access to self.data inside the loop.
// Unfortunately borrowck cannot see that our mut accesses to predecessors don't alias
// our iteration over successors.
let mut successors = mem::replace(&mut self.data[ebb].successors, Default::default());
for succ in successors.iter(&self.succ_forest) {
self.data[succ]
.predecessors
.retain(&mut self.pred_forest, |_, &mut e| e != ebb);
}
successors.clear(&mut self.succ_forest);
}
/// Recompute the control flow graph of `ebb`.
///
/// This is for use after modifying instructions within a specific EBB. It recomputes all edges
    /// from `ebb` while leaving edges to `ebb` intact. Its functionality is a subset of that of
    /// the more expensive `compute`, and it should be used when we know we don't need to recompute
    /// the CFG from scratch, but rather that our changes have been restricted to specific EBBs.
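    ///
    /// A usage sketch (hypothetical `branch` and `new_dest` values; compare the
    /// `branches_and_jumps` test below):
    ///
    /// ```ignore
    /// func.dfg.replace(branch).jump(new_dest, &[]);
    /// cfg.recompute_ebb(&func, ebb);
    /// ```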
pub fn recompute_ebb(&mut self, func: &Function, ebb: Ebb) {
debug_assert!(self.is_valid());
self.invalidate_ebb_successors(ebb);
self.compute_ebb(func, ebb);
}
fn add_edge(&mut self, from: Ebb, from_inst: Inst, to: Ebb) {
self.data[from]
.successors
.insert(to, &mut self.succ_forest, &());
self.data[to]
.predecessors
.insert(from_inst, from, &mut self.pred_forest, &());
}
/// Get an iterator over the CFG predecessors to `ebb`.
pub fn pred_iter(&self, ebb: Ebb) -> PredIter {
PredIter(self.data[ebb].predecessors.iter(&self.pred_forest))
}
/// Get an iterator over the CFG successors to `ebb`.
pub fn succ_iter(&self, ebb: Ebb) -> SuccIter {
debug_assert!(self.is_valid());
self.data[ebb].successors.iter(&self.succ_forest)
}
/// Check if the CFG is in a valid state.
///
/// Note that this doesn't perform any kind of validity checks. It simply checks if the
/// `compute()` method has been called since the last `clear()`. It does not check that the
/// CFG is consistent with the function.
pub fn is_valid(&self) -> bool {
self.valid
}
}
/// An iterator over EBB predecessors. The item type is `BasicBlock`.
///
/// Each predecessor is an instruction that branches to the EBB.
pub struct PredIter<'a>(bforest::MapIter<'a, Inst, Ebb>);
impl<'a> Iterator for PredIter<'a> {
type Item = BasicBlock;
fn next(&mut self) -> Option<BasicBlock> {
self.0.next().map(|(i, e)| BasicBlock::new(e, i))
}
}
/// An iterator over EBB successors. The item type is `Ebb`.
pub type SuccIter<'a> = bforest::SetIter<'a, Ebb>;
#[cfg(test)]
mod tests {
use super::*;
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::{types, Function, InstBuilder};
use std::vec::Vec;
#[test]
fn empty() {
let func = Function::new();
ControlFlowGraph::with_function(&func);
}
#[test]
fn no_predecessors() {
let mut func = Function::new();
let ebb0 = func.dfg.make_ebb();
let ebb1 = func.dfg.make_ebb();
let ebb2 = func.dfg.make_ebb();
func.layout.append_ebb(ebb0);
func.layout.append_ebb(ebb1);
func.layout.append_ebb(ebb2);
let cfg = ControlFlowGraph::with_function(&func);
let mut fun_ebbs = func.layout.ebbs();
for ebb in func.layout.ebbs() {
assert_eq!(ebb, fun_ebbs.next().unwrap());
assert_eq!(cfg.pred_iter(ebb).count(), 0);
assert_eq!(cfg.succ_iter(ebb).count(), 0);
}
}
#[test]
fn branches_and_jumps() {
let mut func = Function::new();
let ebb0 = func.dfg.make_ebb();
let cond = func.dfg.append_ebb_param(ebb0, types::I32);
let ebb1 = func.dfg.make_ebb();
let ebb2 = func.dfg.make_ebb();
let br_ebb0_ebb2;
let br_ebb1_ebb1;
let jmp_ebb0_ebb1;
let jmp_ebb1_ebb2;
{
let mut cur = FuncCursor::new(&mut func);
cur.insert_ebb(ebb0);
br_ebb0_ebb2 = cur.ins().brnz(cond, ebb2, &[]);
jmp_ebb0_ebb1 = cur.ins().jump(ebb1, &[]);
cur.insert_ebb(ebb1);
br_ebb1_ebb1 = cur.ins().brnz(cond, ebb1, &[]);
jmp_ebb1_ebb2 = cur.ins().jump(ebb2, &[]);
cur.insert_ebb(ebb2);
}
let mut cfg = ControlFlowGraph::with_function(&func);
{
let ebb0_predecessors = cfg.pred_iter(ebb0).collect::<Vec<_>>();
let ebb1_predecessors = cfg.pred_iter(ebb1).collect::<Vec<_>>();
let ebb2_predecessors = cfg.pred_iter(ebb2).collect::<Vec<_>>();
let ebb0_successors = cfg.succ_iter(ebb0).collect::<Vec<_>>();
let ebb1_successors = cfg.succ_iter(ebb1).collect::<Vec<_>>();
let ebb2_successors = cfg.succ_iter(ebb2).collect::<Vec<_>>();
assert_eq!(ebb0_predecessors.len(), 0);
assert_eq!(ebb1_predecessors.len(), 2);
assert_eq!(ebb2_predecessors.len(), 2);
assert_eq!(
ebb1_predecessors.contains(&BasicBlock::new(ebb0, jmp_ebb0_ebb1)),
true
);
assert_eq!(
ebb1_predecessors.contains(&BasicBlock::new(ebb1, br_ebb1_ebb1)),
true
);
assert_eq!(
ebb2_predecessors.contains(&BasicBlock::new(ebb0, br_ebb0_ebb2)),
true
);
assert_eq!(
ebb2_predecessors.contains(&BasicBlock::new(ebb1, jmp_ebb1_ebb2)),
true
);
assert_eq!(ebb0_successors, [ebb1, ebb2]);
assert_eq!(ebb1_successors, [ebb1, ebb2]);
assert_eq!(ebb2_successors, []);
}
// Change some instructions and recompute ebb0
func.dfg.replace(br_ebb0_ebb2).brnz(cond, ebb1, &[]);
func.dfg.replace(jmp_ebb0_ebb1).return_(&[]);
cfg.recompute_ebb(&mut func, ebb0);
let br_ebb0_ebb1 = br_ebb0_ebb2;
{
let ebb0_predecessors = cfg.pred_iter(ebb0).collect::<Vec<_>>();
let ebb1_predecessors = cfg.pred_iter(ebb1).collect::<Vec<_>>();
let ebb2_predecessors = cfg.pred_iter(ebb2).collect::<Vec<_>>();
let ebb0_successors = cfg.succ_iter(ebb0);
let ebb1_successors = cfg.succ_iter(ebb1);
let ebb2_successors = cfg.succ_iter(ebb2);
assert_eq!(ebb0_predecessors.len(), 0);
assert_eq!(ebb1_predecessors.len(), 2);
assert_eq!(ebb2_predecessors.len(), 1);
assert_eq!(
ebb1_predecessors.contains(&BasicBlock::new(ebb0, br_ebb0_ebb1)),
true
);
assert_eq!(
ebb1_predecessors.contains(&BasicBlock::new(ebb1, br_ebb1_ebb1)),
true
);
assert_eq!(
ebb2_predecessors.contains(&BasicBlock::new(ebb0, br_ebb0_ebb2)),
false
);
assert_eq!(
ebb2_predecessors.contains(&BasicBlock::new(ebb1, jmp_ebb1_ebb2)),
true
);
assert_eq!(ebb0_successors.collect::<Vec<_>>(), [ebb1]);
assert_eq!(ebb1_successors.collect::<Vec<_>>(), [ebb1, ebb2]);
assert_eq!(ebb2_successors.collect::<Vec<_>>(), []);
}
}
}

cranelift/codegen/src/fx.rs

@@ -0,0 +1,111 @@
// This file is taken from the Rust compiler: src/librustc_data_structures/fx.rs
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use super::{HashMap, HashSet};
use core::default::Default;
use core::hash::{BuildHasherDefault, Hash, Hasher};
use core::ops::BitXor;
pub type FxHashMap<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher>>;
pub type FxHashSet<V> = HashSet<V, BuildHasherDefault<FxHasher>>;
#[allow(non_snake_case)]
pub fn FxHashMap<K: Hash + Eq, V>() -> FxHashMap<K, V> {
HashMap::default()
}
#[allow(non_snake_case)]
pub fn FxHashSet<V: Hash + Eq>() -> FxHashSet<V> {
HashSet::default()
}
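// Example (a sketch): the aliases above swap the standard library's default SipHash for the
// faster, non-DoS-resistant FxHasher:
//
//     let mut m: FxHashMap<u32, &str> = FxHashMap();
//     m.insert(1, "one");
//     assert_eq!(m.get(&1), Some(&"one"));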
/// A speedy hash algorithm for use within rustc. The hashmap in liballoc
/// by default uses SipHash which isn't quite as speedy as we want. In the
/// compiler we're not really worried about DOS attempts, so we use a fast
/// non-cryptographic hash.
///
/// This is the same as the algorithm used by Firefox -- which is a homespun
/// one not based on any widely-known algorithm -- though modified to produce
/// 64-bit hash values instead of 32-bit hash values. It consistently
/// out-performs an FNV-based hash within rustc itself -- the collision rate is
/// similar or slightly worse than FNV, but the speed of the hash function
/// itself is much higher because it works on up to 8 bytes at a time.
pub struct FxHasher {
hash: usize,
}
#[cfg(target_pointer_width = "32")]
const K: usize = 0x9e3779b9;
#[cfg(target_pointer_width = "64")]
const K: usize = 0x517cc1b727220a95;
impl Default for FxHasher {
#[inline]
fn default() -> Self {
Self { hash: 0 }
}
}
impl FxHasher {
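    /// Mix one machine word into the running hash: rotate the current hash, XOR in the new
    /// word, then multiply by the large odd constant `K` to spread the bits.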
#[inline]
fn add_to_hash(&mut self, i: usize) {
self.hash = self.hash.rotate_left(5).bitxor(i).wrapping_mul(K);
}
}
impl Hasher for FxHasher {
#[inline]
fn write(&mut self, bytes: &[u8]) {
for byte in bytes {
let i = *byte;
self.add_to_hash(i as usize);
}
}
#[inline]
fn write_u8(&mut self, i: u8) {
self.add_to_hash(i as usize);
}
#[inline]
fn write_u16(&mut self, i: u16) {
self.add_to_hash(i as usize);
}
#[inline]
fn write_u32(&mut self, i: u32) {
self.add_to_hash(i as usize);
}
#[cfg(target_pointer_width = "32")]
#[inline]
fn write_u64(&mut self, i: u64) {
self.add_to_hash(i as usize);
self.add_to_hash((i >> 32) as usize);
}
#[cfg(target_pointer_width = "64")]
#[inline]
fn write_u64(&mut self, i: u64) {
self.add_to_hash(i as usize);
}
#[inline]
fn write_usize(&mut self, i: usize) {
self.add_to_hash(i);
}
#[inline]
fn finish(&self) -> u64 {
self.hash as u64
}
}


@@ -0,0 +1,266 @@
//! Cranelift instruction builder.
//!
//! A `Builder` provides a convenient interface for inserting instructions into a Cranelift
//! function. Many of its methods are generated from the meta language instruction definitions.
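//!
//! A short sketch of a builder in use, via a `FuncCursor` (see also the tests at the bottom
//! of this file):
//!
//! ```ignore
//! let mut pos = FuncCursor::new(&mut func);
//! pos.insert_ebb(ebb0);
//! let v0 = pos.ins().iconst(types::I32, 42); // generated per-opcode method
//! let v1 = pos.ins().iadd_imm(v0, 1);
//! ```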
use crate::ir;
use crate::ir::types;
use crate::ir::{DataFlowGraph, InstructionData};
use crate::ir::{Inst, Opcode, Type, Value};
use crate::isa;
/// Base trait for instruction builders.
///
/// The `InstBuilderBase` trait provides the basic functionality required by the methods of the
/// generated `InstBuilder` trait. These methods should not normally be used directly. Use the
/// methods in the `InstBuilder` trait instead.
///
/// Any data type that implements `InstBuilderBase` also gets all the methods of the `InstBuilder`
/// trait.
pub trait InstBuilderBase<'f>: Sized {
/// Get an immutable reference to the data flow graph that will hold the constructed
/// instructions.
fn data_flow_graph(&self) -> &DataFlowGraph;
/// Get a mutable reference to the data flow graph that will hold the constructed
/// instructions.
fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph;
/// Insert an instruction and return a reference to it, consuming the builder.
///
/// The result types may depend on a controlling type variable. For non-polymorphic
/// instructions with multiple results, pass `INVALID` for the `ctrl_typevar` argument.
fn build(self, data: InstructionData, ctrl_typevar: Type) -> (Inst, &'f mut DataFlowGraph);
}
// Include trait code generated by `cranelift-codegen/meta-python/gen_instr.py`.
//
// This file defines the `InstBuilder` trait as an extension of `InstBuilderBase` with methods per
// instruction format and per opcode.
include!(concat!(env!("OUT_DIR"), "/inst_builder.rs"));
/// Any type implementing `InstBuilderBase` gets all the `InstBuilder` methods for free.
impl<'f, T: InstBuilderBase<'f>> InstBuilder<'f> for T {}
/// Base trait for instruction inserters.
///
/// This is an alternative base trait for an instruction builder to implement.
///
/// An instruction inserter can be adapted into an instruction builder by wrapping it in an
/// `InsertBuilder`. This provides some common functionality for instruction builders that insert
/// new instructions, as opposed to the `ReplaceBuilder` which overwrites existing instructions.
pub trait InstInserterBase<'f>: Sized {
/// Get an immutable reference to the data flow graph.
fn data_flow_graph(&self) -> &DataFlowGraph;
/// Get a mutable reference to the data flow graph.
fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph;
/// Insert a new instruction which belongs to the DFG.
fn insert_built_inst(self, inst: Inst, ctrl_typevar: Type) -> &'f mut DataFlowGraph;
}
use core::marker::PhantomData;
/// Builder that inserts an instruction at the current position.
///
/// An `InsertBuilder` is a wrapper for an `InstInserterBase` that turns it into an instruction
/// builder with some additional facilities for creating instructions that reuse existing values as
/// their results.
pub struct InsertBuilder<'f, IIB: InstInserterBase<'f>> {
inserter: IIB,
unused: PhantomData<&'f u32>,
}
impl<'f, IIB: InstInserterBase<'f>> InsertBuilder<'f, IIB> {
/// Create a new builder which inserts instructions at `pos`.
/// The `dfg` and `pos.layout` references should be from the same `Function`.
pub fn new(inserter: IIB) -> Self {
Self {
inserter,
unused: PhantomData,
}
}
/// Reuse result values in `reuse`.
///
/// Convert this builder into one that will reuse the provided result values instead of
/// allocating new ones. The provided values for reuse must not be attached to anything. Any
/// missing result values will be allocated as normal.
///
/// The `reuse` argument is expected to be an array of `Option<Value>`.
pub fn with_results<Array>(self, reuse: Array) -> InsertReuseBuilder<'f, IIB, Array>
where
Array: AsRef<[Option<Value>]>,
{
InsertReuseBuilder {
inserter: self.inserter,
reuse,
unused: PhantomData,
}
}
/// Reuse a single result value.
///
/// Convert this into a builder that will reuse `v` as the single result value. The reused
/// result value `v` must not be attached to anything.
///
/// This method should only be used when building an instruction with exactly one result. Use
/// `with_results()` for the more general case.
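    ///
    /// A sketch (see the `reuse_results` test below): after detaching `v0` from its defining
    /// instruction, `pos.ins().with_result(v0).iconst(I32, 3)` rebinds it as the result of the
    /// new `iconst`.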
pub fn with_result(self, v: Value) -> InsertReuseBuilder<'f, IIB, [Option<Value>; 1]> {
// TODO: Specialize this to return a different builder that just attaches `v` instead of
// calling `make_inst_results_reusing()`.
self.with_results([Some(v)])
}
}
impl<'f, IIB: InstInserterBase<'f>> InstBuilderBase<'f> for InsertBuilder<'f, IIB> {
fn data_flow_graph(&self) -> &DataFlowGraph {
self.inserter.data_flow_graph()
}
fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph {
self.inserter.data_flow_graph_mut()
}
fn build(mut self, data: InstructionData, ctrl_typevar: Type) -> (Inst, &'f mut DataFlowGraph) {
let inst;
{
let dfg = self.inserter.data_flow_graph_mut();
inst = dfg.make_inst(data);
dfg.make_inst_results(inst, ctrl_typevar);
}
(inst, self.inserter.insert_built_inst(inst, ctrl_typevar))
}
}
/// Builder that inserts a new instruction like `InsertBuilder`, but reusing result values.
pub struct InsertReuseBuilder<'f, IIB, Array>
where
IIB: InstInserterBase<'f>,
Array: AsRef<[Option<Value>]>,
{
inserter: IIB,
reuse: Array,
unused: PhantomData<&'f u32>,
}
impl<'f, IIB, Array> InstBuilderBase<'f> for InsertReuseBuilder<'f, IIB, Array>
where
IIB: InstInserterBase<'f>,
Array: AsRef<[Option<Value>]>,
{
fn data_flow_graph(&self) -> &DataFlowGraph {
self.inserter.data_flow_graph()
}
fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph {
self.inserter.data_flow_graph_mut()
}
fn build(mut self, data: InstructionData, ctrl_typevar: Type) -> (Inst, &'f mut DataFlowGraph) {
let inst;
{
let dfg = self.inserter.data_flow_graph_mut();
inst = dfg.make_inst(data);
// Make an `Iterator<Item = Option<Value>>`.
let ru = self.reuse.as_ref().iter().cloned();
dfg.make_inst_results_reusing(inst, ctrl_typevar, ru);
}
(inst, self.inserter.insert_built_inst(inst, ctrl_typevar))
}
}
/// Instruction builder that replaces an existing instruction.
///
/// The inserted instruction will have the same `Inst` number as the old one.
///
/// If the old instruction still has result values attached, it is assumed that the new instruction
/// produces the same number and types of results. The old result values are preserved. If the
/// replacement instruction format does not support multiple results, the builder panics. It is a
/// bug to leave result values dangling.
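///
/// A sketch of typical use, going through `DataFlowGraph::replace` (as in the
/// `branches_and_jumps` test in `flowgraph.rs`):
///
/// ```ignore
/// func.dfg.replace(inst).brnz(cond, new_dest, &[]);
/// ```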
pub struct ReplaceBuilder<'f> {
dfg: &'f mut DataFlowGraph,
inst: Inst,
}
impl<'f> ReplaceBuilder<'f> {
/// Create a `ReplaceBuilder` that will overwrite `inst`.
pub fn new(dfg: &'f mut DataFlowGraph, inst: Inst) -> Self {
Self { dfg, inst }
}
}
impl<'f> InstBuilderBase<'f> for ReplaceBuilder<'f> {
fn data_flow_graph(&self) -> &DataFlowGraph {
self.dfg
}
fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph {
self.dfg
}
fn build(self, data: InstructionData, ctrl_typevar: Type) -> (Inst, &'f mut DataFlowGraph) {
// Splat the new instruction on top of the old one.
self.dfg[self.inst] = data;
if !self.dfg.has_results(self.inst) {
// The old result values were either detached or non-existent.
// Construct new ones.
self.dfg.make_inst_results(self.inst, ctrl_typevar);
}
(self.inst, self.dfg)
}
}
#[cfg(test)]
mod tests {
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::condcodes::*;
use crate::ir::types::*;
use crate::ir::{Function, InstBuilder, ValueDef};
#[test]
fn types() {
let mut func = Function::new();
let ebb0 = func.dfg.make_ebb();
let arg0 = func.dfg.append_ebb_param(ebb0, I32);
let mut pos = FuncCursor::new(&mut func);
pos.insert_ebb(ebb0);
// Explicit types.
let v0 = pos.ins().iconst(I32, 3);
assert_eq!(pos.func.dfg.value_type(v0), I32);
// Inferred from inputs.
let v1 = pos.ins().iadd(arg0, v0);
assert_eq!(pos.func.dfg.value_type(v1), I32);
// Formula.
let cmp = pos.ins().icmp(IntCC::Equal, arg0, v0);
assert_eq!(pos.func.dfg.value_type(cmp), B1);
}
#[test]
fn reuse_results() {
let mut func = Function::new();
let ebb0 = func.dfg.make_ebb();
let arg0 = func.dfg.append_ebb_param(ebb0, I32);
let mut pos = FuncCursor::new(&mut func);
pos.insert_ebb(ebb0);
let v0 = pos.ins().iadd_imm(arg0, 17);
assert_eq!(pos.func.dfg.value_type(v0), I32);
let iadd = pos.prev_inst().unwrap();
assert_eq!(pos.func.dfg.value_def(v0), ValueDef::Result(iadd, 0));
// Detach v0 and reuse it for a different instruction.
pos.func.dfg.clear_results(iadd);
let v0b = pos.ins().with_result(v0).iconst(I32, 3);
assert_eq!(v0, v0b);
assert_eq!(pos.current_inst(), Some(iadd));
let iconst = pos.prev_inst().unwrap();
assert!(iadd != iconst);
assert_eq!(pos.func.dfg.value_def(v0), ValueDef::Result(iconst, 0));
}
}


@@ -0,0 +1,358 @@
//! Condition codes for the Cranelift code generator.
//!
//! A condition code here is an enumerated type that determines how to compare two numbers. There
//! are different rules for comparing integers and floating point numbers, so they use different
//! condition codes.
use core::fmt::{self, Display, Formatter};
use core::str::FromStr;
/// Common traits of condition codes.
pub trait CondCode: Copy {
/// Get the inverse condition code of `self`.
///
/// The inverse condition code produces the opposite result for all comparisons.
/// That is, `cmp CC, x, y` is true if and only if `cmp CC.inverse(), x, y` is false.
#[must_use]
fn inverse(self) -> Self;
/// Get the reversed condition code for `self`.
///
/// The reversed condition code produces the same result as swapping `x` and `y` in the
/// comparison. That is, `cmp CC, x, y` is the same as `cmp CC.reverse(), y, x`.
#[must_use]
fn reverse(self) -> Self;
}
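// For example, with the `IntCC` codes defined below: `SignedLessThan.inverse()` is
// `SignedGreaterThanOrEqual` (`x < y` is false exactly when `x >= y`), while
// `SignedLessThan.reverse()` is `SignedGreaterThan` (`x < y` iff `y > x`).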
/// Condition code for comparing integers.
///
/// This condition code is used by the `icmp` instruction to compare integer values. There are
/// separate codes for comparing the integers as signed or unsigned numbers where it makes a
/// difference.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum IntCC {
/// `==`.
Equal,
/// `!=`.
NotEqual,
/// Signed `<`.
SignedLessThan,
/// Signed `>=`.
SignedGreaterThanOrEqual,
/// Signed `>`.
SignedGreaterThan,
/// Signed `<=`.
SignedLessThanOrEqual,
/// Unsigned `<`.
UnsignedLessThan,
/// Unsigned `>=`.
UnsignedGreaterThanOrEqual,
/// Unsigned `>`.
UnsignedGreaterThan,
/// Unsigned `<=`.
UnsignedLessThanOrEqual,
}
impl CondCode for IntCC {
fn inverse(self) -> Self {
use self::IntCC::*;
match self {
Equal => NotEqual,
NotEqual => Equal,
SignedLessThan => SignedGreaterThanOrEqual,
SignedGreaterThanOrEqual => SignedLessThan,
SignedGreaterThan => SignedLessThanOrEqual,
SignedLessThanOrEqual => SignedGreaterThan,
UnsignedLessThan => UnsignedGreaterThanOrEqual,
UnsignedGreaterThanOrEqual => UnsignedLessThan,
UnsignedGreaterThan => UnsignedLessThanOrEqual,
UnsignedLessThanOrEqual => UnsignedGreaterThan,
}
}
fn reverse(self) -> Self {
use self::IntCC::*;
match self {
Equal => Equal,
NotEqual => NotEqual,
SignedGreaterThan => SignedLessThan,
SignedGreaterThanOrEqual => SignedLessThanOrEqual,
SignedLessThan => SignedGreaterThan,
SignedLessThanOrEqual => SignedGreaterThanOrEqual,
UnsignedGreaterThan => UnsignedLessThan,
UnsignedGreaterThanOrEqual => UnsignedLessThanOrEqual,
UnsignedLessThan => UnsignedGreaterThan,
UnsignedLessThanOrEqual => UnsignedGreaterThanOrEqual,
}
}
}
impl Display for IntCC {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
use self::IntCC::*;
f.write_str(match *self {
Equal => "eq",
NotEqual => "ne",
SignedGreaterThan => "sgt",
SignedGreaterThanOrEqual => "sge",
SignedLessThan => "slt",
SignedLessThanOrEqual => "sle",
UnsignedGreaterThan => "ugt",
UnsignedGreaterThanOrEqual => "uge",
UnsignedLessThan => "ult",
UnsignedLessThanOrEqual => "ule",
})
}
}
impl FromStr for IntCC {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
use self::IntCC::*;
match s {
"eq" => Ok(Equal),
"ne" => Ok(NotEqual),
"sge" => Ok(SignedGreaterThanOrEqual),
"sgt" => Ok(SignedGreaterThan),
"sle" => Ok(SignedLessThanOrEqual),
"slt" => Ok(SignedLessThan),
"uge" => Ok(UnsignedGreaterThanOrEqual),
"ugt" => Ok(UnsignedGreaterThan),
"ule" => Ok(UnsignedLessThanOrEqual),
"ult" => Ok(UnsignedLessThan),
_ => Err(()),
}
}
}
/// Condition code for comparing floating point numbers.
///
/// This condition code is used by the `fcmp` instruction to compare floating point values. Two
/// IEEE floating point values relate in exactly one of four ways:
///
/// 1. `UN` - unordered when either value is NaN.
/// 2. `EQ` - equal numerical value.
/// 3. `LT` - `x` is less than `y`.
/// 4. `GT` - `x` is greater than `y`.
///
/// Note that `0.0` and `-0.0` relate as `EQ` because they both represent the number 0.
///
/// The condition codes described here are used to produce a single boolean value from the
/// comparison. The 14 condition codes here cover every possible combination of the relations
/// above except the impossible `!UN & !EQ & !LT & !GT` and the always-true `UN | EQ | LT | GT`.
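///
/// For example, `LessThan` is just `LT`, while `UnorderedOrLessThan` is `UN | LT`; the latter
/// is the inverse of `GreaterThanOrEqual` (`GT | EQ`).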
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum FloatCC {
/// EQ | LT | GT
Ordered,
/// UN
Unordered,
/// EQ
Equal,
/// The C '!=' operator is the inverse of '==': `NotEqual`.
/// UN | LT | GT
NotEqual,
/// LT | GT
OrderedNotEqual,
/// UN | EQ
UnorderedOrEqual,
/// LT
LessThan,
/// LT | EQ
LessThanOrEqual,
/// GT
GreaterThan,
/// GT | EQ
GreaterThanOrEqual,
/// UN | LT
UnorderedOrLessThan,
/// UN | LT | EQ
UnorderedOrLessThanOrEqual,
/// UN | GT
UnorderedOrGreaterThan,
/// UN | GT | EQ
UnorderedOrGreaterThanOrEqual,
}
impl CondCode for FloatCC {
fn inverse(self) -> Self {
use self::FloatCC::*;
match self {
Ordered => Unordered,
Unordered => Ordered,
Equal => NotEqual,
NotEqual => Equal,
OrderedNotEqual => UnorderedOrEqual,
UnorderedOrEqual => OrderedNotEqual,
LessThan => UnorderedOrGreaterThanOrEqual,
LessThanOrEqual => UnorderedOrGreaterThan,
GreaterThan => UnorderedOrLessThanOrEqual,
GreaterThanOrEqual => UnorderedOrLessThan,
UnorderedOrLessThan => GreaterThanOrEqual,
UnorderedOrLessThanOrEqual => GreaterThan,
UnorderedOrGreaterThan => LessThanOrEqual,
UnorderedOrGreaterThanOrEqual => LessThan,
}
}
fn reverse(self) -> Self {
use self::FloatCC::*;
match self {
Ordered => Ordered,
Unordered => Unordered,
Equal => Equal,
NotEqual => NotEqual,
OrderedNotEqual => OrderedNotEqual,
UnorderedOrEqual => UnorderedOrEqual,
LessThan => GreaterThan,
LessThanOrEqual => GreaterThanOrEqual,
GreaterThan => LessThan,
GreaterThanOrEqual => LessThanOrEqual,
UnorderedOrLessThan => UnorderedOrGreaterThan,
UnorderedOrLessThanOrEqual => UnorderedOrGreaterThanOrEqual,
UnorderedOrGreaterThan => UnorderedOrLessThan,
UnorderedOrGreaterThanOrEqual => UnorderedOrLessThanOrEqual,
}
}
}
impl Display for FloatCC {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
use self::FloatCC::*;
f.write_str(match *self {
Ordered => "ord",
Unordered => "uno",
Equal => "eq",
NotEqual => "ne",
OrderedNotEqual => "one",
UnorderedOrEqual => "ueq",
LessThan => "lt",
LessThanOrEqual => "le",
GreaterThan => "gt",
GreaterThanOrEqual => "ge",
UnorderedOrLessThan => "ult",
UnorderedOrLessThanOrEqual => "ule",
UnorderedOrGreaterThan => "ugt",
UnorderedOrGreaterThanOrEqual => "uge",
})
}
}
impl FromStr for FloatCC {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
use self::FloatCC::*;
match s {
"ord" => Ok(Ordered),
"uno" => Ok(Unordered),
"eq" => Ok(Equal),
"ne" => Ok(NotEqual),
"one" => Ok(OrderedNotEqual),
"ueq" => Ok(UnorderedOrEqual),
"lt" => Ok(LessThan),
"le" => Ok(LessThanOrEqual),
"gt" => Ok(GreaterThan),
"ge" => Ok(GreaterThanOrEqual),
"ult" => Ok(UnorderedOrLessThan),
"ule" => Ok(UnorderedOrLessThanOrEqual),
"ugt" => Ok(UnorderedOrGreaterThan),
"uge" => Ok(UnorderedOrGreaterThanOrEqual),
_ => Err(()),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::string::ToString;
static INT_ALL: [IntCC; 10] = [
IntCC::Equal,
IntCC::NotEqual,
IntCC::SignedLessThan,
IntCC::SignedGreaterThanOrEqual,
IntCC::SignedGreaterThan,
IntCC::SignedLessThanOrEqual,
IntCC::UnsignedLessThan,
IntCC::UnsignedGreaterThanOrEqual,
IntCC::UnsignedGreaterThan,
IntCC::UnsignedLessThanOrEqual,
];
#[test]
fn int_inverse() {
for r in &INT_ALL {
let cc = *r;
let inv = cc.inverse();
assert!(cc != inv);
assert_eq!(inv.inverse(), cc);
}
}
#[test]
fn int_reverse() {
for r in &INT_ALL {
let cc = *r;
let rev = cc.reverse();
assert_eq!(rev.reverse(), cc);
}
}
#[test]
fn int_display() {
for r in &INT_ALL {
let cc = *r;
assert_eq!(cc.to_string().parse(), Ok(cc));
}
assert_eq!("bogus".parse::<IntCC>(), Err(()));
}
static FLOAT_ALL: [FloatCC; 14] = [
FloatCC::Ordered,
FloatCC::Unordered,
FloatCC::Equal,
FloatCC::NotEqual,
FloatCC::OrderedNotEqual,
FloatCC::UnorderedOrEqual,
FloatCC::LessThan,
FloatCC::LessThanOrEqual,
FloatCC::GreaterThan,
FloatCC::GreaterThanOrEqual,
FloatCC::UnorderedOrLessThan,
FloatCC::UnorderedOrLessThanOrEqual,
FloatCC::UnorderedOrGreaterThan,
FloatCC::UnorderedOrGreaterThanOrEqual,
];
#[test]
fn float_inverse() {
for r in &FLOAT_ALL {
let cc = *r;
let inv = cc.inverse();
assert!(cc != inv);
assert_eq!(inv.inverse(), cc);
}
}
#[test]
fn float_reverse() {
for r in &FLOAT_ALL {
let cc = *r;
let rev = cc.reverse();
assert_eq!(rev.reverse(), cc);
}
}
#[test]
fn float_display() {
for r in &FLOAT_ALL {
let cc = *r;
assert_eq!(cc.to_string().parse(), Ok(cc));
}
assert_eq!("bogus".parse::<FloatCC>(), Err(()));
}
}

File diff suppressed because it is too large


@@ -0,0 +1,330 @@
//! Cranelift IR entity references.
//!
//! Instructions in Cranelift IR need to reference other entities in the function. This can be other
//! parts of the function like extended basic blocks or stack slots, or it can be external entities
//! that are declared in the function preamble in the text format.
//!
//! These entity references in instruction operands are not implemented as Rust references, both
//! because Rust's ownership and mutability rules make it difficult and because 64-bit pointers
//! take up a lot of space; we want a compact in-memory representation. Instead, entity
//! references are structs wrapping a `u32` index into a table in the `Function` main data
//! structure. There is a separate index type for each entity type, so we don't lose type safety.
//!
//! The `entities` module defines public types for the entity references along with constants
//! representing an invalid reference. We prefer to use `Option<EntityRef>` whenever possible, but
//! unfortunately that type is twice as large as the 32-bit index type on its own. Thus, compact
//! data structures use the `PackedOption<EntityRef>` representation, while function arguments and
//! return values prefer the more Rust-like `Option<EntityRef>` variant.
//!
//! The entity references all implement the `Display` trait in a way that matches the textual IR
//! format.
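//!
//! A sketch of the size trade-off (this mirrors the `memory` test below):
//!
//! ```ignore
//! use core::mem::size_of;
//!
//! assert_eq!(size_of::<Value>(), 4);
//! assert_eq!(size_of::<PackedOption<Value>>(), 4); // the reserved index encodes `None`
//! assert_eq!(size_of::<Option<Value>>(), 8);       // twice as large
//! ```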
use crate::entity::entity_impl;
use core::fmt;
use core::u32;
/// An opaque reference to an extended basic block in a function.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Ebb(u32);
entity_impl!(Ebb, "ebb");
impl Ebb {
/// Create a new EBB reference from its number. This corresponds to the `ebbNN` representation.
///
/// This method is for use by the parser.
pub fn with_number(n: u32) -> Option<Self> {
if n < u32::MAX {
Some(Ebb(n))
} else {
None
}
}
}
/// An opaque reference to an SSA value.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Value(u32);
entity_impl!(Value, "v");
impl Value {
/// Create a value from its number representation.
/// This is the number in the `vNN` notation.
///
/// This method is for use by the parser.
pub fn with_number(n: u32) -> Option<Self> {
if n < u32::MAX / 2 {
Some(Value(n))
} else {
None
}
}
}
/// An opaque reference to an instruction in a function.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Inst(u32);
entity_impl!(Inst, "inst");
/// An opaque reference to a stack slot.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct StackSlot(u32);
entity_impl!(StackSlot, "ss");
impl StackSlot {
/// Create a new stack slot reference from its number.
///
/// This method is for use by the parser.
pub fn with_number(n: u32) -> Option<Self> {
if n < u32::MAX {
Some(StackSlot(n))
} else {
None
}
}
}
/// An opaque reference to a global value.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct GlobalValue(u32);
entity_impl!(GlobalValue, "gv");
impl GlobalValue {
/// Create a new global value reference from its number.
///
/// This method is for use by the parser.
pub fn with_number(n: u32) -> Option<Self> {
if n < u32::MAX {
Some(GlobalValue(n))
} else {
None
}
}
}
/// An opaque reference to a jump table.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct JumpTable(u32);
entity_impl!(JumpTable, "jt");
impl JumpTable {
/// Create a new jump table reference from its number.
///
/// This method is for use by the parser.
pub fn with_number(n: u32) -> Option<Self> {
if n < u32::MAX {
Some(JumpTable(n))
} else {
None
}
}
}
/// A reference to an external function.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct FuncRef(u32);
entity_impl!(FuncRef, "fn");
impl FuncRef {
/// Create a new external function reference from its number.
///
/// This method is for use by the parser.
pub fn with_number(n: u32) -> Option<Self> {
if n < u32::MAX {
Some(FuncRef(n))
} else {
None
}
}
}
/// A reference to a function signature.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct SigRef(u32);
entity_impl!(SigRef, "sig");
impl SigRef {
/// Create a new function signature reference from its number.
///
/// This method is for use by the parser.
pub fn with_number(n: u32) -> Option<Self> {
if n < u32::MAX {
Some(SigRef(n))
} else {
None
}
}
}
/// A reference to a heap.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct Heap(u32);
entity_impl!(Heap, "heap");
impl Heap {
/// Create a new heap reference from its number.
///
/// This method is for use by the parser.
pub fn with_number(n: u32) -> Option<Self> {
if n < u32::MAX {
Some(Heap(n))
} else {
None
}
}
}
/// A reference to a table.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct Table(u32);
entity_impl!(Table, "table");
impl Table {
/// Create a new table reference from its number.
///
/// This method is for use by the parser.
pub fn with_number(n: u32) -> Option<Self> {
if n < u32::MAX {
Some(Table(n))
} else {
None
}
}
}
/// A reference to any of the entities defined in this module.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub enum AnyEntity {
/// The whole function.
Function,
/// An extended basic block.
Ebb(Ebb),
/// An instruction.
Inst(Inst),
/// An SSA value.
Value(Value),
/// A stack slot.
StackSlot(StackSlot),
/// A Global value.
GlobalValue(GlobalValue),
/// A jump table.
JumpTable(JumpTable),
/// An external function.
FuncRef(FuncRef),
/// A function call signature.
SigRef(SigRef),
/// A heap.
Heap(Heap),
/// A table.
Table(Table),
}
impl fmt::Display for AnyEntity {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
AnyEntity::Function => write!(f, "function"),
AnyEntity::Ebb(r) => r.fmt(f),
AnyEntity::Inst(r) => r.fmt(f),
AnyEntity::Value(r) => r.fmt(f),
AnyEntity::StackSlot(r) => r.fmt(f),
AnyEntity::GlobalValue(r) => r.fmt(f),
AnyEntity::JumpTable(r) => r.fmt(f),
AnyEntity::FuncRef(r) => r.fmt(f),
AnyEntity::SigRef(r) => r.fmt(f),
AnyEntity::Heap(r) => r.fmt(f),
AnyEntity::Table(r) => r.fmt(f),
}
}
}
impl fmt::Debug for AnyEntity {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
(self as &fmt::Display).fmt(f)
}
}
impl From<Ebb> for AnyEntity {
fn from(r: Ebb) -> Self {
AnyEntity::Ebb(r)
}
}
impl From<Inst> for AnyEntity {
fn from(r: Inst) -> Self {
AnyEntity::Inst(r)
}
}
impl From<Value> for AnyEntity {
fn from(r: Value) -> Self {
AnyEntity::Value(r)
}
}
impl From<StackSlot> for AnyEntity {
fn from(r: StackSlot) -> Self {
AnyEntity::StackSlot(r)
}
}
impl From<GlobalValue> for AnyEntity {
fn from(r: GlobalValue) -> Self {
AnyEntity::GlobalValue(r)
}
}
impl From<JumpTable> for AnyEntity {
fn from(r: JumpTable) -> Self {
AnyEntity::JumpTable(r)
}
}
impl From<FuncRef> for AnyEntity {
fn from(r: FuncRef) -> Self {
AnyEntity::FuncRef(r)
}
}
impl From<SigRef> for AnyEntity {
fn from(r: SigRef) -> Self {
AnyEntity::SigRef(r)
}
}
impl From<Heap> for AnyEntity {
fn from(r: Heap) -> Self {
AnyEntity::Heap(r)
}
}
impl From<Table> for AnyEntity {
fn from(r: Table) -> Self {
AnyEntity::Table(r)
}
}
#[cfg(test)]
mod tests {
use super::*;
use core::u32;
use std::string::ToString;
#[test]
fn value_with_number() {
assert_eq!(Value::with_number(0).unwrap().to_string(), "v0");
assert_eq!(Value::with_number(1).unwrap().to_string(), "v1");
assert_eq!(Value::with_number(u32::MAX / 2), None);
assert!(Value::with_number(u32::MAX / 2 - 1).is_some());
}
#[test]
fn memory() {
use crate::packed_option::PackedOption;
use core::mem;
// This is the whole point of `PackedOption`.
assert_eq!(
mem::size_of::<Value>(),
mem::size_of::<PackedOption<Value>>()
);
}
}


@@ -0,0 +1,405 @@
//! External function calls.
//!
//! To a Cranelift function, all functions are "external". Directly called functions must be
//! declared in the preamble, and all function calls must have a signature.
//!
//! This module declares the data types used to represent external functions and call signatures.
use crate::ir::{ArgumentLoc, ExternalName, SigRef, Type};
use crate::isa::{CallConv, RegInfo, RegUnit};
use core::fmt;
use core::str::FromStr;
use std::vec::Vec;
/// Function signature.
///
/// The function signature describes the types of formal parameters and return values along with
/// other details that are needed to call a function correctly.
///
/// A signature can optionally include ISA-specific ABI information which specifies exactly how
/// arguments and return values are passed.
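///
/// A sketch of building and printing a signature (compare the `signatures` test below):
///
/// ```ignore
/// let mut sig = Signature::new(CallConv::SystemV);
/// sig.params.push(AbiParam::new(types::I32));
/// sig.returns.push(AbiParam::new(types::F32));
/// assert_eq!(sig.to_string(), "(i32) -> f32 system_v");
/// ```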
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Signature {
/// The arguments passed to the function.
pub params: Vec<AbiParam>,
/// Values returned from the function.
pub returns: Vec<AbiParam>,
/// Calling convention.
pub call_conv: CallConv,
}
impl Signature {
/// Create a new blank signature.
pub fn new(call_conv: CallConv) -> Self {
Self {
params: Vec::new(),
returns: Vec::new(),
call_conv,
}
}
/// Clear the signature so it is identical to a fresh one returned by `new()`.
pub fn clear(&mut self, call_conv: CallConv) {
self.params.clear();
self.returns.clear();
self.call_conv = call_conv;
}
/// Return an object that can display `self` with correct register names.
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&'a self, regs: R) -> DisplaySignature<'a> {
DisplaySignature(self, regs.into())
}
/// Find the index of a presumed unique special-purpose parameter.
pub fn special_param_index(&self, purpose: ArgumentPurpose) -> Option<usize> {
self.params.iter().rposition(|arg| arg.purpose == purpose)
}
}
/// Wrapper type capable of displaying a `Signature` with correct register names.
pub struct DisplaySignature<'a>(&'a Signature, Option<&'a RegInfo>);
fn write_list(f: &mut fmt::Formatter, args: &[AbiParam], regs: Option<&RegInfo>) -> fmt::Result {
match args.split_first() {
None => {}
Some((first, rest)) => {
write!(f, "{}", first.display(regs))?;
for arg in rest {
write!(f, ", {}", arg.display(regs))?;
}
}
}
Ok(())
}
impl<'a> fmt::Display for DisplaySignature<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "(")?;
write_list(f, &self.0.params, self.1)?;
write!(f, ")")?;
if !self.0.returns.is_empty() {
write!(f, " -> ")?;
write_list(f, &self.0.returns, self.1)?;
}
write!(f, " {}", self.0.call_conv)
}
}
impl fmt::Display for Signature {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.display(None).fmt(f)
}
}
/// Function parameter or return value descriptor.
///
/// This describes the value type being passed to or from a function along with flags that affect
/// how the argument is passed.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub struct AbiParam {
/// Type of the argument value.
pub value_type: Type,
/// Special purpose of argument, or `Normal`.
pub purpose: ArgumentPurpose,
/// Method for extending argument to a full register.
pub extension: ArgumentExtension,
/// ABI-specific location of this argument, or `Unassigned` for arguments that have not yet
/// been legalized.
pub location: ArgumentLoc,
}
impl AbiParam {
/// Create a parameter with default flags.
pub fn new(vt: Type) -> Self {
Self {
value_type: vt,
extension: ArgumentExtension::None,
purpose: ArgumentPurpose::Normal,
location: Default::default(),
}
}
/// Create a special-purpose parameter that is not (yet) bound to a specific register.
pub fn special(vt: Type, purpose: ArgumentPurpose) -> Self {
Self {
value_type: vt,
extension: ArgumentExtension::None,
purpose,
location: Default::default(),
}
}
/// Create a parameter for a special-purpose register.
pub fn special_reg(vt: Type, purpose: ArgumentPurpose, regunit: RegUnit) -> Self {
Self {
value_type: vt,
extension: ArgumentExtension::None,
purpose,
location: ArgumentLoc::Reg(regunit),
}
}
/// Convert `self` to a parameter with the `uext` flag set.
pub fn uext(self) -> Self {
debug_assert!(self.value_type.is_int(), "uext on {} arg", self.value_type);
Self {
extension: ArgumentExtension::Uext,
..self
}
}
/// Convert `self` to a parameter type with the `sext` flag set.
pub fn sext(self) -> Self {
debug_assert!(self.value_type.is_int(), "sext on {} arg", self.value_type);
Self {
extension: ArgumentExtension::Sext,
..self
}
}
/// Return an object that can display `self` with correct register names.
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&'a self, regs: R) -> DisplayAbiParam<'a> {
DisplayAbiParam(self, regs.into())
}
}
/// Wrapper type capable of displaying a `AbiParam` with correct register names.
pub struct DisplayAbiParam<'a>(&'a AbiParam, Option<&'a RegInfo>);
impl<'a> fmt::Display for DisplayAbiParam<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.0.value_type)?;
match self.0.extension {
ArgumentExtension::None => {}
ArgumentExtension::Uext => write!(f, " uext")?,
ArgumentExtension::Sext => write!(f, " sext")?,
}
if self.0.purpose != ArgumentPurpose::Normal {
write!(f, " {}", self.0.purpose)?;
}
if self.0.location.is_assigned() {
write!(f, " [{}]", self.0.location.display(self.1))?;
}
Ok(())
}
}
impl fmt::Display for AbiParam {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.display(None).fmt(f)
}
}
/// Function argument extension options.
///
/// On some architectures, small integer function arguments are extended to the width of a
/// general-purpose register.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
pub enum ArgumentExtension {
/// No extension, high bits are indeterminate.
None,
/// Unsigned extension: high bits in register are 0.
Uext,
/// Signed extension: high bits in register replicate sign bit.
Sext,
}
/// The special purpose of a function argument.
///
/// Function arguments and return values are used to pass user program values between functions,
/// but they are also used to represent special registers with significance to the ABI such as
/// frame pointers and callee-saved registers.
///
/// The argument purpose is used to indicate any special meaning of an argument or return value.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
pub enum ArgumentPurpose {
/// A normal user program value passed to or from a function.
Normal,
/// Struct return pointer.
///
/// When a function needs to return more data than will fit in registers, the caller passes a
/// pointer to a memory location where the return value can be written. In some ABIs, this
/// struct return pointer is passed in a specific register.
///
/// This argument kind can also appear as a return value for ABIs that require a function with
/// a `StructReturn` pointer argument to also return that pointer in a register.
StructReturn,
/// The link register.
///
/// Most RISC architectures implement calls by saving the return address in a designated
/// register rather than pushing it on the stack. This is represented with a `Link` argument.
///
/// Similarly, some return instructions expect the return address in a register represented as
/// a `Link` return value.
Link,
/// The frame pointer.
///
/// This indicates the frame pointer register which has a special meaning in some ABIs.
///
/// The frame pointer appears as an argument and as a return value since it is a callee-saved
/// register.
FramePointer,
/// A callee-saved register.
///
/// Some calling conventions have registers that must be saved by the callee. These registers
/// are represented as `CalleeSaved` arguments and return values.
CalleeSaved,
/// A VM context pointer.
///
/// This is a pointer to a context struct containing details about the current sandbox. It is
/// used as a base pointer for `vmctx` global values.
VMContext,
/// A signature identifier.
///
/// This is a special-purpose argument used to identify the calling convention expected by the
/// caller in an indirect call. The callee can verify that the expected signature ID matches.
SignatureId,
/// A stack limit pointer.
///
    /// This is a pointer to a stack limit value that the current stack pointer is checked
    /// against. It can appear at most once in a signature.
StackLimit,
}
/// Text format names of the `ArgumentPurpose` variants.
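/// The order here must match the declaration order of the `ArgumentPurpose` variants, since
/// `Display` indexes this array with `*self as usize`.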
static PURPOSE_NAMES: [&str; 8] = [
"normal",
"sret",
"link",
"fp",
"csr",
"vmctx",
"sigid",
"stack_limit",
];
impl fmt::Display for ArgumentPurpose {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str(PURPOSE_NAMES[*self as usize])
}
}
impl FromStr for ArgumentPurpose {
type Err = ();
fn from_str(s: &str) -> Result<Self, ()> {
match s {
"normal" => Ok(ArgumentPurpose::Normal),
"sret" => Ok(ArgumentPurpose::StructReturn),
"link" => Ok(ArgumentPurpose::Link),
"fp" => Ok(ArgumentPurpose::FramePointer),
"csr" => Ok(ArgumentPurpose::CalleeSaved),
"vmctx" => Ok(ArgumentPurpose::VMContext),
"sigid" => Ok(ArgumentPurpose::SignatureId),
"stack_limit" => Ok(ArgumentPurpose::StackLimit),
_ => Err(()),
}
}
}
/// An external function.
///
/// Information about a function that can be called directly with a direct `call` instruction.
#[derive(Clone, Debug)]
pub struct ExtFuncData {
/// Name of the external function.
pub name: ExternalName,
/// Call signature of function.
pub signature: SigRef,
/// Will this function be defined nearby, such that it will always be a certain distance away,
/// after linking? If so, references to it can avoid going through a GOT or PLT. Note that
/// symbols meant to be preemptible cannot be considered colocated.
pub colocated: bool,
}
impl fmt::Display for ExtFuncData {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if self.colocated {
write!(f, "colocated ")?;
}
write!(f, "{} {}", self.name, self.signature)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::ir::types::{B8, F32, I32};
use std::string::ToString;
#[test]
fn argument_type() {
let t = AbiParam::new(I32);
assert_eq!(t.to_string(), "i32");
let mut t = t.uext();
assert_eq!(t.to_string(), "i32 uext");
assert_eq!(t.sext().to_string(), "i32 sext");
t.purpose = ArgumentPurpose::StructReturn;
assert_eq!(t.to_string(), "i32 uext sret");
}
#[test]
fn argument_purpose() {
let all_purpose = [
ArgumentPurpose::Normal,
ArgumentPurpose::StructReturn,
ArgumentPurpose::Link,
ArgumentPurpose::FramePointer,
ArgumentPurpose::CalleeSaved,
ArgumentPurpose::VMContext,
ArgumentPurpose::SignatureId,
ArgumentPurpose::StackLimit,
];
for (&e, &n) in all_purpose.iter().zip(PURPOSE_NAMES.iter()) {
assert_eq!(e.to_string(), n);
assert_eq!(Ok(e), n.parse());
}
}
#[test]
fn call_conv() {
for &cc in &[
CallConv::Fast,
CallConv::Cold,
CallConv::SystemV,
CallConv::WindowsFastcall,
CallConv::Baldrdash,
] {
assert_eq!(Ok(cc), cc.to_string().parse())
}
}
#[test]
fn signatures() {
let mut sig = Signature::new(CallConv::Baldrdash);
assert_eq!(sig.to_string(), "() baldrdash");
sig.params.push(AbiParam::new(I32));
assert_eq!(sig.to_string(), "(i32) baldrdash");
sig.returns.push(AbiParam::new(F32));
assert_eq!(sig.to_string(), "(i32) -> f32 baldrdash");
sig.params.push(AbiParam::new(I32.by(4).unwrap()));
assert_eq!(sig.to_string(), "(i32, i32x4) -> f32 baldrdash");
sig.returns.push(AbiParam::new(B8));
assert_eq!(sig.to_string(), "(i32, i32x4) -> f32, b8 baldrdash");
// Order does not matter.
sig.params[0].location = ArgumentLoc::Stack(24);
sig.params[1].location = ArgumentLoc::Stack(8);
// Writing ABI-annotated signatures.
assert_eq!(
sig.to_string(),
"(i32 [24], i32x4 [8]) -> f32, b8 baldrdash"
);
}
}


@@ -0,0 +1,163 @@
//! External names.
//!
//! These are identifiers for declaring entities defined outside the current
//! function. The name of an external declaration doesn't have any meaning to
//! Cranelift, which compiles functions independently.
use crate::ir::LibCall;
use core::cmp;
use core::fmt::{self, Write};
use core::str::FromStr;
const TESTCASE_NAME_LENGTH: usize = 16;
/// The name of an external declaration is either a reference into a user-defined symbol
/// table, or a short sequence of ascii bytes so that test cases do not have to keep track
/// of a symbol table.
///
/// External names are primarily used as keys by code using Cranelift to map
/// from a `cranelift_codegen::ir::FuncRef` or similar to additional associated
/// data.
///
/// External names can also serve as a primitive testing and debugging tool.
/// In particular, many `.clif` test files use function names to identify
/// functions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ExternalName {
/// A name in a user-defined symbol table. Cranelift does not interpret
/// these numbers in any way.
User {
/// Arbitrary.
namespace: u32,
/// Arbitrary.
index: u32,
},
    /// A test case function name of up to 16 ascii characters. This is
/// not intended to be used outside test cases.
TestCase {
/// How many of the bytes in `ascii` are valid?
length: u8,
/// Ascii bytes of the name.
ascii: [u8; TESTCASE_NAME_LENGTH],
},
/// A well-known runtime library function.
LibCall(LibCall),
}
impl ExternalName {
    /// Creates a new external name from a sequence of bytes. The caller is expected to
    /// guarantee that the bytes are only ascii alphanumeric or `_`. Anything beyond
    /// `TESTCASE_NAME_LENGTH` bytes is silently dropped.
///
/// # Examples
///
/// ```rust
/// # use cranelift_codegen::ir::ExternalName;
/// // Create `ExternalName` from a string.
/// let name = ExternalName::testcase("hello");
/// assert_eq!(name.to_string(), "%hello");
/// ```
pub fn testcase<T: AsRef<[u8]>>(v: T) -> Self {
let vec = v.as_ref();
let len = cmp::min(vec.len(), TESTCASE_NAME_LENGTH);
let mut bytes = [0u8; TESTCASE_NAME_LENGTH];
bytes[0..len].copy_from_slice(&vec[0..len]);
ExternalName::TestCase {
length: len as u8,
ascii: bytes,
}
}
/// Create a new external name from user-provided integer indices.
///
/// # Examples
/// ```rust
/// # use cranelift_codegen::ir::ExternalName;
/// // Create `ExternalName` from integer indices
/// let name = ExternalName::user(123, 456);
/// assert_eq!(name.to_string(), "u123:456");
/// ```
pub fn user(namespace: u32, index: u32) -> Self {
ExternalName::User { namespace, index }
}
}
impl Default for ExternalName {
fn default() -> Self {
Self::user(0, 0)
}
}
impl fmt::Display for ExternalName {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
ExternalName::User { namespace, index } => write!(f, "u{}:{}", namespace, index),
ExternalName::TestCase { length, ascii } => {
f.write_char('%')?;
for byte in ascii.iter().take(length as usize) {
f.write_char(*byte as char)?;
}
Ok(())
}
ExternalName::LibCall(lc) => write!(f, "%{}", lc),
}
}
}
impl FromStr for ExternalName {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
// Try to parse as a libcall name, otherwise it's a test case.
match s.parse() {
Ok(lc) => Ok(ExternalName::LibCall(lc)),
Err(_) => Ok(Self::testcase(s.as_bytes())),
}
}
}
#[cfg(test)]
mod tests {
use super::ExternalName;
use crate::ir::LibCall;
use core::u32;
use std::string::ToString;
#[test]
fn display_testcase() {
assert_eq!(ExternalName::testcase("").to_string(), "%");
assert_eq!(ExternalName::testcase("x").to_string(), "%x");
assert_eq!(ExternalName::testcase("x_1").to_string(), "%x_1");
assert_eq!(
ExternalName::testcase("longname12345678").to_string(),
"%longname12345678"
);
// Constructor will silently drop bytes beyond the 16th
assert_eq!(
ExternalName::testcase("longname123456789").to_string(),
"%longname12345678"
);
}
#[test]
fn display_user() {
assert_eq!(ExternalName::user(0, 0).to_string(), "u0:0");
assert_eq!(ExternalName::user(1, 1).to_string(), "u1:1");
assert_eq!(
ExternalName::user(u32::MAX, u32::MAX).to_string(),
"u4294967295:4294967295"
);
}
#[test]
fn parsing() {
assert_eq!(
"FloorF32".parse(),
Ok(ExternalName::LibCall(LibCall::FloorF32))
);
assert_eq!(
ExternalName::LibCall(LibCall::FloorF32).to_string(),
"%FloorF32"
);
}
}


@@ -0,0 +1,252 @@
//! Intermediate representation of a function.
//!
//! The `Function` struct defined in this module owns all of its extended basic blocks and
//! instructions.
use crate::binemit::CodeOffset;
use crate::entity::{PrimaryMap, SecondaryMap};
use crate::ir;
use crate::ir::{DataFlowGraph, ExternalName, Layout, Signature};
use crate::ir::{
Ebb, ExtFuncData, FuncRef, GlobalValue, GlobalValueData, Heap, HeapData, JumpTable,
JumpTableData, SigRef, StackSlot, StackSlotData, Table, TableData,
};
use crate::ir::{EbbOffsets, InstEncodings, SourceLocs, StackSlots, ValueLocations};
use crate::ir::{JumpTableOffsets, JumpTables};
use crate::isa::{CallConv, EncInfo, Encoding, Legalize, TargetIsa};
use crate::regalloc::RegDiversions;
use crate::write::write_function;
use core::fmt;
/// A function.
///
/// Functions can be cloned, but it is not a very fast operation.
/// The clone will have all the same entity numbers as the original.
#[derive(Clone)]
pub struct Function {
/// Name of this function. Mostly used by `.clif` files.
pub name: ExternalName,
/// Signature of this function.
pub signature: Signature,
/// Stack slots allocated in this function.
pub stack_slots: StackSlots,
/// Global values referenced.
pub global_values: PrimaryMap<ir::GlobalValue, ir::GlobalValueData>,
/// Heaps referenced.
pub heaps: PrimaryMap<ir::Heap, ir::HeapData>,
/// Tables referenced.
pub tables: PrimaryMap<ir::Table, ir::TableData>,
/// Jump tables used in this function.
pub jump_tables: JumpTables,
/// Data flow graph containing the primary definition of all instructions, EBBs and values.
pub dfg: DataFlowGraph,
/// Layout of EBBs and instructions in the function body.
pub layout: Layout,
/// Encoding recipe and bits for the legal instructions.
/// Illegal instructions have the `Encoding::default()` value.
pub encodings: InstEncodings,
/// Location assigned to every value.
pub locations: ValueLocations,
/// Code offsets of the EBB headers.
///
/// This information is only transiently available after the `binemit::relax_branches` function
/// computes it, and it can easily be recomputed by calling that function. It is not included
/// in the textual IR format.
pub offsets: EbbOffsets,
/// Code offsets of Jump Table headers.
pub jt_offsets: JumpTableOffsets,
/// Source locations.
///
/// Track the original source location for each instruction. The source locations are not
/// interpreted by Cranelift, only preserved.
pub srclocs: SourceLocs,
}
impl Function {
/// Create a function with the given name and signature.
pub fn with_name_signature(name: ExternalName, sig: Signature) -> Self {
Self {
name,
signature: sig,
stack_slots: StackSlots::new(),
global_values: PrimaryMap::new(),
heaps: PrimaryMap::new(),
tables: PrimaryMap::new(),
jump_tables: PrimaryMap::new(),
dfg: DataFlowGraph::new(),
layout: Layout::new(),
encodings: SecondaryMap::new(),
locations: SecondaryMap::new(),
offsets: SecondaryMap::new(),
jt_offsets: SecondaryMap::new(),
srclocs: SecondaryMap::new(),
}
}
/// Clear all data structures in this function.
pub fn clear(&mut self) {
self.signature.clear(CallConv::Fast);
self.stack_slots.clear();
self.global_values.clear();
self.heaps.clear();
self.tables.clear();
self.jump_tables.clear();
self.dfg.clear();
self.layout.clear();
self.encodings.clear();
self.locations.clear();
        self.offsets.clear();
        // `jt_offsets` is transient like `offsets`; clear it too so no stale data survives.
        self.jt_offsets.clear();
self.srclocs.clear();
}
/// Create a new empty, anonymous function with a Fast calling convention.
pub fn new() -> Self {
Self::with_name_signature(ExternalName::default(), Signature::new(CallConv::Fast))
}
/// Creates a jump table in the function, to be used by `br_table` instructions.
pub fn create_jump_table(&mut self, data: JumpTableData) -> JumpTable {
self.jump_tables.push(data)
}
/// Creates a stack slot in the function, to be used by `stack_load`, `stack_store` and
/// `stack_addr` instructions.
pub fn create_stack_slot(&mut self, data: StackSlotData) -> StackSlot {
self.stack_slots.push(data)
}
/// Adds a signature which can later be used to declare an external function import.
pub fn import_signature(&mut self, signature: Signature) -> SigRef {
self.dfg.signatures.push(signature)
}
/// Declare an external function import.
pub fn import_function(&mut self, data: ExtFuncData) -> FuncRef {
self.dfg.ext_funcs.push(data)
}
/// Declares a global value accessible to the function.
pub fn create_global_value(&mut self, data: GlobalValueData) -> GlobalValue {
self.global_values.push(data)
}
/// Declares a heap accessible to the function.
pub fn create_heap(&mut self, data: HeapData) -> Heap {
self.heaps.push(data)
}
/// Declares a table accessible to the function.
pub fn create_table(&mut self, data: TableData) -> Table {
self.tables.push(data)
}
/// Return an object that can display this function with correct ISA-specific annotations.
pub fn display<'a, I: Into<Option<&'a TargetIsa>>>(&'a self, isa: I) -> DisplayFunction<'a> {
DisplayFunction(self, isa.into())
}
/// Find a presumed unique special-purpose function parameter value.
///
/// Returns the value of the last `purpose` parameter, or `None` if no such parameter exists.
pub fn special_param(&self, purpose: ir::ArgumentPurpose) -> Option<ir::Value> {
let entry = self.layout.entry_block().expect("Function is empty");
self.signature
.special_param_index(purpose)
.map(|i| self.dfg.ebb_params(entry)[i])
}
/// Get an iterator over the instructions in `ebb`, including offsets and encoded instruction
/// sizes.
///
/// The iterator returns `(offset, inst, size)` tuples, where `offset` is the offset in bytes
/// from the beginning of the function to the instruction, and `size` is the size of the
/// instruction in bytes, or 0 for unencoded instructions.
///
/// This function can only be used after the code layout has been computed by the
/// `binemit::relax_branches()` function.
pub fn inst_offsets<'a>(&'a self, ebb: Ebb, encinfo: &EncInfo) -> InstOffsetIter<'a> {
assert!(
!self.offsets.is_empty(),
"Code layout must be computed first"
);
InstOffsetIter {
encinfo: encinfo.clone(),
func: self,
divert: RegDiversions::new(),
encodings: &self.encodings,
offset: self.offsets[ebb],
iter: self.layout.ebb_insts(ebb),
}
}
/// Wrapper around `encode` which assigns `inst` the resulting encoding.
pub fn update_encoding(&mut self, inst: ir::Inst, isa: &TargetIsa) -> Result<(), Legalize> {
self.encode(inst, isa).map(|e| self.encodings[inst] = e)
}
/// Wrapper around `TargetIsa::encode` for encoding an existing instruction
/// in the `Function`.
pub fn encode(&self, inst: ir::Inst, isa: &TargetIsa) -> Result<Encoding, Legalize> {
isa.encode(&self, &self.dfg[inst], self.dfg.ctrl_typevar(inst))
}
}
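// A minimal usage sketch of the preamble builder methods above. It exercises only
// items defined in this crate; the entity names `ss0`/`jt0` follow the numbering
// scheme used throughout the textual IR.
#[cfg(test)]
mod function_example {
    use super::*;
    use crate::ir::{JumpTableData, StackSlotData, StackSlotKind};
    use std::string::ToString;

    #[test]
    fn build_preamble() {
        // An anonymous function using the default (fast) calling convention.
        let mut func = Function::new();
        // Entities declared in the preamble are numbered in creation order.
        let ss = func.create_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 16));
        let jt = func.create_jump_table(JumpTableData::new());
        assert_eq!(ss.to_string(), "ss0");
        assert_eq!(jt.to_string(), "jt0");
    }
}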
/// Wrapper type capable of displaying a `Function` with correct ISA annotations.
pub struct DisplayFunction<'a>(&'a Function, Option<&'a TargetIsa>);
impl<'a> fmt::Display for DisplayFunction<'a> {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
write_function(fmt, self.0, self.1)
}
}
impl fmt::Display for Function {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
write_function(fmt, self, None)
}
}
impl fmt::Debug for Function {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
write_function(fmt, self, None)
}
}
/// Iterator returning instruction offsets and sizes: `(offset, inst, size)`.
pub struct InstOffsetIter<'a> {
encinfo: EncInfo,
divert: RegDiversions,
func: &'a Function,
encodings: &'a InstEncodings,
offset: CodeOffset,
iter: ir::layout::Insts<'a>,
}
impl<'a> Iterator for InstOffsetIter<'a> {
type Item = (CodeOffset, ir::Inst, CodeOffset);
fn next(&mut self) -> Option<Self::Item> {
self.iter.next().map(|inst| {
self.divert.apply(&self.func.dfg[inst]);
let byte_size =
self.encinfo
.byte_size(self.encodings[inst], inst, &self.divert, self.func);
let offset = self.offset;
self.offset += byte_size;
(offset, inst, byte_size)
})
}
}

View File

@@ -0,0 +1,132 @@
//! Global values.
use crate::ir::immediates::{Imm64, Offset32};
use crate::ir::{ExternalName, GlobalValue, Type};
use crate::isa::TargetIsa;
use core::fmt;
/// Information about a global value declaration.
#[derive(Clone)]
pub enum GlobalValueData {
/// Value is the address of the VM context struct.
VMContext,
/// Value is pointed to by another global value.
///
/// The `base` global value is assumed to contain a pointer. This global value is computed
/// by loading from memory at that pointer value. The memory must be accessible, and
/// naturally aligned to hold a value of the type. The data at this address is assumed
/// to never change while the current function is executing.
Load {
/// The base pointer global value.
base: GlobalValue,
/// Offset added to the base pointer before doing the load.
offset: Offset32,
/// Type of the loaded value.
global_type: Type,
/// Specifies whether the memory that this refers to is readonly, allowing for the
/// elimination of redundant loads.
readonly: bool,
},
/// Value is an offset from another global value.
IAddImm {
/// The base pointer global value.
base: GlobalValue,
/// Byte offset to be added to the value.
offset: Imm64,
/// Type of the iadd.
global_type: Type,
},
/// Value is symbolic, meaning it's a name which will be resolved to an
/// actual value later (e.g., by linking). Cranelift itself does not interpret
/// this name; it's used by embedders to link with other data structures.
///
/// For now, symbolic values always have pointer type, and represent
/// addresses, however in the future they could be used to represent other
/// things as well.
Symbol {
/// The symbolic name.
name: ExternalName,
/// Offset from the symbol. This can be used instead of IAddImm to represent folding an
/// offset into a symbol.
offset: Imm64,
/// Will this symbol be defined nearby, such that it will always be a certain distance
/// away, after linking? If so, references to it can avoid going through a GOT. Note that
/// symbols meant to be preemptible cannot be colocated.
colocated: bool,
},
}
impl GlobalValueData {
/// Assume that `self` is a `GlobalValueData::Symbol` and return its name.
pub fn symbol_name(&self) -> &ExternalName {
match *self {
GlobalValueData::Symbol { ref name, .. } => name,
_ => panic!("only symbols have names"),
}
}
/// Return the type of this global.
pub fn global_type(&self, isa: &TargetIsa) -> Type {
match *self {
GlobalValueData::VMContext { .. } | GlobalValueData::Symbol { .. } => {
isa.pointer_type()
}
GlobalValueData::IAddImm { global_type, .. }
| GlobalValueData::Load { global_type, .. } => global_type,
}
}
}
impl fmt::Display for GlobalValueData {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
GlobalValueData::VMContext => write!(f, "vmctx"),
GlobalValueData::Load {
base,
offset,
global_type,
readonly,
} => write!(
f,
"load.{} notrap aligned {}{}{}",
global_type,
if readonly { "readonly " } else { "" },
base,
offset
),
GlobalValueData::IAddImm {
global_type,
base,
offset,
} => write!(f, "iadd_imm.{} {}, {}", global_type, base, offset),
GlobalValueData::Symbol {
ref name,
offset,
colocated,
} => {
if colocated {
write!(f, "colocated ")?;
}
write!(f, "symbol {}", name)?;
let offset_val: i64 = offset.into();
if offset_val > 0 {
write!(f, "+")?;
}
if offset_val != 0 {
write!(f, "{}", offset)?;
}
Ok(())
}
}
}
}
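// A small sketch of the `Display` impl above. `ExternalName::testcase` is assumed
// to be available from this crate's `extname` module for building throwaway names;
// positive offsets are printed with an explicit `+` sign.
#[cfg(test)]
mod globalvalue_example {
    use super::*;
    use std::string::ToString;

    #[test]
    fn symbol_display() {
        let gv = GlobalValueData::Symbol {
            name: ExternalName::testcase("sym"),
            offset: Imm64::new(8),
            colocated: false,
        };
        assert_eq!(gv.to_string(), "symbol %sym+8");
    }
}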

View File

@@ -0,0 +1,62 @@
//! Heaps.
use crate::ir::immediates::Uimm64;
use crate::ir::{GlobalValue, Type};
use core::fmt;
/// Information about a heap declaration.
#[derive(Clone)]
pub struct HeapData {
/// The address of the start of the heap's storage.
pub base: GlobalValue,
/// Guaranteed minimum heap size in bytes. Heap accesses before `min_size` don't need bounds
/// checking.
pub min_size: Uimm64,
/// Size in bytes of the offset-guard pages following the heap.
pub offset_guard_size: Uimm64,
/// Heap style, with additional style-specific info.
pub style: HeapStyle,
/// The index type for the heap.
pub index_type: Type,
}
/// Style of heap including style-specific information.
#[derive(Clone)]
pub enum HeapStyle {
/// A dynamic heap can be relocated to a different base address when it is grown.
Dynamic {
/// Global value providing the current bound of the heap in bytes.
bound_gv: GlobalValue,
},
/// A static heap has a fixed base address and a number of not-yet-allocated pages before the
/// offset-guard pages.
Static {
/// Heap bound in bytes. The offset-guard pages are allocated after the bound.
bound: Uimm64,
},
}
impl fmt::Display for HeapData {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str(match self.style {
HeapStyle::Dynamic { .. } => "dynamic",
HeapStyle::Static { .. } => "static",
})?;
write!(f, " {}, min {}", self.base, self.min_size)?;
match self.style {
HeapStyle::Dynamic { bound_gv } => write!(f, ", bound {}", bound_gv)?,
HeapStyle::Static { bound } => write!(f, ", bound {}", bound)?,
}
write!(
f,
", offset_guard {}, index_type {}",
self.offset_guard_size, self.index_type
)
}
}
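// An illustrative sketch of the textual form produced by the `Display` impl above.
// `GlobalValue::new` (via `EntityRef`) and the small `Uimm64` values are chosen
// purely for demonstration.
#[cfg(test)]
mod heap_example {
    use super::*;
    use crate::entity::EntityRef;
    use crate::ir::types;
    use std::string::ToString;

    #[test]
    fn static_heap_display() {
        let heap = HeapData {
            base: GlobalValue::new(0),
            min_size: Uimm64::new(0),
            offset_guard_size: Uimm64::new(4096),
            style: HeapStyle::Static {
                bound: Uimm64::new(4096),
            },
            index_type: types::I32,
        };
        assert_eq!(
            heap.to_string(),
            "static gv0, min 0, bound 4096, offset_guard 4096, index_type i32"
        );
    }
}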

File diff suppressed because it is too large

View File

@@ -0,0 +1,702 @@
//! Instruction formats and opcodes.
//!
//! The `instructions` module contains definitions for instruction formats, opcodes, and the
//! in-memory representation of IR instructions.
//!
//! A large part of this module is auto-generated from the instruction descriptions in the meta
//! directory.
use core::fmt::{self, Display, Formatter};
use core::ops::{Deref, DerefMut};
use core::str::FromStr;
use std::vec::Vec;
use crate::ir;
use crate::ir::types;
use crate::ir::{Ebb, FuncRef, JumpTable, SigRef, Type, Value};
use crate::isa;
use crate::bitset::BitSet;
use crate::entity;
use crate::ref_slice::{ref_slice, ref_slice_mut};
/// Some instructions use an external list of argument values because there is not enough space in
/// the 16-byte `InstructionData` struct. These value lists are stored in a memory pool in
/// `dfg.value_lists`.
pub type ValueList = entity::EntityList<Value>;
/// Memory pool for holding value lists. See `ValueList`.
pub type ValueListPool = entity::ListPool<Value>;
// Include code generated by `cranelift-codegen/meta-python/gen_instr.py`. This file contains:
//
// - The `pub enum InstructionFormat` enum with all the instruction formats.
// - The `pub enum InstructionData` enum with all the instruction data fields.
// - The `pub enum Opcode` definition with all known opcodes,
// - The `const OPCODE_FORMAT: [InstructionFormat; N]` table.
// - The private `fn opcode_name(Opcode) -> &'static str` function, and
// - The hash table `const OPCODE_HASH_TABLE: [Opcode; N]`.
//
// For value type constraints:
//
// - The `const OPCODE_CONSTRAINTS : [OpcodeConstraints; N]` table.
// - The `const TYPE_SETS : [ValueTypeSet; N]` table.
// - The `const OPERAND_CONSTRAINTS : [OperandConstraint; N]` table.
//
include!(concat!(env!("OUT_DIR"), "/opcodes.rs"));
impl Display for Opcode {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
write!(f, "{}", opcode_name(*self))
}
}
impl Opcode {
/// Get the instruction format for this opcode.
pub fn format(self) -> InstructionFormat {
OPCODE_FORMAT[self as usize - 1]
}
/// Get the constraint descriptor for this opcode.
/// Panics if this is called on an invalid opcode.
pub fn constraints(self) -> OpcodeConstraints {
OPCODE_CONSTRAINTS[self as usize - 1]
}
}
// This trait really belongs in cranelift-reader where it is used by the `.clif` file parser, but since
// it critically depends on the `opcode_name()` function which is needed here anyway, it lives in
// this module. This also saves us from running the build script twice to generate code for the two
// separate crates.
impl FromStr for Opcode {
type Err = &'static str;
/// Parse an Opcode name from a string.
fn from_str(s: &str) -> Result<Self, &'static str> {
use crate::constant_hash::{probe, simple_hash, Table};
impl<'a> Table<&'a str> for [Option<Opcode>] {
fn len(&self) -> usize {
self.len()
}
fn key(&self, idx: usize) -> Option<&'a str> {
self[idx].map(opcode_name)
}
}
match probe::<&str, [Option<Self>]>(&OPCODE_HASH_TABLE, s, simple_hash(s)) {
Err(_) => Err("Unknown opcode"),
// We unwrap here because probe() should have ensured that the entry
// at this index is not None.
Ok(i) => Ok(OPCODE_HASH_TABLE[i].unwrap()),
}
}
}
/// A variable list of `Value` operands used for function call arguments and passing arguments to
/// basic blocks.
#[derive(Clone, Debug)]
pub struct VariableArgs(Vec<Value>);
impl VariableArgs {
/// Create an empty argument list.
pub fn new() -> Self {
VariableArgs(Vec::new())
}
/// Add an argument to the end.
pub fn push(&mut self, v: Value) {
self.0.push(v)
}
/// Check if the list is empty.
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
/// Convert this to a value list in `pool` with `fixed` prepended.
pub fn into_value_list(self, fixed: &[Value], pool: &mut ValueListPool) -> ValueList {
let mut vlist = ValueList::default();
vlist.extend(fixed.iter().cloned(), pool);
vlist.extend(self.0, pool);
vlist
}
}
// Coerce `VariableArgs` into a `&[Value]` slice.
impl Deref for VariableArgs {
type Target = [Value];
fn deref(&self) -> &[Value] {
&self.0
}
}
impl DerefMut for VariableArgs {
fn deref_mut(&mut self) -> &mut [Value] {
&mut self.0
}
}
impl Display for VariableArgs {
fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
for (i, val) in self.0.iter().enumerate() {
if i == 0 {
write!(fmt, "{}", val)?;
} else {
write!(fmt, ", {}", val)?;
}
}
Ok(())
}
}
impl Default for VariableArgs {
fn default() -> Self {
Self::new()
}
}
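// A brief sketch of `VariableArgs` in use: arguments are collected, displayed
// comma-separated, and finally interned into a pooled `ValueList` with a fixed
// argument prepended. Only items from this module and the entity crate are used.
#[cfg(test)]
mod variable_args_example {
    use super::*;
    use crate::entity::EntityRef;
    use std::string::ToString;

    #[test]
    fn collect_and_intern() {
        let mut args = VariableArgs::new();
        args.push(Value::new(1));
        args.push(Value::new(2));
        assert_eq!(args.to_string(), "v1, v2");

        // Prepend one fixed argument while converting to a `ValueList`.
        let mut pool = ValueListPool::new();
        let vlist = args.into_value_list(&[Value::new(0)], &mut pool);
        assert_eq!(
            vlist.as_slice(&pool),
            &[Value::new(0), Value::new(1), Value::new(2)]
        );
    }
}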
/// Analyzing an instruction.
///
/// Avoid large matches on instruction formats by using the methods defined here to examine
/// instructions.
impl InstructionData {
/// Return information about the destination of a branch or jump instruction.
///
/// Any instruction that can transfer control to another EBB reveals its possible destinations
/// here.
pub fn analyze_branch<'a>(&'a self, pool: &'a ValueListPool) -> BranchInfo<'a> {
match *self {
InstructionData::Jump {
destination,
ref args,
..
} => BranchInfo::SingleDest(destination, args.as_slice(pool)),
InstructionData::BranchInt {
destination,
ref args,
..
}
| InstructionData::BranchFloat {
destination,
ref args,
..
}
| InstructionData::Branch {
destination,
ref args,
..
} => BranchInfo::SingleDest(destination, &args.as_slice(pool)[1..]),
InstructionData::BranchIcmp {
destination,
ref args,
..
} => BranchInfo::SingleDest(destination, &args.as_slice(pool)[2..]),
InstructionData::BranchTable {
table, destination, ..
} => BranchInfo::Table(table, Some(destination)),
InstructionData::IndirectJump { table, .. } => BranchInfo::Table(table, None),
_ => {
debug_assert!(!self.opcode().is_branch());
BranchInfo::NotABranch
}
}
}
/// Get the single destination of this branch instruction, if it is a single destination
/// branch or jump.
///
/// Multi-destination branches like `br_table` return `None`.
pub fn branch_destination(&self) -> Option<Ebb> {
match *self {
InstructionData::Jump { destination, .. }
| InstructionData::Branch { destination, .. }
| InstructionData::BranchInt { destination, .. }
| InstructionData::BranchFloat { destination, .. }
| InstructionData::BranchIcmp { destination, .. } => Some(destination),
InstructionData::BranchTable { .. } | InstructionData::IndirectJump { .. } => None,
_ => {
debug_assert!(!self.opcode().is_branch());
None
}
}
}
/// Get a mutable reference to the single destination of this branch instruction, if it is a
/// single destination branch or jump.
///
/// Multi-destination branches like `br_table` return `None`.
pub fn branch_destination_mut(&mut self) -> Option<&mut Ebb> {
match *self {
InstructionData::Jump {
ref mut destination,
..
}
| InstructionData::Branch {
ref mut destination,
..
}
| InstructionData::BranchInt {
ref mut destination,
..
}
| InstructionData::BranchFloat {
ref mut destination,
..
}
| InstructionData::BranchIcmp {
ref mut destination,
..
} => Some(destination),
InstructionData::BranchTable { .. } => None,
_ => {
debug_assert!(!self.opcode().is_branch());
None
}
}
}
/// Return information about a call instruction.
///
/// Any instruction that can call another function reveals its call signature here.
pub fn analyze_call<'a>(&'a self, pool: &'a ValueListPool) -> CallInfo<'a> {
match *self {
InstructionData::Call {
func_ref, ref args, ..
} => CallInfo::Direct(func_ref, args.as_slice(pool)),
InstructionData::CallIndirect {
sig_ref, ref args, ..
} => CallInfo::Indirect(sig_ref, &args.as_slice(pool)[1..]),
_ => {
debug_assert!(!self.opcode().is_call());
CallInfo::NotACall
}
}
}
}
/// Information about branch and jump instructions.
pub enum BranchInfo<'a> {
/// This is not a branch or jump instruction.
/// This instruction will not transfer control to another EBB in the function, but it may still
/// affect control flow by returning or trapping.
NotABranch,
/// This is a branch or jump to a single destination EBB, possibly taking value arguments.
SingleDest(Ebb, &'a [Value]),
/// This is a jump table branch which can have many destination EBBs and maybe one default EBB.
Table(JumpTable, Option<Ebb>),
}
/// Information about call instructions.
pub enum CallInfo<'a> {
/// This is not a call instruction.
NotACall,
/// This is a direct call to an external function declared in the preamble. See
/// `DataFlowGraph.ext_funcs`.
Direct(FuncRef, &'a [Value]),
/// This is an indirect call with the specified signature. See `DataFlowGraph.signatures`.
Indirect(SigRef, &'a [Value]),
}
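// A hand-built `Jump` run through `analyze_branch`, as a sketch of the `BranchInfo`
// API above. The field names follow the generated `Jump` instruction format
// (`opcode`, `destination`, `args`), which lives in the build-script output.
#[cfg(test)]
mod branch_info_example {
    use super::*;
    use crate::entity::EntityRef;

    #[test]
    fn jump_single_dest() {
        let pool = ValueListPool::new();
        let data = InstructionData::Jump {
            opcode: Opcode::Jump,
            destination: Ebb::new(7),
            args: ValueList::default(),
        };
        match data.analyze_branch(&pool) {
            BranchInfo::SingleDest(dest, args) => {
                assert_eq!(dest, Ebb::new(7));
                assert!(args.is_empty());
            }
            _ => panic!("expected a single-destination branch"),
        }
    }
}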
/// Value type constraints for a given opcode.
///
/// The `InstructionFormat` determines the constraints on most operands, but `Value` operands and
/// results are not determined by the format. Every `Opcode` has an associated
/// `OpcodeConstraints` object that provides the missing details.
#[derive(Clone, Copy)]
pub struct OpcodeConstraints {
/// Flags for this opcode encoded as a bit field:
///
/// Bits 0-2:
/// Number of fixed result values. This does not include `variable_args` results as are
/// produced by call instructions.
///
/// Bit 3:
/// This opcode is polymorphic and the controlling type variable can be inferred from the
/// designated input operand. This is the `typevar_operand` index given to the
/// `InstructionFormat` meta language object. When this bit is not set, the controlling
/// type variable must be the first output value instead.
///
/// Bit 4:
/// This opcode is polymorphic and the controlling type variable does *not* appear as the
/// first result type.
///
/// Bits 5-7:
/// Number of fixed value arguments. The minimum required number of value operands.
flags: u8,
/// Permitted set of types for the controlling type variable as an index into `TYPE_SETS`.
typeset_offset: u8,
/// Offset into `OPERAND_CONSTRAINTS` table of the descriptors for this opcode. The first
/// `num_fixed_results()` entries describe the result constraints, then follows constraints for
/// the fixed `Value` input operands. (`num_fixed_value_arguments()` of them).
constraint_offset: u16,
}
impl OpcodeConstraints {
/// Can the controlling type variable for this opcode be inferred from the designated value
/// input operand?
/// This also implies that this opcode is polymorphic.
pub fn use_typevar_operand(self) -> bool {
(self.flags & 0x8) != 0
}
/// Is it necessary to look at the designated value input operand in order to determine the
/// controlling type variable, or is it good enough to use the first return type?
///
/// Most polymorphic instructions produce a single result with the type of the controlling type
/// variable. A few polymorphic instructions either don't produce any results, or produce
/// results with a fixed type. These instructions return `true`.
pub fn requires_typevar_operand(self) -> bool {
(self.flags & 0x10) != 0
}
/// Get the number of *fixed* result values produced by this opcode.
/// This does not include `variable_args` produced by calls.
pub fn num_fixed_results(self) -> usize {
(self.flags & 0x7) as usize
}
/// Get the number of *fixed* input values required by this opcode.
///
/// This does not include `variable_args` arguments on call and branch instructions.
///
/// The number of fixed input values is usually implied by the instruction format, but
/// instruction formats that use a `ValueList` put both fixed and variable arguments in the
/// list. This method returns the *minimum* number of values required in the value list.
pub fn num_fixed_value_arguments(self) -> usize {
((self.flags >> 5) & 0x7) as usize
}
/// Get the offset into `TYPE_SETS` for the controlling type variable.
/// Returns `None` if the instruction is not polymorphic.
fn typeset_offset(self) -> Option<usize> {
let offset = usize::from(self.typeset_offset);
if offset < TYPE_SETS.len() {
Some(offset)
} else {
None
}
}
/// Get the offset into OPERAND_CONSTRAINTS where the descriptors for this opcode begin.
fn constraint_offset(self) -> usize {
self.constraint_offset as usize
}
/// Get the value type of result number `n`, having resolved the controlling type variable to
/// `ctrl_type`.
pub fn result_type(self, n: usize, ctrl_type: Type) -> Type {
debug_assert!(n < self.num_fixed_results(), "Invalid result index");
if let ResolvedConstraint::Bound(t) =
OPERAND_CONSTRAINTS[self.constraint_offset() + n].resolve(ctrl_type)
{
t
} else {
panic!("Result constraints can't be free");
}
}
/// Get the value type of input value number `n`, having resolved the controlling type variable
/// to `ctrl_type`.
///
/// Unlike results, it is possible for some input values to vary freely within a specific
/// `ValueTypeSet`. This is represented with the `ArgumentConstraint::Free` variant.
pub fn value_argument_constraint(self, n: usize, ctrl_type: Type) -> ResolvedConstraint {
debug_assert!(
n < self.num_fixed_value_arguments(),
"Invalid value argument index"
);
let offset = self.constraint_offset() + self.num_fixed_results();
OPERAND_CONSTRAINTS[offset + n].resolve(ctrl_type)
}
/// Get the typeset of allowed types for the controlling type variable in a polymorphic
/// instruction.
pub fn ctrl_typeset(self) -> Option<ValueTypeSet> {
self.typeset_offset().map(|offset| TYPE_SETS[offset])
}
/// Is this instruction polymorphic?
pub fn is_polymorphic(self) -> bool {
self.ctrl_typeset().is_some()
}
}
type BitSet8 = BitSet<u8>;
type BitSet16 = BitSet<u16>;
/// A value type set describes the permitted set of types for a type variable.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ValueTypeSet {
/// Allowed lane sizes
pub lanes: BitSet16,
/// Allowed int widths
pub ints: BitSet8,
/// Allowed float widths
pub floats: BitSet8,
/// Allowed bool widths
pub bools: BitSet8,
}
impl ValueTypeSet {
/// Is `scalar` part of the base type set?
///
/// Note that the base type set does not have to be included in the type set proper.
fn is_base_type(self, scalar: Type) -> bool {
let l2b = scalar.log2_lane_bits();
if scalar.is_int() {
self.ints.contains(l2b)
} else if scalar.is_float() {
self.floats.contains(l2b)
} else if scalar.is_bool() {
self.bools.contains(l2b)
} else {
false
}
}
/// Does `typ` belong to this set?
pub fn contains(self, typ: Type) -> bool {
let l2l = typ.log2_lane_count();
self.lanes.contains(l2l) && self.is_base_type(typ.lane_type())
}
/// Get an example member of this type set.
///
/// This is used for error messages to avoid suggesting invalid types.
pub fn example(self) -> Type {
let t = if self.ints.max().unwrap_or(0) > 5 {
types::I32
} else if self.floats.max().unwrap_or(0) > 5 {
types::F32
} else if self.bools.max().unwrap_or(0) > 5 {
types::B32
} else {
types::B1
};
t.by(1 << self.lanes.min().unwrap()).unwrap()
}
}
/// Operand constraints. This describes the value type constraints on a single `Value` operand.
enum OperandConstraint {
/// This operand has a concrete value type.
Concrete(Type),
/// This operand can vary freely within the given type set.
/// The type set is identified by its index into the TYPE_SETS constant table.
Free(u8),
/// This operand is the same type as the controlling type variable.
Same,
/// This operand is `ctrlType.lane_type()`.
LaneOf,
/// This operand is `ctrlType.as_bool()`.
AsBool,
/// This operand is `ctrlType.half_width()`.
HalfWidth,
/// This operand is `ctrlType.double_width()`.
DoubleWidth,
/// This operand is `ctrlType.half_vector()`.
HalfVector,
/// This operand is `ctrlType.double_vector()`.
DoubleVector,
}
impl OperandConstraint {
/// Resolve this operand constraint into a concrete value type, given the value of the
/// controlling type variable.
pub fn resolve(&self, ctrl_type: Type) -> ResolvedConstraint {
use self::OperandConstraint::*;
use self::ResolvedConstraint::Bound;
match *self {
Concrete(t) => Bound(t),
Free(vts) => ResolvedConstraint::Free(TYPE_SETS[vts as usize]),
Same => Bound(ctrl_type),
LaneOf => Bound(ctrl_type.lane_type()),
AsBool => Bound(ctrl_type.as_bool()),
HalfWidth => Bound(ctrl_type.half_width().expect("invalid type for half_width")),
DoubleWidth => Bound(
ctrl_type
.double_width()
.expect("invalid type for double_width"),
),
HalfVector => Bound(
ctrl_type
.half_vector()
.expect("invalid type for half_vector"),
),
DoubleVector => Bound(ctrl_type.by(2).expect("invalid type for double_vector")),
}
}
}
/// The type constraint on a value argument once the controlling type variable is known.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ResolvedConstraint {
/// The operand is bound to a known type.
Bound(Type),
/// The operand type can vary freely within the given set.
Free(ValueTypeSet),
}
#[cfg(test)]
mod tests {
use super::*;
use std::string::ToString;
#[test]
fn opcodes() {
use core::mem;
let x = Opcode::Iadd;
let mut y = Opcode::Isub;
assert!(x != y);
y = Opcode::Iadd;
assert_eq!(x, y);
assert_eq!(x.format(), InstructionFormat::Binary);
assert_eq!(format!("{:?}", Opcode::IaddImm), "IaddImm");
assert_eq!(Opcode::IaddImm.to_string(), "iadd_imm");
// Check the matcher.
assert_eq!("iadd".parse::<Opcode>(), Ok(Opcode::Iadd));
assert_eq!("iadd_imm".parse::<Opcode>(), Ok(Opcode::IaddImm));
assert_eq!("iadd\0".parse::<Opcode>(), Err("Unknown opcode"));
assert_eq!("".parse::<Opcode>(), Err("Unknown opcode"));
assert_eq!("\0".parse::<Opcode>(), Err("Unknown opcode"));
// Opcode is a single byte, and because Option<Opcode> originally came to 2 bytes, early on
// Opcode included a variant NotAnOpcode to avoid the unnecessary bloat. Since then the Rust
// compiler has brought in NonZero optimization, meaning that an enum not using the 0 value
// can be wrapped in `Option` at no size cost. We want to ensure Option<Opcode> remains small.
assert_eq!(mem::size_of::<Opcode>(), mem::size_of::<Option<Opcode>>());
}
#[test]
fn instruction_data() {
use core::mem;
// The size of the `InstructionData` enum is important for performance. It should not
// exceed 16 bytes. Use `Box<FooData>` out-of-line payloads for instruction formats that
// require more space than that. It would be fine with a data structure smaller than 16
// bytes, but what are the odds of that?
assert_eq!(mem::size_of::<InstructionData>(), 16);
}
#[test]
fn constraints() {
let a = Opcode::Iadd.constraints();
assert!(a.use_typevar_operand());
assert!(!a.requires_typevar_operand());
assert_eq!(a.num_fixed_results(), 1);
assert_eq!(a.num_fixed_value_arguments(), 2);
assert_eq!(a.result_type(0, types::I32), types::I32);
assert_eq!(a.result_type(0, types::I8), types::I8);
assert_eq!(
a.value_argument_constraint(0, types::I32),
ResolvedConstraint::Bound(types::I32)
);
assert_eq!(
a.value_argument_constraint(1, types::I32),
ResolvedConstraint::Bound(types::I32)
);
let b = Opcode::Bitcast.constraints();
assert!(!b.use_typevar_operand());
assert!(!b.requires_typevar_operand());
assert_eq!(b.num_fixed_results(), 1);
assert_eq!(b.num_fixed_value_arguments(), 1);
assert_eq!(b.result_type(0, types::I32), types::I32);
assert_eq!(b.result_type(0, types::I8), types::I8);
match b.value_argument_constraint(0, types::I32) {
ResolvedConstraint::Free(vts) => assert!(vts.contains(types::F32)),
_ => panic!("Unexpected constraint from value_argument_constraint"),
}
let c = Opcode::Call.constraints();
assert_eq!(c.num_fixed_results(), 0);
assert_eq!(c.num_fixed_value_arguments(), 0);
let i = Opcode::CallIndirect.constraints();
assert_eq!(i.num_fixed_results(), 0);
assert_eq!(i.num_fixed_value_arguments(), 1);
let cmp = Opcode::Icmp.constraints();
assert!(cmp.use_typevar_operand());
assert!(cmp.requires_typevar_operand());
assert_eq!(cmp.num_fixed_results(), 1);
assert_eq!(cmp.num_fixed_value_arguments(), 2);
}
#[test]
fn value_set() {
use crate::ir::types::*;
let vts = ValueTypeSet {
lanes: BitSet16::from_range(0, 8),
ints: BitSet8::from_range(4, 7),
floats: BitSet8::from_range(0, 0),
bools: BitSet8::from_range(3, 7),
};
assert!(!vts.contains(I8));
assert!(vts.contains(I32));
assert!(vts.contains(I64));
assert!(vts.contains(I32X4));
assert!(!vts.contains(F32));
assert!(!vts.contains(B1));
assert!(vts.contains(B8));
assert!(vts.contains(B64));
assert_eq!(vts.example().to_string(), "i32");
let vts = ValueTypeSet {
lanes: BitSet16::from_range(0, 8),
ints: BitSet8::from_range(0, 0),
floats: BitSet8::from_range(5, 7),
bools: BitSet8::from_range(3, 7),
};
assert_eq!(vts.example().to_string(), "f32");
let vts = ValueTypeSet {
lanes: BitSet16::from_range(1, 8),
ints: BitSet8::from_range(0, 0),
floats: BitSet8::from_range(5, 7),
bools: BitSet8::from_range(3, 7),
};
assert_eq!(vts.example().to_string(), "f32x2");
let vts = ValueTypeSet {
lanes: BitSet16::from_range(2, 8),
ints: BitSet8::from_range(0, 0),
floats: BitSet8::from_range(0, 0),
bools: BitSet8::from_range(3, 7),
};
assert!(!vts.contains(B32X2));
assert!(vts.contains(B32X4));
assert_eq!(vts.example().to_string(), "b32x4");
let vts = ValueTypeSet {
// TypeSet(lanes=(1, 256), ints=(8, 64))
lanes: BitSet16::from_range(0, 9),
ints: BitSet8::from_range(3, 7),
floats: BitSet8::from_range(0, 0),
bools: BitSet8::from_range(0, 0),
};
assert!(vts.contains(I32));
assert!(vts.contains(I32X4));
}
}

View File

@@ -0,0 +1,119 @@
//! Jump table representation.
//!
//! Jump tables are declared in the preamble and assigned an `ir::entities::JumpTable` reference.
//! The actual table of destinations is stored in a `JumpTableData` struct defined in this module.
use crate::ir::entities::Ebb;
use core::fmt::{self, Display, Formatter};
use core::slice::{Iter, IterMut};
use std::vec::Vec;
/// Contents of a jump table.
///
/// All jump tables use 0-based indexing and are densely populated.
#[derive(Clone)]
pub struct JumpTableData {
// Table entries.
table: Vec<Ebb>,
}
impl JumpTableData {
/// Create a new empty jump table.
pub fn new() -> Self {
Self { table: Vec::new() }
}
/// Create a new empty jump table with the specified capacity.
pub fn with_capacity(capacity: usize) -> Self {
Self {
table: Vec::with_capacity(capacity),
}
}
/// Get the number of table entries.
pub fn len(&self) -> usize {
self.table.len()
}
/// Append a table entry.
pub fn push_entry(&mut self, dest: Ebb) {
self.table.push(dest)
}
/// Checks if any of the entries branch to `ebb`.
pub fn branches_to(&self, ebb: Ebb) -> bool {
self.table.iter().any(|target_ebb| *target_ebb == ebb)
}
/// Access the whole table as a slice.
pub fn as_slice(&self) -> &[Ebb] {
self.table.as_slice()
}
/// Access the whole table as a mutable slice.
pub fn as_mut_slice(&mut self) -> &mut [Ebb] {
self.table.as_mut_slice()
}
/// Returns an iterator over the table.
pub fn iter(&self) -> Iter<Ebb> {
self.table.iter()
}
/// Returns an iterator that allows modifying each value.
pub fn iter_mut(&mut self) -> IterMut<Ebb> {
self.table.iter_mut()
}
}
impl Display for JumpTableData {
fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
write!(fmt, "jump_table [")?;
match self.table.first() {
None => (),
Some(first) => write!(fmt, "{}", first)?,
}
for ebb in self.table.iter().skip(1) {
write!(fmt, ", {}", ebb)?;
}
write!(fmt, "]")
}
}
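// A short sketch of `branches_to`, which the tests below don't otherwise cover:
// it reports whether any table entry targets the given EBB.
#[cfg(test)]
mod branches_to_example {
    use super::JumpTableData;
    use crate::entity::EntityRef;
    use crate::ir::Ebb;

    #[test]
    fn detects_targets() {
        let mut jt = JumpTableData::new();
        jt.push_entry(Ebb::new(3));
        assert!(jt.branches_to(Ebb::new(3)));
        assert!(!jt.branches_to(Ebb::new(4)));
    }
}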
#[cfg(test)]
mod tests {
use super::JumpTableData;
use crate::entity::EntityRef;
use crate::ir::Ebb;
use std::string::ToString;
#[test]
fn empty() {
let jt = JumpTableData::new();
assert_eq!(jt.as_slice().get(0), None);
assert_eq!(jt.as_slice().get(10), None);
assert_eq!(jt.to_string(), "jump_table []");
let v = jt.as_slice();
assert_eq!(v, []);
}
#[test]
fn insert() {
let e1 = Ebb::new(1);
let e2 = Ebb::new(2);
let mut jt = JumpTableData::new();
jt.push_entry(e1);
jt.push_entry(e2);
jt.push_entry(e1);
assert_eq!(jt.to_string(), "jump_table [ebb1, ebb2, ebb1]");
let v = jt.as_slice();
assert_eq!(v, [e1, e2, e1]);
}
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,205 @@
//! Naming well-known routines in the runtime library.
use crate::ir::{
types, AbiParam, ArgumentPurpose, ExtFuncData, ExternalName, FuncRef, Function, Inst, Opcode,
Signature, Type,
};
use crate::isa::{CallConv, RegUnit, TargetIsa};
use core::fmt;
use core::str::FromStr;
/// The name of a runtime library routine.
///
/// Runtime library calls are generated for Cranelift IR instructions that don't have an equivalent
/// ISA instruction or an easy macro expansion. A `LibCall` is used as a well-known name to refer to
/// the runtime library routine. This way, Cranelift doesn't have to know about the naming
/// convention in the embedding VM's runtime library.
///
/// This list is likely to grow over time.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum LibCall {
/// Probe for stack overflow. These are emitted for functions with large stack
/// frames when the `probestack_enabled` setting is true.
Probestack,
/// ceil.f32
CeilF32,
/// ceil.f64
CeilF64,
/// floor.f32
FloorF32,
/// floor.f64
FloorF64,
/// trunc.f32
TruncF32,
/// trunc.f64
TruncF64,
/// nearest.f32
NearestF32,
/// nearest.f64
NearestF64,
/// libc.memcpy
Memcpy,
/// libc.memset
Memset,
/// libc.memmove
Memmove,
}
impl fmt::Display for LibCall {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(self, f)
}
}
impl FromStr for LibCall {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"Probestack" => Ok(LibCall::Probestack),
"CeilF32" => Ok(LibCall::CeilF32),
"CeilF64" => Ok(LibCall::CeilF64),
"FloorF32" => Ok(LibCall::FloorF32),
"FloorF64" => Ok(LibCall::FloorF64),
"TruncF32" => Ok(LibCall::TruncF32),
"TruncF64" => Ok(LibCall::TruncF64),
"NearestF32" => Ok(LibCall::NearestF32),
"NearestF64" => Ok(LibCall::NearestF64),
"Memcpy" => Ok(LibCall::Memcpy),
"Memset" => Ok(LibCall::Memset),
"Memmove" => Ok(LibCall::Memmove),
_ => Err(()),
}
}
}
impl LibCall {
/// Get the well-known library call name to use as a replacement for an instruction with the
/// given opcode and controlling type variable.
///
/// Returns `None` if no well-known library routine name exists for that instruction.
pub fn for_inst(opcode: Opcode, ctrl_type: Type) -> Option<Self> {
Some(match ctrl_type {
types::F32 => match opcode {
Opcode::Ceil => LibCall::CeilF32,
Opcode::Floor => LibCall::FloorF32,
Opcode::Trunc => LibCall::TruncF32,
Opcode::Nearest => LibCall::NearestF32,
_ => return None,
},
types::F64 => match opcode {
Opcode::Ceil => LibCall::CeilF64,
Opcode::Floor => LibCall::FloorF64,
Opcode::Trunc => LibCall::TruncF64,
Opcode::Nearest => LibCall::NearestF64,
_ => return None,
},
_ => return None,
})
}
}
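// A small sketch of the opcode-to-libcall mapping above: float rounding opcodes
// resolve to a routine for their controlling type, everything else to `None`.
#[cfg(test)]
mod for_inst_example {
    use super::*;

    #[test]
    fn float_rounding() {
        assert_eq!(
            LibCall::for_inst(Opcode::Ceil, types::F32),
            Some(LibCall::CeilF32)
        );
        // Integer control types have no library routine.
        assert_eq!(LibCall::for_inst(Opcode::Ceil, types::I32), None);
    }
}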
/// Get a function reference for `libcall` in `func`, following the signature
/// for `inst`.
///
/// If there is an existing reference, use it, otherwise make a new one.
pub fn get_libcall_funcref(
libcall: LibCall,
func: &mut Function,
inst: Inst,
isa: &TargetIsa,
) -> FuncRef {
find_funcref(libcall, func).unwrap_or_else(|| make_funcref_for_inst(libcall, func, inst, isa))
}
/// Get a function reference for the probestack function in `func`.
///
/// If there is an existing reference, use it, otherwise make a new one.
pub fn get_probestack_funcref(
func: &mut Function,
reg_type: Type,
arg_reg: RegUnit,
isa: &TargetIsa,
) -> FuncRef {
find_funcref(LibCall::Probestack, func)
.unwrap_or_else(|| make_funcref_for_probestack(func, reg_type, arg_reg, isa))
}
/// Get the existing function reference for `libcall` in `func` if it exists.
fn find_funcref(libcall: LibCall, func: &Function) -> Option<FuncRef> {
// We're assuming that all libcall function decls are at the end.
// If we get this wrong, worst case we'll have duplicate libcall decls, which is harmless.
for (fref, func_data) in func.dfg.ext_funcs.iter().rev() {
match func_data.name {
ExternalName::LibCall(lc) => {
if lc == libcall {
return Some(fref);
}
}
_ => break,
}
}
None
}
/// Create a funcref for `LibCall::Probestack`.
fn make_funcref_for_probestack(
func: &mut Function,
reg_type: Type,
arg_reg: RegUnit,
isa: &TargetIsa,
) -> FuncRef {
let mut sig = Signature::new(CallConv::Probestack);
let rax = AbiParam::special_reg(reg_type, ArgumentPurpose::Normal, arg_reg);
sig.params.push(rax);
if !isa.flags().probestack_func_adjusts_sp() {
sig.returns.push(rax);
}
make_funcref(LibCall::Probestack, func, sig, isa)
}
/// Create a funcref for `libcall` with a signature matching `inst`.
fn make_funcref_for_inst(
libcall: LibCall,
func: &mut Function,
inst: Inst,
isa: &TargetIsa,
) -> FuncRef {
let mut sig = Signature::new(isa.default_call_conv());
for &v in func.dfg.inst_args(inst) {
sig.params.push(AbiParam::new(func.dfg.value_type(v)));
}
for &v in func.dfg.inst_results(inst) {
sig.returns.push(AbiParam::new(func.dfg.value_type(v)));
}
make_funcref(libcall, func, sig, isa)
}
/// Create a funcref for `libcall`.
fn make_funcref(libcall: LibCall, func: &mut Function, sig: Signature, isa: &TargetIsa) -> FuncRef {
let sigref = func.import_signature(sig);
func.import_function(ExtFuncData {
name: ExternalName::LibCall(libcall),
signature: sigref,
colocated: isa.flags().colocated_libcalls(),
})
}
#[cfg(test)]
mod tests {
use super::*;
use std::string::ToString;
#[test]
fn display() {
assert_eq!(LibCall::CeilF32.to_string(), "CeilF32");
assert_eq!(LibCall::NearestF64.to_string(), "NearestF64");
}
#[test]
fn parsing() {
assert_eq!("FloorF32".parse(), Ok(LibCall::FloorF32));
}
}

View File

@@ -0,0 +1,117 @@
//! Memory operation flags.
use core::fmt;
enum FlagBit {
Notrap,
Aligned,
Readonly,
}
const NAMES: [&str; 3] = ["notrap", "aligned", "readonly"];
/// Flags for memory operations like load/store.
///
/// Each of these flags introduces a limited form of undefined behavior. The flags each enable
/// certain optimizations that need to make additional assumptions. Generally, the semantics of a
/// program do not change when a flag is removed, but adding a flag can.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub struct MemFlags {
bits: u8,
}
impl MemFlags {
/// Create a new empty set of flags.
pub fn new() -> Self {
Self { bits: 0 }
}
/// Create a set of flags representing an access from a "trusted" address, meaning it's
/// known to be aligned and non-trapping.
pub fn trusted() -> Self {
let mut result = Self::new();
result.set_notrap();
result.set_aligned();
result
}
/// Read a flag bit.
fn read(self, bit: FlagBit) -> bool {
self.bits & (1 << bit as usize) != 0
}
/// Set a flag bit.
fn set(&mut self, bit: FlagBit) {
self.bits |= 1 << bit as usize
}
/// Set a flag bit by name.
///
/// Returns true if the flag was found and set, false for an unknown flag name.
pub fn set_by_name(&mut self, name: &str) -> bool {
match NAMES.iter().position(|&s| s == name) {
Some(bit) => {
self.bits |= 1 << bit;
true
}
None => false,
}
}
/// Test if the `notrap` flag is set.
///
/// Normally, trapping is part of the semantics of a load/store operation. If the platform
/// would cause a trap when accessing the effective address, the Cranelift memory operation is
/// also required to trap.
///
/// The `notrap` flag tells Cranelift that the memory is *accessible*, which means that
/// accesses will not trap. This makes it possible to delete an unused load or a dead store
/// instruction.
pub fn notrap(self) -> bool {
self.read(FlagBit::Notrap)
}
/// Set the `notrap` flag.
pub fn set_notrap(&mut self) {
self.set(FlagBit::Notrap)
}
/// Test if the `aligned` flag is set.
///
/// By default, Cranelift memory instructions work with any unaligned effective address. If the
/// `aligned` flag is set, the instruction is permitted to trap or return a wrong result if the
/// effective address is misaligned.
pub fn aligned(self) -> bool {
self.read(FlagBit::Aligned)
}
/// Set the `aligned` flag.
pub fn set_aligned(&mut self) {
self.set(FlagBit::Aligned)
}
/// Test if the `readonly` flag is set.
///
/// Loads with this flag have no memory dependencies.
/// This results in undefined behavior if the dereferenced memory is mutated at any time
/// between when the function is called and when it is exited.
pub fn readonly(self) -> bool {
self.read(FlagBit::Readonly)
}
/// Set the `readonly` flag.
pub fn set_readonly(&mut self) {
self.set(FlagBit::Readonly)
}
}
impl fmt::Display for MemFlags {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
for (i, n) in NAMES.iter().enumerate() {
if self.bits & (1 << i) != 0 {
write!(f, " {}", n)?;
}
}
Ok(())
}
}
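// A usage sketch for `MemFlags`: the names accepted by `set_by_name` are exactly
// the ones printed by the `Display` impl above, each with a leading space.
#[cfg(test)]
mod memflags_example {
    use super::*;
    use std::string::ToString;

    #[test]
    fn trusted_and_by_name() {
        let flags = MemFlags::trusted();
        assert!(flags.notrap());
        assert!(flags.aligned());
        assert!(!flags.readonly());
        assert_eq!(flags.to_string(), " notrap aligned");

        let mut flags = MemFlags::new();
        assert!(flags.set_by_name("readonly"));
        assert!(!flags.set_by_name("bogus"));
        assert!(flags.readonly());
    }
}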

View File

@@ -0,0 +1,73 @@
//! Representation of Cranelift IR functions.
mod builder;
pub mod condcodes;
pub mod dfg;
pub mod entities;
mod extfunc;
mod extname;
pub mod function;
mod globalvalue;
mod heap;
pub mod immediates;
pub mod instructions;
pub mod jumptable;
pub mod layout;
mod libcall;
mod memflags;
mod progpoint;
mod sourceloc;
pub mod stackslot;
mod table;
mod trapcode;
pub mod types;
mod valueloc;
pub use crate::ir::builder::{InsertBuilder, InstBuilder, InstBuilderBase, InstInserterBase};
pub use crate::ir::dfg::{DataFlowGraph, ValueDef};
pub use crate::ir::entities::{
Ebb, FuncRef, GlobalValue, Heap, Inst, JumpTable, SigRef, StackSlot, Table, Value,
};
pub use crate::ir::extfunc::{
AbiParam, ArgumentExtension, ArgumentPurpose, ExtFuncData, Signature,
};
pub use crate::ir::extname::ExternalName;
pub use crate::ir::function::Function;
pub use crate::ir::globalvalue::GlobalValueData;
pub use crate::ir::heap::{HeapData, HeapStyle};
pub use crate::ir::instructions::{
InstructionData, Opcode, ValueList, ValueListPool, VariableArgs,
};
pub use crate::ir::jumptable::JumpTableData;
pub use crate::ir::layout::Layout;
pub use crate::ir::libcall::{get_libcall_funcref, get_probestack_funcref, LibCall};
pub use crate::ir::memflags::MemFlags;
pub use crate::ir::progpoint::{ExpandedProgramPoint, ProgramOrder, ProgramPoint};
pub use crate::ir::sourceloc::SourceLoc;
pub use crate::ir::stackslot::{StackSlotData, StackSlotKind, StackSlots};
pub use crate::ir::table::TableData;
pub use crate::ir::trapcode::TrapCode;
pub use crate::ir::types::Type;
pub use crate::ir::valueloc::{ArgumentLoc, ValueLoc};
use crate::binemit;
use crate::entity::{PrimaryMap, SecondaryMap};
use crate::isa;
/// Map of value locations.
pub type ValueLocations = SecondaryMap<Value, ValueLoc>;
/// Map of jump tables.
pub type JumpTables = PrimaryMap<JumpTable, JumpTableData>;
/// Map of instruction encodings.
pub type InstEncodings = SecondaryMap<Inst, isa::Encoding>;
/// Code offsets for EBBs.
pub type EbbOffsets = SecondaryMap<Ebb, binemit::CodeOffset>;
/// Code offsets for Jump Tables.
pub type JumpTableOffsets = SecondaryMap<JumpTable, binemit::CodeOffset>;
/// Source locations for instructions.
pub type SourceLocs = SecondaryMap<Inst, SourceLoc>;

View File

@@ -0,0 +1,164 @@
//! Program points.
use crate::entity::EntityRef;
use crate::ir::{Ebb, Inst, ValueDef};
use core::cmp;
use core::fmt;
use core::u32;
/// A `ProgramPoint` represents a position in a function where the live range of an SSA value can
/// begin or end. It can be either:
///
/// 1. An instruction or
/// 2. An EBB header.
///
/// This corresponds more or less to the lines in the textual form of Cranelift IR.
#[derive(PartialEq, Eq, Clone, Copy)]
pub struct ProgramPoint(u32);
impl From<Inst> for ProgramPoint {
fn from(inst: Inst) -> Self {
let idx = inst.index();
debug_assert!(idx < (u32::MAX / 2) as usize);
ProgramPoint((idx * 2) as u32)
}
}
impl From<Ebb> for ProgramPoint {
fn from(ebb: Ebb) -> Self {
let idx = ebb.index();
debug_assert!(idx < (u32::MAX / 2) as usize);
ProgramPoint((idx * 2 + 1) as u32)
}
}
impl From<ValueDef> for ProgramPoint {
fn from(def: ValueDef) -> Self {
match def {
ValueDef::Result(inst, _) => inst.into(),
ValueDef::Param(ebb, _) => ebb.into(),
}
}
}
/// An expanded program point directly exposes the variants, but takes twice the space to
/// represent.
#[derive(PartialEq, Eq, Clone, Copy)]
pub enum ExpandedProgramPoint {
/// An instruction in the function.
Inst(Inst),
/// An EBB header.
Ebb(Ebb),
}
impl ExpandedProgramPoint {
/// Get the instruction we know is inside.
pub fn unwrap_inst(self) -> Inst {
match self {
ExpandedProgramPoint::Inst(x) => x,
ExpandedProgramPoint::Ebb(x) => panic!("expected inst: {}", x),
}
}
}
impl From<Inst> for ExpandedProgramPoint {
fn from(inst: Inst) -> Self {
ExpandedProgramPoint::Inst(inst)
}
}
impl From<Ebb> for ExpandedProgramPoint {
fn from(ebb: Ebb) -> Self {
ExpandedProgramPoint::Ebb(ebb)
}
}
impl From<ValueDef> for ExpandedProgramPoint {
fn from(def: ValueDef) -> Self {
match def {
ValueDef::Result(inst, _) => inst.into(),
ValueDef::Param(ebb, _) => ebb.into(),
}
}
}
impl From<ProgramPoint> for ExpandedProgramPoint {
fn from(pp: ProgramPoint) -> Self {
if pp.0 & 1 == 0 {
ExpandedProgramPoint::Inst(Inst::from_u32(pp.0 / 2))
} else {
ExpandedProgramPoint::Ebb(Ebb::from_u32(pp.0 / 2))
}
}
}
impl fmt::Display for ExpandedProgramPoint {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
ExpandedProgramPoint::Inst(x) => write!(f, "{}", x),
ExpandedProgramPoint::Ebb(x) => write!(f, "{}", x),
}
}
}
impl fmt::Display for ProgramPoint {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let epp: ExpandedProgramPoint = (*self).into();
epp.fmt(f)
}
}
impl fmt::Debug for ExpandedProgramPoint {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "ExpandedProgramPoint({})", self)
}
}
impl fmt::Debug for ProgramPoint {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "ProgramPoint({})", self)
}
}
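// A sketch of the packed encoding used by `ProgramPoint`: instructions map to even
// numbers and EBB headers to odd ones, so expanding a point recovers the original
// entity exactly.
#[cfg(test)]
mod roundtrip_example {
    use super::*;
    use crate::entity::EntityRef;

    #[test]
    fn expand() {
        let inst = Inst::new(5);
        let ebb = Ebb::new(3);
        let pp1: ProgramPoint = inst.into();
        let pp2: ProgramPoint = ebb.into();
        assert_eq!(ExpandedProgramPoint::from(pp1), ExpandedProgramPoint::Inst(inst));
        assert_eq!(ExpandedProgramPoint::from(pp2), ExpandedProgramPoint::Ebb(ebb));
    }
}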
/// Context for ordering program points.
///
/// `ProgramPoint` objects don't carry enough information to be ordered independently; they need a
/// context providing the program order.
pub trait ProgramOrder {
/// Compare the program points `a` and `b` relative to this program order.
///
/// Return `Less` if `a` appears in the program before `b`.
///
/// This is declared as a generic such that it can be called with `Inst` and `Ebb` arguments
/// directly. Depending on the implementation, there is a good chance performance will be
/// improved for those cases where the type of either argument is known statically.
fn cmp<A, B>(&self, a: A, b: B) -> cmp::Ordering
where
A: Into<ExpandedProgramPoint>,
B: Into<ExpandedProgramPoint>;
/// Is the range from `inst` to `ebb` just the gap between consecutive EBBs?
///
/// This returns true if `inst` is the terminator in the EBB immediately before `ebb`.
fn is_ebb_gap(&self, inst: Inst, ebb: Ebb) -> bool;
}
#[cfg(test)]
mod tests {
use super::*;
use crate::entity::EntityRef;
use crate::ir::{Ebb, Inst};
use std::string::ToString;
#[test]
fn convert() {
let i5 = Inst::new(5);
let b3 = Ebb::new(3);
let pp1: ProgramPoint = i5.into();
let pp2: ProgramPoint = b3.into();
assert_eq!(pp1.to_string(), "inst5");
assert_eq!(pp2.to_string(), "ebb3");
}
}

View File

@@ -0,0 +1,63 @@
//! Source locations.
//!
//! Cranelift tracks the original source location of each instruction, and preserves the source
//! location when instructions are transformed.
use core::fmt;
/// A source location.
///
/// This is an opaque 32-bit number attached to each Cranelift IR instruction. Cranelift does not
/// interpret source locations in any way, they are simply preserved from the input to the output.
///
/// The default source location uses the all-ones bit pattern `!0`. It is used for instructions
/// that can't be given a real source location.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct SourceLoc(u32);
impl SourceLoc {
/// Create a new source location with the given bits.
pub fn new(bits: u32) -> Self {
SourceLoc(bits)
}
/// Is this the default source location?
pub fn is_default(self) -> bool {
self == Default::default()
}
/// Read the bits of this source location.
pub fn bits(self) -> u32 {
self.0
}
}
impl Default for SourceLoc {
fn default() -> Self {
SourceLoc(!0)
}
}
impl fmt::Display for SourceLoc {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if self.is_default() {
write!(f, "@-")
} else {
write!(f, "@{:04x}", self.0)
}
}
}
#[cfg(test)]
mod tests {
use crate::ir::SourceLoc;
use std::string::ToString;
#[test]
fn display() {
assert_eq!(SourceLoc::default().to_string(), "@-");
assert_eq!(SourceLoc::new(0).to_string(), "@0000");
assert_eq!(SourceLoc::new(16).to_string(), "@0010");
assert_eq!(SourceLoc::new(0xabcdef).to_string(), "@abcdef");
}
}

View File

@@ -0,0 +1,427 @@
//! Stack slots.
//!
//! The `StackSlotData` struct keeps track of a single stack slot in a function.
//!
use crate::entity::{Iter, IterMut, Keys, PrimaryMap};
use crate::ir::{StackSlot, Type};
use crate::packed_option::PackedOption;
use core::cmp;
use core::fmt;
use core::ops::{Index, IndexMut};
use core::slice;
use core::str::FromStr;
use std::vec::Vec;
/// The size of an object on the stack, or the size of a stack frame.
///
/// We don't use `usize` to represent object sizes on the target platform because Cranelift supports
/// cross-compilation, and `usize` is a type that depends on the host platform, not the target
/// platform.
pub type StackSize = u32;
/// A stack offset.
///
/// The location of a stack offset relative to a stack pointer or frame pointer.
pub type StackOffset = i32;
/// The minimum size of a spill slot in bytes.
///
/// ISA implementations are allowed to assume that small types like `b1` and `i8` get a full 4-byte
/// spill slot.
const MIN_SPILL_SLOT_SIZE: StackSize = 4;
/// Get the spill slot size to use for `ty`.
fn spill_size(ty: Type) -> StackSize {
cmp::max(MIN_SPILL_SLOT_SIZE, ty.bytes())
}
/// The kind of a stack slot.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum StackSlotKind {
/// A spill slot. This is a stack slot created by the register allocator.
SpillSlot,
/// An explicit stack slot. This is a chunk of stack memory for use by the `stack_load`
/// and `stack_store` instructions.
ExplicitSlot,
/// An incoming function argument.
///
/// If the current function has more arguments than fits in registers, the remaining arguments
/// are passed on the stack by the caller. These incoming arguments are represented as SSA
/// values assigned to incoming stack slots.
IncomingArg,
/// An outgoing function argument.
///
/// When preparing to call a function whose arguments don't fit in registers, outgoing argument
/// stack slots are used to represent individual arguments in the outgoing call frame. These
/// stack slots are only valid while setting up a call.
OutgoingArg,
/// An emergency spill slot.
///
/// Emergency slots are allocated late when the register's constraint solver needs extra space
/// to shuffle registers around. They are only used briefly, and can be reused.
EmergencySlot,
}
impl FromStr for StackSlotKind {
type Err = ();
fn from_str(s: &str) -> Result<Self, ()> {
use self::StackSlotKind::*;
match s {
"explicit_slot" => Ok(ExplicitSlot),
"spill_slot" => Ok(SpillSlot),
"incoming_arg" => Ok(IncomingArg),
"outgoing_arg" => Ok(OutgoingArg),
"emergency_slot" => Ok(EmergencySlot),
_ => Err(()),
}
}
}
impl fmt::Display for StackSlotKind {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::StackSlotKind::*;
f.write_str(match *self {
ExplicitSlot => "explicit_slot",
SpillSlot => "spill_slot",
IncomingArg => "incoming_arg",
OutgoingArg => "outgoing_arg",
EmergencySlot => "emergency_slot",
})
}
}
/// Contents of a stack slot.
#[derive(Clone, Debug)]
pub struct StackSlotData {
/// The kind of stack slot.
pub kind: StackSlotKind,
/// Size of stack slot in bytes.
pub size: StackSize,
/// Offset of stack slot relative to the stack pointer in the caller.
///
/// On x86, the base address is the stack pointer *before* the return address was pushed. On
/// RISC ISAs, the base address is the value of the stack pointer on entry to the function.
///
/// For `OutgoingArg` stack slots, the offset is relative to the current function's stack
/// pointer immediately before the call.
pub offset: Option<StackOffset>,
}
impl StackSlotData {
/// Create a stack slot with the specified byte size.
pub fn new(kind: StackSlotKind, size: StackSize) -> Self {
Self {
kind,
size,
offset: None,
}
}
/// Get the alignment in bytes of this stack slot given the stack pointer alignment.
pub fn alignment(&self, max_align: StackSize) -> StackSize {
debug_assert!(max_align.is_power_of_two());
// We want to find the largest power of two that divides both `self.size` and `max_align`.
// That is the same as isolating the rightmost bit in `x`.
let x = self.size | max_align;
// Cf. Hacker's Delight.
x & x.wrapping_neg()
}
}
impl fmt::Display for StackSlotData {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{} {}", self.kind, self.size)?;
if let Some(offset) = self.offset {
write!(f, ", offset {}", offset)?;
}
Ok(())
}
}
/// Stack frame manager.
///
/// Keep track of all the stack slots used by a function.
#[derive(Clone, Debug)]
pub struct StackSlots {
/// All allocated stack slots.
slots: PrimaryMap<StackSlot, StackSlotData>,
/// All the outgoing stack slots, ordered by offset.
outgoing: Vec<StackSlot>,
/// All the emergency slots.
emergency: Vec<StackSlot>,
/// The total size of the stack frame.
///
/// This is the distance from the stack pointer in the current function to the stack pointer in
/// the calling function, so it includes a pushed return address as well as space for outgoing
/// call arguments.
///
/// This is computed by the `layout()` method.
pub frame_size: Option<StackSize>,
}
/// Stack slot manager functions that behave mostly like an entity map.
impl StackSlots {
/// Create an empty stack slot manager.
pub fn new() -> Self {
Self {
slots: PrimaryMap::new(),
outgoing: Vec::new(),
emergency: Vec::new(),
frame_size: None,
}
}
/// Clear out everything.
pub fn clear(&mut self) {
self.slots.clear();
self.outgoing.clear();
self.emergency.clear();
self.frame_size = None;
}
/// Allocate a new stack slot.
///
/// This function should be primarily used by the text format parser. There are more convenient
/// functions for creating specific kinds of stack slots below.
pub fn push(&mut self, data: StackSlotData) -> StackSlot {
self.slots.push(data)
}
/// Check if `ss` is a valid stack slot reference.
pub fn is_valid(&self, ss: StackSlot) -> bool {
self.slots.is_valid(ss)
}
/// Set the offset of a stack slot.
pub fn set_offset(&mut self, ss: StackSlot, offset: StackOffset) {
self.slots[ss].offset = Some(offset);
}
/// Get an iterator over all the stack slot keys.
pub fn iter(&self) -> Iter<StackSlot, StackSlotData> {
self.slots.iter()
}
/// Get an iterator over all the stack slot keys, mutable edition.
pub fn iter_mut(&mut self) -> IterMut<StackSlot, StackSlotData> {
self.slots.iter_mut()
}
/// Get an iterator over all the stack slot records.
pub fn values(&self) -> slice::Iter<StackSlotData> {
self.slots.values()
}
/// Get an iterator over all the stack slot records, mutable edition.
pub fn values_mut(&mut self) -> slice::IterMut<StackSlotData> {
self.slots.values_mut()
}
/// Get an iterator over all the stack slot keys.
pub fn keys(&self) -> Keys<StackSlot> {
self.slots.keys()
}
/// Get a reference to the next stack slot that would be created by `push()`.
///
/// This should just be used by the parser.
pub fn next_key(&self) -> StackSlot {
self.slots.next_key()
}
}
impl Index<StackSlot> for StackSlots {
type Output = StackSlotData;
fn index(&self, ss: StackSlot) -> &StackSlotData {
&self.slots[ss]
}
}
impl IndexMut<StackSlot> for StackSlots {
fn index_mut(&mut self, ss: StackSlot) -> &mut StackSlotData {
&mut self.slots[ss]
}
}
/// Higher-level stack frame manipulation functions.
impl StackSlots {
/// Create a new spill slot for spilling values of type `ty`.
pub fn make_spill_slot(&mut self, ty: Type) -> StackSlot {
self.push(StackSlotData::new(StackSlotKind::SpillSlot, spill_size(ty)))
}
/// Create a stack slot representing an incoming function argument.
pub fn make_incoming_arg(&mut self, ty: Type, offset: StackOffset) -> StackSlot {
let mut data = StackSlotData::new(StackSlotKind::IncomingArg, ty.bytes());
debug_assert!(offset <= StackOffset::max_value() - data.size as StackOffset);
data.offset = Some(offset);
self.push(data)
}
/// Get a stack slot representing an outgoing argument.
///
/// This may create a new stack slot, or reuse an existing outgoing stack slot with the
/// requested offset and size.
///
/// The requested offset is relative to this function's stack pointer immediately before making
/// the call.
pub fn get_outgoing_arg(&mut self, ty: Type, offset: StackOffset) -> StackSlot {
let size = ty.bytes();
// Look for an existing outgoing stack slot with the same offset and size.
let inspos = match self.outgoing.binary_search_by_key(&(offset, size), |&ss| {
(self[ss].offset.unwrap(), self[ss].size)
}) {
Ok(idx) => return self.outgoing[idx],
Err(idx) => idx,
};
// No existing slot found. Make one and insert it into `outgoing`.
let mut data = StackSlotData::new(StackSlotKind::OutgoingArg, size);
debug_assert!(offset <= StackOffset::max_value() - size as StackOffset);
data.offset = Some(offset);
let ss = self.slots.push(data);
self.outgoing.insert(inspos, ss);
ss
}
/// Get an emergency spill slot that can be used to store a `ty` value.
///
/// This may allocate a new slot, or it may reuse an existing emergency spill slot, excluding
/// any slots in the `in_use` list.
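///
/// A small sketch of the reuse behavior (assuming the usual `cranelift_codegen::ir` re-exports):
///
/// ```
/// use cranelift_codegen::ir::{types, StackSlots};
///
/// let mut sss = StackSlots::new();
/// let ss0 = sss.get_emergency_slot(types::I32, &[]);
/// // Smaller and larger requests reuse (and grow) the same slot.
/// assert_eq!(sss.get_emergency_slot(types::I8, &[]), ss0);
/// assert_eq!(sss.get_emergency_slot(types::F64, &[]), ss0);
/// // Excluding the slot via `in_use` forces a fresh one.
/// let ss1 = sss.get_emergency_slot(types::I32, &[ss0.into()]);
/// assert_ne!(ss1, ss0);
/// ```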
pub fn get_emergency_slot(
&mut self,
ty: Type,
in_use: &[PackedOption<StackSlot>],
) -> StackSlot {
let size = spill_size(ty);
// Find the smallest existing slot that can fit the type.
if let Some(&ss) = self
.emergency
.iter()
.filter(|&&ss| self[ss].size >= size && !in_use.contains(&ss.into()))
.min_by_key(|&&ss| self[ss].size)
{
return ss;
}
// Alternatively, use the largest available slot and make it larger.
if let Some(&ss) = self
.emergency
.iter()
.filter(|&&ss| !in_use.contains(&ss.into()))
.max_by_key(|&&ss| self[ss].size)
{
self.slots[ss].size = size;
return ss;
}
// No existing slot found. Make one and insert it into `emergency`.
let data = StackSlotData::new(StackSlotKind::EmergencySlot, size);
let ss = self.slots.push(data);
self.emergency.push(ss);
ss
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::ir::types;
use crate::ir::Function;
use std::string::ToString;
#[test]
fn stack_slot() {
let mut func = Function::new();
let ss0 = func.create_stack_slot(StackSlotData::new(StackSlotKind::IncomingArg, 4));
let ss1 = func.create_stack_slot(StackSlotData::new(StackSlotKind::SpillSlot, 8));
assert_eq!(ss0.to_string(), "ss0");
assert_eq!(ss1.to_string(), "ss1");
assert_eq!(func.stack_slots[ss0].size, 4);
assert_eq!(func.stack_slots[ss1].size, 8);
assert_eq!(func.stack_slots[ss0].to_string(), "incoming_arg 4");
assert_eq!(func.stack_slots[ss1].to_string(), "spill_slot 8");
}
#[test]
fn outgoing() {
let mut sss = StackSlots::new();
let ss0 = sss.get_outgoing_arg(types::I32, 8);
let ss1 = sss.get_outgoing_arg(types::I32, 4);
let ss2 = sss.get_outgoing_arg(types::I64, 8);
assert_eq!(sss[ss0].offset, Some(8));
assert_eq!(sss[ss0].size, 4);
assert_eq!(sss[ss1].offset, Some(4));
assert_eq!(sss[ss1].size, 4);
assert_eq!(sss[ss2].offset, Some(8));
assert_eq!(sss[ss2].size, 8);
assert_eq!(sss.get_outgoing_arg(types::I32, 8), ss0);
assert_eq!(sss.get_outgoing_arg(types::I32, 4), ss1);
assert_eq!(sss.get_outgoing_arg(types::I64, 8), ss2);
}
#[test]
fn alignment() {
let slot = StackSlotData::new(StackSlotKind::SpillSlot, 8);
assert_eq!(slot.alignment(4), 4);
assert_eq!(slot.alignment(8), 8);
assert_eq!(slot.alignment(16), 8);
let slot2 = StackSlotData::new(StackSlotKind::ExplicitSlot, 24);
assert_eq!(slot2.alignment(4), 4);
assert_eq!(slot2.alignment(8), 8);
assert_eq!(slot2.alignment(16), 8);
assert_eq!(slot2.alignment(32), 8);
}
#[test]
fn emergency() {
let mut sss = StackSlots::new();
let ss0 = sss.get_emergency_slot(types::I32, &[]);
assert_eq!(sss[ss0].size, 4);
// When a smaller size is requested, we should simply get the same slot back.
assert_eq!(sss.get_emergency_slot(types::I8, &[]), ss0);
assert_eq!(sss[ss0].size, 4);
assert_eq!(sss.get_emergency_slot(types::F32, &[]), ss0);
assert_eq!(sss[ss0].size, 4);
// Ask for a larger size and the slot should grow.
assert_eq!(sss.get_emergency_slot(types::F64, &[]), ss0);
assert_eq!(sss[ss0].size, 8);
// When one slot is in use, we should get a new one.
let ss1 = sss.get_emergency_slot(types::I32, &[None.into(), ss0.into()]);
assert_eq!(sss[ss0].size, 8);
assert_eq!(sss[ss1].size, 4);
// Now we should get the smallest fit of the two available slots.
assert_eq!(sss.get_emergency_slot(types::F32, &[]), ss1);
assert_eq!(sss.get_emergency_slot(types::F64, &[]), ss0);
}
}

View File

@@ -0,0 +1,36 @@
//! Tables.
use crate::ir::immediates::Uimm64;
use crate::ir::{GlobalValue, Type};
use core::fmt;
/// Information about a table declaration.
#[derive(Clone)]
pub struct TableData {
/// Global value giving the address of the start of the table.
pub base_gv: GlobalValue,
/// Guaranteed minimum table size in elements. Table accesses before `min_size` don't need
/// bounds checking.
pub min_size: Uimm64,
/// Global value giving the current bound of the table, in elements.
pub bound_gv: GlobalValue,
/// The size of a table element, in bytes.
pub element_size: Uimm64,
/// The index type for the table.
pub index_type: Type,
}
impl fmt::Display for TableData {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str("dynamic")?;
write!(
f,
" {}, min {}, bound {}, element_size {}, index_type {}",
self.base_gv, self.min_size, self.bound_gv, self.element_size, self.index_type
)
}
}

View File

@@ -0,0 +1,134 @@
//! Trap codes describing the reason for a trap.
use core::fmt::{self, Display, Formatter};
use core::str::FromStr;
/// A trap code describing the reason for a trap.
///
/// All trap instructions have an explicit trap code.
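///
/// Codes round-trip through their textual mnemonics, for example:
///
/// ```
/// use cranelift_codegen::ir::TrapCode;
///
/// assert_eq!(TrapCode::StackOverflow.to_string(), "stk_ovf");
/// assert_eq!("stk_ovf".parse(), Ok(TrapCode::StackOverflow));
/// ```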
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum TrapCode {
/// The current stack space was exhausted.
///
/// On some platforms, a stack overflow may also be indicated by a segmentation fault from the
/// stack guard page.
StackOverflow,
/// A `heap_addr` instruction detected an out-of-bounds error.
///
/// Note that not all out-of-bounds heap accesses are reported this way;
/// some are detected by a segmentation fault on the heap's unmapped or
/// offset-guard pages.
HeapOutOfBounds,
/// A `table_addr` instruction detected an out-of-bounds error.
TableOutOfBounds,
/// Other bounds checking error.
OutOfBounds,
/// Indirect call to a null table entry.
IndirectCallToNull,
/// Signature mismatch on indirect call.
BadSignature,
/// An integer arithmetic operation caused an overflow.
IntegerOverflow,
/// An integer division by zero.
IntegerDivisionByZero,
/// Failed float-to-int conversion.
BadConversionToInteger,
/// Code that was supposed to have been unreachable was reached.
UnreachableCodeReached,
/// Execution has potentially run too long and may be interrupted.
/// This trap is resumable.
Interrupt,
/// A user-defined trap code.
User(u16),
}
impl Display for TrapCode {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
use self::TrapCode::*;
let identifier = match *self {
StackOverflow => "stk_ovf",
HeapOutOfBounds => "heap_oob",
TableOutOfBounds => "table_oob",
OutOfBounds => "oob",
IndirectCallToNull => "icall_null",
BadSignature => "bad_sig",
IntegerOverflow => "int_ovf",
IntegerDivisionByZero => "int_divz",
BadConversionToInteger => "bad_toint",
UnreachableCodeReached => "unreachable",
Interrupt => "interrupt",
User(x) => return write!(f, "user{}", x),
};
f.write_str(identifier)
}
}
impl FromStr for TrapCode {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
use self::TrapCode::*;
match s {
"stk_ovf" => Ok(StackOverflow),
"heap_oob" => Ok(HeapOutOfBounds),
"table_oob" => Ok(TableOutOfBounds),
"oob" => Ok(OutOfBounds),
"icall_null" => Ok(IndirectCallToNull),
"bad_sig" => Ok(BadSignature),
"int_ovf" => Ok(IntegerOverflow),
"int_divz" => Ok(IntegerDivisionByZero),
"bad_toint" => Ok(BadConversionToInteger),
"unreachable" => Ok(UnreachableCodeReached),
"interrupt" => Ok(Interrupt),
_ if s.starts_with("user") => s[4..].parse().map(User).map_err(|_| ()),
_ => Err(()),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::string::ToString;
// Everything but user-defined codes.
const CODES: [TrapCode; 11] = [
TrapCode::StackOverflow,
TrapCode::HeapOutOfBounds,
TrapCode::TableOutOfBounds,
TrapCode::OutOfBounds,
TrapCode::IndirectCallToNull,
TrapCode::BadSignature,
TrapCode::IntegerOverflow,
TrapCode::IntegerDivisionByZero,
TrapCode::BadConversionToInteger,
TrapCode::UnreachableCodeReached,
TrapCode::Interrupt,
];
#[test]
fn display() {
for r in &CODES {
let tc = *r;
assert_eq!(tc.to_string().parse(), Ok(tc));
}
assert_eq!("bogus".parse::<TrapCode>(), Err(()));
assert_eq!(TrapCode::User(17).to_string(), "user17");
assert_eq!("user22".parse(), Ok(TrapCode::User(22)));
assert_eq!("user".parse::<TrapCode>(), Err(()));
assert_eq!("user-1".parse::<TrapCode>(), Err(()));
assert_eq!("users".parse::<TrapCode>(), Err(()));
}
}

View File

@@ -0,0 +1,466 @@
//! Common types for the Cranelift code generator.
use core::default::Default;
use core::fmt::{self, Debug, Display, Formatter};
use target_lexicon::{PointerWidth, Triple};
/// The type of an SSA value.
///
/// The `INVALID` type isn't a real type, and is used as a placeholder in the IR where a type
/// field is present but no type is needed, such as the controlling type variable for a
/// non-polymorphic instruction.
///
/// Basic integer types: `I8`, `I16`, `I32`, and `I64`. These types are sign-agnostic.
///
/// Basic floating point types: `F32` and `F64`. IEEE single and double precision.
///
/// Boolean types: `B1`, `B8`, `B16`, `B32`, and `B64`. These all encode 'true' or 'false'. The
/// larger types use redundant bits.
///
/// SIMD vector types have power-of-two lanes, up to 256. Lanes can be any int/float/bool type.
///
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct Type(u8);
/// Not a valid type. Can't be loaded or stored. Can't be part of a SIMD vector.
pub const INVALID: Type = Type(0);
/// Start of the lane types. See also `meta-python/cdsl/types.py`.
const LANE_BASE: u8 = 0x70;
/// Start of the 2-lane vector types.
const VECTOR_BASE: u8 = LANE_BASE + 16;
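// In other words, the type byte encodes both the lane type and the lane count: the low nibble
// selects the lane type within the `LANE_BASE` bank, and each step of `0x10` above `LANE_BASE`
// doubles the lane count. `lane_type()`, `log2_lane_count()`, and `by()` below all rely on this
// layout.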
// Include code generated by `cranelift-codegen/meta/gen_types.rs`. This file contains constant
// definitions for all the scalar types as well as common vector types for 64, 128, 256, and
// 512-bit SIMD vectors.
include!(concat!(env!("OUT_DIR"), "/types.rs"));
impl Type {
/// Get the lane type of this SIMD vector type.
///
/// A lane type is the same as a SIMD vector type with one lane, so it returns itself.
pub fn lane_type(self) -> Self {
if self.0 < VECTOR_BASE {
self
} else {
Type(LANE_BASE | (self.0 & 0x0f))
}
}
/// Get log_2 of the number of bits in a lane.
pub fn log2_lane_bits(self) -> u8 {
match self.lane_type() {
B1 => 0,
B8 | I8 => 3,
B16 | I16 => 4,
B32 | I32 | F32 => 5,
B64 | I64 | F64 => 6,
_ => 0,
}
}
/// Get the number of bits in a lane.
pub fn lane_bits(self) -> u8 {
match self.lane_type() {
B1 => 1,
B8 | I8 => 8,
B16 | I16 => 16,
B32 | I32 | F32 => 32,
B64 | I64 | F64 => 64,
_ => 0,
}
}
/// Get an integer type with the requested number of bits.
pub fn int(bits: u16) -> Option<Self> {
match bits {
8 => Some(I8),
16 => Some(I16),
32 => Some(I32),
64 => Some(I64),
_ => None,
}
}
/// Get a type with the same number of lanes as `self`, but using `lane` as the lane type.
fn replace_lanes(self, lane: Self) -> Self {
debug_assert!(lane.is_lane() && !self.is_special());
Type((lane.0 & 0x0f) | (self.0 & 0xf0))
}
/// Get a type with the same number of lanes as this type, but with the lanes replaced by
/// booleans of the same size.
///
/// Scalar types are treated as vectors with one lane, so they are converted to the multi-bit
/// boolean types.
pub fn as_bool_pedantic(self) -> Self {
// Replace the low 4 bits with the boolean version, preserve the high 4 bits.
self.replace_lanes(match self.lane_type() {
B8 | I8 => B8,
B16 | I16 => B16,
B32 | I32 | F32 => B32,
B64 | I64 | F64 => B64,
_ => B1,
})
}
/// Get a type with the same number of lanes as this type, but with the lanes replaced by
/// booleans of the same size.
///
/// Scalar types are all converted to `b1` which is usually what you want.
pub fn as_bool(self) -> Self {
if !self.is_vector() {
B1
} else {
self.as_bool_pedantic()
}
}
/// Get a type with the same number of lanes as this type, but with lanes that are half the
/// number of bits.
pub fn half_width(self) -> Option<Self> {
Some(self.replace_lanes(match self.lane_type() {
I16 => I8,
I32 => I16,
I64 => I32,
F64 => F32,
B16 => B8,
B32 => B16,
B64 => B32,
_ => return None,
}))
}
/// Get a type with the same number of lanes as this type, but with lanes that are twice the
/// number of bits.
pub fn double_width(self) -> Option<Self> {
Some(self.replace_lanes(match self.lane_type() {
I8 => I16,
I16 => I32,
I32 => I64,
F32 => F64,
B8 => B16,
B16 => B32,
B32 => B64,
_ => return None,
}))
}
/// Is this the INVALID type?
pub fn is_invalid(self) -> bool {
self == INVALID
}
/// Is this a special type?
pub fn is_special(self) -> bool {
self.0 < LANE_BASE
}
/// Is this a lane type?
///
/// This is a scalar type that can also appear as the lane type of a SIMD vector.
pub fn is_lane(self) -> bool {
LANE_BASE <= self.0 && self.0 < VECTOR_BASE
}
/// Is this a SIMD vector type?
///
/// A vector type has 2 or more lanes.
pub fn is_vector(self) -> bool {
self.0 >= VECTOR_BASE
}
/// Is this a scalar boolean type?
pub fn is_bool(self) -> bool {
match self {
B1 | B8 | B16 | B32 | B64 => true,
_ => false,
}
}
/// Is this a scalar integer type?
pub fn is_int(self) -> bool {
match self {
I8 | I16 | I32 | I64 => true,
_ => false,
}
}
/// Is this a scalar floating point type?
pub fn is_float(self) -> bool {
match self {
F32 | F64 => true,
_ => false,
}
}
/// Is this a CPU flags type?
pub fn is_flags(self) -> bool {
match self {
IFLAGS | FFLAGS => true,
_ => false,
}
}
/// Get log_2 of the number of lanes in this SIMD vector type.
///
/// All SIMD types have a lane count that is a power of two and no larger than 256, so this
/// will be a number in the range 0-8.
///
/// A scalar type is the same as a SIMD vector type with one lane, so it returns 0.
pub fn log2_lane_count(self) -> u8 {
self.0.saturating_sub(LANE_BASE) >> 4
}
/// Get the number of lanes in this SIMD vector type.
///
/// A scalar type is the same as a SIMD vector type with one lane, so it returns 1.
pub fn lane_count(self) -> u16 {
1 << self.log2_lane_count()
}
/// Get the total number of bits used to represent this type.
pub fn bits(self) -> u16 {
u16::from(self.lane_bits()) * self.lane_count()
}
/// Get the number of bytes used to store this type in memory.
pub fn bytes(self) -> u32 {
(u32::from(self.bits()) + 7) / 8
}
/// Get a SIMD vector type with `n` times more lanes than this one.
///
/// If this is a scalar type, this produces a SIMD type with this as a lane type and `n` lanes.
///
/// If this is already a SIMD vector type, this produces a SIMD vector type with `n *
/// self.lane_count()` lanes.
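///
/// For example:
///
/// ```
/// use cranelift_codegen::ir::types::{I32, I32X4};
///
/// assert_eq!(I32.by(4), Some(I32X4));
/// // Lane counts must be powers of two.
/// assert_eq!(I32.by(3), None);
/// ```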
pub fn by(self, n: u16) -> Option<Self> {
if self.lane_bits() == 0 || !n.is_power_of_two() {
return None;
}
let log2_lanes: u32 = n.trailing_zeros();
let new_type = u32::from(self.0) + (log2_lanes << 4);
if new_type < 0x100 {
Some(Type(new_type as u8))
} else {
None
}
}
/// Get a SIMD vector with half the number of lanes.
///
/// There is no `double_vector()` method. Use `t.by(2)` instead.
pub fn half_vector(self) -> Option<Self> {
if self.is_vector() {
Some(Type(self.0 - 0x10))
} else {
None
}
}
/// Index of this type, for use with hash tables etc.
pub fn index(self) -> usize {
usize::from(self.0)
}
/// True iff:
///
/// 1. `self.lane_count() == other.lane_count()` and
/// 2. `self.lane_bits() >= other.lane_bits()`
pub fn wider_or_equal(self, other: Self) -> bool {
self.lane_count() == other.lane_count() && self.lane_bits() >= other.lane_bits()
}
/// Return the pointer type for the given target triple.
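///
/// For example (triples come from the `target_lexicon` crate):
///
/// ```
/// use cranelift_codegen::ir::{types, Type};
/// use target_lexicon::Triple;
///
/// let triple: Triple = "x86_64-unknown-linux-gnu".parse().unwrap();
/// assert_eq!(Type::triple_pointer_type(&triple), types::I64);
/// ```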
pub fn triple_pointer_type(triple: &Triple) -> Self {
match triple.pointer_width() {
Ok(PointerWidth::U16) => I16,
Ok(PointerWidth::U32) => I32,
Ok(PointerWidth::U64) => I64,
Err(()) => panic!("unable to determine architecture pointer width"),
}
}
}
impl Display for Type {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
if self.is_bool() {
write!(f, "b{}", self.lane_bits())
} else if self.is_int() {
write!(f, "i{}", self.lane_bits())
} else if self.is_float() {
write!(f, "f{}", self.lane_bits())
} else if self.is_vector() {
write!(f, "{}x{}", self.lane_type(), self.lane_count())
} else {
f.write_str(match *self {
IFLAGS => "iflags",
FFLAGS => "fflags",
INVALID => panic!("INVALID encountered"),
_ => panic!("Unknown Type(0x{:x})", self.0),
})
}
}
}
impl Debug for Type {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
if self.is_bool() {
write!(f, "types::B{}", self.lane_bits())
} else if self.is_int() {
write!(f, "types::I{}", self.lane_bits())
} else if self.is_float() {
write!(f, "types::F{}", self.lane_bits())
} else if self.is_vector() {
write!(f, "{:?}X{}", self.lane_type(), self.lane_count())
} else {
match *self {
INVALID => write!(f, "types::INVALID"),
IFLAGS => write!(f, "types::IFLAGS"),
FFLAGS => write!(f, "types::FFLAGS"),
_ => write!(f, "Type(0x{:x})", self.0),
}
}
}
}
impl Default for Type {
fn default() -> Self {
INVALID
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::string::ToString;
#[test]
fn basic_scalars() {
assert_eq!(INVALID, INVALID.lane_type());
assert_eq!(0, INVALID.bits());
assert_eq!(IFLAGS, IFLAGS.lane_type());
assert_eq!(0, IFLAGS.bits());
assert_eq!(FFLAGS, FFLAGS.lane_type());
assert_eq!(0, FFLAGS.bits());
assert_eq!(B1, B1.lane_type());
assert_eq!(B8, B8.lane_type());
assert_eq!(B16, B16.lane_type());
assert_eq!(B32, B32.lane_type());
assert_eq!(B64, B64.lane_type());
assert_eq!(I8, I8.lane_type());
assert_eq!(I16, I16.lane_type());
assert_eq!(I32, I32.lane_type());
assert_eq!(I64, I64.lane_type());
assert_eq!(F32, F32.lane_type());
assert_eq!(F64, F64.lane_type());
assert_eq!(INVALID.lane_bits(), 0);
assert_eq!(IFLAGS.lane_bits(), 0);
assert_eq!(FFLAGS.lane_bits(), 0);
assert_eq!(B1.lane_bits(), 1);
assert_eq!(B8.lane_bits(), 8);
assert_eq!(B16.lane_bits(), 16);
assert_eq!(B32.lane_bits(), 32);
assert_eq!(B64.lane_bits(), 64);
assert_eq!(I8.lane_bits(), 8);
assert_eq!(I16.lane_bits(), 16);
assert_eq!(I32.lane_bits(), 32);
assert_eq!(I64.lane_bits(), 64);
assert_eq!(F32.lane_bits(), 32);
assert_eq!(F64.lane_bits(), 64);
}
#[test]
fn typevar_functions() {
assert_eq!(INVALID.half_width(), None);
assert_eq!(INVALID.half_width(), None);
assert_eq!(FFLAGS.half_width(), None);
assert_eq!(B1.half_width(), None);
assert_eq!(B8.half_width(), None);
assert_eq!(B16.half_width(), Some(B8));
assert_eq!(B32.half_width(), Some(B16));
assert_eq!(B64.half_width(), Some(B32));
assert_eq!(I8.half_width(), None);
assert_eq!(I16.half_width(), Some(I8));
assert_eq!(I32.half_width(), Some(I16));
assert_eq!(I32X4.half_width(), Some(I16X4));
assert_eq!(I64.half_width(), Some(I32));
assert_eq!(F32.half_width(), None);
assert_eq!(F64.half_width(), Some(F32));
assert_eq!(INVALID.double_width(), None);
assert_eq!(IFLAGS.double_width(), None);
assert_eq!(FFLAGS.double_width(), None);
assert_eq!(B1.double_width(), None);
assert_eq!(B8.double_width(), Some(B16));
assert_eq!(B16.double_width(), Some(B32));
assert_eq!(B32.double_width(), Some(B64));
assert_eq!(B64.double_width(), None);
assert_eq!(I8.double_width(), Some(I16));
assert_eq!(I16.double_width(), Some(I32));
assert_eq!(I32.double_width(), Some(I64));
assert_eq!(I32X4.double_width(), Some(I64X4));
assert_eq!(I64.double_width(), None);
assert_eq!(F32.double_width(), Some(F64));
assert_eq!(F64.double_width(), None);
}
#[test]
fn vectors() {
let big = F64.by(256).unwrap();
assert_eq!(big.lane_bits(), 64);
assert_eq!(big.lane_count(), 256);
assert_eq!(big.bits(), 64 * 256);
assert_eq!(big.half_vector().unwrap().to_string(), "f64x128");
assert_eq!(B1.by(2).unwrap().half_vector().unwrap().to_string(), "b1");
assert_eq!(I32.half_vector(), None);
assert_eq!(INVALID.half_vector(), None);
// Check that the generated constants match the computed vector types.
assert_eq!(I32.by(4), Some(I32X4));
assert_eq!(F64.by(8), Some(F64X8));
}
#[test]
fn format_scalars() {
assert_eq!(IFLAGS.to_string(), "iflags");
assert_eq!(FFLAGS.to_string(), "fflags");
assert_eq!(B1.to_string(), "b1");
assert_eq!(B8.to_string(), "b8");
assert_eq!(B16.to_string(), "b16");
assert_eq!(B32.to_string(), "b32");
assert_eq!(B64.to_string(), "b64");
assert_eq!(I8.to_string(), "i8");
assert_eq!(I16.to_string(), "i16");
assert_eq!(I32.to_string(), "i32");
assert_eq!(I64.to_string(), "i64");
assert_eq!(F32.to_string(), "f32");
assert_eq!(F64.to_string(), "f64");
}
#[test]
fn format_vectors() {
assert_eq!(B1.by(8).unwrap().to_string(), "b1x8");
assert_eq!(B8.by(1).unwrap().to_string(), "b8");
assert_eq!(B16.by(256).unwrap().to_string(), "b16x256");
assert_eq!(B32.by(4).unwrap().by(2).unwrap().to_string(), "b32x8");
assert_eq!(B64.by(8).unwrap().to_string(), "b64x8");
assert_eq!(I8.by(64).unwrap().to_string(), "i8x64");
assert_eq!(F64.by(2).unwrap().to_string(), "f64x2");
assert_eq!(I8.by(3), None);
assert_eq!(I8.by(512), None);
assert_eq!(INVALID.by(4), None);
}
#[test]
fn as_bool() {
assert_eq!(I32X4.as_bool(), B32X4);
assert_eq!(I32.as_bool(), B1);
assert_eq!(I32X4.as_bool_pedantic(), B32X4);
assert_eq!(I32.as_bool_pedantic(), B32);
}
}

View File

@@ -0,0 +1,161 @@
//! Value locations.
//!
//! The register allocator assigns every SSA value to either a register or a stack slot. This
//! assignment is represented by a `ValueLoc` object.
use crate::ir::StackSlot;
use crate::isa::{RegInfo, RegUnit};
use core::fmt;
/// Value location.
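///
/// A small usage sketch (assuming the re-export at `cranelift_codegen::ir::ValueLoc`):
///
/// ```
/// use cranelift_codegen::ir::ValueLoc;
///
/// let loc = ValueLoc::Reg(12); // register unit numbers are ISA-specific
/// assert!(loc.is_assigned());
/// assert_eq!(loc.unwrap_reg(), 12);
/// ```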
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ValueLoc {
/// This value has not been assigned to a location yet.
Unassigned,
/// Value is assigned to a register.
Reg(RegUnit),
/// Value is assigned to a stack slot.
Stack(StackSlot),
}
impl Default for ValueLoc {
fn default() -> Self {
ValueLoc::Unassigned
}
}
impl ValueLoc {
/// Is this an assigned location? (That is, not `Unassigned`).
pub fn is_assigned(self) -> bool {
match self {
ValueLoc::Unassigned => false,
_ => true,
}
}
/// Get the register unit of this location, or panic.
pub fn unwrap_reg(self) -> RegUnit {
match self {
ValueLoc::Reg(ru) => ru,
_ => panic!("Expected register: {:?}", self),
}
}
/// Get the stack slot of this location, or panic.
pub fn unwrap_stack(self) -> StackSlot {
match self {
ValueLoc::Stack(ss) => ss,
_ => panic!("Expected stack slot: {:?}", self),
}
}
/// Return an object that can display this value location, using the register info from the
/// target ISA.
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(self, regs: R) -> DisplayValueLoc<'a> {
DisplayValueLoc(self, regs.into())
}
}
/// Displaying a `ValueLoc` correctly requires the associated `RegInfo` from the target ISA.
/// Without the register info, register units are simply shown as numbers.
///
/// The `DisplayValueLoc` type can display the contained `ValueLoc`.
pub struct DisplayValueLoc<'a>(ValueLoc, Option<&'a RegInfo>);
impl<'a> fmt::Display for DisplayValueLoc<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.0 {
ValueLoc::Unassigned => write!(f, "-"),
ValueLoc::Reg(ru) => match self.1 {
Some(regs) => write!(f, "{}", regs.display_regunit(ru)),
None => write!(f, "%{}", ru),
},
ValueLoc::Stack(ss) => write!(f, "{}", ss),
}
}
}
/// Function argument location.
///
/// The ABI specifies how arguments are passed to a function, and where return values appear after
/// the call. Just like a `ValueLoc`, function arguments can be passed in registers or on the
/// stack.
///
/// Function arguments on the stack are accessed differently for the incoming arguments to the
/// current function and the outgoing arguments to a called external function. For this reason,
/// the location of stack arguments is described as an offset into the array of function arguments
/// on the stack.
///
/// An `ArgumentLoc` can be translated to a `ValueLoc` only when we know if we're talking about an
/// incoming argument or an outgoing argument.
///
/// - For stack arguments, different `StackSlot` entities are used to represent incoming and
/// outgoing arguments.
/// - For register arguments, there is usually no difference, but if we ever add support for a
/// register-window ISA like SPARC, register arguments would also need to be translated.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub enum ArgumentLoc {
/// This argument has not been assigned to a location yet.
Unassigned,
/// Argument is passed in a register.
Reg(RegUnit),
/// Argument is passed on the stack, at the given byte offset into the argument array.
Stack(i32),
}
impl Default for ArgumentLoc {
fn default() -> Self {
ArgumentLoc::Unassigned
}
}
impl ArgumentLoc {
/// Is this an assigned location? (That is, not `Unassigned`).
pub fn is_assigned(self) -> bool {
match self {
ArgumentLoc::Unassigned => false,
_ => true,
}
}
/// Is this a register location?
pub fn is_reg(self) -> bool {
match self {
ArgumentLoc::Reg(_) => true,
_ => false,
}
}
/// Is this a stack location?
pub fn is_stack(self) -> bool {
match self {
ArgumentLoc::Stack(_) => true,
_ => false,
}
}
/// Return an object that can display this argument location, using the register info from the
/// target ISA.
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(self, regs: R) -> DisplayArgumentLoc<'a> {
DisplayArgumentLoc(self, regs.into())
}
}
/// Displaying an `ArgumentLoc` correctly requires the associated `RegInfo` from the target ISA.
/// Without the register info, register units are simply shown as numbers.
///
/// The `DisplayArgumentLoc` type can display the contained `ArgumentLoc`.
pub struct DisplayArgumentLoc<'a>(ArgumentLoc, Option<&'a RegInfo>);
impl<'a> fmt::Display for DisplayArgumentLoc<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.0 {
ArgumentLoc::Unassigned => write!(f, "-"),
ArgumentLoc::Reg(ru) => match self.1 {
Some(regs) => write!(f, "{}", regs.display_regunit(ru)),
None => write!(f, "%{}", ru),
},
ArgumentLoc::Stack(offset) => write!(f, "{}", offset),
}
}
}

View File

@@ -0,0 +1,35 @@
//! ARM 32-bit ABI implementation.
use super::registers::{D, GPR, Q, S};
use crate::ir;
use crate::isa::RegClass;
use crate::regalloc::RegisterSet;
use crate::settings as shared_settings;
/// Legalize `sig`.
pub fn legalize_signature(
_sig: &mut ir::Signature,
_flags: &shared_settings::Flags,
_current: bool,
) {
unimplemented!()
}
/// Get register class for a type appearing in a legalized signature.
pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
if ty.is_int() {
GPR
} else {
match ty.bits() {
32 => S,
64 => D,
128 => Q,
_ => panic!("Unexpected {} ABI type for arm32", ty),
}
}
}
/// Get the set of allocatable registers for `func`.
pub fn allocatable_registers(_func: &ir::Function) -> RegisterSet {
unimplemented!()
}

View File

@@ -0,0 +1,7 @@
//! Emitting binary ARM32 machine code.
use crate::binemit::{bad_encoding, CodeSink};
use crate::ir::{Function, Inst};
use crate::regalloc::RegDiversions;
include!(concat!(env!("OUT_DIR"), "/binemit-arm32.rs"));

View File

@@ -0,0 +1,9 @@
//! Encoding tables for ARM32 ISA.
use crate::isa;
use crate::isa::constraints::*;
use crate::isa::enc_tables::*;
use crate::isa::encoding::RecipeSizing;
include!(concat!(env!("OUT_DIR"), "/encoding-arm32.rs"));
include!(concat!(env!("OUT_DIR"), "/legalize-arm32.rs"));

View File

@@ -0,0 +1,136 @@
//! ARM 32-bit Instruction Set Architecture.
mod abi;
mod binemit;
mod enc_tables;
mod registers;
pub mod settings;
use super::super::settings as shared_settings;
#[cfg(feature = "testing_hooks")]
use crate::binemit::CodeSink;
use crate::binemit::{emit_function, MemoryCodeSink};
use crate::ir;
use crate::isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings};
use crate::isa::Builder as IsaBuilder;
use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
use crate::regalloc;
use core::fmt;
use std::boxed::Box;
use target_lexicon::{Architecture, Triple};
#[allow(dead_code)]
struct Isa {
triple: Triple,
shared_flags: shared_settings::Flags,
isa_flags: settings::Flags,
cpumode: &'static [shared_enc_tables::Level1Entry<u16>],
}
/// Get an ISA builder for creating ARM32 targets.
pub fn isa_builder(triple: Triple) -> IsaBuilder {
IsaBuilder {
triple,
setup: settings::builder(),
constructor: isa_constructor,
}
}
fn isa_constructor(
triple: Triple,
shared_flags: shared_settings::Flags,
builder: shared_settings::Builder,
) -> Box<TargetIsa> {
let level1 = match triple.architecture {
Architecture::Thumbv6m | Architecture::Thumbv7em | Architecture::Thumbv7m => {
&enc_tables::LEVEL1_T32[..]
}
Architecture::Arm
| Architecture::Armv4t
| Architecture::Armv5te
| Architecture::Armv7
| Architecture::Armv7s => &enc_tables::LEVEL1_A32[..],
_ => panic!("unsupported architecture for arm32"),
};
Box::new(Isa {
triple,
isa_flags: settings::Flags::new(&shared_flags, builder),
shared_flags,
cpumode: level1,
})
}
impl TargetIsa for Isa {
fn name(&self) -> &'static str {
"arm32"
}
fn triple(&self) -> &Triple {
&self.triple
}
fn flags(&self) -> &shared_settings::Flags {
&self.shared_flags
}
fn register_info(&self) -> RegInfo {
registers::INFO.clone()
}
fn encoding_info(&self) -> EncInfo {
enc_tables::INFO.clone()
}
fn legal_encodings<'a>(
&'a self,
func: &'a ir::Function,
inst: &'a ir::InstructionData,
ctrl_typevar: ir::Type,
) -> Encodings<'a> {
lookup_enclist(
ctrl_typevar,
inst,
func,
self.cpumode,
&enc_tables::LEVEL2[..],
&enc_tables::ENCLISTS[..],
&enc_tables::LEGALIZE_ACTIONS[..],
&enc_tables::RECIPE_PREDICATES[..],
&enc_tables::INST_PREDICATES[..],
self.isa_flags.predicate_view(),
)
}
fn legalize_signature(&self, sig: &mut ir::Signature, current: bool) {
abi::legalize_signature(sig, &self.shared_flags, current)
}
fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
abi::regclass_for_abi_type(ty)
}
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
abi::allocatable_registers(func)
}
#[cfg(feature = "testing_hooks")]
fn emit_inst(
&self,
func: &ir::Function,
inst: ir::Inst,
divert: &mut regalloc::RegDiversions,
sink: &mut CodeSink,
) {
binemit::emit_inst(func, inst, divert, sink)
}
fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
emit_function(func, binemit::emit_inst, sink)
}
}
impl fmt::Display for Isa {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}\n{}", self.shared_flags, self.isa_flags)
}
}

View File

@@ -0,0 +1,68 @@
//! ARM32 register descriptions.
use crate::isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit};
include!(concat!(env!("OUT_DIR"), "/registers-arm32.rs"));
#[cfg(test)]
mod tests {
use super::{D, GPR, INFO, S};
use crate::isa::RegUnit;
use std::string::{String, ToString};
#[test]
fn unit_encodings() {
assert_eq!(INFO.parse_regunit("s0"), Some(0));
assert_eq!(INFO.parse_regunit("s31"), Some(31));
assert_eq!(INFO.parse_regunit("s32"), Some(32));
assert_eq!(INFO.parse_regunit("r0"), Some(64));
assert_eq!(INFO.parse_regunit("r15"), Some(79));
}
#[test]
fn unit_names() {
fn uname(ru: RegUnit) -> String {
INFO.display_regunit(ru).to_string()
}
assert_eq!(uname(0), "%s0");
assert_eq!(uname(1), "%s1");
assert_eq!(uname(31), "%s31");
assert_eq!(uname(64), "%r0");
}
#[test]
fn overlaps() {
// arm32 has the most interesting register geometries, so test `regs_overlap()` here.
use crate::isa::regs_overlap;
let r0 = GPR.unit(0);
let r1 = GPR.unit(1);
let r2 = GPR.unit(2);
assert!(regs_overlap(GPR, r0, GPR, r0));
assert!(regs_overlap(GPR, r2, GPR, r2));
assert!(!regs_overlap(GPR, r0, GPR, r1));
assert!(!regs_overlap(GPR, r1, GPR, r0));
assert!(!regs_overlap(GPR, r2, GPR, r1));
assert!(!regs_overlap(GPR, r1, GPR, r2));
let s0 = S.unit(0);
let s1 = S.unit(1);
let s2 = S.unit(2);
let s3 = S.unit(3);
let d0 = D.unit(0);
let d1 = D.unit(1);
assert!(regs_overlap(S, s0, D, d0));
assert!(regs_overlap(S, s1, D, d0));
assert!(!regs_overlap(S, s0, D, d1));
assert!(!regs_overlap(S, s1, D, d1));
assert!(regs_overlap(S, s2, D, d1));
assert!(regs_overlap(S, s3, D, d1));
assert!(!regs_overlap(D, d1, S, s1));
assert!(regs_overlap(D, d1, S, s2));
assert!(!regs_overlap(D, d0, D, d1));
assert!(regs_overlap(D, d1, D, d1));
}
}

View File

@@ -0,0 +1,9 @@
//! ARM32 Settings.
use crate::settings::{self, detail, Builder};
use core::fmt;
// Include code generated by `cranelift-codegen/meta-python/gen_settings.py`. This file contains a public
// `Flags` struct with an impl for all of the settings defined in
// `cranelift-codegen/meta-python/isa/arm32/settings.py`.
include!(concat!(env!("OUT_DIR"), "/settings-arm32.rs"));

View File

@@ -0,0 +1,30 @@
//! ARM 64-bit ABI implementation.
use super::registers::{FPR, GPR};
use crate::ir;
use crate::isa::RegClass;
use crate::regalloc::RegisterSet;
use crate::settings as shared_settings;
/// Legalize `sig`.
pub fn legalize_signature(
_sig: &mut ir::Signature,
_flags: &shared_settings::Flags,
_current: bool,
) {
unimplemented!()
}
/// Get register class for a type appearing in a legalized signature.
pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
if ty.is_int() {
GPR
} else {
FPR
}
}
/// Get the set of allocatable registers for `func`.
pub fn allocatable_registers(_func: &ir::Function) -> RegisterSet {
unimplemented!()
}

View File

@@ -0,0 +1,7 @@
//! Emitting binary ARM64 machine code.
use crate::binemit::{bad_encoding, CodeSink};
use crate::ir::{Function, Inst};
use crate::regalloc::RegDiversions;
include!(concat!(env!("OUT_DIR"), "/binemit-arm64.rs"));

View File

@@ -0,0 +1,9 @@
//! Encoding tables for ARM64 ISA.
use crate::isa;
use crate::isa::constraints::*;
use crate::isa::enc_tables::*;
use crate::isa::encoding::RecipeSizing;
include!(concat!(env!("OUT_DIR"), "/encoding-arm64.rs"));
include!(concat!(env!("OUT_DIR"), "/legalize-arm64.rs"));

View File

@@ -0,0 +1,123 @@
//! ARM 64-bit Instruction Set Architecture.
mod abi;
mod binemit;
mod enc_tables;
mod registers;
pub mod settings;
use super::super::settings as shared_settings;
#[cfg(feature = "testing_hooks")]
use crate::binemit::CodeSink;
use crate::binemit::{emit_function, MemoryCodeSink};
use crate::ir;
use crate::isa::enc_tables::{lookup_enclist, Encodings};
use crate::isa::Builder as IsaBuilder;
use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
use crate::regalloc;
use core::fmt;
use std::boxed::Box;
use target_lexicon::Triple;
#[allow(dead_code)]
struct Isa {
triple: Triple,
shared_flags: shared_settings::Flags,
isa_flags: settings::Flags,
}
/// Get an ISA builder for creating ARM64 targets.
pub fn isa_builder(triple: Triple) -> IsaBuilder {
IsaBuilder {
triple,
setup: settings::builder(),
constructor: isa_constructor,
}
}
fn isa_constructor(
triple: Triple,
shared_flags: shared_settings::Flags,
builder: shared_settings::Builder,
) -> Box<TargetIsa> {
Box::new(Isa {
triple,
isa_flags: settings::Flags::new(&shared_flags, builder),
shared_flags,
})
}
impl TargetIsa for Isa {
fn name(&self) -> &'static str {
"arm64"
}
fn triple(&self) -> &Triple {
&self.triple
}
fn flags(&self) -> &shared_settings::Flags {
&self.shared_flags
}
fn register_info(&self) -> RegInfo {
registers::INFO.clone()
}
fn encoding_info(&self) -> EncInfo {
enc_tables::INFO.clone()
}
fn legal_encodings<'a>(
&'a self,
func: &'a ir::Function,
inst: &'a ir::InstructionData,
ctrl_typevar: ir::Type,
) -> Encodings<'a> {
lookup_enclist(
ctrl_typevar,
inst,
func,
&enc_tables::LEVEL1_A64[..],
&enc_tables::LEVEL2[..],
&enc_tables::ENCLISTS[..],
&enc_tables::LEGALIZE_ACTIONS[..],
&enc_tables::RECIPE_PREDICATES[..],
&enc_tables::INST_PREDICATES[..],
self.isa_flags.predicate_view(),
)
}
fn legalize_signature(&self, sig: &mut ir::Signature, current: bool) {
abi::legalize_signature(sig, &self.shared_flags, current)
}
fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
abi::regclass_for_abi_type(ty)
}
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
abi::allocatable_registers(func)
}
#[cfg(feature = "testing_hooks")]
fn emit_inst(
&self,
func: &ir::Function,
inst: ir::Inst,
divert: &mut regalloc::RegDiversions,
sink: &mut CodeSink,
) {
binemit::emit_inst(func, inst, divert, sink)
}
fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
emit_function(func, binemit::emit_inst, sink)
}
}
impl fmt::Display for Isa {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}\n{}", self.shared_flags, self.isa_flags)
}
}

View File

@@ -0,0 +1,39 @@
//! ARM64 register descriptions.
use crate::isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit};
include!(concat!(env!("OUT_DIR"), "/registers-arm64.rs"));
#[cfg(test)]
mod tests {
use super::INFO;
use crate::isa::RegUnit;
use std::string::{String, ToString};
#[test]
fn unit_encodings() {
assert_eq!(INFO.parse_regunit("x0"), Some(0));
assert_eq!(INFO.parse_regunit("x31"), Some(31));
assert_eq!(INFO.parse_regunit("v0"), Some(32));
assert_eq!(INFO.parse_regunit("v31"), Some(63));
assert_eq!(INFO.parse_regunit("x32"), None);
assert_eq!(INFO.parse_regunit("v32"), None);
}
#[test]
fn unit_names() {
fn uname(ru: RegUnit) -> String {
INFO.display_regunit(ru).to_string()
}
assert_eq!(uname(0), "%x0");
assert_eq!(uname(1), "%x1");
assert_eq!(uname(31), "%x31");
assert_eq!(uname(32), "%v0");
assert_eq!(uname(33), "%v1");
assert_eq!(uname(63), "%v31");
assert_eq!(uname(64), "%nzcv");
assert_eq!(uname(65), "%INVALID65");
}
}

View File

@@ -0,0 +1,9 @@
//! ARM64 Settings.
use crate::settings::{self, detail, Builder};
use core::fmt;
// Include code generated by `cranelift-codegen/meta-python/gen_settings.py`. This file contains a public
// `Flags` struct with an impl for all of the settings defined in
// `cranelift-codegen/meta-python/isa/arm64/settings.py`.
include!(concat!(env!("OUT_DIR"), "/settings-arm64.rs"));

View File

@@ -0,0 +1,60 @@
use core::fmt;
use core::str;
use target_lexicon::{CallingConvention, Triple};
/// Calling convention identifiers.
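///
/// Identifiers round-trip through their textual names, for example:
///
/// ```
/// use cranelift_codegen::isa::CallConv;
///
/// assert_eq!(CallConv::SystemV.to_string(), "system_v");
/// assert_eq!("fast".parse(), Ok(CallConv::Fast));
/// ```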
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum CallConv {
/// Best performance, not ABI-stable
Fast,
/// Smallest caller code size, not ABI-stable
Cold,
/// System V-style convention used on many platforms
SystemV,
/// Windows "fastcall" convention, also used for x64 and ARM
WindowsFastcall,
/// SpiderMonkey WebAssembly convention
Baldrdash,
/// Specialized convention for the probestack function
Probestack,
}
impl CallConv {
/// Return the default calling convention for the given target triple.
pub fn triple_default(triple: &Triple) -> Self {
match triple.default_calling_convention() {
// Default to System V for unknown targets because most everything
// uses System V.
Ok(CallingConvention::SystemV) | Err(()) => CallConv::SystemV,
Ok(CallingConvention::WindowsFastcall) => CallConv::WindowsFastcall,
}
}
}
impl fmt::Display for CallConv {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str(match *self {
CallConv::Fast => "fast",
CallConv::Cold => "cold",
CallConv::SystemV => "system_v",
CallConv::WindowsFastcall => "windows_fastcall",
CallConv::Baldrdash => "baldrdash",
CallConv::Probestack => "probestack",
})
}
}
impl str::FromStr for CallConv {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"fast" => Ok(CallConv::Fast),
"cold" => Ok(CallConv::Cold),
"system_v" => Ok(CallConv::SystemV),
"windows_fastcall" => Ok(CallConv::WindowsFastcall),
"baldrdash" => Ok(CallConv::Baldrdash),
"probestack" => Ok(CallConv::Probestack),
_ => Err(()),
}
}
}

View File

@@ -0,0 +1,207 @@
//! Register constraints for instruction operands.
//!
//! An encoding recipe specifies how an instruction is encoded as binary machine code, but it only
//! works if the operands and results satisfy certain constraints. Constraints on immediate
//! operands are checked by instruction predicates when the recipe is chosen.
//!
//! It is the register allocator's job to make sure that the register constraints on value operands
//! are satisfied.
use crate::binemit::CodeOffset;
use crate::ir::{Function, Inst, ValueLoc};
use crate::isa::{RegClass, RegUnit};
use crate::regalloc::RegDiversions;
/// Register constraint for a single value operand or instruction result.
#[derive(PartialEq, Debug)]
pub struct OperandConstraint {
/// The kind of constraint.
pub kind: ConstraintKind,
/// The register class of the operand.
///
/// This applies to all kinds of constraints, but with slightly different meaning.
pub regclass: RegClass,
}
impl OperandConstraint {
/// Check if this operand constraint is satisfied by the given value location.
/// For tied constraints, this only checks the register class, not that the
/// counterpart operand has the same value location.
pub fn satisfied(&self, loc: ValueLoc) -> bool {
match self.kind {
ConstraintKind::Reg | ConstraintKind::Tied(_) => {
if let ValueLoc::Reg(reg) = loc {
self.regclass.contains(reg)
} else {
false
}
}
ConstraintKind::FixedReg(reg) | ConstraintKind::FixedTied(reg) => {
loc == ValueLoc::Reg(reg) && self.regclass.contains(reg)
}
ConstraintKind::Stack => {
if let ValueLoc::Stack(_) = loc {
true
} else {
false
}
}
}
}
}
/// The different kinds of operand constraints.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum ConstraintKind {
/// This operand or result must be a register from the given register class.
Reg,
/// This operand or result must be a fixed register.
///
/// The constraint's `regclass` field is the top-level register class containing the fixed
/// register.
FixedReg(RegUnit),
/// This result value must use the same register as an input value operand.
///
/// The associated number is the index of the input value operand this result is tied to. The
/// constraint's `regclass` field is the same as the tied operand's register class.
///
/// When an (in, out) operand pair is tied, this constraint kind appears in both the `ins` and
/// the `outs` arrays. The constraint for the in operand is `Tied(out)`, and the constraint for
/// the out operand is `Tied(in)`.
Tied(u8),
/// This operand must be a fixed register, and it has a tied counterpart.
///
/// This works just like `FixedReg`, but additionally indicates that there are identical
/// input/output operands for this fixed register. For an input operand, this means that the
/// value will be clobbered by the instruction.
FixedTied(RegUnit),
/// This operand must be a value in a stack slot.
///
/// The constraint's `regclass` field is the register class that would normally be used to load
/// and store values of this type.
Stack,
}
/// Value operand constraints for an encoding recipe.
#[derive(PartialEq, Clone)]
pub struct RecipeConstraints {
/// Constraints for the instruction's fixed value operands.
///
/// If the instruction takes a variable number of operands, the register constraints for those
/// operands must be computed dynamically.
///
/// - For branches and jumps, EBB arguments must match the expectations of the destination EBB.
/// - For calls and returns, the calling convention ABI specifies constraints.
pub ins: &'static [OperandConstraint],
/// Constraints for the instruction's fixed results.
///
/// If the instruction produces a variable number of results, it's probably a call and the
/// constraints must be derived from the calling convention ABI.
pub outs: &'static [OperandConstraint],
/// Are any of the input constraints `FixedReg`?
pub fixed_ins: bool,
/// Are any of the output constraints `FixedReg`?
pub fixed_outs: bool,
/// Are there any tied operands?
pub tied_ops: bool,
/// Does this instruction clobber the CPU flags?
///
/// When true, SSA values of type `iflags` or `fflags` cannot be live across the instruction.
pub clobbers_flags: bool,
}
impl RecipeConstraints {
/// Check that these constraints are satisfied by the operands on `inst`.
pub fn satisfied(&self, inst: Inst, divert: &RegDiversions, func: &Function) -> bool {
for (&arg, constraint) in func.dfg.inst_args(inst).iter().zip(self.ins) {
let loc = divert.get(arg, &func.locations);
if let ConstraintKind::Tied(out_index) = constraint.kind {
let out_val = func.dfg.inst_results(inst)[out_index as usize];
let out_loc = func.locations[out_val];
if loc != out_loc {
return false;
}
}
if !constraint.satisfied(loc) {
return false;
}
}
for (&arg, constraint) in func.dfg.inst_results(inst).iter().zip(self.outs) {
let loc = divert.get(arg, &func.locations);
if !constraint.satisfied(loc) {
return false;
}
}
true
}
}
/// Constraints on the range of a branch instruction.
///
/// A branch instruction usually encodes its destination as a signed n-bit offset from an origin.
/// The origin depends on the ISA and the specific instruction:
///
/// - RISC-V and ARM AArch64 use the address of the branch instruction, `origin = 0`.
/// - x86 uses the address of the instruction following the branch, `origin = 2` for a 2-byte
/// branch instruction.
/// - ARM's A32 encoding uses the address of the branch instruction + 8 bytes, `origin = 8`.
#[derive(Clone, Copy, Debug)]
pub struct BranchRange {
/// Offset in bytes from the address of the branch instruction to the origin used for computing
/// the branch displacement. This is the destination of a branch that encodes a 0 displacement.
pub origin: u8,
/// Number of bits in the signed byte displacement encoded in the instruction. This does not
/// account for branches that can only target aligned addresses.
pub bits: u8,
}
impl BranchRange {
/// Determine if this branch range can represent the range from `branch` to `dest`, where
/// `branch` is the code offset of the branch instruction itself and `dest` is the code offset
/// of the destination EBB header.
///
/// This method does not detect if the range is larger than 2 GB.
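///
/// For example, a Thumb T1 branch (`origin = 4`, `bits = 9`) reaches targets within
/// `[-256, +255]` bytes of its origin:
///
/// ```
/// use cranelift_codegen::isa::BranchRange;
///
/// let t1 = BranchRange { origin: 4, bits: 9 };
/// assert!(t1.contains(1000, 1258)); // 1258 - (1000 + 4) = 254: in range.
/// assert!(!t1.contains(1000, 1260)); // 256 is out of range.
/// ```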
pub fn contains(self, branch: CodeOffset, dest: CodeOffset) -> bool {
let d = dest.wrapping_sub(branch + CodeOffset::from(self.origin)) as i32;
let s = 32 - self.bits;
d == d << s >> s
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn branch_range() {
// ARM T1 branch.
let t1 = BranchRange { origin: 4, bits: 9 };
assert!(t1.contains(0, 0));
assert!(t1.contains(0, 2));
assert!(t1.contains(2, 0));
assert!(t1.contains(1000, 1000));
// Forward limit.
assert!(t1.contains(1000, 1258));
assert!(!t1.contains(1000, 1260));
// Backward limit
assert!(t1.contains(1000, 748));
assert!(!t1.contains(1000, 746));
}
}

View File

@@ -0,0 +1,292 @@
//! Support types for generated encoding tables.
//!
//! This module contains types and functions for working with the encoding tables generated by
//! `cranelift-codegen/meta-python/gen_encoding.py`.
use crate::constant_hash::{probe, Table};
use crate::ir::{Function, InstructionData, Opcode, Type};
use crate::isa::{Encoding, Legalize};
use crate::settings::PredicateView;
use core::ops::Range;
/// A recipe predicate.
///
/// This is a predicate function capable of testing ISA and instruction predicates simultaneously.
///
/// A `None` predicate is always satisfied.
pub type RecipePredicate = Option<fn(PredicateView, &InstructionData) -> bool>;
/// An instruction predicate.
///
/// This is a predicate function that needs to be tested in addition to the recipe predicate. It
/// can't depend on ISA settings.
pub type InstPredicate = fn(&Function, &InstructionData) -> bool;
/// Legalization action to perform when no encoding can be found for an instruction.
///
/// This is an index into an ISA-specific table of legalization actions.
pub type LegalizeCode = u8;
/// Level 1 hash table entry.
///
/// One level 1 hash table is generated per CPU mode. This table is keyed by the controlling type
/// variable, using `INVALID` for non-polymorphic instructions.
///
/// The hash table values are references to level 2 hash tables, encoded as an offset in `LEVEL2`
/// where the table begins, and the binary logarithm of its length. All the level 2 hash tables
/// have a power-of-two size.
///
/// Entries are generic over the offset type. It will typically be `u32` or `u16`, depending on the
/// size of the `LEVEL2` table.
///
/// Empty entries are encoded with a `!0` value for `log2len`, which will always be out of range.
/// Entries that have a `legalize` value but no level 2 table have an `offset` field that is out of
/// bounds.
pub struct Level1Entry<OffT: Into<u32> + Copy> {
/// The controlling type variable for this entry.
pub ty: Type,
/// Binary logarithm of the length of the level 2 table, or `!0` for an empty entry.
pub log2len: u8,
/// Default legalization action for this type.
pub legalize: LegalizeCode,
/// Offset into `LEVEL2` where this entry's level 2 table begins.
pub offset: OffT,
}
impl<OffT: Into<u32> + Copy> Level1Entry<OffT> {
/// Get the level 2 table range indicated by this entry.
fn range(&self) -> Range<usize> {
let b = self.offset.into() as usize;
b..b + (1 << self.log2len)
}
}
impl<OffT: Into<u32> + Copy> Table<Type> for [Level1Entry<OffT>] {
fn len(&self) -> usize {
self.len()
}
fn key(&self, idx: usize) -> Option<Type> {
if self[idx].log2len != !0 {
Some(self[idx].ty)
} else {
None
}
}
}
/// Level 2 hash table entry.
///
/// The second level hash tables are keyed by `Opcode`, and contain an offset into the `ENCLISTS`
/// table where the encoding recipes for the instruction are stored.
///
/// Entries are generic over the offset type which depends on the size of `ENCLISTS`. A `u16`
/// offset allows the entries to be only 32 bits each. There is no benefit to dropping down to `u8`
/// for tiny ISAs. The entries won't shrink below 32 bits since the opcode is expected to be 16
/// bits.
///
/// Empty entries are encoded with `None` in the `opcode` field.
pub struct Level2Entry<OffT: Into<u32> + Copy> {
/// The instruction opcode, or `None` for an empty entry.
pub opcode: Option<Opcode>,
/// Offset into `ENCLISTS` where this instruction's encoding list begins.
pub offset: OffT,
}
impl<OffT: Into<u32> + Copy> Table<Opcode> for [Level2Entry<OffT>] {
fn len(&self) -> usize {
self.len()
}
fn key(&self, idx: usize) -> Option<Opcode> {
self[idx].opcode
}
}
/// Two-level hash table lookup and iterator construction.
///
/// Given the controlling type variable and instruction opcode, find the corresponding encoding
/// list.
///
/// Returns an iterator that produces legal encodings for `inst`.
pub fn lookup_enclist<'a, OffT1, OffT2>(
ctrl_typevar: Type,
inst: &'a InstructionData,
func: &'a Function,
level1_table: &'static [Level1Entry<OffT1>],
level2_table: &'static [Level2Entry<OffT2>],
enclist: &'static [EncListEntry],
legalize_actions: &'static [Legalize],
recipe_preds: &'static [RecipePredicate],
inst_preds: &'static [InstPredicate],
isa_preds: PredicateView<'a>,
) -> Encodings<'a>
where
OffT1: Into<u32> + Copy,
OffT2: Into<u32> + Copy,
{
let (offset, legalize) = match probe(level1_table, ctrl_typevar, ctrl_typevar.index()) {
Err(l1idx) => {
// No level 1 entry found for the type.
// We have a sentinel entry with the default legalization code.
(!0, level1_table[l1idx].legalize)
}
Ok(l1idx) => {
// We have a valid level 1 entry for this type.
let l1ent = &level1_table[l1idx];
let offset = match level2_table.get(l1ent.range()) {
Some(l2tab) => {
let opcode = inst.opcode();
match probe(l2tab, opcode, opcode as usize) {
Ok(l2idx) => l2tab[l2idx].offset.into() as usize,
Err(_) => !0,
}
}
// The l1ent range is invalid. This means that we just have a customized
// legalization code for this type. The level 2 table is empty.
None => !0,
};
(offset, l1ent.legalize)
}
};
// Now we have an offset into `enclist` that is `!0` when no encoding list could be found.
// The default legalization code is always valid.
Encodings::new(
offset,
legalize,
inst,
func,
enclist,
legalize_actions,
recipe_preds,
inst_preds,
isa_preds,
)
}
/// Encoding list entry.
///
/// Encoding lists are represented as sequences of u16 words.
pub type EncListEntry = u16;
/// Number of bits used to represent a predicate. Cf. `meta-python/gen_encoding.py`.
const PRED_BITS: u8 = 12;
const PRED_MASK: usize = (1 << PRED_BITS) - 1;
/// First code word representing a predicate check. Cf. `meta-python/gen_encoding.py`.
const PRED_START: usize = 0x1000;
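// An encoding list is a sequence of `EncListEntry` words decoded by `Encodings::next` below:
//
// - A word with `word >> 1 < recipe_preds.len()` is a "recipe+bits" pair: the recipe index is
//   `word >> 1`, the next word holds the encoding bits, and bit 0 tells whether this is the
//   last entry in the list.
// - Any other word below `PRED_START` means "stop and legalize"; the legalization code is the
//   word minus `2 * recipe_preds.len()`.
// - A word at or above `PRED_START` is a predicate check: the low `PRED_BITS` bits index an
//   instruction or ISA predicate, and the remaining bits give the number of words to skip when
//   the predicate fails (0 meaning stop).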
/// An iterator over legal encodings for the instruction.
pub struct Encodings<'a> {
// Current offset into `enclist`, or out of bounds after we've reached the end.
offset: usize,
// Legalization code to use if no encoding is found.
legalize: LegalizeCode,
inst: &'a InstructionData,
func: &'a Function,
enclist: &'static [EncListEntry],
legalize_actions: &'static [Legalize],
recipe_preds: &'static [RecipePredicate],
inst_preds: &'static [InstPredicate],
isa_preds: PredicateView<'a>,
}
impl<'a> Encodings<'a> {
/// Creates a new instance of `Encodings`.
///
/// This iterator searches for encodings that apply to the given instruction. The encoding
/// lists are laid out such that the first call to `next` returns a valid entry in the list or
/// `None`.
pub fn new(
offset: usize,
legalize: LegalizeCode,
inst: &'a InstructionData,
func: &'a Function,
enclist: &'static [EncListEntry],
legalize_actions: &'static [Legalize],
recipe_preds: &'static [RecipePredicate],
inst_preds: &'static [InstPredicate],
isa_preds: PredicateView<'a>,
) -> Self {
Encodings {
offset,
inst,
func,
legalize,
isa_preds,
recipe_preds,
inst_preds,
enclist,
legalize_actions,
}
}
/// Get the legalization action that caused the enumeration of encodings to stop.
/// This can be the default legalization action for the type or a custom code for the
/// instruction.
///
/// This method must only be called after the iterator returns `None`.
pub fn legalize(&self) -> Legalize {
debug_assert_eq!(self.offset, !0, "Premature Encodings::legalize()");
self.legalize_actions[self.legalize as usize]
}
/// Check if the `rpred` recipe predicate is satisfied.
fn check_recipe(&self, rpred: RecipePredicate) -> bool {
match rpred {
Some(p) => p(self.isa_preds, self.inst),
None => true,
}
}
/// Check an instruction or isa predicate.
fn check_pred(&self, pred: usize) -> bool {
if let Some(&p) = self.inst_preds.get(pred) {
p(self.func, self.inst)
} else {
let pred = pred - self.inst_preds.len();
self.isa_preds.test(pred)
}
}
}
impl<'a> Iterator for Encodings<'a> {
type Item = Encoding;
fn next(&mut self) -> Option<Encoding> {
while let Some(entryref) = self.enclist.get(self.offset) {
let entry = *entryref as usize;
// Check for "recipe+bits".
let recipe = entry >> 1;
if let Some(&rpred) = self.recipe_preds.get(recipe) {
let bits = self.offset + 1;
if entry & 1 == 0 {
self.offset += 2; // Next entry.
} else {
self.offset = !0; // Stop.
}
if self.check_recipe(rpred) {
return Some(Encoding::new(recipe as u16, self.enclist[bits]));
}
continue;
}
// Check for "stop with legalize".
if entry < PRED_START {
self.legalize = (entry - 2 * self.recipe_preds.len()) as LegalizeCode;
self.offset = !0; // Stop.
return None;
}
// Finally, this must be a predicate entry.
let pred_entry = entry - PRED_START;
let skip = pred_entry >> PRED_BITS;
let pred = pred_entry & PRED_MASK;
if self.check_pred(pred) {
self.offset += 1;
} else if skip == 0 {
self.offset = !0; // Stop.
return None;
} else {
self.offset += 1 + skip;
}
}
None
}
}

View File

@@ -0,0 +1,157 @@
//! The `Encoding` struct.
use crate::binemit::CodeOffset;
use crate::ir::{Function, Inst};
use crate::isa::constraints::{BranchRange, RecipeConstraints};
use crate::regalloc::RegDiversions;
use core::fmt;
/// Bits needed to encode an instruction as binary machine code.
///
/// The encoding consists of two parts, both specific to the target ISA: An encoding *recipe*, and
/// encoding *bits*. The recipe determines the native instruction format and the mapping of
/// operands to encoded bits. The encoding bits provide additional information to the recipe,
/// typically parts of the opcode.
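///
/// A small usage sketch:
///
/// ```
/// use cranelift_codegen::isa::Encoding;
///
/// let enc = Encoding::new(3, 0x1f);
/// assert_eq!(enc.recipe(), 3);
/// assert_eq!(enc.bits(), 0x1f);
/// assert!(enc.is_legal());
/// // The default encoding is the illegal placeholder.
/// assert!(!Encoding::default().is_legal());
/// ```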
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Encoding {
recipe: u16,
bits: u16,
}
impl Encoding {
/// Create a new `Encoding` containing `(recipe, bits)`.
pub fn new(recipe: u16, bits: u16) -> Self {
Self { recipe, bits }
}
/// Get the recipe number in this encoding.
pub fn recipe(self) -> usize {
self.recipe as usize
}
/// Get the recipe-specific encoding bits.
pub fn bits(self) -> u16 {
self.bits
}
/// Is this a legal encoding, or the default placeholder?
pub fn is_legal(self) -> bool {
self != Self::default()
}
}
/// The default encoding is the illegal one.
impl Default for Encoding {
fn default() -> Self {
Self::new(0xffff, 0xffff)
}
}
/// ISA-independent display of an encoding.
impl fmt::Display for Encoding {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if self.is_legal() {
write!(f, "{}#{:02x}", self.recipe, self.bits)
} else {
write!(f, "-")
}
}
}
/// Temporary object that holds enough context to properly display an encoding.
/// This is meant to be created by `EncInfo::display()`.
pub struct DisplayEncoding {
pub encoding: Encoding,
pub recipe_names: &'static [&'static str],
}
impl fmt::Display for DisplayEncoding {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if self.encoding.is_legal() {
write!(
f,
"{}#{:02x}",
self.recipe_names[self.encoding.recipe()],
self.encoding.bits
)
} else {
write!(f, "-")
}
}
}
type SizeCalculatorFn = fn(&RecipeSizing, Inst, &RegDiversions, &Function) -> u8;
/// Returns the base size of the recipe, which is assumed to be fixed. This is the default for
/// most encodings; others can be variable and longer than this base size, depending on the
/// registers they use, and compute their size with a different, platform-specific function.
pub fn base_size(sizing: &RecipeSizing, _: Inst, _: &RegDiversions, _: &Function) -> u8 {
sizing.base_size
}
/// Code size information for an encoding recipe.
///
/// Most encoding recipes correspond to an exact instruction size; others compute the size of
/// each instruction with their `compute_size` callback.
pub struct RecipeSizing {
/// Size in bytes of instructions encoded with this recipe.
pub base_size: u8,
/// Method computing the real instruction's size, given inputs and outputs.
pub compute_size: SizeCalculatorFn,
/// Allowed branch range in this recipe, if any.
///
/// All encoding recipes for branches have exact branch range information.
pub branch_range: Option<BranchRange>,
}
/// Information about all the encodings in this ISA.
#[derive(Clone)]
pub struct EncInfo {
/// Constraints on value operands per recipe.
pub constraints: &'static [RecipeConstraints],
/// Code size information per recipe.
pub sizing: &'static [RecipeSizing],
/// Names of encoding recipes.
pub names: &'static [&'static str],
}
impl EncInfo {
/// Get the value operand constraints for `enc` if it is a legal encoding.
pub fn operand_constraints(&self, enc: Encoding) -> Option<&'static RecipeConstraints> {
self.constraints.get(enc.recipe())
}
/// Create an object that can display an ISA-dependent encoding properly.
pub fn display(&self, enc: Encoding) -> DisplayEncoding {
DisplayEncoding {
encoding: enc,
recipe_names: self.names,
}
}
/// Get the precise size in bytes of instructions encoded with `enc`.
///
/// Returns 0 for illegal encodings.
pub fn byte_size(
&self,
enc: Encoding,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> CodeOffset {
self.sizing.get(enc.recipe()).map_or(0, |s| {
let compute_size = s.compute_size;
CodeOffset::from(compute_size(&s, inst, divert, func))
})
}
/// Get the branch range that is supported by `enc`, if any.
///
/// This will never return `None` for a legal branch encoding.
pub fn branch_range(&self, enc: Encoding) -> Option<BranchRange> {
self.sizing.get(enc.recipe()).and_then(|s| s.branch_range)
}
}

View File

@@ -0,0 +1,375 @@
//! Instruction Set Architectures.
//!
//! The `isa` module provides a `TargetIsa` trait which provides the behavior specialization needed
//! by the ISA-independent code generator. The sub-modules of this module provide definitions for
//! the instruction sets that Cranelift can target. Each sub-module has its own implementation of
//! `TargetIsa`.
//!
//! # Constructing a `TargetIsa` instance
//!
//! The target ISA is built from the following information:
//!
//! - The name of the target ISA as a string. Cranelift is a cross-compiler, so the ISA to target
//! can be selected dynamically. Individual ISAs can be left out when Cranelift is compiled, so a
//! string is used to identify the proper sub-module.
//! - Values for settings that apply to all ISAs. This is represented by a `settings::Flags`
//! instance.
//! - Values for ISA-specific settings.
//!
//! The `isa::lookup()` function is the main entry point which returns an `isa::Builder`
//! appropriate for the requested ISA:
//!
//! ```
//! # extern crate cranelift_codegen;
//! # #[macro_use] extern crate target_lexicon;
//! # fn main() {
//! use cranelift_codegen::isa;
//! use cranelift_codegen::settings::{self, Configurable};
//! use std::str::FromStr;
//! use target_lexicon::Triple;
//!
//! let shared_builder = settings::builder();
//! let shared_flags = settings::Flags::new(shared_builder);
//!
//! match isa::lookup(triple!("riscv32")) {
//! Err(_) => {
//! // The RISC-V target ISA is not available.
//! }
//! Ok(mut isa_builder) => {
//! isa_builder.set("supports_m", "on");
//! let isa = isa_builder.finish(shared_flags);
//! }
//! }
//! # }
//! ```
//!
//! The configured target ISA trait object is a `Box<TargetIsa>` which can be used for multiple
//! concurrent function compilations.
pub use crate::isa::call_conv::CallConv;
pub use crate::isa::constraints::{
BranchRange, ConstraintKind, OperandConstraint, RecipeConstraints,
};
pub use crate::isa::encoding::{base_size, EncInfo, Encoding};
pub use crate::isa::registers::{regs_overlap, RegClass, RegClassIndex, RegInfo, RegUnit};
pub use crate::isa::stack::{StackBase, StackBaseMask, StackRef};
use crate::binemit;
use crate::flowgraph;
use crate::ir;
use crate::isa::enc_tables::Encodings;
use crate::regalloc;
use crate::result::CodegenResult;
use crate::settings;
use crate::settings::SetResult;
use crate::timing;
use core::fmt;
use failure_derive::Fail;
use std::boxed::Box;
use target_lexicon::{Architecture, PointerWidth, Triple};
#[cfg(build_riscv)]
mod riscv;
#[cfg(build_x86)]
mod x86;
#[cfg(build_arm32)]
mod arm32;
#[cfg(build_arm64)]
mod arm64;
mod call_conv;
mod constraints;
mod enc_tables;
mod encoding;
pub mod registers;
mod stack;
/// Returns a builder that can create a corresponding `TargetIsa`
/// or `Err(LookupError::Unsupported)` if not enabled.
macro_rules! isa_builder {
($module:ident, $name:ident) => {{
#[cfg($name)]
fn $name(triple: Triple) -> Result<Builder, LookupError> {
Ok($module::isa_builder(triple))
};
#[cfg(not($name))]
fn $name(_triple: Triple) -> Result<Builder, LookupError> {
Err(LookupError::Unsupported)
}
$name
}};
}
/// Look for a supported ISA with the given `name`.
/// Return a builder that can create a corresponding `TargetIsa`.
pub fn lookup(triple: Triple) -> Result<Builder, LookupError> {
match triple.architecture {
Architecture::Riscv32 | Architecture::Riscv64 => isa_builder!(riscv, build_riscv)(triple),
Architecture::I386 | Architecture::I586 | Architecture::I686 | Architecture::X86_64 => {
isa_builder!(x86, build_x86)(triple)
}
Architecture::Thumbv6m
| Architecture::Thumbv7em
| Architecture::Thumbv7m
| Architecture::Arm
| Architecture::Armv4t
| Architecture::Armv5te
| Architecture::Armv7
| Architecture::Armv7s => isa_builder!(arm32, build_arm32)(triple),
Architecture::Aarch64 => isa_builder!(arm64, build_arm64)(triple),
_ => Err(LookupError::Unsupported),
}
}
/// Describes the reason for a target lookup failure
#[derive(Fail, PartialEq, Eq, Copy, Clone, Debug)]
pub enum LookupError {
/// Support for this target was disabled in the current build.
#[fail(display = "Support for this target is disabled")]
SupportDisabled,
/// Support for this target has not yet been implemented.
#[fail(display = "Support for this target has not been implemented yet")]
Unsupported,
}
/// Builder for a `TargetIsa`.
/// Modify the ISA-specific settings before creating the `TargetIsa` trait object with `finish`.
pub struct Builder {
triple: Triple,
setup: settings::Builder,
constructor: fn(Triple, settings::Flags, settings::Builder) -> Box<TargetIsa>,
}
impl Builder {
/// Combine the ISA-specific settings with the provided ISA-independent settings and allocate a
/// fully configured `TargetIsa` trait object.
pub fn finish(self, shared_flags: settings::Flags) -> Box<TargetIsa> {
(self.constructor)(self.triple, shared_flags, self.setup)
}
}
impl settings::Configurable for Builder {
fn set(&mut self, name: &str, value: &str) -> SetResult<()> {
self.setup.set(name, value)
}
fn enable(&mut self, name: &str) -> SetResult<()> {
self.setup.enable(name)
}
}
/// After determining that an instruction doesn't have an encoding, how should we proceed to
/// legalize it?
///
/// The `Encodings` iterator returns a legalization function to call.
pub type Legalize =
fn(ir::Inst, &mut ir::Function, &mut flowgraph::ControlFlowGraph, &TargetIsa) -> bool;
/// This struct provides information that a frontend may need to know about a target to
/// produce Cranelift IR for the target.
#[derive(Clone, Copy)]
pub struct TargetFrontendConfig {
/// The default calling convention of the target.
pub default_call_conv: CallConv,
/// The pointer width of the target.
pub pointer_width: PointerWidth,
}
impl TargetFrontendConfig {
/// Get the pointer type of this target.
pub fn pointer_type(self) -> ir::Type {
ir::Type::int(u16::from(self.pointer_bits())).unwrap()
}
/// Get the width of pointers on this target, in units of bits.
pub fn pointer_bits(self) -> u8 {
self.pointer_width.bits()
}
/// Get the width of pointers on this target, in units of bytes.
pub fn pointer_bytes(self) -> u8 {
self.pointer_width.bytes()
}
}
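// An illustrative sketch, not part of the original commit: constructing a
// hypothetical 64-bit `TargetFrontendConfig` and checking the derived
// pointer properties. Assumes only the public items above.
#[cfg(test)]
mod frontend_config_sketch {
    use super::{CallConv, TargetFrontendConfig};
    use crate::ir;
    use target_lexicon::PointerWidth;
    #[test]
    fn pointer_type_of_64_bit_target() {
        let cfg = TargetFrontendConfig {
            default_call_conv: CallConv::SystemV,
            pointer_width: PointerWidth::U64,
        };
        assert_eq!(cfg.pointer_bits(), 64);
        assert_eq!(cfg.pointer_bytes(), 8);
        assert_eq!(cfg.pointer_type(), ir::types::I64);
    }
}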
/// Methods that are specialized to a target ISA. Also requires a `Display` implementation that
/// shows the shared flags, as well as any ISA-specific flags.
pub trait TargetIsa: fmt::Display + Sync {
/// Get the name of this ISA.
fn name(&self) -> &'static str;
/// Get the target triple that was used to make this trait object.
fn triple(&self) -> &Triple;
/// Get the ISA-independent flags that were used to make this trait object.
fn flags(&self) -> &settings::Flags;
/// Get the default calling convention of this target.
fn default_call_conv(&self) -> CallConv {
CallConv::triple_default(self.triple())
}
/// Get the pointer type of this ISA.
fn pointer_type(&self) -> ir::Type {
ir::Type::int(u16::from(self.pointer_bits())).unwrap()
}
/// Get the width of pointers on this ISA.
fn pointer_width(&self) -> PointerWidth {
self.triple().pointer_width().unwrap()
}
/// Get the width of pointers on this ISA, in units of bits.
fn pointer_bits(&self) -> u8 {
self.pointer_width().bits()
}
/// Get the width of pointers on this ISA, in units of bytes.
fn pointer_bytes(&self) -> u8 {
self.pointer_width().bytes()
}
/// Get the information needed by frontends producing Cranelift IR.
fn frontend_config(&self) -> TargetFrontendConfig {
TargetFrontendConfig {
default_call_conv: self.default_call_conv(),
pointer_width: self.pointer_width(),
}
}
/// Does the CPU implement scalar comparisons using a CPU flags register?
fn uses_cpu_flags(&self) -> bool {
false
}
/// Does the CPU implement multi-register addressing?
fn uses_complex_addresses(&self) -> bool {
false
}
/// Get a data structure describing the registers in this ISA.
fn register_info(&self) -> RegInfo;
/// Returns an iterator over legal encodings for the instruction.
fn legal_encodings<'a>(
&'a self,
func: &'a ir::Function,
inst: &'a ir::InstructionData,
ctrl_typevar: ir::Type,
) -> Encodings<'a>;
/// Encode an instruction after determining it is legal.
///
/// If `inst` can legally be encoded in this ISA, produce the corresponding `Encoding` object.
/// Otherwise, return a `Legalize` action.
///
/// This is also the main entry point for determining if an instruction is legal.
fn encode(
&self,
func: &ir::Function,
inst: &ir::InstructionData,
ctrl_typevar: ir::Type,
) -> Result<Encoding, Legalize> {
let mut iter = self.legal_encodings(func, inst, ctrl_typevar);
iter.next().ok_or_else(|| iter.legalize())
}
/// Get a data structure describing the instruction encodings in this ISA.
fn encoding_info(&self) -> EncInfo;
/// Legalize a function signature.
///
/// This is used to legalize both the signature of the function being compiled and any called
/// functions. The signature should be modified by adding `ArgumentLoc` annotations to all
/// arguments and return values.
///
/// Arguments with types that are not supported by the ABI can be expanded into multiple
/// arguments:
///
/// - Integer types that are too large to fit in a register can be broken into multiple
/// arguments of a smaller integer type.
/// - Floating point types can be bit-cast to an integer type of the same size, and possibly
///   broken into smaller integer types.
/// - Vector types can be bit-cast and broken down into smaller vectors or scalars.
///
/// The legalizer will adapt argument and return values as necessary at all ABI boundaries.
///
/// When this function is called to legalize the signature of the function currently being
/// compiled, `current` is true. The legalized signature can then also contain special purpose
/// arguments and return values such as:
///
/// - A `link` argument representing the link register on RISC architectures that don't push
/// the return address on the stack.
/// - A `link` return value which will receive the value that was passed to the `link`
/// argument.
/// - An `sret` argument can be added if one wasn't present already. This is necessary if the
/// signature returns more values than registers are available for returning values.
/// - An `sret` return value can be added if the ABI requires a function to return its `sret`
/// argument in a register.
///
/// Arguments and return values for the caller's frame pointer and other callee-saved registers
/// should not be added by this function. These arguments are not added until after register
/// allocation.
fn legalize_signature(&self, sig: &mut ir::Signature, current: bool);
/// Get the register class that should be used to represent an ABI argument or return value of
/// type `ty`. This should be the top-level register class that contains the argument
/// registers.
///
/// This function can assume that it will only be asked to provide register classes for types
/// that `legalize_signature()` produces in `ArgumentLoc::Reg` entries.
fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass;
/// Get the set of allocatable registers that can be used when compiling `func`.
///
/// This set excludes reserved registers like the stack pointer and other special-purpose
/// registers.
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet;
/// Compute the stack layout and insert prologue and epilogue code into `func`.
///
/// Return an error if the stack frame is too large.
fn prologue_epilogue(&self, func: &mut ir::Function) -> CodegenResult<()> {
let _tt = timing::prologue_epilogue();
// This default implementation is unlikely to be good enough.
use crate::ir::stackslot::{StackOffset, StackSize};
use crate::stack_layout::layout_stack;
let word_size = StackSize::from(self.pointer_bytes());
// Account for the SpiderMonkey standard prologue pushes.
if func.signature.call_conv == CallConv::Baldrdash {
let bytes = StackSize::from(self.flags().baldrdash_prologue_words()) * word_size;
let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes);
ss.offset = Some(-(bytes as StackOffset));
func.stack_slots.push(ss);
}
layout_stack(&mut func.stack_slots, word_size)?;
Ok(())
}
/// Emit binary machine code for a single instruction into the `sink` trait object.
///
/// Note that this will call `put*` methods on the `sink` trait object via its vtable which
/// is not the fastest way of emitting code.
///
/// This function is under the "testing_hooks" feature, and is only suitable for use by
/// test harnesses. It increases code size, and is inefficient.
#[cfg(feature = "testing_hooks")]
fn emit_inst(
&self,
func: &ir::Function,
inst: ir::Inst,
divert: &mut regalloc::RegDiversions,
sink: &mut binemit::CodeSink,
);
/// Emit a whole function into memory.
fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut binemit::MemoryCodeSink);
}

View File

@@ -0,0 +1,325 @@
//! Data structures describing the registers in an ISA.
use crate::entity::EntityRef;
use core::fmt;
/// Register units are the smallest units of register allocation.
///
/// Normally there is a 1-1 correspondence between registers and register units, but when an ISA
/// has aliasing registers, the aliasing can be modeled with registers that cover multiple
/// register units.
///
/// The register allocator will enforce that each register unit only gets used for one thing.
pub type RegUnit = u16;
/// A bit mask indexed by register units.
///
/// The size of this type is determined by the target ISA that has the most register units defined.
/// Currently that is arm32, which has 64+16 units.
///
/// This type should be coordinated with meta-python/cdsl/registers.py.
pub type RegUnitMask = [u32; 3];
/// A bit mask indexed by register classes.
///
/// The size of this type is determined by the ISA with the most register classes.
///
/// This type should be coordinated with meta-python/cdsl/isa.py.
pub type RegClassMask = u32;
/// Guaranteed maximum number of top-level register classes with pressure tracking in any ISA.
///
/// This can be increased, but should be coordinated with meta-python/cdsl/isa.py.
pub const MAX_TRACKED_TOPRCS: usize = 4;
/// The register units in a target ISA are divided into disjoint register banks. Each bank covers a
/// contiguous range of register units.
///
/// The `RegBank` struct provides a static description of a register bank.
pub struct RegBank {
/// The name of this register bank as defined in the ISA's `registers.py` file.
pub name: &'static str,
/// The first register unit in this bank.
pub first_unit: RegUnit,
/// The total number of register units in this bank.
pub units: RegUnit,
/// Array of specially named register units. This array can be shorter than the number of units
/// in the bank.
pub names: &'static [&'static str],
/// Name prefix to use for those register units in the bank not covered by the `names` array.
/// The remaining register units will be named this prefix followed by their decimal offset in
/// the bank. So with a prefix `r`, registers will be named `r8`, `r9`, ...
pub prefix: &'static str,
/// Index of the first top-level register class in this bank.
pub first_toprc: usize,
/// Number of top-level register classes in this bank.
///
/// The top-level register classes in a bank are guaranteed to be numbered sequentially from
/// `first_toprc`, and all top-level register classes across banks come before any sub-classes.
pub num_toprcs: usize,
/// Is register pressure tracking enabled for this bank?
pub pressure_tracking: bool,
}
impl RegBank {
/// Does this bank contain `regunit`?
fn contains(&self, regunit: RegUnit) -> bool {
regunit >= self.first_unit && regunit - self.first_unit < self.units
}
/// Try to parse a regunit name. The name is not expected to begin with `%`.
fn parse_regunit(&self, name: &str) -> Option<RegUnit> {
match self.names.iter().position(|&x| x == name) {
Some(offset) => {
// This is one of the special-cased names.
Some(offset as RegUnit)
}
None => {
// Try a regular prefixed name.
if name.starts_with(self.prefix) {
name[self.prefix.len()..].parse().ok()
} else {
None
}
}
}
.and_then(|offset| {
if offset < self.units {
Some(offset + self.first_unit)
} else {
None
}
})
}
/// Write `regunit` to `w`, assuming that it belongs to this bank.
/// All regunits are written with a `%` prefix.
fn write_regunit(&self, f: &mut fmt::Formatter, regunit: RegUnit) -> fmt::Result {
let offset = regunit - self.first_unit;
assert!(offset < self.units);
if (offset as usize) < self.names.len() {
write!(f, "%{}", self.names[offset as usize])
} else {
write!(f, "%{}{}", self.prefix, offset)
}
}
}
/// A register class reference.
///
/// All register classes are statically defined in tables generated from the meta descriptions.
pub type RegClass = &'static RegClassData;
/// Data about a register class.
///
/// A register class represents a subset of the registers in a bank. It describes the set of
/// permitted registers for a register operand in a given encoding of an instruction.
///
/// A register class can be a subset of another register class. The top-level register classes are
/// disjoint.
pub struct RegClassData {
/// The name of the register class.
pub name: &'static str,
/// The index of this class in the ISA's RegInfo description.
pub index: u8,
/// How many register units to allocate per register.
pub width: u8,
/// Index of the register bank this class belongs to.
pub bank: u8,
/// Index of the top-level register class that contains this one.
pub toprc: u8,
/// The first register unit in this class.
pub first: RegUnit,
/// Bit-mask of sub-classes of this register class, including itself.
///
/// Bits correspond to RC indexes.
pub subclasses: RegClassMask,
/// Mask of register units in the class. If `width > 1`, the mask only has a bit set for the
/// first register unit in each allocatable register.
pub mask: RegUnitMask,
/// The global `RegInfo` instance containing this register class.
pub info: &'static RegInfo,
}
impl RegClassData {
/// Get the register class index corresponding to the intersection of `self` and `other`.
///
/// This register class is guaranteed to exist if the register classes overlap. If the register
/// classes don't overlap, returns `None`.
pub fn intersect_index(&self, other: RegClass) -> Option<RegClassIndex> {
// Compute the set of common subclasses.
let mask = self.subclasses & other.subclasses;
if mask == 0 {
// No overlap.
None
} else {
// Register class indexes are topologically ordered, so the largest common subclass has
// the smallest index.
Some(RegClassIndex(mask.trailing_zeros() as u8))
}
}
/// Get the intersection of `self` and `other`.
pub fn intersect(&self, other: RegClass) -> Option<RegClass> {
self.intersect_index(other).map(|rci| self.info.rc(rci))
}
/// Returns true if `other` is a subclass of this register class.
/// A register class is considered to be a subclass of itself.
pub fn has_subclass<RCI: Into<RegClassIndex>>(&self, other: RCI) -> bool {
self.subclasses & (1 << other.into().0) != 0
}
/// Get the top-level register class containing this class.
pub fn toprc(&self) -> RegClass {
self.info.rc(RegClassIndex(self.toprc))
}
/// Get a specific register unit in this class.
pub fn unit(&self, offset: usize) -> RegUnit {
let uoffset = offset * usize::from(self.width);
self.first + uoffset as RegUnit
}
/// Does this register class contain `regunit`?
pub fn contains(&self, regunit: RegUnit) -> bool {
self.mask[(regunit / 32) as usize] & (1u32 << (regunit % 32)) != 0
}
}
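// An illustrative sketch, not part of the original commit: the subclass-mask
// arithmetic used by `intersect_index` above, with hypothetical masks rather
// than any real ISA's tables.
#[cfg(test)]
mod intersect_sketch {
    #[test]
    fn lowest_common_subclass() {
        // Hypothetical: class A has subclasses {0, 2, 3}; class B has {1, 2, 3}.
        let a: u32 = 0b1101;
        let b: u32 = 0b1110;
        let common = a & b; // {2, 3}
        // Indexes are topologically ordered, so the largest common subclass
        // is the lowest set bit.
        assert_eq!(common.trailing_zeros(), 2);
    }
}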
impl fmt::Display for RegClassData {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str(self.name)
}
}
impl fmt::Debug for RegClassData {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str(self.name)
}
}
/// Within an ISA, register classes are uniquely identified by their index.
impl PartialEq for RegClassData {
fn eq(&self, other: &Self) -> bool {
self.index == other.index
}
}
/// A small reference to a register class.
///
/// Use this when storing register classes in compact data structures. The `RegInfo::rc()` method
/// can be used to get the real register class reference back.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct RegClassIndex(u8);
impl EntityRef for RegClassIndex {
fn new(idx: usize) -> Self {
RegClassIndex(idx as u8)
}
fn index(self) -> usize {
usize::from(self.0)
}
}
impl From<RegClass> for RegClassIndex {
fn from(rc: RegClass) -> Self {
RegClassIndex(rc.index)
}
}
impl fmt::Display for RegClassIndex {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "rci{}", self.0)
}
}
/// Test whether two registers overlap.
///
/// A register is identified as a `(RegClass, RegUnit)` pair. The register class is needed to
/// determine the width (in regunits) of the register.
pub fn regs_overlap(rc1: RegClass, reg1: RegUnit, rc2: RegClass, reg2: RegUnit) -> bool {
let end1 = reg1 + RegUnit::from(rc1.width);
let end2 = reg2 + RegUnit::from(rc2.width);
!(end1 <= reg2 || end2 <= reg1)
}
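// An illustrative sketch, not part of the original commit: the half-open
// interval logic behind `regs_overlap`, restated over plain unit ranges with
// hypothetical widths.
#[cfg(test)]
mod overlap_sketch {
    fn ranges_overlap(start1: u16, width1: u16, start2: u16, width2: u16) -> bool {
        let end1 = start1 + width1;
        let end2 = start2 + width2;
        !(end1 <= start2 || end2 <= start1)
    }
    #[test]
    fn adjacent_and_overlapping() {
        // Width-2 registers at units 0 and 1 share unit 1.
        assert!(ranges_overlap(0, 2, 1, 2));
        // Width-2 registers at units 0 and 2 are adjacent, not overlapping.
        assert!(!ranges_overlap(0, 2, 2, 2));
    }
}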
/// Information about the registers in an ISA.
///
/// The `RegInfo` data structure collects all relevant static information about the registers in an
/// ISA.
#[derive(Clone)]
pub struct RegInfo {
/// All register banks, ordered by their `first_unit`. The register banks are disjoint, but
/// there may be holes of unused register unit numbers between banks due to alignment.
pub banks: &'static [RegBank],
/// All register classes ordered topologically so a sub-class always follows its parent.
pub classes: &'static [RegClass],
}
impl RegInfo {
/// Get the register bank holding `regunit`.
pub fn bank_containing_regunit(&self, regunit: RegUnit) -> Option<&RegBank> {
// We could do a binary search, but most ISAs have only two register banks...
self.banks.iter().find(|b| b.contains(regunit))
}
/// Try to parse a regunit name. The name is not expected to begin with `%`.
pub fn parse_regunit(&self, name: &str) -> Option<RegUnit> {
self.banks
.iter()
.filter_map(|b| b.parse_regunit(name))
.next()
}
/// Make a temporary object that can display a register unit.
pub fn display_regunit(&self, regunit: RegUnit) -> DisplayRegUnit {
DisplayRegUnit {
regunit,
reginfo: self,
}
}
/// Get the register class corresponding to `idx`.
pub fn rc(&self, idx: RegClassIndex) -> RegClass {
self.classes[idx.index()]
}
/// Get the top-level register class containing the `idx` class.
pub fn toprc(&self, idx: RegClassIndex) -> RegClass {
self.classes[self.rc(idx).toprc as usize]
}
}
/// Temporary object that holds enough information to print a register unit.
pub struct DisplayRegUnit<'a> {
regunit: RegUnit,
reginfo: &'a RegInfo,
}
impl<'a> fmt::Display for DisplayRegUnit<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.reginfo.bank_containing_regunit(self.regunit) {
Some(b) => b.write_regunit(f, self.regunit),
None => write!(f, "%INVALID{}", self.regunit),
}
}
}

View File

@@ -0,0 +1,144 @@
//! RISC-V ABI implementation.
//!
//! This module implements the RISC-V calling convention through the primary `legalize_signature()`
//! entry point.
//!
//! This doesn't support the soft-float ABI at the moment.
use super::registers::{FPR, GPR};
use super::settings;
use crate::abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion};
use crate::ir::{self, AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, Type};
use crate::isa::RegClass;
use crate::regalloc::RegisterSet;
use core::i32;
use target_lexicon::Triple;
struct Args {
pointer_bits: u8,
pointer_bytes: u8,
pointer_type: Type,
regs: u32,
reg_limit: u32,
offset: u32,
}
impl Args {
fn new(bits: u8, enable_e: bool) -> Self {
Self {
pointer_bits: bits,
pointer_bytes: bits / 8,
pointer_type: Type::int(u16::from(bits)).unwrap(),
regs: 0,
reg_limit: if enable_e { 6 } else { 8 },
offset: 0,
}
}
}
impl ArgAssigner for Args {
fn assign(&mut self, arg: &AbiParam) -> ArgAction {
fn align(value: u32, to: u32) -> u32 {
(value + to - 1) & !(to - 1)
}
let ty = arg.value_type;
// Check for a legal type.
// RISC-V doesn't have SIMD at all, so break all vectors down.
if ty.is_vector() {
return ValueConversion::VectorSplit.into();
}
// Large integers and booleans are broken down to fit in a register.
if !ty.is_float() && ty.bits() > u16::from(self.pointer_bits) {
// Align registers and stack to a multiple of two pointers.
self.regs = align(self.regs, 2);
self.offset = align(self.offset, 2 * u32::from(self.pointer_bytes));
return ValueConversion::IntSplit.into();
}
// Small integers are extended to the size of a pointer register.
if ty.is_int() && ty.bits() < u16::from(self.pointer_bits) {
match arg.extension {
ArgumentExtension::None => {}
ArgumentExtension::Uext => return ValueConversion::Uext(self.pointer_type).into(),
ArgumentExtension::Sext => return ValueConversion::Sext(self.pointer_type).into(),
}
}
if self.regs < self.reg_limit {
// Assign to a register.
let reg = if ty.is_float() {
FPR.unit(10 + self.regs as usize)
} else {
GPR.unit(10 + self.regs as usize)
};
self.regs += 1;
ArgumentLoc::Reg(reg).into()
} else {
// Assign a stack location.
let loc = ArgumentLoc::Stack(self.offset as i32);
self.offset += u32::from(self.pointer_bytes);
debug_assert!(self.offset <= i32::MAX as u32);
loc.into()
}
}
}
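// An illustrative sketch, not part of the original commit: the `align`
// helper inside `assign` rounds a value up to a power-of-two boundary, which
// keeps split integers on even register pairs and double-pointer stack
// offsets. Hypothetical values only.
#[cfg(test)]
mod align_sketch {
    fn align(value: u32, to: u32) -> u32 {
        (value + to - 1) & !(to - 1)
    }
    #[test]
    fn rounds_up_to_power_of_two() {
        assert_eq!(align(0, 2), 0);
        assert_eq!(align(1, 2), 2);
        assert_eq!(align(5, 8), 8); // stack offset 5 rounded to an 8-byte boundary
        assert_eq!(align(16, 8), 16); // already aligned values are unchanged
    }
}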
/// Legalize `sig` for RISC-V.
pub fn legalize_signature(
sig: &mut ir::Signature,
triple: &Triple,
isa_flags: &settings::Flags,
current: bool,
) {
let bits = triple.pointer_width().unwrap().bits();
let mut args = Args::new(bits, isa_flags.enable_e());
legalize_args(&mut sig.params, &mut args);
let mut rets = Args::new(bits, isa_flags.enable_e());
legalize_args(&mut sig.returns, &mut rets);
if current {
let ptr = Type::int(u16::from(bits)).unwrap();
// Add the link register as an argument and return value.
//
// The `jalr` instruction implementing a return can technically accept the return address
// in any register, but a micro-architecture with a return address predictor will only
// recognize it as a return if the address is in `x1`.
let link = AbiParam::special_reg(ptr, ArgumentPurpose::Link, GPR.unit(1));
sig.params.push(link);
sig.returns.push(link);
}
}
/// Get register class for a type appearing in a legalized signature.
pub fn regclass_for_abi_type(ty: Type) -> RegClass {
if ty.is_float() {
FPR
} else {
GPR
}
}
pub fn allocatable_registers(_func: &ir::Function, isa_flags: &settings::Flags) -> RegisterSet {
let mut regs = RegisterSet::new();
regs.take(GPR, GPR.unit(0)); // Hard-wired 0.
// %x1 is the link register which is available for allocation.
regs.take(GPR, GPR.unit(2)); // Stack pointer.
regs.take(GPR, GPR.unit(3)); // Global pointer.
regs.take(GPR, GPR.unit(4)); // Thread pointer.
// TODO: %x8 is the frame pointer. Reserve it?
// Remove %x16 and up for RV32E.
if isa_flags.enable_e() {
for u in 16..32 {
regs.take(GPR, GPR.unit(u));
}
}
regs
}

View File

@@ -0,0 +1,182 @@
//! Emitting binary RISC-V machine code.
use crate::binemit::{bad_encoding, CodeSink, Reloc};
use crate::ir::{Function, Inst, InstructionData};
use crate::isa::{RegUnit, StackBaseMask, StackRef};
use crate::predicates::is_signed_int;
use crate::regalloc::RegDiversions;
use core::u32;
include!(concat!(env!("OUT_DIR"), "/binemit-riscv.rs"));
/// R-type instructions.
///
/// funct7[31:25] rs2[24:20] rs1[19:15] funct3[14:12] rd[11:7] opcode[6:0]
///
/// Encoding bits: `opcode[6:2] | (funct3 << 5) | (funct7 << 8)`.
fn put_r<CS: CodeSink + ?Sized>(bits: u16, rs1: RegUnit, rs2: RegUnit, rd: RegUnit, sink: &mut CS) {
let bits = u32::from(bits);
let opcode5 = bits & 0x1f;
let funct3 = (bits >> 5) & 0x7;
let funct7 = (bits >> 8) & 0x7f;
let rs1 = u32::from(rs1) & 0x1f;
let rs2 = u32::from(rs2) & 0x1f;
let rd = u32::from(rd) & 0x1f;
// 0-6: opcode
let mut i = 0x3;
i |= opcode5 << 2;
i |= rd << 7;
i |= funct3 << 12;
i |= rs1 << 15;
i |= rs2 << 20;
i |= funct7 << 25;
sink.put4(i);
}
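// An illustrative sketch, not part of the original commit: the same field
// packing as `put_r`, restated as standalone arithmetic and checked against
// the well-known encoding of `add x1, x2, x3` (0x003100b3).
#[cfg(test)]
mod rtype_sketch {
    #[test]
    fn pack_add() {
        let (opcode5, funct3, funct7) = (0b01100u32, 0u32, 0u32); // OP major opcode
        let (rd, rs1, rs2) = (1u32, 2u32, 3u32);
        let mut i = 0x3; // bits 0-1 are always `11` for 32-bit instructions
        i |= opcode5 << 2;
        i |= rd << 7;
        i |= funct3 << 12;
        i |= rs1 << 15;
        i |= rs2 << 20;
        i |= funct7 << 25;
        assert_eq!(i, 0x0031_00b3);
    }
}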
/// R-type instructions with a shift amount instead of rs2.
///
/// funct7[31:25] shamt[25:20] rs1[19:15] funct3[14:12] rd[11:7] opcode[6:0]
///
/// Both funct7 and shamt contribute to bit 25. In RV64, shamt uses it for shifts > 31.
///
/// Encoding bits: `opcode[6:2] | (funct3 << 5) | (funct7 << 8)`.
fn put_rshamt<CS: CodeSink + ?Sized>(
bits: u16,
rs1: RegUnit,
shamt: i64,
rd: RegUnit,
sink: &mut CS,
) {
let bits = u32::from(bits);
let opcode5 = bits & 0x1f;
let funct3 = (bits >> 5) & 0x7;
let funct7 = (bits >> 8) & 0x7f;
let rs1 = u32::from(rs1) & 0x1f;
let shamt = shamt as u32 & 0x3f;
let rd = u32::from(rd) & 0x1f;
// 0-6: opcode
let mut i = 0x3;
i |= opcode5 << 2;
i |= rd << 7;
i |= funct3 << 12;
i |= rs1 << 15;
i |= shamt << 20;
i |= funct7 << 25;
sink.put4(i);
}
/// I-type instructions.
///
/// imm[31:20] rs1[19:15] funct3[14:12] rd[11:7] opcode[6:0]
///
/// Encoding bits: `opcode[6:2] | (funct3 << 5)`
fn put_i<CS: CodeSink + ?Sized>(bits: u16, rs1: RegUnit, imm: i64, rd: RegUnit, sink: &mut CS) {
let bits = u32::from(bits);
let opcode5 = bits & 0x1f;
let funct3 = (bits >> 5) & 0x7;
let rs1 = u32::from(rs1) & 0x1f;
let rd = u32::from(rd) & 0x1f;
// 0-6: opcode
let mut i = 0x3;
i |= opcode5 << 2;
i |= rd << 7;
i |= funct3 << 12;
i |= rs1 << 15;
i |= (imm << 20) as u32;
sink.put4(i);
}
/// U-type instructions.
///
/// imm[31:12] rd[11:7] opcode[6:0]
///
/// Encoding bits: `opcode[6:2] | (funct3 << 5)`
fn put_u<CS: CodeSink + ?Sized>(bits: u16, imm: i64, rd: RegUnit, sink: &mut CS) {
let bits = u32::from(bits);
let opcode5 = bits & 0x1f;
let rd = u32::from(rd) & 0x1f;
// 0-6: opcode
let mut i = 0x3;
i |= opcode5 << 2;
i |= rd << 7;
i |= imm as u32 & 0xfffff000;
sink.put4(i);
}
/// SB-type branch instructions.
///
/// imm[31:25] rs2[24:20] rs1[19:15] funct3[14:12] imm[11:7] opcode[6:0]
///
/// Encoding bits: `opcode[6:2] | (funct3 << 5)`
fn put_sb<CS: CodeSink + ?Sized>(bits: u16, imm: i64, rs1: RegUnit, rs2: RegUnit, sink: &mut CS) {
let bits = u32::from(bits);
let opcode5 = bits & 0x1f;
let funct3 = (bits >> 5) & 0x7;
let rs1 = u32::from(rs1) & 0x1f;
let rs2 = u32::from(rs2) & 0x1f;
debug_assert!(is_signed_int(imm, 13, 1), "SB out of range {:#x}", imm);
let imm = imm as u32;
// 0-6: opcode
let mut i = 0x3;
i |= opcode5 << 2;
i |= funct3 << 12;
i |= rs1 << 15;
i |= rs2 << 20;
// The displacement bits are scattered across the instruction word.
i |= ((imm >> 11) & 0x1) << 7;
i |= ((imm >> 1) & 0xf) << 8;
i |= ((imm >> 5) & 0x3f) << 25;
i |= ((imm >> 12) & 0x1) << 31;
sink.put4(i);
}
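// An illustrative sketch, not part of the original commit: the displacement
// scatter performed by `put_sb`, restated as standalone arithmetic and
// checked against the well-known encoding of `beq x0, x0, 8` (0x00000463).
#[cfg(test)]
mod sbtype_sketch {
    #[test]
    fn scatter_branch_displacement() {
        let imm = 8u32;
        let opcode5 = 0b11000u32; // BRANCH major opcode, bits [6:2]
        let mut i = 0x3;
        i |= opcode5 << 2;
        i |= ((imm >> 11) & 0x1) << 7;
        i |= ((imm >> 1) & 0xf) << 8;
        i |= ((imm >> 5) & 0x3f) << 25;
        i |= ((imm >> 12) & 0x1) << 31;
        assert_eq!(i, 0x0000_0463);
    }
}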
/// UJ-type jump instructions.
///
/// imm[31:12] rd[11:7] opcode[6:0]
///
/// Encoding bits: `opcode[6:2]`
fn put_uj<CS: CodeSink + ?Sized>(bits: u16, imm: i64, rd: RegUnit, sink: &mut CS) {
let bits = u32::from(bits);
let opcode5 = bits & 0x1f;
let rd = u32::from(rd) & 0x1f;
debug_assert!(is_signed_int(imm, 21, 1), "UJ out of range {:#x}", imm);
let imm = imm as u32;
// 0-6: opcode
let mut i = 0x3;
i |= opcode5 << 2;
i |= rd << 7;
// The displacement bits are scattered across the instruction word.
i |= imm & 0xff000;
i |= ((imm >> 11) & 0x1) << 20;
i |= ((imm >> 1) & 0x3ff) << 21;
i |= ((imm >> 20) & 0x1) << 31;
sink.put4(i);
}

View File

@@ -0,0 +1,17 @@
//! Encoding tables for RISC-V.
use super::registers::*;
use crate::ir;
use crate::isa;
use crate::isa::constraints::*;
use crate::isa::enc_tables::*;
use crate::isa::encoding::{base_size, RecipeSizing};
// Include the generated encoding tables:
// - `LEVEL1_RV32`
// - `LEVEL1_RV64`
// - `LEVEL2`
// - `ENCLISTS`
// - `INFO`
include!(concat!(env!("OUT_DIR"), "/encoding-riscv.rs"));
include!(concat!(env!("OUT_DIR"), "/legalize-riscv.rs"));

View File

@@ -0,0 +1,281 @@
//! RISC-V Instruction Set Architecture.
mod abi;
mod binemit;
mod enc_tables;
mod registers;
pub mod settings;
use super::super::settings as shared_settings;
#[cfg(feature = "testing_hooks")]
use crate::binemit::CodeSink;
use crate::binemit::{emit_function, MemoryCodeSink};
use crate::ir;
use crate::isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings};
use crate::isa::Builder as IsaBuilder;
use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
use crate::regalloc;
use core::fmt;
use std::boxed::Box;
use target_lexicon::{PointerWidth, Triple};
#[allow(dead_code)]
struct Isa {
triple: Triple,
shared_flags: shared_settings::Flags,
isa_flags: settings::Flags,
cpumode: &'static [shared_enc_tables::Level1Entry<u16>],
}
/// Get an ISA builder for creating RISC-V targets.
pub fn isa_builder(triple: Triple) -> IsaBuilder {
IsaBuilder {
triple,
setup: settings::builder(),
constructor: isa_constructor,
}
}
fn isa_constructor(
triple: Triple,
shared_flags: shared_settings::Flags,
builder: shared_settings::Builder,
) -> Box<TargetIsa> {
let level1 = match triple.pointer_width().unwrap() {
PointerWidth::U16 => panic!("16-bit RISC-V unrecognized"),
PointerWidth::U32 => &enc_tables::LEVEL1_RV32[..],
PointerWidth::U64 => &enc_tables::LEVEL1_RV64[..],
};
Box::new(Isa {
triple,
isa_flags: settings::Flags::new(&shared_flags, builder),
shared_flags,
cpumode: level1,
})
}
impl TargetIsa for Isa {
fn name(&self) -> &'static str {
"riscv"
}
fn triple(&self) -> &Triple {
&self.triple
}
fn flags(&self) -> &shared_settings::Flags {
&self.shared_flags
}
fn register_info(&self) -> RegInfo {
registers::INFO.clone()
}
fn encoding_info(&self) -> EncInfo {
enc_tables::INFO.clone()
}
fn legal_encodings<'a>(
&'a self,
func: &'a ir::Function,
inst: &'a ir::InstructionData,
ctrl_typevar: ir::Type,
) -> Encodings<'a> {
lookup_enclist(
ctrl_typevar,
inst,
func,
self.cpumode,
&enc_tables::LEVEL2[..],
&enc_tables::ENCLISTS[..],
&enc_tables::LEGALIZE_ACTIONS[..],
&enc_tables::RECIPE_PREDICATES[..],
&enc_tables::INST_PREDICATES[..],
self.isa_flags.predicate_view(),
)
}
fn legalize_signature(&self, sig: &mut ir::Signature, current: bool) {
abi::legalize_signature(sig, &self.triple, &self.isa_flags, current)
}
fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
abi::regclass_for_abi_type(ty)
}
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
abi::allocatable_registers(func, &self.isa_flags)
}
#[cfg(feature = "testing_hooks")]
fn emit_inst(
&self,
func: &ir::Function,
inst: ir::Inst,
divert: &mut regalloc::RegDiversions,
sink: &mut CodeSink,
) {
binemit::emit_inst(func, inst, divert, sink)
}
fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
emit_function(func, binemit::emit_inst, sink)
}
}
#[cfg(test)]
mod tests {
use crate::ir::{immediates, types};
use crate::ir::{Function, InstructionData, Opcode};
use crate::isa;
use crate::settings::{self, Configurable};
use core::str::FromStr;
use std::string::{String, ToString};
use target_lexicon::triple;
fn encstr(isa: &isa::TargetIsa, enc: Result<isa::Encoding, isa::Legalize>) -> String {
match enc {
Ok(e) => isa.encoding_info().display(e).to_string(),
Err(_) => "no encoding".to_string(),
}
}
#[test]
fn test_64bitenc() {
let shared_builder = settings::builder();
let shared_flags = settings::Flags::new(shared_builder);
let isa = isa::lookup(triple!("riscv64"))
.unwrap()
.finish(shared_flags);
let mut func = Function::new();
let ebb = func.dfg.make_ebb();
let arg64 = func.dfg.append_ebb_param(ebb, types::I64);
let arg32 = func.dfg.append_ebb_param(ebb, types::I32);
// Try to encode iadd_imm.i64 v1, -10.
let inst64 = InstructionData::BinaryImm {
opcode: Opcode::IaddImm,
arg: arg64,
imm: immediates::Imm64::new(-10),
};
// ADDI is I/0b00100
assert_eq!(
encstr(&*isa, isa.encode(&func, &inst64, types::I64)),
"Ii#04"
);
// Try to encode iadd_imm.i64 v1, -10000.
let inst64_large = InstructionData::BinaryImm {
opcode: Opcode::IaddImm,
arg: arg64,
imm: immediates::Imm64::new(-10000),
};
// Immediate is out of range for ADDI.
assert!(isa.encode(&func, &inst64_large, types::I64).is_err());
// Create an iadd_imm.i32 which is encodable in RV64.
let inst32 = InstructionData::BinaryImm {
opcode: Opcode::IaddImm,
arg: arg32,
imm: immediates::Imm64::new(10),
};
// ADDIW is I/0b00110
assert_eq!(
encstr(&*isa, isa.encode(&func, &inst32, types::I32)),
"Ii#06"
);
}
// Same as above, but for RV32.
#[test]
fn test_32bitenc() {
let shared_builder = settings::builder();
let shared_flags = settings::Flags::new(shared_builder);
let isa = isa::lookup(triple!("riscv32"))
.unwrap()
.finish(shared_flags);
let mut func = Function::new();
let ebb = func.dfg.make_ebb();
let arg64 = func.dfg.append_ebb_param(ebb, types::I64);
let arg32 = func.dfg.append_ebb_param(ebb, types::I32);
// Try to encode iadd_imm.i64 v1, -10.
let inst64 = InstructionData::BinaryImm {
opcode: Opcode::IaddImm,
arg: arg64,
imm: immediates::Imm64::new(-10),
};
// In 32-bit mode, an i64 add should be narrowed.
assert!(isa.encode(&func, &inst64, types::I64).is_err());
// Try to encode iadd_imm.i64 v1, -10000.
let inst64_large = InstructionData::BinaryImm {
opcode: Opcode::IaddImm,
arg: arg64,
imm: immediates::Imm64::new(-10000),
};
// In 32-bit mode, an i64 add should be narrowed.
assert!(isa.encode(&func, &inst64_large, types::I64).is_err());
// Create an iadd_imm.i32 which is encodable in RV32.
let inst32 = InstructionData::BinaryImm {
opcode: Opcode::IaddImm,
arg: arg32,
imm: immediates::Imm64::new(10),
};
// ADDI is I/0b00100
assert_eq!(
encstr(&*isa, isa.encode(&func, &inst32, types::I32)),
"Ii#04"
);
// Create an imul.i32 which is encodable in RV32, but only when use_m is true.
let mul32 = InstructionData::Binary {
opcode: Opcode::Imul,
args: [arg32, arg32],
};
assert!(isa.encode(&func, &mul32, types::I32).is_err());
}
#[test]
fn test_rv32m() {
let shared_builder = settings::builder();
let shared_flags = settings::Flags::new(shared_builder);
// Set the supports_m setting, which in turn enables the use_m predicate that unlocks
// encodings for imul.
let mut isa_builder = isa::lookup(triple!("riscv32")).unwrap();
isa_builder.enable("supports_m").unwrap();
let isa = isa_builder.finish(shared_flags);
let mut func = Function::new();
let ebb = func.dfg.make_ebb();
let arg32 = func.dfg.append_ebb_param(ebb, types::I32);
// Create an imul.i32 which is encodable in RV32M.
let mul32 = InstructionData::Binary {
opcode: Opcode::Imul,
args: [arg32, arg32],
};
assert_eq!(
encstr(&*isa, isa.encode(&func, &mul32, types::I32)),
"R#10c"
);
}
}
impl fmt::Display for Isa {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}\n{}", self.shared_flags, self.isa_flags)
}
}

View File

@@ -0,0 +1,50 @@
//! RISC-V register descriptions.
use crate::isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit};
include!(concat!(env!("OUT_DIR"), "/registers-riscv.rs"));
#[cfg(test)]
mod tests {
use super::{FPR, GPR, INFO};
use crate::isa::RegUnit;
use std::string::{String, ToString};
#[test]
fn unit_encodings() {
assert_eq!(INFO.parse_regunit("x0"), Some(0));
assert_eq!(INFO.parse_regunit("x31"), Some(31));
assert_eq!(INFO.parse_regunit("f0"), Some(32));
assert_eq!(INFO.parse_regunit("f31"), Some(63));
assert_eq!(INFO.parse_regunit("x32"), None);
assert_eq!(INFO.parse_regunit("f32"), None);
}
#[test]
fn unit_names() {
fn uname(ru: RegUnit) -> String {
INFO.display_regunit(ru).to_string()
}
assert_eq!(uname(0), "%x0");
assert_eq!(uname(1), "%x1");
assert_eq!(uname(31), "%x31");
assert_eq!(uname(32), "%f0");
assert_eq!(uname(33), "%f1");
assert_eq!(uname(63), "%f31");
assert_eq!(uname(64), "%INVALID64");
}
#[test]
fn classes() {
assert!(GPR.contains(GPR.unit(0)));
assert!(GPR.contains(GPR.unit(31)));
assert!(!FPR.contains(GPR.unit(0)));
assert!(!FPR.contains(GPR.unit(31)));
assert!(!GPR.contains(FPR.unit(0)));
assert!(!GPR.contains(FPR.unit(31)));
assert!(FPR.contains(FPR.unit(0)));
assert!(FPR.contains(FPR.unit(31)));
}
}

View File

@@ -0,0 +1,54 @@
//! RISC-V Settings.
use crate::settings::{self, detail, Builder};
use core::fmt;
// Include code generated by `cranelift-codegen/meta-python/gen_settings.py`. This file contains a public
// `Flags` struct with an impl for all of the settings defined in
// `cranelift-codegen/meta-python/isa/riscv/settings.py`.
include!(concat!(env!("OUT_DIR"), "/settings-riscv.rs"));
#[cfg(test)]
mod tests {
use super::{builder, Flags};
use crate::settings::{self, Configurable};
use std::string::ToString;
#[test]
fn display_default() {
let shared = settings::Flags::new(settings::builder());
let b = builder();
let f = Flags::new(&shared, b);
assert_eq!(
f.to_string(),
"[riscv]\n\
supports_m = false\n\
supports_a = false\n\
supports_f = false\n\
supports_d = false\n\
enable_m = true\n\
enable_e = false\n"
);
// Predicates are not part of the Display output.
assert_eq!(f.full_float(), false);
}
#[test]
fn predicates() {
let shared = settings::Flags::new(settings::builder());
let mut b = builder();
b.enable("supports_f").unwrap();
b.enable("supports_d").unwrap();
let f = Flags::new(&shared, b);
assert_eq!(f.full_float(), true);
let mut sb = settings::builder();
sb.set("enable_simd", "false").unwrap();
let shared = settings::Flags::new(sb);
let mut b = builder();
b.enable("supports_f").unwrap();
b.enable("supports_d").unwrap();
let f = Flags::new(&shared, b);
assert_eq!(f.full_float(), false);
}
}

View File

@@ -0,0 +1,94 @@
//! Low-level details of stack accesses.
//!
//! The `ir::StackSlots` type deals with stack slots and stack frame layout. The `StackRef` type
//! defined in this module expresses the low-level details of accessing a stack slot from an
//! encoded instruction.
use crate::ir::stackslot::{StackOffset, StackSlotKind, StackSlots};
use crate::ir::StackSlot;
/// A method for referencing a stack slot in the current stack frame.
///
/// Stack slots are addressed with a constant offset from a base register. The base can be the
/// stack pointer, the frame pointer, or (in the future) a zone register pointing to an inner zone
/// of a large stack frame.
#[derive(Clone, Copy, Debug)]
pub struct StackRef {
/// The base register to use for addressing.
pub base: StackBase,
/// Immediate offset from the base register to the first byte of the stack slot.
pub offset: StackOffset,
}
impl StackRef {
/// Get a reference to the stack slot `ss` using one of the base pointers in `mask`.
pub fn masked(ss: StackSlot, mask: StackBaseMask, frame: &StackSlots) -> Option<Self> {
// Try an SP-relative reference.
if mask.contains(StackBase::SP) {
return Some(Self::sp(ss, frame));
}
// No reference possible with this mask.
None
}
/// Get a reference to `ss` using the stack pointer as a base.
pub fn sp(ss: StackSlot, frame: &StackSlots) -> Self {
let size = frame
.frame_size
.expect("Stack layout must be computed before referencing stack slots");
let slot = &frame[ss];
let offset = if slot.kind == StackSlotKind::OutgoingArg {
// Outgoing argument slots have offsets relative to our stack pointer.
slot.offset.unwrap()
} else {
// All other slots have offsets relative to our caller's stack frame.
// Offset where SP is pointing. (All ISAs have stacks growing downwards.)
let sp_offset = -(size as StackOffset);
slot.offset.unwrap() - sp_offset
};
Self {
base: StackBase::SP,
offset,
}
}
}
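// An illustrative sketch, not part of the original commit: the offset
// arithmetic in `StackRef::sp` for a non-outgoing slot, with a hypothetical
// 32-byte frame instead of a real `StackSlots` instance.
#[cfg(test)]
mod stackref_sketch {
    #[test]
    fn sp_relative_offset() {
        let frame_size: i32 = 32; // as computed by the stack layout pass
        let slot_offset: i32 = -16; // relative to the caller's frame base
        let sp_offset = -frame_size; // where SP points after the prologue
        assert_eq!(slot_offset - sp_offset, 16); // the slot is addressed as SP+16
    }
}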
/// Generic base register for referencing stack slots.
///
/// Most ISAs have a stack pointer and an optional frame pointer, so provide generic names for
/// those two base pointers.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum StackBase {
/// Use the stack pointer.
SP = 0,
/// Use the frame pointer (if one is present).
FP = 1,
/// Use an explicit zone pointer in a general-purpose register.
///
/// This feature is not yet implemented.
Zone = 2,
}
/// Bit mask of supported stack bases.
///
/// Many instruction encodings can use different base registers while others only work with the
/// stack pointer, say. A `StackBaseMask` is a bit mask of supported stack bases for a given
/// instruction encoding.
///
/// This behaves like a set of `StackBase` variants.
///
/// The internal representation as a `u8` is public because stack base masks are used in constant
/// tables generated from the Python encoding definitions.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct StackBaseMask(pub u8);
impl StackBaseMask {
/// Check if this mask contains the `base` variant.
pub fn contains(self, base: StackBase) -> bool {
self.0 & (1 << base as usize) != 0
}
}
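// An illustrative sketch, not part of the original commit: membership tests
// on a hypothetical mask permitting SP and FP but not Zone.
#[cfg(test)]
mod stack_base_mask_sketch {
    use super::{StackBase, StackBaseMask};
    #[test]
    fn mask_membership() {
        let mask = StackBaseMask(0b011);
        assert!(mask.contains(StackBase::SP));
        assert!(mask.contains(StackBase::FP));
        assert!(!mask.contains(StackBase::Zone));
    }
}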

View File

@@ -0,0 +1,579 @@
//! x86 ABI implementation.
use super::registers::{FPR, GPR, RU};
use crate::abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion};
use crate::cursor::{Cursor, CursorPosition, EncCursor};
use crate::ir;
use crate::ir::immediates::Imm64;
use crate::ir::stackslot::{StackOffset, StackSize};
use crate::ir::{
get_probestack_funcref, AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, InstBuilder,
ValueLoc,
};
use crate::isa::{CallConv, RegClass, RegUnit, TargetIsa};
use crate::regalloc::RegisterSet;
use crate::result::CodegenResult;
use crate::stack_layout::layout_stack;
use core::i32;
use target_lexicon::{PointerWidth, Triple};
/// Argument registers for x86-64
static ARG_GPRS: [RU; 6] = [RU::rdi, RU::rsi, RU::rdx, RU::rcx, RU::r8, RU::r9];
/// Return value registers.
static RET_GPRS: [RU; 3] = [RU::rax, RU::rdx, RU::rcx];
/// Argument registers for x86-64, when using windows fastcall
static ARG_GPRS_WIN_FASTCALL_X64: [RU; 4] = [RU::rcx, RU::rdx, RU::r8, RU::r9];
/// Return value registers for x86-64, when using windows fastcall
static RET_GPRS_WIN_FASTCALL_X64: [RU; 1] = [RU::rax];
struct Args {
pointer_bytes: u8,
pointer_bits: u8,
pointer_type: ir::Type,
gpr: &'static [RU],
gpr_used: usize,
fpr_limit: usize,
fpr_used: usize,
offset: u32,
call_conv: CallConv,
}
impl Args {
fn new(bits: u8, gpr: &'static [RU], fpr_limit: usize, call_conv: CallConv) -> Self {
let offset = if let CallConv::WindowsFastcall = call_conv {
// [1] "The caller is responsible for allocating space for parameters to the callee,
// and must always allocate sufficient space to store four register parameters"
32
} else {
0
};
Self {
pointer_bytes: bits / 8,
pointer_bits: bits,
pointer_type: ir::Type::int(u16::from(bits)).unwrap(),
gpr,
gpr_used: 0,
fpr_limit,
fpr_used: 0,
offset,
call_conv,
}
}
}
impl ArgAssigner for Args {
fn assign(&mut self, arg: &AbiParam) -> ArgAction {
let ty = arg.value_type;
// Check for a legal type.
// We don't support SIMD yet, so break all vectors down.
if ty.is_vector() {
return ValueConversion::VectorSplit.into();
}
// Large integers and booleans are broken down to fit in a register.
if !ty.is_float() && ty.bits() > u16::from(self.pointer_bits) {
return ValueConversion::IntSplit.into();
}
// Small integers are extended to the size of a pointer register.
if ty.is_int() && ty.bits() < u16::from(self.pointer_bits) {
match arg.extension {
ArgumentExtension::None => {}
ArgumentExtension::Uext => return ValueConversion::Uext(self.pointer_type).into(),
ArgumentExtension::Sext => return ValueConversion::Sext(self.pointer_type).into(),
}
}
// Handle special-purpose arguments.
if ty.is_int() && self.call_conv == CallConv::Baldrdash {
match arg.purpose {
// This is SpiderMonkey's `WasmTlsReg`.
ArgumentPurpose::VMContext => {
return ArgumentLoc::Reg(if self.pointer_bits == 64 {
RU::r14
} else {
RU::rsi
} as RegUnit)
.into();
}
// This is SpiderMonkey's `WasmTableCallSigReg`.
ArgumentPurpose::SignatureId => return ArgumentLoc::Reg(RU::r10 as RegUnit).into(),
_ => {}
}
}
// Try to use a GPR.
if !ty.is_float() && self.gpr_used < self.gpr.len() {
let reg = self.gpr[self.gpr_used] as RegUnit;
self.gpr_used += 1;
return ArgumentLoc::Reg(reg).into();
}
// Try to use an FPR.
if ty.is_float() && self.fpr_used < self.fpr_limit {
let reg = FPR.unit(self.fpr_used);
self.fpr_used += 1;
return ArgumentLoc::Reg(reg).into();
}
// Assign a stack location.
let loc = ArgumentLoc::Stack(self.offset as i32);
self.offset += u32::from(self.pointer_bytes);
debug_assert!(self.offset <= i32::MAX as u32);
loc.into()
}
}
/// Legalize `sig`.
pub fn legalize_signature(sig: &mut ir::Signature, triple: &Triple, _current: bool) {
let bits;
let mut args;
match triple.pointer_width().unwrap() {
PointerWidth::U16 => panic!(),
PointerWidth::U32 => {
bits = 32;
args = Args::new(bits, &[], 0, sig.call_conv);
}
PointerWidth::U64 => {
bits = 64;
args = if sig.call_conv == CallConv::WindowsFastcall {
Args::new(bits, &ARG_GPRS_WIN_FASTCALL_X64[..], 4, sig.call_conv)
} else {
Args::new(bits, &ARG_GPRS[..], 8, sig.call_conv)
};
}
}
legalize_args(&mut sig.params, &mut args);
let regs = if sig.call_conv == CallConv::WindowsFastcall {
&RET_GPRS_WIN_FASTCALL_X64[..]
} else {
&RET_GPRS[..]
};
let mut rets = Args::new(bits, regs, 2, sig.call_conv);
legalize_args(&mut sig.returns, &mut rets);
}
/// Get register class for a type appearing in a legalized signature.
pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
if ty.is_int() || ty.is_bool() {
GPR
} else {
FPR
}
}
/// Get the set of allocatable registers for `func`.
pub fn allocatable_registers(_func: &ir::Function, triple: &Triple) -> RegisterSet {
let mut regs = RegisterSet::new();
regs.take(GPR, RU::rsp as RegUnit);
regs.take(GPR, RU::rbp as RegUnit);
// 32-bit arch only has 8 registers.
if triple.pointer_width().unwrap() != PointerWidth::U64 {
for i in 8..16 {
regs.take(GPR, GPR.unit(i));
regs.take(FPR, FPR.unit(i));
}
}
regs
}
/// Get the set of callee-saved registers.
fn callee_saved_gprs(isa: &TargetIsa, call_conv: CallConv) -> &'static [RU] {
match isa.triple().pointer_width().unwrap() {
PointerWidth::U16 => panic!(),
PointerWidth::U32 => &[RU::rbx, RU::rsi, RU::rdi],
PointerWidth::U64 => {
if call_conv == CallConv::WindowsFastcall {
// "registers RBX, RBP, RDI, RSI, RSP, R12, R13, R14, R15 are considered nonvolatile
// and must be saved and restored by a function that uses them."
// as per https://msdn.microsoft.com/en-us/library/6t169e9c.aspx
// RSP & RBP are not listed below, since they are restored automatically during
// a function call. If that wasn't the case, function calls (RET) would not work.
&[
RU::rbx,
RU::rdi,
RU::rsi,
RU::r12,
RU::r13,
RU::r14,
RU::r15,
]
} else {
&[RU::rbx, RU::r12, RU::r13, RU::r14, RU::r15]
}
}
}
}
/// Get the set of callee-saved registers that are used.
fn callee_saved_gprs_used(isa: &TargetIsa, func: &ir::Function) -> RegisterSet {
let mut all_callee_saved = RegisterSet::empty();
for reg in callee_saved_gprs(isa, func.signature.call_conv) {
all_callee_saved.free(GPR, *reg as RegUnit);
}
let mut used = RegisterSet::empty();
for value_loc in func.locations.values() {
// Note that `value_loc` here contains only a single unit of a potentially multi-unit
// register. We don't use registers that overlap each other in the x86 ISA, but in others
// we do. So this should not be blindly reused.
if let ValueLoc::Reg(ru) = *value_loc {
if !used.is_avail(GPR, ru) {
used.free(GPR, ru);
}
}
}
// regmove and regfill instructions may temporarily divert values into other registers,
// and these are not reflected in `func.locations`. Scan the function for such instructions
// and note which callee-saved registers they use.
//
// TODO: Consider re-evaluating how regmove/regfill/regspill work and whether it's possible
// to avoid this step.
for ebb in &func.layout {
for inst in func.layout.ebb_insts(ebb) {
match func.dfg[inst] {
ir::instructions::InstructionData::RegMove { dst, .. }
| ir::instructions::InstructionData::RegFill { dst, .. } => {
if !used.is_avail(GPR, dst) {
used.free(GPR, dst);
}
}
_ => (),
}
}
}
used.intersect(&all_callee_saved);
used
}
pub fn prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> CodegenResult<()> {
match func.signature.call_conv {
// For now, just translate fast and cold as system_v.
CallConv::Fast | CallConv::Cold | CallConv::SystemV => {
system_v_prologue_epilogue(func, isa)
}
CallConv::WindowsFastcall => fastcall_prologue_epilogue(func, isa),
CallConv::Baldrdash => baldrdash_prologue_epilogue(func, isa),
CallConv::Probestack => unimplemented!("probestack calling convention"),
}
}
fn baldrdash_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> CodegenResult<()> {
debug_assert!(
!isa.flags().probestack_enabled(),
"baldrdash does not expect cranelift to emit stack probes"
);
// Baldrdash on 32-bit x86 always aligns its stack pointer to 16 bytes.
let stack_align = 16;
let word_size = StackSize::from(isa.pointer_bytes());
let bytes = StackSize::from(isa.flags().baldrdash_prologue_words()) * word_size;
let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes);
ss.offset = Some(-(bytes as StackOffset));
func.stack_slots.push(ss);
layout_stack(&mut func.stack_slots, stack_align)?;
Ok(())
}
/// Implementation of the fastcall-based Win64 calling convention described at [1]
/// [1] https://msdn.microsoft.com/en-us/library/ms235286.aspx
fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> CodegenResult<()> {
if isa.triple().pointer_width().unwrap() != PointerWidth::U64 {
panic!("TODO: windows-fastcall: x86-32 not implemented yet");
}
// [1] "The primary exceptions are the stack pointer and malloc or alloca memory,
// which are aligned to 16 bytes in order to aid performance"
let stack_align = 16;
let word_size = isa.pointer_bytes() as usize;
let reg_type = isa.pointer_type();
let csrs = callee_saved_gprs_used(isa, func);
// [1] "Space is allocated on the call stack as a shadow store for callees to save"
// This shadow store contains the parameters which are passed through registers (ARG_GPRS)
// and is eventually used by the callee to save & restore the values of the arguments.
//
// [2] https://blogs.msdn.microsoft.com/oldnewthing/20110302-00/?p=11333
// "Although the x64 calling convention reserves spill space for parameters,
// you don't have to use them as such"
//
// The reserved stack area is composed of:
// return address + frame pointer + all callee-saved registers + shadow space
//
// Pushing the return address is an implicit function of the `call`
// instruction. Each of the others we will then push explicitly. Then we
// will adjust the stack pointer to make room for the rest of the required
// space for this frame.
const SHADOW_STORE_SIZE: i32 = 32;
let csr_stack_size = ((csrs.iter(GPR).len() + 2) * word_size) as i32;
// TODO: eventually use the 32 bytes (shadow store) as spill slot. This currently doesn't work
// since cranelift does not support spill slots before incoming args
func.create_stack_slot(ir::StackSlotData {
kind: ir::StackSlotKind::IncomingArg,
size: csr_stack_size as u32,
offset: Some(-(SHADOW_STORE_SIZE + csr_stack_size)),
});
let total_stack_size = layout_stack(&mut func.stack_slots, stack_align)? as i32;
let local_stack_size = i64::from(total_stack_size - csr_stack_size);
// Add CSRs to function signature
let fp_arg = ir::AbiParam::special_reg(
reg_type,
ir::ArgumentPurpose::FramePointer,
RU::rbp as RegUnit,
);
func.signature.params.push(fp_arg);
func.signature.returns.push(fp_arg);
for csr in csrs.iter(GPR) {
let csr_arg = ir::AbiParam::special_reg(reg_type, ir::ArgumentPurpose::CalleeSaved, csr);
func.signature.params.push(csr_arg);
func.signature.returns.push(csr_arg);
}
// Set up the cursor and insert the prologue
let entry_ebb = func.layout.entry_block().expect("missing entry block");
let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_ebb);
insert_common_prologue(&mut pos, local_stack_size, reg_type, &csrs, isa);
// Reset the cursor and insert the epilogue
let mut pos = pos.at_position(CursorPosition::Nowhere);
insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs);
Ok(())
}
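// An illustrative sketch, not part of the original commit: the reserved-area
// arithmetic used above, with a hypothetical count of three callee-saved
// registers on x86-64.
#[cfg(test)]
mod fastcall_frame_sketch {
    #[test]
    fn reserved_region_size() {
        const SHADOW_STORE_SIZE: i32 = 32;
        let word_size = 8i32; // x86-64 pointer size
        let num_csrs = 3i32; // hypothetical
        // Return address + frame pointer + CSRs.
        let csr_stack_size = (num_csrs + 2) * word_size;
        assert_eq!(csr_stack_size, 40);
        // The incoming-arg slot is placed below both the CSR area and the
        // 32-byte fastcall shadow store.
        assert_eq!(-(SHADOW_STORE_SIZE + csr_stack_size), -72);
    }
}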
/// Insert a System V-compatible prologue and epilogue.
fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> CodegenResult<()> {
// The original 32-bit x86 ELF ABI had a 4-byte aligned stack pointer, but
// newer versions use a 16-byte aligned stack pointer.
let stack_align = 16;
let pointer_width = isa.triple().pointer_width().unwrap();
let word_size = pointer_width.bytes() as usize;
let reg_type = ir::Type::int(u16::from(pointer_width.bits())).unwrap();
let csrs = callee_saved_gprs_used(isa, func);
// The reserved stack area is composed of:
// return address + frame pointer + all callee-saved registers
//
// Pushing the return address is an implicit function of the `call`
// instruction. Each of the others we will then push explicitly. Then we
// will adjust the stack pointer to make room for the rest of the required
// space for this frame.
let csr_stack_size = ((csrs.iter(GPR).len() + 2) * word_size) as i32;
func.create_stack_slot(ir::StackSlotData {
kind: ir::StackSlotKind::IncomingArg,
size: csr_stack_size as u32,
offset: Some(-csr_stack_size),
});
let total_stack_size = layout_stack(&mut func.stack_slots, stack_align)? as i32;
let local_stack_size = i64::from(total_stack_size - csr_stack_size);
// Add CSRs to function signature
let fp_arg = ir::AbiParam::special_reg(
reg_type,
ir::ArgumentPurpose::FramePointer,
RU::rbp as RegUnit,
);
func.signature.params.push(fp_arg);
func.signature.returns.push(fp_arg);
for csr in csrs.iter(GPR) {
let csr_arg = ir::AbiParam::special_reg(reg_type, ir::ArgumentPurpose::CalleeSaved, csr);
func.signature.params.push(csr_arg);
func.signature.returns.push(csr_arg);
}
// Set up the cursor and insert the prologue
let entry_ebb = func.layout.entry_block().expect("missing entry block");
let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_ebb);
insert_common_prologue(&mut pos, local_stack_size, reg_type, &csrs, isa);
// Reset the cursor and insert the epilogue
let mut pos = pos.at_position(CursorPosition::Nowhere);
insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs);
Ok(())
}
/// Insert the prologue for a given function.
/// This is used by common calling conventions such as System V.
fn insert_common_prologue(
pos: &mut EncCursor,
stack_size: i64,
reg_type: ir::types::Type,
csrs: &RegisterSet,
isa: &TargetIsa,
) {
if stack_size > 0 {
// Check if there is a special stack limit parameter. If so, insert a stack check.
if let Some(stack_limit_arg) = pos.func.special_param(ArgumentPurpose::StackLimit) {
// The total stack size covers all stack space used by the function, including the
// pushed CSRs and the frame pointer.
// The return address, implicitly pushed by an x86 `call` instruction, must also be
// accounted for.
// TODO: Check if the function body actually contains a `call` instruction.
let word_size = isa.pointer_bytes();
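            // One word each for the return address and frame pointer, plus one per pushed CSR.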
let total_stack_size = (csrs.iter(GPR).len() + 1 + 1) as i64 * word_size as i64;
insert_stack_check(pos, total_stack_size, stack_limit_arg);
}
}
// Append param to entry EBB
let ebb = pos.current_ebb().expect("missing ebb under cursor");
let fp = pos.func.dfg.append_ebb_param(ebb, reg_type);
pos.func.locations[fp] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
pos.ins().x86_push(fp);
pos.ins()
.copy_special(RU::rsp as RegUnit, RU::rbp as RegUnit);
for reg in csrs.iter(GPR) {
// Append param to entry EBB
let csr_arg = pos.func.dfg.append_ebb_param(ebb, reg_type);
// Assign it a location
pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
// Remember it so we can push it momentarily
pos.ins().x86_push(csr_arg);
}
// Allocate stack frame storage.
if stack_size > 0 {
if isa.flags().probestack_enabled()
&& stack_size > (1 << isa.flags().probestack_size_log2())
{
// Emit a stack probe.
let rax = RU::rax as RegUnit;
let rax_val = ir::ValueLoc::Reg(rax);
// The probestack function expects its input in %rax.
let arg = pos.ins().iconst(reg_type, stack_size);
pos.func.locations[arg] = rax_val;
// Call the probestack function.
let callee = get_probestack_funcref(pos.func, reg_type, rax, isa);
// Make the call.
let call = if !isa.flags().is_pic()
&& isa.triple().pointer_width().unwrap() == PointerWidth::U64
&& !pos.func.dfg.ext_funcs[callee].colocated
{
// 64-bit non-PIC non-colocated calls need to be legalized to call_indirect.
// Use r11 as it may be clobbered under all supported calling conventions.
let r11 = RU::r11 as RegUnit;
let sig = pos.func.dfg.ext_funcs[callee].signature;
let addr = pos.ins().func_addr(reg_type, callee);
pos.func.locations[addr] = ir::ValueLoc::Reg(r11);
pos.ins().call_indirect(sig, addr, &[arg])
} else {
// Otherwise just do a normal call.
pos.ins().call(callee, &[arg])
};
// If the probestack function doesn't adjust sp, do it ourselves.
if !isa.flags().probestack_func_adjusts_sp() {
let result = pos.func.dfg.inst_results(call)[0];
pos.func.locations[result] = rax_val;
pos.ins().adjust_sp_down(result);
}
} else {
// Simply decrement the stack pointer.
pos.ins().adjust_sp_down_imm(Imm64::new(stack_size));
}
}
}
/// Insert a check that generates a trap if the stack pointer goes
/// below a value in `stack_limit_arg`.
fn insert_stack_check(pos: &mut EncCursor, stack_size: i64, stack_limit_arg: ir::Value) {
use crate::ir::condcodes::IntCC;
    // Copy `stack_limit_arg` into %rax and use it to compute the SP threshold.
let stack_limit_copy = pos.ins().copy(stack_limit_arg);
pos.func.locations[stack_limit_copy] = ir::ValueLoc::Reg(RU::rax as RegUnit);
let sp_threshold = pos.ins().iadd_imm(stack_limit_copy, stack_size);
pos.func.locations[sp_threshold] = ir::ValueLoc::Reg(RU::rax as RegUnit);
    // If the stack pointer is currently at or below the SP threshold, then after opening the
    // current stack frame the stack pointer would cross the stack limit, so trap.
let cflags = pos.ins().ifcmp_sp(sp_threshold);
pos.func.locations[cflags] = ir::ValueLoc::Reg(RU::rflags as RegUnit);
pos.ins().trapif(
IntCC::UnsignedGreaterThanOrEqual,
cflags,
ir::TrapCode::StackOverflow,
);
}
/// Find all `return` instructions and insert epilogues before them.
fn insert_common_epilogues(
pos: &mut EncCursor,
stack_size: i64,
reg_type: ir::types::Type,
csrs: &RegisterSet,
) {
while let Some(ebb) = pos.next_ebb() {
pos.goto_last_inst(ebb);
if let Some(inst) = pos.current_inst() {
if pos.func.dfg[inst].opcode().is_return() {
insert_common_epilogue(inst, stack_size, pos, reg_type, csrs);
}
}
}
}
/// Insert an epilogue given a specific `return` instruction.
/// This is used by common calling conventions such as System V.
fn insert_common_epilogue(
inst: ir::Inst,
stack_size: i64,
pos: &mut EncCursor,
reg_type: ir::types::Type,
csrs: &RegisterSet,
) {
if stack_size > 0 {
pos.ins().adjust_sp_up_imm(Imm64::new(stack_size));
}
// Pop all the callee-saved registers, stepping backward each time to
// preserve the correct order.
let fp_ret = pos.ins().x86_pop(reg_type);
pos.prev_inst();
pos.func.locations[fp_ret] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
pos.func.dfg.append_inst_arg(inst, fp_ret);
for reg in csrs.iter(GPR) {
let csr_ret = pos.ins().x86_pop(reg_type);
pos.prev_inst();
pos.func.locations[csr_ret] = ir::ValueLoc::Reg(reg);
pos.func.dfg.append_inst_arg(inst, csr_ret);
}
}


@@ -0,0 +1,342 @@
//! Emitting binary x86 machine code.
use super::enc_tables::{needs_offset, needs_sib_byte};
use super::registers::RU;
use crate::binemit::{bad_encoding, CodeSink, Reloc};
use crate::ir::condcodes::{CondCode, FloatCC, IntCC};
use crate::ir::{Ebb, Function, Inst, InstructionData, JumpTable, Opcode, TrapCode};
use crate::isa::{RegUnit, StackBase, StackBaseMask, StackRef};
use crate::regalloc::RegDiversions;
include!(concat!(env!("OUT_DIR"), "/binemit-x86.rs"));
// Convert a stack base to the corresponding register.
fn stk_base(base: StackBase) -> RegUnit {
let ru = match base {
StackBase::SP => RU::rsp,
StackBase::FP => RU::rbp,
StackBase::Zone => unimplemented!(),
};
ru as RegUnit
}
// Mandatory prefix bytes for Mp* opcodes.
const PREFIX: [u8; 3] = [0x66, 0xf3, 0xf2];
// Second byte for three-byte opcodes for mm=0b10 and mm=0b11.
const OP3_BYTE2: [u8; 2] = [0x38, 0x3a];
// A REX prefix with no bits set: 0b0100WRXB.
const BASE_REX: u8 = 0b0100_0000;
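// For example, 0x49 = REX.W + REX.B selects 64-bit operands and an r8-r15 r/m register.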
// Create a single-register REX prefix, setting the B bit to bit 3 of the register.
// This is used for instructions that encode a register in the low 3 bits of the opcode and for
// instructions that use the ModR/M `reg` field for something else.
fn rex1(reg_b: RegUnit) -> u8 {
let b = ((reg_b >> 3) & 1) as u8;
BASE_REX | b
}
// Create a dual-register REX prefix, setting:
//
// REX.B = bit 3 of r/m register, or SIB base register when a SIB byte is present.
// REX.R = bit 3 of reg register.
fn rex2(rm: RegUnit, reg: RegUnit) -> u8 {
let b = ((rm >> 3) & 1) as u8;
let r = ((reg >> 3) & 1) as u8;
BASE_REX | b | (r << 2)
}
// Create a three-register REX prefix, setting:
//
// REX.B = bit 3 of r/m register, or SIB base register when a SIB byte is present.
// REX.R = bit 3 of reg register.
// REX.X = bit 3 of SIB index register.
fn rex3(rm: RegUnit, reg: RegUnit, index: RegUnit) -> u8 {
let b = ((rm >> 3) & 1) as u8;
let r = ((reg >> 3) & 1) as u8;
let x = ((index >> 3) & 1) as u8;
BASE_REX | b | (x << 1) | (r << 2)
}
// Emit a REX prefix.
//
// The R, X, and B bits are computed from registers using the functions above. The W bit is
// extracted from `bits`.
fn rex_prefix<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
debug_assert_eq!(rex & 0xf8, BASE_REX);
let w = ((bits >> 15) & 1) as u8;
sink.put1(rex | (w << 3));
}
// Emit a single-byte opcode with no REX prefix.
fn put_op1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
debug_assert_eq!(bits & 0x8f00, 0, "Invalid encoding bits for Op1*");
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op1 encoding");
sink.put1(bits as u8);
}
// Emit a single-byte opcode with REX prefix.
fn put_rexop1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
    debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for RexOp1*");
rex_prefix(bits, rex, sink);
sink.put1(bits as u8);
}
// Emit two-byte opcode: 0F XX
fn put_op2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
debug_assert_eq!(bits & 0x8f00, 0x0400, "Invalid encoding bits for Op2*");
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op2 encoding");
sink.put1(0x0f);
sink.put1(bits as u8);
}
// Emit two-byte opcode: 0F XX with REX prefix.
fn put_rexop2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
debug_assert_eq!(bits & 0x0f00, 0x0400, "Invalid encoding bits for RexOp2*");
rex_prefix(bits, rex, sink);
sink.put1(0x0f);
sink.put1(bits as u8);
}
// Emit single-byte opcode with mandatory prefix.
fn put_mp1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
debug_assert_eq!(bits & 0x8c00, 0, "Invalid encoding bits for Mp1*");
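    // Bits 8-9 (pp) select the mandatory prefix: 1 => 0x66, 2 => 0xf3, 3 => 0xf2.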
let pp = (bits >> 8) & 3;
sink.put1(PREFIX[(pp - 1) as usize]);
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp1 encoding");
sink.put1(bits as u8);
}
// Emit single-byte opcode with mandatory prefix and REX.
fn put_rexmp1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
    debug_assert_eq!(bits & 0x0c00, 0, "Invalid encoding bits for RexMp1*");
let pp = (bits >> 8) & 3;
sink.put1(PREFIX[(pp - 1) as usize]);
rex_prefix(bits, rex, sink);
sink.put1(bits as u8);
}
// Emit two-byte opcode (0F XX) with mandatory prefix.
fn put_mp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
debug_assert_eq!(bits & 0x8c00, 0x0400, "Invalid encoding bits for Mp2*");
let pp = (bits >> 8) & 3;
sink.put1(PREFIX[(pp - 1) as usize]);
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp2 encoding");
sink.put1(0x0f);
sink.put1(bits as u8);
}
// Emit two-byte opcode (0F XX) with mandatory prefix and REX.
fn put_rexmp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
debug_assert_eq!(bits & 0x0c00, 0x0400, "Invalid encoding bits for Mp2*");
let pp = (bits >> 8) & 3;
sink.put1(PREFIX[(pp - 1) as usize]);
rex_prefix(bits, rex, sink);
sink.put1(0x0f);
sink.put1(bits as u8);
}
// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix.
fn put_mp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
debug_assert_eq!(bits & 0x8800, 0x0800, "Invalid encoding bits for Mp3*");
let pp = (bits >> 8) & 3;
sink.put1(PREFIX[(pp - 1) as usize]);
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp3 encoding");
let mm = (bits >> 10) & 3;
sink.put1(0x0f);
sink.put1(OP3_BYTE2[(mm - 2) as usize]);
sink.put1(bits as u8);
}
// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix and REX
fn put_rexmp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
    debug_assert_eq!(bits & 0x0800, 0x0800, "Invalid encoding bits for RexMp3*");
let pp = (bits >> 8) & 3;
sink.put1(PREFIX[(pp - 1) as usize]);
rex_prefix(bits, rex, sink);
let mm = (bits >> 10) & 3;
sink.put1(0x0f);
sink.put1(OP3_BYTE2[(mm - 2) as usize]);
sink.put1(bits as u8);
}
/// Emit a ModR/M byte for reg-reg operands.
fn modrm_rr<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
let reg = reg as u8 & 7;
let rm = rm as u8 & 7;
let mut b = 0b11000000;
b |= reg << 3;
b |= rm;
sink.put1(b);
}
/// Emit a ModR/M byte where the reg bits are part of the opcode.
fn modrm_r_bits<CS: CodeSink + ?Sized>(rm: RegUnit, bits: u16, sink: &mut CS) {
let reg = (bits >> 12) as u8 & 7;
let rm = rm as u8 & 7;
let mut b = 0b11000000;
b |= reg << 3;
b |= rm;
sink.put1(b);
}
/// Emit a mode 00 ModR/M byte. This is a register-indirect addressing mode with no offset.
/// Registers %rsp and %rbp are invalid for `rm`: %rsp indicates a SIB byte, and %rbp indicates an
/// absolute immediate 32-bit address.
fn modrm_rm<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
let reg = reg as u8 & 7;
let rm = rm as u8 & 7;
let mut b = 0b00000000;
b |= reg << 3;
b |= rm;
sink.put1(b);
}
/// Emit a mode 00 ModR/M byte with a RIP-relative displacement in 64-bit mode. The effective
/// address is calculated by adding the displacement to the 64-bit RIP of the next instruction.
/// See the Intel Software Developer's Manual, Section 2.2.1.6.
fn modrm_riprel<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
modrm_rm(0b101, reg, sink)
}
/// Emit a mode 01 ModR/M byte. This is a register-indirect addressing mode with 8-bit
/// displacement.
/// Register %rsp is invalid for `rm`. It indicates the presence of a SIB byte.
fn modrm_disp8<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
let reg = reg as u8 & 7;
let rm = rm as u8 & 7;
let mut b = 0b01000000;
b |= reg << 3;
b |= rm;
sink.put1(b);
}
/// Emit a mode 10 ModR/M byte. This is a register-indirect addressing mode with 32-bit
/// displacement.
/// Register %rsp is invalid for `rm`. It indicates the presence of a SIB byte.
fn modrm_disp32<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
let reg = reg as u8 & 7;
let rm = rm as u8 & 7;
let mut b = 0b10000000;
b |= reg << 3;
b |= rm;
sink.put1(b);
}
/// Emit a mode 00 ModR/M with a 100 RM indicating a SIB byte is present.
fn modrm_sib<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
modrm_rm(0b100, reg, sink);
}
/// Emit a mode 01 ModR/M with a 100 RM indicating a SIB byte and 8-bit
/// displacement are present.
fn modrm_sib_disp8<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
modrm_disp8(0b100, reg, sink);
}
/// Emit a mode 10 ModR/M with a 100 RM indicating a SIB byte and 32-bit
/// displacement are present.
fn modrm_sib_disp32<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
modrm_disp32(0b100, reg, sink);
}
/// Emit a SIB byte with a base register and no scale+index.
fn sib_noindex<CS: CodeSink + ?Sized>(base: RegUnit, sink: &mut CS) {
let base = base as u8 & 7;
// SIB SS_III_BBB.
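    // An index field of 0b100 means no index register.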
let mut b = 0b00_100_000;
b |= base;
sink.put1(b);
}
/// Emit a SIB byte with a scale, base, and index.
fn sib<CS: CodeSink + ?Sized>(scale: u8, index: RegUnit, base: RegUnit, sink: &mut CS) {
// SIB SS_III_BBB.
debug_assert_eq!(scale & !0x03, 0, "Scale out of range");
let scale = scale & 3;
let index = index as u8 & 7;
let base = base as u8 & 7;
let b: u8 = (scale << 6) | (index << 3) | base;
sink.put1(b);
}
/// Get the low 4 bits of an opcode for an integer condition code.
///
/// Add this offset to a base opcode for:
///
/// ---- 0x70: Short conditional branch.
/// 0x0f 0x80: Long conditional branch.
/// 0x0f 0x90: SetCC.
///
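/// For example, `Equal` maps to 0x4, so the long conditional branch on equality is encoded as
/// 0x0f 0x84 (`je rel32`).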
fn icc2opc(cond: IntCC) -> u16 {
use crate::ir::condcodes::IntCC::*;
match cond {
// 0x0 = Overflow.
// 0x1 = !Overflow.
UnsignedLessThan => 0x2,
UnsignedGreaterThanOrEqual => 0x3,
Equal => 0x4,
NotEqual => 0x5,
UnsignedLessThanOrEqual => 0x6,
UnsignedGreaterThan => 0x7,
// 0x8 = Sign.
// 0x9 = !Sign.
// 0xa = Parity even.
// 0xb = Parity odd.
SignedLessThan => 0xc,
SignedGreaterThanOrEqual => 0xd,
SignedLessThanOrEqual => 0xe,
SignedGreaterThan => 0xf,
}
}
/// Get the low 4 bits of an opcode for a floating point condition code.
///
/// The ucomiss/ucomisd instructions set the FLAGS bits ZF/PF/CF like this:
///
/// ZPC OSA
/// UN 111 000
/// GT 000 000
/// LT 001 000
/// EQ 100 000
///
/// Not all floating point condition codes are supported.
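/// For example, `GreaterThan` maps to 0x7, giving the SetCC encoding 0x0f 0x97 (`seta`).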
fn fcc2opc(cond: FloatCC) -> u16 {
use crate::ir::condcodes::FloatCC::*;
match cond {
Ordered => 0xb, // EQ|LT|GT => *np (P=0)
Unordered => 0xa, // UN => *p (P=1)
OrderedNotEqual => 0x5, // LT|GT => *ne (Z=0),
UnorderedOrEqual => 0x4, // UN|EQ => *e (Z=1)
GreaterThan => 0x7, // GT => *a (C=0&Z=0)
GreaterThanOrEqual => 0x3, // GT|EQ => *ae (C=0)
UnorderedOrLessThan => 0x2, // UN|LT => *b (C=1)
UnorderedOrLessThanOrEqual => 0x6, // UN|LT|EQ => *be (Z=1|C=1)
Equal | // EQ
NotEqual | // UN|LT|GT
LessThan | // LT
LessThanOrEqual | // LT|EQ
UnorderedOrGreaterThan | // UN|GT
UnorderedOrGreaterThanOrEqual // UN|GT|EQ
=> panic!("{} not supported", cond),
}
}
/// Emit a single-byte branch displacement to `destination`.
fn disp1<CS: CodeSink + ?Sized>(destination: Ebb, func: &Function, sink: &mut CS) {
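    // The branch displacement is relative to the end of the instruction; `sink.offset() + 1`
    // accounts for the displacement byte itself.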
let delta = func.offsets[destination].wrapping_sub(sink.offset() + 1);
sink.put1(delta as u8);
}
/// Emit a four-byte branch displacement to `destination`.
fn disp4<CS: CodeSink + ?Sized>(destination: Ebb, func: &Function, sink: &mut CS) {
let delta = func.offsets[destination].wrapping_sub(sink.offset() + 4);
sink.put4(delta);
}
/// Emit a four-byte displacement to jump table `jt`.
fn jt_disp4<CS: CodeSink + ?Sized>(jt: JumpTable, func: &Function, sink: &mut CS) {
let delta = func.jt_offsets[jt].wrapping_sub(sink.offset() + 4);
sink.put4(delta);
}


@@ -0,0 +1,778 @@
//! Encoding tables for x86 ISAs.
use super::registers::*;
use crate::bitset::BitSet;
use crate::cursor::{Cursor, FuncCursor};
use crate::flowgraph::ControlFlowGraph;
use crate::ir::condcodes::IntCC;
use crate::ir::{self, Function, Inst, InstBuilder};
use crate::isa;
use crate::isa::constraints::*;
use crate::isa::enc_tables::*;
use crate::isa::encoding::base_size;
use crate::isa::encoding::RecipeSizing;
use crate::isa::RegUnit;
use crate::regalloc::RegDiversions;
include!(concat!(env!("OUT_DIR"), "/encoding-x86.rs"));
include!(concat!(env!("OUT_DIR"), "/legalize-x86.rs"));
pub fn needs_sib_byte(reg: RegUnit) -> bool {
reg == RU::r12 as RegUnit || reg == RU::rsp as RegUnit
}
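// %rbp and %r13 have 0b101 in their low three bits; in ModR/M mode 00 that encoding means disp32
// or RIP-relative, so they need an explicit zero displacement when used as a base register.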
pub fn needs_offset(reg: RegUnit) -> bool {
reg == RU::r13 as RegUnit || reg == RU::rbp as RegUnit
}
pub fn needs_sib_byte_or_offset(reg: RegUnit) -> bool {
needs_sib_byte(reg) || needs_offset(reg)
}
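// Helper for the size_plus_* functions below: returns 1 if the address register used by input
// operand `op_index` satisfies `condition_func`, adding a byte to the instruction size.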
fn additional_size_if(
op_index: usize,
inst: Inst,
divert: &RegDiversions,
func: &Function,
condition_func: fn(RegUnit) -> bool,
) -> u8 {
let addr_reg = divert.reg(func.dfg.inst_args(inst)[op_index], &func.locations);
if condition_func(addr_reg) {
1
} else {
0
}
}
fn size_plus_maybe_offset_for_in_reg_0(
sizing: &RecipeSizing,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
sizing.base_size + additional_size_if(0, inst, divert, func, needs_offset)
}
fn size_plus_maybe_offset_for_in_reg_1(
sizing: &RecipeSizing,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
sizing.base_size + additional_size_if(1, inst, divert, func, needs_offset)
}
fn size_plus_maybe_sib_for_in_reg_0(
sizing: &RecipeSizing,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
sizing.base_size + additional_size_if(0, inst, divert, func, needs_sib_byte)
}
fn size_plus_maybe_sib_for_in_reg_1(
sizing: &RecipeSizing,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
sizing.base_size + additional_size_if(1, inst, divert, func, needs_sib_byte)
}
fn size_plus_maybe_sib_or_offset_for_in_reg_0(
sizing: &RecipeSizing,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
sizing.base_size + additional_size_if(0, inst, divert, func, needs_sib_byte_or_offset)
}
fn size_plus_maybe_sib_or_offset_for_in_reg_1(
sizing: &RecipeSizing,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
sizing.base_size + additional_size_if(1, inst, divert, func, needs_sib_byte_or_offset)
}
/// Expand the `sdiv` and `srem` instructions using `x86_sdivmodx`.
fn expand_sdivrem(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
isa: &isa::TargetIsa,
) {
let (x, y, is_srem) = match func.dfg[inst] {
ir::InstructionData::Binary {
opcode: ir::Opcode::Sdiv,
args,
} => (args[0], args[1], false),
ir::InstructionData::Binary {
opcode: ir::Opcode::Srem,
args,
} => (args[0], args[1], true),
_ => panic!("Need sdiv/srem: {}", func.dfg.display_inst(inst, None)),
};
let avoid_div_traps = isa.flags().avoid_div_traps();
let old_ebb = func.layout.pp_ebb(inst);
let result = func.dfg.first_result(inst);
let ty = func.dfg.value_type(result);
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
pos.func.dfg.clear_results(inst);
// If we can tolerate native division traps, sdiv doesn't need branching.
if !avoid_div_traps && !is_srem {
let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
pos.ins().with_result(result).x86_sdivmodx(x, xhi, y);
pos.remove_inst();
return;
}
// EBB handling the -1 divisor case.
let minus_one = pos.func.dfg.make_ebb();
// Final EBB with one argument representing the final result value.
let done = pos.func.dfg.make_ebb();
// Move the `inst` result value onto the `done` EBB.
pos.func.dfg.attach_ebb_param(done, result);
// Start by checking for a -1 divisor which needs to be handled specially.
let is_m1 = pos.ins().ifcmp_imm(y, -1);
pos.ins().brif(IntCC::Equal, is_m1, minus_one, &[]);
// Put in an explicit division-by-zero trap if the environment requires it.
if avoid_div_traps {
pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero);
}
// Now it is safe to execute the `x86_sdivmodx` instruction which will still trap on division
// by zero.
let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
let (quot, rem) = pos.ins().x86_sdivmodx(x, xhi, y);
let divres = if is_srem { rem } else { quot };
pos.ins().jump(done, &[divres]);
// Now deal with the -1 divisor case.
pos.insert_ebb(minus_one);
let m1_result = if is_srem {
// x % -1 = 0.
pos.ins().iconst(ty, 0)
} else {
// Explicitly check for overflow: Trap when x == INT_MIN.
debug_assert!(avoid_div_traps, "Native trapping divide handled above");
let f = pos.ins().ifcmp_imm(x, -1 << (ty.lane_bits() - 1));
pos.ins()
.trapif(IntCC::Equal, f, ir::TrapCode::IntegerOverflow);
// x / -1 = -x.
pos.ins().irsub_imm(x, 0)
};
// Recycle the original instruction as a jump.
pos.func.dfg.replace(inst).jump(done, &[m1_result]);
// Finally insert a label for the completion.
pos.next_inst();
pos.insert_ebb(done);
cfg.recompute_ebb(pos.func, old_ebb);
cfg.recompute_ebb(pos.func, minus_one);
cfg.recompute_ebb(pos.func, done);
}
/// Expand the `udiv` and `urem` instructions using `x86_udivmodx`.
fn expand_udivrem(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
isa: &isa::TargetIsa,
) {
let (x, y, is_urem) = match func.dfg[inst] {
ir::InstructionData::Binary {
opcode: ir::Opcode::Udiv,
args,
} => (args[0], args[1], false),
ir::InstructionData::Binary {
opcode: ir::Opcode::Urem,
args,
} => (args[0], args[1], true),
_ => panic!("Need udiv/urem: {}", func.dfg.display_inst(inst, None)),
};
let avoid_div_traps = isa.flags().avoid_div_traps();
let result = func.dfg.first_result(inst);
let ty = func.dfg.value_type(result);
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
pos.func.dfg.clear_results(inst);
// Put in an explicit division-by-zero trap if the environment requires it.
if avoid_div_traps {
pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero);
}
// Now it is safe to execute the `x86_udivmodx` instruction.
let xhi = pos.ins().iconst(ty, 0);
let reuse = if is_urem {
[None, Some(result)]
} else {
[Some(result), None]
};
pos.ins().with_results(reuse).x86_udivmodx(x, xhi, y);
pos.remove_inst();
}
/// Expand the `fmin` and `fmax` instructions using the x86 `x86_fmin` and `x86_fmax`
/// instructions.
fn expand_minmax(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &isa::TargetIsa,
) {
use crate::ir::condcodes::FloatCC;
let (x, y, x86_opc, bitwise_opc) = match func.dfg[inst] {
ir::InstructionData::Binary {
opcode: ir::Opcode::Fmin,
args,
} => (args[0], args[1], ir::Opcode::X86Fmin, ir::Opcode::Bor),
ir::InstructionData::Binary {
opcode: ir::Opcode::Fmax,
args,
} => (args[0], args[1], ir::Opcode::X86Fmax, ir::Opcode::Band),
_ => panic!("Expected fmin/fmax: {}", func.dfg.display_inst(inst, None)),
};
let old_ebb = func.layout.pp_ebb(inst);
// We need to handle the following conditions, depending on how x and y compare:
//
// 1. LT or GT: The native `x86_opc` min/max instruction does what we need.
// 2. EQ: We need to use `bitwise_opc` to make sure that
// fmin(0.0, -0.0) -> -0.0 and fmax(0.0, -0.0) -> 0.0.
// 3. UN: We need to produce a quiet NaN that is canonical if the inputs are canonical.
// EBB handling case 3) where one operand is NaN.
let uno_ebb = func.dfg.make_ebb();
// EBB that handles the unordered or equal cases 2) and 3).
let ueq_ebb = func.dfg.make_ebb();
// Final EBB with one argument representing the final result value.
let done = func.dfg.make_ebb();
// The basic blocks are laid out to minimize branching for the common cases:
//
// 1) One branch not taken, one jump.
// 2) One branch taken.
// 3) Two branches taken, one jump.
// Move the `inst` result value onto the `done` EBB.
let result = func.dfg.first_result(inst);
let ty = func.dfg.value_type(result);
func.dfg.clear_results(inst);
func.dfg.attach_ebb_param(done, result);
// Test for case 1) ordered and not equal.
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
let cmp_ueq = pos.ins().fcmp(FloatCC::UnorderedOrEqual, x, y);
pos.ins().brnz(cmp_ueq, ueq_ebb, &[]);
// Handle the common ordered, not equal (LT|GT) case.
let one_inst = pos.ins().Binary(x86_opc, ty, x, y).0;
let one_result = pos.func.dfg.first_result(one_inst);
pos.ins().jump(done, &[one_result]);
// Case 3) Unordered.
// We know that at least one operand is a NaN that needs to be propagated. We simply use an
// `fadd` instruction which has the same NaN propagation semantics.
pos.insert_ebb(uno_ebb);
let uno_result = pos.ins().fadd(x, y);
pos.ins().jump(done, &[uno_result]);
// Case 2) or 3).
pos.insert_ebb(ueq_ebb);
// Test for case 3) (UN) one value is NaN.
// TODO: When we get support for flag values, we can reuse the above comparison.
let cmp_uno = pos.ins().fcmp(FloatCC::Unordered, x, y);
pos.ins().brnz(cmp_uno, uno_ebb, &[]);
// We are now in case 2) where x and y compare EQ.
// We need a bitwise operation to get the sign right.
let bw_inst = pos.ins().Binary(bitwise_opc, ty, x, y).0;
let bw_result = pos.func.dfg.first_result(bw_inst);
// This should become a fall-through for this second most common case.
// Recycle the original instruction as a jump.
pos.func.dfg.replace(inst).jump(done, &[bw_result]);
// Finally insert a label for the completion.
pos.next_inst();
pos.insert_ebb(done);
cfg.recompute_ebb(pos.func, old_ebb);
cfg.recompute_ebb(pos.func, ueq_ebb);
cfg.recompute_ebb(pos.func, uno_ebb);
cfg.recompute_ebb(pos.func, done);
}
/// x86 has no unsigned-to-float conversions. We handle the easy case of zero-extending i32 to
/// i64 with a pattern; the rest needs more code.
fn expand_fcvt_from_uint(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &isa::TargetIsa,
) {
use crate::ir::condcodes::IntCC;
let x;
match func.dfg[inst] {
ir::InstructionData::Unary {
opcode: ir::Opcode::FcvtFromUint,
arg,
} => x = arg,
_ => panic!("Need fcvt_from_uint: {}", func.dfg.display_inst(inst, None)),
}
let xty = func.dfg.value_type(x);
let result = func.dfg.first_result(inst);
let ty = func.dfg.value_type(result);
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
// Conversion from unsigned 32-bit is easy on x86-64.
// TODO: This should be guarded by an ISA check.
if xty == ir::types::I32 {
let wide = pos.ins().uextend(ir::types::I64, x);
pos.func.dfg.replace(inst).fcvt_from_sint(ty, wide);
return;
}
let old_ebb = pos.func.layout.pp_ebb(inst);
// EBB handling the case where x < 0.
let neg_ebb = pos.func.dfg.make_ebb();
// Final EBB with one argument representing the final result value.
let done = pos.func.dfg.make_ebb();
// Move the `inst` result value onto the `done` EBB.
pos.func.dfg.clear_results(inst);
pos.func.dfg.attach_ebb_param(done, result);
// If x as a signed int is not negative, we can use the existing `fcvt_from_sint` instruction.
let is_neg = pos.ins().icmp_imm(IntCC::SignedLessThan, x, 0);
pos.ins().brnz(is_neg, neg_ebb, &[]);
// Easy case: just use a signed conversion.
let posres = pos.ins().fcvt_from_sint(ty, x);
pos.ins().jump(done, &[posres]);
// Now handle the negative case.
pos.insert_ebb(neg_ebb);
// Divide x by two to get it in range for the signed conversion, keep the LSB, and scale it
// back up on the FP side.
let ihalf = pos.ins().ushr_imm(x, 1);
let lsb = pos.ins().band_imm(x, 1);
let ifinal = pos.ins().bor(ihalf, lsb);
let fhalf = pos.ins().fcvt_from_sint(ty, ifinal);
let negres = pos.ins().fadd(fhalf, fhalf);
// Recycle the original instruction as a jump.
pos.func.dfg.replace(inst).jump(done, &[negres]);
// Finally insert a label for the completion.
pos.next_inst();
pos.insert_ebb(done);
cfg.recompute_ebb(pos.func, old_ebb);
cfg.recompute_ebb(pos.func, neg_ebb);
cfg.recompute_ebb(pos.func, done);
}
fn expand_fcvt_to_sint(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &isa::TargetIsa,
) {
use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::immediates::{Ieee32, Ieee64};
let x = match func.dfg[inst] {
ir::InstructionData::Unary {
opcode: ir::Opcode::FcvtToSint,
arg,
} => arg,
_ => panic!("Need fcvt_to_sint: {}", func.dfg.display_inst(inst, None)),
};
let old_ebb = func.layout.pp_ebb(inst);
let xty = func.dfg.value_type(x);
let result = func.dfg.first_result(inst);
let ty = func.dfg.value_type(result);
// Final EBB after the bad value checks.
let done = func.dfg.make_ebb();
// The `x86_cvtt2si` performs the desired conversion, but it doesn't trap on NaN or overflow.
// It produces an INT_MIN result instead.
func.dfg.replace(inst).x86_cvtt2si(ty, x);
let mut pos = FuncCursor::new(func).after_inst(inst);
pos.use_srcloc(inst);
let is_done = pos
.ins()
.icmp_imm(IntCC::NotEqual, result, 1 << (ty.lane_bits() - 1));
pos.ins().brnz(is_done, done, &[]);
// We now have the following possibilities:
//
// 1. INT_MIN was actually the correct conversion result.
// 2. The input was NaN -> trap bad_toint
// 3. The input was out of range -> trap int_ovf
//
// Check for NaN.
let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x);
pos.ins()
.trapnz(is_nan, ir::TrapCode::BadConversionToInteger);
// Check for case 1: INT_MIN is the correct result.
// Determine the smallest floating point number that would convert to INT_MIN.
let mut overflow_cc = FloatCC::LessThan;
let output_bits = ty.lane_bits();
let flimit = match xty {
ir::types::F32 =>
// An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so
// there are values less than -2^(N-1) that convert correctly to INT_MIN.
{
pos.ins().f32const(if output_bits < 32 {
overflow_cc = FloatCC::LessThanOrEqual;
Ieee32::fcvt_to_sint_negative_overflow(output_bits)
} else {
Ieee32::pow2(output_bits - 1).neg()
})
}
ir::types::F64 =>
// An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so
// there are values less than -2^(N-1) that convert correctly to INT_MIN.
{
pos.ins().f64const(if output_bits < 64 {
overflow_cc = FloatCC::LessThanOrEqual;
Ieee64::fcvt_to_sint_negative_overflow(output_bits)
} else {
Ieee64::pow2(output_bits - 1).neg()
})
}
_ => panic!("Can't convert {}", xty),
};
let overflow = pos.ins().fcmp(overflow_cc, x, flimit);
pos.ins().trapnz(overflow, ir::TrapCode::IntegerOverflow);
// Finally, we could have a positive value that is too large.
let fzero = match xty {
ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)),
ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)),
_ => panic!("Can't convert {}", xty),
};
let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero);
pos.ins().trapnz(overflow, ir::TrapCode::IntegerOverflow);
pos.ins().jump(done, &[]);
pos.insert_ebb(done);
cfg.recompute_ebb(pos.func, old_ebb);
cfg.recompute_ebb(pos.func, done);
}
fn expand_fcvt_to_sint_sat(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &isa::TargetIsa,
) {
use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::immediates::{Ieee32, Ieee64};
let x = match func.dfg[inst] {
ir::InstructionData::Unary {
opcode: ir::Opcode::FcvtToSintSat,
arg,
} => arg,
_ => panic!(
"Need fcvt_to_sint_sat: {}",
func.dfg.display_inst(inst, None)
),
};
let old_ebb = func.layout.pp_ebb(inst);
let xty = func.dfg.value_type(x);
let result = func.dfg.first_result(inst);
let ty = func.dfg.value_type(result);
// Final EBB after the bad value checks.
let done_ebb = func.dfg.make_ebb();
func.dfg.clear_results(inst);
func.dfg.attach_ebb_param(done_ebb, result);
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
// The `x86_cvtt2si` performs the desired conversion, but it doesn't trap on NaN or
// overflow. It produces an INT_MIN result instead.
let cvtt2si = pos.ins().x86_cvtt2si(ty, x);
let is_done = pos
.ins()
.icmp_imm(IntCC::NotEqual, cvtt2si, 1 << (ty.lane_bits() - 1));
pos.ins().brnz(is_done, done_ebb, &[cvtt2si]);
// We now have the following possibilities:
//
// 1. INT_MIN was actually the correct conversion result.
// 2. The input was NaN -> replace the result value with 0.
// 3. The input was out of range -> saturate the result to the min/max value.
// Check for NaN, which is truncated to 0.
let zero = pos.ins().iconst(ty, 0);
let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x);
pos.ins().brnz(is_nan, done_ebb, &[zero]);
// Check for case 1: INT_MIN is the correct result.
// Determine the smallest floating point number that would convert to INT_MIN.
let mut overflow_cc = FloatCC::LessThan;
let output_bits = ty.lane_bits();
let flimit = match xty {
ir::types::F32 =>
// An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so
// there are values less than -2^(N-1) that convert correctly to INT_MIN.
{
pos.ins().f32const(if output_bits < 32 {
overflow_cc = FloatCC::LessThanOrEqual;
Ieee32::fcvt_to_sint_negative_overflow(output_bits)
} else {
Ieee32::pow2(output_bits - 1).neg()
})
}
ir::types::F64 =>
// An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so
// there are values less than -2^(N-1) that convert correctly to INT_MIN.
{
pos.ins().f64const(if output_bits < 64 {
overflow_cc = FloatCC::LessThanOrEqual;
Ieee64::fcvt_to_sint_negative_overflow(output_bits)
} else {
Ieee64::pow2(output_bits - 1).neg()
})
}
_ => panic!("Can't convert {}", xty),
};
let overflow = pos.ins().fcmp(overflow_cc, x, flimit);
let min_imm = match ty {
ir::types::I32 => i32::min_value() as i64,
ir::types::I64 => i64::min_value(),
_ => panic!("Don't know the min value for {}", ty),
};
let min_value = pos.ins().iconst(ty, min_imm);
pos.ins().brnz(overflow, done_ebb, &[min_value]);
// Finally, we could have a positive value that is too large.
let fzero = match xty {
ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)),
ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)),
_ => panic!("Can't convert {}", xty),
};
let max_imm = match ty {
ir::types::I32 => i32::max_value() as i64,
ir::types::I64 => i64::max_value(),
_ => panic!("Don't know the max value for {}", ty),
};
let max_value = pos.ins().iconst(ty, max_imm);
let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero);
pos.ins().brnz(overflow, done_ebb, &[max_value]);
// Recycle the original instruction.
pos.func.dfg.replace(inst).jump(done_ebb, &[cvtt2si]);
// Finally insert a label for the completion.
pos.next_inst();
pos.insert_ebb(done_ebb);
cfg.recompute_ebb(pos.func, old_ebb);
cfg.recompute_ebb(pos.func, done_ebb);
}
fn expand_fcvt_to_uint(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &isa::TargetIsa,
) {
use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::immediates::{Ieee32, Ieee64};
let x = match func.dfg[inst] {
ir::InstructionData::Unary {
opcode: ir::Opcode::FcvtToUint,
arg,
} => arg,
_ => panic!("Need fcvt_to_uint: {}", func.dfg.display_inst(inst, None)),
};
let old_ebb = func.layout.pp_ebb(inst);
let xty = func.dfg.value_type(x);
let result = func.dfg.first_result(inst);
let ty = func.dfg.value_type(result);
// EBB handling numbers >= 2^(N-1).
let large = func.dfg.make_ebb();
// Final EBB after the bad value checks.
let done = func.dfg.make_ebb();
// Move the `inst` result value onto the `done` EBB.
func.dfg.clear_results(inst);
func.dfg.attach_ebb_param(done, result);
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
// Start by materializing the floating point constant 2^(N-1) where N is the number of bits in
// the destination integer type.
let pow2nm1 = match xty {
ir::types::F32 => pos.ins().f32const(Ieee32::pow2(ty.lane_bits() - 1)),
ir::types::F64 => pos.ins().f64const(Ieee64::pow2(ty.lane_bits() - 1)),
_ => panic!("Can't convert {}", xty),
};
let is_large = pos.ins().ffcmp(x, pow2nm1);
pos.ins()
.brff(FloatCC::GreaterThanOrEqual, is_large, large, &[]);
// We need to generate a specific trap code when `x` is NaN, so reuse the flags from the
// previous comparison.
pos.ins().trapff(
FloatCC::Unordered,
is_large,
ir::TrapCode::BadConversionToInteger,
);
// Now we know that x < 2^(N-1) and not NaN.
let sres = pos.ins().x86_cvtt2si(ty, x);
let is_neg = pos.ins().ifcmp_imm(sres, 0);
pos.ins()
.brif(IntCC::SignedGreaterThanOrEqual, is_neg, done, &[sres]);
pos.ins().trap(ir::TrapCode::IntegerOverflow);
// Handle the case where x >= 2^(N-1) and not NaN.
pos.insert_ebb(large);
let adjx = pos.ins().fsub(x, pow2nm1);
let lres = pos.ins().x86_cvtt2si(ty, adjx);
let is_neg = pos.ins().ifcmp_imm(lres, 0);
pos.ins()
.trapif(IntCC::SignedLessThan, is_neg, ir::TrapCode::IntegerOverflow);
let lfinal = pos.ins().iadd_imm(lres, 1 << (ty.lane_bits() - 1));
// Recycle the original instruction as a jump.
pos.func.dfg.replace(inst).jump(done, &[lfinal]);
// Finally insert a label for the completion.
pos.next_inst();
pos.insert_ebb(done);
cfg.recompute_ebb(pos.func, old_ebb);
cfg.recompute_ebb(pos.func, large);
cfg.recompute_ebb(pos.func, done);
}
fn expand_fcvt_to_uint_sat(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &isa::TargetIsa,
) {
use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::immediates::{Ieee32, Ieee64};
let x = match func.dfg[inst] {
ir::InstructionData::Unary {
opcode: ir::Opcode::FcvtToUintSat,
arg,
} => arg,
_ => panic!(
"Need fcvt_to_uint_sat: {}",
func.dfg.display_inst(inst, None)
),
};
let old_ebb = func.layout.pp_ebb(inst);
let xty = func.dfg.value_type(x);
let result = func.dfg.first_result(inst);
let ty = func.dfg.value_type(result);
// EBB handling numbers >= 2^(N-1).
let large = func.dfg.make_ebb();
// Final EBB after the bad value checks.
let done = func.dfg.make_ebb();
// Move the `inst` result value onto the `done` EBB.
func.dfg.clear_results(inst);
func.dfg.attach_ebb_param(done, result);
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
// Start by materializing the floating point constant 2^(N-1) where N is the number of bits in
// the destination integer type.
let pow2nm1 = match xty {
ir::types::F32 => pos.ins().f32const(Ieee32::pow2(ty.lane_bits() - 1)),
ir::types::F64 => pos.ins().f64const(Ieee64::pow2(ty.lane_bits() - 1)),
_ => panic!("Can't convert {}", xty),
};
let zero = pos.ins().iconst(ty, 0);
let is_large = pos.ins().ffcmp(x, pow2nm1);
pos.ins()
.brff(FloatCC::GreaterThanOrEqual, is_large, large, &[]);
// We need to generate zero when `x` is NaN, so reuse the flags from the previous comparison.
pos.ins().brff(FloatCC::Unordered, is_large, done, &[zero]);
    // Now we know that x < 2^(N-1) and not NaN. If the result of the cvtt2si is non-negative,
    // we're done; otherwise saturate to the minimum unsigned value, that is 0.
let sres = pos.ins().x86_cvtt2si(ty, x);
let is_neg = pos.ins().ifcmp_imm(sres, 0);
pos.ins()
.brif(IntCC::SignedGreaterThanOrEqual, is_neg, done, &[sres]);
pos.ins().jump(done, &[zero]);
// Handle the case where x >= 2^(N-1) and not NaN.
pos.insert_ebb(large);
let adjx = pos.ins().fsub(x, pow2nm1);
let lres = pos.ins().x86_cvtt2si(ty, adjx);
let max_value = pos.ins().iconst(
ty,
match ty {
ir::types::I32 => u32::max_value() as i64,
ir::types::I64 => u64::max_value() as i64,
_ => panic!("Can't convert {}", ty),
},
);
let is_neg = pos.ins().ifcmp_imm(lres, 0);
pos.ins()
.brif(IntCC::SignedLessThan, is_neg, done, &[max_value]);
let lfinal = pos.ins().iadd_imm(lres, 1 << (ty.lane_bits() - 1));
// Recycle the original instruction as a jump.
pos.func.dfg.replace(inst).jump(done, &[lfinal]);
// Finally insert a label for the completion.
pos.next_inst();
pos.insert_ebb(done);
cfg.recompute_ebb(pos.func, old_ebb);
cfg.recompute_ebb(pos.func, large);
cfg.recompute_ebb(pos.func, done);
}


@@ -0,0 +1,145 @@
//! x86 Instruction Set Architectures.
mod abi;
mod binemit;
mod enc_tables;
mod registers;
pub mod settings;
use super::super::settings as shared_settings;
#[cfg(feature = "testing_hooks")]
use crate::binemit::CodeSink;
use crate::binemit::{emit_function, MemoryCodeSink};
use crate::ir;
use crate::isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings};
use crate::isa::Builder as IsaBuilder;
use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
use crate::regalloc;
use crate::result::CodegenResult;
use crate::timing;
use core::fmt;
use std::boxed::Box;
use target_lexicon::{PointerWidth, Triple};
#[allow(dead_code)]
struct Isa {
triple: Triple,
shared_flags: shared_settings::Flags,
isa_flags: settings::Flags,
cpumode: &'static [shared_enc_tables::Level1Entry<u16>],
}
/// Get an ISA builder for creating x86 targets.
pub fn isa_builder(triple: Triple) -> IsaBuilder {
IsaBuilder {
triple,
setup: settings::builder(),
constructor: isa_constructor,
}
}
fn isa_constructor(
triple: Triple,
shared_flags: shared_settings::Flags,
builder: shared_settings::Builder,
) -> Box<TargetIsa> {
let level1 = match triple.pointer_width().unwrap() {
PointerWidth::U16 => unimplemented!("x86-16"),
PointerWidth::U32 => &enc_tables::LEVEL1_I32[..],
PointerWidth::U64 => &enc_tables::LEVEL1_I64[..],
};
Box::new(Isa {
triple,
isa_flags: settings::Flags::new(&shared_flags, builder),
shared_flags,
cpumode: level1,
})
}
impl TargetIsa for Isa {
fn name(&self) -> &'static str {
"x86"
}
fn triple(&self) -> &Triple {
&self.triple
}
fn flags(&self) -> &shared_settings::Flags {
&self.shared_flags
}
fn uses_cpu_flags(&self) -> bool {
true
}
fn uses_complex_addresses(&self) -> bool {
true
}
fn register_info(&self) -> RegInfo {
registers::INFO.clone()
}
fn encoding_info(&self) -> EncInfo {
enc_tables::INFO.clone()
}
fn legal_encodings<'a>(
&'a self,
func: &'a ir::Function,
inst: &'a ir::InstructionData,
ctrl_typevar: ir::Type,
) -> Encodings<'a> {
lookup_enclist(
ctrl_typevar,
inst,
func,
self.cpumode,
&enc_tables::LEVEL2[..],
&enc_tables::ENCLISTS[..],
&enc_tables::LEGALIZE_ACTIONS[..],
&enc_tables::RECIPE_PREDICATES[..],
&enc_tables::INST_PREDICATES[..],
self.isa_flags.predicate_view(),
)
}
fn legalize_signature(&self, sig: &mut ir::Signature, current: bool) {
abi::legalize_signature(sig, &self.triple, current)
}
fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
abi::regclass_for_abi_type(ty)
}
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
abi::allocatable_registers(func, &self.triple)
}
#[cfg(feature = "testing_hooks")]
fn emit_inst(
&self,
func: &ir::Function,
inst: ir::Inst,
divert: &mut regalloc::RegDiversions,
sink: &mut CodeSink,
) {
binemit::emit_inst(func, inst, divert, sink)
}
fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
emit_function(func, binemit::emit_inst, sink)
}
fn prologue_epilogue(&self, func: &mut ir::Function) -> CodegenResult<()> {
let _tt = timing::prologue_epilogue();
abi::prologue_epilogue(func, self)
}
}
impl fmt::Display for Isa {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}\n{}", self.shared_flags, self.isa_flags)
}
}


@@ -0,0 +1,63 @@
//! x86 register descriptions.
use crate::isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit};
include!(concat!(env!("OUT_DIR"), "/registers-x86.rs"));
#[cfg(test)]
mod tests {
use super::*;
use crate::isa::RegUnit;
use std::string::{String, ToString};
#[test]
fn unit_encodings() {
// The encoding of integer registers is not alphabetical.
assert_eq!(INFO.parse_regunit("rax"), Some(0));
assert_eq!(INFO.parse_regunit("rbx"), Some(3));
assert_eq!(INFO.parse_regunit("rcx"), Some(1));
assert_eq!(INFO.parse_regunit("rdx"), Some(2));
assert_eq!(INFO.parse_regunit("rsi"), Some(6));
assert_eq!(INFO.parse_regunit("rdi"), Some(7));
assert_eq!(INFO.parse_regunit("rbp"), Some(5));
assert_eq!(INFO.parse_regunit("rsp"), Some(4));
assert_eq!(INFO.parse_regunit("r8"), Some(8));
assert_eq!(INFO.parse_regunit("r15"), Some(15));
assert_eq!(INFO.parse_regunit("xmm0"), Some(16));
assert_eq!(INFO.parse_regunit("xmm15"), Some(31));
}
#[test]
fn unit_names() {
fn uname(ru: RegUnit) -> String {
INFO.display_regunit(ru).to_string()
}
assert_eq!(uname(0), "%rax");
assert_eq!(uname(3), "%rbx");
assert_eq!(uname(1), "%rcx");
assert_eq!(uname(2), "%rdx");
assert_eq!(uname(6), "%rsi");
assert_eq!(uname(7), "%rdi");
assert_eq!(uname(5), "%rbp");
assert_eq!(uname(4), "%rsp");
assert_eq!(uname(8), "%r8");
assert_eq!(uname(15), "%r15");
assert_eq!(uname(16), "%xmm0");
assert_eq!(uname(31), "%xmm15");
}
#[test]
fn regclasses() {
assert_eq!(GPR.intersect_index(GPR), Some(GPR.into()));
assert_eq!(GPR.intersect_index(ABCD), Some(ABCD.into()));
assert_eq!(GPR.intersect_index(FPR), None);
assert_eq!(ABCD.intersect_index(GPR), Some(ABCD.into()));
assert_eq!(ABCD.intersect_index(ABCD), Some(ABCD.into()));
assert_eq!(ABCD.intersect_index(FPR), None);
assert_eq!(FPR.intersect_index(FPR), Some(FPR.into()));
assert_eq!(FPR.intersect_index(GPR), None);
assert_eq!(FPR.intersect_index(ABCD), None);
}
}


@@ -0,0 +1,52 @@
//! x86 Settings.
use crate::settings::{self, detail, Builder};
use core::fmt;
// Include code generated by `cranelift-codegen/meta-python/gen_settings.py`. This file contains a public
// `Flags` struct with an impl for all of the settings defined in
// `cranelift-codegen/meta-python/isa/x86/settings.py`.
include!(concat!(env!("OUT_DIR"), "/settings-x86.rs"));
#[cfg(test)]
mod tests {
use super::{builder, Flags};
use crate::settings::{self, Configurable};
#[test]
fn presets() {
let shared = settings::Flags::new(settings::builder());
// Nehalem has SSE4.1 but not BMI1.
let mut b0 = builder();
b0.enable("nehalem").unwrap();
let f0 = Flags::new(&shared, b0);
assert_eq!(f0.has_sse41(), true);
assert_eq!(f0.has_bmi1(), false);
let mut b1 = builder();
b1.enable("haswell").unwrap();
let f1 = Flags::new(&shared, b1);
assert_eq!(f1.has_sse41(), true);
assert_eq!(f1.has_bmi1(), true);
}
#[test]
fn display_presets() {
// Spot check that the flags Display impl does not cause a panic
let shared = settings::Flags::new(settings::builder());
let b0 = builder();
let f0 = Flags::new(&shared, b0);
let _ = format!("{}", f0);
let mut b1 = builder();
b1.enable("nehalem").unwrap();
let f1 = Flags::new(&shared, b1);
let _ = format!("{}", f1);
let mut b2 = builder();
b2.enable("haswell").unwrap();
let f2 = Flags::new(&shared, b2);
let _ = format!("{}", f2);
}
}


@@ -0,0 +1,93 @@
//! Iterator utilities.
/// Extra methods for iterators.
pub trait IteratorExtras: Iterator {
/// Create an iterator that produces adjacent pairs of elements from the iterator.
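    ///
    /// For example, `[1, 2, 3]` yields `(1, 2)` and `(2, 3)`.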
fn adjacent_pairs(mut self) -> AdjacentPairs<Self>
where
Self: Sized,
Self::Item: Clone,
{
let elem = self.next();
AdjacentPairs { iter: self, elem }
}
}
impl<T> IteratorExtras for T where T: Iterator {}
/// Adjacent pairs iterator returned by `adjacent_pairs()`.
///
/// This wraps another iterator and produces a sequence of adjacent pairs of elements.
pub struct AdjacentPairs<I>
where
I: Iterator,
I::Item: Clone,
{
iter: I,
elem: Option<I::Item>,
}
impl<I> Iterator for AdjacentPairs<I>
where
I: Iterator,
I::Item: Clone,
{
type Item = (I::Item, I::Item);
fn next(&mut self) -> Option<Self::Item> {
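        // Emit the (current, next) pair and keep `next` as the new current element.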
self.elem.take().and_then(|e| {
self.elem = self.iter.next();
self.elem.clone().map(|n| (e, n))
})
}
}
#[cfg(test)]
mod tests {
use std::vec::Vec;
#[test]
fn adjpairs() {
use super::IteratorExtras;
assert_eq!(
[1, 2, 3, 4]
.iter()
.cloned()
.adjacent_pairs()
.collect::<Vec<_>>(),
vec![(1, 2), (2, 3), (3, 4)]
);
assert_eq!(
[2, 3, 4]
.iter()
.cloned()
.adjacent_pairs()
.collect::<Vec<_>>(),
vec![(2, 3), (3, 4)]
);
assert_eq!(
[3, 4].iter().cloned().adjacent_pairs().collect::<Vec<_>>(),
vec![(3, 4)]
);
assert_eq!(
[4].iter().cloned().adjacent_pairs().collect::<Vec<_>>(),
vec![]
);
assert_eq!(
[].iter()
.cloned()
.adjacent_pairs()
.collect::<Vec<(i32, i32)>>(),
vec![]
);
}
}


@@ -0,0 +1,716 @@
//! Legalize ABI boundaries.
//!
//! This legalizer sub-module contains code for dealing with ABI boundaries:
//!
//! - Function arguments passed to the entry block.
//! - Function arguments passed to call instructions.
//! - Return values from call instructions.
//! - Return values passed to return instructions.
//!
//! The ABI boundary legalization happens in two phases:
//!
//! 1. The `legalize_signatures` function rewrites all the preamble signatures with ABI information
//! and possibly new argument types. It also rewrites the entry block arguments to match.
//! 2. The `handle_call_abi` and `handle_return_abi` functions rewrite call and return instructions
//! to match the new ABI signatures.
//!
//! Between the two phases, preamble signatures and call/return arguments don't match. This
//! intermediate state doesn't type check.
use crate::abi::{legalize_abi_value, ValueConversion};
use crate::cursor::{Cursor, FuncCursor};
use crate::flowgraph::ControlFlowGraph;
use crate::ir::instructions::CallInfo;
use crate::ir::{
AbiParam, ArgumentLoc, ArgumentPurpose, DataFlowGraph, Ebb, Function, Inst, InstBuilder,
SigRef, Signature, Type, Value, ValueLoc,
};
use crate::isa::TargetIsa;
use crate::legalizer::split::{isplit, vsplit};
use log::debug;
use std::vec::Vec;
/// Legalize all the function signatures in `func`.
///
/// This changes all signatures to be ABI-compliant with full `ArgumentLoc` annotations. It doesn't
/// change the entry block arguments, calls, or return instructions, so this can leave the function
/// in a state with type discrepancies.
pub fn legalize_signatures(func: &mut Function, isa: &TargetIsa) {
legalize_signature(&mut func.signature, true, isa);
for sig_data in func.dfg.signatures.values_mut() {
legalize_signature(sig_data, false, isa);
}
if let Some(entry) = func.layout.entry_block() {
legalize_entry_params(func, entry);
spill_entry_params(func, entry);
}
}
/// Legalize the libcall signature, which we may generate on the fly after
/// `legalize_signatures` has been called.
pub fn legalize_libcall_signature(signature: &mut Signature, isa: &TargetIsa) {
legalize_signature(signature, false, isa);
}
/// Legalize the given signature.
///
/// `current` is true if this is the signature for the current function.
fn legalize_signature(signature: &mut Signature, current: bool, isa: &TargetIsa) {
isa.legalize_signature(signature, current);
}
/// Legalize the entry block parameters after `func`'s signature has been legalized.
///
/// The legalized signature may contain more parameters than the original signature, and the
/// parameter types have been changed. This function goes through the parameters of the entry EBB
/// and replaces them with parameters of the right type for the ABI.
///
/// The original entry EBB parameters are computed from the new ABI parameters by code inserted at
/// the top of the entry block.
fn legalize_entry_params(func: &mut Function, entry: Ebb) {
let mut has_sret = false;
let mut has_link = false;
let mut has_vmctx = false;
let mut has_sigid = false;
let mut has_stack_limit = false;
// Insert position for argument conversion code.
// We want to insert instructions before the first instruction in the entry block.
// If the entry block is empty, append instructions to it instead.
let mut pos = FuncCursor::new(func).at_first_inst(entry);
// Keep track of the argument types in the ABI-legalized signature.
let mut abi_arg = 0;
// Process the EBB parameters one at a time, possibly replacing one argument with multiple new
// ones. We do this by detaching the entry EBB parameters first.
let ebb_params = pos.func.dfg.detach_ebb_params(entry);
let mut old_arg = 0;
while let Some(arg) = ebb_params.get(old_arg, &pos.func.dfg.value_lists) {
old_arg += 1;
let abi_type = pos.func.signature.params[abi_arg];
let arg_type = pos.func.dfg.value_type(arg);
if arg_type == abi_type.value_type {
// No value translation is necessary, this argument matches the ABI type.
// Just use the original EBB argument value. This is the most common case.
pos.func.dfg.attach_ebb_param(entry, arg);
match abi_type.purpose {
ArgumentPurpose::Normal => {}
ArgumentPurpose::FramePointer => {}
ArgumentPurpose::CalleeSaved => {}
ArgumentPurpose::StructReturn => {
debug_assert!(!has_sret, "Multiple sret arguments found");
has_sret = true;
}
ArgumentPurpose::VMContext => {
debug_assert!(!has_vmctx, "Multiple vmctx arguments found");
has_vmctx = true;
}
ArgumentPurpose::SignatureId => {
debug_assert!(!has_sigid, "Multiple sigid arguments found");
has_sigid = true;
}
ArgumentPurpose::StackLimit => {
debug_assert!(!has_stack_limit, "Multiple stack_limit arguments found");
has_stack_limit = true;
}
_ => panic!("Unexpected special-purpose arg {}", abi_type),
}
abi_arg += 1;
} else {
// Compute the value we want for `arg` from the legalized ABI parameters.
let mut get_arg = |func: &mut Function, ty| {
let abi_type = func.signature.params[abi_arg];
debug_assert_eq!(
abi_type.purpose,
ArgumentPurpose::Normal,
"Can't legalize special-purpose argument"
);
if ty == abi_type.value_type {
abi_arg += 1;
Ok(func.dfg.append_ebb_param(entry, ty))
} else {
Err(abi_type)
}
};
let converted = convert_from_abi(&mut pos, arg_type, Some(arg), &mut get_arg);
// The old `arg` is no longer an attached EBB argument, but there are probably still
// uses of the value.
debug_assert_eq!(pos.func.dfg.resolve_aliases(arg), converted);
}
}
// The legalized signature may contain additional parameters representing special-purpose
// registers.
for &arg in &pos.func.signature.params[abi_arg..] {
match arg.purpose {
// Any normal parameters should have been processed above.
ArgumentPurpose::Normal => {
panic!("Leftover arg: {}", arg);
}
// The callee-save parameters should not appear until after register allocation is
// done.
ArgumentPurpose::FramePointer | ArgumentPurpose::CalleeSaved => {
panic!("Premature callee-saved arg {}", arg);
}
// These can be meaningfully added by `legalize_signature()`.
ArgumentPurpose::Link => {
debug_assert!(!has_link, "Multiple link parameters found");
has_link = true;
}
ArgumentPurpose::StructReturn => {
debug_assert!(!has_sret, "Multiple sret parameters found");
has_sret = true;
}
ArgumentPurpose::VMContext => {
debug_assert!(!has_vmctx, "Multiple vmctx parameters found");
has_vmctx = true;
}
ArgumentPurpose::SignatureId => {
debug_assert!(!has_sigid, "Multiple sigid parameters found");
has_sigid = true;
}
ArgumentPurpose::StackLimit => {
debug_assert!(!has_stack_limit, "Multiple stack_limit parameters found");
has_stack_limit = true;
}
}
// Just create entry block values to match here. We will use them in `handle_return_abi()`
// below.
pos.func.dfg.append_ebb_param(entry, arg.value_type);
}
}
/// Legalize the results returned from a call instruction to match the ABI signature.
///
/// The cursor `pos` points to a call instruction with at least one return value. The cursor will
/// be left pointing after the instructions inserted to convert the return values.
///
/// This function is very similar to the `legalize_entry_params` function above.
///
/// Returns the possibly new instruction representing the call.
fn legalize_inst_results<ResType>(pos: &mut FuncCursor, mut get_abi_type: ResType) -> Inst
where
ResType: FnMut(&Function, usize) -> AbiParam,
{
let call = pos
.current_inst()
.expect("Cursor must point to a call instruction");
// We theoretically allow for call instructions that return a number of fixed results before
// the call return values. In practice, it doesn't happen.
debug_assert_eq!(
pos.func.dfg[call]
.opcode()
.constraints()
.num_fixed_results(),
0,
"Fixed results on calls not supported"
);
let results = pos.func.dfg.detach_results(call);
let mut next_res = 0;
let mut abi_res = 0;
// Point immediately after the call.
pos.next_inst();
while let Some(res) = results.get(next_res, &pos.func.dfg.value_lists) {
next_res += 1;
let res_type = pos.func.dfg.value_type(res);
if res_type == get_abi_type(pos.func, abi_res).value_type {
// No value translation is necessary, this result matches the ABI type.
pos.func.dfg.attach_result(call, res);
abi_res += 1;
} else {
let mut get_res = |func: &mut Function, ty| {
let abi_type = get_abi_type(func, abi_res);
if ty == abi_type.value_type {
let last_res = func.dfg.append_result(call, ty);
abi_res += 1;
Ok(last_res)
} else {
Err(abi_type)
}
};
let v = convert_from_abi(pos, res_type, Some(res), &mut get_res);
debug_assert_eq!(pos.func.dfg.resolve_aliases(res), v);
}
}
call
}
/// Compute the original value of type `ty` from the legalized ABI arguments.
///
/// The conversion is recursive, controlled by the `get_arg` closure which is called to retrieve an
/// ABI argument. It returns:
///
/// - `Ok(arg)` if the requested type matches the next ABI argument.
/// - `Err(arg_type)` if further conversions are needed from the ABI argument `arg_type`.
///
/// If the `into_result` value is provided, the converted result will be written into that value.
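///
/// For example, if the ABI passed an `i8` as a zero-extended `i32`, requesting `i8` here
/// fetches the `i32` argument and reduces it (a sketch, hypothetical value numbers):
///
/// ```clif
/// v1 = ireduce.i8 v10
/// ```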
fn convert_from_abi<GetArg>(
pos: &mut FuncCursor,
ty: Type,
into_result: Option<Value>,
get_arg: &mut GetArg,
) -> Value
where
GetArg: FnMut(&mut Function, Type) -> Result<Value, AbiParam>,
{
// Terminate the recursion when we get the desired type.
let arg_type = match get_arg(pos.func, ty) {
Ok(v) => {
debug_assert_eq!(pos.func.dfg.value_type(v), ty);
debug_assert_eq!(into_result, None);
return v;
}
Err(t) => t,
};
// Reconstruct how `ty` was legalized into the `arg_type` argument.
let conversion = legalize_abi_value(ty, &arg_type);
debug!("convert_from_abi({}): {:?}", ty, conversion);
// The conversion describes how the value was lowered to the ABI argument; we implement the
// reverse conversion here.
match conversion {
// Construct a `ty` by concatenating two ABI integers.
ValueConversion::IntSplit => {
let abi_ty = ty.half_width().expect("Invalid type for conversion");
let lo = convert_from_abi(pos, abi_ty, None, get_arg);
let hi = convert_from_abi(pos, abi_ty, None, get_arg);
debug!(
"intsplit {}: {}, {}: {}",
lo,
pos.func.dfg.value_type(lo),
hi,
pos.func.dfg.value_type(hi)
);
pos.ins().with_results([into_result]).iconcat(lo, hi)
}
// Construct a `ty` by concatenating two halves of a vector.
ValueConversion::VectorSplit => {
let abi_ty = ty.half_vector().expect("Invalid type for conversion");
let lo = convert_from_abi(pos, abi_ty, None, get_arg);
let hi = convert_from_abi(pos, abi_ty, None, get_arg);
pos.ins().with_results([into_result]).vconcat(lo, hi)
}
// Construct a `ty` by bit-casting from an integer type.
ValueConversion::IntBits => {
debug_assert!(!ty.is_int());
let abi_ty = Type::int(ty.bits()).expect("Invalid type for conversion");
let arg = convert_from_abi(pos, abi_ty, None, get_arg);
pos.ins().with_results([into_result]).bitcast(ty, arg)
}
// ABI argument is a sign-extended version of the value we want.
ValueConversion::Sext(abi_ty) => {
let arg = convert_from_abi(pos, abi_ty, None, get_arg);
// TODO: Currently, we don't take advantage of the ABI argument being sign-extended.
// We could insert an `assert_sreduce` which would fold with a following `sextend` of
// this value.
pos.ins().with_results([into_result]).ireduce(ty, arg)
}
ValueConversion::Uext(abi_ty) => {
let arg = convert_from_abi(pos, abi_ty, None, get_arg);
// TODO: Currently, we don't take advantage of the ABI argument being zero-extended.
// We could insert an `assert_ureduce` which would fold with a following `uextend` of
// this value.
pos.ins().with_results([into_result]).ireduce(ty, arg)
}
}
}
/// Convert `value` to match an ABI signature by inserting instructions at `pos`.
///
/// This may require expanding the value to multiple ABI arguments. The conversion process is
/// recursive and controlled by the `put_arg` closure. When a candidate argument value is presented
/// to the closure, it will perform one of two actions:
///
/// 1. If the suggested argument has an acceptable value type, consume it by adding it to the list
/// of arguments and return `Ok(())`.
/// 2. If the suggested argument doesn't have the right value type, don't change anything, but
/// return the `Err(AbiParam)` that is needed.
///
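/// For example, passing an `i64` value on a 32-bit target splits it into two `i32` ABI
/// arguments (a sketch, hypothetical value numbers):
///
/// ```clif
/// v10, v11 = isplit v1
/// ```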
fn convert_to_abi<PutArg>(
pos: &mut FuncCursor,
cfg: &ControlFlowGraph,
value: Value,
put_arg: &mut PutArg,
) where
PutArg: FnMut(&mut Function, Value) -> Result<(), AbiParam>,
{
// Start by invoking the closure to either terminate the recursion or get the argument type
// we're trying to match.
let arg_type = match put_arg(pos.func, value) {
Ok(_) => return,
Err(t) => t,
};
let ty = pos.func.dfg.value_type(value);
match legalize_abi_value(ty, &arg_type) {
ValueConversion::IntSplit => {
let curpos = pos.position();
let srcloc = pos.srcloc();
let (lo, hi) = isplit(&mut pos.func, cfg, curpos, srcloc, value);
convert_to_abi(pos, cfg, lo, put_arg);
convert_to_abi(pos, cfg, hi, put_arg);
}
ValueConversion::VectorSplit => {
let curpos = pos.position();
let srcloc = pos.srcloc();
let (lo, hi) = vsplit(&mut pos.func, cfg, curpos, srcloc, value);
convert_to_abi(pos, cfg, lo, put_arg);
convert_to_abi(pos, cfg, hi, put_arg);
}
ValueConversion::IntBits => {
debug_assert!(!ty.is_int());
let abi_ty = Type::int(ty.bits()).expect("Invalid type for conversion");
let arg = pos.ins().bitcast(abi_ty, value);
convert_to_abi(pos, cfg, arg, put_arg);
}
ValueConversion::Sext(abi_ty) => {
let arg = pos.ins().sextend(abi_ty, value);
convert_to_abi(pos, cfg, arg, put_arg);
}
ValueConversion::Uext(abi_ty) => {
let arg = pos.ins().uextend(abi_ty, value);
convert_to_abi(pos, cfg, arg, put_arg);
}
}
}
/// Check if a sequence of arguments match a desired sequence of argument types.
fn check_arg_types(dfg: &DataFlowGraph, args: &[Value], types: &[AbiParam]) -> bool {
let arg_types = args.iter().map(|&v| dfg.value_type(v));
let sig_types = types.iter().map(|&at| at.value_type);
arg_types.eq(sig_types)
}
/// Check if the arguments of the call `inst` match the signature.
///
/// Returns `Ok(())` if the signature matches and no changes are needed, or `Err(sig_ref)` if the
/// signature doesn't match.
fn check_call_signature(dfg: &DataFlowGraph, inst: Inst) -> Result<(), SigRef> {
// Extract the signature and argument values.
let (sig_ref, args) = match dfg[inst].analyze_call(&dfg.value_lists) {
CallInfo::Direct(func, args) => (dfg.ext_funcs[func].signature, args),
CallInfo::Indirect(sig_ref, args) => (sig_ref, args),
CallInfo::NotACall => panic!("Expected call, got {:?}", dfg[inst]),
};
let sig = &dfg.signatures[sig_ref];
if check_arg_types(dfg, args, &sig.params[..])
&& check_arg_types(dfg, dfg.inst_results(inst), &sig.returns[..])
{
// All types check out.
Ok(())
} else {
// Call types need fixing.
Err(sig_ref)
}
}
/// Check if the arguments of the return `inst` match the signature.
fn check_return_signature(dfg: &DataFlowGraph, inst: Inst, sig: &Signature) -> bool {
check_arg_types(dfg, dfg.inst_variable_args(inst), &sig.returns)
}
/// Insert ABI conversion code for the arguments to the call or return instruction at `pos`.
///
/// - `abi_args` is the number of arguments that the ABI signature requires.
/// - `get_abi_type` is a closure that can provide the desired `AbiParam` for a given ABI
/// argument number in `0..abi_args`.
///
fn legalize_inst_arguments<ArgType>(
pos: &mut FuncCursor,
cfg: &ControlFlowGraph,
abi_args: usize,
mut get_abi_type: ArgType,
) where
ArgType: FnMut(&Function, usize) -> AbiParam,
{
let inst = pos
.current_inst()
.expect("Cursor must point to a call instruction");
// Lift the value list out of the call instruction so we can modify it.
let mut vlist = pos.func.dfg[inst]
.take_value_list()
.expect("Call must have a value list");
// The value list contains all arguments to the instruction, including the callee on an
// indirect call which isn't part of the call arguments that must match the ABI signature.
// Figure out how many fixed values are at the front of the list. We won't touch those.
let num_fixed_values = pos.func.dfg[inst]
.opcode()
.constraints()
.num_fixed_value_arguments();
let have_args = vlist.len(&pos.func.dfg.value_lists) - num_fixed_values;
// Grow the value list to the right size and shift all the existing arguments to the right.
// This lets us write the new argument values into the list without overwriting the old
// arguments.
//
// Before:
//
// <--> fixed_values
// <-----------> have_args
// [FFFFOOOOOOOOOOOOO]
//
// After grow_at():
//
// <--> fixed_values
// <-----------> have_args
// <------------------> abi_args
// [FFFF-------OOOOOOOOOOOOO]
// ^
// old_arg_offset
//
// After writing the new arguments:
//
// <--> fixed_values
// <------------------> abi_args
// [FFFFNNNNNNNNNNNNNNNNNNNN]
//
vlist.grow_at(
num_fixed_values,
abi_args - have_args,
&mut pos.func.dfg.value_lists,
);
let old_arg_offset = num_fixed_values + abi_args - have_args;
let mut abi_arg = 0;
for old_arg in 0..have_args {
let old_value = vlist
.get(old_arg_offset + old_arg, &pos.func.dfg.value_lists)
.unwrap();
let mut put_arg = |func: &mut Function, arg| {
let abi_type = get_abi_type(func, abi_arg);
if func.dfg.value_type(arg) == abi_type.value_type {
// This is the argument type we need.
vlist.as_mut_slice(&mut func.dfg.value_lists)[num_fixed_values + abi_arg] = arg;
abi_arg += 1;
Ok(())
} else {
Err(abi_type)
}
};
convert_to_abi(pos, cfg, old_value, &mut put_arg);
}
// Put the modified value list back.
pos.func.dfg[inst].put_value_list(vlist);
}
/// Insert ABI conversion code before and after the call instruction at `pos`.
///
/// Instructions inserted before the call will compute the appropriate ABI values for the
/// callee's new ABI-legalized signature. The function call arguments are rewritten in place to
/// match the new signature.
///
/// Instructions will be inserted after the call to convert returned ABI values back to the
/// original return values. The call's result values will be adapted to match the new signature.
///
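/// A sketch on a 32-bit target, where a call taking an `i64` argument is rewritten
/// (hypothetical value numbers):
///
/// ```clif
/// v2 = call fn0(v1)
/// ```
///
/// becomes:
///
/// ```clif
/// v10, v11 = isplit v1
/// v2 = call fn0(v10, v11)
/// ```
///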
/// Returns `true` if any instructions were inserted.
pub fn handle_call_abi(mut inst: Inst, func: &mut Function, cfg: &ControlFlowGraph) -> bool {
let pos = &mut FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
// Start by checking if the argument types already match the signature.
let sig_ref = match check_call_signature(&pos.func.dfg, inst) {
Ok(_) => return spill_call_arguments(pos),
Err(s) => s,
};
// OK, we need to fix the call arguments to match the ABI signature.
let abi_args = pos.func.dfg.signatures[sig_ref].params.len();
legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| {
func.dfg.signatures[sig_ref].params[abi_arg]
});
if !pos.func.dfg.signatures[sig_ref].returns.is_empty() {
inst = legalize_inst_results(pos, |func, abi_res| {
func.dfg.signatures[sig_ref].returns[abi_res]
});
}
debug_assert!(
check_call_signature(&pos.func.dfg, inst).is_ok(),
"Signature still wrong: {}, {}{}",
pos.func.dfg.display_inst(inst, None),
sig_ref,
pos.func.dfg.signatures[sig_ref]
);
// Go back and insert spills for any stack arguments.
pos.goto_inst(inst);
spill_call_arguments(pos);
// Yes, we changed stuff.
true
}
/// Insert ABI conversion code before and after the return instruction at `inst`.
///
/// Return `true` if any instructions were inserted.
pub fn handle_return_abi(inst: Inst, func: &mut Function, cfg: &ControlFlowGraph) -> bool {
// Check if the returned types already match the signature.
if check_return_signature(&func.dfg, inst, &func.signature) {
return false;
}
// Count the special-purpose return values (`link`, `sret`, and `vmctx`) that were appended to
// the legalized signature.
let special_args = func
.signature
.returns
.iter()
.rev()
.take_while(|&rt| {
rt.purpose == ArgumentPurpose::Link
|| rt.purpose == ArgumentPurpose::StructReturn
|| rt.purpose == ArgumentPurpose::VMContext
})
.count();
let abi_args = func.signature.returns.len() - special_args;
let pos = &mut FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| {
func.signature.returns[abi_arg]
});
debug_assert_eq!(pos.func.dfg.inst_variable_args(inst).len(), abi_args);
// Append special return arguments for any `sret`, `link`, and `vmctx` return values added to
// the legalized signature. These values should simply be propagated from the entry block
// arguments.
if special_args > 0 {
debug!(
"Adding {} special-purpose arguments to {}",
special_args,
pos.func.dfg.display_inst(inst, None)
);
let mut vlist = pos.func.dfg[inst].take_value_list().unwrap();
for arg in &pos.func.signature.returns[abi_args..] {
match arg.purpose {
ArgumentPurpose::Link
| ArgumentPurpose::StructReturn
| ArgumentPurpose::VMContext => {}
ArgumentPurpose::Normal => panic!("unexpected return value {}", arg),
_ => panic!("Unsupported special purpose return value {}", arg),
}
// A `link`/`sret`/`vmctx` return value can only appear in a signature that has a
// unique matching argument. They are appended at the end, so search the signature from
// the end.
let idx = pos
.func
.signature
.params
.iter()
.rposition(|t| t.purpose == arg.purpose)
.expect("No matching special purpose argument.");
// Get the corresponding entry block value and add it to the return instruction's
// arguments.
let val = pos
.func
.dfg
.ebb_params(pos.func.layout.entry_block().unwrap())[idx];
debug_assert_eq!(pos.func.dfg.value_type(val), arg.value_type);
vlist.push(val, &mut pos.func.dfg.value_lists);
}
pos.func.dfg[inst].put_value_list(vlist);
}
debug_assert!(
check_return_signature(&pos.func.dfg, inst, &pos.func.signature),
"Signature still wrong: {} / signature {}",
pos.func.dfg.display_inst(inst, None),
pos.func.signature
);
// Yes, we changed stuff.
true
}
/// Assign stack slots to incoming function parameters on the stack.
///
/// Values that are passed into the function on the stack must be assigned to an `IncomingArg`
/// stack slot already during legalization.
fn spill_entry_params(func: &mut Function, entry: Ebb) {
for (abi, &arg) in func.signature.params.iter().zip(func.dfg.ebb_params(entry)) {
if let ArgumentLoc::Stack(offset) = abi.location {
let ss = func.stack_slots.make_incoming_arg(abi.value_type, offset);
func.locations[arg] = ValueLoc::Stack(ss);
}
}
}
/// Assign stack slots to outgoing function arguments on the stack.
///
/// Values that are passed to a called function on the stack must be assigned to a matching
/// `OutgoingArg` stack slot. The assignment must happen immediately before the call.
///
/// TODO: The outgoing stack slots can be written a bit earlier, as long as there are no branches
/// or calls between writing the stack slots and the call instruction. Writing the slots earlier
/// could help reduce register pressure before the call.
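///
/// A sketch, assuming an ABI that passes the second argument on the stack (hypothetical
/// value numbers):
///
/// ```clif
/// v2 = spill v1
/// call fn0(v0, v2)
/// ```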
fn spill_call_arguments(pos: &mut FuncCursor) -> bool {
let inst = pos
.current_inst()
.expect("Cursor must point to a call instruction");
let sig_ref = pos
.func
.dfg
.call_signature(inst)
.expect("Call instruction expected.");
// Start by building a list of stack slots and arguments to be replaced.
// This requires borrowing `pos.func.dfg`, so we can't change anything.
let arglist = {
let locations = &pos.func.locations;
let stack_slots = &mut pos.func.stack_slots;
pos.func
.dfg
.inst_variable_args(inst)
.iter()
.zip(&pos.func.dfg.signatures[sig_ref].params)
.enumerate()
.filter_map(|(idx, (&arg, abi))| {
match abi.location {
ArgumentLoc::Stack(offset) => {
// Assign `arg` to a new stack slot, unless it's already in the correct
// slot. The legalization needs to be idempotent, so we should see a
// correct outgoing slot on the second pass.
let ss = stack_slots.get_outgoing_arg(abi.value_type, offset);
if locations[arg] != ValueLoc::Stack(ss) {
Some((idx, arg, ss))
} else {
None
}
}
_ => None,
}
})
.collect::<Vec<_>>()
};
if arglist.is_empty() {
return false;
}
// Insert the spill instructions and rewrite call arguments.
for (idx, arg, ss) in arglist {
let stack_val = pos.ins().spill(arg);
pos.func.locations[stack_val] = ValueLoc::Stack(ss);
pos.func.dfg.inst_variable_args_mut(inst)[idx] = stack_val;
}
// We changed stuff.
true
}

View File

@@ -0,0 +1,54 @@
//! Legalization of calls.
//!
//! This module exports the `expand_call` function which transforms a `call`
//! instruction into `func_addr` and `call_indirect` instructions.
use crate::cursor::{Cursor, FuncCursor};
use crate::flowgraph::ControlFlowGraph;
use crate::ir::{self, InstBuilder};
use crate::isa::TargetIsa;
/// Expand a `call` instruction. This lowers it to a `call_indirect`, which
/// is only done if the ABI doesn't support direct calls.
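///
/// A sketch of the transformation (hypothetical value and entity numbers):
///
/// ```clif
/// v2 = call fn0(v0, v1)
/// ```
///
/// becomes:
///
/// ```clif
/// v3 = func_addr.i64 fn0
/// v2 = call_indirect sig0, v3(v0, v1)
/// ```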
pub fn expand_call(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
isa: &TargetIsa,
) {
// Unpack the instruction.
let (func_ref, old_args) = match func.dfg[inst] {
ir::InstructionData::Call {
opcode,
ref args,
func_ref,
} => {
debug_assert_eq!(opcode, ir::Opcode::Call);
(func_ref, args.clone())
}
_ => panic!("Wanted call: {}", func.dfg.display_inst(inst, None)),
};
let ptr_ty = isa.pointer_type();
let sig = func.dfg.ext_funcs[func_ref].signature;
let callee = {
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
pos.ins().func_addr(ptr_ty, func_ref)
};
let mut new_args = ir::ValueList::default();
new_args.push(callee, &mut func.dfg.value_lists);
for i in 0..old_args.len(&func.dfg.value_lists) {
new_args.push(
old_args.as_slice(&func.dfg.value_lists)[i],
&mut func.dfg.value_lists,
);
}
func.dfg
.replace(inst)
.CallIndirect(ir::Opcode::CallIndirect, ptr_ty, sig, new_args);
}

View File

@@ -0,0 +1,129 @@
//! Legalization of global values.
//!
//! This module exports the `expand_global_value` function which transforms a `global_value`
//! instruction into code that depends on the kind of global value referenced.
use crate::cursor::{Cursor, FuncCursor};
use crate::flowgraph::ControlFlowGraph;
use crate::ir::{self, InstBuilder};
use crate::isa::TargetIsa;
/// Expand a `global_value` instruction according to the definition of the global value.
pub fn expand_global_value(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
isa: &TargetIsa,
) {
// Unpack the instruction.
let gv = match func.dfg[inst] {
ir::InstructionData::UnaryGlobalValue {
opcode,
global_value,
} => {
debug_assert_eq!(opcode, ir::Opcode::GlobalValue);
global_value
}
_ => panic!("Wanted global_value: {}", func.dfg.display_inst(inst, None)),
};
match func.global_values[gv] {
ir::GlobalValueData::VMContext => vmctx_addr(inst, func),
ir::GlobalValueData::IAddImm {
base,
offset,
global_type,
} => iadd_imm_addr(inst, func, base, offset.into(), global_type),
ir::GlobalValueData::Load {
base,
offset,
global_type,
readonly,
} => load_addr(inst, func, base, offset, global_type, readonly, isa),
ir::GlobalValueData::Symbol { .. } => symbol(inst, func, gv, isa),
}
}
/// Expand a `global_value` instruction for a vmctx global.
fn vmctx_addr(inst: ir::Inst, func: &mut ir::Function) {
// Get the value representing the `vmctx` argument.
let vmctx = func
.special_param(ir::ArgumentPurpose::VMContext)
.expect("Missing vmctx parameter");
// Replace the `global_value` instruction's value with an alias to the vmctx arg.
let result = func.dfg.first_result(inst);
func.dfg.clear_results(inst);
func.dfg.change_to_alias(result, vmctx);
func.layout.remove_inst(inst);
}
/// Expand a `global_value` instruction for an iadd_imm global.
fn iadd_imm_addr(
inst: ir::Inst,
func: &mut ir::Function,
base: ir::GlobalValue,
offset: i64,
global_type: ir::Type,
) {
let mut pos = FuncCursor::new(func).at_inst(inst);
// Get the value for the lhs. For tidiness, expand VMContext here so that we avoid
// `vmctx_addr` which creates an otherwise unneeded value alias.
let lhs = if let ir::GlobalValueData::VMContext = pos.func.global_values[base] {
pos.func
.special_param(ir::ArgumentPurpose::VMContext)
.expect("Missing vmctx parameter")
} else {
pos.ins().global_value(global_type, base)
};
// Simply replace the `global_value` instruction with an `iadd_imm`, reusing the result value.
pos.func.dfg.replace(inst).iadd_imm(lhs, offset);
}
/// Expand a `global_value` instruction for a load global.
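///
/// A sketch of the expansion for a load-style global with offset 8 (hypothetical entities;
/// `gv1` is the base global):
///
/// ```clif
/// v1 = global_value.i64 gv1
/// v0 = load.i64 notrap aligned v1+8
/// ```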
fn load_addr(
inst: ir::Inst,
func: &mut ir::Function,
base: ir::GlobalValue,
offset: ir::immediates::Offset32,
global_type: ir::Type,
readonly: bool,
isa: &TargetIsa,
) {
// We need to load a pointer from the `base` global value, so insert a new `global_value`
// instruction. This depends on the iterative legalization loop. Note that the IR verifier
// detects any cycles in the `load` globals.
let ptr_ty = isa.pointer_type();
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
// Get the value for the base. For tidiness, expand VMContext here so that we avoid
// `vmctx_addr` which creates an otherwise unneeded value alias.
let base_addr = if let ir::GlobalValueData::VMContext = pos.func.global_values[base] {
pos.func
.special_param(ir::ArgumentPurpose::VMContext)
.expect("Missing vmctx parameter")
} else {
pos.ins().global_value(ptr_ty, base)
};
// Global-value loads are always notrap and aligned. They may be readonly.
let mut mflags = ir::MemFlags::trusted();
if readonly {
mflags.set_readonly();
}
// Perform the load.
pos.func
.dfg
.replace(inst)
.load(global_type, mflags, base_addr, offset);
}
/// Expand a `global_value` instruction for a symbolic name global.
fn symbol(inst: ir::Inst, func: &mut ir::Function, gv: ir::GlobalValue, isa: &TargetIsa) {
let ptr_ty = isa.pointer_type();
func.dfg.replace(inst).symbol_value(ptr_ty, gv);
}

View File

@@ -0,0 +1,161 @@
//! Legalization of heaps.
//!
//! This module exports the `expand_heap_addr` function which transforms a `heap_addr`
//! instruction into code that depends on the kind of heap referenced.
use crate::cursor::{Cursor, FuncCursor};
use crate::flowgraph::ControlFlowGraph;
use crate::ir::condcodes::IntCC;
use crate::ir::{self, InstBuilder};
use crate::isa::TargetIsa;
/// Expand a `heap_addr` instruction according to the definition of the heap.
pub fn expand_heap_addr(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &TargetIsa,
) {
// Unpack the instruction.
let (heap, offset, access_size) = match func.dfg[inst] {
ir::InstructionData::HeapAddr {
opcode,
heap,
arg,
imm,
} => {
debug_assert_eq!(opcode, ir::Opcode::HeapAddr);
(heap, arg, imm.into())
}
_ => panic!("Wanted heap_addr: {}", func.dfg.display_inst(inst, None)),
};
match func.heaps[heap].style {
ir::HeapStyle::Dynamic { bound_gv } => {
dynamic_addr(inst, heap, offset, access_size, bound_gv, func)
}
ir::HeapStyle::Static { bound } => {
static_addr(inst, heap, offset, access_size, bound.into(), func, cfg)
}
}
}
/// Expand a `heap_addr` for a dynamic heap.
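///
/// A sketch of the expansion of `v1 = heap_addr.i64 heap0, v0, 1` (hypothetical entities;
/// `gv0` holds the bound and `gv1` the base):
///
/// ```clif
/// v2 = global_value.i32 gv0
/// v3 = icmp uge v0, v2
/// trapnz v3, heap_oob
/// v4 = uextend.i64 v0
/// v5 = global_value.i64 gv1
/// v1 = iadd v5, v4
/// ```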
fn dynamic_addr(
inst: ir::Inst,
heap: ir::Heap,
offset: ir::Value,
access_size: u32,
bound_gv: ir::GlobalValue,
func: &mut ir::Function,
) {
let access_size = u64::from(access_size);
let offset_ty = func.dfg.value_type(offset);
let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
let min_size = func.heaps[heap].min_size.into();
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
// Start with the bounds check. Trap if `offset + access_size > bound`.
let bound = pos.ins().global_value(offset_ty, bound_gv);
let oob;
if access_size == 1 {
// `offset > bound - 1` is the same as `offset >= bound`.
oob = pos
.ins()
.icmp(IntCC::UnsignedGreaterThanOrEqual, offset, bound);
} else if access_size <= min_size {
// We know that bound >= min_size, so here we can compare `offset > bound - access_size`
// without wrapping.
let adj_bound = pos.ins().iadd_imm(bound, -(access_size as i64));
oob = pos
.ins()
.icmp(IntCC::UnsignedGreaterThan, offset, adj_bound);
} else {
// We need an overflow check for the adjusted offset.
let access_size_val = pos.ins().iconst(offset_ty, access_size as i64);
let (adj_offset, overflow) = pos.ins().iadd_cout(offset, access_size_val);
pos.ins().trapnz(overflow, ir::TrapCode::HeapOutOfBounds);
oob = pos
.ins()
.icmp(IntCC::UnsignedGreaterThan, adj_offset, bound);
}
pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds);
compute_addr(inst, heap, addr_ty, offset, offset_ty, pos.func);
}
/// Expand a `heap_addr` for a static heap.
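///
/// Here the bound is constant, so the check reduces to a compare against
/// `bound - access_size`. A sketch for `bound = 0x1_0000` and a 4-byte access (hypothetical
/// value numbers):
///
/// ```clif
/// v2 = icmp_imm ugt v0, 0xfffc
/// trapnz v2, heap_oob
/// ```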
fn static_addr(
inst: ir::Inst,
heap: ir::Heap,
offset: ir::Value,
access_size: u32,
bound: u64,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
) {
let access_size = u64::from(access_size);
let offset_ty = func.dfg.value_type(offset);
let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
// Start with the bounds check. Trap if `offset + access_size > bound`.
if access_size > bound {
// This will simply always trap since `offset >= 0`.
pos.ins().trap(ir::TrapCode::HeapOutOfBounds);
pos.func.dfg.replace(inst).iconst(addr_ty, 0);
// Split Ebb, as the trap is a terminator instruction.
let curr_ebb = pos.current_ebb().expect("Cursor is not in an ebb");
let new_ebb = pos.func.dfg.make_ebb();
pos.insert_ebb(new_ebb);
cfg.recompute_ebb(pos.func, curr_ebb);
cfg.recompute_ebb(pos.func, new_ebb);
return;
}
// Check `offset > limit` which is now known non-negative.
let limit = bound - access_size;
// We may be able to omit the check entirely for 32-bit offsets if the heap bound is 4 GB or
// more.
if offset_ty != ir::types::I32 || limit < 0xffff_ffff {
let oob = if limit & 1 == 1 {
// The out-of-bounds condition `offset > limit` is the same as `offset >= limit + 1`.
// Prefer testing `offset >= limit + 1` when limit is odd because an even number is
// likely to be a convenient constant on ARM and other RISC architectures.
pos.ins()
.icmp_imm(IntCC::UnsignedGreaterThanOrEqual, offset, limit as i64 + 1)
} else {
pos.ins()
.icmp_imm(IntCC::UnsignedGreaterThan, offset, limit as i64)
};
pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds);
}
compute_addr(inst, heap, addr_ty, offset, offset_ty, pos.func);
}
/// Emit code for the base address computation of a `heap_addr` instruction.
fn compute_addr(
inst: ir::Inst,
heap: ir::Heap,
addr_ty: ir::Type,
mut offset: ir::Value,
offset_ty: ir::Type,
func: &mut ir::Function,
) {
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
// Convert `offset` to `addr_ty`.
if offset_ty != addr_ty {
offset = pos.ins().uextend(addr_ty, offset);
}
// Add the heap base address.
let base_gv = pos.func.heaps[heap].base;
let base = pos.ins().global_value(addr_ty, base_gv);
pos.func.dfg.replace(inst).iadd(base, offset);
}

View File

@@ -0,0 +1,31 @@
//! Expanding instructions as runtime library calls.
use crate::ir;
use crate::ir::{get_libcall_funcref, InstBuilder};
use crate::isa::TargetIsa;
use crate::legalizer::boundary::legalize_libcall_signature;
use std::vec::Vec;
/// Try to expand `inst` as a library call, returning true if successful.
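///
/// For example, `v1 = ceil.f64 v0` on an ISA with no native rounding instruction becomes a
/// call to the runtime's `CeilF64` function (a sketch; assumes the target leaves `ceil.f64`
/// unencodable, hypothetical entity numbers):
///
/// ```clif
/// v1 = call fn0(v0)
/// ```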
pub fn expand_as_libcall(inst: ir::Inst, func: &mut ir::Function, isa: &TargetIsa) -> bool {
// Does the opcode/ctrl_type combo even have a well-known runtime library name?
let libcall = match ir::LibCall::for_inst(func.dfg[inst].opcode(), func.dfg.ctrl_typevar(inst))
{
Some(lc) => lc,
None => return false,
};
// Now we convert `inst` to a call. First save the arguments.
let mut args = Vec::new();
args.extend_from_slice(func.dfg.inst_args(inst));
// The replace builder will preserve the instruction result values.
let funcref = get_libcall_funcref(libcall, func, inst, isa);
func.dfg.replace(inst).call(funcref, &args);
// Ask the ISA to legalize the signature.
let fn_data = &func.dfg.ext_funcs[funcref];
let sig_data = &mut func.dfg.signatures[fn_data.signature];
legalize_libcall_signature(sig_data, isa);
true
}

View File

@@ -0,0 +1,440 @@
//! Legalize instructions.
//!
//! A legal instruction is one that can be mapped directly to a machine code instruction for the
//! target ISA. The `legalize_function()` function takes as input any function and transforms it
//! into an equivalent function using only legal instructions.
//!
//! The characteristics of legal instructions depend on the target ISA, so any given instruction
//! can be legal for one ISA and illegal for another.
//!
//! Besides transforming instructions, the legalizer also fills out the `function.encodings` map
//! which provides a legal encoding recipe for every instruction.
//!
//! The legalizer does not deal with register allocation constraints. These constraints are derived
//! from the encoding recipes, and solved later by the register allocator.
use crate::bitset::BitSet;
use crate::cursor::{Cursor, FuncCursor};
use crate::flowgraph::ControlFlowGraph;
use crate::ir::types::I32;
use crate::ir::{self, InstBuilder, MemFlags};
use crate::isa::TargetIsa;
use crate::timing;
mod boundary;
mod call;
mod globalvalue;
mod heap;
mod libcall;
mod split;
mod table;
use self::call::expand_call;
use self::globalvalue::expand_global_value;
use self::heap::expand_heap_addr;
use self::libcall::expand_as_libcall;
use self::table::expand_table_addr;
/// Legalize `inst` for `isa`. Return true if any changes to the code were
/// made; return false if the instruction was successfully encoded as is.
fn legalize_inst(
inst: ir::Inst,
pos: &mut FuncCursor,
cfg: &mut ControlFlowGraph,
isa: &TargetIsa,
) -> bool {
let opcode = pos.func.dfg[inst].opcode();
// Check for ABI boundaries that need to be converted to the legalized signature.
if opcode.is_call() {
if boundary::handle_call_abi(inst, pos.func, cfg) {
return true;
}
} else if opcode.is_return() {
if boundary::handle_return_abi(inst, pos.func, cfg) {
return true;
}
} else if opcode.is_branch() {
split::simplify_branch_arguments(&mut pos.func.dfg, inst);
}
match pos.func.update_encoding(inst, isa) {
Ok(()) => false,
Err(action) => {
// We should transform the instruction into legal equivalents.
// If the current instruction was replaced, we need to double back and revisit
// the expanded sequence. This is both to assign encodings and possibly to
// expand further.
// There's a risk of infinite looping here if the legalization patterns are
// unsound. Should we attempt to detect that?
if action(inst, pos.func, cfg, isa) {
return true;
}
// We don't have any pattern expansion for this instruction either.
// Try converting it to a library call as a last resort.
expand_as_libcall(inst, pos.func, isa)
}
}
}
/// Legalize `func` for `isa`.
///
/// - Transform any instructions that don't have a legal representation in `isa`.
/// - Fill out `func.encodings`.
///
pub fn legalize_function(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: &TargetIsa) {
let _tt = timing::legalize();
debug_assert!(cfg.is_valid());
boundary::legalize_signatures(func, isa);
func.encodings.resize(func.dfg.num_insts());
let mut pos = FuncCursor::new(func);
// Process EBBs in layout order. Some legalization actions may split the current EBB or append
// new ones to the end. We need to make sure we visit those new EBBs too.
while let Some(_ebb) = pos.next_ebb() {
// Keep track of the cursor position before the instruction being processed, so we can
// double back when replacing instructions.
let mut prev_pos = pos.position();
while let Some(inst) = pos.next_inst() {
if legalize_inst(inst, &mut pos, cfg, isa) {
// Go back and legalize the inserted return value conversion instructions.
pos.set_position(prev_pos);
} else {
// Remember this position in case we need to double back.
prev_pos = pos.position();
}
}
}
// Now that we've lowered all br_tables, we don't need the jump tables anymore.
if !isa.flags().jump_tables_enabled() {
pos.func.jump_tables.clear();
}
}
// Include legalization patterns that were generated by `gen_legalizer.py` from the `XForms` in
// `cranelift-codegen/meta-python/base/legalize.py`.
//
// Concretely, this defines private functions `narrow()` and `expand()`.
include!(concat!(env!("OUT_DIR"), "/legalizer.rs"));
/// Custom expansion for conditional trap instructions.
/// TODO: Add CFG support to the Python patterns so we won't have to do this.
fn expand_cond_trap(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &TargetIsa,
) {
// Parse the instruction.
let trapz;
let (arg, code) = match func.dfg[inst] {
ir::InstructionData::CondTrap { opcode, arg, code } => {
// We want to branch *over* an unconditional trap.
trapz = match opcode {
ir::Opcode::Trapz => true,
ir::Opcode::Trapnz => false,
_ => panic!("Expected cond trap: {}", func.dfg.display_inst(inst, None)),
};
(arg, code)
}
_ => panic!("Expected cond trap: {}", func.dfg.display_inst(inst, None)),
};
// Split the EBB after `inst`:
//
// trapnz arg
//
// Becomes:
//
// brz arg, new_ebb
// trap
// new_ebb:
//
let old_ebb = func.layout.pp_ebb(inst);
let new_ebb = func.dfg.make_ebb();
if trapz {
func.dfg.replace(inst).brnz(arg, new_ebb, &[]);
} else {
func.dfg.replace(inst).brz(arg, new_ebb, &[]);
}
let mut pos = FuncCursor::new(func).after_inst(inst);
pos.use_srcloc(inst);
pos.ins().trap(code);
pos.insert_ebb(new_ebb);
// Finally update the CFG.
cfg.recompute_ebb(pos.func, old_ebb);
cfg.recompute_ebb(pos.func, new_ebb);
}
/// Jump tables.
fn expand_br_table(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
isa: &TargetIsa,
) {
if isa.flags().jump_tables_enabled() {
expand_br_table_jt(inst, func, cfg, isa);
} else {
expand_br_table_conds(inst, func, cfg, isa);
}
}
/// Expand br_table to jump table.
fn expand_br_table_jt(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
isa: &TargetIsa,
) {
use crate::ir::condcodes::IntCC;
let (arg, default_ebb, table) = match func.dfg[inst] {
ir::InstructionData::BranchTable {
opcode: ir::Opcode::BrTable,
arg,
destination,
table,
} => (arg, destination, table),
_ => panic!("Expected br_table: {}", func.dfg.display_inst(inst, None)),
};
let table_size = func.jump_tables[table].len();
let addr_ty = isa.pointer_type();
let entry_ty = I32;
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
// Bounds check
let oob = pos
.ins()
.icmp_imm(IntCC::UnsignedGreaterThanOrEqual, arg, table_size as i64);
pos.ins().brnz(oob, default_ebb, &[]);
let base_addr = pos.ins().jump_table_base(addr_ty, table);
let entry = pos
.ins()
.jump_table_entry(addr_ty, arg, base_addr, entry_ty.bytes() as u8, table);
let addr = pos.ins().iadd(base_addr, entry);
pos.ins().indirect_jump_table_br(addr, table);
let ebb = pos.current_ebb().unwrap();
pos.remove_inst();
cfg.recompute_ebb(pos.func, ebb);
}
/// Expand br_table to series of conditionals.
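///
/// A sketch for a two-entry jump table with default destination `ebb3` (hypothetical
/// entities):
///
/// ```clif
/// v1 = icmp_imm eq v0, 0
/// brnz v1, ebb1
/// v2 = icmp_imm eq v0, 1
/// brnz v2, ebb2
/// jump ebb3
/// ```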
fn expand_br_table_conds(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &TargetIsa,
) {
use crate::ir::condcodes::IntCC;
let (arg, default_ebb, table) = match func.dfg[inst] {
ir::InstructionData::BranchTable {
opcode: ir::Opcode::BrTable,
arg,
destination,
table,
} => (arg, destination, table),
_ => panic!("Expected br_table: {}", func.dfg.display_inst(inst, None)),
};
// This is a poor man's jump table using just a sequence of conditional branches.
let table_size = func.jump_tables[table].len();
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
for i in 0..table_size {
let dest = pos.func.jump_tables[table].as_slice()[i];
let t = pos.ins().icmp_imm(IntCC::Equal, arg, i as i64);
pos.ins().brnz(t, dest, &[]);
}
// `br_table` jumps to the default destination if nothing matches
pos.ins().jump(default_ebb, &[]);
let ebb = pos.current_ebb().unwrap();
pos.remove_inst();
cfg.recompute_ebb(pos.func, ebb);
}
/// Expand the select instruction.
///
/// Conditional moves are available in some ISAs for some register classes. The remaining selects
/// are handled by a branch.
fn expand_select(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &TargetIsa,
) {
let (ctrl, tval, fval) = match func.dfg[inst] {
ir::InstructionData::Ternary {
opcode: ir::Opcode::Select,
args,
} => (args[0], args[1], args[2]),
_ => panic!("Expected select: {}", func.dfg.display_inst(inst, None)),
};
// Replace `result = select ctrl, tval, fval` with:
//
// brnz ctrl, new_ebb(tval)
// jump new_ebb(fval)
// new_ebb(result):
let old_ebb = func.layout.pp_ebb(inst);
let result = func.dfg.first_result(inst);
func.dfg.clear_results(inst);
let new_ebb = func.dfg.make_ebb();
func.dfg.attach_ebb_param(new_ebb, result);
func.dfg.replace(inst).brnz(ctrl, new_ebb, &[tval]);
let mut pos = FuncCursor::new(func).after_inst(inst);
pos.use_srcloc(inst);
pos.ins().jump(new_ebb, &[fval]);
pos.insert_ebb(new_ebb);
cfg.recompute_ebb(pos.func, new_ebb);
cfg.recompute_ebb(pos.func, old_ebb);
}
fn expand_br_icmp(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &TargetIsa,
) {
let (cond, a, b, destination, ebb_args) = match func.dfg[inst] {
ir::InstructionData::BranchIcmp {
cond,
destination,
ref args,
..
} => (
cond,
args.get(0, &func.dfg.value_lists).unwrap(),
args.get(1, &func.dfg.value_lists).unwrap(),
destination,
args.as_slice(&func.dfg.value_lists)[2..].to_vec(),
),
_ => panic!("Expected br_icmp {}", func.dfg.display_inst(inst, None)),
};
let old_ebb = func.layout.pp_ebb(inst);
func.dfg.clear_results(inst);
let icmp_res = func.dfg.replace(inst).icmp(cond, a, b);
let mut pos = FuncCursor::new(func).after_inst(inst);
pos.use_srcloc(inst);
pos.ins().brnz(icmp_res, destination, &ebb_args);
cfg.recompute_ebb(pos.func, destination);
cfg.recompute_ebb(pos.func, old_ebb);
}
/// Expand illegal `f32const` and `f64const` instructions.
fn expand_fconst(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
_isa: &TargetIsa,
) {
let ty = func.dfg.value_type(func.dfg.first_result(inst));
debug_assert!(!ty.is_vector(), "Only scalar fconst supported: {}", ty);
// In the future, we may want to generate constant pool entries for these constants, but for
// now use an `iconst` and a bit cast.
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
let ival = match pos.func.dfg[inst] {
ir::InstructionData::UnaryIeee32 {
opcode: ir::Opcode::F32const,
imm,
} => pos.ins().iconst(ir::types::I32, i64::from(imm.bits())),
ir::InstructionData::UnaryIeee64 {
opcode: ir::Opcode::F64const,
imm,
} => pos.ins().iconst(ir::types::I64, imm.bits() as i64),
_ => panic!("Expected fconst: {}", pos.func.dfg.display_inst(inst, None)),
};
pos.func.dfg.replace(inst).bitcast(ty, ival);
}
/// Expand illegal `stack_load` instructions.
fn expand_stack_load(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
isa: &TargetIsa,
) {
let ty = func.dfg.value_type(func.dfg.first_result(inst));
let addr_ty = isa.pointer_type();
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
let (stack_slot, offset) = match pos.func.dfg[inst] {
ir::InstructionData::StackLoad {
opcode: _opcode,
stack_slot,
offset,
} => (stack_slot, offset),
_ => panic!(
"Expected stack_load: {}",
pos.func.dfg.display_inst(inst, None)
),
};
let addr = pos.ins().stack_addr(addr_ty, stack_slot, offset);
// Stack slots are required to be accessible and aligned.
let mflags = MemFlags::trusted();
pos.func.dfg.replace(inst).load(ty, mflags, addr, 0);
}
/// Expand illegal `stack_store` instructions.
fn expand_stack_store(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
isa: &TargetIsa,
) {
let addr_ty = isa.pointer_type();
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
let (val, stack_slot, offset) = match pos.func.dfg[inst] {
ir::InstructionData::StackStore {
opcode: _opcode,
arg,
stack_slot,
offset,
} => (arg, stack_slot, offset),
_ => panic!(
"Expected stack_store: {}",
pos.func.dfg.display_inst(inst, None)
),
};
let addr = pos.ins().stack_addr(addr_ty, stack_slot, offset);
let mut mflags = MemFlags::new();
// Stack slots are required to be accessible and aligned.
mflags.set_notrap();
mflags.set_aligned();
pos.func.dfg.replace(inst).store(mflags, val, addr, 0);
}

View File

@@ -0,0 +1,345 @@
//! Value splitting.
//!
//! Some value types are too large to fit in registers, so they need to be split into smaller parts
//! that the ISA can operate on. There are two dimensions of splitting, represented by two
//! complementary instruction pairs:
//!
//! - `isplit` and `iconcat` for splitting integer types into smaller integers.
//! - `vsplit` and `vconcat` for splitting vector types into smaller vector types with the same
//! lane types.
//!
//! There is no floating point splitting. If an ISA doesn't support `f64` values, they probably
//! have to be bit-cast to `i64` and possibly split into two `i32` values that fit in registers.
//! This breakdown is handled by the ABI lowering.
//!
//! When legalizing a single instruction, it is wrapped in splits and concatenations:
//!
//! ```clif
//! v1 = bxor.i64 v2, v3
//! ```
//!
//! becomes:
//!
//! ```clif
//! v20, v21 = isplit v2
//! v30, v31 = isplit v3
//! v10 = bxor.i32 v20, v30
//! v11 = bxor.i32 v21, v31
//! v1 = iconcat v10, v11
//! ```
//!
//! This local expansion approach still leaves the original `i64` values in the code as operands on
//! the `split` and `concat` instructions. It also creates a lot of redundant code to clean up as
//! values are constantly split and concatenated.
//!
//! # Optimized splitting
//!
//! We can eliminate a lot of the splitting code quite easily. Whenever we need to split a value,
//! first check if the value is defined by the corresponding concatenation. If so, then just use
//! the two concatenation inputs directly:
//!
//! ```clif
//! v4 = iadd_imm.i64 v1, 1
//! ```
//!
//! becomes, using the expanded code from above:
//!
//! ```clif
//! v40, v5 = iadd_imm_cout.i32 v10, 1
//! v6 = bint.i32 v5
//! v41 = iadd.i32 v11, v6
//! v4 = iconcat v40, v41
//! ```
//!
//! This means that the `iconcat` instructions defining `v1` and `v4` end up with no uses, so they
//! can be trivially deleted by a dead code elimination pass.
//!
//! # EBB arguments
//!
//! If all instructions that produce an `i64` value are legalized as above, we will eventually end
//! up with no `i64` values anywhere, except for EBB arguments. We can work around this by
//! iteratively splitting EBB arguments too. That should leave us with no illegal value types
//! anywhere.
//!
//! It is possible to have circular dependencies of EBB arguments that are never used by any real
//! instructions. These loops will remain in the program.
use crate::cursor::{Cursor, CursorPosition, FuncCursor};
use crate::flowgraph::{BasicBlock, ControlFlowGraph};
use crate::ir::{self, Ebb, Inst, InstBuilder, InstructionData, Opcode, Type, Value, ValueDef};
use core::iter;
use std::vec::Vec;
/// Split `value` into two values using the `isplit` semantics. Do this by reusing existing values
/// if possible.
pub fn isplit(
func: &mut ir::Function,
cfg: &ControlFlowGraph,
pos: CursorPosition,
srcloc: ir::SourceLoc,
value: Value,
) -> (Value, Value) {
split_any(func, cfg, pos, srcloc, value, Opcode::Iconcat)
}
/// Split `value` into halves using the `vsplit` semantics. Do this by reusing existing values if
/// possible.
pub fn vsplit(
func: &mut ir::Function,
cfg: &ControlFlowGraph,
pos: CursorPosition,
srcloc: ir::SourceLoc,
value: Value,
) -> (Value, Value) {
split_any(func, cfg, pos, srcloc, value, Opcode::Vconcat)
}
/// After splitting an EBB argument, we need to go back and fix up all of the predecessor
/// instructions. This is potentially a recursive operation, but we don't implement it recursively
/// since that could use up too much stack.
///
/// Instead, the repairs are deferred and placed on a work list in stack form.
struct Repair {
concat: Opcode,
// The argument type after splitting.
split_type: Type,
// The destination EBB whose arguments have been split.
ebb: Ebb,
// Number of the original EBB argument which has been replaced by the low part.
num: usize,
// Number of the new EBB argument which represents the high part after the split.
hi_num: usize,
}
/// Generic version of `isplit` and `vsplit` controlled by the `concat` opcode.
fn split_any(
func: &mut ir::Function,
cfg: &ControlFlowGraph,
pos: CursorPosition,
srcloc: ir::SourceLoc,
value: Value,
concat: Opcode,
) -> (Value, Value) {
let mut repairs = Vec::new();
let pos = &mut FuncCursor::new(func).at_position(pos).with_srcloc(srcloc);
let result = split_value(pos, value, concat, &mut repairs);
// We have split the value requested, and now we may need to fix some EBB predecessors.
while let Some(repair) = repairs.pop() {
for BasicBlock { inst, .. } in cfg.pred_iter(repair.ebb) {
let branch_opc = pos.func.dfg[inst].opcode();
debug_assert!(
branch_opc.is_branch(),
"Predecessor not a branch: {}",
pos.func.dfg.display_inst(inst, None)
);
let num_fixed_args = branch_opc.constraints().num_fixed_value_arguments();
let mut args = pos.func.dfg[inst]
.take_value_list()
.expect("Branches must have value lists.");
let num_args = args.len(&pos.func.dfg.value_lists);
// Get the old value passed to the EBB argument we're repairing.
let old_arg = args
.get(num_fixed_args + repair.num, &pos.func.dfg.value_lists)
.expect("Too few branch arguments");
// It's possible that the CFG's predecessor list has duplicates. Detect them here.
if pos.func.dfg.value_type(old_arg) == repair.split_type {
pos.func.dfg[inst].put_value_list(args);
continue;
}
// Split the old argument, possibly causing more repairs to be scheduled.
pos.goto_inst(inst);
let (lo, hi) = split_value(pos, old_arg, repair.concat, &mut repairs);
// The `lo` part replaces the original argument.
*args
.get_mut(num_fixed_args + repair.num, &mut pos.func.dfg.value_lists)
.unwrap() = lo;
// The `hi` part goes at the end. Since multiple repairs may have been scheduled to the
// same EBB, there could be multiple arguments missing.
if num_args > num_fixed_args + repair.hi_num {
*args
.get_mut(
num_fixed_args + repair.hi_num,
&mut pos.func.dfg.value_lists,
)
.unwrap() = hi;
} else {
// We need to append one or more arguments. If we're adding more than one argument,
// there must be pending repairs on the stack that will fill in the correct values
// instead of `hi`.
args.extend(
iter::repeat(hi).take(1 + num_fixed_args + repair.hi_num - num_args),
&mut pos.func.dfg.value_lists,
);
}
// Put the value list back after manipulating it.
pos.func.dfg[inst].put_value_list(args);
}
}
result
}
/// Split a single value using the integer or vector semantics given by the `concat` opcode.
///
/// If the value is defined by a `concat` instruction, just reuse the operand values of that
/// instruction.
///
/// Return the two new values representing the parts of `value`.
fn split_value(
pos: &mut FuncCursor,
value: Value,
concat: Opcode,
repairs: &mut Vec<Repair>,
) -> (Value, Value) {
let value = pos.func.dfg.resolve_aliases(value);
let mut reuse = None;
match pos.func.dfg.value_def(value) {
ValueDef::Result(inst, num) => {
// This is an instruction result. See if the value was created by a `concat`
// instruction.
if let InstructionData::Binary { opcode, args, .. } = pos.func.dfg[inst] {
debug_assert_eq!(num, 0);
if opcode == concat {
reuse = Some((args[0], args[1]));
}
}
}
ValueDef::Param(ebb, num) => {
// This is an EBB parameter. We can split the parameter value unless this is the entry
// block.
if pos.func.layout.entry_block() != Some(ebb) {
// We are going to replace the parameter at `num` with two new arguments.
// Determine the new value types.
let ty = pos.func.dfg.value_type(value);
let split_type = match concat {
Opcode::Iconcat => ty.half_width().expect("Invalid type for isplit"),
Opcode::Vconcat => ty.half_vector().expect("Invalid type for vsplit"),
_ => panic!("Unhandled concat opcode: {}", concat),
};
// Since the `repairs` stack potentially contains other parameter numbers for
// `ebb`, avoid shifting and renumbering EBB parameters. It could invalidate other
// `repairs` entries.
//
// Replace the original `value` with the low part, and append the high part at the
// end of the argument list.
let lo = pos.func.dfg.replace_ebb_param(value, split_type);
let hi_num = pos.func.dfg.num_ebb_params(ebb);
let hi = pos.func.dfg.append_ebb_param(ebb, split_type);
reuse = Some((lo, hi));
// Now the original value is dangling. Insert a concatenation instruction that can
// compute it from the two new parameters. This also serves as a record of what we
// did so a future call to this function doesn't have to redo the work.
//
// Note that it is safe to move `pos` here since `reuse` was set above, so we don't
// need to insert a split instruction before returning.
pos.goto_first_inst(ebb);
pos.ins()
.with_result(value)
.Binary(concat, split_type, lo, hi);
// Finally, splitting the EBB parameter is not enough. We also have to repair all
// of the predecessor instructions that branch here.
add_repair(concat, split_type, ebb, num, hi_num, repairs);
}
}
}
// Did the code above succeed in finding values we can reuse?
if let Some(pair) = reuse {
pair
} else {
// No, we'll just have to insert the requested split instruction at `pos`. Note that `pos`
// has not been moved by the EBB argument code above when `reuse` is `None`.
match concat {
Opcode::Iconcat => pos.ins().isplit(value),
Opcode::Vconcat => pos.ins().vsplit(value),
_ => panic!("Unhandled concat opcode: {}", concat),
}
}
}
// Add a repair entry to the work list.
fn add_repair(
concat: Opcode,
split_type: Type,
ebb: Ebb,
num: usize,
hi_num: usize,
repairs: &mut Vec<Repair>,
) {
repairs.push(Repair {
concat,
split_type,
ebb,
num,
hi_num,
});
}
/// Strip concat-split chains. Return a simpler way of computing the same value.
///
/// Given this input:
///
/// ```clif
/// v10 = iconcat v1, v2
/// v11, v12 = isplit v10
/// ```
///
/// This function resolves `v11` to `v1` and `v12` to `v2`.
fn resolve_splits(dfg: &ir::DataFlowGraph, value: Value) -> Value {
let value = dfg.resolve_aliases(value);
// Deconstruct a split instruction.
let split_res;
let concat_opc;
let split_arg;
if let ValueDef::Result(inst, num) = dfg.value_def(value) {
split_res = num;
concat_opc = match dfg[inst].opcode() {
Opcode::Isplit => Opcode::Iconcat,
Opcode::Vsplit => Opcode::Vconcat,
_ => return value,
};
split_arg = dfg.inst_args(inst)[0];
} else {
return value;
}
// See if split_arg is defined by a concatenation instruction.
if let ValueDef::Result(inst, _) = dfg.value_def(split_arg) {
if dfg[inst].opcode() == concat_opc {
return dfg.inst_args(inst)[split_res];
}
}
value
}
/// Simplify the arguments to a branch *after* the instructions leading up to the branch have been
/// legalized.
///
/// The branch argument repairs performed by `split_any()` above may be performed on branches that
/// have not yet been legalized. The repaired arguments can be defined by actual split
/// instructions in that case.
///
/// After legalizing the instructions computing the value that was split, it is likely that we can
/// avoid depending on the split instruction. Its input probably comes from a concatenation.
pub fn simplify_branch_arguments(dfg: &mut ir::DataFlowGraph, branch: Inst) {
let mut new_args = Vec::new();
for &arg in dfg.inst_args(branch) {
let new_arg = resolve_splits(dfg, arg);
new_args.push(new_arg);
}
dfg.inst_args_mut(branch).copy_from_slice(&new_args);
}

View File

@@ -0,0 +1,113 @@
//! Legalization of tables.
//!
//! This module exports the `expand_table_addr` function which transforms a `table_addr`
//! instruction into code that depends on the kind of table referenced.
use crate::cursor::{Cursor, FuncCursor};
use crate::flowgraph::ControlFlowGraph;
use crate::ir::condcodes::IntCC;
use crate::ir::immediates::Offset32;
use crate::ir::{self, InstBuilder};
use crate::isa::TargetIsa;
/// Expand a `table_addr` instruction according to the definition of the table.
pub fn expand_table_addr(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
_isa: &TargetIsa,
) {
// Unpack the instruction.
let (table, index, element_offset) = match func.dfg[inst] {
ir::InstructionData::TableAddr {
opcode,
table,
arg,
offset,
} => {
debug_assert_eq!(opcode, ir::Opcode::TableAddr);
(table, arg, offset)
}
_ => panic!("Wanted table_addr: {}", func.dfg.display_inst(inst, None)),
};
dynamic_addr(inst, table, index, element_offset, func);
}
/// Expand a `table_addr` for a dynamic table.
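///
/// A sketch of the expansion of `v1 = table_addr.i64 table0, v0, +0` with 8-byte elements
/// (hypothetical entities; `gv0` holds the bound and `gv1` the base):
///
/// ```clif
/// v2 = global_value.i32 gv0
/// v3 = icmp uge v0, v2
/// trapnz v3, table_oob
/// v4 = uextend.i64 v0
/// v5 = global_value.i64 gv1
/// v6 = ishl_imm v4, 3
/// v1 = iadd v5, v6
/// ```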
fn dynamic_addr(
inst: ir::Inst,
table: ir::Table,
index: ir::Value,
element_offset: Offset32,
func: &mut ir::Function,
) {
let bound_gv = func.tables[table].bound_gv;
let index_ty = func.dfg.value_type(index);
let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
// Start with the bounds check. Trap if `index + 1 > bound`.
let bound = pos.ins().global_value(index_ty, bound_gv);
// `index > bound - 1` is the same as `index >= bound`.
let oob = pos
.ins()
.icmp(IntCC::UnsignedGreaterThanOrEqual, index, bound);
pos.ins().trapnz(oob, ir::TrapCode::TableOutOfBounds);
compute_addr(
inst,
table,
addr_ty,
index,
index_ty,
element_offset,
pos.func,
);
}
/// Emit code for the base address computation of a `table_addr` instruction.
fn compute_addr(
inst: ir::Inst,
table: ir::Table,
addr_ty: ir::Type,
mut index: ir::Value,
index_ty: ir::Type,
element_offset: Offset32,
func: &mut ir::Function,
) {
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
// Convert `index` to `addr_ty`.
if index_ty != addr_ty {
index = pos.ins().uextend(addr_ty, index);
}
// Add the table base address.
let base_gv = pos.func.tables[table].base_gv;
let base = pos.ins().global_value(addr_ty, base_gv);
let element_size = pos.func.tables[table].element_size;
let mut offset;
let element_size: u64 = element_size.into();
if element_size == 1 {
offset = index;
} else if element_size.is_power_of_two() {
offset = pos
.ins()
.ishl_imm(index, i64::from(element_size.trailing_zeros()));
} else {
offset = pos.ins().imul_imm(index, element_size as i64);
}
if element_offset == Offset32::new(0) {
pos.func.dfg.replace(inst).iadd(base, offset);
} else {
let imm: i64 = element_offset.into();
offset = pos.ins().iadd(base, offset);
pos.func.dfg.replace(inst).iadd_imm(offset, imm);
}
}

View File

@@ -0,0 +1,110 @@
//! Cranelift code generation library.
#![deny(missing_docs, trivial_numeric_casts, unused_extern_crates)]
#![warn(unused_import_braces)]
#![cfg_attr(feature = "std", deny(unstable_features))]
#![cfg_attr(feature = "clippy", plugin(clippy(conf_file = "../../clippy.toml")))]
#![cfg_attr(feature="cargo-clippy", allow(
// Produces only a false positive:
clippy::while_let_loop,
// Produces many false positives, but did produce some valid lints, now fixed:
clippy::needless_lifetimes,
// Generated code makes some style transgressions, but readability doesn't suffer much:
clippy::many_single_char_names,
clippy::identity_op,
clippy::needless_borrow,
clippy::cast_lossless,
clippy::unreadable_literal,
clippy::assign_op_pattern,
clippy::empty_line_after_outer_attr,
// Hard to avoid in generated code:
clippy::cyclomatic_complexity,
clippy::too_many_arguments,
// Code generator doesn't have a way to collapse identical arms:
clippy::match_same_arms,
// These are relatively minor style issues, but would be easy to fix:
clippy::new_without_default,
clippy::new_without_default_derive,
clippy::should_implement_trait,
clippy::len_without_is_empty))]
#![cfg_attr(
feature = "cargo-clippy",
warn(
clippy::float_arithmetic,
clippy::mut_mut,
clippy::nonminimal_bool,
clippy::option_map_unwrap_or,
clippy::option_map_unwrap_or_else,
clippy::print_stdout,
clippy::unicode_not_nfc,
clippy::use_self
)
)]
#![no_std]
#![cfg_attr(not(feature = "std"), feature(alloc))]
#[cfg(not(feature = "std"))]
#[macro_use]
extern crate alloc as std;
#[cfg(feature = "std")]
#[macro_use]
extern crate std;
#[cfg(not(feature = "std"))]
use hashmap_core::{map as hash_map, HashMap, HashSet};
#[cfg(feature = "std")]
use std::collections::{hash_map, HashMap, HashSet};
pub use crate::context::Context;
pub use crate::legalizer::legalize_function;
pub use crate::verifier::verify_function;
pub use crate::write::write_function;
pub use cranelift_bforest as bforest;
pub use cranelift_entity as entity;
pub mod binemit;
pub mod cfg_printer;
pub mod cursor;
pub mod dbg;
pub mod dominator_tree;
pub mod flowgraph;
pub mod ir;
pub mod isa;
pub mod loop_analysis;
pub mod print_errors;
pub mod settings;
pub mod timing;
pub mod verifier;
pub mod write;
pub use crate::entity::packed_option;
mod abi;
mod bitset;
mod constant_hash;
mod context;
mod dce;
mod divconst_magic_numbers;
mod fx;
mod iterators;
mod legalizer;
mod licm;
mod nan_canonicalization;
mod partition_slice;
mod postopt;
mod predicates;
mod ref_slice;
mod regalloc;
mod result;
mod scoped_hash_map;
mod simple_gvn;
mod simple_preopt;
mod stack_layout;
mod topo_order;
mod unreachable_code;
pub use crate::result::{CodegenError, CodegenResult};
/// Version number of this crate.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");

View File

@@ -0,0 +1,239 @@
//! A Loop Invariant Code Motion optimization pass
use crate::cursor::{Cursor, EncCursor, FuncCursor};
use crate::dominator_tree::DominatorTree;
use crate::entity::{EntityList, ListPool};
use crate::flowgraph::{BasicBlock, ControlFlowGraph};
use crate::fx::FxHashSet;
use crate::ir::{DataFlowGraph, Ebb, Function, Inst, InstBuilder, Layout, Opcode, Type, Value};
use crate::isa::TargetIsa;
use crate::loop_analysis::{Loop, LoopAnalysis};
use crate::timing;
use std::vec::Vec;
/// Performs the LICM pass by detecting loops within the CFG and moving
/// loop-invariant instructions out of them.
/// Changes the CFG and domtree in-place during the operation.
pub fn do_licm(
isa: &TargetIsa,
func: &mut Function,
cfg: &mut ControlFlowGraph,
domtree: &mut DominatorTree,
loop_analysis: &mut LoopAnalysis,
) {
let _tt = timing::licm();
debug_assert!(cfg.is_valid());
debug_assert!(domtree.is_valid());
debug_assert!(loop_analysis.is_valid());
for lp in loop_analysis.loops() {
// For each loop that we want to optimize, we determine the set of loop-invariant
// instructions, removing them from the loop body as we go.
let invariant_insts = remove_loop_invariant_instructions(lp, func, cfg, loop_analysis);
// Then we create the loop's pre-header (if needed) and fill it with the invariant
// instructions.
if !invariant_insts.is_empty() {
// If the loop has a natural pre-header we use it; otherwise we create one.
let mut pos;
match has_pre_header(&func.layout, cfg, domtree, loop_analysis.loop_header(lp)) {
None => {
let pre_header =
create_pre_header(isa, loop_analysis.loop_header(lp), func, cfg, domtree);
pos = FuncCursor::new(func).at_last_inst(pre_header);
}
// If there is a natural pre-header we insert new instructions just before the
// related jumping instruction (which is not necessarily at the end).
Some((_, last_inst)) => {
pos = FuncCursor::new(func).at_inst(last_inst);
}
};
// The last instruction of the pre-header is the termination instruction (usually
// a jump) so we need to insert just before this.
for inst in invariant_insts {
pos.insert_inst(inst);
}
}
}
// We have to recompute the domtree to account for the changes
cfg.compute(func);
domtree.compute(func, cfg);
}
// Insert a pre-header before the header, modifying the function layout and CFG to reflect it.
// A jump instruction to the header is placed at the end of the pre-header.
fn create_pre_header(
isa: &TargetIsa,
header: Ebb,
func: &mut Function,
cfg: &mut ControlFlowGraph,
domtree: &DominatorTree,
) -> Ebb {
let pool = &mut ListPool::<Value>::new();
let header_args_values: Vec<Value> = func.dfg.ebb_params(header).iter().cloned().collect();
let header_args_types: Vec<Type> = header_args_values
.iter()
.map(|&val| func.dfg.value_type(val))
.collect();
let pre_header = func.dfg.make_ebb();
let mut pre_header_args_value: EntityList<Value> = EntityList::new();
for typ in header_args_types {
pre_header_args_value.push(func.dfg.append_ebb_param(pre_header, typ), pool);
}
for BasicBlock {
inst: last_inst, ..
} in cfg.pred_iter(header)
{
// We only follow normal edges (not the back edges)
if !domtree.dominates(header, last_inst, &func.layout) {
change_branch_jump_destination(last_inst, pre_header, func);
}
}
{
let mut pos = EncCursor::new(func, isa).at_top(header);
// Inserts the pre-header at the right place in the layout.
pos.insert_ebb(pre_header);
pos.next_inst();
pos.ins().jump(header, pre_header_args_value.as_slice(pool));
}
pre_header
}
// Detects if a loop header has a natural pre-header.
//
// A loop header has a pre-header if there is only one predecessor that the header doesn't
// dominate.
// Returns the pre-header Ebb and the instruction jumping to the header.
fn has_pre_header(
layout: &Layout,
cfg: &ControlFlowGraph,
domtree: &DominatorTree,
header: Ebb,
) -> Option<(Ebb, Inst)> {
let mut result = None;
for BasicBlock {
ebb: pred_ebb,
inst: branch_inst,
} in cfg.pred_iter(header)
{
// We only count normal edges (not the back edges)
if !domtree.dominates(header, branch_inst, layout) {
if result.is_some() {
// We have already found one, so there is more than one.
return None;
}
if branch_inst != layout.last_inst(pred_ebb).unwrap()
|| cfg.succ_iter(pred_ebb).nth(1).is_some()
{
// It's along a critical edge, so don't use it.
return None;
}
result = Some((pred_ebb, branch_inst));
}
}
result
}
// Change the destination of a jump or branch instruction. Does nothing if called with a non-jump
// or non-branch instruction.
fn change_branch_jump_destination(inst: Inst, new_ebb: Ebb, func: &mut Function) {
match func.dfg[inst].branch_destination_mut() {
None => (),
Some(instruction_dest) => *instruction_dest = new_ebb,
}
}
/// Test whether the given opcode is unsafe to even consider for LICM.
fn trivially_unsafe_for_licm(opcode: Opcode) -> bool {
opcode.can_load()
|| opcode.can_store()
|| opcode.is_call()
|| opcode.is_branch()
|| opcode.is_terminator()
|| opcode.is_return()
|| opcode.can_trap()
|| opcode.other_side_effects()
|| opcode.writes_cpu_flags()
}
/// Test whether the given instruction is loop-invariant.
fn is_loop_invariant(inst: Inst, dfg: &DataFlowGraph, loop_values: &FxHashSet<Value>) -> bool {
if trivially_unsafe_for_licm(dfg[inst].opcode()) {
return false;
}
let inst_args = dfg.inst_args(inst);
for arg in inst_args {
let arg = dfg.resolve_aliases(*arg);
if loop_values.contains(&arg) {
return false;
}
}
true
}
// Traverses a loop in reverse post-order from a header EBB and identifies loop-invariant
// instructions. These loop-invariant instructions are then removed from the code and returned
// (in reverse post-order) for later use.
fn remove_loop_invariant_instructions(
lp: Loop,
func: &mut Function,
cfg: &ControlFlowGraph,
loop_analysis: &LoopAnalysis,
) -> Vec<Inst> {
let mut loop_values: FxHashSet<Value> = FxHashSet();
let mut invariant_insts: Vec<Inst> = Vec::new();
let mut pos = FuncCursor::new(func);
// We traverse the loop EBB in reverse post-order.
for ebb in postorder_ebbs_loop(loop_analysis, cfg, lp).iter().rev() {
// Arguments of the EBB are loop values
for val in pos.func.dfg.ebb_params(*ebb) {
loop_values.insert(*val);
}
pos.goto_top(*ebb);
#[cfg_attr(feature = "cargo-clippy", allow(clippy::block_in_if_condition_stmt))]
while let Some(inst) = pos.next_inst() {
if is_loop_invariant(inst, &pos.func.dfg, &loop_values) {
// If all of the instruction's arguments are defined outside the loop
// then this instruction is loop-invariant
invariant_insts.push(inst);
// We remove it from the loop
pos.remove_inst_and_step_back();
} else {
// If the instruction is not loop-invariant we push its results in the set of
// loop values
for out in pos.func.dfg.inst_results(inst) {
loop_values.insert(*out);
}
}
}
}
invariant_insts
}
/// Return the EBBs of a loop in post-order, starting from the loop header.
fn postorder_ebbs_loop(loop_analysis: &LoopAnalysis, cfg: &ControlFlowGraph, lp: Loop) -> Vec<Ebb> {
let mut grey = FxHashSet();
let mut black = FxHashSet();
let mut stack = vec![loop_analysis.loop_header(lp)];
let mut postorder = Vec::new();
while !stack.is_empty() {
let node = stack.pop().unwrap();
if !grey.contains(&node) {
// This is a white node. Mark it as gray.
grey.insert(node);
stack.push(node);
// Get any children we've never seen before.
for child in cfg.succ_iter(node) {
if loop_analysis.is_in_loop(child, lp) && !grey.contains(&child) {
stack.push(child);
}
}
} else if !black.contains(&node) {
postorder.push(node);
black.insert(node);
}
}
postorder
}
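// For illustration, a hedged sketch (not verbatim pass output) of what the pass
// above does at the IR level. Given a loop whose body recomputes a value that
// only depends on values defined outside the loop:
//
//     ebb1(v1: i32):              ; loop header
//         v2 = iadd_imm v0, 16    ; invariant: v0 is defined outside the loop
//         ...
//         brnz v3, ebb1(v4)
//
// the invariant `iadd_imm` is moved into the pre-header, so the loop body no
// longer recomputes it on every iteration.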

View File

@@ -0,0 +1,349 @@
//! A loop analysis represented as mappings of loops to their header Ebb
//! and parent in the loop tree.
use crate::dominator_tree::DominatorTree;
use crate::entity::entity_impl;
use crate::entity::SecondaryMap;
use crate::entity::{Keys, PrimaryMap};
use crate::flowgraph::{BasicBlock, ControlFlowGraph};
use crate::ir::{Ebb, Function, Layout};
use crate::packed_option::PackedOption;
use crate::timing;
use std::vec::Vec;
/// An opaque reference to a code loop.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct Loop(u32);
entity_impl!(Loop, "loop");
/// Loop tree information for a single function.
///
/// Loops are referenced by the `Loop` object, and for each loop you can access its header EBB,
/// its eventual parent in the loop tree, and all the EBBs belonging to the loop.
pub struct LoopAnalysis {
loops: PrimaryMap<Loop, LoopData>,
ebb_loop_map: SecondaryMap<Ebb, PackedOption<Loop>>,
valid: bool,
}
struct LoopData {
header: Ebb,
parent: PackedOption<Loop>,
}
impl LoopData {
/// Creates a `LoopData` object with the loop header and its eventual parent in the loop tree.
pub fn new(header: Ebb, parent: Option<Loop>) -> Self {
Self {
header,
parent: parent.into(),
}
}
}
/// Methods for querying the loop analysis.
impl LoopAnalysis {
/// Allocate a new blank loop analysis struct. Use `compute` to compute the loop analysis for
/// a function.
pub fn new() -> Self {
Self {
valid: false,
loops: PrimaryMap::new(),
ebb_loop_map: SecondaryMap::new(),
}
}
/// Returns all the loops contained in a function.
pub fn loops(&self) -> Keys<Loop> {
self.loops.keys()
}
/// Returns the header EBB of a particular loop.
///
/// The characteristic property of a loop header block is that it dominates some of its
/// predecessors.
pub fn loop_header(&self, lp: Loop) -> Ebb {
self.loops[lp].header
}
/// Return the eventual parent of a loop in the loop tree.
pub fn loop_parent(&self, lp: Loop) -> Option<Loop> {
self.loops[lp].parent.expand()
}
/// Determine if an Ebb belongs to a loop by running a finger along the loop tree.
///
/// Returns `true` if `ebb` is in loop `lp`.
pub fn is_in_loop(&self, ebb: Ebb, lp: Loop) -> bool {
let ebb_loop = self.ebb_loop_map[ebb];
match ebb_loop.expand() {
None => false,
Some(ebb_loop) => self.is_child_loop(ebb_loop, lp),
}
}
/// Determines if a loop is contained in another loop.
///
/// `is_child_loop(child,parent)` returns `true` if and only if `child` is a child loop of
/// `parent` (or `child == parent`).
pub fn is_child_loop(&self, child: Loop, parent: Loop) -> bool {
let mut finger = Some(child);
while let Some(finger_loop) = finger {
if finger_loop == parent {
return true;
}
finger = self.loop_parent(finger_loop);
}
false
}
}
impl LoopAnalysis {
/// Detects the loops in a function. Needs the control flow graph and the dominator tree.
pub fn compute(&mut self, func: &Function, cfg: &ControlFlowGraph, domtree: &DominatorTree) {
let _tt = timing::loop_analysis();
self.loops.clear();
self.ebb_loop_map.clear();
self.ebb_loop_map.resize(func.dfg.num_ebbs());
self.find_loop_headers(cfg, domtree, &func.layout);
self.discover_loop_blocks(cfg, domtree, &func.layout);
self.valid = true;
}
/// Check if the loop analysis is in a valid state.
///
/// Note that this doesn't perform any kind of validity checks. It simply checks if the
/// `compute()` method has been called since the last `clear()`. It does not check that the
/// loop analysis is consistent with the CFG.
pub fn is_valid(&self) -> bool {
self.valid
}
/// Clear all the data structures contained in the loop analysis. This will leave the
/// analysis in a similar state to a context returned by `new()`, except that allocated
/// memory will be retained.
pub fn clear(&mut self) {
self.loops.clear();
self.ebb_loop_map.clear();
self.valid = false;
}
// Traverses the CFG in reverse postorder and creates a loop object for every EBB that
// has a back edge.
fn find_loop_headers(
&mut self,
cfg: &ControlFlowGraph,
domtree: &DominatorTree,
layout: &Layout,
) {
// We traverse the CFG in reverse postorder
for &ebb in domtree.cfg_postorder().iter().rev() {
for BasicBlock {
inst: pred_inst, ..
} in cfg.pred_iter(ebb)
{
// If the ebb dominates one of its predecessors it is a back edge
if domtree.dominates(ebb, pred_inst, layout) {
// This ebb is a loop header, so we create its associated loop
let lp = self.loops.push(LoopData::new(ebb, None));
self.ebb_loop_map[ebb] = lp.into();
break;
// We break because we only need one back edge to identify a loop header.
}
}
}
}
// Intended to be called after `find_loop_headers`. For each detected loop header,
// discovers all the EBBs belonging to the loop and its inner loops. After a call to this
// function, the loop tree is fully constructed.
fn discover_loop_blocks(
&mut self,
cfg: &ControlFlowGraph,
domtree: &DominatorTree,
layout: &Layout,
) {
let mut stack: Vec<Ebb> = Vec::new();
// We handle each loop header in reverse order, corresponding to a pseudo postorder
// traversal of the graph.
for lp in self.loops().rev() {
for BasicBlock {
ebb: pred,
inst: pred_inst,
} in cfg.pred_iter(self.loops[lp].header)
{
// We follow the back edges
if domtree.dominates(self.loops[lp].header, pred_inst, layout) {
stack.push(pred);
}
}
while let Some(node) = stack.pop() {
let continue_dfs: Option<Ebb>;
match self.ebb_loop_map[node].expand() {
None => {
// The node hasn't been visited yet, so we tag it as part of the loop
self.ebb_loop_map[node] = PackedOption::from(lp);
continue_dfs = Some(node);
}
Some(node_loop) => {
// Make a mutable copy of `node_loop` that we update as we walk up the
// loop tree.
let mut node_loop = node_loop;
// The node is part of a loop, which can be lp or an inner loop
let mut node_loop_parent_option = self.loops[node_loop].parent;
while let Some(node_loop_parent) = node_loop_parent_option.expand() {
if node_loop_parent == lp {
// We have encountered lp so we stop (already visited)
break;
} else {
// Keep walking up the loop tree.
node_loop = node_loop_parent;
// Look up the parent loop.
node_loop_parent_option = self.loops[node_loop].parent;
}
}
// Now node_loop_parent_option is either:
// - None, and node_loop is a new inner loop of lp;
// - Some(...), and the initial node_loop was already a known inner loop of lp.
match node_loop_parent_option.expand() {
Some(_) => continue_dfs = None,
None => {
if node_loop != lp {
self.loops[node_loop].parent = lp.into();
continue_dfs = Some(self.loops[node_loop].header)
} else {
// If lp is a one-block loop then we make sure we stop
continue_dfs = None
}
}
}
}
}
// Now we have handled the popped node and need to continue the DFS by adding the
// predecessors of that node
if let Some(continue_dfs) = continue_dfs {
for BasicBlock { ebb: pred, .. } in cfg.pred_iter(continue_dfs) {
stack.push(pred)
}
}
}
}
}
}
#[cfg(test)]
mod tests {
use crate::cursor::{Cursor, FuncCursor};
use crate::dominator_tree::DominatorTree;
use crate::flowgraph::ControlFlowGraph;
use crate::ir::{types, Function, InstBuilder};
use crate::loop_analysis::{Loop, LoopAnalysis};
use std::vec::Vec;
#[test]
fn nested_loops_detection() {
let mut func = Function::new();
let ebb0 = func.dfg.make_ebb();
let ebb1 = func.dfg.make_ebb();
let ebb2 = func.dfg.make_ebb();
let ebb3 = func.dfg.make_ebb();
let cond = func.dfg.append_ebb_param(ebb0, types::I32);
{
let mut cur = FuncCursor::new(&mut func);
cur.insert_ebb(ebb0);
cur.ins().jump(ebb1, &[]);
cur.insert_ebb(ebb1);
cur.ins().jump(ebb2, &[]);
cur.insert_ebb(ebb2);
cur.ins().brnz(cond, ebb1, &[]);
cur.ins().jump(ebb3, &[]);
cur.insert_ebb(ebb3);
cur.ins().brnz(cond, ebb0, &[]);
}
let mut loop_analysis = LoopAnalysis::new();
let mut cfg = ControlFlowGraph::new();
let mut domtree = DominatorTree::new();
cfg.compute(&func);
domtree.compute(&func, &cfg);
loop_analysis.compute(&func, &cfg, &domtree);
let loops = loop_analysis.loops().collect::<Vec<Loop>>();
assert_eq!(loops.len(), 2);
assert_eq!(loop_analysis.loop_header(loops[0]), ebb0);
assert_eq!(loop_analysis.loop_header(loops[1]), ebb1);
assert_eq!(loop_analysis.loop_parent(loops[1]), Some(loops[0]));
assert_eq!(loop_analysis.loop_parent(loops[0]), None);
assert_eq!(loop_analysis.is_in_loop(ebb0, loops[0]), true);
assert_eq!(loop_analysis.is_in_loop(ebb0, loops[1]), false);
assert_eq!(loop_analysis.is_in_loop(ebb1, loops[1]), true);
assert_eq!(loop_analysis.is_in_loop(ebb1, loops[0]), true);
assert_eq!(loop_analysis.is_in_loop(ebb2, loops[1]), true);
assert_eq!(loop_analysis.is_in_loop(ebb2, loops[0]), true);
assert_eq!(loop_analysis.is_in_loop(ebb3, loops[0]), true);
assert_eq!(loop_analysis.is_in_loop(ebb0, loops[1]), false);
}
#[test]
fn complex_loop_detection() {
let mut func = Function::new();
let ebb0 = func.dfg.make_ebb();
let ebb1 = func.dfg.make_ebb();
let ebb2 = func.dfg.make_ebb();
let ebb3 = func.dfg.make_ebb();
let ebb4 = func.dfg.make_ebb();
let ebb5 = func.dfg.make_ebb();
let cond = func.dfg.append_ebb_param(ebb0, types::I32);
{
let mut cur = FuncCursor::new(&mut func);
cur.insert_ebb(ebb0);
cur.ins().brnz(cond, ebb1, &[]);
cur.ins().jump(ebb3, &[]);
cur.insert_ebb(ebb1);
cur.ins().jump(ebb2, &[]);
cur.insert_ebb(ebb2);
cur.ins().brnz(cond, ebb1, &[]);
cur.ins().jump(ebb5, &[]);
cur.insert_ebb(ebb3);
cur.ins().jump(ebb4, &[]);
cur.insert_ebb(ebb4);
cur.ins().brnz(cond, ebb3, &[]);
cur.ins().jump(ebb5, &[]);
cur.insert_ebb(ebb5);
cur.ins().brnz(cond, ebb0, &[]);
}
let mut loop_analysis = LoopAnalysis::new();
let mut cfg = ControlFlowGraph::new();
let mut domtree = DominatorTree::new();
cfg.compute(&func);
domtree.compute(&func, &cfg);
loop_analysis.compute(&func, &cfg, &domtree);
let loops = loop_analysis.loops().collect::<Vec<Loop>>();
assert_eq!(loops.len(), 3);
assert_eq!(loop_analysis.loop_header(loops[0]), ebb0);
assert_eq!(loop_analysis.loop_header(loops[1]), ebb1);
assert_eq!(loop_analysis.loop_header(loops[2]), ebb3);
assert_eq!(loop_analysis.loop_parent(loops[1]), Some(loops[0]));
assert_eq!(loop_analysis.loop_parent(loops[2]), Some(loops[0]));
assert_eq!(loop_analysis.loop_parent(loops[0]), None);
assert_eq!(loop_analysis.is_in_loop(ebb0, loops[0]), true);
assert_eq!(loop_analysis.is_in_loop(ebb1, loops[1]), true);
assert_eq!(loop_analysis.is_in_loop(ebb2, loops[1]), true);
assert_eq!(loop_analysis.is_in_loop(ebb3, loops[2]), true);
assert_eq!(loop_analysis.is_in_loop(ebb4, loops[2]), true);
assert_eq!(loop_analysis.is_in_loop(ebb5, loops[0]), true);
}
}

View File

@@ -0,0 +1,85 @@
//! A NaN-canonicalizing rewriting pass. Patches floating-point arithmetic
//! instructions that may return a NaN result with a sequence of operations
//! that replaces nondeterministic NaNs with a single canonical NaN value.
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::condcodes::FloatCC;
use crate::ir::immediates::{Ieee32, Ieee64};
use crate::ir::types;
use crate::ir::types::Type;
use crate::ir::{Function, Inst, InstBuilder, InstructionData, Opcode, Value};
use crate::timing;
// Canonical 32-bit and 64-bit NaN values.
static CANON_32BIT_NAN: u32 = 0b01111111110000000000000000000000;
static CANON_64BIT_NAN: u64 = 0b0111111111111000000000000000000000000000000000000000000000000000;
/// Perform the NaN canonicalization pass.
pub fn do_nan_canonicalization(func: &mut Function) {
let _tt = timing::canonicalize_nans();
let mut pos = FuncCursor::new(func);
while let Some(_ebb) = pos.next_ebb() {
while let Some(inst) = pos.next_inst() {
if is_fp_arith(&mut pos, inst) {
add_nan_canon_seq(&mut pos, inst);
}
}
}
}
/// Returns `true` if the instruction is a floating-point arithmetic
/// operation. This ignores operations like `fneg`, `fabs`, or
/// `fcopysign` that only operate on the sign bit of a floating-point value.
fn is_fp_arith(pos: &mut FuncCursor, inst: Inst) -> bool {
match pos.func.dfg[inst] {
InstructionData::Unary { opcode, .. } => {
opcode == Opcode::Ceil
|| opcode == Opcode::Floor
|| opcode == Opcode::Nearest
|| opcode == Opcode::Sqrt
|| opcode == Opcode::Trunc
}
InstructionData::Binary { opcode, .. } => {
opcode == Opcode::Fadd
|| opcode == Opcode::Fdiv
|| opcode == Opcode::Fmax
|| opcode == Opcode::Fmin
|| opcode == Opcode::Fmul
|| opcode == Opcode::Fsub
}
InstructionData::Ternary { opcode, .. } => opcode == Opcode::Fma,
_ => false,
}
}
/// Append a sequence of canonicalizing instructions after the given instruction.
fn add_nan_canon_seq(pos: &mut FuncCursor, inst: Inst) {
// Select the instruction result and its type. Replace the instruction
// result and step forward before inserting the canonicalization sequence.
let val = pos.func.dfg.first_result(inst);
let val_type = pos.func.dfg.value_type(val);
let new_res = pos.func.dfg.replace_result(val, val_type);
let _next_inst = pos.next_inst().expect("EBB missing terminator!");
// Insert a comparison instruction to check if `new_res` is NaN. Select
// the canonical NaN value if it is, and assign the result back to `val`.
let is_nan = pos.ins().fcmp(FloatCC::NotEqual, new_res, new_res);
let canon_nan = insert_nan_const(pos, val_type);
pos.ins()
.with_result(val)
.select(is_nan, canon_nan, new_res);
pos.prev_inst(); // Step backwards so the pass does not skip instructions.
}
/// Insert a canonical 32-bit or 64-bit NaN constant at the current position.
fn insert_nan_const(pos: &mut FuncCursor, nan_type: Type) -> Value {
match nan_type {
types::F32 => pos.ins().f32const(Ieee32::with_bits(CANON_32BIT_NAN)),
types::F64 => pos.ins().f64const(Ieee64::with_bits(CANON_64BIT_NAN)),
_ => {
// Panic if the type given was not an IEEE floating point type.
panic!("Could not canonicalize NaN: Unexpected result type found.");
}
}
}
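// For illustration, a minimal sketch in plain Rust of the semantics the
// inserted IR sequence implements for `f32`: `x != x` holds only for NaN
// (mirroring the `FloatCC::NotEqual` self-comparison above), and the select
// then substitutes the canonical quiet NaN.
#[allow(dead_code)]
fn canonicalize_nan_f32(x: f32) -> f32 {
    if x != x {
        f32::from_bits(CANON_32BIT_NAN)
    } else {
        x
    }
}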

View File

@@ -0,0 +1,97 @@
//! Rearrange the elements in a slice according to a predicate.
use core::mem;
/// Rearrange the elements of the mutable slice `s` such that elements where `p(t)` is true precede
/// the elements where `p(t)` is false.
///
/// The order of elements is not preserved, unless the slice is already partitioned.
///
/// Returns the number of elements where `p(t)` is true.
pub fn partition_slice<T, F>(s: &mut [T], mut p: F) -> usize
where
F: FnMut(&T) -> bool,
{
// The iterator works like a deque which we can pop from both ends.
let mut i = s.iter_mut();
// Number of elements for which the predicate is known to be true.
let mut pos = 0;
loop {
// Find the first element for which the predicate fails.
let head = loop {
match i.next() {
Some(head) => {
if !p(&head) {
break head;
}
}
None => return pos,
}
pos += 1;
};
// Find the last element for which the predicate succeeds.
let tail = loop {
match i.next_back() {
Some(tail) => {
if p(&tail) {
break tail;
}
}
None => return pos,
}
};
// Swap the two elements into the right order.
mem::swap(head, tail);
pos += 1;
}
}
#[cfg(test)]
mod tests {
use super::partition_slice;
use std::vec::Vec;
fn check(x: &[u32], want: &[u32]) {
assert_eq!(x.len(), want.len());
let want_count = want.iter().cloned().filter(|&x| x % 10 == 0).count();
let mut v = Vec::new();
v.extend(x.iter().cloned());
let count = partition_slice(&mut v[..], |&x| x % 10 == 0);
assert_eq!(v, want);
assert_eq!(count, want_count);
}
#[test]
fn empty() {
check(&[], &[]);
}
#[test]
fn singles() {
check(&[0], &[0]);
check(&[1], &[1]);
check(&[10], &[10]);
}
#[test]
fn doubles() {
check(&[0, 0], &[0, 0]);
check(&[0, 5], &[0, 5]);
check(&[5, 0], &[0, 5]);
check(&[5, 4], &[5, 4]);
}
#[test]
fn longer() {
check(&[1, 2, 3], &[1, 2, 3]);
check(&[1, 2, 10], &[10, 2, 1]); // Note: 2, 1 order not required.
check(&[1, 10, 2], &[10, 1, 2]); // Note: 1, 2 order not required.
check(&[1, 20, 10], &[10, 20, 1]); // Note: 10, 20 order not required.
check(&[1, 20, 3, 10], &[10, 20, 3, 1]);
check(&[20, 3, 10, 1], &[20, 10, 3, 1]);
}
}

View File

@@ -0,0 +1,385 @@
//! A post-legalization rewriting pass.
#![allow(non_snake_case)]
use crate::cursor::{Cursor, EncCursor};
use crate::ir::condcodes::{CondCode, FloatCC, IntCC};
use crate::ir::dfg::ValueDef;
use crate::ir::immediates::{Imm64, Offset32};
use crate::ir::instructions::{Opcode, ValueList};
use crate::ir::{Ebb, Function, Inst, InstBuilder, InstructionData, MemFlags, Type, Value};
use crate::isa::TargetIsa;
use crate::timing;
/// Information collected about a compare+branch sequence.
struct CmpBrInfo {
/// The branch instruction.
br_inst: Inst,
/// The icmp, icmp_imm, or fcmp instruction.
cmp_inst: Inst,
/// The destination of the branch.
destination: Ebb,
/// The arguments of the branch.
args: ValueList,
/// The first argument to the comparison. The second is in the `kind` field.
cmp_arg: Value,
/// If the branch is `brz` rather than `brnz`, we need to invert the condition
/// before the branch.
invert_branch_cond: bool,
/// The kind of comparison, and the second argument.
kind: CmpBrKind,
}
enum CmpBrKind {
Icmp { cond: IntCC, arg: Value },
IcmpImm { cond: IntCC, imm: Imm64 },
Fcmp { cond: FloatCC, arg: Value },
}
/// Optimize comparisons to use flags values, to avoid materializing conditions
/// in integer registers.
///
/// For example, optimize icmp/fcmp brz/brnz sequences into ifcmp/ffcmp brif/brff
/// sequences.
fn optimize_cpu_flags(
pos: &mut EncCursor,
inst: Inst,
last_flags_clobber: Option<Inst>,
isa: &TargetIsa,
) {
// Look for compare and branch patterns.
// This code could be considerably simplified with non-lexical lifetimes.
let info = match pos.func.dfg[inst] {
InstructionData::Branch {
opcode,
destination,
ref args,
} => {
let first_arg = args.first(&pos.func.dfg.value_lists).unwrap();
let invert_branch_cond = match opcode {
Opcode::Brz => true,
Opcode::Brnz => false,
_ => panic!(),
};
if let ValueDef::Result(cond_inst, _) = pos.func.dfg.value_def(first_arg) {
match pos.func.dfg[cond_inst] {
InstructionData::IntCompare {
cond,
args: cmp_args,
..
} => CmpBrInfo {
br_inst: inst,
cmp_inst: cond_inst,
destination,
args: args.clone(),
cmp_arg: cmp_args[0],
invert_branch_cond,
kind: CmpBrKind::Icmp {
cond,
arg: cmp_args[1],
},
},
InstructionData::IntCompareImm {
cond,
arg: cmp_arg,
imm: cmp_imm,
..
} => CmpBrInfo {
br_inst: inst,
cmp_inst: cond_inst,
destination,
args: args.clone(),
cmp_arg,
invert_branch_cond,
kind: CmpBrKind::IcmpImm { cond, imm: cmp_imm },
},
InstructionData::FloatCompare {
cond,
args: cmp_args,
..
} => CmpBrInfo {
br_inst: inst,
cmp_inst: cond_inst,
destination,
args: args.clone(),
cmp_arg: cmp_args[0],
invert_branch_cond,
kind: CmpBrKind::Fcmp {
cond,
arg: cmp_args[1],
},
},
_ => return,
}
} else {
return;
}
}
// TODO: trapif, trueif, selectif, and their ff counterparts.
_ => return,
};
// If any instruction clobbers the flags between the comparison and the branch,
// don't perform the optimization.
if last_flags_clobber != Some(info.cmp_inst) {
return;
}
// We found a compare+branch pattern. Transform it to use flags.
let args = info.args.as_slice(&pos.func.dfg.value_lists)[1..].to_vec();
pos.goto_inst(info.cmp_inst);
match info.kind {
CmpBrKind::Icmp { mut cond, arg } => {
let flags = pos.ins().ifcmp(info.cmp_arg, arg);
pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
if info.invert_branch_cond {
cond = cond.inverse();
}
pos.func
.dfg
.replace(info.br_inst)
.brif(cond, flags, info.destination, &args);
}
CmpBrKind::IcmpImm { mut cond, imm } => {
let flags = pos.ins().ifcmp_imm(info.cmp_arg, imm);
pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
if info.invert_branch_cond {
cond = cond.inverse();
}
pos.func
.dfg
.replace(info.br_inst)
.brif(cond, flags, info.destination, &args);
}
CmpBrKind::Fcmp { mut cond, arg } => {
let flags = pos.ins().ffcmp(info.cmp_arg, arg);
pos.func.dfg.replace(info.cmp_inst).trueff(cond, flags);
if info.invert_branch_cond {
cond = cond.inverse();
}
pos.func
.dfg
.replace(info.br_inst)
.brff(cond, flags, info.destination, &args);
}
}
let ok = pos.func.update_encoding(info.cmp_inst, isa).is_ok();
debug_assert!(ok);
let ok = pos.func.update_encoding(info.br_inst, isa).is_ok();
debug_assert!(ok);
}
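// For illustration, a hedged sketch (not verbatim pass output) of the rewrite
// performed above for an integer compare-and-branch:
//
//     v2 = icmp slt v0, v1              v3 = ifcmp v0, v1
//     brnz v2, ebb1(...)      ==>       v2 = trueif slt v3
//                                       brif slt v3, ebb1(...)
//
// The branch now consumes the CPU flags directly, and `v2` is still computed
// via `trueif` in case the comparison result has other uses.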
struct MemOpInfo {
opcode: Opcode,
itype: Type,
arg: Value,
st_arg: Option<Value>,
flags: MemFlags,
offset: Offset32,
}
fn optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &TargetIsa) {
// Look for simple loads and stores we can optimize.
let info = match pos.func.dfg[inst] {
InstructionData::Load {
opcode,
arg,
flags,
offset,
} => MemOpInfo {
opcode,
itype: pos.func.dfg.ctrl_typevar(inst),
arg,
st_arg: None,
flags,
offset,
},
InstructionData::Store {
opcode,
args,
flags,
offset,
} => MemOpInfo {
opcode,
itype: pos.func.dfg.ctrl_typevar(inst),
arg: args[1],
st_arg: Some(args[0]),
flags,
offset,
},
_ => return,
};
// Examine the instruction that defines the address operand.
if let ValueDef::Result(result_inst, _) = pos.func.dfg.value_def(info.arg) {
match pos.func.dfg[result_inst] {
InstructionData::Binary {
opcode: Opcode::Iadd,
args,
} => match info.opcode {
// Operand is an iadd. Fold it into a memory address with a complex address mode.
Opcode::Load => {
pos.func.dfg.replace(inst).load_complex(
info.itype,
info.flags,
&args,
info.offset,
);
}
Opcode::Uload8 => {
pos.func.dfg.replace(inst).uload8_complex(
info.itype,
info.flags,
&args,
info.offset,
);
}
Opcode::Sload8 => {
pos.func.dfg.replace(inst).sload8_complex(
info.itype,
info.flags,
&args,
info.offset,
);
}
Opcode::Uload16 => {
pos.func.dfg.replace(inst).uload16_complex(
info.itype,
info.flags,
&args,
info.offset,
);
}
Opcode::Sload16 => {
pos.func.dfg.replace(inst).sload16_complex(
info.itype,
info.flags,
&args,
info.offset,
);
}
Opcode::Uload32 => {
pos.func
.dfg
.replace(inst)
.uload32_complex(info.flags, &args, info.offset);
}
Opcode::Sload32 => {
pos.func
.dfg
.replace(inst)
.sload32_complex(info.flags, &args, info.offset);
}
Opcode::Store => {
pos.func.dfg.replace(inst).store_complex(
info.flags,
info.st_arg.unwrap(),
&args,
info.offset,
);
}
Opcode::Istore8 => {
pos.func.dfg.replace(inst).istore8_complex(
info.flags,
info.st_arg.unwrap(),
&args,
info.offset,
);
}
Opcode::Istore16 => {
pos.func.dfg.replace(inst).istore16_complex(
info.flags,
info.st_arg.unwrap(),
&args,
info.offset,
);
}
Opcode::Istore32 => {
pos.func.dfg.replace(inst).istore32_complex(
info.flags,
info.st_arg.unwrap(),
&args,
info.offset,
);
}
_ => panic!("Unsupported load or store opcode"),
},
InstructionData::BinaryImm {
opcode: Opcode::IaddImm,
arg,
imm,
} => match pos.func.dfg[inst] {
// Operand is an iadd_imm. Fold the immediate into the offset if possible.
InstructionData::Load {
arg: ref mut load_arg,
ref mut offset,
..
} => {
if let Some(imm) = offset.try_add_i64(imm.into()) {
*load_arg = arg;
*offset = imm;
} else {
// Overflow.
return;
}
}
InstructionData::Store {
args: ref mut store_args,
ref mut offset,
..
} => {
if let Some(imm) = offset.try_add_i64(imm.into()) {
store_args[1] = arg;
*offset = imm;
} else {
// Overflow.
return;
}
}
_ => panic!(),
},
_ => {
// Address value is defined by some other kind of instruction.
return;
}
}
} else {
// Address value is not the result of an instruction.
return;
}
let ok = pos.func.update_encoding(inst, isa).is_ok();
debug_assert!(ok);
}
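// For illustration, a hedged sketch of the address folding above: a load whose
// address is produced by a plain `iadd`, such as
//
//     v2 = iadd v0, v1
//     v3 = load.i64 v2+8
//
// is rewritten to the complex-addressing form `load_complex`, folding the add
// into the memory operation; `iadd_imm` producers are instead folded into the
// load/store offset when the addition doesn't overflow.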
//----------------------------------------------------------------------
//
// The main post-opt pass.
pub fn do_postopt(func: &mut Function, isa: &TargetIsa) {
let _tt = timing::postopt();
let mut pos = EncCursor::new(func, isa);
while let Some(_ebb) = pos.next_ebb() {
let mut last_flags_clobber = None;
while let Some(inst) = pos.next_inst() {
if isa.uses_cpu_flags() {
// Optimize instructions to make use of flags.
optimize_cpu_flags(&mut pos, inst, last_flags_clobber, isa);
// Track the most recently seen instruction that clobbers the flags.
if let Some(constraints) = isa
.encoding_info()
.operand_constraints(pos.func.encodings[inst])
{
if constraints.clobbers_flags {
last_flags_clobber = Some(inst)
}
}
}
if isa.uses_complex_addresses() {
optimize_complex_addresses(&mut pos, inst, isa);
}
}
}
}

View File

@@ -0,0 +1,106 @@
//! Predicate functions for testing instruction fields.
//!
//! This module defines functions that are used by the instruction predicates defined by
//! `cranelift-codegen/meta-python/cdsl/predicates.py` classes.
//!
//! The predicates that operate on integer fields use `Into<i64>` as a shared trait bound. This
//! bound is implemented by all the native integer types as well as `Imm64`.
//!
//! Some of these predicates may be unused in certain ISA configurations, so we suppress the
//! dead code warning.
use crate::ir;
/// Check that a 64-bit floating point value is zero.
#[allow(dead_code)]
pub fn is_zero_64_bit_float<T: Into<ir::immediates::Ieee64>>(x: T) -> bool {
let x64 = x.into();
x64.bits() == 0
}
/// Check that a 32-bit floating point value is zero.
#[allow(dead_code)]
pub fn is_zero_32_bit_float<T: Into<ir::immediates::Ieee32>>(x: T) -> bool {
let x32 = x.into();
x32.bits() == 0
}
/// Check that `x` is the same as `y`.
#[allow(dead_code)]
pub fn is_equal<T: Eq + Copy, O: Into<T> + Copy>(x: T, y: O) -> bool {
x == y.into()
}
/// Check that `x` can be represented as a `wd`-bit signed integer with `sc` low zero bits.
#[allow(dead_code)]
pub fn is_signed_int<T: Into<i64>>(x: T, wd: u8, sc: u8) -> bool {
let s = x.into();
// Shift out the low `sc` bits, then sign-extend from bit `wd - 1` by shifting
// up and arithmetically back down; `x` fits iff this round trip reproduces `s`.
s == (s >> sc << (64 - wd + sc) >> (64 - wd))
}
/// Check that `x` can be represented as a `wd`-bit unsigned integer with `sc` low zero bits.
#[allow(dead_code)]
pub fn is_unsigned_int<T: Into<i64>>(x: T, wd: u8, sc: u8) -> bool {
let u = x.into() as u64;
// Bit-mask of the permitted bits.
let m = (1 << wd) - (1 << sc);
u == (u & m)
}
#[allow(dead_code)]
pub fn is_colocated_func(func_ref: ir::FuncRef, func: &ir::Function) -> bool {
func.dfg.ext_funcs[func_ref].colocated
}
#[allow(dead_code)]
pub fn is_colocated_data(global_value: ir::GlobalValue, func: &ir::Function) -> bool {
match func.global_values[global_value] {
ir::GlobalValueData::Symbol { colocated, .. } => colocated,
_ => panic!("is_colocated_data only makes sense for data with symbolic addresses"),
}
}
#[allow(dead_code)]
pub fn has_length_of(value_list: &ir::ValueList, num: usize, func: &ir::Function) -> bool {
value_list.len(&func.dfg.value_lists) == num
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn cvt_u32() {
let x1 = 0u32;
let x2 = 1u32;
let x3 = 0xffff_fff0u32;
assert!(is_signed_int(x1, 1, 0));
assert!(is_signed_int(x1, 2, 1));
assert!(is_signed_int(x2, 2, 0));
assert!(!is_signed_int(x2, 2, 1));
// `u32` doesn't sign-extend when converted to `i64`.
assert!(!is_signed_int(x3, 8, 0));
assert!(is_unsigned_int(x1, 1, 0));
assert!(is_unsigned_int(x1, 8, 4));
assert!(is_unsigned_int(x2, 1, 0));
assert!(!is_unsigned_int(x2, 8, 4));
assert!(!is_unsigned_int(x3, 1, 0));
assert!(is_unsigned_int(x3, 32, 4));
}
#[test]
fn cvt_imm64() {
use crate::ir::immediates::Imm64;
let x1 = Imm64::new(-8);
let x2 = Imm64::new(8);
assert!(is_signed_int(x1, 16, 2));
assert!(is_signed_int(x2, 16, 2));
assert!(!is_signed_int(x1, 16, 4));
assert!(!is_signed_int(x2, 16, 4));
}
}

View File

@@ -0,0 +1,227 @@
//! Utility routines for pretty-printing error messages.
use crate::entity::SecondaryMap;
use crate::ir;
use crate::ir::entities::{AnyEntity, Ebb, Inst, Value};
use crate::ir::function::Function;
use crate::isa::TargetIsa;
use crate::result::CodegenError;
use crate::verifier::{VerifierError, VerifierErrors};
use crate::write::{decorate_function, FuncWriter, PlainWriter};
use core::fmt;
use core::fmt::Write;
use std::boxed::Box;
use std::string::{String, ToString};
use std::vec::Vec;
/// Pretty-print a verifier error.
pub fn pretty_verifier_error<'a>(
func: &ir::Function,
isa: Option<&TargetIsa>,
func_w: Option<Box<FuncWriter + 'a>>,
errors: VerifierErrors,
) -> String {
let mut errors = errors.0;
let mut w = String::new();
let num_errors = errors.len();
decorate_function(
&mut PrettyVerifierError(func_w.unwrap_or_else(|| Box::new(PlainWriter)), &mut errors),
&mut w,
func,
isa,
)
.unwrap();
writeln!(
w,
"\n; {} verifier error{} detected (see above). Compilation aborted.",
num_errors,
if num_errors == 1 { "" } else { "s" }
)
.unwrap();
w
}
struct PrettyVerifierError<'a>(Box<FuncWriter + 'a>, &'a mut Vec<VerifierError>);
impl<'a> FuncWriter for PrettyVerifierError<'a> {
fn write_ebb_header(
&mut self,
w: &mut Write,
func: &Function,
isa: Option<&TargetIsa>,
ebb: Ebb,
indent: usize,
) -> fmt::Result {
pretty_ebb_header_error(w, func, isa, ebb, indent, &mut *self.0, self.1)
}
fn write_instruction(
&mut self,
w: &mut Write,
func: &Function,
aliases: &SecondaryMap<Value, Vec<Value>>,
isa: Option<&TargetIsa>,
inst: Inst,
indent: usize,
) -> fmt::Result {
pretty_instruction_error(w, func, aliases, isa, inst, indent, &mut *self.0, self.1)
}
fn write_entity_definition(
&mut self,
w: &mut Write,
func: &Function,
entity: AnyEntity,
value: &fmt::Display,
) -> fmt::Result {
pretty_preamble_error(w, func, entity, value, &mut *self.0, self.1)
}
}
/// Pretty-print a function verifier error for a given EBB.
fn pretty_ebb_header_error(
w: &mut Write,
func: &Function,
isa: Option<&TargetIsa>,
cur_ebb: Ebb,
indent: usize,
func_w: &mut FuncWriter,
errors: &mut Vec<VerifierError>,
) -> fmt::Result {
let mut s = String::new();
func_w.write_ebb_header(&mut s, func, isa, cur_ebb, indent)?;
write!(w, "{}", s)?;
// TODO: Use drain_filter here when it gets stabilized
let mut i = 0;
let mut printed_error = false;
while i != errors.len() {
match errors[i].location {
ir::entities::AnyEntity::Ebb(ebb) if ebb == cur_ebb => {
if !printed_error {
print_arrow(w, &s)?;
printed_error = true;
}
let err = errors.remove(i);
print_error(w, err)?;
}
_ => i += 1,
}
}
if printed_error {
w.write_char('\n')?;
}
Ok(())
}
/// Pretty-print a function verifier error for a given instruction.
fn pretty_instruction_error(
w: &mut Write,
func: &Function,
aliases: &SecondaryMap<Value, Vec<Value>>,
isa: Option<&TargetIsa>,
cur_inst: Inst,
indent: usize,
func_w: &mut FuncWriter,
errors: &mut Vec<VerifierError>,
) -> fmt::Result {
let mut s = String::new();
func_w.write_instruction(&mut s, func, aliases, isa, cur_inst, indent)?;
write!(w, "{}", s)?;
// TODO: Use drain_filter here when it gets stabilized
let mut i = 0;
let mut printed_error = false;
while i != errors.len() {
match errors[i].location {
ir::entities::AnyEntity::Inst(inst) if inst == cur_inst => {
if !printed_error {
print_arrow(w, &s)?;
printed_error = true;
}
let err = errors.remove(i);
print_error(w, err)?;
}
_ => i += 1,
}
}
if printed_error {
w.write_char('\n')?;
}
Ok(())
}
fn pretty_preamble_error(
w: &mut Write,
func: &Function,
entity: AnyEntity,
value: &fmt::Display,
func_w: &mut FuncWriter,
errors: &mut Vec<VerifierError>,
) -> fmt::Result {
let mut s = String::new();
func_w.write_entity_definition(&mut s, func, entity, value)?;
write!(w, "{}", s)?;
// TODO: Use drain_filter here when it gets stabilized
let mut i = 0;
let mut printed_error = false;
while i != errors.len() {
if entity == errors[i].location {
if !printed_error {
print_arrow(w, &s)?;
printed_error = true;
}
let err = errors.remove(i);
print_error(w, err)?;
} else {
i += 1
}
}
if printed_error {
w.write_char('\n')?;
}
Ok(())
}
/// Prints:
/// ; ^~~~~~
fn print_arrow(w: &mut Write, entity: &str) -> fmt::Result {
write!(w, ";")?;
let indent = entity.len() - entity.trim_start().len();
if indent != 0 {
write!(w, "{1:0$}^", indent - 1, "")?;
}
for _ in 0..entity.trim().len() - 1 {
write!(w, "~")?;
}
writeln!(w)
}
/// Prints:
/// ; error: [ERROR BODY]
fn print_error(w: &mut Write, err: VerifierError) -> fmt::Result {
writeln!(w, "; error: {}", err.to_string())?;
Ok(())
}
/// Pretty-print a Cranelift error.
pub fn pretty_error(func: &ir::Function, isa: Option<&TargetIsa>, err: CodegenError) -> String {
if let CodegenError::Verifier(e) = err {
pretty_verifier_error(func, isa, None, e)
} else {
err.to_string()
}
}
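// For illustration, a hedged sketch of the decorated output produced by the
// helpers above: the offending line is echoed, underlined by `print_arrow`,
// and followed by one `print_error` line per verifier error:
//
//     v1 = iadd v0, v0
// ;   ^~~~~~~~~~~~~~~~
// ; error: ...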

View File

@@ -0,0 +1,18 @@
//! Functions for converting a reference into a singleton slice.
//!
//! See also the [`ref_slice` crate](https://crates.io/crates/ref_slice).
//!
//! We define the functions here to avoid external dependencies, and to ensure that they are
//! inlined in this crate.
//!
//! Although they use an `unsafe` block, these functions are completely safe.
use core::slice;
pub fn ref_slice<T>(s: &T) -> &[T] {
unsafe { slice::from_raw_parts(s, 1) }
}
pub fn ref_slice_mut<T>(s: &mut T) -> &mut [T] {
unsafe { slice::from_raw_parts_mut(s, 1) }
}
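// A minimal usage sketch (hypothetical values): view a single value as a
// one-element slice, e.g. to feed an API that expects `&[T]`.
#[cfg(test)]
mod example {
    use super::*;

    #[test]
    fn singleton_view() {
        let x = 5;
        let s = ref_slice(&x);
        assert_eq!(s.len(), 1);
        assert_eq!(s[0], 5);
        let mut y = 7;
        ref_slice_mut(&mut y)[0] += 1;
        assert_eq!(y, 8);
    }
}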

View File

@@ -0,0 +1,128 @@
//! Value affinity for register allocation.
//!
//! An SSA value's affinity is a hint used to guide the register allocator. It specifies the class
//! of allocation that is likely to cause the least amount of fixup moves in order to satisfy
//! instruction operand constraints.
//!
//! For values that want to be in registers, the affinity hint includes a register class or
//! subclass. This is just a hint, and the register allocator is allowed to pick a register from a
//! larger register class instead.
use crate::ir::{AbiParam, ArgumentLoc};
use crate::isa::{ConstraintKind, OperandConstraint, RegClassIndex, RegInfo, TargetIsa};
use core::fmt;
/// Preferred register allocation for an SSA value.
#[derive(Clone, Copy, Debug)]
pub enum Affinity {
/// No affinity.
///
/// This indicates a value that is not defined or used by any real instructions. It is a ghost
/// value that won't appear in the final program.
Unassigned,
/// This value should be placed in a spill slot on the stack.
Stack,
/// This value prefers a register from the given register class.
Reg(RegClassIndex),
}
impl Default for Affinity {
fn default() -> Self {
Affinity::Unassigned
}
}
impl Affinity {
/// Create an affinity that satisfies a single constraint.
///
/// This will never create an `Affinity::Unassigned`.
/// Use the `Default` implementation for that.
pub fn new(constraint: &OperandConstraint) -> Self {
if constraint.kind == ConstraintKind::Stack {
Affinity::Stack
} else {
Affinity::Reg(constraint.regclass.into())
}
}
/// Create an affinity that matches an ABI argument for `isa`.
pub fn abi(arg: &AbiParam, isa: &TargetIsa) -> Self {
match arg.location {
ArgumentLoc::Unassigned => Affinity::Unassigned,
ArgumentLoc::Reg(_) => Affinity::Reg(isa.regclass_for_abi_type(arg.value_type).into()),
ArgumentLoc::Stack(_) => Affinity::Stack,
}
}
/// Is this the `Unassigned` affinity?
pub fn is_unassigned(self) -> bool {
match self {
Affinity::Unassigned => true,
_ => false,
}
}
/// Is this the `Reg` affinity?
pub fn is_reg(self) -> bool {
match self {
Affinity::Reg(_) => true,
_ => false,
}
}
/// Is this the `Stack` affinity?
pub fn is_stack(self) -> bool {
match self {
Affinity::Stack => true,
_ => false,
}
}
/// Merge an operand constraint into this affinity.
///
/// Note that this does not guarantee that the register allocator will pick a register that
/// satisfies the constraint.
pub fn merge(&mut self, constraint: &OperandConstraint, reginfo: &RegInfo) {
match *self {
Affinity::Unassigned => *self = Self::new(constraint),
Affinity::Reg(rc) => {
// If the preferred register class is a subclass of the constraint, there's no need
// to change anything.
if constraint.kind != ConstraintKind::Stack && !constraint.regclass.has_subclass(rc)
{
// If the register classes don't overlap, `intersect_index` returns `None`, and
// we just keep our previous affinity.
if let Some(subclass) = constraint.regclass.intersect_index(reginfo.rc(rc)) {
// This constraint shrinks our preferred register class.
*self = Affinity::Reg(subclass);
}
}
}
Affinity::Stack => {}
}
}
/// Return an object that can display this value affinity, using the register info from the
/// target ISA.
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(self, regs: R) -> DisplayAffinity<'a> {
DisplayAffinity(self, regs.into())
}
}
/// Displaying an `Affinity` correctly requires the associated `RegInfo` from the target ISA.
pub struct DisplayAffinity<'a>(Affinity, Option<&'a RegInfo>);
impl<'a> fmt::Display for DisplayAffinity<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.0 {
Affinity::Unassigned => write!(f, "unassigned"),
Affinity::Stack => write!(f, "stack"),
Affinity::Reg(rci) => match self.1 {
Some(regs) => write!(f, "{}", regs.rc(rci)),
None => write!(f, "{}", rci),
},
}
}
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,217 @@
//! Register allocator context.
//!
//! The `Context` struct contains data structures that should be preserved across invocations of
//! the register allocator algorithm. This doesn't preserve any data between functions, but it
//! avoids allocating data structures independently for each function being compiled.
use crate::dominator_tree::DominatorTree;
use crate::flowgraph::ControlFlowGraph;
use crate::ir::Function;
use crate::isa::TargetIsa;
use crate::regalloc::coalescing::Coalescing;
use crate::regalloc::coloring::Coloring;
use crate::regalloc::live_value_tracker::LiveValueTracker;
use crate::regalloc::liveness::Liveness;
use crate::regalloc::reload::Reload;
use crate::regalloc::spilling::Spilling;
use crate::regalloc::virtregs::VirtRegs;
use crate::result::CodegenResult;
use crate::timing;
use crate::topo_order::TopoOrder;
use crate::verifier::{
verify_context, verify_cssa, verify_liveness, verify_locations, VerifierErrors,
};
/// Persistent memory allocations for register allocation.
pub struct Context {
liveness: Liveness,
virtregs: VirtRegs,
coalescing: Coalescing,
topo: TopoOrder,
tracker: LiveValueTracker,
spilling: Spilling,
reload: Reload,
coloring: Coloring,
}
impl Context {
/// Create a new context for register allocation.
///
/// This context should be reused for multiple functions in order to avoid repeated memory
/// allocations.
pub fn new() -> Self {
Self {
liveness: Liveness::new(),
virtregs: VirtRegs::new(),
coalescing: Coalescing::new(),
topo: TopoOrder::new(),
tracker: LiveValueTracker::new(),
spilling: Spilling::new(),
reload: Reload::new(),
coloring: Coloring::new(),
}
}
/// Clear all data structures in this context.
pub fn clear(&mut self) {
self.liveness.clear();
self.virtregs.clear();
self.coalescing.clear();
self.topo.clear();
self.tracker.clear();
self.spilling.clear();
self.reload.clear();
self.coloring.clear();
}
/// Allocate registers in `func`.
///
/// After register allocation, all values in `func` have been assigned to a register or stack
/// location that is consistent with instruction encoding constraints.
pub fn run(
&mut self,
isa: &TargetIsa,
func: &mut Function,
cfg: &ControlFlowGraph,
domtree: &mut DominatorTree,
) -> CodegenResult<()> {
let _tt = timing::regalloc();
debug_assert!(domtree.is_valid());
let mut errors = VerifierErrors::default();
// `Liveness` and `Coloring` are self-clearing.
self.virtregs.clear();
// Tracker state (dominator live sets) is actually reused between the spilling and coloring
// phases.
self.tracker.clear();
// Pass: Liveness analysis.
self.liveness.compute(isa, func, cfg);
if isa.flags().enable_verifier() {
let ok = verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok();
if !ok {
return Err(errors.into());
}
}
// Pass: Coalesce and create Conventional SSA form.
self.coalescing.conventional_ssa(
isa,
func,
cfg,
domtree,
&mut self.liveness,
&mut self.virtregs,
);
if isa.flags().enable_verifier() {
let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
&& verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
&& verify_cssa(
func,
cfg,
domtree,
&self.liveness,
&self.virtregs,
&mut errors,
)
.is_ok();
if !ok {
return Err(errors.into());
}
}
// Pass: Spilling.
self.spilling.run(
isa,
func,
domtree,
&mut self.liveness,
&self.virtregs,
&mut self.topo,
&mut self.tracker,
);
if isa.flags().enable_verifier() {
let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
&& verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
&& verify_cssa(
func,
cfg,
domtree,
&self.liveness,
&self.virtregs,
&mut errors,
)
.is_ok();
if !ok {
return Err(errors.into());
}
}
// Pass: Reload.
self.reload.run(
isa,
func,
domtree,
&mut self.liveness,
&mut self.topo,
&mut self.tracker,
);
if isa.flags().enable_verifier() {
let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
&& verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
&& verify_cssa(
func,
cfg,
domtree,
&self.liveness,
&self.virtregs,
&mut errors,
)
.is_ok();
if !ok {
return Err(errors.into());
}
}
// Pass: Coloring.
self.coloring
.run(isa, func, domtree, &mut self.liveness, &mut self.tracker);
if isa.flags().enable_verifier() {
let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
&& verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
&& verify_locations(isa, func, Some(&self.liveness), &mut errors).is_ok()
&& verify_cssa(
func,
cfg,
domtree,
&self.liveness,
&self.virtregs,
&mut errors,
)
.is_ok();
if !ok {
return Err(errors.into());
}
}
// Even if we reach this point, (non-fatal) errors might have been reported, so we
// must make sure absolutely nothing is wrong.
if errors.is_empty() {
Ok(())
} else {
Err(errors.into())
}
}
}

View File

@@ -0,0 +1,218 @@
//! Register diversions.
//!
//! Normally, a value is assigned to a single register or stack location by the register allocator.
//! Sometimes, it is necessary to move register values to a different register in order to satisfy
//! instruction constraints.
//!
//! These register diversions are local to an EBB. No values can be diverted when entering a new
//! EBB.
use crate::fx::FxHashMap;
use crate::hash_map::{Entry, Iter};
use crate::ir::{InstructionData, Opcode};
use crate::ir::{StackSlot, Value, ValueLoc, ValueLocations};
use crate::isa::{RegInfo, RegUnit};
use core::fmt;
/// A diversion of a value from its original location to a new register or stack location.
///
/// In IR, a diversion is represented by a `regmove` instruction, possibly a chain of them for the
/// same value.
///
/// When tracking diversions, the `from` field is the original assigned value location, and `to` is
/// the current one.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Diversion {
/// The original value location.
pub from: ValueLoc,
/// The current value location.
pub to: ValueLoc,
}
impl Diversion {
/// Make a new diversion.
pub fn new(from: ValueLoc, to: ValueLoc) -> Self {
debug_assert!(from.is_assigned() && to.is_assigned());
Self { from, to }
}
}
/// Keep track of diversions in an EBB.
pub struct RegDiversions {
current: FxHashMap<Value, Diversion>,
}
impl RegDiversions {
/// Create a new empty diversion tracker.
pub fn new() -> Self {
Self {
current: FxHashMap::default(),
}
}
/// Clear the tracker, preparing for a new EBB.
pub fn clear(&mut self) {
self.current.clear()
}
/// Are there any diversions?
pub fn is_empty(&self) -> bool {
self.current.is_empty()
}
/// Get the current diversion of `value`, if any.
pub fn diversion(&self, value: Value) -> Option<&Diversion> {
self.current.get(&value)
}
/// Get all current diversions.
pub fn iter(&self) -> Iter<'_, Value, Diversion> {
self.current.iter()
}
/// Get the current location for `value`. Fall back to the assignment map for non-diverted
/// values.
pub fn get(&self, value: Value, locations: &ValueLocations) -> ValueLoc {
match self.diversion(value) {
Some(d) => d.to,
None => locations[value],
}
}
/// Get the current register location for `value`, or panic if `value` isn't in a register.
pub fn reg(&self, value: Value, locations: &ValueLocations) -> RegUnit {
self.get(value, locations).unwrap_reg()
}
/// Get the current stack location for `value`, or panic if `value` isn't in a stack slot.
pub fn stack(&self, value: Value, locations: &ValueLocations) -> StackSlot {
self.get(value, locations).unwrap_stack()
}
/// Record any kind of move.
///
/// The `from` location must match an existing `to` location, if any.
pub fn divert(&mut self, value: Value, from: ValueLoc, to: ValueLoc) {
debug_assert!(from.is_assigned() && to.is_assigned());
match self.current.entry(value) {
Entry::Occupied(mut e) => {
// TODO: non-lexical lifetimes should allow removal of the scope and early return.
{
let d = e.get_mut();
debug_assert_eq!(d.to, from, "Bad regmove chain for {}", value);
if d.from != to {
d.to = to;
return;
}
}
e.remove();
}
Entry::Vacant(e) => {
e.insert(Diversion::new(from, to));
}
}
}
/// Record a register -> register move.
pub fn regmove(&mut self, value: Value, from: RegUnit, to: RegUnit) {
self.divert(value, ValueLoc::Reg(from), ValueLoc::Reg(to));
}
/// Record a register -> stack move.
pub fn regspill(&mut self, value: Value, from: RegUnit, to: StackSlot) {
self.divert(value, ValueLoc::Reg(from), ValueLoc::Stack(to));
}
/// Record a stack -> register move.
pub fn regfill(&mut self, value: Value, from: StackSlot, to: RegUnit) {
self.divert(value, ValueLoc::Stack(from), ValueLoc::Reg(to));
}
/// Apply the effect of `inst`.
///
/// If `inst` is a `regmove`, `regfill`, or `regspill` instruction, update the diversions to
/// match.
pub fn apply(&mut self, inst: &InstructionData) {
match *inst {
InstructionData::RegMove {
opcode: Opcode::Regmove,
arg,
src,
dst,
} => self.regmove(arg, src, dst),
InstructionData::RegSpill {
opcode: Opcode::Regspill,
arg,
src,
dst,
} => self.regspill(arg, src, dst),
InstructionData::RegFill {
opcode: Opcode::Regfill,
arg,
src,
dst,
} => self.regfill(arg, src, dst),
_ => {}
}
}
/// Drop any recorded move for `value`.
///
/// Returns the `to` location of the removed diversion.
pub fn remove(&mut self, value: Value) -> Option<ValueLoc> {
self.current.remove(&value).map(|d| d.to)
}
/// Return an object that can display the diversions.
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&'a self, regs: R) -> DisplayDiversions<'a> {
DisplayDiversions(self, regs.into())
}
}
/// Object that displays register diversions.
pub struct DisplayDiversions<'a>(&'a RegDiversions, Option<&'a RegInfo>);
impl<'a> fmt::Display for DisplayDiversions<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{{")?;
for (value, div) in self.0.iter() {
write!(
f,
" {}: {} -> {}",
value,
div.from.display(self.1),
div.to.display(self.1)
)?
}
write!(f, " }}")
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::entity::EntityRef;
use crate::ir::Value;
#[test]
fn inserts() {
let mut divs = RegDiversions::new();
let v1 = Value::new(1);
let v2 = Value::new(2);
divs.regmove(v1, 10, 12);
assert_eq!(
divs.diversion(v1),
Some(&Diversion {
from: ValueLoc::Reg(10),
to: ValueLoc::Reg(12),
})
);
assert_eq!(divs.diversion(v2), None);
divs.regmove(v1, 12, 11);
assert_eq!(divs.diversion(v1).unwrap().to, ValueLoc::Reg(11));
divs.regmove(v1, 11, 10);
assert_eq!(divs.diversion(v1), None);
}
}

View File

@@ -0,0 +1,345 @@
//! Track which values are live in an EBB with instruction granularity.
//!
//! The `LiveValueTracker` keeps track of the set of live SSA values at each instruction in an EBB.
//! The sets of live values are computed on the fly as the tracker is moved from instruction to
//! instruction, starting at the EBB header.
use crate::dominator_tree::DominatorTree;
use crate::entity::{EntityList, ListPool};
use crate::fx::FxHashMap;
use crate::ir::{DataFlowGraph, Ebb, ExpandedProgramPoint, Inst, Layout, Value};
use crate::partition_slice::partition_slice;
use crate::regalloc::affinity::Affinity;
use crate::regalloc::liveness::Liveness;
use crate::regalloc::liverange::LiveRange;
use std::vec::Vec;
type ValueList = EntityList<Value>;
/// Compute and track live values throughout an EBB.
pub struct LiveValueTracker {
/// The set of values that are live at the current program point.
live: LiveValueVec,
/// Saved set of live values for every jump and branch that can potentially be an immediate
/// dominator of an EBB.
///
/// This is the set of values that are live *before* the branch.
idom_sets: FxHashMap<Inst, ValueList>,
/// Memory pool for the live sets.
idom_pool: ListPool<Value>,
}
/// Information about a value that is live at the current program point.
#[derive(Debug)]
pub struct LiveValue {
/// The live value.
pub value: Value,
/// The local ending point of the live range in the current EBB, as returned by
/// `LiveRange::def_local_end()` or `LiveRange::livein_local_end()`.
pub endpoint: Inst,
/// The affinity of the value as represented in its `LiveRange`.
///
/// This value is simply a copy of the affinity stored in the live range. We copy it because
/// almost all users of `LiveValue` need to look at it.
pub affinity: Affinity,
/// The live range for this value never leaves its EBB.
pub is_local: bool,
/// This value is dead - the live range ends immediately.
pub is_dead: bool,
}
struct LiveValueVec {
/// The set of values that are live at the current program point.
values: Vec<LiveValue>,
/// How many values at the front of `values` are known to be live after `inst`?
///
/// This is used to pass a much smaller slice to `partition_slice` when it's called a second
/// time for the same instruction.
live_prefix: Option<(Inst, usize)>,
}
impl LiveValueVec {
fn new() -> Self {
Self {
values: Vec::new(),
live_prefix: None,
}
}
/// Add a new live value to `values`. Copy some properties from `lr`.
fn push(&mut self, value: Value, endpoint: Inst, lr: &LiveRange) {
self.values.push(LiveValue {
value,
endpoint,
affinity: lr.affinity,
is_local: lr.is_local(),
is_dead: lr.is_dead(),
});
}
/// Remove all elements.
fn clear(&mut self) {
self.values.clear();
self.live_prefix = None;
}
/// Make sure that the values killed by `next_inst` are moved to the end of the `values`
/// vector.
///
/// Returns the number of values that will be live after `next_inst`.
fn live_after(&mut self, next_inst: Inst) -> usize {
// How many values at the front of the vector are already known to survive `next_inst`?
// We don't need to pass this prefix to `partition_slice()`.
let keep = match self.live_prefix {
Some((i, prefix)) if i == next_inst => prefix,
_ => 0,
};
// Move the remaining surviving values to the front partition of the vector.
let prefix = keep + partition_slice(&mut self.values[keep..], |v| v.endpoint != next_inst);
// Remember the new prefix length in case we get called again for the same `next_inst`.
self.live_prefix = Some((next_inst, prefix));
prefix
}
/// Remove the values killed by `next_inst`.
fn remove_kill_values(&mut self, next_inst: Inst) {
let keep = self.live_after(next_inst);
self.values.truncate(keep);
}
/// Remove any dead values.
fn remove_dead_values(&mut self) {
self.values.retain(|v| !v.is_dead);
self.live_prefix = None;
}
}
impl LiveValueTracker {
/// Create a new blank tracker.
pub fn new() -> Self {
Self {
live: LiveValueVec::new(),
idom_sets: FxHashMap(),
idom_pool: ListPool::new(),
}
}
/// Clear all cached information.
pub fn clear(&mut self) {
self.live.clear();
self.idom_sets.clear();
self.idom_pool.clear();
}
/// Get the set of currently live values.
///
/// Between calls to `process_inst()` and `drop_dead()`, this includes both values killed and
/// defined by the current instruction.
pub fn live(&self) -> &[LiveValue] {
&self.live.values
}
/// Get a mutable set of currently live values.
///
/// Use with care and don't move entries around.
pub fn live_mut(&mut self) -> &mut [LiveValue] {
&mut self.live.values
}
/// Move the current position to the top of `ebb`.
///
/// This depends on the stored live value set at `ebb`'s immediate dominator, so that must have
/// been visited first.
///
/// Returns `(liveins, args)` as a pair of slices. The first slice is the set of live-in values
/// from the immediate dominator. The second slice is the set of `ebb` parameters.
///
    /// Dead parameters with no uses are included in `args`. Call `drop_dead_params()` to remove them.
pub fn ebb_top(
&mut self,
ebb: Ebb,
dfg: &DataFlowGraph,
liveness: &Liveness,
layout: &Layout,
domtree: &DominatorTree,
) -> (&[LiveValue], &[LiveValue]) {
// Start over, compute the set of live values at the top of the EBB from two sources:
//
// 1. Values that were live before `ebb`'s immediate dominator, filtered for those that are
// actually live-in.
// 2. Arguments to `ebb` that are not dead.
//
self.live.clear();
// Compute the live-in values. Start by filtering the set of values that were live before
// the immediate dominator. Just use the empty set if there's no immediate dominator (i.e.,
// the entry block or an unreachable block).
if let Some(idom) = domtree.idom(ebb) {
            // If the immediate dominator exists, we must have a stored list for it. This is a
            // requirement on the order in which EBBs are visited: all dominators must have been
            // processed before the current EBB.
let idom_live_list = self
.idom_sets
.get(&idom)
.expect("No stored live set for dominator");
let ctx = liveness.context(layout);
// Get just the values that are live-in to `ebb`.
for &value in idom_live_list.as_slice(&self.idom_pool) {
let lr = liveness
.get(value)
.expect("Immediate dominator value has no live range");
// Check if this value is live-in here.
if let Some(endpoint) = lr.livein_local_end(ebb, ctx) {
self.live.push(value, endpoint, lr);
}
}
}
// Now add all the live parameters to `ebb`.
let first_arg = self.live.values.len();
for &value in dfg.ebb_params(ebb) {
let lr = &liveness[value];
debug_assert_eq!(lr.def(), ebb.into());
match lr.def_local_end().into() {
ExpandedProgramPoint::Inst(endpoint) => {
self.live.push(value, endpoint, lr);
}
ExpandedProgramPoint::Ebb(local_ebb) => {
// This is a dead EBB parameter which is not even live into the first
// instruction in the EBB.
debug_assert_eq!(
local_ebb, ebb,
"EBB parameter live range ends at wrong EBB header"
);
// Give this value a fake endpoint that is the first instruction in the EBB.
                    // We expect it to be removed by calling `drop_dead_params()`.
self.live
.push(value, layout.first_inst(ebb).expect("Empty EBB"), lr);
}
}
}
self.live.values.split_at(first_arg)
}
/// Prepare to move past `inst`.
///
/// Determine the set of already live values that are killed by `inst`, and add the new defined
/// values to the tracked set.
///
/// Returns `(throughs, kills, defs)` as a tuple of slices:
///
/// 1. The `throughs` slice is the set of live-through values that are neither defined nor
/// killed by the instruction.
/// 2. The `kills` slice is the set of values that were live before the instruction and are
/// killed at the instruction. This does not include dead defs.
/// 3. The `defs` slice is guaranteed to be in the same order as `inst`'s results, and includes
/// dead defines.
///
/// The order of `throughs` and `kills` is arbitrary.
///
/// The `drop_dead()` method must be called next to actually remove the dead values from the
/// tracked set after the two returned slices are no longer needed.
pub fn process_inst(
&mut self,
inst: Inst,
dfg: &DataFlowGraph,
liveness: &Liveness,
) -> (&[LiveValue], &[LiveValue], &[LiveValue]) {
// Save a copy of the live values before any branches or jumps that could be somebody's
// immediate dominator.
if dfg[inst].opcode().is_branch() {
self.save_idom_live_set(inst);
}
// Move killed values to the end of the vector.
// Don't remove them yet, `drop_dead()` will do that.
let first_kill = self.live.live_after(inst);
// Add the values defined by `inst`.
let first_def = self.live.values.len();
for &value in dfg.inst_results(inst) {
let lr = &liveness[value];
debug_assert_eq!(lr.def(), inst.into());
match lr.def_local_end().into() {
ExpandedProgramPoint::Inst(endpoint) => {
self.live.push(value, endpoint, lr);
}
ExpandedProgramPoint::Ebb(ebb) => {
panic!("Instruction result live range can't end at {}", ebb);
}
}
}
(
&self.live.values[0..first_kill],
&self.live.values[first_kill..first_def],
&self.live.values[first_def..],
)
}
/// Prepare to move past a ghost instruction.
///
/// This is like `process_inst`, except any defs are ignored.
///
/// Returns `(throughs, kills)`.
pub fn process_ghost(&mut self, inst: Inst) -> (&[LiveValue], &[LiveValue]) {
let first_kill = self.live.live_after(inst);
self.live.values.as_slice().split_at(first_kill)
}
/// Drop the values that are now dead after moving past `inst`.
///
/// This removes both live values that were killed by `inst` and dead defines on `inst` itself.
///
/// This must be called after `process_inst(inst)` and before proceeding to the next
/// instruction.
pub fn drop_dead(&mut self, inst: Inst) {
// Remove both live values that were killed by `inst` and dead defines from `inst`.
self.live.remove_kill_values(inst);
}
/// Drop any values that are marked as `is_dead`.
///
/// Use this after calling `ebb_top` to clean out dead EBB parameters.
pub fn drop_dead_params(&mut self) {
self.live.remove_dead_values();
}
/// Process new spills.
///
/// Any values where `f` returns true are spilled and will be treated as if their affinity was
/// `Stack`.
pub fn process_spills<F>(&mut self, mut f: F)
where
F: FnMut(Value) -> bool,
{
for lv in &mut self.live.values {
if f(lv.value) {
lv.affinity = Affinity::Stack;
}
}
}
/// Save the current set of live values so it is associated with `idom`.
fn save_idom_live_set(&mut self, idom: Inst) {
let values = self.live.values.iter().map(|lv| lv.value);
let pool = &mut self.idom_pool;
// If there already is a set saved for `idom`, just keep it.
self.idom_sets.entry(idom).or_insert_with(|| {
let mut list = ValueList::default();
list.extend(values, pool);
list
});
}
}

View File

@@ -0,0 +1,460 @@
//! Liveness analysis for SSA values.
//!
//! This module computes the live range of all the SSA values in a function and produces a
//! `LiveRange` instance for each.
//!
//!
//! # Liveness consumers
//!
//! The primary consumer of the liveness analysis is the SSA coloring pass which goes through each
//! EBB and assigns a register to the defined values. This algorithm needs to maintain a set of the
//! currently live values as it is iterating down the instructions in the EBB. It asks the
//! following questions:
//!
//! - What is the set of live values at the entry to the EBB?
//! - When moving past a use of a value, is that value still alive in the EBB, or was that the last
//! use?
//! - When moving past a branch, which of the live values are still live below the branch?
//!
//! The set of `LiveRange` instances can answer these questions through their `def_local_end` and
//! `livein_local_end` queries. The coloring algorithm visits EBBs in a topological order of the
//! dominator tree, so it can compute the set of live values at the beginning of an EBB by starting
//! from the set of live values at the dominating branch instruction and filtering it with
//! `livein_local_end`. These sets do not need to be stored in the liveness analysis.
//!
//! The secondary consumer of the liveness analysis is the spilling pass which needs to count the
//! number of live values at every program point and insert spill code until the number of
//! registers needed is small enough.
//!
//!
//! # Alternative algorithms
//!
//! A number of different liveness analysis algorithms exist, so it is worthwhile to look at a few
//! alternatives.
//!
//! ## Data-flow equations
//!
//! The classic *live variables analysis* that you will find in all compiler books from the
//! previous century does not depend on SSA form. It is typically implemented by iteratively
//! solving data-flow equations on bit-vectors of variables. The result is a live-out bit-vector of
//! variables for every basic block in the program.
//!
//! This algorithm has some disadvantages that make us look elsewhere:
//!
//! - Quadratic memory use. We need a bit per variable per basic block in the function.
//! - Dense representation of sparse data. In practice, the majority of SSA values never leave
//! their basic block, and those that do span basic blocks rarely span a large number of basic
//! blocks. This makes the data stored in the bitvectors quite sparse.
//! - Traditionally, the data-flow equations were solved for real program *variables*, which do
//!   not include temporaries used in evaluating expressions. We have an SSA form program which
//! blurs the distinction between temporaries and variables. This makes the quadratic memory
//!   problem worse because there are many more SSA values than there were variables in the original
//! program, and we don't know a priori which SSA values leave their basic block.
//! - Missing last-use information. For values that are not live-out of a basic block, we would
//! need to store information about the last use in the block somewhere. LLVM stores this
//! information as a 'kill bit' on the last use in the IR. Maintaining these kill bits has been a
//! source of problems for LLVM's register allocator.
//!
//! Data-flow equations can detect when a variable is used uninitialized, and they can handle
//! multiple definitions of the same variable. We don't need this generality since we already have
//! a program in SSA form.
//!
//! ## LLVM's liveness analysis
//!
//! LLVM's register allocator computes liveness per *virtual register*, where a virtual register is
//! a disjoint union of related SSA values that should be assigned to the same physical register.
//! It uses a compact data structure very similar to our `LiveRange`. The important difference is
//! that Cranelift's `LiveRange` only describes a single SSA value, while LLVM's `LiveInterval`
//! describes the live range of a virtual register *and* which one of the related SSA values is
//! live at any given program point.
//!
//! LLVM computes the live range of each virtual register independently by using the use-def chains
//! that are baked into its IR. The algorithm for a single virtual register is:
//!
//! 1. Initialize the live range with a single-instruction snippet of liveness at each def, using
//! the def-chain. This does not include any phi-values.
//! 2. Go through the virtual register's use chain and perform the following steps at each use:
//! 3. Perform an exhaustive depth-first traversal up the CFG from the use. Look for basic blocks
//! that already contain some liveness and extend the last live SSA value in the block to be
//! live-out. Also build a list of new basic blocks where the register needs to be live-in.
//! 4. Iteratively propagate live-out SSA values to the new live-in blocks. This may require new
//! PHI values to be created when different SSA values can reach the same block.
//!
//! The iterative SSA form reconstruction can be skipped if the depth-first search only encountered
//! one SSA value.
//!
//! This algorithm has some advantages compared to the data-flow equations:
//!
//! - The live ranges of local virtual registers are computed very quickly without ever traversing
//! the CFG. The memory needed to store these live ranges is independent of the number of basic
//! blocks in the program.
//! - The time to compute the live range of a global virtual register is proportional to the number
//! of basic blocks covered. Many virtual registers only cover a few blocks, even in very large
//! functions.
//! - A single live range can be recomputed after making modifications to the IR. No global
//! algorithm is necessary. This feature depends on having use-def chains for virtual registers
//! which Cranelift doesn't.
//!
//! Cranelift uses data structures and algorithms very similar to LLVM's, with the important
//! difference that live ranges are computed per SSA value instead of per virtual register, and
//! uses in Cranelift IR refer to SSA values instead of virtual registers. This means that
//! Cranelift can skip the last step of reconstructing SSA form for the virtual register uses.
//!
//! ## Fast Liveness Checking for SSA-Form Programs
//!
//! A liveness analysis that is often brought up in the context of SSA-based register allocation
//! was presented at CGO 2008:
//!
//! > Boissinot, B., Hack, S., Grund, D., de Dinechin, B. D., & Rastello, F. (2008). *Fast Liveness
//! Checking for SSA-Form Programs.* CGO.
//!
//! This analysis uses a global pre-computation that only depends on the CFG of the function. It
//! then allows liveness queries for any (value, program point) pair. Each query traverses the use
//! chain of the value and performs lookups in the precomputed bit-vectors.
//!
//! I did not seriously consider this analysis for Cranelift because:
//!
//! - It depends critically on use chains which Cranelift doesn't have.
//! - Popular variables like the `this` pointer in a C++ method can have very large use chains.
//! Traversing such a long use chain on every liveness lookup has the potential for some nasty
//! quadratic behavior in unfortunate cases.
//! - It says "fast" in the title, but the paper only claims to be 16% faster than a data-flow
//! based approach, which isn't that impressive.
//!
//! Nevertheless, the property of only depending on the CFG structure is very useful. If Cranelift
//! gains use chains, this approach would be worth a proper evaluation.
//!
//!
//! # Cranelift's liveness analysis
//!
//! The algorithm implemented in this module is similar to LLVM's with these differences:
//!
//! - The `LiveRange` data structure describes the liveness of a single SSA value, not a virtual
//! register.
//! - Instructions in Cranelift IR contain references to SSA values, not virtual registers.
//! - All live ranges are computed in one traversal of the program. Cranelift doesn't have use
//! chains, so it is not possible to compute the live range for a single SSA value independently.
//!
//! The liveness computation visits all instructions in the program. The order is not important for
//! the algorithm to be correct. At each instruction, the used values are examined.
//!
//! - The first time a value is encountered, its live range is constructed as a dead live range
//! containing only the defining program point.
//! - The local interval of the value's live range is extended so it reaches the use. This may
//! require creating a new live-in local interval for the EBB.
//! - If the live range became live-in to the EBB, add the EBB to a work-list.
//! - While the work-list is non-empty pop a live-in EBB and repeat the two steps above, using each
//! of the live-in EBB's CFG predecessor instructions as a 'use'.
//!
//! The effect of this algorithm is to extend the live range of each value to reach uses as they are
//! visited. No data about each value beyond the live range is needed between visiting uses, so
//! nothing is lost by computing the live range of all values simultaneously.
//!
//! ## Cache efficiency of Cranelift vs LLVM
//!
//! Since LLVM computes the complete live range of a virtual register in one go, it can keep the
//! whole `LiveInterval` for the register in L1 cache. Since it is visiting the instructions in use
//! chain order, some cache thrashing can occur as a result of pulling instructions into cache
//! somewhat chaotically.
//!
//! Cranelift uses a transposed algorithm, visiting instructions in order. This means that each
//! instruction is brought into cache only once, and it is likely that the other instructions on
//! the same cache line will be visited before the line is evicted.
//!
//! Cranelift's problem is that the `LiveRange` structs are visited many times and not always
//! regularly. We should strive to make the `LiveRange` struct as small as possible such that
//! multiple related values can live on the same cache line.
//!
//! - Local values should fit in a 16-byte `LiveRange` struct or smaller. The current
//! implementation contains a 24-byte `Vec` object and a redundant `value` member pushing the
//! size to 32 bytes.
//! - Related values should be stored on the same cache line. The current sparse set implementation
//! does a decent job of that.
//! - For global values, the list of live-in intervals is very likely to fit on a single cache
//! line. These lists are very likely to be found in L2 cache at least.
//!
//! There is some room for improvement.
use crate::entity::SparseMap;
use crate::flowgraph::{BasicBlock, ControlFlowGraph};
use crate::ir::dfg::ValueDef;
use crate::ir::{Ebb, Function, Inst, Layout, ProgramPoint, Value};
use crate::isa::{EncInfo, OperandConstraint, TargetIsa};
use crate::regalloc::affinity::Affinity;
use crate::regalloc::liverange::{LiveRange, LiveRangeContext, LiveRangeForest};
use crate::timing;
use core::mem;
use core::ops::Index;
use std::vec::Vec;
/// A set of live ranges, indexed by value number.
type LiveRangeSet = SparseMap<Value, LiveRange>;
/// Get a mutable reference to the live range for `value`.
/// Create it if necessary.
fn get_or_create<'a>(
lrset: &'a mut LiveRangeSet,
value: Value,
isa: &TargetIsa,
func: &Function,
encinfo: &EncInfo,
) -> &'a mut LiveRange {
// It would be better to use `get_mut()` here, but that leads to borrow checker fighting
// which can probably only be resolved by non-lexical lifetimes.
// https://github.com/rust-lang/rfcs/issues/811
if lrset.get(value).is_none() {
// Create a live range for value. We need the program point that defines it.
let def;
let affinity;
match func.dfg.value_def(value) {
ValueDef::Result(inst, rnum) => {
def = inst.into();
// Initialize the affinity from the defining instruction's result constraints.
// Don't do this for call return values which are always tied to a single register.
affinity = encinfo
.operand_constraints(func.encodings[inst])
.and_then(|rc| rc.outs.get(rnum))
.map(Affinity::new)
.or_else(|| {
// If this is a call, get the return value affinity.
func.dfg
.call_signature(inst)
.map(|sig| Affinity::abi(&func.dfg.signatures[sig].returns[rnum], isa))
})
.unwrap_or_default();
}
ValueDef::Param(ebb, num) => {
def = ebb.into();
if func.layout.entry_block() == Some(ebb) {
// The affinity for entry block parameters can be inferred from the function
// signature.
affinity = Affinity::abi(&func.signature.params[num], isa);
} else {
// Give normal EBB parameters a register affinity matching their type.
let rc = isa.regclass_for_abi_type(func.dfg.value_type(value));
affinity = Affinity::Reg(rc.into());
}
}
};
lrset.insert(LiveRange::new(value, def, affinity));
}
lrset.get_mut(value).unwrap()
}
/// Extend the live range for `value` so it reaches `to` which must live in `ebb`.
fn extend_to_use(
lr: &mut LiveRange,
ebb: Ebb,
to: Inst,
worklist: &mut Vec<Ebb>,
func: &Function,
cfg: &ControlFlowGraph,
forest: &mut LiveRangeForest,
) {
// This is our scratch working space, and we'll leave it empty when we return.
debug_assert!(worklist.is_empty());
// Extend the range locally in `ebb`.
// If there already was a live interval in that block, we're done.
if lr.extend_in_ebb(ebb, to, &func.layout, forest) {
worklist.push(ebb);
}
// The work list contains those EBBs where we have learned that the value needs to be
// live-in.
//
// This algorithm becomes a depth-first traversal up the CFG, enumerating all paths through the
// CFG from the existing live range to `ebb`.
//
// Extend the live range as we go. The live range itself also serves as a visited set since
// `extend_in_ebb` will never return true twice for the same EBB.
//
while let Some(livein) = worklist.pop() {
// We've learned that the value needs to be live-in to the `livein` EBB.
// Make sure it is also live at all predecessor branches to `livein`.
for BasicBlock {
ebb: pred,
inst: branch,
} in cfg.pred_iter(livein)
{
if lr.extend_in_ebb(pred, branch, &func.layout, forest) {
// This predecessor EBB also became live-in. We need to process it later.
worklist.push(pred);
}
}
}
}
/// Liveness analysis for a function.
///
/// Compute a live range for every SSA value used in the function.
pub struct Liveness {
/// The live ranges that have been computed so far.
ranges: LiveRangeSet,
/// Memory pool for the live ranges.
forest: LiveRangeForest,
/// Working space for the `extend_to_use` algorithm.
/// This vector is always empty, except for inside that function.
/// It lives here to avoid repeated allocation of scratch memory.
worklist: Vec<Ebb>,
}
impl Liveness {
/// Create a new empty liveness analysis.
///
/// The memory allocated for this analysis can be reused for multiple functions. Use the
    /// `compute` method to actually run the analysis for a function.
pub fn new() -> Self {
Self {
ranges: LiveRangeSet::new(),
forest: LiveRangeForest::new(),
worklist: Vec::new(),
}
}
/// Get a context needed for working with a `LiveRange`.
pub fn context<'a>(&'a self, layout: &'a Layout) -> LiveRangeContext<'a, Layout> {
LiveRangeContext::new(layout, &self.forest)
}
/// Clear all data structures in this liveness analysis.
pub fn clear(&mut self) {
self.ranges.clear();
self.forest.clear();
self.worklist.clear();
}
/// Get the live range for `value`, if it exists.
pub fn get(&self, value: Value) -> Option<&LiveRange> {
self.ranges.get(value)
}
/// Create a new live range for `value`.
///
/// The new live range will be defined at `def` with no extent, like a dead value.
///
/// This asserts that `value` does not have an existing live range.
pub fn create_dead<PP>(&mut self, value: Value, def: PP, affinity: Affinity)
where
PP: Into<ProgramPoint>,
{
let old = self
.ranges
.insert(LiveRange::new(value, def.into(), affinity));
debug_assert!(old.is_none(), "{} already has a live range", value);
}
/// Move the definition of `value` to `def`.
///
/// The old and new def points must be in the same EBB, and before the end of the live range.
pub fn move_def_locally<PP>(&mut self, value: Value, def: PP)
where
PP: Into<ProgramPoint>,
{
let lr = self.ranges.get_mut(value).expect("Value has no live range");
lr.move_def_locally(def.into());
}
/// Locally extend the live range for `value` to reach `user`.
///
    /// It is assumed that `value` is already live before `user` in `ebb`.
///
/// Returns a mutable reference to the value's affinity in case that also needs to be updated.
pub fn extend_locally(
&mut self,
value: Value,
ebb: Ebb,
user: Inst,
layout: &Layout,
) -> &mut Affinity {
debug_assert_eq!(Some(ebb), layout.inst_ebb(user));
let lr = self.ranges.get_mut(value).expect("Value has no live range");
let livein = lr.extend_in_ebb(ebb, user, layout, &mut self.forest);
debug_assert!(!livein, "{} should already be live in {}", value, ebb);
&mut lr.affinity
}
/// Change the affinity of `value` to `Stack` and return the previous affinity.
pub fn spill(&mut self, value: Value) -> Affinity {
let lr = self.ranges.get_mut(value).expect("Value has no live range");
mem::replace(&mut lr.affinity, Affinity::Stack)
}
/// Compute the live ranges of all SSA values used in `func`.
/// This clears out any existing analysis stored in this data structure.
pub fn compute(&mut self, isa: &TargetIsa, func: &mut Function, cfg: &ControlFlowGraph) {
let _tt = timing::ra_liveness();
self.ranges.clear();
// Get ISA data structures used for computing live range affinities.
let encinfo = isa.encoding_info();
let reginfo = isa.register_info();
// The liveness computation needs to visit all uses, but the order doesn't matter.
// TODO: Perhaps this traversal of the function could be combined with a dead code
// elimination pass if we visit a post-order of the dominator tree?
// TODO: Resolve value aliases while we're visiting instructions?
for ebb in func.layout.ebbs() {
// Make sure we have created live ranges for dead EBB parameters.
// TODO: If these parameters are really dead, we could remove them, except for the
// entry block which must match the function signature.
for &arg in func.dfg.ebb_params(ebb) {
get_or_create(&mut self.ranges, arg, isa, func, &encinfo);
}
for inst in func.layout.ebb_insts(ebb) {
                // Eliminate all value aliases; they would confuse the register allocator.
func.dfg.resolve_aliases_in_arguments(inst);
// Make sure we have created live ranges for dead defs.
// TODO: When we implement DCE, we can use the absence of a live range to indicate
// an unused value.
for &def in func.dfg.inst_results(inst) {
get_or_create(&mut self.ranges, def, isa, func, &encinfo);
}
// Iterator of constraints, one per value operand.
let encoding = func.encodings[inst];
let operand_constraint_slice: &[OperandConstraint] =
encinfo.operand_constraints(encoding).map_or(&[], |c| c.ins);
let mut operand_constraints = operand_constraint_slice.iter();
for &arg in func.dfg.inst_args(inst) {
// Get the live range, create it as a dead range if necessary.
let lr = get_or_create(&mut self.ranges, arg, isa, func, &encinfo);
// Extend the live range to reach this use.
extend_to_use(
lr,
ebb,
inst,
&mut self.worklist,
func,
cfg,
&mut self.forest,
);
// Apply operand constraint, ignoring any variable arguments after the fixed
// operands described by `operand_constraints`. Variable arguments are either
// EBB arguments or call/return ABI arguments.
if let Some(constraint) = operand_constraints.next() {
lr.affinity.merge(constraint, &reginfo);
}
}
}
}
}
}
impl Index<Value> for Liveness {
type Output = LiveRange;
fn index(&self, index: Value) -> &LiveRange {
match self.ranges.get(index) {
Some(lr) => lr,
None => panic!("{} has no live range", index),
}
}
}

View File

@@ -0,0 +1,745 @@
//! Data structure representing the live range of an SSA value.
//!
//! Live ranges are tracked per SSA value, not per variable or virtual register. The live range of
//! an SSA value begins where it is defined and extends to all program points where the value is
//! still needed.
//!
//! # Local Live Ranges
//!
//! Inside a single extended basic block, the live range of a value is always an interval between
//! two program points (if the value is live in the EBB at all). The starting point is either:
//!
//! 1. The instruction that defines the value, or
//! 2. The EBB header, because the value is an argument to the EBB, or
//! 3. The EBB header, because the value is defined in another EBB and live-in to this one.
//!
//! The ending point of the local live range is the last of the following program points in the
//! EBB:
//!
//! 1. The last use in the EBB, where a *use* is an instruction that has the value as an argument.
//! 2. The last branch or jump instruction in the EBB that can reach a use.
//! 3. If the value has no uses anywhere (a *dead value*), the program point that defines it.
//!
//! Note that 2. includes loop back-edges to the same EBB. In general, if a value is defined
//! outside a loop and used inside the loop, it will be live in the entire loop.
//!
//! # Global Live Ranges
//!
//! Values that appear in more than one EBB have a *global live range* which can be seen as the
//! disjoint union of the per-EBB local intervals for all of the EBBs where the value is live.
//! Together with a `ProgramOrder` which provides a linear ordering of the EBBs, the global live
//! range becomes a linear sequence of disjoint intervals, at most one per EBB.
//!
//! In the special case of a dead value, the global live range is a single interval where the start
//! and end points are the same. The global live range of a value is never completely empty.
//!
//! # Register interference
//!
//! The register allocator uses live ranges to determine if values *interfere*, which means that
//! they can't be stored in the same register. Two live ranges interfere if and only if any of
//! their intervals overlap.
//!
//! If one live range ends at an instruction that defines another live range, those two live ranges
//! are not considered to interfere. This is because most ISAs allow instructions to reuse an input
//! register for an output value. If Cranelift gets support for inline assembly, we will need to
//! handle *early clobbers* which are output registers that are not allowed to alias any input
//! registers.
//!
//! If `i1 < i2 < i3` are program points, we have:
//!
//! - `i1-i2` and `i1-i3` interfere because the intervals overlap.
//! - `i1-i2` and `i2-i3` don't interfere.
//! - `i1-i3` and `i2-i2` do interfere because the dead def would clobber the register.
//! - `i1-i2` and `i2-i2` don't interfere.
//! - `i2-i3` and `i2-i2` do interfere.
//!
//! Because of this behavior around interval end points, live range interference is not completely
//! equivalent to mathematical intersection of open or half-open intervals.
//!
//! # Implementation notes
//!
//! A few notes about the implementation of this data structure. This should not concern someone
//! only looking to use the public interface.
//!
//! ## EBB ordering
//!
//! The relative order of EBBs is used to maintain a sorted list of live-in intervals and to
//! coalesce adjacent live-in intervals when the prior interval covers the whole EBB. This doesn't
//! depend on any property of the program order, so alternative orderings are possible:
//!
//! 1. The EBB layout order. This is what we currently use.
//! 2. A topological order of the dominator tree. All the live-in intervals would come after the
//! def interval.
//! 3. A numerical order by EBB number. Performant because it doesn't need to indirect through the
//! `ProgramOrder` for comparisons.
//!
//! These orderings will cause small differences in coalescing opportunities, but all of them would
//! do a decent job of compressing a long live range. The numerical order might be preferable
//! because:
//!
//! - It has better performance because EBB numbers can be compared directly without any table
//! lookups.
//! - If EBB numbers are not reused, it is safe to allocate new EBBs without getting spurious
//! live-in intervals from any coalesced representations that happen to cross a new EBB.
//!
//! For comparing instructions, the layout order is always what we want.
//!
//! ## Alternative representation
//!
//! Since a local live-in interval always begins at its EBB header, it is uniquely described by its
//! end point instruction alone. We can use the layout to look up the EBB containing the end point.
//! This means that a sorted `Vec<Inst>` would be enough to represent the set of live-in intervals.
//!
//! Coalescing is an important compression technique because some live ranges can span thousands of
//! EBBs. We can represent that by switching to a sorted `Vec<ProgramPoint>` representation where
//! an `[Ebb, Inst]` pair represents a coalesced range, while an `Inst` entry without a preceding
//! `Ebb` entry represents a single live-in interval.
//!
//! This representation is more compact for a live range with many uncoalesced live-in intervals.
//! It is more complicated to work with, though, so it is probably not worth it. The performance
//! benefits of switching to a numerical EBB order only appear if the binary search is doing
//! EBB-EBB comparisons.
//!
//! ## B-tree representation
//!
//! A `BTreeMap<Ebb, Inst>` could also be used for the live-in intervals. It looks like the
//! standard library B-tree doesn't provide the necessary interface for an efficient implementation
//! of coalescing, so we would need to roll our own.
//!
use crate::bforest;
use crate::entity::SparseMapValue;
use crate::ir::{Ebb, ExpandedProgramPoint, Inst, Layout, ProgramOrder, ProgramPoint, Value};
use crate::regalloc::affinity::Affinity;
use core::cmp::Ordering;
use core::marker::PhantomData;
/// Global live range of a single SSA value.
///
/// As [explained in the module documentation](index.html#local-live-ranges), the live range of an
/// SSA value is the disjoint union of a set of intervals, each local to a single EBB, and with at
/// most one interval per EBB. We further distinguish between:
///
/// 1. The *def interval* is the local interval in the EBB where the value is defined, and
/// 2. The *live-in intervals* are the local intervals in the remaining EBBs.
///
/// A live-in interval always begins at the EBB header, while the def interval can begin at the
/// defining instruction, or at the EBB header for an EBB argument value.
///
/// All values have a def interval, but a large proportion of values don't have any live-in
/// intervals. These are called *local live ranges*.
///
/// # Program order requirements
///
/// The internal representation of a `LiveRange` depends on a consistent `ProgramOrder` both for
/// ordering instructions inside an EBB *and* for ordering EBBs. The methods that depend on the
/// ordering take an explicit `ProgramOrder` object, and it is the caller's responsibility to
/// ensure that the provided ordering is consistent between calls.
///
/// In particular, changing the order of EBBs or inserting new EBBs will invalidate live ranges.
///
/// Inserting new instructions in the layout is safe, but removing instructions is not. Besides the
/// instructions using or defining their value, `LiveRange` structs can contain references to
/// branch and jump instructions.
pub type LiveRange = GenLiveRange<Layout>;
/// Generic live range implementation.
///
/// The intended generic parameter is `PO=Layout`, but tests are simpler with a mock order.
/// Use `LiveRange` instead of using this generic directly.
pub struct GenLiveRange<PO: ProgramOrder> {
/// The value described by this live range.
/// This member can't be modified in case the live range is stored in a `SparseMap`.
value: Value,
/// The preferred register allocation for this value.
pub affinity: Affinity,
/// The instruction or EBB header where this value is defined.
def_begin: ProgramPoint,
/// The end point of the def interval. This must always belong to the same EBB as `def_begin`.
///
/// We always have `def_begin <= def_end` with equality implying a dead def live range with no
/// uses.
def_end: ProgramPoint,
/// Additional live-in intervals sorted in program order.
///
/// This map is empty for most values which are only used in one EBB.
///
/// A map entry `ebb -> inst` means that the live range is live-in to `ebb`, continuing up to
/// `inst` which may belong to a later EBB in the program order.
///
/// The entries are non-overlapping, and none of them overlap the EBB where the value is
/// defined.
liveins: bforest::Map<Ebb, Inst>,
po: PhantomData<*const PO>,
}
/// Context information needed to query a `LiveRange`.
pub struct LiveRangeContext<'a, PO: 'a + ProgramOrder> {
/// Ordering of EBBs.
pub order: &'a PO,
/// Memory pool.
pub forest: &'a bforest::MapForest<Ebb, Inst>,
}
impl<'a, PO: ProgramOrder> LiveRangeContext<'a, PO> {
/// Make a new context.
pub fn new(order: &'a PO, forest: &'a bforest::MapForest<Ebb, Inst>) -> Self {
Self { order, forest }
}
}
impl<'a, PO: ProgramOrder> Clone for LiveRangeContext<'a, PO> {
fn clone(&self) -> Self {
LiveRangeContext {
order: self.order,
forest: self.forest,
}
}
}
impl<'a, PO: ProgramOrder> Copy for LiveRangeContext<'a, PO> {}
/// Forest of B-trees used for storing live ranges.
pub type LiveRangeForest = bforest::MapForest<Ebb, Inst>;
struct Cmp<'a, PO: ProgramOrder + 'a>(&'a PO);
impl<'a, PO: ProgramOrder> bforest::Comparator<Ebb> for Cmp<'a, PO> {
fn cmp(&self, a: Ebb, b: Ebb) -> Ordering {
self.0.cmp(a, b)
}
}
impl<PO: ProgramOrder> GenLiveRange<PO> {
/// Create a new live range for `value` defined at `def`.
///
/// The live range will be created as dead, but it can be extended with `extend_in_ebb()`.
pub fn new(value: Value, def: ProgramPoint, affinity: Affinity) -> Self {
Self {
value,
affinity,
def_begin: def,
def_end: def,
liveins: bforest::Map::new(),
po: PhantomData,
}
}
/// Extend the local interval for `ebb` so it reaches `to` which must belong to `ebb`.
/// Create a live-in interval if necessary.
///
/// If the live range already has a local interval in `ebb`, extend its end point so it
/// includes `to`, and return false.
///
/// If the live range did not previously have a local interval in `ebb`, add one so the value
/// is live-in to `ebb`, extending to `to`. Return true.
///
/// The return value can be used to detect if we just learned that the value is live-in to
/// `ebb`. This can trigger recursive extensions in `ebb`'s CFG predecessor blocks.
pub fn extend_in_ebb(
&mut self,
ebb: Ebb,
to: Inst,
order: &PO,
forest: &mut bforest::MapForest<Ebb, Inst>,
) -> bool {
// First check if we're extending the def interval.
//
// We're assuming here that `to` never precedes `def_begin` in the same EBB, but we can't
// check it without a method for getting `to`'s EBB.
if order.cmp(ebb, self.def_end) != Ordering::Greater
&& order.cmp(to, self.def_begin) != Ordering::Less
{
let to_pp = to.into();
debug_assert_ne!(
to_pp, self.def_begin,
"Can't use value in the defining instruction."
);
if order.cmp(to, self.def_end) == Ordering::Greater {
self.def_end = to_pp;
}
return false;
}
// Now check if we're extending any of the existing live-in intervals.
let cmp = Cmp(order);
let mut c = self.liveins.cursor(forest, &cmp);
let first_time_livein;
if let Some(end) = c.goto(ebb) {
// There's an interval beginning at `ebb`. See if it extends.
first_time_livein = false;
if order.cmp(end, to) == Ordering::Less {
*c.value_mut().unwrap() = to;
} else {
return first_time_livein;
}
} else if let Some((_, end)) = c.prev() {
// There's no interval beginning at `ebb`, but we could still be live-in at `ebb` with
// a coalesced interval that begins before and ends after.
if order.cmp(end, ebb) == Ordering::Greater {
// Yep, the previous interval overlaps `ebb`.
first_time_livein = false;
if order.cmp(end, to) == Ordering::Less {
*c.value_mut().unwrap() = to;
} else {
return first_time_livein;
}
} else {
first_time_livein = true;
// The current interval does not overlap `ebb`, but it may still be possible to
// coalesce with it.
if order.is_ebb_gap(end, ebb) {
*c.value_mut().unwrap() = to;
} else {
c.insert(ebb, to);
}
}
} else {
// There is no existing interval before `ebb`.
first_time_livein = true;
c.insert(ebb, to);
}
        // Now `c` is left pointing at an interval that ends at `to`.
debug_assert_eq!(c.value(), Some(to));
// See if it can be coalesced with the following interval.
if let Some((next_ebb, next_end)) = c.next() {
if order.is_ebb_gap(to, next_ebb) {
// Remove this interval and extend the previous end point to `next_end`.
c.remove();
c.prev();
*c.value_mut().unwrap() = next_end;
}
}
first_time_livein
}
/// Is this the live range of a dead value?
///
/// A dead value has no uses, and its live range ends at the same program point where it is
/// defined.
pub fn is_dead(&self) -> bool {
self.def_begin == self.def_end
}
/// Is this a local live range?
///
/// A local live range is only used in the same EBB where it was defined. It is allowed to span
/// multiple basic blocks within that EBB.
pub fn is_local(&self) -> bool {
self.liveins.is_empty()
}
/// Get the program point where this live range is defined.
///
/// This will be an EBB header when the value is an EBB argument, otherwise it is the defining
/// instruction.
pub fn def(&self) -> ProgramPoint {
self.def_begin
}
/// Move the definition of this value to a new program point.
///
/// It is only valid to move the definition within the same EBB, and it can't be moved beyond
/// `def_local_end()`.
pub fn move_def_locally(&mut self, def: ProgramPoint) {
self.def_begin = def;
}
/// Get the local end-point of this live range in the EBB where it is defined.
///
/// This can be the EBB header itself in the case of a dead EBB argument.
/// Otherwise, it will be the last local use or branch/jump that can reach a use.
pub fn def_local_end(&self) -> ProgramPoint {
self.def_end
}
/// Get the local end-point of this live range in an EBB where it is live-in.
///
/// If this live range is not live-in to `ebb`, return `None`. Otherwise, return the end-point
/// of this live range's local interval in `ebb`.
///
/// If the live range is live through all of `ebb`, the terminator of `ebb` is a correct
/// answer, but it is also possible that an even later program point is returned. So don't
/// depend on the returned `Inst` to belong to `ebb`.
pub fn livein_local_end(&self, ebb: Ebb, ctx: LiveRangeContext<PO>) -> Option<Inst> {
let cmp = Cmp(ctx.order);
self.liveins
.get_or_less(ebb, ctx.forest, &cmp)
.and_then(|(_, inst)| {
// We have an entry that ends at `inst`.
if ctx.order.cmp(inst, ebb) == Ordering::Greater {
Some(inst)
} else {
None
}
})
}
/// Is this value live-in to `ebb`?
///
/// An EBB argument is not considered to be live in.
pub fn is_livein(&self, ebb: Ebb, ctx: LiveRangeContext<PO>) -> bool {
self.livein_local_end(ebb, ctx).is_some()
}
/// Get all the live-in intervals.
///
/// Note that the intervals are stored in a compressed form so each entry may span multiple
/// EBBs where the value is live in.
pub fn liveins<'a>(&'a self, ctx: LiveRangeContext<'a, PO>) -> bforest::MapIter<'a, Ebb, Inst> {
self.liveins.iter(ctx.forest)
}
/// Check if this live range overlaps a definition in `ebb`.
pub fn overlaps_def(
&self,
def: ExpandedProgramPoint,
ebb: Ebb,
ctx: LiveRangeContext<PO>,
) -> bool {
// Two defs at the same program point always overlap, even if one is dead.
if def == self.def_begin.into() {
return true;
}
// Check for an overlap with the local range.
if ctx.order.cmp(def, self.def_begin) != Ordering::Less
&& ctx.order.cmp(def, self.def_end) == Ordering::Less
{
return true;
}
// Check for an overlap with a live-in range.
match self.livein_local_end(ebb, ctx) {
Some(inst) => ctx.order.cmp(def, inst) == Ordering::Less,
None => false,
}
}
/// Check if this live range reaches a use at `user` in `ebb`.
pub fn reaches_use(&self, user: Inst, ebb: Ebb, ctx: LiveRangeContext<PO>) -> bool {
// Check for an overlap with the local range.
if ctx.order.cmp(user, self.def_begin) == Ordering::Greater
&& ctx.order.cmp(user, self.def_end) != Ordering::Greater
{
return true;
}
// Check for an overlap with a live-in range.
match self.livein_local_end(ebb, ctx) {
Some(inst) => ctx.order.cmp(user, inst) != Ordering::Greater,
None => false,
}
}
/// Check if this live range is killed at `user` in `ebb`.
pub fn killed_at(&self, user: Inst, ebb: Ebb, ctx: LiveRangeContext<PO>) -> bool {
self.def_local_end() == user.into() || self.livein_local_end(ebb, ctx) == Some(user)
}
}
/// Allow a `LiveRange` to be stored in a `SparseMap` indexed by values.
impl<PO: ProgramOrder> SparseMapValue<Value> for GenLiveRange<PO> {
fn key(&self) -> Value {
self.value
}
}
#[cfg(test)]
mod tests {
use super::{GenLiveRange, LiveRangeContext};
use crate::bforest;
use crate::entity::EntityRef;
use crate::ir::{Ebb, Inst, Value};
use crate::ir::{ExpandedProgramPoint, ProgramOrder};
use core::cmp::Ordering;
use std::vec::Vec;
// Dummy program order which simply compares indexes.
// It is assumed that EBBs have indexes that are multiples of 10, and instructions have indexes
// in between. `is_ebb_gap` assumes that terminator instructions have indexes of the form
// ebb * 10 + 1. This is used in the coalesce test.
struct ProgOrder {}
impl ProgramOrder for ProgOrder {
fn cmp<A, B>(&self, a: A, b: B) -> Ordering
where
A: Into<ExpandedProgramPoint>,
B: Into<ExpandedProgramPoint>,
{
fn idx(pp: ExpandedProgramPoint) -> usize {
match pp {
ExpandedProgramPoint::Inst(i) => i.index(),
ExpandedProgramPoint::Ebb(e) => e.index(),
}
}
let ia = idx(a.into());
let ib = idx(b.into());
ia.cmp(&ib)
}
fn is_ebb_gap(&self, inst: Inst, ebb: Ebb) -> bool {
inst.index() % 10 == 1 && ebb.index() / 10 == inst.index() / 10 + 1
}
}
impl ProgOrder {
// Get the EBB corresponding to `inst`.
fn inst_ebb(&self, inst: Inst) -> Ebb {
let i = inst.index();
Ebb::new(i - i % 10)
}
// Get the EBB of a program point.
fn pp_ebb<PP: Into<ExpandedProgramPoint>>(&self, pp: PP) -> Ebb {
match pp.into() {
ExpandedProgramPoint::Inst(i) => self.inst_ebb(i),
ExpandedProgramPoint::Ebb(e) => e,
}
}
// Validate the live range invariants.
fn validate(&self, lr: &GenLiveRange<ProgOrder>, forest: &bforest::MapForest<Ebb, Inst>) {
// The def interval must cover a single EBB.
let def_ebb = self.pp_ebb(lr.def_begin);
assert_eq!(def_ebb, self.pp_ebb(lr.def_end));
// Check that the def interval isn't backwards.
match self.cmp(lr.def_begin, lr.def_end) {
Ordering::Equal => assert!(lr.liveins.is_empty()),
Ordering::Greater => {
panic!("Backwards def interval: {}-{}", lr.def_begin, lr.def_end)
}
Ordering::Less => {}
}
// Check the live-in intervals.
let mut prev_end = None;
for (begin, end) in lr.liveins.iter(forest) {
assert_eq!(self.cmp(begin, end), Ordering::Less);
if let Some(e) = prev_end {
assert_eq!(self.cmp(e, begin), Ordering::Less);
}
assert!(
self.cmp(lr.def_end, begin) == Ordering::Less
|| self.cmp(lr.def_begin, end) == Ordering::Greater,
"Interval can't overlap the def EBB"
);
// Save for next round.
prev_end = Some(end);
}
}
}
// Singleton `ProgramOrder` for tests below.
const PO: &'static ProgOrder = &ProgOrder {};
#[test]
fn dead_def_range() {
let v0 = Value::new(0);
let e0 = Ebb::new(0);
let i1 = Inst::new(1);
let i2 = Inst::new(2);
let e2 = Ebb::new(2);
let lr = GenLiveRange::new(v0, i1.into(), Default::default());
let forest = &bforest::MapForest::new();
let ctx = LiveRangeContext::new(PO, forest);
assert!(lr.is_dead());
assert!(lr.is_local());
assert_eq!(lr.def(), i1.into());
assert_eq!(lr.def_local_end(), i1.into());
assert_eq!(lr.livein_local_end(e2, ctx), None);
PO.validate(&lr, ctx.forest);
// A dead live range overlaps its own def program point.
assert!(lr.overlaps_def(i1.into(), e0, ctx));
assert!(!lr.overlaps_def(i2.into(), e0, ctx));
assert!(!lr.overlaps_def(e0.into(), e0, ctx));
}
#[test]
fn dead_arg_range() {
let v0 = Value::new(0);
let e2 = Ebb::new(2);
let lr = GenLiveRange::new(v0, e2.into(), Default::default());
let forest = &bforest::MapForest::new();
let ctx = LiveRangeContext::new(PO, forest);
assert!(lr.is_dead());
assert!(lr.is_local());
assert_eq!(lr.def(), e2.into());
assert_eq!(lr.def_local_end(), e2.into());
// The def interval of an EBB argument does not count as live-in.
assert_eq!(lr.livein_local_end(e2, ctx), None);
PO.validate(&lr, ctx.forest);
}
#[test]
fn local_def() {
let v0 = Value::new(0);
let e10 = Ebb::new(10);
let i11 = Inst::new(11);
let i12 = Inst::new(12);
let i13 = Inst::new(13);
let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());
let forest = &mut bforest::MapForest::new();
assert_eq!(lr.extend_in_ebb(e10, i13, PO, forest), false);
PO.validate(&lr, forest);
assert!(!lr.is_dead());
assert!(lr.is_local());
assert_eq!(lr.def(), i11.into());
assert_eq!(lr.def_local_end(), i13.into());
// Extending to an already covered inst should not change anything.
assert_eq!(lr.extend_in_ebb(e10, i12, PO, forest), false);
PO.validate(&lr, forest);
assert_eq!(lr.def(), i11.into());
assert_eq!(lr.def_local_end(), i13.into());
}
#[test]
fn local_arg() {
let v0 = Value::new(0);
let e10 = Ebb::new(10);
let i11 = Inst::new(11);
let i12 = Inst::new(12);
let i13 = Inst::new(13);
let mut lr = GenLiveRange::new(v0, e10.into(), Default::default());
let forest = &mut bforest::MapForest::new();
// Extending a dead EBB argument in its own block should not indicate that a live-in
// interval was created.
assert_eq!(lr.extend_in_ebb(e10, i12, PO, forest), false);
PO.validate(&lr, forest);
assert!(!lr.is_dead());
assert!(lr.is_local());
assert_eq!(lr.def(), e10.into());
assert_eq!(lr.def_local_end(), i12.into());
// Extending to an already covered inst should not change anything.
assert_eq!(lr.extend_in_ebb(e10, i11, PO, forest), false);
PO.validate(&lr, forest);
assert_eq!(lr.def(), e10.into());
assert_eq!(lr.def_local_end(), i12.into());
// Extending further.
assert_eq!(lr.extend_in_ebb(e10, i13, PO, forest), false);
PO.validate(&lr, forest);
assert_eq!(lr.def(), e10.into());
assert_eq!(lr.def_local_end(), i13.into());
}
#[test]
fn global_def() {
let v0 = Value::new(0);
let e10 = Ebb::new(10);
let i11 = Inst::new(11);
let i12 = Inst::new(12);
let e20 = Ebb::new(20);
let i21 = Inst::new(21);
let i22 = Inst::new(22);
let i23 = Inst::new(23);
let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());
let forest = &mut bforest::MapForest::new();
assert_eq!(lr.extend_in_ebb(e10, i12, PO, forest), false);
// Adding a live-in interval.
assert_eq!(lr.extend_in_ebb(e20, i22, PO, forest), true);
PO.validate(&lr, forest);
assert_eq!(
lr.livein_local_end(e20, LiveRangeContext::new(PO, forest)),
Some(i22)
);
        // Extending to an already covered inst should not change the live-in interval.
assert_eq!(lr.extend_in_ebb(e20, i21, PO, forest), false);
assert_eq!(
lr.livein_local_end(e20, LiveRangeContext::new(PO, forest)),
Some(i22)
);
// Extending the existing live-in.
assert_eq!(lr.extend_in_ebb(e20, i23, PO, forest), false);
PO.validate(&lr, forest);
assert_eq!(
lr.livein_local_end(e20, LiveRangeContext::new(PO, forest)),
Some(i23)
);
}
#[test]
fn coalesce() {
let v0 = Value::new(0);
let i11 = Inst::new(11);
let e20 = Ebb::new(20);
let i21 = Inst::new(21);
let e30 = Ebb::new(30);
let i31 = Inst::new(31);
let e40 = Ebb::new(40);
let i41 = Inst::new(41);
let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());
let forest = &mut bforest::MapForest::new();
assert_eq!(lr.extend_in_ebb(e30, i31, PO, forest), true);
assert_eq!(
lr.liveins(LiveRangeContext::new(PO, forest))
.collect::<Vec<_>>(),
[(e30, i31)]
);
// Coalesce to previous
assert_eq!(lr.extend_in_ebb(e40, i41, PO, forest), true);
assert_eq!(
lr.liveins(LiveRangeContext::new(PO, forest))
.collect::<Vec<_>>(),
[(e30, i41)]
);
// Coalesce to next
assert_eq!(lr.extend_in_ebb(e20, i21, PO, forest), true);
assert_eq!(
lr.liveins(LiveRangeContext::new(PO, forest))
.collect::<Vec<_>>(),
[(e20, i41)]
);
let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());
assert_eq!(lr.extend_in_ebb(e40, i41, PO, forest), true);
assert_eq!(
lr.liveins(LiveRangeContext::new(PO, forest))
.collect::<Vec<_>>(),
[(e40, i41)]
);
assert_eq!(lr.extend_in_ebb(e20, i21, PO, forest), true);
assert_eq!(
lr.liveins(LiveRangeContext::new(PO, forest))
.collect::<Vec<_>>(),
[(e20, i21), (e40, i41)]
);
// Coalesce to previous and next
assert_eq!(lr.extend_in_ebb(e30, i31, PO, forest), true);
assert_eq!(
lr.liveins(LiveRangeContext::new(PO, forest))
.collect::<Vec<_>>(),
[(e20, i41)]
);
}
// TODO: Add more tests that exercise the binary search algorithm.
}

View File

@@ -0,0 +1,23 @@
//! Register allocation.
//!
//! This module contains data structures and algorithms used for register allocation.
pub mod coloring;
pub mod live_value_tracker;
pub mod liveness;
pub mod liverange;
pub mod register_set;
pub mod virtregs;
mod affinity;
mod coalescing;
mod context;
mod diversion;
mod pressure;
mod reload;
mod solver;
mod spilling;
pub use self::context::Context;
pub use self::diversion::RegDiversions;
pub use self::register_set::RegisterSet;

View File

@@ -0,0 +1,384 @@
//! Register pressure tracking.
//!
//! SSA-based register allocation depends on a spilling phase that "lowers register pressure
//! sufficiently". This module defines the data structures needed to measure register pressure
//! accurately enough to guarantee that the coloring phase will not run out of registers.
//!
//! Ideally, measuring register pressure amounts to simply counting the number of live registers at
//! any given program point. This simplistic method has two problems:
//!
//! 1. Registers are not interchangeable. Most ISAs have separate integer and floating-point
//! register banks, so we need to at least count the number of live registers in each register
//! bank separately.
//!
//! 2. Some ISAs have complicated register aliasing properties. In particular, the 32-bit ARM
//! ISA has a floating-point register bank where two 32-bit registers alias one 64-bit register.
//! This makes it difficult to accurately measure register pressure.
//!
//! This module deals with the problems via *register banks* and *top-level register classes*.
//! Register classes in different register banks are completely independent, so we can count
//! registers in one bank without worrying about the other bank at all.
//!
//! All register classes have a unique top-level register class, and we will count registers for
//! each top-level register class individually. However, a register bank can have multiple
//! top-level register classes that interfere with each other, so all top-level counts need to
//! be considered when determining how many more registers can be allocated.
//!
//! Currently, the only register bank with multiple top-level register classes is the `arm32`
//! floating-point register bank, which has `S`, `D`, and `Q` top-level classes.
//!
//! # Base and transient counts
//!
//! We maintain two separate register counts per top-level register class: base counts and
//! transient counts. The base counts are adjusted with the `take` and `free` functions. The
//! transient counts are adjusted with `take_transient` and `free_transient`.
// Remove once we're using the pressure tracker.
#![allow(dead_code)]
use crate::isa::registers::{RegClass, RegClassMask, RegInfo, MAX_TRACKED_TOPRCS};
use crate::regalloc::RegisterSet;
use core::cmp::min;
use core::fmt;
use core::iter::ExactSizeIterator;
/// Information per top-level register class.
///
/// Everything but the counts is static information computed from the constructor arguments.
#[derive(Default)]
struct TopRC {
// Number of registers currently used from this register class.
base_count: u32,
transient_count: u32,
// Max number of registers that can be allocated.
limit: u32,
// Register units per register.
width: u8,
// The first aliasing top-level RC.
first_toprc: u8,
// The number of aliasing top-level RCs.
num_toprcs: u8,
}
impl TopRC {
fn total_count(&self) -> u32 {
self.base_count + self.transient_count
}
}
pub struct Pressure {
// Bit mask of top-level register classes that are aliased by other top-level register classes.
// Unaliased register classes can use a simpler interference algorithm.
aliased: RegClassMask,
// Current register counts per top-level register class.
toprc: [TopRC; MAX_TRACKED_TOPRCS],
}
impl Pressure {
/// Create a new register pressure tracker.
pub fn new(reginfo: &RegInfo, usable: &RegisterSet) -> Self {
let mut p = Self {
aliased: 0,
toprc: Default::default(),
};
// Get the layout of aliasing top-level register classes from the register banks.
for bank in reginfo.banks {
let first = bank.first_toprc;
let num = bank.num_toprcs;
if bank.pressure_tracking {
for rc in &mut p.toprc[first..first + num] {
rc.first_toprc = first as u8;
rc.num_toprcs = num as u8;
}
// Flag the top-level register classes with aliases.
if num > 1 {
p.aliased |= ((1 << num) - 1) << first;
}
} else {
// This bank has no pressure tracking, so its top-level register classes may exceed
// `MAX_TRACKED_TOPRCS`. Fill in dummy entries.
for rc in &mut p.toprc[first..min(first + num, MAX_TRACKED_TOPRCS)] {
// These aren't used if we don't set the `aliased` bit.
rc.first_toprc = !0;
rc.limit = !0;
}
}
}
// Compute per-class limits from `usable`.
for (toprc, rc) in p
.toprc
.iter_mut()
.take_while(|t| t.num_toprcs > 0)
.zip(reginfo.classes)
{
toprc.limit = usable.iter(rc).len() as u32;
toprc.width = rc.width;
}
p
}
/// Check for an available register in the register class `rc`.
///
/// If it is possible to allocate one more register from `rc`'s top-level register class,
/// returns 0.
///
/// If not, returns a bit-mask of top-level register classes that are interfering. Register
/// pressure should be eased in one of the returned top-level register classes before calling
    /// `check_avail()` again.
fn check_avail(&self, rc: RegClass) -> RegClassMask {
let entry = match self.toprc.get(rc.toprc as usize) {
None => return 0, // Not a pressure tracked bank.
Some(e) => e,
};
let mask = 1 << rc.toprc;
if (self.aliased & mask) == 0 {
// This is a simple unaliased top-level register class.
if entry.total_count() < entry.limit {
0
} else {
mask
}
} else {
// This is the more complicated case. The top-level register class has aliases.
self.check_avail_aliased(entry)
}
}
/// Check for an available register in a top-level register class that may have aliases.
///
/// This is the out-of-line slow path for `check_avail()`.
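    ///
    /// As an illustrative sketch (numbers assumed, not taken from a real ISA): for a top-level
    /// class with `width = 2` and `limit = 16`, `ulimit` is 32 register units, and each taken
    /// register from an aliasing single-unit class counts as up to 2 blocked units, capped at
    /// the total number of units in that class.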
fn check_avail_aliased(&self, entry: &TopRC) -> RegClassMask {
let first = usize::from(entry.first_toprc);
let num = usize::from(entry.num_toprcs);
let width = u32::from(entry.width);
let ulimit = entry.limit * width;
// Count up the number of available register units.
let mut units = 0;
for (rc, rci) in self.toprc[first..first + num].iter().zip(first..) {
let rcw = u32::from(rc.width);
// If `rc.width` is smaller than `width`, each register in `rc` could potentially block
// one of ours. This is assuming that none of the smaller registers are straddling the
// bigger ones.
//
// If `rc.width` is larger than `width`, we are also assuming that the registers are
// aligned and `rc.width` is a multiple of `width`.
let u = if rcw < width {
// We can't take more than the total number of register units in the class.
// This matters for arm32 S-registers which can only ever lock out 16 D-registers.
min(rc.total_count() * width, rc.limit * rcw)
} else {
rc.total_count() * rcw
};
// If this top-level RC on its own is responsible for exceeding our limit, return it
// early to guarantee that registers here are spilled before spilling other registers
// unnecessarily.
if u >= ulimit {
return 1 << rci;
}
units += u;
}
// We've counted up the worst-case number of register units claimed by all aliasing
// classes. Compare to the unit limit in this class.
if units < ulimit {
0
} else {
// Registers need to be spilled from any one of the aliasing classes.
((1 << num) - 1) << first
}
}
/// Take a register from `rc`.
///
/// This does not check if there are enough registers available.
pub fn take(&mut self, rc: RegClass) {
if let Some(t) = self.toprc.get_mut(rc.toprc as usize) {
t.base_count += 1;
}
}
/// Free a register in `rc`.
pub fn free(&mut self, rc: RegClass) {
if let Some(t) = self.toprc.get_mut(rc.toprc as usize) {
t.base_count -= 1;
}
}
/// Reset all counts to 0, both base and transient.
pub fn reset(&mut self) {
for e in &mut self.toprc {
e.base_count = 0;
e.transient_count = 0;
}
}
/// Try to increment a transient counter.
///
/// This will fail if there are not enough registers available.
pub fn take_transient(&mut self, rc: RegClass) -> Result<(), RegClassMask> {
let mask = self.check_avail(rc);
if mask == 0 {
if let Some(t) = self.toprc.get_mut(rc.toprc as usize) {
t.transient_count += 1;
}
Ok(())
} else {
Err(mask)
}
}
/// Reset all transient counts to 0.
pub fn reset_transient(&mut self) {
for e in &mut self.toprc {
e.transient_count = 0;
}
}
/// Preserve the transient counts by transferring them to the base counts.
pub fn preserve_transient(&mut self) {
for e in &mut self.toprc {
e.base_count += e.transient_count;
e.transient_count = 0;
}
}
}
impl fmt::Display for Pressure {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Pressure[")?;
for rc in &self.toprc {
if rc.limit > 0 && rc.limit < !0 {
write!(f, " {}+{}/{}", rc.base_count, rc.transient_count, rc.limit)?;
}
}
write!(f, " ]")
}
}
#[cfg(test)]
#[cfg(build_arm32)]
mod tests {
use super::Pressure;
use crate::isa::{RegClass, TargetIsa};
use crate::regalloc::RegisterSet;
use core::borrow::Borrow;
use core::str::FromStr;
use std::boxed::Box;
use target_lexicon::triple;
// Make an arm32 `TargetIsa`, if possible.
fn arm32() -> Option<Box<TargetIsa>> {
use crate::isa;
use crate::settings;
let shared_builder = settings::builder();
let shared_flags = settings::Flags::new(shared_builder);
isa::lookup(triple!("arm"))
.ok()
.map(|b| b.finish(shared_flags))
}
// Get a register class by name.
fn rc_by_name(isa: &TargetIsa, name: &str) -> RegClass {
isa.register_info()
.classes
.iter()
.find(|rc| rc.name == name)
.expect("Can't find named register class.")
}
#[test]
fn basic_counting() {
let isa = arm32().expect("This test requires arm32 support");
let isa = isa.borrow();
let gpr = rc_by_name(isa, "GPR");
let s = rc_by_name(isa, "S");
let reginfo = isa.register_info();
let regs = RegisterSet::new();
let mut pressure = Pressure::new(&reginfo, &regs);
let mut count = 0;
while pressure.check_avail(gpr) == 0 {
pressure.take(gpr);
count += 1;
}
assert_eq!(count, 16);
assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc);
assert_eq!(pressure.check_avail(s), 0);
pressure.free(gpr);
assert_eq!(pressure.check_avail(gpr), 0);
pressure.take(gpr);
assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc);
assert_eq!(pressure.check_avail(s), 0);
pressure.reset();
assert_eq!(pressure.check_avail(gpr), 0);
assert_eq!(pressure.check_avail(s), 0);
}
#[test]
fn arm_float_bank() {
let isa = arm32().expect("This test requires arm32 support");
let isa = isa.borrow();
let s = rc_by_name(isa, "S");
let d = rc_by_name(isa, "D");
let q = rc_by_name(isa, "Q");
let reginfo = isa.register_info();
let regs = RegisterSet::new();
let mut pressure = Pressure::new(&reginfo, &regs);
assert_eq!(pressure.check_avail(s), 0);
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
// Allocating a single S-register should not affect availability.
pressure.take(s);
assert_eq!(pressure.check_avail(s), 0);
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
pressure.take(d);
assert_eq!(pressure.check_avail(s), 0);
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
pressure.take(q);
assert_eq!(pressure.check_avail(s), 0);
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
// Take a total of 16 S-regs.
for _ in 1..16 {
pressure.take(s);
}
assert_eq!(pressure.check_avail(s), 0);
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
// We've taken 16 S, 1 D, and 1 Q. There should be 6 more Qs.
for _ in 0..6 {
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
pressure.take(q);
}
// We've taken 16 S, 1 D, and 7 Qs.
assert!(pressure.check_avail(s) != 0);
assert_eq!(pressure.check_avail(d), 0);
assert!(pressure.check_avail(q) != 0);
}
}

View File

@@ -0,0 +1,324 @@
//! Set of allocatable registers as a bit vector of register units.
//!
//! While allocating registers, we need to keep track of which registers are available and which
//! registers are in use. Since registers can alias in different ways, we track this via the
//! "register unit" abstraction. Every register contains one or more register units. Registers that
//! share a register unit can't be in use at the same time.
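//!
//! A minimal usage sketch (`rc` and `unit` are assumed to come from the target's `RegInfo`, so
//! this is illustrative rather than a runnable doctest):
//!
//! ```ignore
//! let mut regs = RegisterSet::new();
//! if regs.is_avail(rc, unit) {
//!     regs.take(rc, unit); // Claims every register unit covered by the register.
//!     // ... use the register ...
//!     regs.free(rc, unit); // Returns all of its units to the set.
//! }
//! ```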
use crate::isa::registers::{RegClass, RegInfo, RegUnit, RegUnitMask};
use core::char;
use core::fmt;
use core::iter::ExactSizeIterator;
use core::mem::size_of_val;
/// Set of registers available for allocation.
#[derive(Clone)]
pub struct RegisterSet {
avail: RegUnitMask,
}
// Given a register class and a register unit in the class, compute a word index and a bit mask of
// register units representing that register.
//
// Note that a register is not allowed to straddle words.
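//
// As a worked example (values chosen for illustration): for a class with `rc.width == 2` and
// `reg == 33`, `width_bits` is `0b11`, `word_index` is `1`, and `reg_bits` is `0b11 << 1 == 0b110`.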
fn bitmask(rc: RegClass, reg: RegUnit) -> (usize, u32) {
// Bit mask representing the register. It is `rc.width` consecutive units.
let width_bits = (1 << rc.width) - 1;
// Index into avail[] of the word containing `reg`.
let word_index = (reg / 32) as usize;
// The actual bits in the word that cover `reg`.
let reg_bits = width_bits << (reg % 32);
(word_index, reg_bits)
}
impl RegisterSet {
/// Create a new register set with all registers available.
///
/// Note that this includes *all* registers. Query the `TargetIsa` object to get a set of
/// allocatable registers where reserved registers have been filtered out.
pub fn new() -> Self {
Self { avail: [!0; 3] }
}
/// Create a new register set with no registers available.
pub fn empty() -> Self {
Self { avail: [0; 3] }
}
/// Returns `true` if the specified register is available.
pub fn is_avail(&self, rc: RegClass, reg: RegUnit) -> bool {
let (idx, bits) = bitmask(rc, reg);
(self.avail[idx] & bits) == bits
}
/// Allocate `reg` from `rc` so it is no longer available.
///
/// It is an error to take a register that doesn't have all of its register units available.
pub fn take(&mut self, rc: RegClass, reg: RegUnit) {
let (idx, bits) = bitmask(rc, reg);
debug_assert!(
(self.avail[idx] & bits) == bits,
"{}:{} not available in {}",
rc,
rc.info.display_regunit(reg),
self.display(rc.info)
);
self.avail[idx] &= !bits;
}
/// Return `reg` and all of its register units to the set of available registers.
pub fn free(&mut self, rc: RegClass, reg: RegUnit) {
let (idx, bits) = bitmask(rc, reg);
debug_assert!(
(self.avail[idx] & bits) == 0,
"{}:{} is already free in {}",
rc,
rc.info.display_regunit(reg),
self.display(rc.info)
);
self.avail[idx] |= bits;
}
/// Return an iterator over all available registers belonging to the register class `rc`.
///
/// This doesn't allocate anything from the set; use `take()` for that.
pub fn iter(&self, rc: RegClass) -> RegSetIter {
// Start by copying the RC mask. It is a single set bit for each register in the class.
let mut rsi = RegSetIter { regs: rc.mask };
// Mask out the unavailable units.
for idx in 0..self.avail.len() {
// If a single unit in a register is unavailable, the whole register can't be used.
// If a register straddles a word boundary, it will be marked as unavailable.
// There's an assertion in `cdsl/registers.py` to check for that.
for i in 0..rc.width {
rsi.regs[idx] &= self.avail[idx] >> i;
}
}
rsi
}
/// Check if any register units allocated out of this set interferes with units allocated out
/// of `other`.
///
/// This assumes that unused bits are 1.
pub fn interferes_with(&self, other: &Self) -> bool {
self.avail
.iter()
.zip(&other.avail)
.any(|(&x, &y)| (x | y) != !0)
}
/// Intersect this set of registers with `other`. This has the effect of removing any register
/// units from this set that are not in `other`.
pub fn intersect(&mut self, other: &Self) {
for (x, &y) in self.avail.iter_mut().zip(&other.avail) {
*x &= y;
}
}
/// Return an object that can display this register set, using the register info from the
/// target ISA.
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&self, regs: R) -> DisplayRegisterSet<'a> {
DisplayRegisterSet(self.clone(), regs.into())
}
}
/// Iterator over available registers in a register class.
pub struct RegSetIter {
regs: RegUnitMask,
}
impl Iterator for RegSetIter {
type Item = RegUnit;
fn next(&mut self) -> Option<RegUnit> {
let mut unit_offset = 0;
// Find the first set bit in `self.regs`.
for word in &mut self.regs {
if *word != 0 {
// Compute the register unit number from the lowest set bit in the word.
let unit = unit_offset + word.trailing_zeros() as RegUnit;
// Clear that lowest bit so we won't find it again.
*word &= *word - 1;
return Some(unit);
}
            // How many register units were there in the word? This is a constant 32 for `u32` etc.
unit_offset += 8 * size_of_val(word) as RegUnit;
}
// All of `self.regs` is 0.
None
}
fn size_hint(&self) -> (usize, Option<usize>) {
let bits = self.regs.iter().map(|&w| w.count_ones() as usize).sum();
(bits, Some(bits))
}
}
impl ExactSizeIterator for RegSetIter {}
/// Displaying a `RegisterSet` correctly requires the associated `RegInfo` from the target ISA.
pub struct DisplayRegisterSet<'a>(RegisterSet, Option<&'a RegInfo>);
impl<'a> fmt::Display for DisplayRegisterSet<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "[")?;
match self.1 {
None => {
for w in &self.0.avail {
write!(f, " #{:08x}", w)?;
}
}
Some(reginfo) => {
let toprcs = reginfo
.banks
.iter()
.map(|b| b.first_toprc + b.num_toprcs)
.max()
.expect("No register banks");
for rc in &reginfo.classes[0..toprcs] {
if rc.width == 1 {
let bank = &reginfo.banks[rc.bank as usize];
write!(f, " {}: ", rc)?;
for offset in 0..bank.units {
let reg = bank.first_unit + offset;
if !rc.contains(reg) {
continue;
}
if !self.0.is_avail(rc, reg) {
write!(f, "-")?;
continue;
}
// Display individual registers as either the second letter of their
// name or the last digit of their number.
// This works for x86 (rax, rbx, ...) and for numbered regs.
write!(
f,
"{}",
bank.names
.get(offset as usize)
.and_then(|name| name.chars().nth(1))
.unwrap_or_else(|| char::from_digit(
u32::from(offset % 10),
10
)
.unwrap())
)?;
}
}
}
}
}
write!(f, " ]")
}
}
impl fmt::Display for RegisterSet {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.display(None).fmt(f)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::isa::registers::{RegClass, RegClassData};
use std::vec::Vec;
// Register classes for testing.
const GPR: RegClass = &RegClassData {
name: "GPR",
index: 0,
width: 1,
bank: 0,
toprc: 0,
first: 28,
subclasses: 0,
mask: [0xf0000000, 0x0000000f, 0],
info: &INFO,
};
const DPR: RegClass = &RegClassData {
name: "DPR",
index: 0,
width: 2,
bank: 0,
toprc: 0,
first: 28,
subclasses: 0,
mask: [0x50000000, 0x0000000a, 0],
info: &INFO,
};
const INFO: RegInfo = RegInfo {
banks: &[],
classes: &[],
};
#[test]
fn put_and_take() {
let mut regs = RegisterSet::new();
        // `GPR` has 8 registers, units 28 through 35.
assert_eq!(regs.iter(GPR).len(), 8);
assert_eq!(regs.iter(GPR).count(), 8);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [28, 30, 33, 35]);
assert!(regs.is_avail(GPR, 29));
regs.take(&GPR, 29);
assert!(!regs.is_avail(GPR, 29));
assert_eq!(regs.iter(GPR).count(), 7);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
assert!(regs.is_avail(GPR, 30));
regs.take(&GPR, 30);
assert!(!regs.is_avail(GPR, 30));
assert_eq!(regs.iter(GPR).count(), 6);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [33, 35]);
assert!(regs.is_avail(GPR, 32));
regs.take(&GPR, 32);
assert!(!regs.is_avail(GPR, 32));
assert_eq!(regs.iter(GPR).count(), 5);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [33, 35]);
regs.free(&GPR, 30);
assert!(regs.is_avail(GPR, 30));
assert!(!regs.is_avail(GPR, 29));
assert!(!regs.is_avail(GPR, 32));
assert_eq!(regs.iter(GPR).count(), 6);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
regs.free(&GPR, 32);
assert!(regs.is_avail(GPR, 31));
assert!(!regs.is_avail(GPR, 29));
assert!(regs.is_avail(GPR, 32));
assert_eq!(regs.iter(GPR).count(), 7);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
}
#[test]
fn interference() {
let mut regs1 = RegisterSet::new();
let mut regs2 = RegisterSet::new();
assert!(!regs1.interferes_with(&regs2));
regs1.take(&GPR, 32);
assert!(!regs1.interferes_with(&regs2));
regs2.take(&GPR, 31);
assert!(!regs1.interferes_with(&regs2));
regs1.intersect(&regs2);
assert!(regs1.interferes_with(&regs2));
}
}

View File

@@ -0,0 +1,438 @@
//! Reload pass
//!
//! The reload pass runs between the spilling and coloring passes. Its primary responsibility is to
//! insert `spill` and `fill` instructions such that instruction operands expecting a register will
//! get a value with register affinity, and operands expecting a stack slot will get a value with
//! stack affinity.
//!
//! The secondary responsibility of the reload pass is to reuse values in registers as much as
//! possible to minimize the number of `fill` instructions needed. This must not cause the register
//! pressure limits to be exceeded.
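//!
//! For example (a sketch with illustrative value numbers), a use of a spilled value `v1` by an
//! instruction whose operand constraint requires a register is rewritten from:
//!
//! ```text
//! v2 = iadd v1, v3
//! ```
//!
//! into:
//!
//! ```text
//! v4 = fill v1
//! v2 = iadd v4, v3
//! ```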
use crate::cursor::{Cursor, EncCursor};
use crate::dominator_tree::DominatorTree;
use crate::entity::{SparseMap, SparseMapValue};
use crate::ir::{AbiParam, ArgumentLoc, InstBuilder};
use crate::ir::{Ebb, Function, Inst, InstructionData, Opcode, Value};
use crate::isa::RegClass;
use crate::isa::{ConstraintKind, EncInfo, Encoding, RecipeConstraints, TargetIsa};
use crate::regalloc::affinity::Affinity;
use crate::regalloc::live_value_tracker::{LiveValue, LiveValueTracker};
use crate::regalloc::liveness::Liveness;
use crate::timing;
use crate::topo_order::TopoOrder;
use log::debug;
use std::vec::Vec;
/// Reusable data structures for the reload pass.
pub struct Reload {
candidates: Vec<ReloadCandidate>,
reloads: SparseMap<Value, ReloadedValue>,
}
/// Context data structure that gets instantiated once per pass.
struct Context<'a> {
cur: EncCursor<'a>,
// Cached ISA information.
// We save it here to avoid frequent virtual function calls on the `TargetIsa` trait object.
encinfo: EncInfo,
// References to contextual data structures we need.
domtree: &'a DominatorTree,
liveness: &'a mut Liveness,
topo: &'a mut TopoOrder,
candidates: &'a mut Vec<ReloadCandidate>,
reloads: &'a mut SparseMap<Value, ReloadedValue>,
}
impl Reload {
/// Create a new blank reload pass.
pub fn new() -> Self {
Self {
candidates: Vec::new(),
reloads: SparseMap::new(),
}
}
/// Clear all data structures in this reload pass.
pub fn clear(&mut self) {
self.candidates.clear();
self.reloads.clear();
}
/// Run the reload algorithm over `func`.
pub fn run(
&mut self,
isa: &TargetIsa,
func: &mut Function,
domtree: &DominatorTree,
liveness: &mut Liveness,
topo: &mut TopoOrder,
tracker: &mut LiveValueTracker,
) {
let _tt = timing::ra_reload();
debug!("Reload for:\n{}", func.display(isa));
let mut ctx = Context {
cur: EncCursor::new(func, isa),
encinfo: isa.encoding_info(),
domtree,
liveness,
topo,
candidates: &mut self.candidates,
reloads: &mut self.reloads,
};
ctx.run(tracker)
}
}
/// A reload candidate.
///
/// This represents a stack value that is used by the current instruction where a register is
/// needed.
struct ReloadCandidate {
argidx: usize,
value: Value,
regclass: RegClass,
}
/// A Reloaded value.
///
/// This represents a value that has been reloaded into a register value from the stack.
struct ReloadedValue {
stack: Value,
reg: Value,
}
impl SparseMapValue<Value> for ReloadedValue {
fn key(&self) -> Value {
self.stack
}
}
impl<'a> Context<'a> {
fn run(&mut self, tracker: &mut LiveValueTracker) {
self.topo.reset(self.cur.func.layout.ebbs());
while let Some(ebb) = self.topo.next(&self.cur.func.layout, self.domtree) {
self.visit_ebb(ebb, tracker);
}
}
fn visit_ebb(&mut self, ebb: Ebb, tracker: &mut LiveValueTracker) {
debug!("Reloading {}:", ebb);
self.visit_ebb_header(ebb, tracker);
tracker.drop_dead_params();
// visit_ebb_header() places us at the first interesting instruction in the EBB.
while let Some(inst) = self.cur.current_inst() {
if !self.cur.func.dfg[inst].opcode().is_ghost() {
// This instruction either has an encoding or has ABI constraints, so visit it to
// insert spills and fills as needed.
let encoding = self.cur.func.encodings[inst];
self.visit_inst(ebb, inst, encoding, tracker);
tracker.drop_dead(inst);
} else {
// This is a ghost instruction with no encoding and no extra constraints, so we can
// just skip over it.
self.cur.next_inst();
}
}
}
    /// Process the EBB parameters and move the cursor to the first instruction in the EBB that
    /// needs processing.
fn visit_ebb_header(&mut self, ebb: Ebb, tracker: &mut LiveValueTracker) {
let (liveins, args) = tracker.ebb_top(
ebb,
&self.cur.func.dfg,
self.liveness,
&self.cur.func.layout,
self.domtree,
);
if self.cur.func.layout.entry_block() == Some(ebb) {
debug_assert_eq!(liveins.len(), 0);
self.visit_entry_params(ebb, args);
} else {
self.visit_ebb_params(ebb, args);
}
}
/// Visit the parameters on the entry block.
/// These values have ABI constraints from the function signature.
fn visit_entry_params(&mut self, ebb: Ebb, args: &[LiveValue]) {
debug_assert_eq!(self.cur.func.signature.params.len(), args.len());
self.cur.goto_first_inst(ebb);
for (arg_idx, arg) in args.iter().enumerate() {
let abi = self.cur.func.signature.params[arg_idx];
match abi.location {
ArgumentLoc::Reg(_) => {
if arg.affinity.is_stack() {
// An incoming register parameter was spilled. Replace the parameter value
// with a temporary register value that is immediately spilled.
let reg = self
.cur
.func
.dfg
.replace_ebb_param(arg.value, abi.value_type);
let affinity = Affinity::abi(&abi, self.cur.isa);
self.liveness.create_dead(reg, ebb, affinity);
self.insert_spill(ebb, arg.value, reg);
}
}
ArgumentLoc::Stack(_) => {
debug_assert!(arg.affinity.is_stack());
}
ArgumentLoc::Unassigned => panic!("Unexpected ABI location"),
}
}
}
fn visit_ebb_params(&mut self, ebb: Ebb, _args: &[LiveValue]) {
self.cur.goto_first_inst(ebb);
}
/// Process the instruction pointed to by `pos`, and advance the cursor to the next instruction
/// that needs processing.
fn visit_inst(
&mut self,
ebb: Ebb,
inst: Inst,
encoding: Encoding,
tracker: &mut LiveValueTracker,
) {
self.cur.use_srcloc(inst);
// Get the operand constraints for `inst` that we are trying to satisfy.
let constraints = self.encinfo.operand_constraints(encoding);
// Identify reload candidates.
debug_assert!(self.candidates.is_empty());
self.find_candidates(inst, constraints);
if let InstructionData::Unary {
opcode: Opcode::Copy,
..
} = self.cur.func.dfg[inst]
{
self.reload_copy_candidates(inst);
} else {
self.reload_inst_candidates(ebb, inst);
}
// TODO: Reuse reloads for future instructions.
self.reloads.clear();
let (_throughs, _kills, defs) =
tracker.process_inst(inst, &self.cur.func.dfg, self.liveness);
// Advance to the next instruction so we can insert any spills after the instruction.
self.cur.next_inst();
// Rewrite register defs that need to be spilled.
//
// Change:
//
// v2 = inst ...
//
// Into:
//
// v7 = inst ...
// v2 = spill v7
//
// That way, we don't need to rewrite all future uses of v2.
if let Some(constraints) = constraints {
for (lv, op) in defs.iter().zip(constraints.outs) {
if lv.affinity.is_stack() && op.kind != ConstraintKind::Stack {
if let InstructionData::Unary {
opcode: Opcode::Copy,
arg,
} = self.cur.func.dfg[inst]
{
self.cur.func.dfg.replace(inst).spill(arg);
let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok();
debug_assert!(ok);
} else {
let value_type = self.cur.func.dfg.value_type(lv.value);
let reg = self.cur.func.dfg.replace_result(lv.value, value_type);
self.liveness.create_dead(reg, inst, Affinity::new(op));
self.insert_spill(ebb, lv.value, reg);
}
}
}
}
// Same thing for spilled call return values.
let retvals = &defs[self.cur.func.dfg[inst]
.opcode()
.constraints()
.num_fixed_results()..];
if !retvals.is_empty() {
let sig = self
.cur
.func
.dfg
.call_signature(inst)
.expect("Extra results on non-call instruction");
for (i, lv) in retvals.iter().enumerate() {
let abi = self.cur.func.dfg.signatures[sig].returns[i];
debug_assert!(
abi.location.is_reg(),
"expected reg; got {:?}",
abi.location
);
if lv.affinity.is_stack() {
let reg = self.cur.func.dfg.replace_result(lv.value, abi.value_type);
self.liveness
.create_dead(reg, inst, Affinity::abi(&abi, self.cur.isa));
self.insert_spill(ebb, lv.value, reg);
}
}
}
}
// Reload the current candidates for the given `inst`.
fn reload_inst_candidates(&mut self, ebb: Ebb, inst: Inst) {
// Insert fill instructions before `inst` and replace `cand.value` with the filled value.
for cand in self.candidates.iter_mut() {
if let Some(reload) = self.reloads.get(cand.value) {
cand.value = reload.reg;
continue;
}
let reg = self.cur.ins().fill(cand.value);
let fill = self.cur.built_inst();
self.reloads.insert(ReloadedValue {
stack: cand.value,
reg,
});
cand.value = reg;
// Create a live range for the new reload.
let affinity = Affinity::Reg(cand.regclass.into());
self.liveness.create_dead(reg, fill, affinity);
self.liveness
.extend_locally(reg, ebb, inst, &self.cur.func.layout);
}
// Rewrite instruction arguments.
//
// Only rewrite those arguments that were identified as candidates. This leaves EBB
// arguments on branches as-is without rewriting them. A spilled EBB argument needs to stay
// spilled because the matching EBB parameter is going to be in the same virtual register
// and therefore the same stack slot as the EBB argument value.
if !self.candidates.is_empty() {
let args = self.cur.func.dfg.inst_args_mut(inst);
while let Some(cand) = self.candidates.pop() {
args[cand.argidx] = cand.value;
}
}
}
// Reload the current candidates for the given copy `inst`.
//
// As an optimization, replace a copy instruction where the argument has been spilled with
// a fill instruction.
fn reload_copy_candidates(&mut self, inst: Inst) {
// Copy instructions can only have one argument.
debug_assert!(self.candidates.is_empty() || self.candidates.len() == 1);
if let Some(cand) = self.candidates.pop() {
self.cur.func.dfg.replace(inst).fill(cand.value);
let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok();
debug_assert!(ok);
}
}
// Find reload candidates for `inst` and add them to `self.candidates`.
//
// These are uses of spilled values where the operand constraint requires a register.
fn find_candidates(&mut self, inst: Inst, constraints: Option<&RecipeConstraints>) {
let args = self.cur.func.dfg.inst_args(inst);
if let Some(constraints) = constraints {
for (argidx, (op, &arg)) in constraints.ins.iter().zip(args).enumerate() {
if op.kind != ConstraintKind::Stack && self.liveness[arg].affinity.is_stack() {
self.candidates.push(ReloadCandidate {
argidx,
value: arg,
regclass: op.regclass,
})
}
}
}
// If we only have the fixed arguments, we're done now.
let offset = self.cur.func.dfg[inst]
.opcode()
.constraints()
.num_fixed_value_arguments();
if args.len() == offset {
return;
}
let var_args = &args[offset..];
// Handle ABI arguments.
if let Some(sig) = self.cur.func.dfg.call_signature(inst) {
handle_abi_args(
self.candidates,
&self.cur.func.dfg.signatures[sig].params,
var_args,
offset,
self.cur.isa,
self.liveness,
);
} else if self.cur.func.dfg[inst].opcode().is_return() {
handle_abi_args(
self.candidates,
&self.cur.func.signature.returns,
var_args,
offset,
self.cur.isa,
self.liveness,
);
}
}
/// Insert a spill at `pos` and update data structures.
///
/// - Insert `stack = spill reg` at `pos`, and assign an encoding.
/// - Move the `stack` live range starting point to the new instruction.
/// - Extend the `reg` live range to reach the new instruction.
fn insert_spill(&mut self, ebb: Ebb, stack: Value, reg: Value) {
self.cur.ins().with_result(stack).spill(reg);
let inst = self.cur.built_inst();
// Update live ranges.
self.liveness.move_def_locally(stack, inst);
self.liveness
.extend_locally(reg, ebb, inst, &self.cur.func.layout);
}
}
/// Find reload candidates in the instruction's ABI variable arguments. This handles both
/// return values and call arguments.
fn handle_abi_args(
candidates: &mut Vec<ReloadCandidate>,
abi_types: &[AbiParam],
var_args: &[Value],
offset: usize,
isa: &TargetIsa,
liveness: &Liveness,
) {
debug_assert_eq!(abi_types.len(), var_args.len());
for ((abi, &arg), argidx) in abi_types.iter().zip(var_args).zip(offset..) {
if abi.location.is_reg() {
let lv = liveness.get(arg).expect("Missing live range for ABI arg");
if lv.affinity.is_stack() {
candidates.push(ReloadCandidate {
argidx,
value: arg,
regclass: isa.regclass_for_abi_type(abi.value_type),
});
}
}
}
}

Some files were not shown because too many files have changed in this diff.