Merge branch 'master' into no_std
This commit is contained in:
226
lib/codegen/src/abi.rs
Normal file
226
lib/codegen/src/abi.rs
Normal file
@@ -0,0 +1,226 @@
|
||||
//! Common helper code for ABI lowering.
|
||||
//!
|
||||
//! This module provides functions and data structures that are useful for implementing the
|
||||
//! `TargetIsa::legalize_signature()` method.
|
||||
|
||||
use ir::{AbiParam, ArgumentExtension, ArgumentLoc, Type};
|
||||
use std::cmp::Ordering;
|
||||
use std::vec::Vec;
|
||||
|
||||
/// Legalization action to perform on a single argument or return value when converting a
/// signature.
///
/// An argument may go through a sequence of legalization steps before it reaches the final
/// `Assign` action.
#[derive(Clone, Copy, Debug)]
pub enum ArgAction {
    /// Assign the argument to the given location.
    Assign(ArgumentLoc),

    /// Convert the argument, then call again.
    ///
    /// This action can split an integer type into two smaller integer arguments, or it can split a
    /// SIMD vector into halves.
    // The assigner is re-invoked on the converted argument(s) until it returns `Assign`.
    Convert(ValueConversion),
}
|
||||
|
||||
impl From<ArgumentLoc> for ArgAction {
|
||||
fn from(x: ArgumentLoc) -> ArgAction {
|
||||
ArgAction::Assign(x)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ValueConversion> for ArgAction {
|
||||
fn from(x: ValueConversion) -> ArgAction {
|
||||
ArgAction::Convert(x)
|
||||
}
|
||||
}
|
||||
|
||||
/// Legalization action to be applied to a value that is being passed to or from a legalized ABI.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ValueConversion {
    /// Split an integer type into low and high parts, using `isplit`.
    IntSplit,

    /// Split a vector type into halves with identical lane types, using `vsplit`.
    VectorSplit,

    /// Bit-cast to an integer type of the same size.
    // Used so a float can subsequently be split as an integer (e.g. `f64` in two 32-bit regs).
    IntBits,

    /// Sign-extend integer value to the required type.
    Sext(Type),

    /// Unsigned zero-extend value to the required type.
    Uext(Type),
}
|
||||
|
||||
impl ValueConversion {
|
||||
/// Apply this conversion to a type, return the converted type.
|
||||
pub fn apply(self, ty: Type) -> Type {
|
||||
match self {
|
||||
ValueConversion::IntSplit => ty.half_width().expect("Integer type too small to split"),
|
||||
ValueConversion::VectorSplit => ty.half_vector().expect("Not a vector"),
|
||||
ValueConversion::IntBits => Type::int(ty.bits()).expect("Bad integer size"),
|
||||
ValueConversion::Sext(nty) |
|
||||
ValueConversion::Uext(nty) => nty,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is this a split conversion that results in two arguments?
|
||||
pub fn is_split(self) -> bool {
|
||||
match self {
|
||||
ValueConversion::IntSplit |
|
||||
ValueConversion::VectorSplit => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Common trait for assigning arguments to registers or stack locations.
///
/// This will be implemented by individual ISAs.
pub trait ArgAssigner {
    /// Pick an assignment action for function argument (or return value) `arg`.
    // May be called repeatedly for the same logical argument after each `Convert` action.
    fn assign(&mut self, arg: &AbiParam) -> ArgAction;
}
|
||||
|
||||
/// Legalize the arguments in `args` using the given argument assigner.
|
||||
///
|
||||
/// This function can be used for both arguments and return values.
|
||||
pub fn legalize_args<AA: ArgAssigner>(args: &mut Vec<AbiParam>, aa: &mut AA) {
|
||||
// Iterate over the arguments.
|
||||
// We may need to mutate the vector in place, so don't use a normal iterator, and clone the
|
||||
// argument to avoid holding a reference.
|
||||
let mut argno = 0;
|
||||
while let Some(arg) = args.get(argno).cloned() {
|
||||
// Leave the pre-assigned arguments alone.
|
||||
// We'll assume that they don't interfere with our assignments.
|
||||
if arg.location.is_assigned() {
|
||||
argno += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
match aa.assign(&arg) {
|
||||
// Assign argument to a location and move on to the next one.
|
||||
ArgAction::Assign(loc) => {
|
||||
args[argno].location = loc;
|
||||
argno += 1;
|
||||
}
|
||||
// Split this argument into two smaller ones. Then revisit both.
|
||||
ArgAction::Convert(conv) => {
|
||||
let new_arg = AbiParam {
|
||||
value_type: conv.apply(arg.value_type),
|
||||
..arg
|
||||
};
|
||||
args[argno].value_type = new_arg.value_type;
|
||||
if conv.is_split() {
|
||||
args.insert(argno + 1, new_arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Determine the right action to take when passing a `have` value type to a call signature where
|
||||
/// the next argument is `arg` which has a different value type.
|
||||
///
|
||||
/// The signature legalization process in `legalize_args` above can replace a single argument value
|
||||
/// with multiple arguments of smaller types. It can also change the type of an integer argument to
|
||||
/// a larger integer type, requiring the smaller value to be sign- or zero-extended.
|
||||
///
|
||||
/// The legalizer needs to repair the values at all ABI boundaries:
|
||||
///
|
||||
/// - Incoming function arguments to the entry EBB.
|
||||
/// - Function arguments passed to a call.
|
||||
/// - Return values from a call.
|
||||
/// - Return values passed to a return instruction.
|
||||
///
|
||||
/// The `legalize_abi_value` function helps the legalizer with the process. When the legalizer
|
||||
/// needs to pass a pre-legalized `have` argument, but the ABI argument `arg` has a different value
|
||||
/// type, `legalize_abi_value(have, arg)` tells the legalizer how to create the needed value type
|
||||
/// for the argument.
|
||||
///
|
||||
/// It may be necessary to call `legalize_abi_value` more than once for a given argument before the
|
||||
/// desired argument type appears. This will happen when a vector or integer type needs to be split
|
||||
/// more than once, for example.
|
||||
pub fn legalize_abi_value(have: Type, arg: &AbiParam) -> ValueConversion {
|
||||
let have_bits = have.bits();
|
||||
let arg_bits = arg.value_type.bits();
|
||||
|
||||
match have_bits.cmp(&arg_bits) {
|
||||
// We have fewer bits than the ABI argument.
|
||||
Ordering::Less => {
|
||||
debug_assert!(
|
||||
have.is_int() && arg.value_type.is_int(),
|
||||
"Can only extend integer values"
|
||||
);
|
||||
match arg.extension {
|
||||
ArgumentExtension::Uext => ValueConversion::Uext(arg.value_type),
|
||||
ArgumentExtension::Sext => ValueConversion::Sext(arg.value_type),
|
||||
_ => panic!("No argument extension specified"),
|
||||
}
|
||||
}
|
||||
// We have the same number of bits as the argument.
|
||||
Ordering::Equal => {
|
||||
// This must be an integer vector that is split and then extended.
|
||||
debug_assert!(arg.value_type.is_int());
|
||||
debug_assert!(have.is_vector());
|
||||
ValueConversion::VectorSplit
|
||||
}
|
||||
// We have more bits than the argument.
|
||||
Ordering::Greater => {
|
||||
if have.is_vector() {
|
||||
ValueConversion::VectorSplit
|
||||
} else if have.is_float() {
|
||||
// Convert a float to int so it can be split the next time.
|
||||
// ARM would do this to pass an `f64` in two registers.
|
||||
ValueConversion::IntBits
|
||||
} else {
|
||||
ValueConversion::IntSplit
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use ir::AbiParam;
    use ir::types;

    #[test]
    fn legalize() {
        let mut arg = AbiParam::new(types::I32);

        // Wider values are split: vectors as vectors, integers in half.
        assert_eq!(
            legalize_abi_value(types::I64X2, &arg),
            ValueConversion::VectorSplit
        );
        assert_eq!(
            legalize_abi_value(types::I64, &arg),
            ValueConversion::IntSplit
        );

        // Vector of integers is broken down, then sign-extended.
        arg.extension = ArgumentExtension::Sext;
        assert_eq!(
            legalize_abi_value(types::I16X4, &arg),
            ValueConversion::VectorSplit
        );
        assert_eq!(
            legalize_abi_value(types::I16.by(2).unwrap(), &arg),
            ValueConversion::VectorSplit
        );
        assert_eq!(
            legalize_abi_value(types::I16, &arg),
            ValueConversion::Sext(types::I32)
        );

        // 64-bit float is split as an integer.
        assert_eq!(
            legalize_abi_value(types::F64, &arg),
            ValueConversion::IntBits
        );
    }
}
|
||||
920
lib/codegen/src/bforest/map.rs
Normal file
920
lib/codegen/src/bforest/map.rs
Normal file
@@ -0,0 +1,920 @@
|
||||
//! Forest of maps.
|
||||
|
||||
use super::{Comparator, Forest, Node, NodeData, NodePool, Path, INNER_SIZE};
|
||||
use packed_option::PackedOption;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
/// Tag type defining forest types for a map.
|
||||
struct MapTypes<K, V, C>(PhantomData<(K, V, C)>);
|
||||
|
||||
impl<K, V, C> Forest for MapTypes<K, V, C>
|
||||
where
|
||||
K: Copy,
|
||||
V: Copy,
|
||||
C: Comparator<K>,
|
||||
{
|
||||
type Key = K;
|
||||
type Value = V;
|
||||
type LeafKeys = [K; INNER_SIZE - 1];
|
||||
type LeafValues = [V; INNER_SIZE - 1];
|
||||
type Comparator = C;
|
||||
|
||||
fn splat_key(key: Self::Key) -> Self::LeafKeys {
|
||||
[key; INNER_SIZE - 1]
|
||||
}
|
||||
|
||||
fn splat_value(value: Self::Value) -> Self::LeafValues {
|
||||
[value; INNER_SIZE - 1]
|
||||
}
|
||||
}
|
||||
|
||||
/// Memory pool for a forest of `Map` instances.
|
||||
pub struct MapForest<K, V, C>
|
||||
where
|
||||
K: Copy,
|
||||
V: Copy,
|
||||
C: Comparator<K>,
|
||||
{
|
||||
nodes: NodePool<MapTypes<K, V, C>>,
|
||||
}
|
||||
|
||||
impl<K, V, C> MapForest<K, V, C>
where
    K: Copy,
    V: Copy,
    C: Comparator<K>,
{
    /// Create a new empty forest.
    pub fn new() -> MapForest<K, V, C> {
        MapForest { nodes: NodePool::new() }
    }

    /// Clear all maps in the forest.
    ///
    /// All `Map` instances belonging to this forest are invalidated and should no longer be used.
    pub fn clear(&mut self) {
        self.nodes.clear();
    }
}
|
||||
|
||||
/// B-tree mapping from `K` to `V` using `C` for comparing keys.
///
/// This is not a general-purpose replacement for `BTreeMap`. See the [module
/// documentation](index.html) for more information about design tradeoffs.
///
/// Maps can be cloned, but that operation should only be used as part of cloning the whole forest
/// they belong to. *Cloning a map does not allocate new memory for the clone*. It creates an alias
/// of the same memory.
#[derive(Clone)]
pub struct Map<K, V, C>
where
    K: Copy,
    V: Copy,
    C: Comparator<K>,
{
    // Root node in the owning forest's pool; `None` means the map is empty.
    root: PackedOption<Node>,
    // Ties the K/V/C parameters to this type without storing any data.
    unused: PhantomData<(K, V, C)>,
}
|
||||
|
||||
impl<K, V, C> Map<K, V, C>
where
    K: Copy,
    V: Copy,
    C: Comparator<K>,
{
    /// Make an empty map.
    pub fn new() -> Map<K, V, C> {
        Map {
            root: None.into(),
            unused: PhantomData,
        }
    }

    /// Is this an empty map?
    pub fn is_empty(&self) -> bool {
        self.root.is_none()
    }

    /// Get the value stored for `key`.
    pub fn get(&self, key: K, forest: &MapForest<K, V, C>, comp: &C) -> Option<V> {
        // A throwaway path is fine here; we only need the lookup result.
        self.root.expand().and_then(|root| {
            Path::default().find(key, root, &forest.nodes, comp)
        })
    }

    /// Look up the value stored for `key`.
    ///
    /// If it exists, return the stored key-value pair.
    ///
    /// Otherwise, return the last key-value pair with a key that is less than or equal to `key`.
    ///
    /// If no stored keys are less than or equal to `key`, return `None`.
    pub fn get_or_less(&self, key: K, forest: &MapForest<K, V, C>, comp: &C) -> Option<(K, V)> {
        self.root.expand().and_then(|root| {
            let mut path = Path::default();
            match path.find(key, root, &forest.nodes, comp) {
                // Exact hit: return the key we were asked about.
                Some(v) => Some((key, v)),
                // Miss: the path points past `key`; step back to the predecessor entry.
                None => path.prev(root, &forest.nodes),
            }
        })
    }

    /// Insert `key, value` into the map and return the old value stored for `key`, if any.
    pub fn insert(
        &mut self,
        key: K,
        value: V,
        forest: &mut MapForest<K, V, C>,
        comp: &C,
    ) -> Option<V> {
        // Delegate to the cursor, which handles node splitting and root updates.
        self.cursor(forest, comp).insert(key, value)
    }

    /// Remove `key` from the map and return the removed value for `key`, if any.
    pub fn remove(&mut self, key: K, forest: &mut MapForest<K, V, C>, comp: &C) -> Option<V> {
        let mut c = self.cursor(forest, comp);
        if c.goto(key).is_some() {
            c.remove()
        } else {
            None
        }
    }

    /// Remove all entries.
    pub fn clear(&mut self, forest: &mut MapForest<K, V, C>) {
        // `take()` leaves `root` as `None`, making the map empty; the nodes return to the pool.
        if let Some(root) = self.root.take() {
            forest.nodes.free_tree(root);
        }
    }

    /// Retains only the elements specified by the predicate.
    ///
    /// Remove all key-value pairs where the predicate returns false.
    ///
    /// The predicate is allowed to update the values stored in the map.
    pub fn retain<F>(&mut self, forest: &mut MapForest<K, V, C>, mut predicate: F)
    where
        F: FnMut(K, &mut V) -> bool,
    {
        let mut path = Path::default();
        if let Some(root) = self.root.expand() {
            path.first(root, &forest.nodes);
        }
        while let Some((node, entry)) = path.leaf_pos() {
            let keep = {
                let (ks, vs) = forest.nodes[node].unwrap_leaf_mut();
                predicate(ks[entry], &mut vs[entry])
            };
            if keep {
                path.next(&forest.nodes);
            } else {
                // Removing may rebalance and change the root; `remove` already advances the path.
                self.root = path.remove(&mut forest.nodes).into();
            }
        }
    }

    /// Create a cursor for navigating this map. The cursor is initially positioned off the end of
    /// the map.
    pub fn cursor<'a>(
        &'a mut self,
        forest: &'a mut MapForest<K, V, C>,
        comp: &'a C,
    ) -> MapCursor<'a, K, V, C> {
        MapCursor::new(self, forest, comp)
    }

    /// Create an iterator traversing this map. The iterator type is `(K, V)`.
    pub fn iter<'a>(&'a self, forest: &'a MapForest<K, V, C>) -> MapIter<'a, K, V, C> {
        MapIter {
            root: self.root,
            pool: &forest.nodes,
            path: Path::default(),
        }
    }
}
|
||||
|
||||
impl<K, V, C> Default for Map<K, V, C>
where
    K: Copy,
    V: Copy,
    C: Comparator<K>,
{
    /// The default map is empty, same as `Map::new()`.
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
#[cfg(test)]
impl<K, V, C> Map<K, V, C>
where
    K: Copy + ::std::fmt::Display,
    V: Copy,
    C: Comparator<K>,
{
    /// Verify consistency.
    fn verify(&self, forest: &MapForest<K, V, C>, comp: &C)
    where
        NodeData<MapTypes<K, V, C>>: ::std::fmt::Display,
    {
        if let Some(root) = self.root.expand() {
            forest.nodes.verify_tree(root, comp);
        }
    }

    /// Get a text version of the path to `key`.
    // Used by tests to assert on the tree's internal geometry.
    fn tpath(&self, key: K, forest: &MapForest<K, V, C>, comp: &C) -> ::std::string::String {
        use std::string::ToString;
        match self.root.expand() {
            None => "map(empty)".to_string(),
            Some(root) => {
                let mut path = Path::default();
                path.find(key, root, &forest.nodes, comp);
                path.to_string()
            }
        }
    }
}
|
||||
|
||||
/// A position in a `Map` used to navigate and modify the ordered map.
///
/// A cursor always points at a key-value pair in the map, or "off the end" which is a position
/// after the last entry in the map.
pub struct MapCursor<'a, K, V, C>
where
    K: 'a + Copy,
    V: 'a + Copy,
    C: 'a + Comparator<K>,
{
    // Mutable borrow of the map's root so insert/remove can update it in place.
    root: &'a mut PackedOption<Node>,
    pool: &'a mut NodePool<MapTypes<K, V, C>>,
    comp: &'a C,
    // Current position in the tree; empty path means "off the end".
    path: Path<MapTypes<K, V, C>>,
}
|
||||
|
||||
impl<'a, K, V, C> MapCursor<'a, K, V, C>
where
    K: Copy,
    V: Copy,
    C: Comparator<K>,
{
    /// Create a cursor with a default (off-the-end) location.
    fn new(
        container: &'a mut Map<K, V, C>,
        forest: &'a mut MapForest<K, V, C>,
        comp: &'a C,
    ) -> MapCursor<'a, K, V, C> {
        MapCursor {
            root: &mut container.root,
            pool: &mut forest.nodes,
            comp,
            path: Path::default(),
        }
    }

    /// Is this cursor pointing to an empty map?
    pub fn is_empty(&self) -> bool {
        self.root.is_none()
    }

    /// Move cursor to the next key-value pair and return it.
    ///
    /// If the cursor reaches the end, return `None` and leave the cursor at the off-the-end
    /// position.
    pub fn next(&mut self) -> Option<(K, V)> {
        self.path.next(self.pool)
    }

    /// Move cursor to the previous key-value pair and return it.
    ///
    /// If the cursor is already pointing at the first entry, leave it there and return `None`.
    pub fn prev(&mut self) -> Option<(K, V)> {
        self.root.expand().and_then(
            |root| self.path.prev(root, self.pool),
        )
    }

    /// Get the current key, or `None` if the cursor is at the end.
    pub fn key(&self) -> Option<K> {
        // `get(entry)` handles the off-the-end position, where `entry` is past the leaf's keys.
        self.path.leaf_pos().and_then(|(node, entry)| {
            self.pool[node].unwrap_leaf().0.get(entry).cloned()
        })
    }

    /// Get the current value, or `None` if the cursor is at the end.
    pub fn value(&self) -> Option<V> {
        self.path.leaf_pos().and_then(|(node, entry)| {
            self.pool[node].unwrap_leaf().1.get(entry).cloned()
        })
    }

    /// Get a mutable reference to the current value, or `None` if the cursor is at the end.
    pub fn value_mut(&mut self) -> Option<&mut V> {
        self.path.leaf_pos().and_then(move |(node, entry)| {
            self.pool[node].unwrap_leaf_mut().1.get_mut(entry)
        })
    }

    /// Move this cursor to `key`.
    ///
    /// If `key` is in the map, place the cursor at `key` and return the corresponding value.
    ///
    /// If `key` is not in the set, place the cursor at the next larger element (or the end) and
    /// return `None`.
    pub fn goto(&mut self, elem: K) -> Option<V> {
        self.root.expand().and_then(|root| {
            let v = self.path.find(elem, root, self.pool, self.comp);
            if v.is_none() {
                // Missed lookups can leave the path in an off-the-edge position; fix it up.
                self.path.normalize(self.pool);
            }
            v
        })
    }

    /// Move this cursor to the first element.
    pub fn goto_first(&mut self) -> Option<V> {
        self.root.map(|root| self.path.first(root, self.pool).1)
    }

    /// Insert `(key, value)` into the map and leave the cursor at the inserted pair.
    ///
    /// If the map did not contain `key`, return `None`.
    ///
    /// If `key` is already present, replace the existing with `value` and return the old value.
    pub fn insert(&mut self, key: K, value: V) -> Option<V> {
        match self.root.expand() {
            // Empty map: the new entry becomes a single leaf root.
            None => {
                let root = self.pool.alloc_node(NodeData::leaf(key, value));
                *self.root = root.into();
                self.path.set_root_node(root);
                None
            }
            Some(root) => {
                // TODO: Optimize the case where `self.path` is already at the correct insert pos.
                let old = self.path.find(key, root, self.pool, self.comp);
                if old.is_some() {
                    // Key exists: replace the value in place, no structural change.
                    *self.path.value_mut(self.pool) = value;
                } else {
                    // New key: insertion may split nodes and produce a new root.
                    *self.root = self.path.insert(key, value, self.pool).into();
                }
                old
            }
        }
    }

    /// Remove the current entry (if any) and return the mapped value.
    /// This advances the cursor to the next entry after the removed one.
    pub fn remove(&mut self) -> Option<V> {
        let value = self.value();
        if value.is_some() {
            // Removal may rebalance the tree and change (or clear) the root.
            *self.root = self.path.remove(self.pool).into();
        }
        value
    }
}
|
||||
|
||||
/// An iterator visiting the key-value pairs of a `Map`.
pub struct MapIter<'a, K, V, C>
where
    K: 'a + Copy,
    V: 'a + Copy,
    C: 'a + Comparator<K>,
{
    // `Some` until the first element has been yielded; see `Iterator::next` below.
    root: PackedOption<Node>,
    pool: &'a NodePool<MapTypes<K, V, C>>,
    path: Path<MapTypes<K, V, C>>,
}
|
||||
|
||||
impl<'a, K, V, C> Iterator for MapIter<'a, K, V, C>
|
||||
where
|
||||
K: 'a + Copy,
|
||||
V: 'a + Copy,
|
||||
C: 'a + Comparator<K>,
|
||||
{
|
||||
type Item = (K, V);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
// We use `self.root` to indicate if we need to go to the first element. Reset to `None`
|
||||
// once we've returned the first element. This also works for an empty tree since the
|
||||
// `path.next()` call returns `None` when the path is empty. This also fuses the iterator.
|
||||
match self.root.take() {
|
||||
Some(root) => Some(self.path.first(root, self.pool)),
|
||||
None => self.path.next(self.pool),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
impl<'a, K, V, C> MapCursor<'a, K, V, C>
where
    K: Copy + ::std::fmt::Display,
    V: Copy + ::std::fmt::Display,
    C: Comparator<K>,
{
    // Check both the cursor's path and the whole tree for structural consistency.
    fn verify(&self) {
        self.path.verify(self.pool);
        self.root.map(|root| self.pool.verify_tree(root, self.comp));
    }

    /// Get a text version of the path to the current position.
    fn tpath(&self) -> ::std::string::String {
        use std::string::ToString;
        self.path.to_string()
    }
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::NodeData;
|
||||
use super::*;
|
||||
use std::mem;
|
||||
use std::vec::Vec;
|
||||
|
||||
#[test]
|
||||
fn node_size() {
|
||||
// check that nodes are cache line sized when keys and values are 32 bits.
|
||||
type F = MapTypes<u32, u32, ()>;
|
||||
assert_eq!(mem::size_of::<NodeData<F>>(), 64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty() {
|
||||
let mut f = MapForest::<u32, f32, ()>::new();
|
||||
f.clear();
|
||||
|
||||
let mut m = Map::<u32, f32, ()>::new();
|
||||
assert!(m.is_empty());
|
||||
m.clear(&mut f);
|
||||
|
||||
assert_eq!(m.get(7, &f, &()), None);
|
||||
assert_eq!(m.iter(&f).next(), None);
|
||||
assert_eq!(m.get_or_less(7, &f, &()), None);
|
||||
m.retain(&mut f, |_, _| unreachable!());
|
||||
|
||||
let mut c = m.cursor(&mut f, &());
|
||||
assert!(c.is_empty());
|
||||
assert_eq!(c.key(), None);
|
||||
assert_eq!(c.value(), None);
|
||||
assert_eq!(c.next(), None);
|
||||
assert_eq!(c.prev(), None);
|
||||
c.verify();
|
||||
assert_eq!(c.tpath(), "<empty path>");
|
||||
assert_eq!(c.goto_first(), None);
|
||||
assert_eq!(c.tpath(), "<empty path>");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn inserting() {
|
||||
let f = &mut MapForest::<u32, f32, ()>::new();
|
||||
let mut m = Map::<u32, f32, ()>::new();
|
||||
|
||||
// The first seven values stay in a single leaf node.
|
||||
assert_eq!(m.insert(50, 5.0, f, &()), None);
|
||||
assert_eq!(m.insert(50, 5.5, f, &()), Some(5.0));
|
||||
assert_eq!(m.insert(20, 2.0, f, &()), None);
|
||||
assert_eq!(m.insert(80, 8.0, f, &()), None);
|
||||
assert_eq!(m.insert(40, 4.0, f, &()), None);
|
||||
assert_eq!(m.insert(60, 6.0, f, &()), None);
|
||||
assert_eq!(m.insert(90, 9.0, f, &()), None);
|
||||
assert_eq!(m.insert(200, 20.0, f, &()), None);
|
||||
|
||||
m.verify(f, &());
|
||||
|
||||
assert_eq!(
|
||||
m.iter(f).collect::<Vec<_>>(),
|
||||
[
|
||||
(20, 2.0),
|
||||
(40, 4.0),
|
||||
(50, 5.5),
|
||||
(60, 6.0),
|
||||
(80, 8.0),
|
||||
(90, 9.0),
|
||||
(200, 20.0),
|
||||
]
|
||||
);
|
||||
|
||||
assert_eq!(m.get(0, f, &()), None);
|
||||
assert_eq!(m.get(20, f, &()), Some(2.0));
|
||||
assert_eq!(m.get(30, f, &()), None);
|
||||
assert_eq!(m.get(40, f, &()), Some(4.0));
|
||||
assert_eq!(m.get(50, f, &()), Some(5.5));
|
||||
assert_eq!(m.get(60, f, &()), Some(6.0));
|
||||
assert_eq!(m.get(70, f, &()), None);
|
||||
assert_eq!(m.get(80, f, &()), Some(8.0));
|
||||
assert_eq!(m.get(100, f, &()), None);
|
||||
|
||||
assert_eq!(m.get_or_less(0, f, &()), None);
|
||||
assert_eq!(m.get_or_less(20, f, &()), Some((20, 2.0)));
|
||||
assert_eq!(m.get_or_less(30, f, &()), Some((20, 2.0)));
|
||||
assert_eq!(m.get_or_less(40, f, &()), Some((40, 4.0)));
|
||||
assert_eq!(m.get_or_less(200, f, &()), Some((200, 20.0)));
|
||||
assert_eq!(m.get_or_less(201, f, &()), Some((200, 20.0)));
|
||||
|
||||
{
|
||||
let mut c = m.cursor(f, &());
|
||||
assert_eq!(c.prev(), Some((200, 20.0)));
|
||||
assert_eq!(c.prev(), Some((90, 9.0)));
|
||||
assert_eq!(c.prev(), Some((80, 8.0)));
|
||||
assert_eq!(c.prev(), Some((60, 6.0)));
|
||||
assert_eq!(c.prev(), Some((50, 5.5)));
|
||||
assert_eq!(c.prev(), Some((40, 4.0)));
|
||||
assert_eq!(c.prev(), Some((20, 2.0)));
|
||||
assert_eq!(c.prev(), None);
|
||||
}
|
||||
|
||||
// Test some removals where the node stays healthy.
|
||||
assert_eq!(m.tpath(50, f, &()), "node0[2]");
|
||||
assert_eq!(m.tpath(80, f, &()), "node0[4]");
|
||||
assert_eq!(m.tpath(200, f, &()), "node0[6]");
|
||||
|
||||
assert_eq!(m.remove(80, f, &()), Some(8.0));
|
||||
assert_eq!(m.tpath(50, f, &()), "node0[2]");
|
||||
assert_eq!(m.tpath(80, f, &()), "node0[4]");
|
||||
assert_eq!(m.tpath(200, f, &()), "node0[5]");
|
||||
assert_eq!(m.remove(80, f, &()), None);
|
||||
m.verify(f, &());
|
||||
|
||||
assert_eq!(m.remove(20, f, &()), Some(2.0));
|
||||
assert_eq!(m.tpath(50, f, &()), "node0[1]");
|
||||
assert_eq!(m.tpath(80, f, &()), "node0[3]");
|
||||
assert_eq!(m.tpath(200, f, &()), "node0[4]");
|
||||
assert_eq!(m.remove(20, f, &()), None);
|
||||
m.verify(f, &());
|
||||
|
||||
// [ 40 50 60 90 200 ]
|
||||
|
||||
{
|
||||
let mut c = m.cursor(f, &());
|
||||
assert_eq!(c.goto_first(), Some(4.0));
|
||||
assert_eq!(c.key(), Some(40));
|
||||
assert_eq!(c.value(), Some(4.0));
|
||||
assert_eq!(c.next(), Some((50, 5.5)));
|
||||
assert_eq!(c.next(), Some((60, 6.0)));
|
||||
assert_eq!(c.next(), Some((90, 9.0)));
|
||||
assert_eq!(c.next(), Some((200, 20.0)));
|
||||
c.verify();
|
||||
assert_eq!(c.next(), None);
|
||||
c.verify();
|
||||
}
|
||||
|
||||
// Removals from the root leaf node beyond underflow.
|
||||
assert_eq!(m.remove(200, f, &()), Some(20.0));
|
||||
assert_eq!(m.remove(40, f, &()), Some(4.0));
|
||||
assert_eq!(m.remove(60, f, &()), Some(6.0));
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.remove(50, f, &()), Some(5.5));
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.remove(90, f, &()), Some(9.0));
|
||||
m.verify(f, &());
|
||||
assert!(m.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn split_level0_leaf() {
|
||||
// Various ways of splitting a full leaf node at level 0.
|
||||
let f = &mut MapForest::<u32, f32, ()>::new();
|
||||
|
||||
fn full_leaf(f: &mut MapForest<u32, f32, ()>) -> Map<u32, f32, ()> {
|
||||
let mut m = Map::new();
|
||||
for n in 1..8 {
|
||||
m.insert(n * 10, n as f32 * 1.1, f, &());
|
||||
}
|
||||
m
|
||||
}
|
||||
|
||||
// Insert at front of leaf.
|
||||
let mut m = full_leaf(f);
|
||||
m.insert(5, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(5, f, &()), Some(4.2));
|
||||
|
||||
// Retain even entries, with altered values.
|
||||
m.retain(f, |k, v| {
|
||||
*v = (k / 10) as f32;
|
||||
(k % 20) == 0
|
||||
});
|
||||
assert_eq!(
|
||||
m.iter(f).collect::<Vec<_>>(),
|
||||
[(20, 2.0), (40, 4.0), (60, 6.0)]
|
||||
);
|
||||
|
||||
// Insert at back of leaf.
|
||||
let mut m = full_leaf(f);
|
||||
m.insert(80, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(80, f, &()), Some(4.2));
|
||||
|
||||
// Insert before middle (40).
|
||||
let mut m = full_leaf(f);
|
||||
m.insert(35, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(35, f, &()), Some(4.2));
|
||||
|
||||
// Insert after middle (40).
|
||||
let mut m = full_leaf(f);
|
||||
m.insert(45, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(45, f, &()), Some(4.2));
|
||||
|
||||
m.clear(f);
|
||||
assert!(m.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn split_level1_leaf() {
|
||||
// Various ways of splitting a full leaf node at level 1.
|
||||
let f = &mut MapForest::<u32, f32, ()>::new();
|
||||
|
||||
// Return a map whose root node is a full inner node, and the leaf nodes are all full
|
||||
// containing:
|
||||
//
|
||||
// 110, 120, ..., 170
|
||||
// 210, 220, ..., 270
|
||||
// ...
|
||||
// 810, 820, ..., 870
|
||||
fn full(f: &mut MapForest<u32, f32, ()>) -> Map<u32, f32, ()> {
|
||||
let mut m = Map::new();
|
||||
|
||||
// Start by inserting elements in order.
|
||||
// This should leave 8 leaf nodes with 4 elements in each.
|
||||
for row in 1..9 {
|
||||
for col in 1..5 {
|
||||
m.insert(row * 100 + col * 10, row as f32 + col as f32 * 0.1, f, &());
|
||||
}
|
||||
}
|
||||
|
||||
// Then top up the leaf nodes without splitting them.
|
||||
for row in 1..9 {
|
||||
for col in 5..8 {
|
||||
m.insert(row * 100 + col * 10, row as f32 + col as f32 * 0.1, f, &());
|
||||
}
|
||||
}
|
||||
|
||||
m
|
||||
}
|
||||
|
||||
let mut m = full(f);
|
||||
// Verify geometry. Get get node2 as the root and leaves node0, 1, 3, ...
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.tpath(110, f, &()), "node2[0]--node0[0]");
|
||||
assert_eq!(m.tpath(140, f, &()), "node2[0]--node0[3]");
|
||||
assert_eq!(m.tpath(210, f, &()), "node2[1]--node1[0]");
|
||||
assert_eq!(m.tpath(270, f, &()), "node2[1]--node1[6]");
|
||||
assert_eq!(m.tpath(310, f, &()), "node2[2]--node3[0]");
|
||||
assert_eq!(m.tpath(810, f, &()), "node2[7]--node8[0]");
|
||||
assert_eq!(m.tpath(870, f, &()), "node2[7]--node8[6]");
|
||||
|
||||
{
|
||||
let mut c = m.cursor(f, &());
|
||||
assert_eq!(c.goto_first(), Some(1.1));
|
||||
assert_eq!(c.key(), Some(110));
|
||||
}
|
||||
|
||||
// Front of first leaf.
|
||||
m.insert(0, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(0, f, &()), Some(4.2));
|
||||
|
||||
// First leaf split 4-4 after appending to LHS.
|
||||
f.clear();
|
||||
m = full(f);
|
||||
m.insert(135, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(135, f, &()), Some(4.2));
|
||||
|
||||
// First leaf split 4-4 after prepending to RHS.
|
||||
f.clear();
|
||||
m = full(f);
|
||||
m.insert(145, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(145, f, &()), Some(4.2));
|
||||
|
||||
// First leaf split 4-4 after appending to RHS.
|
||||
f.clear();
|
||||
m = full(f);
|
||||
m.insert(175, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(175, f, &()), Some(4.2));
|
||||
|
||||
// Left-middle leaf split, ins LHS.
|
||||
f.clear();
|
||||
m = full(f);
|
||||
m.insert(435, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(435, f, &()), Some(4.2));
|
||||
|
||||
// Left-middle leaf split, ins RHS.
|
||||
f.clear();
|
||||
m = full(f);
|
||||
m.insert(445, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(445, f, &()), Some(4.2));
|
||||
|
||||
// Right-middle leaf split, ins LHS.
|
||||
f.clear();
|
||||
m = full(f);
|
||||
m.insert(535, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(535, f, &()), Some(4.2));
|
||||
|
||||
// Right-middle leaf split, ins RHS.
|
||||
f.clear();
|
||||
m = full(f);
|
||||
m.insert(545, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(545, f, &()), Some(4.2));
|
||||
|
||||
// Last leaf split, ins LHS.
|
||||
f.clear();
|
||||
m = full(f);
|
||||
m.insert(835, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(835, f, &()), Some(4.2));
|
||||
|
||||
// Last leaf split, ins RHS.
|
||||
f.clear();
|
||||
m = full(f);
|
||||
m.insert(845, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(845, f, &()), Some(4.2));
|
||||
|
||||
// Front of last leaf.
|
||||
f.clear();
|
||||
m = full(f);
|
||||
m.insert(805, 4.2, f, &());
|
||||
m.verify(f, &());
|
||||
assert_eq!(m.get(805, f, &()), Some(4.2));
|
||||
|
||||
m.clear(f);
|
||||
m.verify(f, &());
|
||||
}
|
||||
|
||||
// Make a tree with two barely healthy leaf nodes:
|
||||
// [ 10 20 30 40 ] [ 50 60 70 80 ]
|
||||
fn two_leaf(f: &mut MapForest<u32, f32, ()>) -> Map<u32, f32, ()> {
    f.clear();
    // Eight evenly spaced keys (10..=80) fill exactly two leaf nodes at minimum health.
    (1..9).fold(Map::new(), |mut map, n| {
        map.insert(n * 10, n as f32, f, &());
        map
    })
}
|
||||
|
||||
#[test]
fn remove_level1() {
    let f = &mut MapForest::<u32, f32, ()>::new();
    // Starting shape (from `two_leaf`): [ 10 20 30 40 ] [ 50 60 70 80 ]
    let mut m = two_leaf(f);

    // Verify geometry.
    m.verify(f, &());
    assert_eq!(m.tpath(10, f, &()), "node2[0]--node0[0]");
    assert_eq!(m.tpath(40, f, &()), "node2[0]--node0[3]");
    assert_eq!(m.tpath(49, f, &()), "node2[0]--node0[4]");
    assert_eq!(m.tpath(50, f, &()), "node2[1]--node1[0]");
    assert_eq!(m.tpath(80, f, &()), "node2[1]--node1[3]");

    // Remove the front entry from a node that stays healthy.
    assert_eq!(m.insert(55, 5.5, f, &()), None);
    assert_eq!(m.remove(50, f, &()), Some(5.0));
    m.verify(f, &());
    assert_eq!(m.tpath(49, f, &()), "node2[0]--node0[4]");
    assert_eq!(m.tpath(50, f, &()), "node2[0]--node0[4]");
    assert_eq!(m.tpath(55, f, &()), "node2[1]--node1[0]");

    // Remove the front entry from the first leaf node: No critical key to update.
    assert_eq!(m.insert(15, 1.5, f, &()), None);
    assert_eq!(m.remove(10, f, &()), Some(1.0));
    m.verify(f, &());

    // [ 15 20 30 40 ] [ 55 60 70 80 ]

    // Remove the front entry from a right-most node that underflows.
    // No rebalancing for the right-most node. Still need critical key update.
    assert_eq!(m.remove(55, f, &()), Some(5.5));
    m.verify(f, &());
    assert_eq!(m.tpath(55, f, &()), "node2[0]--node0[4]");
    assert_eq!(m.tpath(60, f, &()), "node2[1]--node1[0]");

    // [ 15 20 30 40 ] [ 60 70 80 ]

    // Replenish the right leaf.
    assert_eq!(m.insert(90, 9.0, f, &()), None);
    assert_eq!(m.insert(100, 10.0, f, &()), None);
    m.verify(f, &());
    assert_eq!(m.tpath(55, f, &()), "node2[0]--node0[4]");
    assert_eq!(m.tpath(60, f, &()), "node2[1]--node1[0]");

    // [ 15 20 30 40 ] [ 60 70 80 90 100 ]

    // Removing one entry from the left leaf should trigger a rebalancing from the right
    // sibling.
    assert_eq!(m.remove(20, f, &()), Some(2.0));
    m.verify(f, &());

    // [ 15 30 40 60 ] [ 70 80 90 100 ]
    // Check that the critical key was updated correctly.
    assert_eq!(m.tpath(50, f, &()), "node2[0]--node0[3]");
    assert_eq!(m.tpath(60, f, &()), "node2[0]--node0[3]");
    assert_eq!(m.tpath(70, f, &()), "node2[1]--node1[0]");

    // Remove front entry from the left-most leaf node, underflowing.
    // This should cause two leaf nodes to be merged and the root node to go away.
    assert_eq!(m.remove(15, f, &()), Some(1.5));
    m.verify(f, &());
}
|
||||
|
||||
#[test]
fn remove_level1_rightmost() {
    let f = &mut MapForest::<u32, f32, ()>::new();
    let mut m = two_leaf(f);

    // [ 10 20 30 40 ] [ 50 60 70 80 ]

    // Remove entries from the right leaf. This doesn't trigger a rebalancing.
    // (The right-most leaf is allowed to stay below half capacity.)
    assert_eq!(m.remove(60, f, &()), Some(6.0));
    assert_eq!(m.remove(80, f, &()), Some(8.0));
    assert_eq!(m.remove(50, f, &()), Some(5.0));
    m.verify(f, &());

    // [ 10 20 30 40 ] [ 70 ]
    assert_eq!(m.tpath(50, f, &()), "node2[0]--node0[4]");
    assert_eq!(m.tpath(70, f, &()), "node2[1]--node1[0]");

    // Removing the last entry from the right leaf should cause a collapse.
    assert_eq!(m.remove(70, f, &()), Some(7.0));
    m.verify(f, &());
}
|
||||
|
||||
// Make a 3-level tree with barely healthy nodes.
|
||||
// 1 root, 8 inner nodes, 7*4+5=33 leaf nodes, 4 entries each.
|
||||
fn level3_sparse(f: &mut MapForest<u32, f32, ()>) -> Map<u32, f32, ()> {
    f.clear();
    // 132 entries at keys 10, 20, ..., 1320 produce the 3-level shape described above.
    let mut m = Map::new();
    let mut n = 1u32;
    while n < 133 {
        m.insert(n * 10, n as f32, f, &());
        n += 1;
    }
    m
}
|
||||
|
||||
#[test]
fn level3_removes() {
    let f = &mut MapForest::<u32, f32, ()>::new();
    let mut m = level3_sparse(f);
    m.verify(f, &());

    // Check geometry.
    // Root: node11
    // [ node2 170 node10 330 node16 490 node21 650 node26 810 node31 970 node36 1130 node41 ]
    // L1: node11
    assert_eq!(m.tpath(0, f, &()), "node11[0]--node2[0]--node0[0]");
    assert_eq!(m.tpath(10000, f, &()), "node11[7]--node41[4]--node40[4]");

    // 650 is a critical key in the middle of the root.
    assert_eq!(m.tpath(640, f, &()), "node11[3]--node21[3]--node19[3]");
    assert_eq!(m.tpath(650, f, &()), "node11[4]--node26[0]--node20[0]");

    // Deleting 640 triggers a rebalance from node19 to node 20, cascading to n21 -> n26.
    assert_eq!(m.remove(640, f, &()), Some(64.0));
    m.verify(f, &());
    assert_eq!(m.tpath(650, f, &()), "node11[3]--node26[3]--node20[3]");

    // 1130 is in the first leaf of the last L1 node. Deleting it triggers a rebalance node35
    // -> node37, but no rebalance above where there is no right sibling.
    assert_eq!(m.tpath(1130, f, &()), "node11[6]--node41[0]--node35[0]");
    assert_eq!(m.tpath(1140, f, &()), "node11[6]--node41[0]--node35[1]");
    assert_eq!(m.remove(1130, f, &()), Some(113.0));
    m.verify(f, &());
    assert_eq!(m.tpath(1140, f, &()), "node11[6]--node41[0]--node37[0]");
}
|
||||
|
||||
#[test]
fn insert_many() {
    let f = &mut MapForest::<u32, f32, ()>::new();
    let mut m = Map::<u32, f32, ()>::new();

    let mm = 4096;
    let mut x = 0;

    // Insert all keys 0..mm in a scrambled but deterministic order: `x` walks the
    // triangular numbers mod `mm`. The `None` asserts confirm at runtime that every
    // key is visited exactly once.
    for n in 0..mm {
        assert_eq!(m.insert(x, n as f32, f, &()), None);
        m.verify(f, &());

        x = (x + n + 1) % mm;
    }

    // Replay the same key sequence to check every stored value.
    x = 0;
    for n in 0..mm {
        assert_eq!(m.get(x, f, &()), Some(n as f32));
        x = (x + n + 1) % mm;
    }

    // Remove everything in the same order, verifying tree invariants after each step.
    x = 0;
    for n in 0..mm {
        assert_eq!(m.remove(x, f, &()), Some(n as f32));
        m.verify(f, &());

        x = (x + n + 1) % mm;
    }

    assert!(m.is_empty());
}
|
||||
}
|
||||
172
lib/codegen/src/bforest/mod.rs
Normal file
172
lib/codegen/src/bforest/mod.rs
Normal file
@@ -0,0 +1,172 @@
|
||||
//! A forest of B+-trees.
|
||||
//!
|
||||
//! This module provides a data structures representing a set of small ordered sets or maps.
|
||||
//! It is implemented as a forest of B+-trees all allocating nodes out of the same pool.
|
||||
//!
|
||||
//! **These are not general purpose data structures that are somehow magically faster that the
|
||||
//! standard library's `BTreeSet` and `BTreeMap` types.**
|
||||
//!
|
||||
//! The tradeoffs are different:
|
||||
//!
|
||||
//! - Keys and values are expected to be small and copyable. We optimize for 32-bit types.
|
||||
//! - A comparator object is used to compare keys, allowing smaller "context free" keys.
|
||||
//! - Empty trees have a very small 32-bit footprint.
|
||||
//! - All the trees in a forest can be cleared in constant time.
|
||||
|
||||
use std::borrow::BorrowMut;
|
||||
use std::cmp::Ordering;
|
||||
|
||||
mod map;
|
||||
mod node;
|
||||
mod path;
|
||||
mod pool;
|
||||
mod set;
|
||||
|
||||
pub use self::map::{Map, MapCursor, MapForest, MapIter};
|
||||
pub use self::set::{Set, SetCursor, SetForest, SetIter};
|
||||
|
||||
use self::node::NodeData;
|
||||
use self::path::Path;
|
||||
use self::pool::NodePool;
|
||||
|
||||
/// The maximum branching factor of an inner node in a B+-tree.
|
||||
/// The minimum number of outgoing edges is `INNER_SIZE/2`.
|
||||
const INNER_SIZE: usize = 8;
|
||||
|
||||
/// Given the worst case branching factor of `INNER_SIZE/2` = 4, this is the
|
||||
/// worst case path length from the root node to a leaf node in a tree with 2^32
|
||||
/// entries. We would run out of node references before we hit `MAX_PATH`.
|
||||
const MAX_PATH: usize = 16;
|
||||
|
||||
/// Key comparator.
///
/// Keys don't need to implement `Ord`. They are compared using a comparator object which
/// provides a context for comparison.
pub trait Comparator<K>
where
    K: Copy,
{
    /// Compare keys `a` and `b`.
    ///
    /// This relation must provide a total ordering of the key space.
    fn cmp(&self, a: K, b: K) -> Ordering;

    /// Binary search for `k` in an ordered slice.
    ///
    /// Assume that `s` is already sorted according to this ordering, search for the key `k`.
    ///
    /// Returns `Ok(idx)` if `k` was found in the slice or `Err(idx)` with the position where it
    /// should be inserted to preserve the ordering.
    fn search(&self, k: K, s: &[K]) -> Result<usize, usize> {
        // Delegate to the standard library, but order elements with `self.cmp`.
        s.binary_search_by(|x| self.cmp(*x, k))
    }
}
|
||||
|
||||
/// Trivial comparator that doesn't actually provide any context.
|
||||
impl<K> Comparator<K> for ()
|
||||
where
|
||||
K: Copy + Ord,
|
||||
{
|
||||
fn cmp(&self, a: K, b: K) -> Ordering {
|
||||
a.cmp(&b)
|
||||
}
|
||||
}
|
||||
|
||||
/// Family of types shared by the map and set forest implementations.
|
||||
trait Forest {
|
||||
/// The key type is present for both sets and maps.
|
||||
type Key: Copy;
|
||||
|
||||
/// The value type is `()` for sets.
|
||||
type Value: Copy;
|
||||
|
||||
/// An array of keys for the leaf nodes.
|
||||
type LeafKeys: Copy + BorrowMut<[Self::Key]>;
|
||||
|
||||
/// An array of values for the leaf nodes.
|
||||
type LeafValues: Copy + BorrowMut<[Self::Value]>;
|
||||
|
||||
/// Type used for key comparisons.
|
||||
type Comparator: Comparator<Self::Key>;
|
||||
|
||||
/// Splat a single key into a whole array.
|
||||
fn splat_key(key: Self::Key) -> Self::LeafKeys;
|
||||
|
||||
/// Splat a single value inst a whole array
|
||||
fn splat_value(value: Self::Value) -> Self::LeafValues;
|
||||
}
|
||||
|
||||
/// A reference to a B+-tree node.
///
/// A 32-bit index; presumably into the forest's shared `NodePool` — confirm at the pool's
/// definition. The `entity_impl!` macro provides the usual entity plumbing (formatted as
/// `nodeN` in the tests).
#[derive(Clone, Copy, PartialEq, Eq)]
struct Node(u32);
entity_impl!(Node, "node");
|
||||
|
||||
/// Empty type to be used as the "value" in B-trees representing sets.
///
/// Zero-sized, so leaf value arrays of `SetValue` occupy no space.
#[derive(Clone, Copy)]
struct SetValue();
|
||||
|
||||
/// Insert `x` into `s` at position `i`, pushing out the last element.
///
/// Everything from `s[i]` onward moves one slot to the right; the final element is
/// discarded. Panics if `i >= s.len()` (same as indexing would).
fn slice_insert<T: Copy>(s: &mut [T], i: usize, x: T) {
    // Ripple the incoming value through the tail: each slot takes the carried value
    // and hands its old contents on. The last evicted element is simply dropped.
    let mut carry = x;
    for slot in s[i..].iter_mut() {
        let evicted = *slot;
        *slot = carry;
        carry = evicted;
    }
}
|
||||
|
||||
/// Shift elements in `s` to the left by `n` positions.
///
/// The last `n` slots keep their old contents. Panics if `n > s.len()`.
fn slice_shift<T: Copy>(s: &mut [T], n: usize) {
    // Number of elements that survive the shift; underflows (panics) when n > len,
    // matching the original contract.
    let keep = s.len() - n;
    for dst in 0..keep {
        s[dst] = s[dst + n];
    }
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use entity::EntityRef;
    use ir::Ebb;

    // The unit comparator must defer to the key's `Ord`, and `search` must report
    // both hits (`Ok`) and insertion points (`Err`).
    #[test]
    fn comparator() {
        let ebb1 = Ebb::new(1);
        let ebb2 = Ebb::new(2);
        let ebb3 = Ebb::new(3);
        let ebb4 = Ebb::new(4);
        let vals = [ebb1, ebb2, ebb4];
        let comp = ();
        assert_eq!(comp.search(ebb1, &vals), Ok(0));
        assert_eq!(comp.search(ebb3, &vals), Err(2));
        assert_eq!(comp.search(ebb4, &vals), Ok(2));
    }

    // `slice_insert` shifts right within the given sub-slice only, evicting its last element.
    #[test]
    fn slice_insertion() {
        let mut a = ['a', 'b', 'c', 'd'];

        slice_insert(&mut a[0..1], 0, 'e');
        assert_eq!(a, ['e', 'b', 'c', 'd']);

        slice_insert(&mut a, 0, 'a');
        assert_eq!(a, ['a', 'e', 'b', 'c']);

        slice_insert(&mut a, 3, 'g');
        assert_eq!(a, ['a', 'e', 'b', 'g']);

        slice_insert(&mut a, 1, 'h');
        assert_eq!(a, ['a', 'h', 'e', 'b']);
    }

    // `slice_shift` moves elements left; the vacated tail keeps its old contents.
    #[test]
    fn slice_shifting() {
        let mut a = ['a', 'b', 'c', 'd'];

        slice_shift(&mut a[0..1], 1);
        assert_eq!(a, ['a', 'b', 'c', 'd']);

        slice_shift(&mut a[1..], 1);
        assert_eq!(a, ['a', 'c', 'd', 'd']);

        slice_shift(&mut a, 2);
        assert_eq!(a, ['d', 'd', 'd', 'd']);
    }
}
|
||||
814
lib/codegen/src/bforest/node.rs
Normal file
814
lib/codegen/src/bforest/node.rs
Normal file
@@ -0,0 +1,814 @@
|
||||
//! B+-tree nodes.
|
||||
|
||||
use super::{slice_insert, slice_shift, Forest, Node, SetValue, INNER_SIZE};
|
||||
use std::borrow::{Borrow, BorrowMut};
|
||||
use std::fmt;
|
||||
|
||||
/// B+-tree node.
///
/// A B+-tree has different node types for inner nodes and leaf nodes. Inner nodes contain M node
/// references and M-1 keys while leaf nodes contain N keys and values. Values for M and N are
/// chosen such that a node is exactly 64 bytes (a cache line) when keys and values are 32 bits
/// each.
///
/// An inner node contains at least M/2 node references unless it is the right-most node at its
/// level. A leaf node contains at least N/2 keys unless it is the right-most leaf.
pub(super) enum NodeData<F: Forest> {
    Inner {
        /// The number of keys in this node.
        /// The number of node references is always one more.
        size: u8,

        /// Keys discriminating sub-trees.
        ///
        /// The key in `keys[i]` is greater than all keys in `tree[i]` and less than or equal to
        /// all keys in `tree[i+1]`.
        keys: [F::Key; INNER_SIZE - 1],

        /// Sub-trees.
        tree: [Node; INNER_SIZE],
    },
    Leaf {
        /// Number of key-value pairs in this node.
        size: u8,

        /// Key array; only the first `size` entries are meaningful.
        keys: F::LeafKeys,

        /// Value array, parallel to `keys`.
        vals: F::LeafValues,
    },
    /// An unused node on the free list.
    Free { next: Option<Node> },
}
|
||||
|
||||
// Implement `Clone` and `Copy` manually, because deriving them would also require `Forest` to
// implement `Clone`. (The derive would add an unnecessary `F: Copy`/`F: Clone` bound even
// though no `F` value is stored.)
impl<F: Forest> Copy for NodeData<F> {}
impl<F: Forest> Clone for NodeData<F> {
    fn clone(&self) -> Self {
        *self
    }
}
|
||||
|
||||
impl<F: Forest> NodeData<F> {
|
||||
/// Is this a free/unused node?
|
||||
pub fn is_free(&self) -> bool {
|
||||
match *self {
|
||||
NodeData::Free { .. } => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the number of entries in this node.
///
/// This is the number of outgoing edges in an inner node, or the number of key-value pairs in
/// a leaf node.
///
/// Panics if called on a `Free` node.
pub fn entries(&self) -> usize {
    match *self {
        // An inner node's `size` counts keys; sub-tree edges are always one more.
        NodeData::Inner { size, .. } => usize::from(size) + 1,
        NodeData::Leaf { size, .. } => usize::from(size),
        NodeData::Free { .. } => panic!("freed node"),
    }
}
|
||||
|
||||
/// Create an inner node with a single key and two sub-trees.
pub fn inner(left: Node, key: F::Key, right: Node) -> NodeData<F> {
    // Splat the key and right node to the whole array.
    // Saves us from inventing a default/reserved value.
    let mut tree = [right; INNER_SIZE];
    tree[0] = left;
    NodeData::Inner {
        // One key => two live sub-trees; the rest of the arrays is padding.
        size: 1,
        keys: [key; INNER_SIZE - 1],
        tree,
    }
}
|
||||
|
||||
/// Create a leaf node with a single key-value pair.
pub fn leaf(key: F::Key, value: F::Value) -> NodeData<F> {
    NodeData::Leaf {
        size: 1,
        // Splatting fills the whole array so no default/reserved element is needed.
        keys: F::splat_key(key),
        vals: F::splat_value(value),
    }
}
|
||||
|
||||
/// Unwrap an inner node into two slices (keys, trees).
///
/// The returned tree slice is always one element longer than the key slice.
/// Panics if this is not an inner node.
pub fn unwrap_inner(&self) -> (&[F::Key], &[Node]) {
    match *self {
        NodeData::Inner {
            size,
            ref keys,
            ref tree,
        } => {
            let size = usize::from(size);
            // TODO: We could probably use `get_unchecked()` here since `size` is always in
            // range.
            (&keys[0..size], &tree[0..size + 1])
        }
        _ => panic!("Expected inner node"),
    }
}
|
||||
|
||||
/// Unwrap a leaf node into two slices (keys, values) of the same length.
///
/// Panics if this is not a leaf node.
pub fn unwrap_leaf(&self) -> (&[F::Key], &[F::Value]) {
    match *self {
        NodeData::Leaf {
            size,
            ref keys,
            ref vals,
        } => {
            let size = usize::from(size);
            // Go through `Borrow` to view the opaque array types as slices.
            let keys = keys.borrow();
            let vals = vals.borrow();
            // TODO: We could probably use `get_unchecked()` here since `size` is always in
            // range.
            (&keys[0..size], &vals[0..size])
        }
        _ => panic!("Expected leaf node"),
    }
}
|
||||
|
||||
/// Unwrap a mutable leaf node into two slices (keys, values) of the same length.
///
/// Panics if this is not a leaf node.
pub fn unwrap_leaf_mut(&mut self) -> (&mut [F::Key], &mut [F::Value]) {
    match *self {
        NodeData::Leaf {
            size,
            ref mut keys,
            ref mut vals,
        } => {
            let size = usize::from(size);
            // Go through `BorrowMut` to view the opaque array types as slices.
            let keys = keys.borrow_mut();
            let vals = vals.borrow_mut();
            // TODO: We could probably use `get_unchecked_mut()` here since `size` is always in
            // range.
            (&mut keys[0..size], &mut vals[0..size])
        }
        _ => panic!("Expected leaf node"),
    }
}
|
||||
|
||||
/// Get the critical key for a leaf node.
/// This is simply the first key.
///
/// Panics if this is not a leaf node; in debug builds, also asserts the leaf is non-empty.
pub fn leaf_crit_key(&self) -> F::Key {
    match *self {
        NodeData::Leaf { size, ref keys, .. } => {
            debug_assert!(size > 0, "Empty leaf node");
            keys.borrow()[0]
        }
        _ => panic!("Expected leaf node"),
    }
}
|
||||
|
||||
/// Try to insert `(key, node)` at key-position `index` in an inner node.
/// This means that `key` is inserted at `keys[i]` and `node` is inserted at `tree[i + 1]`.
/// If the node is full, this leaves the node unchanged and returns false.
pub fn try_inner_insert(&mut self, index: usize, key: F::Key, node: Node) -> bool {
    match *self {
        NodeData::Inner {
            ref mut size,
            ref mut keys,
            ref mut tree,
        } => {
            let sz = usize::from(*size);
            debug_assert!(sz <= keys.len());
            debug_assert!(index <= sz, "Can't insert at {} with {} keys", index, sz);

            // `get_mut` doubles as the capacity check: it returns `None` exactly when
            // the node is already full (sz == keys.len()).
            if let Some(ks) = keys.get_mut(0..sz + 1) {
                *size = (sz + 1) as u8;
                slice_insert(ks, index, key);
                // The new sub-tree goes one position to the right of the new key.
                slice_insert(&mut tree[1..sz + 2], index, node);
                true
            } else {
                false
            }
        }
        _ => panic!("Expected inner node"),
    }
}
|
||||
|
||||
/// Try to insert `key, value` at `index` in a leaf node, but fail and return false if the node
/// is full.
pub fn try_leaf_insert(&mut self, index: usize, key: F::Key, value: F::Value) -> bool {
    match *self {
        NodeData::Leaf {
            ref mut size,
            ref mut keys,
            ref mut vals,
        } => {
            let sz = usize::from(*size);
            let keys = keys.borrow_mut();
            let vals = vals.borrow_mut();
            debug_assert!(sz <= keys.len());
            debug_assert!(index <= sz);

            // `get_mut` doubles as the capacity check: it returns `None` exactly when
            // the leaf is already full (sz == keys.len()).
            if let Some(ks) = keys.get_mut(0..sz + 1) {
                *size = (sz + 1) as u8;
                slice_insert(ks, index, key);
                slice_insert(&mut vals[0..sz + 1], index, value);
                true
            } else {
                false
            }
        }
        _ => panic!("Expected leaf node"),
    }
}
|
||||
|
||||
/// Split off the second half of this node.
|
||||
/// It is assumed that this a completely full inner or leaf node.
|
||||
///
|
||||
/// The `insert_index` parameter is the position where an insertion was tried and failed. The
|
||||
/// node will be split in half with a bias towards an even split after the insertion is retried.
|
||||
pub fn split(&mut self, insert_index: usize) -> SplitOff<F> {
|
||||
match *self {
|
||||
NodeData::Inner {
|
||||
ref mut size,
|
||||
ref keys,
|
||||
ref tree,
|
||||
} => {
|
||||
debug_assert_eq!(usize::from(*size), keys.len(), "Node not full");
|
||||
|
||||
// Number of tree entries in the lhs node.
|
||||
let l_ents = split_pos(tree.len(), insert_index + 1);
|
||||
let r_ents = tree.len() - l_ents;
|
||||
|
||||
// With INNER_SIZE=8, we get l_ents=4 and:
|
||||
//
|
||||
// self: [ n0 k0 n1 k1 n2 k2 n3 k3 n4 k4 n5 k5 n6 k6 n7 ]
|
||||
// lhs: [ n0 k0 n1 k1 n2 k2 n3 ]
|
||||
// crit_key = k3 (not present in either node)
|
||||
// rhs: [ n4 k4 n5 k5 n6 k6 n7 ]
|
||||
|
||||
// 1. Truncate the LHS.
|
||||
*size = (l_ents - 1) as u8;
|
||||
|
||||
// 2. Copy second half to `rhs_data`.
|
||||
let mut r_keys = *keys;
|
||||
r_keys[0..r_ents - 1].copy_from_slice(&keys[l_ents..]);
|
||||
|
||||
let mut r_tree = *tree;
|
||||
r_tree[0..r_ents].copy_from_slice(&tree[l_ents..]);
|
||||
|
||||
SplitOff {
|
||||
lhs_entries: l_ents,
|
||||
rhs_entries: r_ents,
|
||||
crit_key: keys[l_ents - 1],
|
||||
rhs_data: NodeData::Inner {
|
||||
size: (r_ents - 1) as u8,
|
||||
keys: r_keys,
|
||||
tree: r_tree,
|
||||
},
|
||||
}
|
||||
}
|
||||
NodeData::Leaf {
|
||||
ref mut size,
|
||||
ref keys,
|
||||
ref vals,
|
||||
} => {
|
||||
let o_keys = keys.borrow();
|
||||
let o_vals = vals.borrow();
|
||||
debug_assert_eq!(usize::from(*size), o_keys.len(), "Node not full");
|
||||
|
||||
let l_size = split_pos(o_keys.len(), insert_index);
|
||||
let r_size = o_keys.len() - l_size;
|
||||
|
||||
// 1. Truncate the LHS node at `l_size`.
|
||||
*size = l_size as u8;
|
||||
|
||||
// 2. Copy second half to `rhs_data`.
|
||||
let mut r_keys = *keys;
|
||||
r_keys.borrow_mut()[0..r_size].copy_from_slice(&o_keys[l_size..]);
|
||||
|
||||
let mut r_vals = *vals;
|
||||
r_vals.borrow_mut()[0..r_size].copy_from_slice(&o_vals[l_size..]);
|
||||
|
||||
SplitOff {
|
||||
lhs_entries: l_size,
|
||||
rhs_entries: r_size,
|
||||
crit_key: o_keys[l_size],
|
||||
rhs_data: NodeData::Leaf {
|
||||
size: r_size as u8,
|
||||
keys: r_keys,
|
||||
vals: r_vals,
|
||||
},
|
||||
}
|
||||
}
|
||||
_ => panic!("Expected leaf node"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Remove the sub-tree at `index` from this inner node.
///
/// Note that `index` refers to a sub-tree entry and not a key entry as it does for
/// `try_inner_insert()`. It is possible to remove the first sub-tree (which can't be inserted
/// by `try_inner_insert()`).
///
/// Return an indication of the node's health (i.e. below half capacity).
pub fn inner_remove(&mut self, index: usize) -> Removed {
    match *self {
        NodeData::Inner {
            ref mut size,
            ref mut keys,
            ref mut tree,
        } => {
            let ents = usize::from(*size) + 1;
            debug_assert!(ents <= tree.len());
            debug_assert!(index < ents);
            // Leave an invalid 0xff size when node becomes empty.
            // (wrapping_sub avoids a debug-build overflow panic for ents == 1.)
            *size = ents.wrapping_sub(2) as u8;
            if ents > 1 {
                // Removing tree[i] deletes key[i-1]; removing tree[0] deletes key[0]
                // instead, hence the saturating_sub.
                slice_shift(&mut keys[index.saturating_sub(1)..ents - 1], 1);
            }
            slice_shift(&mut tree[index..ents], 1);
            Removed::new(index, ents - 1, tree.len())
        }
        _ => panic!("Expected inner node"),
    }
}
|
||||
|
||||
/// Remove the key-value pair at `index` from this leaf node.
///
/// Return an indication of the node's health (i.e. below half capacity).
pub fn leaf_remove(&mut self, index: usize) -> Removed {
    match *self {
        NodeData::Leaf {
            ref mut size,
            ref mut keys,
            ref mut vals,
        } => {
            let sz = usize::from(*size);
            let keys = keys.borrow_mut();
            let vals = vals.borrow_mut();
            *size -= 1;
            // Close the gap in both parallel arrays.
            slice_shift(&mut keys[index..sz], 1);
            slice_shift(&mut vals[index..sz], 1);
            Removed::new(index, sz - 1, keys.len())
        }
        _ => panic!("Expected leaf node"),
    }
}
|
||||
|
||||
/// Balance this node with its right sibling.
///
/// It is assumed that the current node has underflowed. Look at the right sibling node and do
/// one of two things:
///
/// 1. Move all entries to the right node, leaving this node empty, or
/// 2. Distribute entries evenly between the two nodes.
///
/// In the first case, `None` is returned. In the second case, the new critical key for the
/// right sibling node is returned.
///
/// Panics if `self` and `rhs` are not the same kind of node.
pub fn balance(&mut self, crit_key: F::Key, rhs: &mut NodeData<F>) -> Option<F::Key> {
    match (self, rhs) {
        (&mut NodeData::Inner {
            size: ref mut l_size,
            keys: ref mut l_keys,
            tree: ref mut l_tree,
        },
        &mut NodeData::Inner {
            size: ref mut r_size,
            keys: ref mut r_keys,
            tree: ref mut r_tree,
        }) => {
            // Inner node `size` counts keys; the entry (sub-tree) count is one more.
            let l_ents = usize::from(*l_size) + 1;
            let r_ents = usize::from(*r_size) + 1;
            let ents = l_ents + r_ents;

            if ents <= r_tree.len() {
                // All entries will fit in the RHS node.
                // We'll leave the LHS node empty, but first use it as a scratch space.
                *l_size = 0;
                // Insert `crit_key` between the two nodes.
                l_keys[l_ents - 1] = crit_key;
                l_keys[l_ents..ents - 1].copy_from_slice(&r_keys[0..r_ents - 1]);
                r_keys[0..ents - 1].copy_from_slice(&l_keys[0..ents - 1]);
                l_tree[l_ents..ents].copy_from_slice(&r_tree[0..r_ents]);
                r_tree[0..ents].copy_from_slice(&l_tree[0..ents]);
                *r_size = (ents - 1) as u8;
                None
            } else {
                // The entries don't all fit in one node. Distribute some from RHS -> LHS.
                // Split evenly with a bias to putting one entry in LHS.
                let r_goal = ents / 2;
                let l_goal = ents - r_goal;
                debug_assert!(l_goal > l_ents, "Node must be underflowed");

                // The old critical key becomes a regular key in the merged LHS prefix.
                l_keys[l_ents - 1] = crit_key;
                l_keys[l_ents..l_goal - 1].copy_from_slice(&r_keys[0..l_goal - 1 - l_ents]);
                l_tree[l_ents..l_goal].copy_from_slice(&r_tree[0..l_goal - l_ents]);
                *l_size = (l_goal - 1) as u8;

                // The key just before the RHS's new first sub-tree separates the nodes.
                let new_crit = r_keys[r_ents - r_goal - 1];
                slice_shift(&mut r_keys[0..r_ents - 1], r_ents - r_goal);
                slice_shift(&mut r_tree[0..r_ents], r_ents - r_goal);
                *r_size = (r_goal - 1) as u8;

                Some(new_crit)
            }
        }
        (&mut NodeData::Leaf {
            size: ref mut l_size,
            keys: ref mut l_keys,
            vals: ref mut l_vals,
        },
        &mut NodeData::Leaf {
            size: ref mut r_size,
            keys: ref mut r_keys,
            vals: ref mut r_vals,
        }) => {
            let l_ents = usize::from(*l_size);
            let l_keys = l_keys.borrow_mut();
            let l_vals = l_vals.borrow_mut();
            let r_ents = usize::from(*r_size);
            let r_keys = r_keys.borrow_mut();
            let r_vals = r_vals.borrow_mut();
            let ents = l_ents + r_ents;

            if ents <= r_vals.len() {
                // We can fit all entries in the RHS node.
                // We'll leave the LHS node empty, but first use it as a scratch space.
                *l_size = 0;
                l_keys[l_ents..ents].copy_from_slice(&r_keys[0..r_ents]);
                r_keys[0..ents].copy_from_slice(&l_keys[0..ents]);
                l_vals[l_ents..ents].copy_from_slice(&r_vals[0..r_ents]);
                r_vals[0..ents].copy_from_slice(&l_vals[0..ents]);
                *r_size = ents as u8;
                None
            } else {
                // The entries don't all fit in one node. Distribute some from RHS -> LHS.
                // Split evenly with a bias to putting one entry in LHS.
                let r_goal = ents / 2;
                let l_goal = ents - r_goal;
                debug_assert!(l_goal > l_ents, "Node must be underflowed");

                l_keys[l_ents..l_goal].copy_from_slice(&r_keys[0..l_goal - l_ents]);
                l_vals[l_ents..l_goal].copy_from_slice(&r_vals[0..l_goal - l_ents]);
                *l_size = l_goal as u8;

                slice_shift(&mut r_keys[0..r_ents], r_ents - r_goal);
                slice_shift(&mut r_vals[0..r_ents], r_ents - r_goal);
                *r_size = r_goal as u8;

                // For leaves, the RHS's new first key is the new critical key.
                Some(r_keys[0])
            }
        }
        _ => panic!("Mismatched nodes"),
    }
}
|
||||
}
|
||||
|
||||
/// Find the right split position for halving a full node with `len` entries to recover from a
/// failed insertion at `ins`.
///
/// If `len` is even, we should split straight down the middle regardless of `ins`.
///
/// If `len` is odd, we should split the node such that the two halves are the same size after
/// the insertion is retried.
fn split_pos(len: usize, ins: usize) -> usize {
    // Anticipate `len` being a compile time constant, so this all folds away when `len` is
    // even (both arms are then identical).
    let into_lower_half = ins <= len / 2;
    if into_lower_half {
        len / 2
    } else {
        (len + 1) / 2
    }
}
|
||||
|
||||
/// The result of splitting off the second half of a node.
pub(super) struct SplitOff<F: Forest> {
    /// The number of entries left in the original node which becomes the left-hand-side of the
    /// pair. This is the number of outgoing node edges for an inner node, and the number of
    /// key-value pairs for a leaf node.
    pub lhs_entries: usize,

    /// The number of entries in the new RHS node.
    pub rhs_entries: usize,

    /// The critical key separating the LHS and RHS nodes. All keys in the LHS sub-tree are less
    /// than the critical key, and all entries in the RHS sub-tree are greater or equal to the
    /// critical key.
    ///
    /// For an inner split this key is in neither node; for a leaf split it is the RHS's first
    /// key.
    pub crit_key: F::Key,

    /// The RHS node data containing the elements that were removed from the original node (now
    /// the LHS).
    pub rhs_data: NodeData<F>,
}
|
||||
|
||||
/// The result of removing an entry from a node.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(super) enum Removed {
|
||||
/// An entry was removed, and the node is still in good shape.
|
||||
Healthy,
|
||||
|
||||
/// The node is in good shape after removing the rightmost element.
|
||||
Rightmost,
|
||||
|
||||
/// The node has too few entries now, and it should be balanced with a sibling node.
|
||||
Underflow,
|
||||
|
||||
/// The last entry was removed. For an inner node, this means that the `keys` array is empty
|
||||
/// and there is just a single sub-tree left.
|
||||
Empty,
|
||||
}
|
||||
|
||||
impl Removed {
|
||||
/// Create a `Removed` status from a size and capacity.
|
||||
fn new(removed: usize, new_size: usize, capacity: usize) -> Removed {
|
||||
if 2 * new_size >= capacity {
|
||||
if removed == new_size {
|
||||
Removed::Rightmost
|
||||
} else {
|
||||
Removed::Healthy
|
||||
}
|
||||
} else if new_size > 0 {
|
||||
Removed::Underflow
|
||||
} else {
|
||||
Removed::Empty
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Display ": value" after each key, or nothing at all for the set placeholder `SetValue`.
pub(super) trait ValDisp {
    fn valfmt(&self, f: &mut fmt::Formatter) -> fmt::Result;
}

// Sets have no values, so print nothing.
impl ValDisp for SetValue {
    fn valfmt(&self, _: &mut fmt::Formatter) -> fmt::Result {
        Ok(())
    }
}

// Any displayable map value is printed as ":{value}".
impl<T: fmt::Display> ValDisp for T {
    fn valfmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, ":{}", self)
    }
}
|
||||
|
||||
// Human-readable node dump, used by the tree-printing test helpers:
// inner: `[ node0 key node1 ... ]`, leaf: `[ key:val ... ]`, plus free-list entries.
impl<F> fmt::Display for NodeData<F>
where
    F: Forest,
    F::Key: fmt::Display,
    F::Value: ValDisp,
{
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            NodeData::Inner { size, keys, tree } => {
                // Sub-trees and keys alternate; there is always one leading sub-tree.
                write!(f, "[ {}", tree[0])?;
                for i in 0..usize::from(size) {
                    write!(f, " {} {}", keys[i], tree[i + 1])?;
                }
                write!(f, " ]")
            }
            NodeData::Leaf { size, keys, vals } => {
                let keys = keys.borrow();
                let vals = vals.borrow();
                write!(f, "[")?;
                for i in 0..usize::from(size) {
                    write!(f, " {}", keys[i])?;
                    // `valfmt` prints ":value" for maps and nothing for sets.
                    vals[i].valfmt(f)?;
                }
                write!(f, " ]")
            }
            NodeData::Free { next: Some(n) } => write!(f, "[ free -> {} ]", n),
            NodeData::Free { next: None } => write!(f, "[ free ]"),
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use std::mem;
    use std::string::ToString;

    // Forest impl for a set implementation.
    // Leaf nodes hold 15 `char` keys and carry no values (`SetValue`).
    struct TF();

    impl Forest for TF {
        type Key = char;
        type Value = SetValue;
        type LeafKeys = [char; 15];
        type LeafValues = [SetValue; 15];
        type Comparator = ();

        fn splat_key(key: Self::Key) -> Self::LeafKeys {
            [key; 15]
        }

        fn splat_value(value: Self::Value) -> Self::LeafValues {
            [value; 15]
        }
    }

    // Exercise insertion, splitting, and removal on an inner node.
    #[test]
    fn inner() {
        let n1 = Node(1);
        let n2 = Node(2);
        let n3 = Node(3);
        let n4 = Node(4);
        let mut inner = NodeData::<TF>::inner(n1, 'c', n4);
        // Sanity-check the node layout size.
        assert_eq!(mem::size_of_val(&inner), 64);
        assert_eq!(inner.to_string(), "[ node1 c node4 ]");

        assert!(inner.try_inner_insert(0, 'a', n2));
        assert_eq!(inner.to_string(), "[ node1 a node2 c node4 ]");

        assert!(inner.try_inner_insert(1, 'b', n3));
        assert_eq!(inner.to_string(), "[ node1 a node2 b node3 c node4 ]");

        // Fill the node up to its 7-key capacity.
        for i in 3..7 {
            assert!(inner.try_inner_insert(
                usize::from(i),
                ('a' as u8 + i) as char,
                Node(i as u32 + 2),
            ));
        }
        assert_eq!(
            inner.to_string(),
            "[ node1 a node2 b node3 c node4 d node5 e node6 f node7 g node8 ]"
        );

        // Now the node is full and insertion should fail anywhere.
        assert!(!inner.try_inner_insert(0, 'x', n3));
        assert!(!inner.try_inner_insert(4, 'x', n3));
        assert!(!inner.try_inner_insert(7, 'x', n3));

        // Splitting should be independent of the hint because we have an even number of node
        // references.
        let saved = inner.clone();
        let sp = inner.split(1);
        assert_eq!(sp.lhs_entries, 4);
        assert_eq!(sp.rhs_entries, 4);
        assert_eq!(sp.crit_key, 'd');
        // The critical key is not present in either of the resulting nodes.
        assert_eq!(inner.to_string(), "[ node1 a node2 b node3 c node4 ]");
        assert_eq!(sp.rhs_data.to_string(), "[ node5 e node6 f node7 g node8 ]");

        // Removing below half capacity reports `Underflow`.
        assert_eq!(inner.inner_remove(0), Removed::Underflow);
        assert_eq!(inner.to_string(), "[ node2 b node3 c node4 ]");

        assert_eq!(inner.inner_remove(1), Removed::Underflow);
        assert_eq!(inner.to_string(), "[ node2 c node4 ]");

        assert_eq!(inner.inner_remove(1), Removed::Underflow);
        assert_eq!(inner.to_string(), "[ node2 ]");

        // Removing the last key leaves just one sub-tree: `Empty`.
        assert_eq!(inner.inner_remove(0), Removed::Empty);

        // Splitting with a high hint gives the same even split.
        inner = saved;
        let sp = inner.split(6);
        assert_eq!(sp.lhs_entries, 4);
        assert_eq!(sp.rhs_entries, 4);
        assert_eq!(sp.crit_key, 'd');
        assert_eq!(inner.to_string(), "[ node1 a node2 b node3 c node4 ]");
        assert_eq!(sp.rhs_data.to_string(), "[ node5 e node6 f node7 g node8 ]");
    }

    // Exercise insertion, splitting, and removal on a leaf node.
    #[test]
    fn leaf() {
        let mut leaf = NodeData::<TF>::leaf('d', SetValue());
        assert_eq!(leaf.to_string(), "[ d ]");

        assert!(leaf.try_leaf_insert(0, 'a', SetValue()));
        assert_eq!(leaf.to_string(), "[ a d ]");
        assert!(leaf.try_leaf_insert(1, 'b', SetValue()));
        assert!(leaf.try_leaf_insert(2, 'c', SetValue()));
        assert_eq!(leaf.to_string(), "[ a b c d ]");
        // Fill the leaf up to its 15-key capacity.
        for i in 4..15 {
            assert!(leaf.try_leaf_insert(
                usize::from(i),
                ('a' as u8 + i) as char,
                SetValue(),
            ));
        }
        assert_eq!(leaf.to_string(), "[ a b c d e f g h i j k l m n o ]");

        // Now the node is full and insertion should fail anywhere.
        assert!(!leaf.try_leaf_insert(0, 'x', SetValue()));
        assert!(!leaf.try_leaf_insert(8, 'x', SetValue()));
        assert!(!leaf.try_leaf_insert(15, 'x', SetValue()));

        // The index given to `split` is not the split position, it's a hint for balancing the node.
        let saved = leaf.clone();
        let sp = leaf.split(12);
        assert_eq!(sp.lhs_entries, 8);
        assert_eq!(sp.rhs_entries, 7);
        assert_eq!(sp.crit_key, 'i');
        assert_eq!(leaf.to_string(), "[ a b c d e f g h ]");
        assert_eq!(sp.rhs_data.to_string(), "[ i j k l m n o ]");

        assert!(leaf.try_leaf_insert(8, 'i', SetValue()));
        assert_eq!(leaf.leaf_remove(2), Removed::Healthy);
        assert_eq!(leaf.to_string(), "[ a b d e f g h i ]");
        assert_eq!(leaf.leaf_remove(7), Removed::Underflow);
        assert_eq!(leaf.to_string(), "[ a b d e f g h ]");

        // A low hint splits the other way around.
        leaf = saved;
        let sp = leaf.split(7);
        assert_eq!(sp.lhs_entries, 7);
        assert_eq!(sp.rhs_entries, 8);
        assert_eq!(sp.crit_key, 'h');
        assert_eq!(leaf.to_string(), "[ a b c d e f g ]");
        assert_eq!(sp.rhs_data.to_string(), "[ h i j k l m n o ]");
    }

    // Check how the `split_pos` heuristic balances around the insertion hint.
    #[test]
    fn optimal_split_pos() {
        // An even split is easy.
        assert_eq!(split_pos(8, 0), 4);
        assert_eq!(split_pos(8, 8), 4);

        // Easy cases for odd splits.
        assert_eq!(split_pos(7, 0), 3);
        assert_eq!(split_pos(7, 7), 4);

        // If the insertion point is the same as the split position, we
        // will append to the lhs node.
        assert_eq!(split_pos(7, 3), 3);
        assert_eq!(split_pos(7, 4), 4);
    }

    // Balance an underflowed inner node against its right sibling.
    #[test]
    fn inner_balance() {
        let n1 = Node(1);
        let n2 = Node(2);
        let n3 = Node(3);
        let mut lhs = NodeData::<TF>::inner(n1, 'a', n2);
        assert!(lhs.try_inner_insert(1, 'b', n3));
        assert_eq!(lhs.to_string(), "[ node1 a node2 b node3 ]");

        let n11 = Node(11);
        let n12 = Node(12);
        let mut rhs = NodeData::<TF>::inner(n11, 'p', n12);

        for i in 1..4 {
            assert!(rhs.try_inner_insert(
                usize::from(i),
                ('p' as u8 + i) as char,
                Node(i as u32 + 12),
            ));
        }
        assert_eq!(
            rhs.to_string(),
            "[ node11 p node12 q node13 r node14 s node15 ]"
        );

        // 3+5 elements fit in RHS.
        assert_eq!(lhs.balance('o', &mut rhs), None);
        assert_eq!(
            rhs.to_string(),
            "[ node1 a node2 b node3 o node11 p node12 q node13 r node14 s node15 ]"
        );

        // 2+8 elements are redistributed.
        lhs = NodeData::<TF>::inner(Node(20), 'x', Node(21));
        assert_eq!(lhs.balance('y', &mut rhs), Some('o'));
        assert_eq!(
            lhs.to_string(),
            "[ node20 x node21 y node1 a node2 b node3 ]"
        );
        assert_eq!(
            rhs.to_string(),
            "[ node11 p node12 q node13 r node14 s node15 ]"
        );
    }

    // Balance an underflowed leaf node against its right sibling.
    #[test]
    fn leaf_balance() {
        let mut lhs = NodeData::<TF>::leaf('a', SetValue());
        for i in 1..6 {
            assert!(lhs.try_leaf_insert(
                usize::from(i),
                ('a' as u8 + i) as char,
                SetValue(),
            ));
        }
        assert_eq!(lhs.to_string(), "[ a b c d e f ]");

        let mut rhs = NodeData::<TF>::leaf('0', SetValue());
        for i in 1..8 {
            assert!(rhs.try_leaf_insert(
                usize::from(i),
                ('0' as u8 + i) as char,
                SetValue(),
            ));
        }
        assert_eq!(rhs.to_string(), "[ 0 1 2 3 4 5 6 7 ]");

        // 6+8 elements all fits in rhs.
        assert_eq!(lhs.balance('0', &mut rhs), None);
        assert_eq!(rhs.to_string(), "[ a b c d e f 0 1 2 3 4 5 6 7 ]");

        assert!(lhs.try_leaf_insert(0, 'x', SetValue()));
        assert!(lhs.try_leaf_insert(1, 'y', SetValue()));
        assert!(lhs.try_leaf_insert(2, 'z', SetValue()));
        assert_eq!(lhs.to_string(), "[ x y z ]");

        // 3+14 elements need redistribution.
        assert_eq!(lhs.balance('a', &mut rhs), Some('0'));
        assert_eq!(lhs.to_string(), "[ x y z a b c d e f ]");
        assert_eq!(rhs.to_string(), "[ 0 1 2 3 4 5 6 7 ]");
    }
}
|
||||
832
lib/codegen/src/bforest/path.rs
Normal file
832
lib/codegen/src/bforest/path.rs
Normal file
@@ -0,0 +1,832 @@
|
||||
//! A path from the root of a B+-tree to a leaf node.
|
||||
|
||||
use super::node::Removed;
|
||||
use super::{slice_insert, slice_shift, Comparator, Forest, Node, NodeData, NodePool, MAX_PATH};
|
||||
use std::borrow::Borrow;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
#[cfg(test)]
|
||||
use std::fmt;
|
||||
|
||||
/// A path from the root of a B+-tree down to an entry in a leaf node.
///
/// `size == 0` is used throughout as the "off-the-end" position. When `size > 0`,
/// `node[size - 1]` is the leaf and `entry[size - 1]` the entry within it.
pub(super) struct Path<F: Forest> {
    /// Number of path entries including the root and leaf nodes.
    size: usize,

    /// Path of node references from the root to a leaf node.
    node: [Node; MAX_PATH],

    /// Entry number in each node.
    entry: [u8; MAX_PATH],

    // Ties the path to a specific `Forest` without storing any `F` data.
    unused: PhantomData<F>,
}
|
||||
|
||||
impl<F: Forest> Default for Path<F> {
    /// An empty path in the "off-the-end" position (`size == 0`).
    fn default() -> Path<F> {
        Path {
            size: 0,
            node: [Node(0); MAX_PATH],
            entry: [0; MAX_PATH],
            unused: PhantomData,
        }
    }
}
|
||||
|
||||
impl<F: Forest> Path<F> {
|
||||
/// Reset path by searching for `key` starting from `root`.
|
||||
///
|
||||
/// If `key` is in the tree, returns the corresponding value and leaved the path pointing at
|
||||
/// the entry. Otherwise returns `None` and:
|
||||
///
|
||||
/// - A key smaller than all stored keys returns a path to the first entry of the first leaf.
|
||||
/// - A key larger than all stored keys returns a path to one beyond the last element of the
|
||||
/// last leaf.
|
||||
/// - A key between the stored keys of adjacent leaf nodes returns a path to one beyond the
|
||||
/// last entry of the first of the leaf nodes.
|
||||
///
|
||||
pub fn find(
|
||||
&mut self,
|
||||
key: F::Key,
|
||||
root: Node,
|
||||
pool: &NodePool<F>,
|
||||
comp: &F::Comparator,
|
||||
) -> Option<F::Value> {
|
||||
let mut node = root;
|
||||
for level in 0.. {
|
||||
self.size = level + 1;
|
||||
self.node[level] = node;
|
||||
match pool[node] {
|
||||
NodeData::Inner { size, keys, tree } => {
|
||||
// Invariant: `tree[i]` contains keys smaller than
|
||||
// `keys[i]`, greater or equal to `keys[i-1]`.
|
||||
let i = match comp.search(key, &keys[0..size.into()]) {
|
||||
// We hit an existing key, so follow the >= branch.
|
||||
Ok(i) => i + 1,
|
||||
// Key is less than `keys[i]`, so follow the < branch.
|
||||
Err(i) => i,
|
||||
};
|
||||
self.entry[level] = i as u8;
|
||||
node = tree[i];
|
||||
}
|
||||
NodeData::Leaf { size, keys, vals } => {
|
||||
// For a leaf we want either the found key or an insert position.
|
||||
return match comp.search(key, &keys.borrow()[0..size.into()]) {
|
||||
Ok(i) => {
|
||||
self.entry[level] = i as u8;
|
||||
Some(vals.borrow()[i])
|
||||
}
|
||||
Err(i) => {
|
||||
self.entry[level] = i as u8;
|
||||
None
|
||||
}
|
||||
};
|
||||
}
|
||||
NodeData::Free { .. } => panic!("Free {} reached from {}", node, root),
|
||||
}
|
||||
}
|
||||
unreachable!();
|
||||
}
|
||||
|
||||
/// Move path to the first entry of the tree starting at `root` and return it.
|
||||
pub fn first(&mut self, root: Node, pool: &NodePool<F>) -> (F::Key, F::Value) {
|
||||
let mut node = root;
|
||||
for level in 0.. {
|
||||
self.size = level + 1;
|
||||
self.node[level] = node;
|
||||
self.entry[level] = 0;
|
||||
match pool[node] {
|
||||
NodeData::Inner { tree, .. } => node = tree[0],
|
||||
NodeData::Leaf { keys, vals, .. } => return (keys.borrow()[0], vals.borrow()[0]),
|
||||
NodeData::Free { .. } => panic!("Free {} reached from {}", node, root),
|
||||
}
|
||||
}
|
||||
unreachable!();
|
||||
}
|
||||
|
||||
/// Move this path to the next key-value pair and return it.
|
||||
pub fn next(&mut self, pool: &NodePool<F>) -> Option<(F::Key, F::Value)> {
|
||||
match self.leaf_pos() {
|
||||
None => return None,
|
||||
Some((node, entry)) => {
|
||||
let (keys, vals) = pool[node].unwrap_leaf();
|
||||
if entry + 1 < keys.len() {
|
||||
self.entry[self.size - 1] += 1;
|
||||
return Some((keys[entry + 1], vals[entry + 1]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The current leaf node is exhausted. Move to the next one.
|
||||
let leaf_level = self.size - 1;
|
||||
self.next_node(leaf_level, pool).map(|node| {
|
||||
let (keys, vals) = pool[node].unwrap_leaf();
|
||||
(keys[0], vals[0])
|
||||
})
|
||||
}
|
||||
|
||||
/// Move this path to the previous key-value pair and return it.
|
||||
///
|
||||
/// If the path is at the off-the-end position, go to the last key-value pair.
|
||||
///
|
||||
/// If the path is already at the first key-value pair, leave it there and return `None`.
|
||||
pub fn prev(&mut self, root: Node, pool: &NodePool<F>) -> Option<(F::Key, F::Value)> {
|
||||
// We use `size == 0` as a generic off-the-end position.
|
||||
if self.size == 0 {
|
||||
self.goto_subtree_last(0, root, pool);
|
||||
let (node, entry) = self.leaf_pos().unwrap();
|
||||
let (keys, vals) = pool[node].unwrap_leaf();
|
||||
return Some((keys[entry], vals[entry]));
|
||||
}
|
||||
|
||||
match self.leaf_pos() {
|
||||
None => return None,
|
||||
Some((node, entry)) => {
|
||||
if entry > 0 {
|
||||
self.entry[self.size - 1] -= 1;
|
||||
let (keys, vals) = pool[node].unwrap_leaf();
|
||||
return Some((keys[entry - 1], vals[entry - 1]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The current leaf node is exhausted. Move to the previous one.
|
||||
self.prev_leaf(pool).map(|node| {
|
||||
let (keys, vals) = pool[node].unwrap_leaf();
|
||||
let e = self.leaf_entry();
|
||||
(keys[e], vals[e])
|
||||
})
|
||||
}
|
||||
|
||||
/// Move path to the first entry of the next node at level, if one exists.
|
||||
///
|
||||
/// Returns the new node if it exists.
|
||||
///
|
||||
/// Reset the path to `size = 0` and return `None` if there is no next node.
|
||||
fn next_node(&mut self, level: usize, pool: &NodePool<F>) -> Option<Node> {
|
||||
match self.right_sibling_branch_level(level, pool) {
|
||||
None => {
|
||||
self.size = 0;
|
||||
None
|
||||
}
|
||||
Some(bl) => {
|
||||
let (_, bnodes) = pool[self.node[bl]].unwrap_inner();
|
||||
self.entry[bl] += 1;
|
||||
let mut node = bnodes[usize::from(self.entry[bl])];
|
||||
|
||||
for l in bl + 1..level {
|
||||
self.node[l] = node;
|
||||
self.entry[l] = 0;
|
||||
node = pool[node].unwrap_inner().1[0];
|
||||
}
|
||||
|
||||
self.node[level] = node;
|
||||
self.entry[level] = 0;
|
||||
Some(node)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Move the path to the last entry of the previous leaf node, if one exists.
|
||||
///
|
||||
/// Returns the new leaf node if it exists.
|
||||
///
|
||||
/// Leave the path unchanged and returns `None` if we are already at the first leaf node.
|
||||
fn prev_leaf(&mut self, pool: &NodePool<F>) -> Option<Node> {
|
||||
self.left_sibling_branch_level(self.size - 1).map(|bl| {
|
||||
let entry = self.entry[bl] - 1;
|
||||
self.entry[bl] = entry;
|
||||
let (_, bnodes) = pool[self.node[bl]].unwrap_inner();
|
||||
self.goto_subtree_last(bl + 1, bnodes[usize::from(entry)], pool)
|
||||
})
|
||||
}
|
||||
|
||||
/// Move this path to the last position for the sub-tree at `level, root`.
|
||||
fn goto_subtree_last(&mut self, level: usize, root: Node, pool: &NodePool<F>) -> Node {
|
||||
let mut node = root;
|
||||
for l in level.. {
|
||||
self.node[l] = node;
|
||||
match pool[node] {
|
||||
NodeData::Inner { size, ref tree, .. } => {
|
||||
self.entry[l] = size;
|
||||
node = tree[usize::from(size)];
|
||||
}
|
||||
NodeData::Leaf { size, .. } => {
|
||||
self.entry[l] = size - 1;
|
||||
self.size = l + 1;
|
||||
break;
|
||||
}
|
||||
NodeData::Free { .. } => panic!("Free {} reached from {}", node, root),
|
||||
}
|
||||
}
|
||||
node
|
||||
}
|
||||
|
||||
/// Set the root node and point the path at the first entry of the node.
|
||||
pub fn set_root_node(&mut self, root: Node) {
|
||||
self.size = 1;
|
||||
self.node[0] = root;
|
||||
self.entry[0] = 0;
|
||||
}
|
||||
|
||||
/// Get the current leaf node and entry, if any.
|
||||
pub fn leaf_pos(&self) -> Option<(Node, usize)> {
|
||||
let i = self.size.wrapping_sub(1);
|
||||
self.node.get(i).map(|&n| (n, self.entry[i].into()))
|
||||
}
|
||||
|
||||
/// Get the current leaf node.
|
||||
fn leaf_node(&self) -> Node {
|
||||
self.node[self.size - 1]
|
||||
}
|
||||
|
||||
/// Get the current entry in the leaf node.
|
||||
fn leaf_entry(&self) -> usize {
|
||||
self.entry[self.size - 1].into()
|
||||
}
|
||||
|
||||
/// Is this path pointing to the first entry in the tree?
|
||||
/// This corresponds to the smallest key.
|
||||
fn at_first_entry(&self) -> bool {
|
||||
self.entry[0..self.size].iter().all(|&i| i == 0)
|
||||
}
|
||||
|
||||
/// Get a mutable reference to the current value.
|
||||
/// This assumes that there is a current value.
|
||||
pub fn value_mut<'a>(&self, pool: &'a mut NodePool<F>) -> &'a mut F::Value {
|
||||
&mut pool[self.leaf_node()].unwrap_leaf_mut().1[self.leaf_entry()]
|
||||
}
|
||||
|
||||
/// Insert the key-value pair at the current position.
|
||||
/// The current position must be the correct insertion location for the key.
|
||||
/// This function does not check for duplicate keys. Use `find` or similar for that.
|
||||
/// Returns the new root node.
|
||||
pub fn insert(&mut self, key: F::Key, value: F::Value, pool: &mut NodePool<F>) -> Node {
|
||||
if !self.try_leaf_insert(key, value, pool) {
|
||||
self.split_and_insert(key, value, pool);
|
||||
}
|
||||
self.node[0]
|
||||
}
|
||||
|
||||
/// Try to insert `key, value` at the current position, but fail and return false if the leaf
|
||||
/// node is full.
|
||||
fn try_leaf_insert(&self, key: F::Key, value: F::Value, pool: &mut NodePool<F>) -> bool {
|
||||
let index = self.leaf_entry();
|
||||
|
||||
// The case `index == 0` should only ever happen when there are no earlier leaf nodes,
|
||||
// otherwise we should have appended to the previous leaf node instead. This invariant
|
||||
// means that we don't need to update keys stored in inner nodes here.
|
||||
debug_assert!(index > 0 || self.at_first_entry());
|
||||
|
||||
pool[self.leaf_node()].try_leaf_insert(index, key, value)
|
||||
}
|
||||
|
||||
/// Split the current leaf node and then insert `key, value`.
|
||||
/// This should only be used if `try_leaf_insert()` fails.
|
||||
fn split_and_insert(&mut self, mut key: F::Key, value: F::Value, pool: &mut NodePool<F>) {
|
||||
let orig_root = self.node[0];
|
||||
|
||||
// Loop invariant: We need to split the node at `level` and then retry a failed insertion.
|
||||
// The items to insert are either `(key, ins_node)` or `(key, value)`.
|
||||
let mut ins_node = None;
|
||||
let mut split;
|
||||
for level in (0..self.size).rev() {
|
||||
// Split the current node.
|
||||
let mut node = self.node[level];
|
||||
let mut entry = self.entry[level].into();
|
||||
split = pool[node].split(entry);
|
||||
let rhs_node = pool.alloc_node(split.rhs_data);
|
||||
|
||||
// Should the path be moved to the new RHS node?
|
||||
// Prefer the smaller node if we're right in the middle.
|
||||
// Prefer to append to LHS all other things being equal.
|
||||
//
|
||||
// When inserting into an inner node (`ins_node.is_some()`), we must point to a valid
|
||||
// entry in the current node since the new entry is inserted *after* the insert
|
||||
// location.
|
||||
if entry > split.lhs_entries ||
|
||||
(entry == split.lhs_entries &&
|
||||
(split.lhs_entries > split.rhs_entries || ins_node.is_some()))
|
||||
{
|
||||
node = rhs_node;
|
||||
entry -= split.lhs_entries;
|
||||
self.node[level] = node;
|
||||
self.entry[level] = entry as u8;
|
||||
}
|
||||
|
||||
// Now that we have a not-full node, it must be possible to insert.
|
||||
match ins_node {
|
||||
None => {
|
||||
let inserted = pool[node].try_leaf_insert(entry, key, value);
|
||||
debug_assert!(inserted);
|
||||
// If we inserted at the front of the new rhs_node leaf, we need to propagate
|
||||
// the inserted key as the critical key instead of the previous front key.
|
||||
if entry == 0 && node == rhs_node {
|
||||
split.crit_key = key;
|
||||
}
|
||||
}
|
||||
Some(n) => {
|
||||
let inserted = pool[node].try_inner_insert(entry, key, n);
|
||||
debug_assert!(inserted);
|
||||
// The lower level was moved to the new RHS node, so make sure that is
|
||||
// reflected here.
|
||||
if n == self.node[level + 1] {
|
||||
self.entry[level] += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We are now done with the current level, but `rhs_node` must be inserted in the inner
|
||||
// node above us. If we're already at level 0, the root node needs to be split.
|
||||
key = split.crit_key;
|
||||
ins_node = Some(rhs_node);
|
||||
if level > 0 {
|
||||
let pnode = &mut pool[self.node[level - 1]];
|
||||
let pentry = self.entry[level - 1].into();
|
||||
if pnode.try_inner_insert(pentry, key, rhs_node) {
|
||||
// If this level level was moved to the new RHS node, update parent entry.
|
||||
if node == rhs_node {
|
||||
self.entry[level - 1] += 1;
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we get here we have split the original root node and need to add an extra level.
|
||||
let rhs_node = ins_node.expect("empty path");
|
||||
let root = pool.alloc_node(NodeData::inner(orig_root, key, rhs_node));
|
||||
let entry = if self.node[0] == rhs_node { 1 } else { 0 };
|
||||
self.size += 1;
|
||||
slice_insert(&mut self.node[0..self.size], 0, root);
|
||||
slice_insert(&mut self.entry[0..self.size], 0, entry);
|
||||
}
|
||||
|
||||
/// Remove the key-value pair at the current position and advance the path to the next
|
||||
/// key-value pair, leaving the path in a normalized state.
|
||||
///
|
||||
/// Return the new root node.
|
||||
pub fn remove(&mut self, pool: &mut NodePool<F>) -> Option<Node> {
|
||||
let e = self.leaf_entry();
|
||||
match pool[self.leaf_node()].leaf_remove(e) {
|
||||
Removed::Healthy => {
|
||||
if e == 0 {
|
||||
self.update_crit_key(pool)
|
||||
}
|
||||
Some(self.node[0])
|
||||
}
|
||||
status => self.balance_nodes(status, pool),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the critical key for the current node at `level`.
|
||||
///
|
||||
/// The critical key is less than or equal to all keys in the sub-tree at `level` and greater
|
||||
/// than all keys to the left of the current node at `level`.
|
||||
///
|
||||
/// The left-most node at any level does not have a critical key.
|
||||
fn current_crit_key(&self, level: usize, pool: &NodePool<F>) -> Option<F::Key> {
|
||||
// Find the level containing the critical key for the current node.
|
||||
self.left_sibling_branch_level(level).map(|bl| {
|
||||
let (keys, _) = pool[self.node[bl]].unwrap_inner();
|
||||
keys[usize::from(self.entry[bl]) - 1]
|
||||
})
|
||||
}
|
||||
|
||||
/// Update the critical key after removing the front entry of the leaf node.
|
||||
fn update_crit_key(&mut self, pool: &mut NodePool<F>) {
|
||||
// Find the inner level containing the critical key for the current leaf node.
|
||||
let crit_level = match self.left_sibling_branch_level(self.size - 1) {
|
||||
None => return,
|
||||
Some(l) => l,
|
||||
};
|
||||
let crit_kidx = self.entry[crit_level] - 1;
|
||||
|
||||
// Extract the new critical key from the leaf node.
|
||||
let crit_key = pool[self.leaf_node()].leaf_crit_key();
|
||||
let crit_node = self.node[crit_level];
|
||||
|
||||
match pool[crit_node] {
|
||||
NodeData::Inner { size, ref mut keys, .. } => {
|
||||
debug_assert!(crit_kidx < size);
|
||||
keys[usize::from(crit_kidx)] = crit_key;
|
||||
}
|
||||
_ => panic!("Expected inner node"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Given that the current leaf node is in an unhealthy (underflowed or even empty) status,
|
||||
/// balance it with sibling nodes.
|
||||
///
|
||||
/// Return the new root node.
|
||||
fn balance_nodes(&mut self, status: Removed, pool: &mut NodePool<F>) -> Option<Node> {
|
||||
// The current leaf node is not in a healthy state, and its critical key may have changed
|
||||
// too.
|
||||
//
|
||||
// Start by dealing with a changed critical key for the leaf level.
|
||||
if status != Removed::Empty && self.leaf_entry() == 0 {
|
||||
self.update_crit_key(pool);
|
||||
}
|
||||
|
||||
let leaf_level = self.size - 1;
|
||||
if self.heal_level(status, leaf_level, pool) {
|
||||
// Tree has become empty.
|
||||
self.size = 0;
|
||||
return None;
|
||||
}
|
||||
|
||||
// Discard the root node if it has shrunk to a single sub-tree.
|
||||
let mut ns = 0;
|
||||
while let NodeData::Inner { size: 0, ref tree, .. } = pool[self.node[ns]] {
|
||||
ns += 1;
|
||||
self.node[ns] = tree[0];
|
||||
}
|
||||
|
||||
if ns > 0 {
|
||||
for l in 0..ns {
|
||||
pool.free_node(self.node[l]);
|
||||
}
|
||||
|
||||
// Shift the whole array instead of just 0..size because `self.size` may be cleared
|
||||
// here if the path is pointing off-the-end.
|
||||
slice_shift(&mut self.node, ns);
|
||||
slice_shift(&mut self.entry, ns);
|
||||
|
||||
if self.size > 0 {
|
||||
self.size -= ns;
|
||||
}
|
||||
}
|
||||
|
||||
// Return the root node, even when `size=0` indicating that we're at the off-the-end
|
||||
// position.
|
||||
Some(self.node[0])
|
||||
}
|
||||
|
||||
/// After removing an entry from the node at `level`, check its health and rebalance as needed.
|
||||
///
|
||||
/// Leave the path up to and including `level` in a normalized state where all entries are in
|
||||
/// bounds.
|
||||
///
|
||||
/// Returns true if the tree becomes empty.
|
||||
fn heal_level(&mut self, status: Removed, level: usize, pool: &mut NodePool<F>) -> bool {
|
||||
match status {
|
||||
Removed::Healthy => {}
|
||||
Removed::Rightmost => {
|
||||
// The rightmost entry was removed from the curent node, so move the path so it
|
||||
// points at the first entry of the next node at this level.
|
||||
debug_assert_eq!(
|
||||
usize::from(self.entry[level]),
|
||||
pool[self.node[level]].entries()
|
||||
);
|
||||
self.next_node(level, pool);
|
||||
}
|
||||
Removed::Underflow => self.underflowed_node(level, pool),
|
||||
Removed::Empty => return self.empty_node(level, pool),
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// The current node at `level` has underflowed, meaning that it is below half capacity but
|
||||
/// not completely empty.
|
||||
///
|
||||
/// Handle this by balancing entries with the right sibling node.
|
||||
///
|
||||
/// Leave the path up to and including `level` in a valid state that points to the same entry.
|
||||
fn underflowed_node(&mut self, level: usize, pool: &mut NodePool<F>) {
|
||||
// Look for a right sibling node at this level. If none exists, we allow the underflowed
|
||||
// node to persist as the right-most node at its level.
|
||||
if let Some((crit_key, rhs_node)) = self.right_sibling(level, pool) {
|
||||
// New critical key for the updated right sibling node.
|
||||
let new_ck: Option<F::Key>;
|
||||
let empty;
|
||||
// Make a COPY of the sibling node to avoid fighting the borrow checker.
|
||||
let mut rhs = pool[rhs_node];
|
||||
match pool[self.node[level]].balance(crit_key, &mut rhs) {
|
||||
None => {
|
||||
// Everything got moved to the RHS node.
|
||||
new_ck = self.current_crit_key(level, pool);
|
||||
empty = true;
|
||||
}
|
||||
Some(key) => {
|
||||
// Entries moved from RHS node.
|
||||
new_ck = Some(key);
|
||||
empty = false;
|
||||
}
|
||||
}
|
||||
// Put back the updated RHS node data.
|
||||
pool[rhs_node] = rhs;
|
||||
// Update the critical key for the RHS node unless it has become a left-most
|
||||
// node.
|
||||
if let Some(ck) = new_ck {
|
||||
self.update_right_crit_key(level, ck, pool);
|
||||
}
|
||||
if empty {
|
||||
let empty_tree = self.empty_node(level, pool);
|
||||
debug_assert!(!empty_tree);
|
||||
}
|
||||
|
||||
// Any Removed::Rightmost state must have been cleared above by merging nodes. If the
|
||||
// current entry[level] was one off the end of the node, it will now point at a proper
|
||||
// entry.
|
||||
debug_assert!(usize::from(self.entry[level]) < pool[self.node[level]].entries());
|
||||
} else if usize::from(self.entry[level]) >= pool[self.node[level]].entries() {
|
||||
// There's no right sibling at this level, so the node can't be rebalanced.
|
||||
// Check if we are in an off-the-end position.
|
||||
self.size = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/// The current node at `level` has become empty.
|
||||
///
|
||||
/// Remove the node from its parent node and leave the path in a normalized state. This means
|
||||
/// that the path at this level will go through the right sibling of this node.
|
||||
///
|
||||
/// If the current node has no right sibling, set `self.size = 0`.
|
||||
///
|
||||
/// Returns true if the tree becomes empty.
|
||||
fn empty_node(&mut self, level: usize, pool: &mut NodePool<F>) -> bool {
|
||||
pool.free_node(self.node[level]);
|
||||
if level == 0 {
|
||||
// We just deleted the root node, so the tree is now empty.
|
||||
return true;
|
||||
}
|
||||
|
||||
// Get the right sibling node before recursively removing nodes.
|
||||
let rhs_node = self.right_sibling(level, pool).map(|(_, n)| n);
|
||||
|
||||
// Remove the current sub-tree from the parent node.
|
||||
let pl = level - 1;
|
||||
let pe = self.entry[pl].into();
|
||||
let status = pool[self.node[pl]].inner_remove(pe);
|
||||
self.heal_level(status, pl, pool);
|
||||
|
||||
// Finally update the path at this level.
|
||||
match rhs_node {
|
||||
// We'll leave `self.entry[level]` unchanged. It can be non-zero after moving node
|
||||
// entries to the right sibling node.
|
||||
Some(rhs) => self.node[level] = rhs,
|
||||
// We have no right sibling, so we must have deleted the right-most
|
||||
// entry. The path should be moved to the "off-the-end" position.
|
||||
None => self.size = 0,
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Find the level where the right sibling to the current node at `level` branches off.
|
||||
///
|
||||
/// This will be an inner node with two adjacent sub-trees: In one the current node at level is
|
||||
/// a right-most node, in the other, the right sibling is a left-most node.
|
||||
///
|
||||
/// Returns `None` if the current node is a right-most node so no right sibling exists.
|
||||
fn right_sibling_branch_level(&self, level: usize, pool: &NodePool<F>) -> Option<usize> {
|
||||
(0..level).rposition(|l| match pool[self.node[l]] {
|
||||
NodeData::Inner { size, .. } => self.entry[l] < size,
|
||||
_ => panic!("Expected inner node"),
|
||||
})
|
||||
}
|
||||
|
||||
/// Find the level where the left sibling to the current node at `level` branches off.
|
||||
fn left_sibling_branch_level(&self, level: usize) -> Option<usize> {
|
||||
self.entry[0..level].iter().rposition(|&e| e != 0)
|
||||
}
|
||||
|
||||
/// Get the right sibling node to the current node at `level`.
|
||||
/// Also return the critical key between the current node and the right sibling.
|
||||
fn right_sibling(&self, level: usize, pool: &NodePool<F>) -> Option<(F::Key, Node)> {
|
||||
// Find the critical level: The deepest level where two sibling subtrees contain the
|
||||
// current node and its right sibling.
|
||||
self.right_sibling_branch_level(level, pool).map(|bl| {
|
||||
// Extract the critical key and the `bl+1` node.
|
||||
let be = usize::from(self.entry[bl]);
|
||||
let crit_key;
|
||||
let mut node;
|
||||
{
|
||||
let (keys, tree) = pool[self.node[bl]].unwrap_inner();
|
||||
crit_key = keys[be];
|
||||
node = tree[be + 1];
|
||||
}
|
||||
|
||||
// Follow left-most links back down to `level`.
|
||||
for _ in bl + 1..level {
|
||||
node = pool[node].unwrap_inner().1[0];
|
||||
}
|
||||
|
||||
(crit_key, node)
|
||||
})
|
||||
}
|
||||
|
||||
    /// Update the critical key for the right sibling node at `level`.
    ///
    /// The critical key lives in the inner node where the two sibling sub-trees branch off,
    /// which is not necessarily the immediate parent.
    ///
    /// Panics if no right sibling exists.
    fn update_right_crit_key(&self, level: usize, crit_key: F::Key, pool: &mut NodePool<F>) {
        let bl = self.right_sibling_branch_level(level, pool).expect(
            "No right sibling exists",
        );
        match pool[self.node[bl]] {
            NodeData::Inner { ref mut keys, .. } => {
                // `entry[bl]` indexes the key separating our sub-tree from the sibling's.
                keys[usize::from(self.entry[bl])] = crit_key;
            }
            _ => panic!("Expected inner node"),
        }
    }
|
||||
|
||||
    /// Normalize the path position such that it is either pointing at a real entry or `size=0`
    /// indicating "off-the-end".
    pub fn normalize(&mut self, pool: &mut NodePool<F>) {
        if let Some((leaf, entry)) = self.leaf_pos() {
            if entry >= pool[leaf].entries() {
                // The leaf entry is one-past-the-end; advance to the first entry of the next
                // leaf, or collapse to the off-the-end position if this was the last leaf.
                let leaf_level = self.size - 1;
                self.next_node(leaf_level, pool);
            }
        }
    }
|
||||
}
|
||||
|
||||
#[cfg(test)]
impl<F: Forest> Path<F> {
    /// Check the internal consistency of this path.
    ///
    /// Verifies that every level holds a node of the expected kind (inner nodes above, a leaf
    /// at the bottom), that entries are in bounds, and that each node is the child selected by
    /// its parent's entry. Panics on any inconsistency.
    pub fn verify(&self, pool: &NodePool<F>) {
        for level in 0..self.size {
            match pool[self.node[level]] {
                NodeData::Inner { size, tree, .. } => {
                    // Inner nodes may only appear above the bottom level.
                    assert!(
                        level < self.size - 1,
                        "Expected leaf node at level {}",
                        level
                    );
                    assert!(
                        self.entry[level] <= size,
                        "OOB inner entry {}/{} at level {}",
                        self.entry[level],
                        size,
                        level
                    );
                    // The node recorded at the next level must be the sub-tree selected
                    // by this level's entry.
                    assert_eq!(
                        self.node[level + 1],
                        tree[usize::from(self.entry[level])],
                        "Node mismatch at level {}",
                        level
                    );
                }
                NodeData::Leaf { size, .. } => {
                    // A leaf is only valid at the bottom level of the path.
                    assert_eq!(level, self.size - 1, "Expected inner node");
                    assert!(
                        self.entry[level] <= size,
                        "OOB leaf entry {}/{}",
                        self.entry[level],
                        size,
                    );
                }
                NodeData::Free { .. } => {
                    panic!("Free {} in path", self.node[level]);
                }
            }
        }
    }
}
|
||||
|
||||
#[cfg(test)]
impl<F: Forest> fmt::Display for Path<F> {
    /// Render the path as `node[entry]--node[entry]--…`, or `<empty path>` when unset.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if self.size == 0 {
            return write!(f, "<empty path>");
        }
        for i in 0..self.size {
            // The first component has no separator; later ones are joined with `--`.
            let sep = if i == 0 { "" } else { "--" };
            write!(f, "{}{}[{}]", sep, self.node[i], self.entry[i])?;
        }
        Ok(())
    }
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::super::{Forest, NodeData, NodePool};
    use super::*;
    use std::cmp::Ordering;

    // Trivial comparator over `i32` used by the test forest.
    struct TC();

    impl Comparator<i32> for TC {
        fn cmp(&self, a: i32, b: i32) -> Ordering {
            a.cmp(&b)
        }
    }

    // Small test forest: 7-entry leaves keep trees shallow and easy to inspect.
    struct TF();

    impl Forest for TF {
        type Key = i32;
        type Value = char;
        type LeafKeys = [i32; 7];
        type LeafValues = [char; 7];
        type Comparator = TC;

        fn splat_key(key: Self::Key) -> Self::LeafKeys {
            [key; 7]
        }

        fn splat_value(value: Self::Value) -> Self::LeafValues {
            [value; 7]
        }
    }

    #[test]
    fn search_single_leaf() {
        // Testing Path::new() for trees with a single leaf node.
        let mut pool = NodePool::<TF>::new();
        let root = pool.alloc_node(NodeData::leaf(10, 'a'));
        let mut p = Path::default();
        let comp = TC();

        // Search for key less than stored key.
        assert_eq!(p.find(5, root, &pool, &comp), None);
        assert_eq!(p.size, 1);
        assert_eq!(p.node[0], root);
        assert_eq!(p.entry[0], 0);

        // Search for stored key.
        assert_eq!(p.find(10, root, &pool, &comp), Some('a'));
        assert_eq!(p.size, 1);
        assert_eq!(p.node[0], root);
        assert_eq!(p.entry[0], 0);

        // Search for key greater than stored key.
        assert_eq!(p.find(15, root, &pool, &comp), None);
        assert_eq!(p.size, 1);
        assert_eq!(p.node[0], root);
        assert_eq!(p.entry[0], 1);

        // Modify leaf node to contain two values.
        match pool[root] {
            NodeData::Leaf {
                ref mut size,
                ref mut keys,
                ref mut vals,
            } => {
                *size = 2;
                keys[1] = 20;
                vals[1] = 'b';
            }
            _ => unreachable!(),
        }

        // Search for key between stored keys.
        assert_eq!(p.find(15, root, &pool, &comp), None);
        assert_eq!(p.size, 1);
        assert_eq!(p.node[0], root);
        assert_eq!(p.entry[0], 1);

        // Search for key greater than stored keys.
        assert_eq!(p.find(25, root, &pool, &comp), None);
        assert_eq!(p.size, 1);
        assert_eq!(p.node[0], root);
        assert_eq!(p.entry[0], 2);
    }

    #[test]
    fn search_single_inner() {
        // Testing Path::new() for trees with a single inner node and two leaves.
        let mut pool = NodePool::<TF>::new();
        let leaf1 = pool.alloc_node(NodeData::leaf(10, 'a'));
        let leaf2 = pool.alloc_node(NodeData::leaf(20, 'b'));
        let root = pool.alloc_node(NodeData::inner(leaf1, 20, leaf2));
        let mut p = Path::default();
        let comp = TC();

        // Search for key less than stored keys.
        assert_eq!(p.find(5, root, &pool, &comp), None);
        assert_eq!(p.size, 2);
        assert_eq!(p.node[0], root);
        assert_eq!(p.entry[0], 0);
        assert_eq!(p.node[1], leaf1);
        assert_eq!(p.entry[1], 0);

        assert_eq!(p.find(10, root, &pool, &comp), Some('a'));
        assert_eq!(p.size, 2);
        assert_eq!(p.node[0], root);
        assert_eq!(p.entry[0], 0);
        assert_eq!(p.node[1], leaf1);
        assert_eq!(p.entry[1], 0);

        // Midway between the two leaf nodes.
        assert_eq!(p.find(15, root, &pool, &comp), None);
        assert_eq!(p.size, 2);
        assert_eq!(p.node[0], root);
        assert_eq!(p.entry[0], 0);
        assert_eq!(p.node[1], leaf1);
        assert_eq!(p.entry[1], 1);

        assert_eq!(p.find(20, root, &pool, &comp), Some('b'));
        assert_eq!(p.size, 2);
        assert_eq!(p.node[0], root);
        assert_eq!(p.entry[0], 1);
        assert_eq!(p.node[1], leaf2);
        assert_eq!(p.entry[1], 0);

        assert_eq!(p.find(25, root, &pool, &comp), None);
        assert_eq!(p.size, 2);
        assert_eq!(p.node[0], root);
        assert_eq!(p.entry[0], 1);
        assert_eq!(p.node[1], leaf2);
        assert_eq!(p.entry[1], 1);
    }
}
|
||||
213
lib/codegen/src/bforest/pool.rs
Normal file
213
lib/codegen/src/bforest/pool.rs
Normal file
@@ -0,0 +1,213 @@
|
||||
//! B+-tree node pool.
|
||||
|
||||
use super::{Forest, Node, NodeData};
|
||||
use entity::PrimaryMap;
|
||||
use std::ops::{Index, IndexMut};
|
||||
|
||||
/// A pool of nodes, including a free list.
///
/// All nodes of every tree in a forest live in one pool, addressed by `Node` indices.
/// Freed nodes are threaded onto an intrusive singly-linked free list (via `NodeData::Free`)
/// so later allocations can reuse their slots.
pub(super) struct NodePool<F: Forest> {
    // Backing storage for all node data; a `Node` is an index into this map.
    nodes: PrimaryMap<Node, NodeData<F>>,
    // Head of the free list, or `None` when no freed slots are available for reuse.
    freelist: Option<Node>,
}
|
||||
|
||||
impl<F: Forest> NodePool<F> {
|
||||
/// Allocate a new empty pool of nodes.
|
||||
pub fn new() -> NodePool<F> {
|
||||
NodePool {
|
||||
nodes: PrimaryMap::new(),
|
||||
freelist: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Free all nodes.
|
||||
pub fn clear(&mut self) {
|
||||
self.nodes.clear();
|
||||
self.freelist = None;
|
||||
}
|
||||
|
||||
/// Allocate a new node containing `data`.
|
||||
pub fn alloc_node(&mut self, data: NodeData<F>) -> Node {
|
||||
debug_assert!(!data.is_free(), "can't allocate free node");
|
||||
match self.freelist {
|
||||
Some(node) => {
|
||||
// Remove this node from the free list.
|
||||
match self.nodes[node] {
|
||||
NodeData::Free { next } => self.freelist = next,
|
||||
_ => panic!("Invalid {} on free list", node),
|
||||
}
|
||||
self.nodes[node] = data;
|
||||
node
|
||||
}
|
||||
None => {
|
||||
// The free list is empty. Allocate a new node.
|
||||
self.nodes.push(data)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Free a node.
|
||||
pub fn free_node(&mut self, node: Node) {
|
||||
// Quick check for a double free.
|
||||
debug_assert!(!self.nodes[node].is_free(), "{} is already free", node);
|
||||
self.nodes[node] = NodeData::Free { next: self.freelist };
|
||||
self.freelist = Some(node);
|
||||
}
|
||||
|
||||
/// Free the entire tree rooted at `node`.
|
||||
pub fn free_tree(&mut self, node: Node) {
|
||||
if let NodeData::Inner { size, tree, .. } = self[node] {
|
||||
// Note that we have to capture `tree` by value to avoid borrow checker trouble.
|
||||
#[cfg_attr(feature = "cargo-clippy", allow(needless_range_loop))]
|
||||
for i in 0..usize::from(size + 1) {
|
||||
// Recursively free sub-trees. This recursion can never be deeper than `MAX_PATH`,
|
||||
// and since most trees have less than a handful of nodes, it is worthwhile to
|
||||
// avoid the heap allocation for an iterative tree traversal.
|
||||
self.free_tree(tree[i]);
|
||||
}
|
||||
}
|
||||
self.free_node(node);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
impl<F: Forest> NodePool<F> {
    /// Verify the consistency of the tree rooted at `node`.
    ///
    /// Walks the whole tree iteratively with an explicit work list, checking node occupancy,
    /// strict key ordering against per-subtree lower/upper bounds, and that no node appears
    /// twice. Panics on any violation.
    pub fn verify_tree(&self, node: Node, comp: &F::Comparator)
    where
        NodeData<F>: ::std::fmt::Display,
        F::Key: ::std::fmt::Display,
    {
        use super::Comparator;
        use entity::SparseSet;
        use std::borrow::Borrow;
        use std::cmp::Ordering;
        use std::vec::Vec;

        // The root node can't be an inner node with just a single sub-tree. It should have been
        // pruned.
        if let &NodeData::Inner { size, .. } = &self[node] {
            assert!(size > 0, "Root must have more than one sub-tree");
        }

        let mut done = SparseSet::new();
        let mut todo = Vec::new();

        // Todo-list entries are:
        // 1. Optional LHS key which must be <= all node entries.
        // 2. The node reference.
        // 3. Optional RHS key which must be > all node entries.
        todo.push((None, node, None));

        while let Some((lkey, node, rkey)) = todo.pop() {
            // A node reachable twice means the tree shares or cycles structure.
            assert_eq!(
                done.insert(node),
                None,
                "Node appears more than once in tree"
            );
            let mut lower = lkey;

            match self[node] {
                NodeData::Inner { size, keys, tree } => {
                    let size = size as usize;
                    let capacity = tree.len();
                    let keys = &keys[0..size];

                    // Verify occupancy.
                    // Right-most nodes can be small, but others must be at least half full.
                    assert!(
                        rkey.is_none() || (size + 1) * 2 >= capacity,
                        "Only {}/{} entries in {}:{}, upper={}",
                        size + 1,
                        capacity,
                        node,
                        self[node],
                        rkey.unwrap()
                    );

                    // Queue up the sub-trees, checking for duplicates.
                    for i in 0..size + 1 {
                        // Get an upper bound for node[i].
                        let upper = keys.get(i).cloned().or(rkey);

                        // Check that keys are strictly monotonic.
                        if let (Some(a), Some(b)) = (lower, upper) {
                            assert_eq!(
                                comp.cmp(a, b),
                                Ordering::Less,
                                "Key order {} < {} failed in {}: {}",
                                a,
                                b,
                                node,
                                self[node]
                            );
                        }

                        // Queue up the sub-tree.
                        todo.push((lower, tree[i], upper));

                        // Set a lower bound for the next tree.
                        lower = upper;
                    }
                }
                NodeData::Leaf { size, keys, .. } => {
                    let size = size as usize;
                    let capacity = keys.borrow().len();
                    let keys = &keys.borrow()[0..size];

                    // Verify occupancy.
                    // Right-most nodes can be small, but others must be at least half full.
                    assert!(size > 0, "Leaf {} is empty", node);
                    assert!(
                        rkey.is_none() || size * 2 >= capacity,
                        "Only {}/{} entries in {}:{}, upper={}",
                        size,
                        capacity,
                        node,
                        self[node],
                        rkey.unwrap()
                    );

                    for i in 0..size + 1 {
                        let upper = keys.get(i).cloned().or(rkey);

                        // Check that keys are strictly monotonic.
                        if let (Some(a), Some(b)) = (lower, upper) {
                            // The first leaf key must equal the critical key inherited from
                            // the parent; subsequent keys must be strictly increasing.
                            let wanted = if i == 0 {
                                Ordering::Equal
                            } else {
                                Ordering::Less
                            };
                            assert_eq!(
                                comp.cmp(a, b),
                                wanted,
                                "Key order for {} - {} failed in {}: {}",
                                a,
                                b,
                                node,
                                self[node]
                            );
                        }

                        // Set a lower bound for the next key.
                        lower = upper;
                    }
                }
                NodeData::Free { .. } => panic!("Free {} reached", node),
            }
        }
    }
}
|
||||
|
||||
impl<F: Forest> Index<Node> for NodePool<F> {
|
||||
type Output = NodeData<F>;
|
||||
|
||||
fn index(&self, index: Node) -> &Self::Output {
|
||||
self.nodes.index(index)
|
||||
}
|
||||
}
|
||||
|
||||
impl<F: Forest> IndexMut<Node> for NodePool<F> {
    /// Mutable lookup of the data for `index`; panics if the node was never allocated.
    fn index_mut(&mut self, index: Node) -> &mut Self::Output {
        &mut self.nodes[index]
    }
}
|
||||
594
lib/codegen/src/bforest/set.rs
Normal file
594
lib/codegen/src/bforest/set.rs
Normal file
@@ -0,0 +1,594 @@
|
||||
//! Forest of sets.
|
||||
|
||||
use super::{Comparator, Forest, Node, NodeData, NodePool, Path, SetValue, INNER_SIZE};
|
||||
use packed_option::PackedOption;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
/// Tag type defining forest types for a set.
///
/// Sets store no values, so leaves carry the zero-sized `SetValue` marker alongside keys.
struct SetTypes<K, C>(PhantomData<(K, C)>);

impl<K, C> Forest for SetTypes<K, C>
where
    K: Copy,
    C: Comparator<K>,
{
    type Key = K;
    type Value = SetValue;
    type LeafKeys = [K; 2 * INNER_SIZE - 1];
    type LeafValues = [SetValue; 2 * INNER_SIZE - 1];
    type Comparator = C;

    fn splat_key(key: Self::Key) -> Self::LeafKeys {
        [key; 2 * INNER_SIZE - 1]
    }

    fn splat_value(value: Self::Value) -> Self::LeafValues {
        [value; 2 * INNER_SIZE - 1]
    }
}
|
||||
|
||||
/// Memory pool for a forest of `Set` instances.
pub struct SetForest<K, C>
where
    K: Copy,
    C: Comparator<K>,
{
    // Shared node pool backing every `Set` in this forest.
    nodes: NodePool<SetTypes<K, C>>,
}

impl<K, C> SetForest<K, C>
where
    K: Copy,
    C: Comparator<K>,
{
    /// Create a new empty forest.
    pub fn new() -> SetForest<K, C> {
        SetForest { nodes: NodePool::new() }
    }

    /// Clear all sets in the forest.
    ///
    /// All `Set` instances belonging to this forest are invalidated and should no longer be
    /// used.
    pub fn clear(&mut self) {
        self.nodes.clear();
    }
}
|
||||
|
||||
/// B-tree representing an ordered set of `K`s using `C` for comparing elements.
///
/// This is not a general-purpose replacement for `BTreeSet`. See the [module
/// documentation](index.html) for more information about design tradeoffs.
///
/// Sets can be cloned, but that operation should only be used as part of cloning the whole forest
/// they belong to. *Cloning a set does not allocate new memory for the clone*. It creates an alias
/// of the same memory.
#[derive(Clone)]
pub struct Set<K, C>
where
    K: Copy,
    C: Comparator<K>,
{
    // Root node of this set's tree, or `None` for an empty set.
    root: PackedOption<Node>,
    // Pins the `K`/`C` type parameters without storing them.
    unused: PhantomData<(K, C)>,
}

impl<K, C> Set<K, C>
where
    K: Copy,
    C: Comparator<K>,
{
    /// Make an empty set.
    pub fn new() -> Set<K, C> {
        Set {
            root: None.into(),
            unused: PhantomData,
        }
    }

    /// Is this an empty set?
    pub fn is_empty(&self) -> bool {
        self.root.is_none()
    }

    /// Does the set contain `key`?
    pub fn contains(&self, key: K, forest: &SetForest<K, C>, comp: &C) -> bool {
        self.root
            .expand()
            .and_then(|root| Path::default().find(key, root, &forest.nodes, comp))
            .is_some()
    }

    /// Try to insert `key` into the set.
    ///
    /// If the set did not contain `key`, insert it and return true.
    ///
    /// If `key` is already present, don't change the set and return false.
    pub fn insert(&mut self, key: K, forest: &mut SetForest<K, C>, comp: &C) -> bool {
        self.cursor(forest, comp).insert(key)
    }

    /// Remove `key` from the set and return true.
    ///
    /// If `key` was not present in the set, return false.
    pub fn remove(&mut self, key: K, forest: &mut SetForest<K, C>, comp: &C) -> bool {
        let mut c = self.cursor(forest, comp);
        if c.goto(key) {
            c.remove();
            true
        } else {
            false
        }
    }

    /// Remove all entries.
    pub fn clear(&mut self, forest: &mut SetForest<K, C>) {
        // `take()` leaves `root` empty; the old tree's nodes go back on the free list.
        if let Some(root) = self.root.take() {
            forest.nodes.free_tree(root);
        }
    }

    /// Retains only the elements specified by the predicate.
    ///
    /// Remove all elements where the predicate returns false.
    pub fn retain<F>(&mut self, forest: &mut SetForest<K, C>, mut predicate: F)
    where
        F: FnMut(K) -> bool,
    {
        let mut path = Path::default();
        if let Some(root) = self.root.expand() {
            path.first(root, &forest.nodes);
        }
        // `path.remove` advances to the following element, so only `next` on keepers.
        while let Some((node, entry)) = path.leaf_pos() {
            if predicate(forest.nodes[node].unwrap_leaf().0[entry]) {
                path.next(&forest.nodes);
            } else {
                self.root = path.remove(&mut forest.nodes).into();
            }
        }
    }

    /// Create a cursor for navigating this set. The cursor is initially positioned off the end of
    /// the set.
    pub fn cursor<'a>(
        &'a mut self,
        forest: &'a mut SetForest<K, C>,
        comp: &'a C,
    ) -> SetCursor<'a, K, C> {
        SetCursor::new(self, forest, comp)
    }

    /// Create an iterator traversing this set. The iterator type is `K`.
    pub fn iter<'a>(&'a self, forest: &'a SetForest<K, C>) -> SetIter<'a, K, C> {
        SetIter {
            root: self.root,
            pool: &forest.nodes,
            path: Path::default(),
        }
    }
}
|
||||
|
||||
impl<K, C> Default for Set<K, C>
|
||||
where
|
||||
K: Copy,
|
||||
C: Comparator<K>,
|
||||
{
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// A position in a `Set` used to navigate and modify the ordered set.
///
/// A cursor always points at an element in the set, or "off the end" which is a position after the
/// last element in the set.
pub struct SetCursor<'a, K, C>
where
    K: 'a + Copy,
    C: 'a + Comparator<K>,
{
    // The set's root reference; updated in place when mutations change the root node.
    root: &'a mut PackedOption<Node>,
    pool: &'a mut NodePool<SetTypes<K, C>>,
    comp: &'a C,
    // The current position within the tree.
    path: Path<SetTypes<K, C>>,
}

impl<'a, K, C> SetCursor<'a, K, C>
where
    K: Copy,
    C: Comparator<K>,
{
    /// Create a cursor with a default (invalid) location.
    fn new(
        container: &'a mut Set<K, C>,
        forest: &'a mut SetForest<K, C>,
        comp: &'a C,
    ) -> SetCursor<'a, K, C> {
        SetCursor {
            root: &mut container.root,
            pool: &mut forest.nodes,
            comp,
            path: Path::default(),
        }
    }

    /// Is this cursor pointing to an empty set?
    pub fn is_empty(&self) -> bool {
        self.root.is_none()
    }

    /// Move cursor to the next element and return it.
    ///
    /// If the cursor reaches the end, return `None` and leave the cursor at the off-the-end
    /// position.
    pub fn next(&mut self) -> Option<K> {
        self.path.next(self.pool).map(|(k, _)| k)
    }

    /// Move cursor to the previous element and return it.
    ///
    /// If the cursor is already pointing at the first element, leave it there and return `None`.
    pub fn prev(&mut self) -> Option<K> {
        self.root.expand().and_then(|root| {
            self.path.prev(root, self.pool).map(|(k, _)| k)
        })
    }

    /// Get the current element, or `None` if the cursor is at the end.
    pub fn elem(&self) -> Option<K> {
        self.path.leaf_pos().and_then(|(node, entry)| {
            self.pool[node].unwrap_leaf().0.get(entry).cloned()
        })
    }

    /// Move this cursor to `elem`.
    ///
    /// If `elem` is in the set, place the cursor at `elem` and return true.
    ///
    /// If `elem` is not in the set, place the cursor at the next larger element (or the end) and
    /// return false.
    pub fn goto(&mut self, elem: K) -> bool {
        match self.root.expand() {
            None => false,
            Some(root) => {
                if self.path.find(elem, root, self.pool, self.comp).is_some() {
                    true
                } else {
                    // `find` may leave the path one-past-the-end of a leaf; normalize so
                    // the cursor points at the next larger element or off-the-end.
                    self.path.normalize(self.pool);
                    false
                }
            }
        }
    }

    /// Move this cursor to the first element.
    pub fn goto_first(&mut self) -> Option<K> {
        self.root.map(|root| self.path.first(root, self.pool).0)
    }

    /// Try to insert `elem` into the set and leave the cursor at the inserted element.
    ///
    /// If the set did not contain `elem`, insert it and return true.
    ///
    /// If `elem` is already present, don't change the set, place the cursor at `goto(elem)`, and
    /// return false.
    pub fn insert(&mut self, elem: K) -> bool {
        match self.root.expand() {
            None => {
                // Empty set: the first leaf becomes the new root.
                let root = self.pool.alloc_node(NodeData::leaf(elem, SetValue()));
                *self.root = root.into();
                self.path.set_root_node(root);
                true
            }
            Some(root) => {
                // TODO: Optimize the case where `self.path` is already at the correct insert pos.
                if self.path.find(elem, root, self.pool, self.comp).is_none() {
                    *self.root = self.path.insert(elem, SetValue(), self.pool).into();
                    true
                } else {
                    false
                }
            }
        }
    }

    /// Remove the current element (if any) and return it.
    /// This advances the cursor to the next element after the removed one.
    pub fn remove(&mut self) -> Option<K> {
        let elem = self.elem();
        if elem.is_some() {
            // Removal can restructure the tree, so take the (possibly new) root back.
            *self.root = self.path.remove(self.pool).into();
        }
        elem
    }
}
|
||||
|
||||
#[cfg(test)]
impl<'a, K, C> SetCursor<'a, K, C>
where
    K: Copy + ::std::fmt::Display,
    C: Comparator<K>,
{
    /// Check internal consistency of both the cursor's path and the whole tree.
    fn verify(&self) {
        self.path.verify(self.pool);
        self.root.map(|root| self.pool.verify_tree(root, self.comp));
    }

    /// Get a text version of the path to the current position.
    fn tpath(&self) -> ::std::string::String {
        use std::string::ToString;
        self.path.to_string()
    }
}
|
||||
|
||||
/// An iterator visiting the elements of a `Set`.
pub struct SetIter<'a, K, C>
where
    K: 'a + Copy,
    C: 'a + Comparator<K>,
{
    // Holds the root until the first element has been produced, then cleared.
    root: PackedOption<Node>,
    pool: &'a NodePool<SetTypes<K, C>>,
    path: Path<SetTypes<K, C>>,
}

impl<'a, K, C> Iterator for SetIter<'a, K, C>
where
    K: 'a + Copy,
    C: 'a + Comparator<K>,
{
    type Item = K;

    fn next(&mut self) -> Option<Self::Item> {
        // We use `self.root` to indicate if we need to go to the first element. Reset to `None`
        // once we've returned the first element. This also works for an empty tree since the
        // `path.next()` call returns `None` when the path is empty. This also fuses the iterator.
        match self.root.take() {
            Some(root) => Some(self.path.first(root, self.pool).0),
            None => self.path.next(self.pool).map(|(k, _)| k),
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::super::NodeData;
    use super::*;
    use std::mem;
    use std::vec::Vec;

    #[test]
    fn node_size() {
        // check that nodes are cache line sized when keys are 32 bits.
        type F = SetTypes<u32, ()>;
        assert_eq!(mem::size_of::<NodeData<F>>(), 64);
    }

    #[test]
    fn empty() {
        let mut f = SetForest::<u32, ()>::new();
        f.clear();

        let mut s = Set::<u32, ()>::new();
        assert!(s.is_empty());
        s.clear(&mut f);
        assert!(!s.contains(7, &f, &()));

        // Iterator for an empty set.
        assert_eq!(s.iter(&f).next(), None);

        // The retain predicate must never run on an empty set.
        s.retain(&mut f, |_| unreachable!());

        let mut c = SetCursor::new(&mut s, &mut f, &());
        c.verify();
        assert_eq!(c.elem(), None);

        assert_eq!(c.goto_first(), None);
        assert_eq!(c.tpath(), "<empty path>");
    }

    #[test]
    fn simple_cursor() {
        let mut f = SetForest::<u32, ()>::new();
        let mut s = Set::<u32, ()>::new();
        let mut c = SetCursor::new(&mut s, &mut f, &());

        assert!(c.insert(50));
        c.verify();
        assert_eq!(c.elem(), Some(50));

        assert!(c.insert(100));
        c.verify();
        assert_eq!(c.elem(), Some(100));

        assert!(c.insert(10));
        c.verify();
        assert_eq!(c.elem(), Some(10));

        // Basic movement.
        assert_eq!(c.next(), Some(50));
        assert_eq!(c.next(), Some(100));
        assert_eq!(c.next(), None);
        assert_eq!(c.next(), None);
        assert_eq!(c.prev(), Some(100));
        assert_eq!(c.prev(), Some(50));
        assert_eq!(c.prev(), Some(10));
        assert_eq!(c.prev(), None);
        assert_eq!(c.prev(), None);

        assert!(c.goto(50));
        assert_eq!(c.elem(), Some(50));
        assert_eq!(c.remove(), Some(50));
        c.verify();

        assert_eq!(c.elem(), Some(100));
        assert_eq!(c.remove(), Some(100));
        c.verify();
        assert_eq!(c.elem(), None);
        assert_eq!(c.remove(), None);
        c.verify();
    }

    #[test]
    fn two_level_sparse_tree() {
        let mut f = SetForest::<u32, ()>::new();
        let mut s = Set::<u32, ()>::new();
        let mut c = SetCursor::new(&mut s, &mut f, &());

        // Insert enough elements that we get a two-level tree.
        // Each leaf node holds 8 elements
        assert!(c.is_empty());
        for i in 0..50 {
            assert!(c.insert(i));
            assert_eq!(c.elem(), Some(i));
        }
        assert!(!c.is_empty());

        assert_eq!(c.goto_first(), Some(0));
        assert_eq!(c.tpath(), "node2[0]--node0[0]");

        assert_eq!(c.prev(), None);
        for i in 1..50 {
            assert_eq!(c.next(), Some(i));
        }
        assert_eq!(c.next(), None);
        for i in (0..50).rev() {
            assert_eq!(c.prev(), Some(i));
        }
        assert_eq!(c.prev(), None);

        // Delete forwards from the middle to the end.
        assert!(c.goto(25));
        for i in 25..50 {
            assert_eq!(c.remove(), Some(i));
            assert!(!c.is_empty());
            c.verify();
        }

        // Then delete backwards from the end to the front.
        for i in (0..25).rev() {
            assert!(!c.is_empty());
            assert_eq!(c.elem(), None);
            assert_eq!(c.prev(), Some(i));
            assert_eq!(c.remove(), Some(i));
            c.verify();
        }
        assert_eq!(c.elem(), None);
        assert!(c.is_empty());
    }

    #[test]
    fn three_level_sparse_tree() {
        let mut f = SetForest::<u32, ()>::new();
        let mut s = Set::<u32, ()>::new();
        let mut c = SetCursor::new(&mut s, &mut f, &());

        // Insert enough elements that we get a 3-level tree.
        // Each leaf node holds 8 elements when filled up sequentially.
        // Inner nodes hold 8 node pointers.
        assert!(c.is_empty());
        for i in 0..150 {
            assert!(c.insert(i));
            assert_eq!(c.elem(), Some(i));
        }
        assert!(!c.is_empty());

        assert!(c.goto(0));
        assert_eq!(c.tpath(), "node11[0]--node2[0]--node0[0]");

        assert_eq!(c.prev(), None);
        for i in 1..150 {
            assert_eq!(c.next(), Some(i));
        }
        assert_eq!(c.next(), None);
        for i in (0..150).rev() {
            assert_eq!(c.prev(), Some(i));
        }
        assert_eq!(c.prev(), None);

        // Delete forwards from the middle to the end.
        assert!(c.goto(125));
        for i in 125..150 {
            assert_eq!(c.remove(), Some(i));
            assert!(!c.is_empty());
            c.verify();
        }

        // Then delete backwards from the end to the front.
        for i in (0..125).rev() {
            assert!(!c.is_empty());
            assert_eq!(c.elem(), None);
            assert_eq!(c.prev(), Some(i));
            assert_eq!(c.remove(), Some(i));
            c.verify();
        }
        assert_eq!(c.elem(), None);
        assert!(c.is_empty());
    }

    // Generate a densely populated 4-level tree.
    //
    // Level 1: 1 root
    // Level 2: 8 inner
    // Level 3: 64 inner
    // Level 4: 512 leaves, up to 7680 elements
    //
    // A 3-level tree can hold at most 960 elements.
    fn dense4l(f: &mut SetForest<i32, ()>) -> Set<i32, ()> {
        f.clear();
        let mut s = Set::new();

        // Insert 4000 elements in 7 passes over the range to avoid the half-full leaf node
        // pattern that comes from sequential insertion. This will generate a normal leaf layer.
        for n in 0..4000 {
            assert!(s.insert((n * 7) % 4000, f, &()));
        }
        s
    }

    #[test]
    fn four_level() {
        let mut f = SetForest::<i32, ()>::new();
        let mut s = dense4l(&mut f);

        assert_eq!(
            s.iter(&f).collect::<Vec<_>>()[0..10],
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
        );

        let mut c = s.cursor(&mut f, &());

        c.verify();

        // Peel off a whole sub-tree of the root by deleting from the front.
        // The 900 element is near the front of the second sub-tree.
        assert!(c.goto(900));
        assert_eq!(c.tpath(), "node48[1]--node47[0]--node26[0]--node20[4]");
        assert!(c.goto(0));
        for i in 0..900 {
            assert!(!c.is_empty());
            assert_eq!(c.remove(), Some(i));
        }
        c.verify();
        assert_eq!(c.elem(), Some(900));

        // Delete backwards from somewhere in the middle.
        assert!(c.goto(3000));
        for i in (2000..3000).rev() {
            assert_eq!(c.prev(), Some(i));
            assert_eq!(c.remove(), Some(i));
            assert_eq!(c.elem(), Some(3000));
        }
        c.verify();

        // Remove everything in a scattered manner, triggering many collapsing patterns.
        for i in 0..4000 {
            if c.goto((i * 7) % 4000) {
                c.remove();
            }
        }
        assert!(c.is_empty());
    }

    #[test]
    fn four_level_clear() {
        let mut f = SetForest::<i32, ()>::new();
        let mut s = dense4l(&mut f);
        s.clear(&mut f);
    }
}
|
||||
133
lib/codegen/src/binemit/memorysink.rs
Normal file
133
lib/codegen/src/binemit/memorysink.rs
Normal file
@@ -0,0 +1,133 @@
|
||||
//! Code sink that writes binary machine code into contiguous memory.
|
||||
//!
|
||||
//! The `CodeSink` trait is the most general way of extracting binary machine code from Cretonne,
|
||||
//! and it is implemented by things like the `test binemit` file test driver to generate
|
||||
//! hexadecimal machine code. The `CodeSink` has some undesirable performance properties because of
|
||||
//! the dual abstraction: `TargetIsa` is a trait object implemented by each supported ISA, so it
|
||||
//! can't have any generic functions that could be specialized for each `CodeSink` implementation.
|
||||
//! This results in many virtual function callbacks (one per `put*` call) when
|
||||
//! `TargetIsa::emit_inst()` is used.
|
||||
//!
|
||||
//! The `MemoryCodeSink` type fixes the performance problem because it is a type known to
|
||||
//! `TargetIsa` so it can specialize its machine code generation for the type. The trade-off is
|
||||
//! that a `MemoryCodeSink` will always write binary machine code to raw memory. It forwards any
|
||||
//! relocations to a `RelocSink` trait object. Relocations are less frequent than the
|
||||
//! `CodeSink::put*` methods, so the performance impact of the virtual callbacks is less severe.
|
||||
|
||||
use super::{Addend, CodeOffset, CodeSink, Reloc};
|
||||
use ir::{ExternalName, JumpTable, SourceLoc, TrapCode};
|
||||
use std::ptr::write_unaligned;
|
||||
|
||||
/// A `CodeSink` that writes binary machine code directly into memory.
///
/// A `MemoryCodeSink` object should be used when emitting a Cretonne IR function into executable
/// memory. It writes machine code directly to a raw pointer without any bounds checking, so make
/// sure to allocate enough memory for the whole function. The number of bytes required is returned
/// by the `Context::compile()` function.
///
/// Any relocations in the function are forwarded to the `RelocSink` trait object.
///
/// Note that `MemoryCodeSink` writes multi-byte values in the native byte order of the host. This
/// is not the right thing to do for cross compilation.
pub struct MemoryCodeSink<'a> {
    // Base address the machine code is written to. Writes are not bounds
    // checked; the caller must have allocated enough memory (see type docs).
    data: *mut u8,
    // Byte offset of the next write relative to `data`. Also serves as the
    // current code offset reported to relocations and traps.
    offset: isize,
    // Receives relocation records as they are emitted.
    relocs: &'a mut RelocSink,
    // Receives trap code/location records as they are emitted.
    traps: &'a mut TrapSink,
}
|
||||
|
||||
impl<'a> MemoryCodeSink<'a> {
    /// Create a new memory code sink that writes a function to the memory pointed to by `data`.
    ///
    /// `data` must point to a writable allocation large enough for the whole function — emission
    /// performs no bounds checking. Relocations and traps encountered during emission are
    /// forwarded to `relocs` and `traps` respectively.
    pub fn new<'sink>(
        data: *mut u8,
        relocs: &'sink mut RelocSink,
        traps: &'sink mut TrapSink,
    ) -> MemoryCodeSink<'sink> {
        MemoryCodeSink {
            data,
            // Emission always starts at the beginning of the buffer.
            offset: 0,
            relocs,
            traps,
        }
    }
}
|
||||
|
||||
/// A trait for receiving relocations for code that is emitted directly into memory.
pub trait RelocSink {
    /// Add a relocation referencing an EBB at the current offset.
    ///
    /// The first `CodeOffset` is the location of the relocation itself; the second is the
    /// offset of the referenced EBB.
    fn reloc_ebb(&mut self, CodeOffset, Reloc, CodeOffset);

    /// Add a relocation referencing an external symbol at the current offset.
    ///
    /// The `Addend` is added to the symbol's value when the relocation is resolved.
    fn reloc_external(&mut self, CodeOffset, Reloc, &ExternalName, Addend);

    /// Add a relocation referencing a jump table.
    fn reloc_jt(&mut self, CodeOffset, Reloc, JumpTable);
}

/// A trait for receiving trap codes and offsets.
pub trait TrapSink {
    /// Add trap information for a specific offset.
    fn trap(&mut self, CodeOffset, SourceLoc, TrapCode);
}
|
||||
|
||||
impl<'a> CodeSink for MemoryCodeSink<'a> {
    fn offset(&self) -> CodeOffset {
        // `offset` starts at 0 and only ever grows, so the cast to the
        // unsigned `CodeOffset` type is lossless.
        self.offset as CodeOffset
    }

    fn put1(&mut self, x: u8) {
        // SAFETY (unverified here): relies on the contract of
        // `MemoryCodeSink::new` — the caller must have allocated enough
        // memory for the whole function. No bounds check is performed.
        unsafe {
            write_unaligned(self.data.offset(self.offset), x);
        }
        self.offset += 1;
    }

    fn put2(&mut self, x: u16) {
        // Multi-byte writes use `write_unaligned` in host byte order; see the
        // type-level note about cross compilation.
        unsafe {
            write_unaligned(self.data.offset(self.offset) as *mut u16, x);
        }
        self.offset += 2;
    }

    fn put4(&mut self, x: u32) {
        unsafe {
            write_unaligned(self.data.offset(self.offset) as *mut u32, x);
        }
        self.offset += 4;
    }

    fn put8(&mut self, x: u64) {
        unsafe {
            write_unaligned(self.data.offset(self.offset) as *mut u64, x);
        }
        self.offset += 8;
    }

    // The relocation/trap callbacks record the *current* emission offset and
    // forward to the sinks provided at construction time.

    fn reloc_ebb(&mut self, rel: Reloc, ebb_offset: CodeOffset) {
        let ofs = self.offset();
        self.relocs.reloc_ebb(ofs, rel, ebb_offset);
    }

    fn reloc_external(&mut self, rel: Reloc, name: &ExternalName, addend: Addend) {
        let ofs = self.offset();
        self.relocs.reloc_external(ofs, rel, name, addend);
    }

    fn reloc_jt(&mut self, rel: Reloc, jt: JumpTable) {
        let ofs = self.offset();
        self.relocs.reloc_jt(ofs, rel, jt);
    }

    fn trap(&mut self, code: TrapCode, srcloc: SourceLoc) {
        let ofs = self.offset();
        self.traps.trap(ofs, srcloc, code);
    }
}
|
||||
|
||||
/// A `TrapSink` implementation that does nothing, which is convenient when
/// compiling code that does not rely on trapping semantics.
pub struct NullTrapSink {}

impl TrapSink for NullTrapSink {
    // All trap information is intentionally discarded.
    fn trap(&mut self, _offset: CodeOffset, _srcloc: SourceLoc, _code: TrapCode) {}
}
|
||||
121
lib/codegen/src/binemit/mod.rs
Normal file
121
lib/codegen/src/binemit/mod.rs
Normal file
@@ -0,0 +1,121 @@
|
||||
//! Binary machine code emission.
|
||||
//!
|
||||
//! The `binemit` module contains code for translating Cretonne's intermediate representation into
|
||||
//! binary machine code.
|
||||
|
||||
mod memorysink;
|
||||
mod relaxation;
|
||||
|
||||
pub use self::memorysink::{MemoryCodeSink, RelocSink, TrapSink, NullTrapSink};
|
||||
pub use self::relaxation::relax_branches;
|
||||
pub use regalloc::RegDiversions;
|
||||
|
||||
use ir::{ExternalName, Function, Inst, JumpTable, SourceLoc, TrapCode};
|
||||
use std::fmt;
|
||||
|
||||
/// Offset in bytes from the beginning of the function.
///
/// Cretonne can be used as a cross compiler, so we don't want to use a type like `usize` which
/// depends on the *host* platform, not the *target* platform.
pub type CodeOffset = u32;

/// Addend to add to the symbol value.
pub type Addend = i64;

/// Relocation kinds for every ISA
///
/// Each variant names a fixup that must be applied to the emitted code when linking.
#[derive(Copy, Clone, Debug)]
pub enum Reloc {
    /// absolute 4-byte
    Abs4,
    /// absolute 8-byte
    Abs8,
    /// x86 PC-relative 4-byte
    X86PCRel4,
    /// x86 GOT PC-relative 4-byte
    X86GOTPCRel4,
    /// x86 PLT-relative 4-byte
    X86PLTRel4,
    /// Arm32 call target
    Arm32Call,
    /// Arm64 call target
    Arm64Call,
    /// RISC-V call target
    RiscvCall,
}
|
||||
|
||||
impl fmt::Display for Reloc {
|
||||
/// Display trait implementation drops the arch, since its used in contexts where the arch is
|
||||
/// already unambigious, e.g. cton syntax with isa specified. In other contexts, use Debug.
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match *self {
|
||||
Reloc::Abs4 => write!(f, "{}", "Abs4"),
|
||||
Reloc::Abs8 => write!(f, "{}", "Abs8"),
|
||||
Reloc::X86PCRel4 => write!(f, "{}", "PCRel4"),
|
||||
Reloc::X86GOTPCRel4 => write!(f, "{}", "GOTPCRel4"),
|
||||
Reloc::X86PLTRel4 => write!(f, "{}", "PLTRel4"),
|
||||
Reloc::Arm32Call | Reloc::Arm64Call | Reloc::RiscvCall => write!(f, "{}", "Call"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Abstract interface for adding bytes to the code segment.
///
/// A `CodeSink` will receive all of the machine code for a function. It also accepts relocations
/// which are locations in the code section that need to be fixed up when linking.
///
/// Implementations are expected to track the current emission position and report it via
/// `offset()`; relocations and traps refer to that current offset.
pub trait CodeSink {
    /// Get the current position.
    fn offset(&self) -> CodeOffset;

    /// Add 1 byte to the code section.
    fn put1(&mut self, u8);

    /// Add 2 bytes to the code section.
    fn put2(&mut self, u16);

    /// Add 4 bytes to the code section.
    fn put4(&mut self, u32);

    /// Add 8 bytes to the code section.
    fn put8(&mut self, u64);

    /// Add a relocation referencing an EBB at the current offset.
    fn reloc_ebb(&mut self, Reloc, CodeOffset);

    /// Add a relocation referencing an external symbol plus the addend at the current offset.
    fn reloc_external(&mut self, Reloc, &ExternalName, Addend);

    /// Add a relocation referencing a jump table.
    fn reloc_jt(&mut self, Reloc, JumpTable);

    /// Add trap information for the current offset.
    fn trap(&mut self, TrapCode, SourceLoc);
}
|
||||
|
||||
/// Report a bad encoding error.
///
/// Panics unconditionally, showing the offending encoding and the instruction it was assigned
/// to. Marked `#[cold]` because it is only reached on an internal error path.
#[cold]
pub fn bad_encoding(func: &Function, inst: Inst) -> ! {
    panic!(
        "Bad encoding {} for {}",
        func.encodings[inst],
        func.dfg.display_inst(inst, None)
    );
}
|
||||
|
||||
/// Emit a function to `sink`, given an instruction emitter function.
|
||||
///
|
||||
/// This function is called from the `TargetIsa::emit_function()` implementations with the
|
||||
/// appropriate instruction emitter.
|
||||
pub fn emit_function<CS, EI>(func: &Function, emit_inst: EI, sink: &mut CS)
|
||||
where
|
||||
CS: CodeSink,
|
||||
EI: Fn(&Function, Inst, &mut RegDiversions, &mut CS),
|
||||
{
|
||||
let mut divert = RegDiversions::new();
|
||||
for ebb in func.layout.ebbs() {
|
||||
divert.clear();
|
||||
debug_assert_eq!(func.offsets[ebb], sink.offset());
|
||||
for inst in func.layout.ebb_insts(ebb) {
|
||||
emit_inst(func, inst, &mut divert, sink);
|
||||
}
|
||||
}
|
||||
}
|
||||
198
lib/codegen/src/binemit/relaxation.rs
Normal file
198
lib/codegen/src/binemit/relaxation.rs
Normal file
@@ -0,0 +1,198 @@
|
||||
//! Branch relaxation and offset computation.
|
||||
//!
|
||||
//! # EBB header offsets
|
||||
//!
|
||||
//! Before we can generate binary machine code for branch instructions, we need to know the final
|
||||
//! offsets of all the EBB headers in the function. This information is encoded in the
|
||||
//! `func.offsets` table.
|
||||
//!
|
||||
//! # Branch relaxation
|
||||
//!
|
||||
//! Branch relaxation is the process of ensuring that all branches in the function have enough
|
||||
//! range to encode their destination. It is common to have multiple branch encodings in an ISA.
|
||||
//! For example, x86 branches can have either an 8-bit or a 32-bit displacement.
|
||||
//!
|
||||
//! On RISC architectures, it can happen that conditional branches have a shorter range than
|
||||
//! unconditional branches:
|
||||
//!
|
||||
//! ```cton
|
||||
//! brz v1, ebb17
|
||||
//! ```
|
||||
//!
|
||||
//! can be transformed into:
|
||||
//!
|
||||
//! ```cton
|
||||
//! brnz v1, ebb23
|
||||
//! jump ebb17
|
||||
//! ebb23:
|
||||
//! ```
|
||||
|
||||
use binemit::CodeOffset;
|
||||
use cursor::{Cursor, FuncCursor};
|
||||
use ir::{Function, InstructionData, Opcode};
|
||||
use isa::{EncInfo, TargetIsa};
|
||||
use iterators::IteratorExtras;
|
||||
use result::CtonError;
|
||||
|
||||
/// Relax branches and compute the final layout of EBB headers in `func`.
///
/// Fill in the `func.offsets` table so the function is ready for binary emission.
///
/// Returns the total code size in bytes, or an error.
pub fn relax_branches(func: &mut Function, isa: &TargetIsa) -> Result<CodeOffset, CtonError> {
    let encinfo = isa.encoding_info();

    // Clear all offsets so we can recognize EBBs that haven't been visited yet.
    func.offsets.clear();
    func.offsets.resize(func.dfg.num_ebbs());

    // Start by inserting fall through instructions.
    fallthroughs(func);

    let mut offset = 0;

    // The relaxation algorithm iterates to convergence: relaxing one branch
    // grows the code, which can push other branches out of range.
    let mut go_again = true;
    while go_again {
        go_again = false;
        offset = 0;

        // Visit all instructions in layout order
        let mut cur = FuncCursor::new(func);
        while let Some(ebb) = cur.next_ebb() {
            // Record the offset for `ebb` and make sure we iterate until offsets are stable.
            if cur.func.offsets[ebb] != offset {
                debug_assert!(
                    cur.func.offsets[ebb] < offset,
                    "Code shrinking during relaxation"
                );
                cur.func.offsets[ebb] = offset;
                // An offset moved, so branch ranges computed earlier in this
                // pass may be stale — schedule another pass.
                go_again = true;
            }

            while let Some(inst) = cur.next_inst() {
                let enc = cur.func.encodings[inst];
                let size = encinfo.bytes(enc);

                // See if this might be a branch that is out of range.
                if let Some(range) = encinfo.branch_range(enc) {
                    if let Some(dest) = cur.func.dfg[inst].branch_destination() {
                        let dest_offset = cur.func.offsets[dest];
                        // This could be an out-of-range branch.
                        // Relax it unless the destination offset has not been computed yet.
                        // (Offset 0 means "not yet visited" unless the destination is the
                        // entry block, which genuinely lives at offset 0.)
                        if !range.contains(offset, dest_offset) &&
                            (dest_offset != 0 || Some(dest) == cur.func.layout.entry_block())
                        {
                            offset += relax_branch(&mut cur, offset, dest_offset, &encinfo, isa);
                            continue;
                        }
                    }
                }

                offset += size;
            }
        }
    }

    Ok(offset)
}
|
||||
|
||||
/// Convert `jump` instructions to `fallthrough` instructions where possible and verify that any
/// existing `fallthrough` instructions are correct.
///
/// Operates on adjacent EBB pairs in layout order; only the terminating `Jump` of each EBB is
/// considered, and its encoding is reset when it becomes a fall-through.
fn fallthroughs(func: &mut Function) {
    for (ebb, succ) in func.layout.ebbs().adjacent_pairs() {
        let term = func.layout.last_inst(ebb).expect("EBB has no terminator.");
        if let InstructionData::Jump {
            ref mut opcode,
            destination,
            ..
        } = func.dfg[term]
        {
            match *opcode {
                Opcode::Fallthrough => {
                    // Somebody used a fall-through instruction before the branch relaxation pass.
                    // Make sure it is correct, i.e. the destination is the layout successor.
                    debug_assert_eq!(destination, succ, "Illegal fall-through in {}", ebb)
                }
                Opcode::Jump => {
                    // If this is a jump to the successor EBB, change it to a fall-through.
                    if destination == succ {
                        *opcode = Opcode::Fallthrough;
                        // A fall-through emits no bytes, so drop the old encoding.
                        func.encodings[term] = Default::default();
                    }
                }
                _ => {}
            }
        }
    }
}
|
||||
|
||||
/// Relax the branch instruction at `pos` so it can cover the range `offset - dest_offset`.
///
/// Return the size of the replacement instructions up to and including the location where `pos` is
/// left.
///
/// Panics if no legal encoding with sufficient range and compatible operand constraints exists.
fn relax_branch(
    cur: &mut FuncCursor,
    offset: CodeOffset,
    dest_offset: CodeOffset,
    encinfo: &EncInfo,
    isa: &TargetIsa,
) -> CodeOffset {
    let inst = cur.current_inst().unwrap();
    dbg!(
        "Relaxing [{}] {} for {:#x}-{:#x} range",
        encinfo.display(cur.func.encodings[inst]),
        cur.func.dfg.display_inst(inst, isa),
        offset,
        dest_offset
    );

    // Pick the first encoding that can handle the branch range.
    let dfg = &cur.func.dfg;
    let ctrl_type = dfg.ctrl_typevar(inst);
    if let Some(enc) = isa.legal_encodings(cur.func, &dfg[inst], ctrl_type).find(
        |&enc| {
            let range = encinfo.branch_range(enc).expect("Branch with no range");
            if !range.contains(offset, dest_offset) {
                dbg!(" trying [{}]: out of range", encinfo.display(enc));
                false
            } else if encinfo.operand_constraints(enc) !=
                encinfo.operand_constraints(cur.func.encodings[inst])
            {
                // Conservatively give up if the encoding has different constraints
                // than the original, so that we don't risk picking a new encoding
                // which the existing operands don't satisfy. We can't check for
                // validity directly because we don't have a RegDiversions active so
                // we don't know which registers are actually in use.
                dbg!(" trying [{}]: constraints differ", encinfo.display(enc));
                false
            } else {
                dbg!(" trying [{}]: OK", encinfo.display(enc));
                true
            }
        },
    )
    {
        cur.func.encodings[inst] = enc;
        return encinfo.bytes(enc);
    }

    // Note: On some RISC ISAs, conditional branches have shorter range than unconditional
    // branches, so one way of extending the range of a conditional branch is to invert its
    // condition and make it branch over an unconditional jump which has the larger range.
    //
    // Splitting the EBB is problematic this late because there may be register diversions in
    // effect across the conditional branch, and they can't survive the control flow edge to a new
    // EBB. We have two options for handling that:
    //
    // 1. Set a flag on the new EBB that indicates it wants the preserve the register diversions of
    //    its layout predecessor, or
    // 2. Use an encoding macro for the branch-over-jump pattern so we don't need to split the EBB.
    //
    // It seems that 1. would allow us to share code among RISC ISAs that need this.
    //
    // We can't allow register diversions to survive from the layout predecessor because the layout
    // predecessor could contain kill points for some values that are live in this EBB, and
    // diversions are not automatically cancelled when the live range of a value ends.

    // This assumes solution 2. above:
    panic!("No branch in range for {:#x}-{:#x}", offset, dest_offset);
}
|
||||
155
lib/codegen/src/bitset.rs
Normal file
155
lib/codegen/src/bitset.rs
Normal file
@@ -0,0 +1,155 @@
|
||||
//! Small Bitset
|
||||
//!
|
||||
//! This module defines a struct `BitSet<T>` encapsulating a bitset built over the type T.
|
||||
//! T is intended to be a primitive unsigned type. Currently it can be any type between u8 and u32
|
||||
//!
|
||||
//! If you would like to add support for larger bitsets in the future, you need to change the trait
|
||||
//! bound Into<u32> and the u32 in the implementation of `max_bits()`.
|
||||
use std::convert::{From, Into};
|
||||
use std::mem::size_of;
|
||||
use std::ops::{Add, BitOr, Shl, Sub};
|
||||
|
||||
/// A small bitset built on a single primitive integer type
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct BitSet<T>(pub T);

impl<T> BitSet<T>
where
    T: Into<u32>
        + From<u8>
        + BitOr<T, Output = T>
        + Shl<u8, Output = T>
        + Sub<T, Output = T>
        + Add<T, Output = T>
        + PartialEq
        + Copy,
{
    /// Maximum number of bits supported by this BitSet instance
    pub fn bits() -> usize {
        size_of::<T>() * 8
    }

    /// Maximum number of bits supported by any bitset instance atm.
    pub fn max_bits() -> usize {
        size_of::<u32>() * 8
    }

    /// Check if this BitSet contains the number num
    pub fn contains(&self, num: u8) -> bool {
        debug_assert!((num as usize) < Self::bits());
        debug_assert!((num as usize) < Self::max_bits());
        // Widen the backing word to u32, then test the single bit.
        let word: u32 = self.0.into();
        word & (1u32 << num) != 0
    }

    /// Return the smallest number contained in the bitset or None if empty
    pub fn min(&self) -> Option<u8> {
        let word: u32 = self.0.into();
        if word == 0 {
            None
        } else {
            // The lowest set bit index is exactly the trailing-zero count.
            Some(word.trailing_zeros() as u8)
        }
    }

    /// Return the largest number contained in the bitset or None if empty
    pub fn max(&self) -> Option<u8> {
        let word: u32 = self.0.into();
        if word == 0 {
            return None;
        }
        // Highest set bit index, counted from the u32 width.
        let top = Self::max_bits() - word.leading_zeros() as usize - 1;
        Some(top as u8)
    }

    /// Construct a BitSet with the half-open range [lo,hi) filled in
    pub fn from_range(lo: u8, hi: u8) -> Self {
        debug_assert!(lo <= hi);
        debug_assert!((hi as usize) <= Self::bits());
        let one: T = T::from(1);
        // `(one << hi) - one` would shift-overflow when `hi == bits()`, so the
        // high mask is assembled from `hi - 1` instead.
        let upper_mask = if hi == 0 {
            T::from(0)
        } else {
            (one << (hi - 1)) + ((one << (hi - 1)) - one)
        };
        let lower_mask = (one << lo) - one;

        BitSet(upper_mask - lower_mask)
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn contains() {
        // All eight bits set: every position 0..=7 must be present.
        // (The loop previously ran over `0..7`, leaving bit 7 untested.)
        let s = BitSet::<u8>(255);
        for i in 0..8 {
            assert!(s.contains(i));
        }

        // Empty set: no position may be present.
        let s1 = BitSet::<u8>(0);
        for i in 0..8 {
            assert!(!s1.contains(i));
        }

        // 127 = bits 0..=6 set, bit 7 clear.
        // (The loop previously ran over `0..6`, leaving bit 6 untested.)
        let s2 = BitSet::<u8>(127);
        for i in 0..7 {
            assert!(s2.contains(i));
        }
        assert!(!s2.contains(7));

        let s3 = BitSet::<u8>(2 | 4 | 64);
        assert!(!s3.contains(0) && !s3.contains(3) && !s3.contains(4));
        assert!(!s3.contains(5) && !s3.contains(7));
        assert!(s3.contains(1) && s3.contains(2) && s3.contains(6));

        let s4 = BitSet::<u16>(4 | 8 | 256 | 1024);
        assert!(
            !s4.contains(0) && !s4.contains(1) && !s4.contains(4) && !s4.contains(5) &&
                !s4.contains(6) && !s4.contains(7) && !s4.contains(9) && !s4.contains(11)
        );
        assert!(s4.contains(2) && s4.contains(3) && s4.contains(8) && s4.contains(10));
    }

    #[test]
    fn minmax() {
        let s = BitSet::<u8>(255);
        assert_eq!(s.min(), Some(0));
        assert_eq!(s.max(), Some(7));
        assert!(s.min() == Some(0) && s.max() == Some(7));
        let s1 = BitSet::<u8>(0);
        assert!(s1.min() == None && s1.max() == None);
        let s2 = BitSet::<u8>(127);
        assert!(s2.min() == Some(0) && s2.max() == Some(6));
        let s3 = BitSet::<u8>(2 | 4 | 64);
        assert!(s3.min() == Some(1) && s3.max() == Some(6));
        let s4 = BitSet::<u16>(4 | 8 | 256 | 1024);
        assert!(s4.min() == Some(2) && s4.max() == Some(10));
    }

    #[test]
    fn from_range() {
        // Empty half-open range.
        let s = BitSet::<u8>::from_range(5, 5);
        assert!(s.0 == 0);

        // Full-width range exercises the shift-overflow workaround.
        let s = BitSet::<u8>::from_range(0, 8);
        assert!(s.0 == 255);

        let s = BitSet::<u16>::from_range(0, 8);
        assert!(s.0 == 255u16);

        let s = BitSet::<u16>::from_range(0, 16);
        assert!(s.0 == 65535u16);

        let s = BitSet::<u8>::from_range(5, 6);
        assert!(s.0 == 32u8);

        let s = BitSet::<u8>::from_range(3, 7);
        assert!(s.0 == 8 | 16 | 32 | 64);

        let s = BitSet::<u16>::from_range(5, 11);
        assert!(s.0 == 32 | 64 | 128 | 256 | 512 | 1024);
    }
}
|
||||
76
lib/codegen/src/cfg_printer.rs
Normal file
76
lib/codegen/src/cfg_printer.rs
Normal file
@@ -0,0 +1,76 @@
|
||||
//! The `CFGPrinter` utility.
|
||||
|
||||
use std::fmt::{Display, Formatter, Result, Write};
|
||||
|
||||
use flowgraph::ControlFlowGraph;
|
||||
use ir::Function;
|
||||
use ir::instructions::BranchInfo;
|
||||
|
||||
/// A utility for pretty-printing the CFG of a `Function`.
pub struct CFGPrinter<'a> {
    // The function whose CFG is printed.
    func: &'a Function,
    // The control flow graph, computed from `func` at construction time.
    cfg: ControlFlowGraph,
}
|
||||
|
||||
/// Pretty-printing methods that emit the CFG in graphviz dot format.
impl<'a> CFGPrinter<'a> {
    /// Create a new CFGPrinter.
    ///
    /// Computes the control flow graph of `func` up front.
    pub fn new(func: &'a Function) -> CFGPrinter<'a> {
        CFGPrinter {
            func,
            cfg: ControlFlowGraph::with_function(func),
        }
    }

    /// Write the CFG for this function to `w`.
    ///
    /// The output is graphviz dot: a `digraph` header, one record-shaped node per EBB, and one
    /// edge per CFG predecessor, followed by the closing brace.
    pub fn write(&self, w: &mut Write) -> Result {
        self.header(w)?;
        self.ebb_nodes(w)?;
        self.cfg_connections(w)?;
        writeln!(w, "}}")
    }

    // Emit the opening `digraph` line; the entry block (if any) is pinned to
    // the minimum rank so it renders at the top.
    fn header(&self, w: &mut Write) -> Result {
        writeln!(w, "digraph \"{}\" {{", self.func.name)?;
        if let Some(entry) = self.func.layout.entry_block() {
            writeln!(w, " {{rank=min; {}}}", entry)?;
        }
        Ok(())
    }

    // Emit one record-shaped node per EBB, listing its outgoing branch
    // instructions so edges can attach to the exact branch.
    fn ebb_nodes(&self, w: &mut Write) -> Result {
        for ebb in &self.func.layout {
            write!(w, " {} [shape=record, label=\"{{{}", ebb, ebb)?;
            // Add all outgoing branch instructions to the label.
            for inst in self.func.layout.ebb_insts(ebb) {
                let idata = &self.func.dfg[inst];
                match idata.analyze_branch(&self.func.dfg.value_lists) {
                    BranchInfo::SingleDest(dest, _) => {
                        write!(w, " | <{}>{} {}", inst, idata.opcode(), dest)?
                    }
                    BranchInfo::Table(table) => {
                        write!(w, " | <{}>{} {}", inst, idata.opcode(), table)?
                    }
                    BranchInfo::NotABranch => {}
                }
            }
            writeln!(w, "}}\"]")?
        }
        Ok(())
    }

    // Emit one edge per CFG predecessor, anchored at the branch instruction's
    // port in the predecessor's record node.
    fn cfg_connections(&self, w: &mut Write) -> Result {
        for ebb in &self.func.layout {
            for (parent, inst) in self.cfg.pred_iter(ebb) {
                writeln!(w, " {}:{} -> {}", parent, inst, ebb)?;
            }
        }
        Ok(())
    }
}
|
||||
|
||||
impl<'a> Display for CFGPrinter<'a> {
    // Delegates to `write`, so the dot output can also be produced via
    // `format!`/`to_string`.
    fn fmt(&self, f: &mut Formatter) -> Result {
        self.write(f)
    }
}
|
||||
78
lib/codegen/src/constant_hash.rs
Normal file
78
lib/codegen/src/constant_hash.rs
Normal file
@@ -0,0 +1,78 @@
|
||||
//! Runtime support for precomputed constant hash tables.
|
||||
//!
|
||||
//! The `lib/codegen/meta/constant_hash.py` Python module can generate constant hash tables using
|
||||
//! open addressing and quadratic probing. The hash tables are arrays that are guaranteed to:
|
||||
//!
|
||||
//! - Have a power-of-two size.
|
||||
//! - Contain at least one empty slot.
|
||||
//!
|
||||
//! This module provides runtime support for lookups in these tables.
|
||||
|
||||
/// Trait that must be implemented by the entries in a constant hash table.
pub trait Table<K: Copy + Eq> {
    /// Get the number of entries in this table which must be a power of two.
    fn len(&self) -> usize;

    /// Get the key corresponding to the entry at `idx`, or `None` if the entry is empty.
    /// The `idx` must be in range.
    fn key(&self, idx: usize) -> Option<K>;
}

/// Look for `key` in `table`.
///
/// The provided `hash` value must have been computed from `key` using the same hash function that
/// was used to construct the table.
///
/// Returns `Ok(idx)` with the table index containing the found entry, or `Err(idx)` with the empty
/// sentinel entry if no entry could be found.
pub fn probe<K: Copy + Eq, T: Table<K> + ?Sized>(
    table: &T,
    key: K,
    hash: usize,
) -> Result<usize, usize> {
    debug_assert!(table.len().is_power_of_two());
    // Power-of-two size lets us reduce indices with a bit mask.
    let mask = table.len() - 1;

    let mut slot = hash;
    let mut stride = 0;

    loop {
        slot &= mask;

        match table.key(slot) {
            // An empty slot terminates the probe sequence: the key is absent.
            None => return Err(slot),
            Some(k) => {
                if k == key {
                    return Ok(slot);
                }
            }
        }

        // Quadratic probing: advance by an ever-growing stride.
        stride += 1;
        // When `table.len()` is a power of two, the probe sequence provably
        // visits every slot, so this loop terminates as long as the table has
        // at least one unused entry.
        debug_assert!(stride < table.len());
        slot += stride;
    }
}
|
||||
|
||||
/// A primitive hash function for matching opcodes.
/// Must match `lib/codegen/meta/constant_hash.py`.
pub fn simple_hash(s: &str) -> usize {
    // DJB2-style mix: xor in each character, then add a 6-bit right rotation
    // of the running state. Must stay in lock-step with the Python version.
    let state = s.chars().fold(5381u32, |h, c| {
        (h ^ c as u32).wrapping_add(h.rotate_right(6))
    });
    state as usize
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::simple_hash;

    #[test]
    fn basic() {
        // c.f. `meta/constant_hash.py` tests.
        // These constants pin the hash to the Python implementation; if either
        // side changes, precomputed tables would no longer match.
        assert_eq!(simple_hash("Hello"), 0x2fa70c01);
        assert_eq!(simple_hash("world"), 0x5b0c31d5);
    }
}
|
||||
272
lib/codegen/src/context.rs
Normal file
272
lib/codegen/src/context.rs
Normal file
@@ -0,0 +1,272 @@
|
||||
//! Cretonne compilation context and main entry point.
|
||||
//!
|
||||
//! When compiling many small functions, it is important to avoid repeatedly allocating and
|
||||
//! deallocating the data structures needed for compilation. The `Context` struct is used to hold
|
||||
//! on to memory allocations between function compilations.
|
||||
//!
|
||||
//! The context does not hold a `TargetIsa` instance which has to be provided as an argument
|
||||
//! instead. This is because an ISA instance is immutable and can be used by multiple compilation
|
||||
//! contexts concurrently. Typically, you would have one context per compilation thread and only a
|
||||
//! single ISA instance.
|
||||
|
||||
use binemit::{relax_branches, CodeOffset, MemoryCodeSink, RelocSink, TrapSink};
|
||||
use dce::do_dce;
|
||||
use dominator_tree::DominatorTree;
|
||||
use flowgraph::ControlFlowGraph;
|
||||
use ir::Function;
|
||||
use isa::TargetIsa;
|
||||
use legalize_function;
|
||||
use licm::do_licm;
|
||||
use loop_analysis::LoopAnalysis;
|
||||
use postopt::do_postopt;
|
||||
use preopt::do_preopt;
|
||||
use regalloc;
|
||||
use result::{CtonError, CtonResult};
|
||||
use settings::{FlagsOrIsa, OptLevel};
|
||||
use simple_gvn::do_simple_gvn;
|
||||
use timing;
|
||||
use unreachable_code::eliminate_unreachable_code;
|
||||
use verifier;
|
||||
|
||||
/// Persistent data structures and compilation pipeline.
///
/// Fields are public so driver code can run and inspect individual passes.
pub struct Context {
    /// The function we're compiling.
    pub func: Function,

    /// The control flow graph of `func`.
    pub cfg: ControlFlowGraph,

    /// Dominator tree for `func`.
    pub domtree: DominatorTree,

    /// Register allocation context.
    pub regalloc: regalloc::Context,

    /// Loop analysis of `func`.
    pub loop_analysis: LoopAnalysis,
}
|
||||
|
||||
impl Context {
|
||||
    /// Allocate a new compilation context.
    ///
    /// The returned instance should be reused for compiling multiple functions in order to avoid
    /// needless allocator thrashing.
    pub fn new() -> Self {
        // Start from an empty function; `for_function` allocates the rest.
        Context::for_function(Function::new())
    }
|
||||
|
||||
/// Allocate a new compilation context with an existing Function.
|
||||
///
|
||||
/// The returned instance should be reused for compiling multiple functions in order to avoid
|
||||
/// needless allocator thrashing.
|
||||
pub fn for_function(func: Function) -> Self {
|
||||
Self {
|
||||
func: func,
|
||||
cfg: ControlFlowGraph::new(),
|
||||
domtree: DominatorTree::new(),
|
||||
regalloc: regalloc::Context::new(),
|
||||
loop_analysis: LoopAnalysis::new(),
|
||||
}
|
||||
}
|
||||
|
||||
    /// Clear all data structures in this context.
    ///
    /// Leaves allocations in place so the context can be reused for the next
    /// function without reallocating.
    pub fn clear(&mut self) {
        self.func.clear();
        self.cfg.clear();
        self.domtree.clear();
        self.regalloc.clear();
        self.loop_analysis.clear();
    }
|
||||
|
||||
    /// Compile the function.
    ///
    /// Run the function through all the passes necessary to generate code for the target ISA
    /// represented by `isa`. This does not include the final step of emitting machine code into a
    /// code sink.
    ///
    /// Returns the size of the function's code.
    pub fn compile(&mut self, isa: &TargetIsa) -> Result<CodeOffset, CtonError> {
        let _tt = timing::compile();
        self.verify_if(isa)?;

        self.compute_cfg();
        // Pre/post-legalization rewrites are skipped at the fastest opt level.
        if isa.flags().opt_level() != OptLevel::Fastest {
            self.preopt(isa)?;
        }
        self.legalize(isa)?;
        if isa.flags().opt_level() != OptLevel::Fastest {
            self.postopt(isa)?;
        }
        // LICM and GVN only run at the `Best` level; both need the dominator
        // tree and loop analysis computed first.
        if isa.flags().opt_level() == OptLevel::Best {
            self.compute_domtree();
            self.compute_loop_analysis();
            self.licm(isa)?;
            self.simple_gvn(isa)?;
        }
        // Recompute the domtree: legalization clears it because it may have
        // mutated the CFG.
        self.compute_domtree();
        self.eliminate_unreachable_code(isa)?;
        if isa.flags().opt_level() != OptLevel::Fastest {
            self.dce(isa)?;
        }
        self.regalloc(isa)?;
        self.prologue_epilogue(isa)?;
        // Branch relaxation computes final EBB offsets and yields the total
        // code size.
        self.relax_branches(isa)
    }
|
||||
|
||||
    /// Emit machine code directly into raw memory.
    ///
    /// Write all of the function's machine code to the memory at `mem`. The size of the machine
    /// code is returned by `compile` above.
    ///
    /// `mem` must point to a writable allocation at least that large; `MemoryCodeSink` performs
    /// no bounds checking.
    ///
    /// The machine code is not relocated. Instead, any relocations are emitted into `relocs`.
    pub fn emit_to_memory(
        &self,
        mem: *mut u8,
        relocs: &mut RelocSink,
        traps: &mut TrapSink,
        isa: &TargetIsa,
    ) {
        let _tt = timing::binemit();
        isa.emit_function(&self.func, &mut MemoryCodeSink::new(mem, relocs, traps));
    }
|
||||
|
||||
    /// Run the verifier on the function.
    ///
    /// Also check that the dominator tree and control flow graph are consistent with the function.
    pub fn verify<'a, FOI: Into<FlagsOrIsa<'a>>>(&self, fisa: FOI) -> verifier::Result {
        verifier::verify_context(&self.func, &self.cfg, &self.domtree, fisa)
    }

    /// Run the verifier only if the `enable_verifier` setting is true.
    pub fn verify_if<'a, FOI: Into<FlagsOrIsa<'a>>>(&self, fisa: FOI) -> CtonResult {
        let fisa = fisa.into();
        if fisa.flags.enable_verifier() {
            // Convert the verifier error into the generic `CtonError`.
            self.verify(fisa).map_err(Into::into)
        } else {
            Ok(())
        }
    }
|
||||
|
||||
/// Run the locations verifier on the function.
|
||||
pub fn verify_locations(&self, isa: &TargetIsa) -> verifier::Result {
|
||||
verifier::verify_locations(isa, &self.func, None)
|
||||
}
|
||||
|
||||
/// Run the locations verifier only if the `enable_verifier` setting is true.
|
||||
pub fn verify_locations_if(&self, isa: &TargetIsa) -> CtonResult {
|
||||
if isa.flags().enable_verifier() {
|
||||
self.verify_locations(isa).map_err(Into::into)
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Perform dead-code elimination on the function.
|
||||
pub fn dce<'a, FOI: Into<FlagsOrIsa<'a>>>(&mut self, fisa: FOI) -> CtonResult {
|
||||
do_dce(&mut self.func, &mut self.domtree);
|
||||
self.verify_if(fisa)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Perform pre-legalization rewrites on the function.
|
||||
pub fn preopt(&mut self, isa: &TargetIsa) -> CtonResult {
|
||||
do_preopt(&mut self.func);
|
||||
self.verify_if(isa)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Run the legalizer for `isa` on the function.
|
||||
pub fn legalize(&mut self, isa: &TargetIsa) -> CtonResult {
|
||||
// Legalization invalidates the domtree and loop_analysis by mutating the CFG.
|
||||
// TODO: Avoid doing this when legalization doesn't actually mutate the CFG.
|
||||
self.domtree.clear();
|
||||
self.loop_analysis.clear();
|
||||
legalize_function(&mut self.func, &mut self.cfg, isa);
|
||||
self.verify_if(isa)
|
||||
}
|
||||
|
||||
/// Perform post-legalization rewrites on the function.
|
||||
pub fn postopt(&mut self, isa: &TargetIsa) -> CtonResult {
|
||||
do_postopt(&mut self.func, isa);
|
||||
self.verify_if(isa)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Compute the control flow graph.
|
||||
pub fn compute_cfg(&mut self) {
|
||||
self.cfg.compute(&self.func)
|
||||
}
|
||||
|
||||
/// Compute dominator tree.
|
||||
pub fn compute_domtree(&mut self) {
|
||||
self.domtree.compute(&self.func, &self.cfg)
|
||||
}
|
||||
|
||||
/// Compute the loop analysis.
|
||||
pub fn compute_loop_analysis(&mut self) {
|
||||
self.loop_analysis.compute(
|
||||
&self.func,
|
||||
&self.cfg,
|
||||
&self.domtree,
|
||||
)
|
||||
}
|
||||
|
||||
/// Compute the control flow graph and dominator tree.
|
||||
pub fn flowgraph(&mut self) {
|
||||
self.compute_cfg();
|
||||
self.compute_domtree()
|
||||
}
|
||||
|
||||
/// Perform simple GVN on the function.
|
||||
pub fn simple_gvn<'a, FOI: Into<FlagsOrIsa<'a>>>(&mut self, fisa: FOI) -> CtonResult {
|
||||
do_simple_gvn(&mut self.func, &mut self.domtree);
|
||||
self.verify_if(fisa)
|
||||
}
|
||||
|
||||
/// Perform LICM on the function.
|
||||
pub fn licm<'a, FOI: Into<FlagsOrIsa<'a>>>(&mut self, fisa: FOI) -> CtonResult {
|
||||
do_licm(
|
||||
&mut self.func,
|
||||
&mut self.cfg,
|
||||
&mut self.domtree,
|
||||
&mut self.loop_analysis,
|
||||
);
|
||||
self.verify_if(fisa)
|
||||
}
|
||||
|
||||
/// Perform unreachable code elimination.
|
||||
pub fn eliminate_unreachable_code<'a, FOI>(&mut self, fisa: FOI) -> CtonResult
|
||||
where
|
||||
FOI: Into<FlagsOrIsa<'a>>,
|
||||
{
|
||||
eliminate_unreachable_code(&mut self.func, &mut self.cfg, &self.domtree);
|
||||
self.verify_if(fisa)
|
||||
}
|
||||
|
||||
/// Run the register allocator.
|
||||
pub fn regalloc(&mut self, isa: &TargetIsa) -> CtonResult {
|
||||
self.regalloc.run(
|
||||
isa,
|
||||
&mut self.func,
|
||||
&self.cfg,
|
||||
&mut self.domtree,
|
||||
)
|
||||
}
|
||||
|
||||
/// Insert prologue and epilogues after computing the stack frame layout.
|
||||
pub fn prologue_epilogue(&mut self, isa: &TargetIsa) -> CtonResult {
|
||||
isa.prologue_epilogue(&mut self.func)?;
|
||||
self.verify_if(isa)?;
|
||||
self.verify_locations_if(isa)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Run the branch relaxation pass and return the final code size.
|
||||
pub fn relax_branches(&mut self, isa: &TargetIsa) -> Result<CodeOffset, CtonError> {
|
||||
let code_size = relax_branches(&mut self.func, isa)?;
|
||||
self.verify_if(isa)?;
|
||||
self.verify_locations_if(isa)?;
|
||||
|
||||
Ok(code_size)
|
||||
}
|
||||
}
|
||||
760
lib/codegen/src/cursor.rs
Normal file
760
lib/codegen/src/cursor.rs
Normal file
@@ -0,0 +1,760 @@
|
||||
//! Cursor library.
|
||||
//!
|
||||
//! This module defines cursor data types that can be used for inserting instructions.
|
||||
|
||||
use ir;
|
||||
use isa::TargetIsa;
|
||||
|
||||
/// The possible positions of a cursor.
|
||||
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
|
||||
pub enum CursorPosition {
|
||||
/// Cursor is not pointing anywhere. No instructions can be inserted.
|
||||
Nowhere,
|
||||
/// Cursor is pointing at an existing instruction.
|
||||
/// New instructions will be inserted *before* the current instruction.
|
||||
At(ir::Inst),
|
||||
/// Cursor is before the beginning of an EBB. No instructions can be inserted. Calling
|
||||
/// `next_inst()` will move to the first instruction in the EBB.
|
||||
Before(ir::Ebb),
|
||||
/// Cursor is pointing after the end of an EBB.
|
||||
/// New instructions will be appended to the EBB.
|
||||
After(ir::Ebb),
|
||||
}
|
||||
|
||||
/// All cursor types implement the `Cursor` which provides common navigation operations.
|
||||
pub trait Cursor {
|
||||
/// Get the current cursor position.
|
||||
fn position(&self) -> CursorPosition;
|
||||
|
||||
/// Set the current position.
|
||||
fn set_position(&mut self, pos: CursorPosition);
|
||||
|
||||
/// Get the source location that should be assigned to new instructions.
|
||||
fn srcloc(&self) -> ir::SourceLoc;
|
||||
|
||||
/// Set the source location that should be assigned to new instructions.
|
||||
fn set_srcloc(&mut self, srcloc: ir::SourceLoc);
|
||||
|
||||
/// Borrow a reference to the function layout that this cursor is navigating.
|
||||
fn layout(&self) -> &ir::Layout;
|
||||
|
||||
/// Borrow a mutable reference to the function layout that this cursor is navigating.
|
||||
fn layout_mut(&mut self) -> &mut ir::Layout;
|
||||
|
||||
/// Exchange this cursor for one with a set source location.
|
||||
///
|
||||
/// This is intended to be used as a builder method:
|
||||
///
|
||||
/// ```
|
||||
/// # use cretonne_codegen::ir::{Function, Ebb, SourceLoc};
|
||||
/// # use cretonne_codegen::cursor::{Cursor, FuncCursor};
|
||||
/// fn edit_func(func: &mut Function, srcloc: SourceLoc) {
|
||||
/// let mut pos = FuncCursor::new(func).with_srcloc(srcloc);
|
||||
///
|
||||
/// // Use `pos`...
|
||||
/// }
|
||||
/// ```
|
||||
fn with_srcloc(mut self, srcloc: ir::SourceLoc) -> Self
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
self.set_srcloc(srcloc);
|
||||
self
|
||||
}
|
||||
|
||||
/// Rebuild this cursor positioned at `pos`.
|
||||
fn at_position(mut self, pos: CursorPosition) -> Self
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
self.set_position(pos);
|
||||
self
|
||||
}
|
||||
|
||||
/// Rebuild this cursor positioned at `inst`.
|
||||
///
|
||||
/// This is intended to be used as a builder method:
|
||||
///
|
||||
/// ```
|
||||
/// # use cretonne_codegen::ir::{Function, Ebb, Inst};
|
||||
/// # use cretonne_codegen::cursor::{Cursor, FuncCursor};
|
||||
/// fn edit_func(func: &mut Function, inst: Inst) {
|
||||
/// let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
///
|
||||
/// // Use `pos`...
|
||||
/// }
|
||||
/// ```
|
||||
fn at_inst(mut self, inst: ir::Inst) -> Self
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
self.goto_inst(inst);
|
||||
self
|
||||
}
|
||||
|
||||
/// Rebuild this cursor positioned at the first insertion point for `ebb`.
|
||||
/// This differs from `at_first_inst` in that it doesn't assume that any
|
||||
/// instructions have been inserted into `ebb` yet.
|
||||
///
|
||||
/// This is intended to be used as a builder method:
|
||||
///
|
||||
/// ```
|
||||
/// # use cretonne_codegen::ir::{Function, Ebb, Inst};
|
||||
/// # use cretonne_codegen::cursor::{Cursor, FuncCursor};
|
||||
/// fn edit_func(func: &mut Function, ebb: Ebb) {
|
||||
/// let mut pos = FuncCursor::new(func).at_first_insertion_point(ebb);
|
||||
///
|
||||
/// // Use `pos`...
|
||||
/// }
|
||||
/// ```
|
||||
fn at_first_insertion_point(mut self, ebb: ir::Ebb) -> Self
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
self.goto_first_insertion_point(ebb);
|
||||
self
|
||||
}
|
||||
|
||||
/// Rebuild this cursor positioned at the first instruction in `ebb`.
|
||||
///
|
||||
/// This is intended to be used as a builder method:
|
||||
///
|
||||
/// ```
|
||||
/// # use cretonne_codegen::ir::{Function, Ebb, Inst};
|
||||
/// # use cretonne_codegen::cursor::{Cursor, FuncCursor};
|
||||
/// fn edit_func(func: &mut Function, ebb: Ebb) {
|
||||
/// let mut pos = FuncCursor::new(func).at_first_inst(ebb);
|
||||
///
|
||||
/// // Use `pos`...
|
||||
/// }
|
||||
/// ```
|
||||
fn at_first_inst(mut self, ebb: ir::Ebb) -> Self
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
self.goto_first_inst(ebb);
|
||||
self
|
||||
}
|
||||
|
||||
/// Rebuild this cursor positioned at the last instruction in `ebb`.
|
||||
///
|
||||
/// This is intended to be used as a builder method:
|
||||
///
|
||||
/// ```
|
||||
/// # use cretonne_codegen::ir::{Function, Ebb, Inst};
|
||||
/// # use cretonne_codegen::cursor::{Cursor, FuncCursor};
|
||||
/// fn edit_func(func: &mut Function, ebb: Ebb) {
|
||||
/// let mut pos = FuncCursor::new(func).at_last_inst(ebb);
|
||||
///
|
||||
/// // Use `pos`...
|
||||
/// }
|
||||
/// ```
|
||||
fn at_last_inst(mut self, ebb: ir::Ebb) -> Self
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
self.goto_last_inst(ebb);
|
||||
self
|
||||
}
|
||||
|
||||
/// Rebuild this cursor positioned after `inst`.
|
||||
///
|
||||
/// This is intended to be used as a builder method:
|
||||
///
|
||||
/// ```
|
||||
/// # use cretonne_codegen::ir::{Function, Ebb, Inst};
|
||||
/// # use cretonne_codegen::cursor::{Cursor, FuncCursor};
|
||||
/// fn edit_func(func: &mut Function, inst: Inst) {
|
||||
/// let mut pos = FuncCursor::new(func).after_inst(inst);
|
||||
///
|
||||
/// // Use `pos`...
|
||||
/// }
|
||||
/// ```
|
||||
fn after_inst(mut self, inst: ir::Inst) -> Self
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
self.goto_after_inst(inst);
|
||||
self
|
||||
}
|
||||
|
||||
/// Rebuild this cursor positioned at the top of `ebb`.
|
||||
///
|
||||
/// This is intended to be used as a builder method:
|
||||
///
|
||||
/// ```
|
||||
/// # use cretonne_codegen::ir::{Function, Ebb, Inst};
|
||||
/// # use cretonne_codegen::cursor::{Cursor, FuncCursor};
|
||||
/// fn edit_func(func: &mut Function, ebb: Ebb) {
|
||||
/// let mut pos = FuncCursor::new(func).at_top(ebb);
|
||||
///
|
||||
/// // Use `pos`...
|
||||
/// }
|
||||
/// ```
|
||||
fn at_top(mut self, ebb: ir::Ebb) -> Self
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
self.goto_top(ebb);
|
||||
self
|
||||
}
|
||||
|
||||
/// Rebuild this cursor positioned at the bottom of `ebb`.
|
||||
///
|
||||
/// This is intended to be used as a builder method:
|
||||
///
|
||||
/// ```
|
||||
/// # use cretonne_codegen::ir::{Function, Ebb, Inst};
|
||||
/// # use cretonne_codegen::cursor::{Cursor, FuncCursor};
|
||||
/// fn edit_func(func: &mut Function, ebb: Ebb) {
|
||||
/// let mut pos = FuncCursor::new(func).at_bottom(ebb);
|
||||
///
|
||||
/// // Use `pos`...
|
||||
/// }
|
||||
/// ```
|
||||
fn at_bottom(mut self, ebb: ir::Ebb) -> Self
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
self.goto_bottom(ebb);
|
||||
self
|
||||
}
|
||||
|
||||
/// Get the EBB corresponding to the current position.
|
||||
fn current_ebb(&self) -> Option<ir::Ebb> {
|
||||
use self::CursorPosition::*;
|
||||
match self.position() {
|
||||
Nowhere => None,
|
||||
At(inst) => self.layout().inst_ebb(inst),
|
||||
Before(ebb) | After(ebb) => Some(ebb),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the instruction corresponding to the current position, if any.
|
||||
fn current_inst(&self) -> Option<ir::Inst> {
|
||||
use self::CursorPosition::*;
|
||||
match self.position() {
|
||||
At(inst) => Some(inst),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Go to the position after a specific instruction, which must be inserted
|
||||
/// in the layout. New instructions will be inserted after `inst`.
|
||||
fn goto_after_inst(&mut self, inst: ir::Inst) {
|
||||
debug_assert!(self.layout().inst_ebb(inst).is_some());
|
||||
let new_pos = if let Some(next) = self.layout().next_inst(inst) {
|
||||
CursorPosition::At(next)
|
||||
} else {
|
||||
CursorPosition::After(self.layout().inst_ebb(inst).expect(
|
||||
"current instruction removed?",
|
||||
))
|
||||
};
|
||||
self.set_position(new_pos);
|
||||
}
|
||||
|
||||
/// Go to a specific instruction which must be inserted in the layout.
|
||||
/// New instructions will be inserted before `inst`.
|
||||
fn goto_inst(&mut self, inst: ir::Inst) {
|
||||
debug_assert!(self.layout().inst_ebb(inst).is_some());
|
||||
self.set_position(CursorPosition::At(inst));
|
||||
}
|
||||
|
||||
/// Go to the position for inserting instructions at the beginning of `ebb`,
|
||||
/// which unlike `goto_first_inst` doesn't assume that any instructions have
|
||||
/// been inserted into `ebb` yet.
|
||||
fn goto_first_insertion_point(&mut self, ebb: ir::Ebb) {
|
||||
if let Some(inst) = self.layout().first_inst(ebb) {
|
||||
self.goto_inst(inst);
|
||||
} else {
|
||||
self.goto_bottom(ebb);
|
||||
}
|
||||
}
|
||||
|
||||
/// Go to the first instruction in `ebb`.
|
||||
fn goto_first_inst(&mut self, ebb: ir::Ebb) {
|
||||
let inst = self.layout().first_inst(ebb).expect("Empty EBB");
|
||||
self.goto_inst(inst);
|
||||
}
|
||||
|
||||
/// Go to the last instruction in `ebb`.
|
||||
fn goto_last_inst(&mut self, ebb: ir::Ebb) {
|
||||
let inst = self.layout().last_inst(ebb).expect("Empty EBB");
|
||||
self.goto_inst(inst);
|
||||
}
|
||||
|
||||
/// Go to the top of `ebb` which must be inserted into the layout.
|
||||
/// At this position, instructions cannot be inserted, but `next_inst()` will move to the first
|
||||
/// instruction in `ebb`.
|
||||
fn goto_top(&mut self, ebb: ir::Ebb) {
|
||||
debug_assert!(self.layout().is_ebb_inserted(ebb));
|
||||
self.set_position(CursorPosition::Before(ebb));
|
||||
}
|
||||
|
||||
/// Go to the bottom of `ebb` which must be inserted into the layout.
|
||||
/// At this position, inserted instructions will be appended to `ebb`.
|
||||
fn goto_bottom(&mut self, ebb: ir::Ebb) {
|
||||
debug_assert!(self.layout().is_ebb_inserted(ebb));
|
||||
self.set_position(CursorPosition::After(ebb));
|
||||
}
|
||||
|
||||
/// Go to the top of the next EBB in layout order and return it.
|
||||
///
|
||||
/// - If the cursor wasn't pointing at anything, go to the top of the first EBB in the
|
||||
/// function.
|
||||
/// - If there are no more EBBs, leave the cursor pointing at nothing and return `None`.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// The `next_ebb()` method is intended for iterating over the EBBs in layout order:
|
||||
///
|
||||
/// ```
|
||||
/// # use cretonne_codegen::ir::{Function, Ebb};
|
||||
/// # use cretonne_codegen::cursor::{Cursor, FuncCursor};
|
||||
/// fn edit_func(func: &mut Function) {
|
||||
/// let mut cursor = FuncCursor::new(func);
|
||||
/// while let Some(ebb) = cursor.next_ebb() {
|
||||
/// // Edit ebb.
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
fn next_ebb(&mut self) -> Option<ir::Ebb> {
|
||||
let next = if let Some(ebb) = self.current_ebb() {
|
||||
self.layout().next_ebb(ebb)
|
||||
} else {
|
||||
self.layout().entry_block()
|
||||
};
|
||||
self.set_position(match next {
|
||||
Some(ebb) => CursorPosition::Before(ebb),
|
||||
None => CursorPosition::Nowhere,
|
||||
});
|
||||
next
|
||||
}
|
||||
|
||||
/// Go to the bottom of the previous EBB in layout order and return it.
|
||||
///
|
||||
/// - If the cursor wasn't pointing at anything, go to the bottom of the last EBB in the
|
||||
/// function.
|
||||
/// - If there are no more EBBs, leave the cursor pointing at nothing and return `None`.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// The `prev_ebb()` method is intended for iterating over the EBBs in backwards layout order:
|
||||
///
|
||||
/// ```
|
||||
/// # use cretonne_codegen::ir::{Function, Ebb};
|
||||
/// # use cretonne_codegen::cursor::{Cursor, FuncCursor};
|
||||
/// fn edit_func(func: &mut Function) {
|
||||
/// let mut cursor = FuncCursor::new(func);
|
||||
/// while let Some(ebb) = cursor.prev_ebb() {
|
||||
/// // Edit ebb.
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
fn prev_ebb(&mut self) -> Option<ir::Ebb> {
|
||||
let prev = if let Some(ebb) = self.current_ebb() {
|
||||
self.layout().prev_ebb(ebb)
|
||||
} else {
|
||||
self.layout().last_ebb()
|
||||
};
|
||||
self.set_position(match prev {
|
||||
Some(ebb) => CursorPosition::After(ebb),
|
||||
None => CursorPosition::Nowhere,
|
||||
});
|
||||
prev
|
||||
}
|
||||
|
||||
/// Move to the next instruction in the same EBB and return it.
|
||||
///
|
||||
/// - If the cursor was positioned before an EBB, go to the first instruction in that EBB.
|
||||
/// - If there are no more instructions in the EBB, go to the `After(ebb)` position and return
|
||||
/// `None`.
|
||||
/// - If the cursor wasn't pointing anywhere, keep doing that.
|
||||
///
|
||||
/// This method will never move the cursor to a different EBB.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// The `next_inst()` method is intended for iterating over the instructions in an EBB like
|
||||
/// this:
|
||||
///
|
||||
/// ```
|
||||
/// # use cretonne_codegen::ir::{Function, Ebb};
|
||||
/// # use cretonne_codegen::cursor::{Cursor, FuncCursor};
|
||||
/// fn edit_ebb(func: &mut Function, ebb: Ebb) {
|
||||
/// let mut cursor = FuncCursor::new(func).at_top(ebb);
|
||||
/// while let Some(inst) = cursor.next_inst() {
|
||||
/// // Edit instructions...
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
/// The loop body can insert and remove instructions via the cursor.
|
||||
///
|
||||
/// Iterating over all the instructions in a function looks like this:
|
||||
///
|
||||
/// ```
|
||||
/// # use cretonne_codegen::ir::{Function, Ebb};
|
||||
/// # use cretonne_codegen::cursor::{Cursor, FuncCursor};
|
||||
/// fn edit_func(func: &mut Function) {
|
||||
/// let mut cursor = FuncCursor::new(func);
|
||||
/// while let Some(ebb) = cursor.next_ebb() {
|
||||
/// while let Some(inst) = cursor.next_inst() {
|
||||
/// // Edit instructions...
|
||||
/// }
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
fn next_inst(&mut self) -> Option<ir::Inst> {
|
||||
use self::CursorPosition::*;
|
||||
match self.position() {
|
||||
Nowhere | After(..) => None,
|
||||
At(inst) => {
|
||||
if let Some(next) = self.layout().next_inst(inst) {
|
||||
self.set_position(At(next));
|
||||
Some(next)
|
||||
} else {
|
||||
let pos = After(self.layout().inst_ebb(inst).expect(
|
||||
"current instruction removed?",
|
||||
));
|
||||
self.set_position(pos);
|
||||
None
|
||||
}
|
||||
}
|
||||
Before(ebb) => {
|
||||
if let Some(next) = self.layout().first_inst(ebb) {
|
||||
self.set_position(At(next));
|
||||
Some(next)
|
||||
} else {
|
||||
self.set_position(After(ebb));
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Move to the previous instruction in the same EBB and return it.
|
||||
///
|
||||
/// - If the cursor was positioned after an EBB, go to the last instruction in that EBB.
|
||||
/// - If there are no more instructions in the EBB, go to the `Before(ebb)` position and return
|
||||
/// `None`.
|
||||
/// - If the cursor wasn't pointing anywhere, keep doing that.
|
||||
///
|
||||
/// This method will never move the cursor to a different EBB.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// The `prev_inst()` method is intended for iterating backwards over the instructions in an
|
||||
/// EBB like this:
|
||||
///
|
||||
/// ```
|
||||
/// # use cretonne_codegen::ir::{Function, Ebb};
|
||||
/// # use cretonne_codegen::cursor::{Cursor, FuncCursor};
|
||||
/// fn edit_ebb(func: &mut Function, ebb: Ebb) {
|
||||
/// let mut cursor = FuncCursor::new(func).at_bottom(ebb);
|
||||
/// while let Some(inst) = cursor.prev_inst() {
|
||||
/// // Edit instructions...
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
fn prev_inst(&mut self) -> Option<ir::Inst> {
|
||||
use self::CursorPosition::*;
|
||||
match self.position() {
|
||||
Nowhere | Before(..) => None,
|
||||
At(inst) => {
|
||||
if let Some(prev) = self.layout().prev_inst(inst) {
|
||||
self.set_position(At(prev));
|
||||
Some(prev)
|
||||
} else {
|
||||
let pos = Before(self.layout().inst_ebb(inst).expect(
|
||||
"current instruction removed?",
|
||||
));
|
||||
self.set_position(pos);
|
||||
None
|
||||
}
|
||||
}
|
||||
After(ebb) => {
|
||||
if let Some(prev) = self.layout().last_inst(ebb) {
|
||||
self.set_position(At(prev));
|
||||
Some(prev)
|
||||
} else {
|
||||
self.set_position(Before(ebb));
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Insert an instruction at the current position.
|
||||
///
|
||||
/// - If pointing at an instruction, the new instruction is inserted before the current
|
||||
/// instruction.
|
||||
/// - If pointing at the bottom of an EBB, the new instruction is appended to the EBB.
|
||||
/// - Otherwise panic.
|
||||
///
|
||||
/// In either case, the cursor is not moved, such that repeated calls to `insert_inst()` causes
|
||||
/// instructions to appear in insertion order in the EBB.
|
||||
fn insert_inst(&mut self, inst: ir::Inst) {
|
||||
use self::CursorPosition::*;
|
||||
match self.position() {
|
||||
Nowhere | Before(..) => panic!("Invalid insert_inst position"),
|
||||
At(cur) => self.layout_mut().insert_inst(inst, cur),
|
||||
After(ebb) => self.layout_mut().append_inst(inst, ebb),
|
||||
}
|
||||
}
|
||||
|
||||
/// Remove the instruction under the cursor.
|
||||
///
|
||||
/// The cursor is left pointing at the position following the current instruction.
|
||||
///
|
||||
/// Return the instruction that was removed.
|
||||
fn remove_inst(&mut self) -> ir::Inst {
|
||||
let inst = self.current_inst().expect("No instruction to remove");
|
||||
self.next_inst();
|
||||
self.layout_mut().remove_inst(inst);
|
||||
inst
|
||||
}
|
||||
|
||||
/// Remove the instruction under the cursor.
|
||||
///
|
||||
/// The cursor is left pointing at the position preceding the current instruction.
|
||||
///
|
||||
/// Return the instruction that was removed.
|
||||
fn remove_inst_and_step_back(&mut self) -> ir::Inst {
|
||||
let inst = self.current_inst().expect("No instruction to remove");
|
||||
self.prev_inst();
|
||||
self.layout_mut().remove_inst(inst);
|
||||
inst
|
||||
}
|
||||
|
||||
/// Insert an EBB at the current position and switch to it.
|
||||
///
|
||||
/// As far as possible, this method behaves as if the EBB header were an instruction inserted
|
||||
/// at the current position.
|
||||
///
|
||||
/// - If the cursor is pointing at an existing instruction, *the current EBB is split in two*
|
||||
/// and the current instruction becomes the first instruction in the inserted EBB.
|
||||
/// - If the cursor points at the bottom of an EBB, the new EBB is inserted after the current
|
||||
/// one, and moved to the bottom of the new EBB where instructions can be appended.
|
||||
/// - If the cursor points to the top of an EBB, the new EBB is inserted above the current one.
|
||||
/// - If the cursor is not pointing at anything, the new EBB is placed last in the layout.
|
||||
///
|
||||
/// This means that it is always valid to call this method, and it always leaves the cursor in
|
||||
/// a state that will insert instructions into the new EBB.
|
||||
fn insert_ebb(&mut self, new_ebb: ir::Ebb) {
|
||||
use self::CursorPosition::*;
|
||||
match self.position() {
|
||||
At(inst) => {
|
||||
self.layout_mut().split_ebb(new_ebb, inst);
|
||||
// All other cases move to `After(ebb)`, but in this case we'll stay `At(inst)`.
|
||||
return;
|
||||
}
|
||||
Nowhere => self.layout_mut().append_ebb(new_ebb),
|
||||
Before(ebb) => self.layout_mut().insert_ebb(new_ebb, ebb),
|
||||
After(ebb) => self.layout_mut().insert_ebb_after(new_ebb, ebb),
|
||||
}
|
||||
// For everything but `At(inst)` we end up appending to the new EBB.
|
||||
self.set_position(After(new_ebb));
|
||||
}
|
||||
}
|
||||
|
||||
/// Function cursor.
|
||||
///
|
||||
/// A `FuncCursor` holds a mutable reference to a whole `ir::Function` while keeping a position
|
||||
/// too. The function can be re-borrowed by accessing the public `cur.func` member.
|
||||
///
|
||||
/// This cursor is for use before legalization. The inserted instructions are not given an
|
||||
/// encoding.
|
||||
pub struct FuncCursor<'f> {
|
||||
pos: CursorPosition,
|
||||
srcloc: ir::SourceLoc,
|
||||
|
||||
/// The referenced function.
|
||||
pub func: &'f mut ir::Function,
|
||||
}
|
||||
|
||||
impl<'f> FuncCursor<'f> {
|
||||
/// Create a new `FuncCursor` pointing nowhere.
|
||||
pub fn new(func: &'f mut ir::Function) -> FuncCursor<'f> {
|
||||
FuncCursor {
|
||||
pos: CursorPosition::Nowhere,
|
||||
srcloc: Default::default(),
|
||||
func,
|
||||
}
|
||||
}
|
||||
|
||||
/// Use the source location of `inst` for future instructions.
|
||||
pub fn use_srcloc(&mut self, inst: ir::Inst) {
|
||||
self.srcloc = self.func.srclocs[inst];
|
||||
}
|
||||
|
||||
/// Create an instruction builder that inserts an instruction at the current position.
|
||||
pub fn ins(&mut self) -> ir::InsertBuilder<&mut FuncCursor<'f>> {
|
||||
ir::InsertBuilder::new(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'f> Cursor for FuncCursor<'f> {
|
||||
fn position(&self) -> CursorPosition {
|
||||
self.pos
|
||||
}
|
||||
|
||||
fn set_position(&mut self, pos: CursorPosition) {
|
||||
self.pos = pos
|
||||
}
|
||||
|
||||
fn srcloc(&self) -> ir::SourceLoc {
|
||||
self.srcloc
|
||||
}
|
||||
|
||||
fn set_srcloc(&mut self, srcloc: ir::SourceLoc) {
|
||||
self.srcloc = srcloc;
|
||||
}
|
||||
|
||||
fn layout(&self) -> &ir::Layout {
|
||||
&self.func.layout
|
||||
}
|
||||
|
||||
fn layout_mut(&mut self) -> &mut ir::Layout {
|
||||
&mut self.func.layout
|
||||
}
|
||||
}
|
||||
|
||||
impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut FuncCursor<'f> {
    fn data_flow_graph(&self) -> &ir::DataFlowGraph {
        &self.func.dfg
    }

    fn data_flow_graph_mut(&mut self) -> &mut ir::DataFlowGraph {
        &mut self.func.dfg
    }

    fn insert_built_inst(self, inst: ir::Inst, _: ir::Type) -> &'c mut ir::DataFlowGraph {
        self.insert_inst(inst);
        // Only record a source location when one has actually been set.
        if !self.srcloc.is_default() {
            self.func.srclocs[inst] = self.srcloc;
        }
        &mut self.func.dfg
    }
}
|
||||
|
||||
/// Encoding cursor.
|
||||
///
|
||||
/// An `EncCursor` can be used to insert instructions that are immediately assigned an encoding.
|
||||
/// The cursor holds a mutable reference to the whole function which can be re-borrowed from the
|
||||
/// public `pos.func` member.
|
||||
pub struct EncCursor<'f> {
|
||||
pos: CursorPosition,
|
||||
srcloc: ir::SourceLoc,
|
||||
built_inst: Option<ir::Inst>,
|
||||
|
||||
/// The referenced function.
|
||||
pub func: &'f mut ir::Function,
|
||||
|
||||
/// The target ISA that will be used to encode instructions.
|
||||
pub isa: &'f TargetIsa,
|
||||
}
|
||||
|
||||
impl<'f> EncCursor<'f> {
|
||||
/// Create a new `EncCursor` pointing nowhere.
|
||||
pub fn new(func: &'f mut ir::Function, isa: &'f TargetIsa) -> EncCursor<'f> {
|
||||
EncCursor {
|
||||
pos: CursorPosition::Nowhere,
|
||||
srcloc: Default::default(),
|
||||
built_inst: None,
|
||||
func,
|
||||
isa,
|
||||
}
|
||||
}
|
||||
|
||||
/// Use the source location of `inst` for future instructions.
|
||||
pub fn use_srcloc(&mut self, inst: ir::Inst) {
|
||||
self.srcloc = self.func.srclocs[inst];
|
||||
}
|
||||
|
||||
/// Create an instruction builder that will insert an encoded instruction at the current
|
||||
/// position.
|
||||
///
|
||||
/// The builder will panic if it is used to insert an instruction that can't be encoded for
|
||||
/// `self.isa`.
|
||||
pub fn ins(&mut self) -> ir::InsertBuilder<&mut EncCursor<'f>> {
|
||||
ir::InsertBuilder::new(self)
|
||||
}
|
||||
|
||||
/// Get the last built instruction.
|
||||
///
|
||||
/// This returns the last instruction that was built using the `ins()` method on this cursor.
|
||||
/// Panics if no instruction was built.
|
||||
pub fn built_inst(&self) -> ir::Inst {
|
||||
self.built_inst.expect("No instruction was inserted")
|
||||
}
|
||||
|
||||
/// Return an object that can display `inst`.
|
||||
///
|
||||
/// This is a convenience wrapper for the DFG equivalent.
|
||||
pub fn display_inst(&self, inst: ir::Inst) -> ir::dfg::DisplayInst {
|
||||
self.func.dfg.display_inst(inst, self.isa)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'f> Cursor for EncCursor<'f> {
|
||||
fn position(&self) -> CursorPosition {
|
||||
self.pos
|
||||
}
|
||||
|
||||
fn set_position(&mut self, pos: CursorPosition) {
|
||||
self.pos = pos
|
||||
}
|
||||
|
||||
fn srcloc(&self) -> ir::SourceLoc {
|
||||
self.srcloc
|
||||
}
|
||||
|
||||
fn set_srcloc(&mut self, srcloc: ir::SourceLoc) {
|
||||
self.srcloc = srcloc;
|
||||
}
|
||||
|
||||
fn layout(&self) -> &ir::Layout {
|
||||
&self.func.layout
|
||||
}
|
||||
|
||||
fn layout_mut(&mut self) -> &mut ir::Layout {
|
||||
&mut self.func.layout
|
||||
}
|
||||
}
|
||||
|
||||
impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut EncCursor<'f> {
    fn data_flow_graph(&self) -> &ir::DataFlowGraph {
        &self.func.dfg
    }

    fn data_flow_graph_mut(&mut self) -> &mut ir::DataFlowGraph {
        &mut self.func.dfg
    }

    fn insert_built_inst(
        self,
        inst: ir::Inst,
        ctrl_typevar: ir::Type,
    ) -> &'c mut ir::DataFlowGraph {
        // Insert the instruction and remember the reference.
        self.insert_inst(inst);
        self.built_inst = Some(inst);

        // Only record a source location when one has actually been set.
        if !self.srcloc.is_default() {
            self.func.srclocs[inst] = self.srcloc;
        }

        // Assign an encoding.
        // XXX Is there a way to describe this error to the user?
        #[cfg_attr(feature = "cargo-clippy", allow(match_wild_err_arm))]
        match self.isa.encode(&self.func, &self.func.dfg[inst], ctrl_typevar) {
            Ok(e) => self.func.encodings[inst] = e,
            Err(_) => panic!("can't encode {}", self.display_inst(inst)),
        }

        &mut self.func.dfg
    }
}
|
||||
146
lib/codegen/src/dbg.rs
Normal file
146
lib/codegen/src/dbg.rs
Normal file
@@ -0,0 +1,146 @@
|
||||
//! Debug tracing macros.
//!
//! This module defines the `dbg!` macro which works like `println!` except it writes to the
//! Cretonne tracing output file if enabled.
//!
//! Tracing can be enabled by setting the `CRETONNE_DBG` environment variable to something
//! other than `0`.
//!
//! The output will appear in files named `cretonne.dbg.*`, where the suffix is named after the
//! thread doing the logging.
|
||||
#[cfg(feature = "std")]
|
||||
use std::cell::RefCell;
|
||||
#[cfg(feature = "std")]
|
||||
use std::env;
|
||||
#[cfg(feature = "std")]
|
||||
use std::ffi::OsStr;
|
||||
use std::fmt;
|
||||
#[cfg(feature = "std")]
|
||||
use std::fs::File;
|
||||
#[cfg(feature = "std")]
|
||||
use std::io::{self, Write};
|
||||
#[cfg(feature = "std")]
|
||||
use std::sync::atomic;
|
||||
#[cfg(feature = "std")]
|
||||
use std::thread;
|
||||
|
||||
// Tri-state tracing flag: 0 = not yet initialized, positive = enabled, negative = disabled
// (see `initialize()` which stores 1 or -1, and `enabled()` which interprets the sign).
#[cfg(feature = "std")]
static STATE: atomic::AtomicIsize = atomic::ATOMIC_ISIZE_INIT;
|
||||
|
||||
/// Is debug tracing enabled?
///
/// Debug tracing can be enabled by setting the `CRETONNE_DBG` environment variable to something
/// other than `0`.
///
/// This inline function turns into a constant `false` when debug assertions are disabled.
#[cfg(feature = "std")]
#[inline]
pub fn enabled() -> bool {
    // In release builds (no debug assertions) tracing is compiled out entirely.
    if !cfg!(debug_assertions) {
        return false;
    }
    // `STATE` is lazily initialized from the environment on first query.
    let state = STATE.load(atomic::Ordering::Relaxed);
    if state == 0 {
        initialize()
    } else {
        state > 0
    }
}
|
||||
|
||||
/// Debug tracing is unavailable without the `std` feature, so this always returns `false`.
#[cfg(not(feature = "std"))]
#[inline]
pub fn enabled() -> bool {
    false
}
|
||||
|
||||
/// Initialize `STATE` from the `CRETONNE_DBG` environment variable.
///
/// Stores a positive value in `STATE` when tracing is enabled and a negative value otherwise,
/// then returns whether tracing is enabled.
#[cfg(feature = "std")]
fn initialize() -> bool {
    // Any value other than "0" (including an empty string) enables tracing.
    let enable = env::var_os("CRETONNE_DBG").map_or(false, |s| s != OsStr::new("0"));

    // Record the decision so later calls to `enabled()` skip the environment lookup.
    STATE.store(if enable { 1 } else { -1 }, atomic::Ordering::Relaxed);

    enable
}
|
||||
|
||||
// Per-thread buffered writer for the trace output; the file is opened lazily by `open_file()`
// the first time the current thread writes a trace line.
#[cfg(feature = "std")]
thread_local! {
    static WRITER : RefCell<io::BufWriter<File>> = RefCell::new(open_file());
}
|
||||
|
||||
/// Write a line with the given format arguments.
///
/// This is for use by the `dbg!` macro. The line goes to the current thread's trace file and
/// the buffer is flushed immediately so partial traces survive a crash.
#[cfg(feature = "std")]
pub fn writeln_with_format_args(args: fmt::Arguments) -> io::Result<()> {
    WRITER.with(|cell| {
        let mut writer = cell.borrow_mut();
        writeln!(writer, "{}", args).and_then(|_| writer.flush())
    })
}
|
||||
|
||||
/// Open the tracing file for the current thread.
///
/// The file is named `cretonne.dbg.<thread>`, where `<thread>` is the thread name (or its
/// debug-formatted thread ID when unnamed) with anything but alphanumerics, `-` and `_`
/// filtered out.
#[cfg(feature = "std")]
fn open_file() -> io::BufWriter<File> {
    let curthread = thread::current();
    // Fall back to the thread ID when the thread is unnamed.
    let raw_name = match curthread.name() {
        Some(name) => name.to_owned(),
        None => format!("{:?}", curthread.id()),
    };
    let mut path = "cretonne.dbg.".to_owned();
    for ch in raw_name.chars() {
        // Keep only filesystem-friendly characters.
        if ch.is_alphanumeric() || ch == '-' || ch == '_' {
            path.push(ch);
        }
    }
    let file = File::create(path).expect("Can't open tracing file");
    io::BufWriter::new(file)
}
|
||||
|
||||
/// Write a line to the debug trace file if tracing is enabled.
///
/// Arguments are the same as for `println!`.
///
/// Without the `std` feature, `enabled()` is always `false` and the write statement is
/// compiled out, so the macro expands to nothing observable.
#[macro_export]
macro_rules! dbg {
    ($($arg:tt)+) => {
        if $crate::dbg::enabled() {
            // Drop the error result so we don't get compiler errors for ignoring it.
            // What are you going to do, log the error?
            #[cfg(feature = "std")]
            $crate::dbg::writeln_with_format_args(format_args!($($arg)+)).ok();
        }
    }
}
|
||||
|
||||
/// Helper for printing lists: wraps a slice of displayable items so the whole slice can be
/// formatted as `[a, b, c]`.
pub struct DisplayList<'a, T: 'a + fmt::Display>(pub &'a [T]);
|
||||
|
||||
impl<'a, T> fmt::Display for DisplayList<'a, T>
|
||||
where
|
||||
T: 'a + fmt::Display,
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self.0.split_first() {
|
||||
None => write!(f, "[]"),
|
||||
Some((first, rest)) => {
|
||||
write!(f, "[{}", first)?;
|
||||
for x in rest {
|
||||
write!(f, ", {}", x)?;
|
||||
}
|
||||
write!(f, "]")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
68
lib/codegen/src/dce.rs
Normal file
68
lib/codegen/src/dce.rs
Normal file
@@ -0,0 +1,68 @@
|
||||
//! A Dead-Code Elimination (DCE) pass.
|
||||
//!
|
||||
//! Dead code here means instructions that have no side effects and have no
|
||||
//! result values used by other instructions.
|
||||
|
||||
use cursor::{Cursor, FuncCursor};
|
||||
use dominator_tree::DominatorTree;
|
||||
use entity::EntityRef;
|
||||
use ir::instructions::InstructionData;
|
||||
use ir::{DataFlowGraph, Function, Inst, Opcode};
|
||||
use std::vec::Vec;
|
||||
use timing;
|
||||
|
||||
/// Test whether the given opcode is unsafe to even consider for DCE.
|
||||
fn trivially_unsafe_for_dce(opcode: Opcode) -> bool {
|
||||
opcode.is_call() || opcode.is_branch() || opcode.is_terminator() ||
|
||||
opcode.is_return() || opcode.can_trap() || opcode.other_side_effects() ||
|
||||
opcode.can_store()
|
||||
}
|
||||
|
||||
/// Preserve instructions with used result values.
|
||||
fn any_inst_results_used(inst: Inst, live: &[bool], dfg: &DataFlowGraph) -> bool {
|
||||
dfg.inst_results(inst).iter().any(|v| live[v.index()])
|
||||
}
|
||||
|
||||
/// Load instructions without the `notrap` flag are defined to trap when
|
||||
/// operating on inaccessible memory, so we can't DCE them even if the
|
||||
/// loaded value is unused.
|
||||
fn is_load_with_defined_trapping(opcode: Opcode, data: &InstructionData) -> bool {
|
||||
if !opcode.can_load() {
|
||||
return false;
|
||||
}
|
||||
match *data {
|
||||
InstructionData::StackLoad { .. } => false,
|
||||
InstructionData::Load { flags, .. } => !flags.notrap(),
|
||||
_ => true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Perform DCE on `func`.
///
/// Walks the EBBs in CFG post-order and each EBB bottom-up, so every use of a value is seen
/// before its definition. Instructions whose results are all dead and which have no side
/// effects are removed; the arguments of every kept instruction are marked live.
///
/// Requires a valid dominator tree (for its cached CFG post-order); the tree itself is not
/// modified by instruction removal.
pub fn do_dce(func: &mut Function, domtree: &mut DominatorTree) {
    let _tt = timing::dce();
    debug_assert!(domtree.is_valid());

    // Liveness bitmap indexed by `Value::index()`; all values start out dead.
    let mut live = Vec::with_capacity(func.dfg.num_values());
    live.resize(func.dfg.num_values(), false);

    for &ebb in domtree.cfg_postorder().iter() {
        // Scan the EBB backwards so uses are processed before definitions.
        let mut pos = FuncCursor::new(func).at_bottom(ebb);
        while let Some(inst) = pos.prev_inst() {
            {
                // Inner scope: the borrow of `pos.func.dfg` must end before `remove_inst`.
                let data = &pos.func.dfg[inst];
                let opcode = data.opcode();
                if trivially_unsafe_for_dce(opcode) ||
                    is_load_with_defined_trapping(opcode, &data) ||
                    any_inst_results_used(inst, &live, &pos.func.dfg)
                {
                    // The instruction must be kept: mark its (alias-resolved) arguments live.
                    for arg in pos.func.dfg.inst_args(inst) {
                        let v = pos.func.dfg.resolve_aliases(*arg);
                        live[v.index()] = true;
                    }
                    continue;
                }
            }
            // Dead and side-effect free: remove it.
            pos.remove_inst();
        }
    }
}
|
||||
547
lib/codegen/src/divconst_magic_numbers.rs
Normal file
547
lib/codegen/src/divconst_magic_numbers.rs
Normal file
@@ -0,0 +1,547 @@
|
||||
//! Compute "magic numbers" for division-by-constants transformations.
|
||||
//!
|
||||
//! Math helpers for division by (non-power-of-2) constants. This is based
|
||||
//! on the presentation in "Hacker's Delight" by Henry Warren, 2003. There
|
||||
//! are four cases: {unsigned, signed} x {32 bit, 64 bit}. The word size
|
||||
//! makes little difference, but the signed-vs-unsigned aspect has a large
|
||||
//! effect. Therefore everything is presented in the order U32 U64 S32 S64
|
||||
//! so as to emphasise the similarity of the U32 and U64 cases and the S32
|
||||
//! and S64 cases.
|
||||
|
||||
#![allow(non_snake_case)]
|
||||
|
||||
// Structures to hold the "magic numbers" computed.
|
||||
|
||||
/// Magic numbers for unsigned 32-bit division by a constant.
#[derive(PartialEq, Debug)]
pub struct MU32 {
    pub mulBy: u32,   // the multiplier replacing the division
    pub doAdd: bool,  // whether an extra add step is needed after the multiply
    pub shiftBy: i32, // the final right-shift amount
}
|
||||
|
||||
/// Magic numbers for unsigned 64-bit division by a constant.
#[derive(PartialEq, Debug)]
pub struct MU64 {
    pub mulBy: u64,   // the multiplier replacing the division
    pub doAdd: bool,  // whether an extra add step is needed after the multiply
    pub shiftBy: i32, // the final right-shift amount
}
|
||||
|
||||
/// Magic numbers for signed 32-bit division by a constant.
/// Unlike the unsigned case, no add-indicator is needed.
#[derive(PartialEq, Debug)]
pub struct MS32 {
    pub mulBy: i32,   // the multiplier replacing the division
    pub shiftBy: i32, // the final right-shift amount
}
|
||||
|
||||
/// Magic numbers for signed 64-bit division by a constant.
/// Unlike the unsigned case, no add-indicator is needed.
#[derive(PartialEq, Debug)]
pub struct MS64 {
    pub mulBy: i64,   // the multiplier replacing the division
    pub shiftBy: i32, // the final right-shift amount
}
|
||||
|
||||
// The actual "magic number" generators follow.
|
||||
|
||||
/// Compute the magic numbers for unsigned 32-bit division by the constant `d`.
///
/// Follows the algorithm from "Hacker's Delight" (see module docs). The mix of wrapping and
/// plain arithmetic below is deliberate: only the operations that can legitimately overflow
/// use the `wrapping_*` forms, so a debug-build overflow panic would indicate a real bug.
pub fn magicU32(d: u32) -> MU32 {
    debug_assert_ne!(d, 0);
    debug_assert_ne!(d, 1); // d==1 generates out of range shifts.

    let mut do_add: bool = false;
    let mut p: i32 = 31;
    // Largest multiple-of-d-minus-1 value, used to detect when the quotient is exact enough.
    let nc: u32 = 0xFFFFFFFFu32 - u32::wrapping_neg(d) % d;
    let mut q1: u32 = 0x80000000u32 / nc;
    let mut r1: u32 = 0x80000000u32 - q1 * nc;
    let mut q2: u32 = 0x7FFFFFFFu32 / d;
    let mut r2: u32 = 0x7FFFFFFFu32 - q2 * d;
    loop {
        p = p + 1;
        // Double (q1, r1) as a quotient/remainder pair with respect to nc.
        if r1 >= nc - r1 {
            q1 = u32::wrapping_add(u32::wrapping_mul(2, q1), 1);
            r1 = u32::wrapping_sub(u32::wrapping_mul(2, r1), nc);
        } else {
            q1 = 2 * q1;
            r1 = 2 * r1;
        }
        // Double (q2, r2) with respect to d, noting when q2 overflows into the add-case.
        if r2 + 1 >= d - r2 {
            if q2 >= 0x7FFFFFFFu32 {
                do_add = true;
            }
            q2 = 2 * q2 + 1;
            r2 = u32::wrapping_sub(u32::wrapping_add(u32::wrapping_mul(2, r2), 1), d);
        } else {
            if q2 >= 0x80000000u32 {
                do_add = true;
            }
            q2 = u32::wrapping_mul(2, q2);
            r2 = 2 * r2 + 1;
        }
        let delta: u32 = d - 1 - r2;
        // Stop once the approximation is precise enough (or the shift budget is exhausted).
        if !(p < 64 && (q1 < delta || (q1 == delta && r1 == 0))) {
            break;
        }
    }

    MU32 {
        mulBy: q2 + 1,
        doAdd: do_add,
        shiftBy: p - 32,
    }
}
|
||||
|
||||
/// Compute the magic numbers for unsigned 64-bit division by the constant `d`.
///
/// Identical in structure to `magicU32`, widened to 64 bits; see that function and the module
/// docs for the algorithm background.
pub fn magicU64(d: u64) -> MU64 {
    debug_assert_ne!(d, 0);
    debug_assert_ne!(d, 1); // d==1 generates out of range shifts.

    let mut do_add: bool = false;
    let mut p: i32 = 63;
    // Largest multiple-of-d-minus-1 value, used to detect when the quotient is exact enough.
    let nc: u64 = 0xFFFFFFFFFFFFFFFFu64 - u64::wrapping_neg(d) % d;
    let mut q1: u64 = 0x8000000000000000u64 / nc;
    let mut r1: u64 = 0x8000000000000000u64 - q1 * nc;
    let mut q2: u64 = 0x7FFFFFFFFFFFFFFFu64 / d;
    let mut r2: u64 = 0x7FFFFFFFFFFFFFFFu64 - q2 * d;
    loop {
        p = p + 1;
        // Double (q1, r1) as a quotient/remainder pair with respect to nc.
        if r1 >= nc - r1 {
            q1 = u64::wrapping_add(u64::wrapping_mul(2, q1), 1);
            r1 = u64::wrapping_sub(u64::wrapping_mul(2, r1), nc);
        } else {
            q1 = 2 * q1;
            r1 = 2 * r1;
        }
        // Double (q2, r2) with respect to d, noting when q2 overflows into the add-case.
        if r2 + 1 >= d - r2 {
            if q2 >= 0x7FFFFFFFFFFFFFFFu64 {
                do_add = true;
            }
            q2 = 2 * q2 + 1;
            r2 = u64::wrapping_sub(u64::wrapping_add(u64::wrapping_mul(2, r2), 1), d);
        } else {
            if q2 >= 0x8000000000000000u64 {
                do_add = true;
            }
            q2 = u64::wrapping_mul(2, q2);
            r2 = 2 * r2 + 1;
        }
        let delta: u64 = d - 1 - r2;
        // Stop once the approximation is precise enough (or the shift budget is exhausted).
        if !(p < 128 && (q1 < delta || (q1 == delta && r1 == 0))) {
            break;
        }
    }

    MU64 {
        mulBy: q2 + 1,
        doAdd: do_add,
        shiftBy: p - 64,
    }
}
|
||||
|
||||
/// Compute the magic numbers for signed 32-bit division by the constant `d`.
///
/// The computation is done in unsigned arithmetic on `|d|`; the multiplier's sign is fixed up
/// at the end. See the module docs for the algorithm background.
pub fn magicS32(d: i32) -> MS32 {
    // d in {-1, 0, 1} would generate degenerate or out-of-range results.
    debug_assert_ne!(d, -1);
    debug_assert_ne!(d, 0);
    debug_assert_ne!(d, 1);
    let two31: u32 = 0x80000000u32;
    let mut p: i32 = 31;
    let ad: u32 = i32::wrapping_abs(d) as u32;
    // t = 2^31 + (1 if d < 0 else 0); anc is the largest multiple-of-ad value below t.
    let t: u32 = two31 + ((d as u32) >> 31);
    let anc: u32 = u32::wrapping_sub(t - 1, t % ad);
    let mut q1: u32 = two31 / anc;
    let mut r1: u32 = two31 - q1 * anc;
    let mut q2: u32 = two31 / ad;
    let mut r2: u32 = two31 - q2 * ad;
    loop {
        p = p + 1;
        // Double (q1, r1) as a quotient/remainder pair with respect to anc.
        q1 = 2 * q1;
        r1 = 2 * r1;
        if r1 >= anc {
            q1 = q1 + 1;
            r1 = r1 - anc;
        }
        // Double (q2, r2) with respect to ad.
        q2 = 2 * q2;
        r2 = 2 * r2;
        if r2 >= ad {
            q2 = q2 + 1;
            r2 = r2 - ad;
        }
        let delta: u32 = ad - r2;
        // Stop once the approximation is precise enough.
        if !(q1 < delta || (q1 == delta && r1 == 0)) {
            break;
        }
    }

    MS32 {
        // Negative divisors get a negated multiplier.
        mulBy: (if d < 0 {
            u32::wrapping_neg(q2 + 1)
        } else {
            q2 + 1
        }) as i32,
        shiftBy: p - 32,
    }
}
|
||||
|
||||
/// Compute the magic numbers for signed 64-bit division by the constant `d`.
///
/// Identical in structure to `magicS32`, widened to 64 bits; see that function and the module
/// docs for the algorithm background.
pub fn magicS64(d: i64) -> MS64 {
    // d in {-1, 0, 1} would generate degenerate or out-of-range results.
    debug_assert_ne!(d, -1);
    debug_assert_ne!(d, 0);
    debug_assert_ne!(d, 1);
    let two63: u64 = 0x8000000000000000u64;
    let mut p: i32 = 63;
    let ad: u64 = i64::wrapping_abs(d) as u64;
    // t = 2^63 + (1 if d < 0 else 0); anc is the largest multiple-of-ad value below t.
    let t: u64 = two63 + ((d as u64) >> 63);
    let anc: u64 = u64::wrapping_sub(t - 1, t % ad);
    let mut q1: u64 = two63 / anc;
    let mut r1: u64 = two63 - q1 * anc;
    let mut q2: u64 = two63 / ad;
    let mut r2: u64 = two63 - q2 * ad;
    loop {
        p = p + 1;
        // Double (q1, r1) as a quotient/remainder pair with respect to anc.
        q1 = 2 * q1;
        r1 = 2 * r1;
        if r1 >= anc {
            q1 = q1 + 1;
            r1 = r1 - anc;
        }
        // Double (q2, r2) with respect to ad.
        q2 = 2 * q2;
        r2 = 2 * r2;
        if r2 >= ad {
            q2 = q2 + 1;
            r2 = r2 - ad;
        }
        let delta: u64 = ad - r2;
        // Stop once the approximation is precise enough.
        if !(q1 < delta || (q1 == delta && r1 == 0)) {
            break;
        }
    }

    MS64 {
        // Negative divisors get a negated multiplier.
        mulBy: (if d < 0 {
            u64::wrapping_neg(q2 + 1)
        } else {
            q2 + 1
        }) as i64,
        shiftBy: p - 64,
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{MS32, MS64, MU32, MU64};
    use super::{magicS32, magicS64, magicU32, magicU64};

    // Shorthand constructors for the expected magic-number values below.
    fn mkMU32(mulBy: u32, doAdd: bool, shiftBy: i32) -> MU32 {
        MU32 {
            mulBy,
            doAdd,
            shiftBy,
        }
    }

    fn mkMU64(mulBy: u64, doAdd: bool, shiftBy: i32) -> MU64 {
        MU64 {
            mulBy,
            doAdd,
            shiftBy,
        }
    }

    fn mkMS32(mulBy: i32, shiftBy: i32) -> MS32 {
        MS32 { mulBy, shiftBy }
    }

    fn mkMS64(mulBy: i64, shiftBy: i32) -> MS64 {
        MS64 { mulBy, shiftBy }
    }

    // Spot-check expected magic values for small, boundary, and arbitrary divisors.
    #[test]
    fn test_magicU32() {
        assert_eq!(magicU32(2u32), mkMU32(0x80000000u32, false, 0));
        assert_eq!(magicU32(3u32), mkMU32(0xaaaaaaabu32, false, 1));
        assert_eq!(magicU32(4u32), mkMU32(0x40000000u32, false, 0));
        assert_eq!(magicU32(5u32), mkMU32(0xcccccccdu32, false, 2));
        assert_eq!(magicU32(6u32), mkMU32(0xaaaaaaabu32, false, 2));
        assert_eq!(magicU32(7u32), mkMU32(0x24924925u32, true, 3));
        assert_eq!(magicU32(9u32), mkMU32(0x38e38e39u32, false, 1));
        assert_eq!(magicU32(10u32), mkMU32(0xcccccccdu32, false, 3));
        assert_eq!(magicU32(11u32), mkMU32(0xba2e8ba3u32, false, 3));
        assert_eq!(magicU32(12u32), mkMU32(0xaaaaaaabu32, false, 3));
        assert_eq!(magicU32(25u32), mkMU32(0x51eb851fu32, false, 3));
        assert_eq!(magicU32(125u32), mkMU32(0x10624dd3u32, false, 3));
        assert_eq!(magicU32(625u32), mkMU32(0xd1b71759u32, false, 9));
        assert_eq!(magicU32(1337u32), mkMU32(0x88233b2bu32, true, 11));
        assert_eq!(magicU32(65535u32), mkMU32(0x80008001u32, false, 15));
        assert_eq!(magicU32(65536u32), mkMU32(0x00010000u32, false, 0));
        assert_eq!(magicU32(65537u32), mkMU32(0xffff0001u32, false, 16));
        assert_eq!(magicU32(31415927u32), mkMU32(0x445b4553u32, false, 23));
        assert_eq!(magicU32(0xdeadbeefu32), mkMU32(0x93275ab3u32, false, 31));
        assert_eq!(magicU32(0xfffffffdu32), mkMU32(0x40000001u32, false, 30));
        assert_eq!(magicU32(0xfffffffeu32), mkMU32(0x00000003u32, true, 32));
        assert_eq!(magicU32(0xffffffffu32), mkMU32(0x80000001u32, false, 31));
    }
    #[test]
    fn test_magicU64() {
        assert_eq!(magicU64(2u64), mkMU64(0x8000000000000000u64, false, 0));
        assert_eq!(magicU64(3u64), mkMU64(0xaaaaaaaaaaaaaaabu64, false, 1));
        assert_eq!(magicU64(4u64), mkMU64(0x4000000000000000u64, false, 0));
        assert_eq!(magicU64(5u64), mkMU64(0xcccccccccccccccdu64, false, 2));
        assert_eq!(magicU64(6u64), mkMU64(0xaaaaaaaaaaaaaaabu64, false, 2));
        assert_eq!(magicU64(7u64), mkMU64(0x2492492492492493u64, true, 3));
        assert_eq!(magicU64(9u64), mkMU64(0xe38e38e38e38e38fu64, false, 3));
        assert_eq!(magicU64(10u64), mkMU64(0xcccccccccccccccdu64, false, 3));
        assert_eq!(magicU64(11u64), mkMU64(0x2e8ba2e8ba2e8ba3u64, false, 1));
        assert_eq!(magicU64(12u64), mkMU64(0xaaaaaaaaaaaaaaabu64, false, 3));
        assert_eq!(magicU64(25u64), mkMU64(0x47ae147ae147ae15u64, true, 5));
        assert_eq!(magicU64(125u64), mkMU64(0x0624dd2f1a9fbe77u64, true, 7));
        assert_eq!(magicU64(625u64), mkMU64(0x346dc5d63886594bu64, false, 7));
        assert_eq!(magicU64(1337u64), mkMU64(0xc4119d952866a139u64, false, 10));
        assert_eq!(
            magicU64(31415927u64),
            mkMU64(0x116d154b9c3d2f85u64, true, 25)
        );
        assert_eq!(
            magicU64(0x00000000deadbeefu64),
            mkMU64(0x93275ab2dfc9094bu64, false, 31)
        );
        assert_eq!(
            magicU64(0x00000000fffffffdu64),
            mkMU64(0x8000000180000005u64, false, 31)
        );
        assert_eq!(
            magicU64(0x00000000fffffffeu64),
            mkMU64(0x0000000200000005u64, true, 32)
        );
        assert_eq!(
            magicU64(0x00000000ffffffffu64),
            mkMU64(0x8000000080000001u64, false, 31)
        );
        assert_eq!(
            magicU64(0x0000000100000000u64),
            mkMU64(0x0000000100000000u64, false, 0)
        );
        assert_eq!(
            magicU64(0x0000000100000001u64),
            mkMU64(0xffffffff00000001u64, false, 32)
        );
        assert_eq!(
            magicU64(0x0ddc0ffeebadf00du64),
            mkMU64(0x2788e9d394b77da1u64, true, 60)
        );
        assert_eq!(
            magicU64(0xfffffffffffffffdu64),
            mkMU64(0x4000000000000001u64, false, 62)
        );
        assert_eq!(
            magicU64(0xfffffffffffffffeu64),
            mkMU64(0x0000000000000003u64, true, 64)
        );
        assert_eq!(
            magicU64(0xffffffffffffffffu64),
            mkMU64(0x8000000000000001u64, false, 63)
        );
    }
    #[test]
    fn test_magicS32() {
        assert_eq!(magicS32(-0x80000000i32), mkMS32(0x7fffffffu32 as i32, 30));
        assert_eq!(magicS32(-0x7FFFFFFFi32), mkMS32(0xbfffffffu32 as i32, 29));
        assert_eq!(magicS32(-0x7FFFFFFEi32), mkMS32(0x7ffffffdu32 as i32, 30));
        assert_eq!(magicS32(-31415927i32), mkMS32(0xbba4baadu32 as i32, 23));
        assert_eq!(magicS32(-1337i32), mkMS32(0x9df73135u32 as i32, 9));
        assert_eq!(magicS32(-256i32), mkMS32(0x7fffffffu32 as i32, 7));
        assert_eq!(magicS32(-5i32), mkMS32(0x99999999u32 as i32, 1));
        assert_eq!(magicS32(-3i32), mkMS32(0x55555555u32 as i32, 1));
        assert_eq!(magicS32(-2i32), mkMS32(0x7fffffffu32 as i32, 0));
        assert_eq!(magicS32(2i32), mkMS32(0x80000001u32 as i32, 0));
        assert_eq!(magicS32(3i32), mkMS32(0x55555556u32 as i32, 0));
        assert_eq!(magicS32(4i32), mkMS32(0x80000001u32 as i32, 1));
        assert_eq!(magicS32(5i32), mkMS32(0x66666667u32 as i32, 1));
        assert_eq!(magicS32(6i32), mkMS32(0x2aaaaaabu32 as i32, 0));
        assert_eq!(magicS32(7i32), mkMS32(0x92492493u32 as i32, 2));
        assert_eq!(magicS32(9i32), mkMS32(0x38e38e39u32 as i32, 1));
        assert_eq!(magicS32(10i32), mkMS32(0x66666667u32 as i32, 2));
        assert_eq!(magicS32(11i32), mkMS32(0x2e8ba2e9u32 as i32, 1));
        assert_eq!(magicS32(12i32), mkMS32(0x2aaaaaabu32 as i32, 1));
        assert_eq!(magicS32(25i32), mkMS32(0x51eb851fu32 as i32, 3));
        assert_eq!(magicS32(125i32), mkMS32(0x10624dd3u32 as i32, 3));
        assert_eq!(magicS32(625i32), mkMS32(0x68db8badu32 as i32, 8));
        assert_eq!(magicS32(1337i32), mkMS32(0x6208cecbu32 as i32, 9));
        assert_eq!(magicS32(31415927i32), mkMS32(0x445b4553u32 as i32, 23));
        assert_eq!(magicS32(0x7ffffffei32), mkMS32(0x80000003u32 as i32, 30));
        assert_eq!(magicS32(0x7fffffffi32), mkMS32(0x40000001u32 as i32, 29));
    }
    #[test]
    fn test_magicS64() {
        assert_eq!(
            magicS64(-0x8000000000000000i64),
            mkMS64(0x7fffffffffffffffu64 as i64, 62)
        );
        assert_eq!(
            magicS64(-0x7FFFFFFFFFFFFFFFi64),
            mkMS64(0xbfffffffffffffffu64 as i64, 61)
        );
        assert_eq!(
            magicS64(-0x7FFFFFFFFFFFFFFEi64),
            mkMS64(0x7ffffffffffffffdu64 as i64, 62)
        );
        assert_eq!(
            magicS64(-0x0ddC0ffeeBadF00di64),
            mkMS64(0x6c3b8b1635a4412fu64 as i64, 59)
        );
        assert_eq!(
            magicS64(-0x100000001i64),
            mkMS64(0x800000007fffffffu64 as i64, 31)
        );
        assert_eq!(
            magicS64(-0x100000000i64),
            mkMS64(0x7fffffffffffffffu64 as i64, 31)
        );
        assert_eq!(
            magicS64(-0xFFFFFFFFi64),
            mkMS64(0x7fffffff7fffffffu64 as i64, 31)
        );
        assert_eq!(
            magicS64(-0xFFFFFFFEi64),
            mkMS64(0x7ffffffefffffffdu64 as i64, 31)
        );
        assert_eq!(
            magicS64(-0xFFFFFFFDi64),
            mkMS64(0x7ffffffe7ffffffbu64 as i64, 31)
        );
        assert_eq!(
            magicS64(-0xDeadBeefi64),
            mkMS64(0x6cd8a54d2036f6b5u64 as i64, 31)
        );
        assert_eq!(
            magicS64(-31415927i64),
            mkMS64(0x7749755a31e1683du64 as i64, 24)
        );
        assert_eq!(magicS64(-1337i64), mkMS64(0x9df731356bccaf63u64 as i64, 9));
        assert_eq!(magicS64(-256i64), mkMS64(0x7fffffffffffffffu64 as i64, 7));
        assert_eq!(magicS64(-5i64), mkMS64(0x9999999999999999u64 as i64, 1));
        assert_eq!(magicS64(-3i64), mkMS64(0x5555555555555555u64 as i64, 1));
        assert_eq!(magicS64(-2i64), mkMS64(0x7fffffffffffffffu64 as i64, 0));
        assert_eq!(magicS64(2i64), mkMS64(0x8000000000000001u64 as i64, 0));
        assert_eq!(magicS64(3i64), mkMS64(0x5555555555555556u64 as i64, 0));
        assert_eq!(magicS64(4i64), mkMS64(0x8000000000000001u64 as i64, 1));
        assert_eq!(magicS64(5i64), mkMS64(0x6666666666666667u64 as i64, 1));
        assert_eq!(magicS64(6i64), mkMS64(0x2aaaaaaaaaaaaaabu64 as i64, 0));
        assert_eq!(magicS64(7i64), mkMS64(0x4924924924924925u64 as i64, 1));
        assert_eq!(magicS64(9i64), mkMS64(0x1c71c71c71c71c72u64 as i64, 0));
        assert_eq!(magicS64(10i64), mkMS64(0x6666666666666667u64 as i64, 2));
        assert_eq!(magicS64(11i64), mkMS64(0x2e8ba2e8ba2e8ba3u64 as i64, 1));
        assert_eq!(magicS64(12i64), mkMS64(0x2aaaaaaaaaaaaaabu64 as i64, 1));
        assert_eq!(magicS64(25i64), mkMS64(0xa3d70a3d70a3d70bu64 as i64, 4));
        assert_eq!(magicS64(125i64), mkMS64(0x20c49ba5e353f7cfu64 as i64, 4));
        assert_eq!(magicS64(625i64), mkMS64(0x346dc5d63886594bu64 as i64, 7));
        assert_eq!(magicS64(1337i64), mkMS64(0x6208ceca9433509du64 as i64, 9));
        assert_eq!(
            magicS64(31415927i64),
            mkMS64(0x88b68aa5ce1e97c3u64 as i64, 24)
        );
        assert_eq!(
            magicS64(0x00000000deadbeefi64),
            mkMS64(0x93275ab2dfc9094bu64 as i64, 31)
        );
        assert_eq!(
            magicS64(0x00000000fffffffdi64),
            mkMS64(0x8000000180000005u64 as i64, 31)
        );
        assert_eq!(
            magicS64(0x00000000fffffffei64),
            mkMS64(0x8000000100000003u64 as i64, 31)
        );
        assert_eq!(
            magicS64(0x00000000ffffffffi64),
            mkMS64(0x8000000080000001u64 as i64, 31)
        );
        assert_eq!(
            magicS64(0x0000000100000000i64),
            mkMS64(0x8000000000000001u64 as i64, 31)
        );
        assert_eq!(
            magicS64(0x0000000100000001i64),
            mkMS64(0x7fffffff80000001u64 as i64, 31)
        );
        assert_eq!(
            magicS64(0x0ddc0ffeebadf00di64),
            mkMS64(0x93c474e9ca5bbed1u64 as i64, 59)
        );
        assert_eq!(
            magicS64(0x7ffffffffffffffdi64),
            mkMS64(0x2000000000000001u64 as i64, 60)
        );
        assert_eq!(
            magicS64(0x7ffffffffffffffei64),
            mkMS64(0x8000000000000003u64 as i64, 62)
        );
        assert_eq!(
            magicS64(0x7fffffffffffffffi64),
            mkMS64(0x4000000000000001u64 as i64, 61)
        );
    }
    #[test]
    fn test_magic_generators_dont_panic() {
        // The point of this is to check that the magic number generators
        // don't panic with integer wraparounds, especially at boundary
        // cases for their arguments. The actual results are thrown away.
        let mut total: u64 = 0;
        // Testing UP magicU32
        for x in 2..(200 * 1000u32) {
            let m = magicU32(x);
            total = total ^ (m.mulBy as u64);
            total = total + (m.shiftBy as u64);
            total = total - (if m.doAdd { 123 } else { 456 });
        }
        assert_eq!(total, 1747815691);
        // Testing DOWN magicU32
        for x in 0..(200 * 1000u32) {
            let m = magicU32(0xFFFF_FFFFu32 - x);
            total = total ^ (m.mulBy as u64);
            total = total + (m.shiftBy as u64);
            total = total - (if m.doAdd { 123 } else { 456 });
        }
        assert_eq!(total, 2210292772);

        // Testing UP magicU64
        for x in 2..(200 * 1000u64) {
            let m = magicU64(x);
            total = total ^ m.mulBy;
            total = total + (m.shiftBy as u64);
            total = total - (if m.doAdd { 123 } else { 456 });
        }
        assert_eq!(total, 7430004084791260605);
        // Testing DOWN magicU64
        for x in 0..(200 * 1000u64) {
            let m = magicU64(0xFFFF_FFFF_FFFF_FFFFu64 - x);
            total = total ^ m.mulBy;
            total = total + (m.shiftBy as u64);
            total = total - (if m.doAdd { 123 } else { 456 });
        }
        assert_eq!(total, 7547519887519825919);

        // Testing UP magicS32
        for x in 0..(200 * 1000i32) {
            let m = magicS32(-0x8000_0000i32 + x);
            total = total ^ (m.mulBy as u64);
            total = total + (m.shiftBy as u64);
        }
        assert_eq!(total, 10899224186731671235);
        // Testing DOWN magicS32
        for x in 0..(200 * 1000i32) {
            let m = magicS32(0x7FFF_FFFFi32 - x);
            total = total ^ (m.mulBy as u64);
            total = total + (m.shiftBy as u64);
        }
        assert_eq!(total, 7547519887517897369);

        // Testing UP magicS64
        for x in 0..(200 * 1000i64) {
            let m = magicS64(-0x8000_0000_0000_0000i64 + x);
            total = total ^ (m.mulBy as u64);
            total = total + (m.shiftBy as u64);
        }
        assert_eq!(total, 8029756891368555163);
        // Testing DOWN magicS64
        for x in 0..(200 * 1000i64) {
            let m = magicS64(0x7FFF_FFFF_FFFF_FFFFi64 - x);
            total = total ^ (m.mulBy as u64);
            total = total + (m.shiftBy as u64);
        }
        // Force `total` -- and hence, the entire computation -- to
        // be used, so that rustc can't optimise it out.
        assert_eq!(total, 7547519887532559585u64);
    }
}
|
||||
935
lib/codegen/src/dominator_tree.rs
Normal file
935
lib/codegen/src/dominator_tree.rs
Normal file
@@ -0,0 +1,935 @@
|
||||
//! A Dominator Tree represented as mappings of Ebbs to their immediate dominator.
|
||||
|
||||
use entity::EntityMap;
|
||||
use flowgraph::{BasicBlock, ControlFlowGraph};
|
||||
use ir::instructions::BranchInfo;
|
||||
use ir::{Ebb, ExpandedProgramPoint, Function, Inst, Layout, ProgramOrder, Value};
|
||||
use packed_option::PackedOption;
|
||||
use std::cmp;
|
||||
use std::cmp::Ordering;
|
||||
use std::mem;
|
||||
use std::vec::Vec;
|
||||
use timing;
|
||||
|
||||
/// RPO numbers are not first assigned in a contiguous way but as multiples of STRIDE, to leave
/// room for modifications of the dominator tree.
const STRIDE: u32 = 4;

/// Special RPO numbers used during `compute_postorder`.
/// NOTE(review): `compute_postorder` is defined further down in this file; these appear to be
/// temporary traversal markers distinct from real RPO numbers (which are multiples of STRIDE).
const DONE: u32 = 1;
const SEEN: u32 = 2;
||||
|
||||
/// Dominator tree node. We keep one of these per EBB.
///
/// `Default` yields `rpo_number == 0` and `idom == None`, i.e. an unreachable node.
#[derive(Clone, Default)]
struct DomNode {
    /// Number of this node in a reverse post-order traversal of the CFG, starting from 1.
    /// This number is monotonic in the reverse postorder but not contiguous, since we leave
    /// holes for later localized modifications of the dominator tree.
    /// Unreachable nodes get number 0, all others are positive.
    rpo_number: u32,

    /// The immediate dominator of this EBB, represented as the branch or jump instruction at the
    /// end of the dominating basic block.
    ///
    /// This is `None` for unreachable blocks and the entry block which doesn't have an immediate
    /// dominator.
    idom: PackedOption<Inst>,
}
|
||||
|
||||
/// The dominator tree for a single function.
pub struct DominatorTree {
    /// Per-EBB dominator information (RPO number and immediate dominator).
    nodes: EntityMap<Ebb, DomNode>,

    /// CFG post-order of all reachable EBBs.
    postorder: Vec<Ebb>,

    /// Scratch memory used by `compute_postorder()`.
    stack: Vec<Ebb>,

    /// Whether the cached tree/post-order reflect the current CFG; checked by query methods.
    valid: bool,
}
|
||||
|
||||
/// Methods for querying the dominator tree.
|
||||
impl DominatorTree {
|
||||
/// Is `ebb` reachable from the entry block?
|
||||
pub fn is_reachable(&self, ebb: Ebb) -> bool {
|
||||
self.nodes[ebb].rpo_number != 0
|
||||
}
|
||||
|
||||
/// Get the CFG post-order of EBBs that was used to compute the dominator tree.
|
||||
///
|
||||
/// Note that this post-order is not updated automatically when the CFG is modified. It is
|
||||
/// computed from scratch and cached by `compute()`.
|
||||
pub fn cfg_postorder(&self) -> &[Ebb] {
|
||||
debug_assert!(self.is_valid());
|
||||
&self.postorder
|
||||
}
|
||||
|
||||
/// Returns the immediate dominator of `ebb`.
|
||||
///
|
||||
/// The immediate dominator of an extended basic block is a basic block which we represent by
|
||||
/// the branch or jump instruction at the end of the basic block. This does not have to be the
|
||||
/// terminator of its EBB.
|
||||
///
|
||||
/// A branch or jump is said to *dominate* `ebb` if all control flow paths from the function
|
||||
/// entry to `ebb` must go through the branch.
|
||||
///
|
||||
/// The *immediate dominator* is the dominator that is closest to `ebb`. All other dominators
|
||||
/// also dominate the immediate dominator.
|
||||
///
|
||||
/// This returns `None` if `ebb` is not reachable from the entry EBB, or if it is the entry EBB
|
||||
/// which has no dominators.
|
||||
pub fn idom(&self, ebb: Ebb) -> Option<Inst> {
|
||||
self.nodes[ebb].idom.into()
|
||||
}
|
||||
|
||||
/// Compare two EBBs relative to the reverse post-order.
|
||||
fn rpo_cmp_ebb(&self, a: Ebb, b: Ebb) -> Ordering {
|
||||
self.nodes[a].rpo_number.cmp(&self.nodes[b].rpo_number)
|
||||
}
|
||||
|
||||
/// Compare two program points relative to a reverse post-order traversal of the control-flow
|
||||
/// graph.
|
||||
///
|
||||
/// Return `Ordering::Less` if `a` comes before `b` in the RPO.
|
||||
///
|
||||
/// If `a` and `b` belong to the same EBB, compare their relative position in the EBB.
|
||||
pub fn rpo_cmp<A, B>(&self, a: A, b: B, layout: &Layout) -> Ordering
|
||||
where
|
||||
A: Into<ExpandedProgramPoint>,
|
||||
B: Into<ExpandedProgramPoint>,
|
||||
{
|
||||
let a = a.into();
|
||||
let b = b.into();
|
||||
self.rpo_cmp_ebb(layout.pp_ebb(a), layout.pp_ebb(b)).then(
|
||||
layout.cmp(a, b),
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns `true` if `a` dominates `b`.
|
||||
///
|
||||
/// This means that every control-flow path from the function entry to `b` must go through `a`.
|
||||
///
|
||||
/// Dominance is ill defined for unreachable blocks. This function can always determine
|
||||
/// dominance for instructions in the same EBB, but otherwise returns `false` if either block
|
||||
/// is unreachable.
|
||||
///
|
||||
/// An instruction is considered to dominate itself.
|
||||
pub fn dominates<A, B>(&self, a: A, b: B, layout: &Layout) -> bool
|
||||
where
|
||||
A: Into<ExpandedProgramPoint>,
|
||||
B: Into<ExpandedProgramPoint>,
|
||||
{
|
||||
let a = a.into();
|
||||
let b = b.into();
|
||||
match a {
|
||||
ExpandedProgramPoint::Ebb(ebb_a) => {
|
||||
a == b || self.last_dominator(ebb_a, b, layout).is_some()
|
||||
}
|
||||
ExpandedProgramPoint::Inst(inst_a) => {
|
||||
let ebb_a = layout.inst_ebb(inst_a).expect("Instruction not in layout.");
|
||||
match self.last_dominator(ebb_a, b, layout) {
|
||||
Some(last) => layout.cmp(inst_a, last) != Ordering::Greater,
|
||||
None => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Find the last instruction in `a` that dominates `b`.
/// If no instructions in `a` dominate `b`, return `None`.
pub fn last_dominator<B>(&self, a: Ebb, b: B, layout: &Layout) -> Option<Inst>
where
    B: Into<ExpandedProgramPoint>,
{
    // Start the walk at `b`'s enclosing EBB; remember the instruction-level position
    // (`None` when `b` is an EBB header).
    let (mut ebb_b, mut inst_b) = match b.into() {
        ExpandedProgramPoint::Ebb(ebb) => (ebb, None),
        ExpandedProgramPoint::Inst(inst) => (
            layout.inst_ebb(inst).expect(
                "Instruction not in layout.",
            ),
            Some(inst),
        ),
    };
    let rpo_a = self.nodes[a].rpo_number;

    // Run a finger up the dominator tree from b until we see a.
    // Do nothing if b is unreachable.
    // Note: any EBB strictly dominating `ebb_b` has a smaller RPO number, so we climb
    // while `a` still comes before the current block in the RPO.
    while rpo_a < self.nodes[ebb_b].rpo_number {
        let idom = match self.idom(ebb_b) {
            Some(idom) => idom,
            None => return None, // a is unreachable, so we climbed past the entry
        };
        ebb_b = layout.inst_ebb(idom).expect("Dominator got removed.");
        inst_b = Some(idom);
    }
    // If the climb stopped at `a` itself, `inst_b` is the last dominating instruction
    // inside `a`; otherwise `a` does not dominate `b`.
    if a == ebb_b { inst_b } else { None }
}
|
||||
|
||||
/// Compute the common dominator of two basic blocks.
///
/// Both basic blocks are assumed to be reachable.
pub fn common_dominator(
    &self,
    mut a: BasicBlock,
    mut b: BasicBlock,
    layout: &Layout,
) -> BasicBlock {
    // Classic two-finger walk: repeatedly move the block that is later in the RPO up to
    // its immediate dominator until both fingers land in the same EBB.
    loop {
        match self.rpo_cmp_ebb(a.0, b.0) {
            Ordering::Less => {
                // `a` comes before `b` in the RPO. Move `b` up.
                let idom = self.nodes[b.0].idom.expect("Unreachable basic block?");
                b = (
                    layout.inst_ebb(idom).expect("Dangling idom instruction"),
                    idom,
                );
            }
            Ordering::Greater => {
                // `b` comes before `a` in the RPO. Move `a` up.
                let idom = self.nodes[a.0].idom.expect("Unreachable basic block?");
                a = (
                    layout.inst_ebb(idom).expect("Dangling idom instruction"),
                    idom,
                );
            }
            Ordering::Equal => break,
        }
    }

    debug_assert_eq!(a.0, b.0, "Unreachable block passed to common_dominator?");

    // We're in the same EBB. The common dominator is the earlier instruction.
    if layout.cmp(a.1, b.1) == Ordering::Less {
        a
    } else {
        b
    }
}
|
||||
}
|
||||
|
||||
impl DominatorTree {
    /// Allocate a new blank dominator tree. Use `compute` to compute the dominator tree for a
    /// function.
    pub fn new() -> Self {
        Self {
            nodes: EntityMap::new(),
            postorder: Vec::new(),
            stack: Vec::new(),
            valid: false,
        }
    }

    /// Allocate and compute a dominator tree.
    pub fn with_function(func: &Function, cfg: &ControlFlowGraph) -> Self {
        let mut domtree = Self::new();
        domtree.compute(func, cfg);
        domtree
    }

    /// Reset and compute a CFG post-order and dominator tree.
    pub fn compute(&mut self, func: &Function, cfg: &ControlFlowGraph) {
        let _tt = timing::domtree();
        debug_assert!(cfg.is_valid());
        self.compute_postorder(func);
        self.compute_domtree(func, cfg);
        self.valid = true;
    }

    /// Clear the data structures used to represent the dominator tree. This will leave the tree in
    /// a state where `is_valid()` returns false.
    pub fn clear(&mut self) {
        self.nodes.clear();
        self.postorder.clear();
        debug_assert!(self.stack.is_empty());
        self.valid = false;
    }

    /// Check if the dominator tree is in a valid state.
    ///
    /// Note that this doesn't perform any kind of validity checks. It simply checks if the
    /// `compute()` method has been called since the last `clear()`. It does not check that the
    /// dominator tree is consistent with the CFG.
    pub fn is_valid(&self) -> bool {
        self.valid
    }

    /// Reset all internal data structures and compute a post-order of the control flow graph.
    ///
    /// This leaves `rpo_number == 1` for all reachable EBBs, 0 for unreachable ones.
    fn compute_postorder(&mut self, func: &Function) {
        self.clear();
        self.nodes.resize(func.dfg.num_ebbs());

        // This algorithm is a depth first traversal (DFT) of the control flow graph, computing a
        // post-order of the EBBs that are reachable from the entry block. A DFT post-order is not
        // unique. The specific order we get is controlled by two factors:
        //
        // 1. The order each node's children are visited, and
        // 2. The method used for pruning graph edges to get a tree.
        //
        // There are two ways of viewing the CFG as a graph:
        //
        // 1. Each EBB is a node, with outgoing edges for all the branches in the EBB.
        // 2. Each basic block is a node, with outgoing edges for the single branch at the end of
        //    the BB. (An EBB is a linear sequence of basic blocks).
        //
        // The first graph is a contraction of the second one. We want to compute an EBB post-order
        // that is compatible both graph interpretations. That is, if you compute a BB post-order
        // and then remove those BBs that do not correspond to EBB headers, you get a post-order of
        // the EBB graph.
        //
        // Node child order:
        //
        //     In the BB graph, we always go down the fall-through path first and follow the branch
        //     destination second.
        //
        //     In the EBB graph, this is equivalent to visiting EBB successors in a bottom-up
        //     order, starting from the destination of the EBB's terminating jump, ending at the
        //     destination of the first branch in the EBB.
        //
        // Edge pruning:
        //
        //     In the BB graph, we keep an edge to an EBB the first time we visit the *source* side
        //     of the edge. Any subsequent edges to the same EBB are pruned.
        //
        //     The equivalent tree is reached in the EBB graph by keeping the first edge to an EBB
        //     in a top-down traversal of the successors. (And then visiting edges in a bottom-up
        //     order).
        //
        // This pruning method makes it possible to compute the DFT without storing lots of
        // information about the progress through an EBB.

        // During this algorithm only, use `rpo_number` to hold the following state:
        //
        // 0:    EBB has not yet been reached in the pre-order.
        // SEEN: EBB has been pushed on the stack but successors not yet pushed.
        // DONE: Successors pushed.

        match func.layout.entry_block() {
            Some(ebb) => {
                self.stack.push(ebb);
                self.nodes[ebb].rpo_number = SEEN;
            }
            None => return,
        }

        while let Some(ebb) = self.stack.pop() {
            match self.nodes[ebb].rpo_number {
                SEEN => {
                    // This is the first time we pop the EBB, so we need to scan its successors and
                    // then revisit it.
                    self.nodes[ebb].rpo_number = DONE;
                    self.stack.push(ebb);
                    self.push_successors(func, ebb);
                }
                DONE => {
                    // This is the second time we pop the EBB, so all successors have been
                    // processed.
                    self.postorder.push(ebb);
                }
                _ => unreachable!(),
            }
        }
    }

    /// Push `ebb` successors onto `self.stack`, filtering out those that have already been seen.
    ///
    /// The successors are pushed in program order which is important to get a split-invariant
    /// post-order. Split-invariant means that if an EBB is split in two, we get the same
    /// post-order except for the insertion of the new EBB header at the split point.
    fn push_successors(&mut self, func: &Function, ebb: Ebb) {
        for inst in func.layout.ebb_insts(ebb) {
            match func.dfg.analyze_branch(inst) {
                BranchInfo::SingleDest(succ, _) => {
                    // Only push a successor the first time it is seen (edge pruning).
                    if self.nodes[succ].rpo_number == 0 {
                        self.nodes[succ].rpo_number = SEEN;
                        self.stack.push(succ);
                    }
                }
                BranchInfo::Table(jt) => {
                    for (_, succ) in func.jump_tables[jt].entries() {
                        if self.nodes[succ].rpo_number == 0 {
                            self.nodes[succ].rpo_number = SEEN;
                            self.stack.push(succ);
                        }
                    }
                }
                BranchInfo::NotABranch => {}
            }
        }
    }

    /// Build a dominator tree from a control flow graph using Keith D. Cooper's
    /// "Simple, Fast Dominator Algorithm."
    fn compute_domtree(&mut self, func: &Function, cfg: &ControlFlowGraph) {
        // During this algorithm, `rpo_number` has the following values:
        //
        // 0: EBB is not reachable.
        // 1: EBB is reachable, but has not yet been visited during the first pass. This is set by
        //    `compute_postorder`.
        // 2+: EBB is reachable and has an assigned RPO number.

        // We'll be iterating over a reverse post-order of the CFG, skipping the entry block.
        let (entry_block, postorder) = match self.postorder.as_slice().split_last() {
            Some((&eb, rest)) => (eb, rest),
            None => return,
        };
        debug_assert_eq!(Some(entry_block), func.layout.entry_block());

        // Do a first pass where we assign RPO numbers to all reachable nodes.
        self.nodes[entry_block].rpo_number = 2 * STRIDE;
        for (rpo_idx, &ebb) in postorder.iter().rev().enumerate() {
            // Update the current node and give it an RPO number.
            // The entry block got 2, the rest start at 3 by multiples of STRIDE to leave
            // room for future dominator tree modifications.
            //
            // Since `compute_idom` will only look at nodes with an assigned RPO number, the
            // function will never see an uninitialized predecessor.
            //
            // Due to the nature of the post-order traversal, every node we visit will have at
            // least one predecessor that has previously been visited during this RPO.
            self.nodes[ebb] = DomNode {
                idom: self.compute_idom(ebb, cfg, &func.layout).into(),
                rpo_number: (rpo_idx as u32 + 3) * STRIDE,
            }
        }

        // Now that we have RPO numbers for everything and initial immediate dominator estimates,
        // iterate until convergence.
        //
        // If the function is free of irreducible control flow, this will exit after one iteration.
        let mut changed = true;
        while changed {
            changed = false;
            for &ebb in postorder.iter().rev() {
                let idom = self.compute_idom(ebb, cfg, &func.layout).into();
                if self.nodes[ebb].idom != idom {
                    self.nodes[ebb].idom = idom;
                    changed = true;
                }
            }
        }
    }

    // Compute the immediate dominator for `ebb` using the current `idom` states for the reachable
    // nodes.
    fn compute_idom(&self, ebb: Ebb, cfg: &ControlFlowGraph, layout: &Layout) -> Inst {
        // Get an iterator with just the reachable, already visited predecessors to `ebb`.
        // Note that during the first pass, `rpo_number` is 1 for reachable blocks that haven't
        // been visited yet, 0 for unreachable blocks.
        let mut reachable_preds = cfg.pred_iter(ebb).filter(|&(pred, _)| {
            self.nodes[pred].rpo_number > 1
        });

        // The RPO must visit at least one predecessor before this node.
        let mut idom = reachable_preds.next().expect(
            "EBB node must have one reachable predecessor",
        );

        // Fold the remaining predecessors into a single common dominator.
        for pred in reachable_preds {
            idom = self.common_dominator(idom, pred, layout);
        }

        idom.1
    }
}
|
||||
|
||||
impl DominatorTree {
|
||||
/// When splitting an `Ebb` using `Layout::split_ebb`, you can use this method to update
|
||||
/// the dominator tree locally rather than recomputing it.
|
||||
///
|
||||
/// `old_ebb` is the `Ebb` before splitting, and `new_ebb` is the `Ebb` which now contains
|
||||
/// the second half of `old_ebb`. `split_jump_inst` is the terminator jump instruction of
|
||||
/// `old_ebb` that points to `new_ebb`.
|
||||
pub fn recompute_split_ebb(&mut self, old_ebb: Ebb, new_ebb: Ebb, split_jump_inst: Inst) {
|
||||
if !self.is_reachable(old_ebb) {
|
||||
// old_ebb is unreachable, it stays so and new_ebb is unreachable too
|
||||
self.nodes[new_ebb] = Default::default();
|
||||
return;
|
||||
}
|
||||
// We use the RPO comparison on the postorder list so we invert the operands of the
|
||||
// comparison
|
||||
let old_ebb_postorder_index =
|
||||
self.postorder
|
||||
.as_slice()
|
||||
.binary_search_by(|probe| self.rpo_cmp_ebb(old_ebb, *probe))
|
||||
.expect("the old ebb is not declared to the dominator tree");
|
||||
let new_ebb_rpo = self.insert_after_rpo(old_ebb, old_ebb_postorder_index, new_ebb);
|
||||
self.nodes[new_ebb] = DomNode {
|
||||
rpo_number: new_ebb_rpo,
|
||||
idom: Some(split_jump_inst).into(),
|
||||
};
|
||||
}
|
||||
|
||||
// Insert new_ebb just after ebb in the RPO. This function checks
|
||||
// if there is a gap in rpo numbers; if yes it returns the number in the gap and if
|
||||
// not it renumbers.
|
||||
fn insert_after_rpo(&mut self, ebb: Ebb, ebb_postorder_index: usize, new_ebb: Ebb) -> u32 {
|
||||
let ebb_rpo_number = self.nodes[ebb].rpo_number;
|
||||
let inserted_rpo_number = ebb_rpo_number + 1;
|
||||
// If there is no gaps in RPo numbers to insert this new number, we iterate
|
||||
// forward in RPO numbers and backwards in the postorder list of EBBs, renumbering the Ebbs
|
||||
// until we find a gap
|
||||
for (¤t_ebb, current_rpo) in
|
||||
self.postorder[0..ebb_postorder_index].iter().rev().zip(
|
||||
inserted_rpo_number +
|
||||
1..,
|
||||
)
|
||||
{
|
||||
if self.nodes[current_ebb].rpo_number < current_rpo {
|
||||
// There is no gap, we renumber
|
||||
self.nodes[current_ebb].rpo_number = current_rpo;
|
||||
} else {
|
||||
// There is a gap, we stop the renumbering and exit
|
||||
break;
|
||||
}
|
||||
}
|
||||
// TODO: insert in constant time?
|
||||
self.postorder.insert(ebb_postorder_index, new_ebb);
|
||||
inserted_rpo_number
|
||||
}
|
||||
}
|
||||
|
||||
/// Optional pre-order information that can be computed for a dominator tree.
///
/// This data structure is computed from a `DominatorTree` and provides:
///
/// - A forward traversable dominator tree through the `children()` iterator.
/// - An ordering of EBBs according to a dominator tree pre-order.
/// - Constant time dominance checks at the EBB granularity.
///
/// The information in this auxiliary data structure is not easy to update when the control flow
/// graph changes, which is why it is kept separate.
pub struct DominatorTreePreorder {
    // Per-EBB pre-order numbering and child/sibling links (see `ExtraNode`).
    nodes: EntityMap<Ebb, ExtraNode>,

    // Scratch memory used by `compute_postorder()`.
    stack: Vec<Ebb>,
}
|
||||
|
||||
/// Per-EBB bookkeeping for `DominatorTreePreorder`: a left-child/right-sibling encoding of the
/// dominator tree plus pre-order interval numbers used for O(1) dominance checks.
#[derive(Default, Clone)]
struct ExtraNode {
    /// First child node in the domtree.
    child: PackedOption<Ebb>,

    /// Next sibling node in the domtree. This linked list is ordered according to the CFG RPO.
    sibling: PackedOption<Ebb>,

    /// Sequence number for this node in a pre-order traversal of the dominator tree.
    /// Unreachable blocks have number 0, the entry block is 1.
    pre_number: u32,

    /// Maximum `pre_number` for the sub-tree of the dominator tree that is rooted at this node.
    /// This is always >= `pre_number`.
    pre_max: u32,
}
|
||||
|
||||
/// Creating and computing the dominator tree pre-order.
impl DominatorTreePreorder {
    /// Create a new blank `DominatorTreePreorder`.
    pub fn new() -> DominatorTreePreorder {
        DominatorTreePreorder {
            nodes: EntityMap::new(),
            stack: Vec::new(),
        }
    }

    /// Recompute this data structure to match `domtree`.
    pub fn compute(&mut self, domtree: &DominatorTree, layout: &Layout) {
        self.nodes.clear();
        debug_assert_eq!(self.stack.len(), 0);

        // Step 1: Populate the child and sibling links.
        //
        // By following the CFG post-order and pushing to the front of the lists, we make sure that
        // sibling lists are ordered according to the CFG reverse post-order.
        for &ebb in domtree.cfg_postorder() {
            if let Some(idom_inst) = domtree.idom(ebb) {
                let idom = layout.pp_ebb(idom_inst);
                // Prepend `ebb` to its idom's child list; the displaced head becomes its sibling.
                let sib = mem::replace(&mut self.nodes[idom].child, ebb.into());
                self.nodes[ebb].sibling = sib;
            } else {
                // The only EBB without an immediate dominator is the entry.
                self.stack.push(ebb);
            }
        }

        // Step 2. Assign pre-order numbers from a DFS of the dominator tree.
        debug_assert!(self.stack.len() <= 1);
        let mut n = 0;
        while let Some(ebb) = self.stack.pop() {
            n += 1;
            let node = &mut self.nodes[ebb];
            node.pre_number = n;
            node.pre_max = n;
            // Push the sibling first so the child (pushed last) is popped next, giving a
            // proper pre-order.
            if let Some(n) = node.sibling.expand() {
                self.stack.push(n);
            }
            if let Some(n) = node.child.expand() {
                self.stack.push(n);
            }
        }

        // Step 3. Propagate the `pre_max` numbers up the tree.
        // The CFG post-order is topologically ordered w.r.t. dominance so a node comes after all
        // its dominator tree children.
        for &ebb in domtree.cfg_postorder() {
            if let Some(idom_inst) = domtree.idom(ebb) {
                let idom = layout.pp_ebb(idom_inst);
                let pre_max = cmp::max(self.nodes[ebb].pre_max, self.nodes[idom].pre_max);
                self.nodes[idom].pre_max = pre_max;
            }
        }
    }
}
|
||||
|
||||
/// An iterator that enumerates the direct children of an EBB in the dominator tree.
pub struct ChildIter<'a> {
    // The pre-order structure holding the child/sibling links.
    dtpo: &'a DominatorTreePreorder,
    // The next child to yield, or `None` when exhausted.
    next: PackedOption<Ebb>,
}
|
||||
|
||||
impl<'a> Iterator for ChildIter<'a> {
|
||||
type Item = Ebb;
|
||||
|
||||
fn next(&mut self) -> Option<Ebb> {
|
||||
let n = self.next.expand();
|
||||
if let Some(ebb) = n {
|
||||
self.next = self.dtpo.nodes[ebb].sibling;
|
||||
}
|
||||
n
|
||||
}
|
||||
}
|
||||
|
||||
/// Query interface for the dominator tree pre-order.
impl DominatorTreePreorder {
    /// Get an iterator over the direct children of `ebb` in the dominator tree.
    ///
    /// These are the EBBs whose immediate dominator is an instruction in `ebb`, ordered according
    /// to the CFG reverse post-order.
    pub fn children(&self, ebb: Ebb) -> ChildIter {
        ChildIter {
            dtpo: self,
            next: self.nodes[ebb].child,
        }
    }

    /// Fast, constant time dominance check with EBB granularity.
    ///
    /// This computes the same result as `domtree.dominates(a, b)`, but in guaranteed fast constant
    /// time. This is less general than the `DominatorTree` method because it only works with EBB
    /// program points.
    ///
    /// An EBB is considered to dominate itself.
    pub fn dominates(&self, a: Ebb, b: Ebb) -> bool {
        let na = &self.nodes[a];
        let nb = &self.nodes[b];
        // `a` dominates `b` iff `b`'s pre-order number falls in `a`'s sub-tree interval
        // [pre_number, pre_max].
        na.pre_number <= nb.pre_number && na.pre_max >= nb.pre_max
    }

    /// Compare two EBBs according to the dominator pre-order.
    pub fn pre_cmp_ebb(&self, a: Ebb, b: Ebb) -> Ordering {
        self.nodes[a].pre_number.cmp(&self.nodes[b].pre_number)
    }

    /// Compare two program points according to the dominator tree pre-order.
    ///
    /// This ordering of program points has the property that given a program point, pp, all the
    /// program points dominated by pp follow immediately and contiguously after pp in the order.
    pub fn pre_cmp<A, B>(&self, a: A, b: B, layout: &Layout) -> Ordering
    where
        A: Into<ExpandedProgramPoint>,
        B: Into<ExpandedProgramPoint>,
    {
        let a = a.into();
        let b = b.into();
        // Order by enclosing EBB first, then by layout position within the EBB.
        self.pre_cmp_ebb(layout.pp_ebb(a), layout.pp_ebb(b)).then(
            layout.cmp(a, b),
        )
    }

    /// Compare two value defs according to the dominator tree pre-order.
    ///
    /// Two values defined at the same program point are compared according to their parameter or
    /// result order.
    ///
    /// This is a total ordering of the values in the function.
    pub fn pre_cmp_def(&self, a: Value, b: Value, func: &Function) -> Ordering {
        let da = func.dfg.value_def(a);
        let db = func.dfg.value_def(b);
        // Tie-break values defined at the same program point by their def index.
        self.pre_cmp(da, db, &func.layout).then_with(
            || da.num().cmp(&db.num()),
        )
    }
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use cursor::{Cursor, FuncCursor};
    use flowgraph::ControlFlowGraph;
    use ir::types::*;
    use ir::{Function, InstBuilder, TrapCode};
    use settings;
    use verifier::verify_context;

    // An empty function yields an empty post-order and an empty dominator tree.
    #[test]
    fn empty() {
        let func = Function::new();
        let cfg = ControlFlowGraph::with_function(&func);
        debug_assert!(cfg.is_valid());
        let dtree = DominatorTree::with_function(&func, &cfg);
        assert_eq!(0, dtree.nodes.keys().count());
        assert_eq!(dtree.cfg_postorder(), &[]);

        let mut dtpo = DominatorTreePreorder::new();
        dtpo.compute(&dtree, &func.layout);
    }

    // `ebb1` is never branched to, so it must be absent from the post-order and
    // must not dominate (or be dominated by) any reachable point.
    #[test]
    fn unreachable_node() {
        let mut func = Function::new();
        let ebb0 = func.dfg.make_ebb();
        let v0 = func.dfg.append_ebb_param(ebb0, I32);
        let ebb1 = func.dfg.make_ebb();
        let ebb2 = func.dfg.make_ebb();

        let mut cur = FuncCursor::new(&mut func);

        cur.insert_ebb(ebb0);
        cur.ins().brnz(v0, ebb2, &[]);
        cur.ins().trap(TrapCode::User(0));

        cur.insert_ebb(ebb1);
        let v1 = cur.ins().iconst(I32, 1);
        let v2 = cur.ins().iadd(v0, v1);
        cur.ins().jump(ebb0, &[v2]);

        cur.insert_ebb(ebb2);
        cur.ins().return_(&[v0]);

        let cfg = ControlFlowGraph::with_function(cur.func);
        let dt = DominatorTree::with_function(cur.func, &cfg);

        // Fall-through-first, prune-at-source DFT:
        //
        // ebb0 {
        //     brnz ebb2 {
        //         trap
        //         ebb2 {
        //             return
        //         } ebb2
        //     } brnz ebb2
        // } ebb0
        assert_eq!(dt.cfg_postorder(), &[ebb2, ebb0]);

        // `v2` is defined in the unreachable `ebb1`: dominance is false both ways.
        let v2_def = cur.func.dfg.value_def(v2).unwrap_inst();
        assert!(!dt.dominates(v2_def, ebb0, &cur.func.layout));
        assert!(!dt.dominates(ebb0, v2_def, &cur.func.layout));

        let mut dtpo = DominatorTreePreorder::new();
        dtpo.compute(&dt, &cur.func.layout);
        assert!(dtpo.dominates(ebb0, ebb0));
        assert!(!dtpo.dominates(ebb0, ebb1));
        assert!(dtpo.dominates(ebb0, ebb2));
        assert!(!dtpo.dominates(ebb1, ebb0));
        assert!(dtpo.dominates(ebb1, ebb1));
        assert!(!dtpo.dominates(ebb1, ebb2));
        assert!(!dtpo.dominates(ebb2, ebb0));
        assert!(!dtpo.dominates(ebb2, ebb1));
        assert!(dtpo.dominates(ebb2, ebb2));
    }

    // The entry block need not be the numerically first EBB; check idoms and RPO
    // comparisons for a function entered at `ebb3`.
    #[test]
    fn non_zero_entry_block() {
        let mut func = Function::new();
        let ebb0 = func.dfg.make_ebb();
        let ebb1 = func.dfg.make_ebb();
        let ebb2 = func.dfg.make_ebb();
        let ebb3 = func.dfg.make_ebb();
        let cond = func.dfg.append_ebb_param(ebb3, I32);

        let mut cur = FuncCursor::new(&mut func);

        cur.insert_ebb(ebb3);
        let jmp_ebb3_ebb1 = cur.ins().jump(ebb1, &[]);

        cur.insert_ebb(ebb1);
        let br_ebb1_ebb0 = cur.ins().brnz(cond, ebb0, &[]);
        let jmp_ebb1_ebb2 = cur.ins().jump(ebb2, &[]);

        cur.insert_ebb(ebb2);
        cur.ins().jump(ebb0, &[]);

        cur.insert_ebb(ebb0);

        let cfg = ControlFlowGraph::with_function(cur.func);
        let dt = DominatorTree::with_function(cur.func, &cfg);

        // Fall-through-first, prune-at-source DFT:
        //
        // ebb3 {
        //     ebb3:jump ebb1 {
        //         ebb1 {
        //             ebb1:brnz ebb0 {
        //                 ebb1:jump ebb2 {
        //                     ebb2 {
        //                         ebb2:jump ebb0 (seen)
        //                     } ebb2
        //                 } ebb1:jump ebb2
        //                 ebb0 {
        //                 } ebb0
        //             } ebb1:brnz ebb0
        //         } ebb1
        //     } ebb3:jump ebb1
        // } ebb3

        assert_eq!(dt.cfg_postorder(), &[ebb2, ebb0, ebb1, ebb3]);

        assert_eq!(cur.func.layout.entry_block().unwrap(), ebb3);
        assert_eq!(dt.idom(ebb3), None);
        assert_eq!(dt.idom(ebb1).unwrap(), jmp_ebb3_ebb1);
        assert_eq!(dt.idom(ebb2).unwrap(), jmp_ebb1_ebb2);
        assert_eq!(dt.idom(ebb0).unwrap(), br_ebb1_ebb0);

        assert!(dt.dominates(br_ebb1_ebb0, br_ebb1_ebb0, &cur.func.layout));
        assert!(!dt.dominates(br_ebb1_ebb0, jmp_ebb3_ebb1, &cur.func.layout));
        assert!(dt.dominates(jmp_ebb3_ebb1, br_ebb1_ebb0, &cur.func.layout));

        assert_eq!(dt.rpo_cmp(ebb3, ebb3, &cur.func.layout), Ordering::Equal);
        assert_eq!(dt.rpo_cmp(ebb3, ebb1, &cur.func.layout), Ordering::Less);
        assert_eq!(
            dt.rpo_cmp(ebb3, jmp_ebb3_ebb1, &cur.func.layout),
            Ordering::Less
        );
        assert_eq!(
            dt.rpo_cmp(jmp_ebb3_ebb1, jmp_ebb1_ebb2, &cur.func.layout),
            Ordering::Less
        );
    }

    // Layout order disagrees with control flow order (ebb0 -> ebb2 -> ebb1);
    // dominance must follow control flow, not layout.
    #[test]
    fn backwards_layout() {
        let mut func = Function::new();
        let ebb0 = func.dfg.make_ebb();
        let ebb1 = func.dfg.make_ebb();
        let ebb2 = func.dfg.make_ebb();

        let mut cur = FuncCursor::new(&mut func);

        cur.insert_ebb(ebb0);
        let jmp02 = cur.ins().jump(ebb2, &[]);

        cur.insert_ebb(ebb1);
        let trap = cur.ins().trap(TrapCode::User(5));

        cur.insert_ebb(ebb2);
        let jmp21 = cur.ins().jump(ebb1, &[]);

        let cfg = ControlFlowGraph::with_function(cur.func);
        let dt = DominatorTree::with_function(cur.func, &cfg);

        assert_eq!(cur.func.layout.entry_block(), Some(ebb0));
        assert_eq!(dt.idom(ebb0), None);
        assert_eq!(dt.idom(ebb1), Some(jmp21));
        assert_eq!(dt.idom(ebb2), Some(jmp02));

        assert!(dt.dominates(ebb0, ebb0, &cur.func.layout));
        assert!(dt.dominates(ebb0, jmp02, &cur.func.layout));
        assert!(dt.dominates(ebb0, ebb1, &cur.func.layout));
        assert!(dt.dominates(ebb0, trap, &cur.func.layout));
        assert!(dt.dominates(ebb0, ebb2, &cur.func.layout));
        assert!(dt.dominates(ebb0, jmp21, &cur.func.layout));

        assert!(!dt.dominates(jmp02, ebb0, &cur.func.layout));
        assert!(dt.dominates(jmp02, jmp02, &cur.func.layout));
        assert!(dt.dominates(jmp02, ebb1, &cur.func.layout));
        assert!(dt.dominates(jmp02, trap, &cur.func.layout));
        assert!(dt.dominates(jmp02, ebb2, &cur.func.layout));
        assert!(dt.dominates(jmp02, jmp21, &cur.func.layout));

        assert!(!dt.dominates(ebb1, ebb0, &cur.func.layout));
        assert!(!dt.dominates(ebb1, jmp02, &cur.func.layout));
        assert!(dt.dominates(ebb1, ebb1, &cur.func.layout));
        assert!(dt.dominates(ebb1, trap, &cur.func.layout));
        assert!(!dt.dominates(ebb1, ebb2, &cur.func.layout));
        assert!(!dt.dominates(ebb1, jmp21, &cur.func.layout));

        assert!(!dt.dominates(trap, ebb0, &cur.func.layout));
        assert!(!dt.dominates(trap, jmp02, &cur.func.layout));
        assert!(!dt.dominates(trap, ebb1, &cur.func.layout));
        assert!(dt.dominates(trap, trap, &cur.func.layout));
        assert!(!dt.dominates(trap, ebb2, &cur.func.layout));
        assert!(!dt.dominates(trap, jmp21, &cur.func.layout));

        assert!(!dt.dominates(ebb2, ebb0, &cur.func.layout));
        assert!(!dt.dominates(ebb2, jmp02, &cur.func.layout));
        assert!(dt.dominates(ebb2, ebb1, &cur.func.layout));
        assert!(dt.dominates(ebb2, trap, &cur.func.layout));
        assert!(dt.dominates(ebb2, ebb2, &cur.func.layout));
        assert!(dt.dominates(ebb2, jmp21, &cur.func.layout));

        assert!(!dt.dominates(jmp21, ebb0, &cur.func.layout));
        assert!(!dt.dominates(jmp21, jmp02, &cur.func.layout));
        assert!(dt.dominates(jmp21, ebb1, &cur.func.layout));
        assert!(dt.dominates(jmp21, trap, &cur.func.layout));
        assert!(!dt.dominates(jmp21, ebb2, &cur.func.layout));
        assert!(dt.dominates(jmp21, jmp21, &cur.func.layout));
    }

    // Repeatedly split an EBB and patch the tree with `recompute_split_ebb`, forcing
    // `insert_after_rpo` to renumber; the verifier must still accept the tree.
    #[test]
    fn renumbering() {
        let mut func = Function::new();
        let entry = func.dfg.make_ebb();
        let ebb0 = func.dfg.make_ebb();
        let ebb100 = func.dfg.make_ebb();

        let mut cur = FuncCursor::new(&mut func);

        cur.insert_ebb(entry);
        cur.ins().jump(ebb0, &[]);

        cur.insert_ebb(ebb0);
        let cond = cur.ins().iconst(I32, 0);
        let inst2 = cur.ins().brz(cond, ebb0, &[]);
        let inst3 = cur.ins().brz(cond, ebb0, &[]);
        let inst4 = cur.ins().brz(cond, ebb0, &[]);
        let inst5 = cur.ins().brz(cond, ebb0, &[]);
        cur.ins().jump(ebb100, &[]);
        cur.insert_ebb(ebb100);
        cur.ins().return_(&[]);

        let mut cfg = ControlFlowGraph::with_function(cur.func);
        let mut dt = DominatorTree::with_function(cur.func, &cfg);

        let ebb1 = cur.func.dfg.make_ebb();
        cur.func.layout.split_ebb(ebb1, inst2);
        cur.goto_bottom(ebb0);
        let middle_jump_inst = cur.ins().jump(ebb1, &[]);

        dt.recompute_split_ebb(ebb0, ebb1, middle_jump_inst);

        let ebb2 = cur.func.dfg.make_ebb();
        cur.func.layout.split_ebb(ebb2, inst3);
        cur.goto_bottom(ebb1);
        let middle_jump_inst = cur.ins().jump(ebb2, &[]);
        dt.recompute_split_ebb(ebb1, ebb2, middle_jump_inst);

        let ebb3 = cur.func.dfg.make_ebb();
        cur.func.layout.split_ebb(ebb3, inst4);
        cur.goto_bottom(ebb2);
        let middle_jump_inst = cur.ins().jump(ebb3, &[]);
        dt.recompute_split_ebb(ebb2, ebb3, middle_jump_inst);

        let ebb4 = cur.func.dfg.make_ebb();
        cur.func.layout.split_ebb(ebb4, inst5);
        cur.goto_bottom(ebb3);
        let middle_jump_inst = cur.ins().jump(ebb4, &[]);
        dt.recompute_split_ebb(ebb3, ebb4, middle_jump_inst);

        cfg.compute(cur.func);

        let flags = settings::Flags::new(&settings::builder());
        verify_context(cur.func, &cfg, &dt, &flags).unwrap();
    }
}
|
||||
316
lib/codegen/src/flowgraph.rs
Normal file
316
lib/codegen/src/flowgraph.rs
Normal file
@@ -0,0 +1,316 @@
|
||||
//! A control flow graph represented as mappings of extended basic blocks to their predecessors
|
||||
//! and successors.
|
||||
//!
|
||||
//! Successors are represented as extended basic blocks while predecessors are represented by basic
|
||||
//! blocks. Basic blocks are denoted by tuples of EBB and branch/jump instructions. Each
|
||||
//! predecessor tuple corresponds to the end of a basic block.
|
||||
//!
|
||||
//! ```c
|
||||
//! Ebb0:
|
||||
//! ... ; beginning of basic block
|
||||
//!
|
||||
//! ...
|
||||
//!
|
||||
//! brz vx, Ebb1 ; end of basic block
|
||||
//!
|
||||
//! ... ; beginning of basic block
|
||||
//!
|
||||
//! ...
|
||||
//!
|
||||
//! jmp Ebb2 ; end of basic block
|
||||
//! ```
|
||||
//!
|
||||
//! Here `Ebb1` and `Ebb2` would each have a single predecessor denoted as `(Ebb0, brz)`
|
||||
//! and `(Ebb0, jmp Ebb2)` respectively.
|
||||
|
||||
use bforest;
|
||||
use entity::EntityMap;
|
||||
use ir::instructions::BranchInfo;
|
||||
use ir::{Ebb, Function, Inst};
|
||||
use std::mem;
|
||||
use timing;
|
||||
|
||||
/// A basic block denoted by its enclosing Ebb and last instruction.
pub type BasicBlock = (Ebb, Inst);
|
||||
|
||||
/// A container for the successors and predecessors of some Ebb.
#[derive(Clone, Default)]
struct CFGNode {
    /// Instructions that can branch or jump to this EBB.
    ///
    /// This maps branch instruction -> predecessor EBB which is redundant since the EBB containing
    /// the branch instruction is available from the `layout.inst_ebb()` method. We store the
    /// redundant information because:
    ///
    /// 1. Many `pred_iter()` consumers want the EBB anyway, so it is handily available.
    /// 2. The `invalidate_ebb_successors()` may be called *after* branches have been removed from
    ///    their EBB, but we still need to remove them from the old EBB predecessor map.
    ///
    /// The redundant EBB stored here is always consistent with the CFG successor lists, even after
    /// the IR has been edited.
    pub predecessors: bforest::Map<Inst, Ebb, ()>,

    /// Set of EBBs that are the targets of branches and jumps in this EBB.
    /// The set is ordered by EBB number, indicated by the `()` comparator type.
    pub successors: bforest::Set<Ebb, ()>,
}
|
||||
|
||||
/// The Control Flow Graph maintains a mapping of ebbs to their predecessors
/// and successors where predecessors are basic blocks and successors are
/// extended basic blocks.
pub struct ControlFlowGraph {
    // Per-EBB predecessor/successor sets; resized to match the function in `compute()`.
    data: EntityMap<Ebb, CFGNode>,
    // Shared node pool backing every `CFGNode::predecessors` map.
    pred_forest: bforest::MapForest<Inst, Ebb, ()>,
    // Shared node pool backing every `CFGNode::successors` set.
    succ_forest: bforest::SetForest<Ebb, ()>,
    // True once `compute()` has run since the last `clear()`; see `is_valid()`.
    valid: bool,
}
|
||||
|
||||
impl ControlFlowGraph {
    /// Allocate a new blank control flow graph.
    pub fn new() -> Self {
        Self {
            data: EntityMap::new(),
            valid: false,
            pred_forest: bforest::MapForest::new(),
            succ_forest: bforest::SetForest::new(),
        }
    }

    /// Clear all data structures in this control flow graph.
    pub fn clear(&mut self) {
        self.data.clear();
        self.pred_forest.clear();
        self.succ_forest.clear();
        self.valid = false;
    }

    /// Allocate and compute the control flow graph for `func`.
    pub fn with_function(func: &Function) -> Self {
        let mut cfg = Self::new();
        cfg.compute(func);
        cfg
    }

    /// Compute the control flow graph of `func`.
    ///
    /// This will clear and overwrite any information already stored in this data structure.
    pub fn compute(&mut self, func: &Function) {
        let _tt = timing::flowgraph();
        self.clear();
        self.data.resize(func.dfg.num_ebbs());

        for ebb in &func.layout {
            self.compute_ebb(func, ebb);
        }

        self.valid = true;
    }

    /// Scan the instructions of `ebb` and record a CFG edge for every branch or jump found,
    /// including every entry of a jump table.
    fn compute_ebb(&mut self, func: &Function, ebb: Ebb) {
        for inst in func.layout.ebb_insts(ebb) {
            match func.dfg.analyze_branch(inst) {
                BranchInfo::SingleDest(dest, _) => {
                    self.add_edge((ebb, inst), dest);
                }
                BranchInfo::Table(jt) => {
                    for (_, dest) in func.jump_tables[jt].entries() {
                        self.add_edge((ebb, inst), dest);
                    }
                }
                BranchInfo::NotABranch => {}
            }
        }
    }

    /// Remove all successor edges leaving `ebb`, and drop `ebb` from the predecessor maps of
    /// those former successors. Used by `recompute_ebb()` before re-scanning the EBB.
    fn invalidate_ebb_successors(&mut self, ebb: Ebb) {
        // Temporarily take ownership because we need mutable access to self.data inside the loop.
        // Unfortunately borrowck cannot see that our mut accesses to predecessors don't alias
        // our iteration over successors.
        let mut successors = mem::replace(&mut self.data[ebb].successors, Default::default());
        for succ in successors.iter(&self.succ_forest) {
            // Keep only predecessor entries whose stored EBB is not the invalidated one.
            self.data[succ].predecessors.retain(
                &mut self.pred_forest,
                |_, &mut e| e != ebb,
            );
        }
        successors.clear(&mut self.succ_forest);
    }

    /// Recompute the control flow graph of `ebb`.
    ///
    /// This is for use after modifying instructions within a specific EBB. It recomputes all edges
    /// from `ebb` while leaving edges to `ebb` intact. Its functionality is a subset of that of
    /// the more expensive `compute`, and should be used when we know we don't need to recompute
    /// the CFG from scratch, but rather that our changes have been restricted to specific EBBs.
    pub fn recompute_ebb(&mut self, func: &Function, ebb: Ebb) {
        debug_assert!(self.is_valid());
        self.invalidate_ebb_successors(ebb);
        self.compute_ebb(func, ebb);
    }

    /// Record the edge `from -> to` in both the successor set of the branching EBB and the
    /// predecessor map of the destination EBB.
    fn add_edge(&mut self, from: BasicBlock, to: Ebb) {
        self.data[from.0].successors.insert(
            to,
            &mut self.succ_forest,
            &(),
        );
        self.data[to].predecessors.insert(
            from.1,
            from.0,
            &mut self.pred_forest,
            &(),
        );
    }

    /// Get an iterator over the CFG predecessors to `ebb`.
    pub fn pred_iter(&self, ebb: Ebb) -> PredIter {
        PredIter(self.data[ebb].predecessors.iter(&self.pred_forest))
    }

    /// Get an iterator over the CFG successors to `ebb`.
    pub fn succ_iter(&self, ebb: Ebb) -> SuccIter {
        debug_assert!(self.is_valid());
        self.data[ebb].successors.iter(&self.succ_forest)
    }

    /// Check if the CFG is in a valid state.
    ///
    /// Note that this doesn't perform any kind of validity checks. It simply checks if the
    /// `compute()` method has been called since the last `clear()`. It does not check that the
    /// CFG is consistent with the function.
    pub fn is_valid(&self) -> bool {
        self.valid
    }
}
|
||||
|
||||
/// An iterator over EBB predecessors. The iterator type is `BasicBlock`.
|
||||
///
|
||||
/// Each predecessor is an instruction that branches to the EBB.
|
||||
pub struct PredIter<'a>(bforest::MapIter<'a, Inst, Ebb, ()>);
|
||||
|
||||
impl<'a> Iterator for PredIter<'a> {
|
||||
type Item = BasicBlock;
|
||||
|
||||
fn next(&mut self) -> Option<BasicBlock> {
|
||||
self.0.next().map(|(i, e)| (e, i))
|
||||
}
|
||||
}
|
||||
|
||||
/// An iterator over EBB successors. The iterator type is `Ebb`.
|
||||
pub type SuccIter<'a> = bforest::SetIter<'a, Ebb, ()>;
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use cursor::{Cursor, FuncCursor};
    use ir::{types, Function, InstBuilder};
    use std::vec::Vec;

    // A CFG can be built for an empty function.
    #[test]
    fn empty() {
        let func = Function::new();
        ControlFlowGraph::with_function(&func);
    }

    // EBBs with no branch instructions have neither predecessors nor successors.
    #[test]
    fn no_predecessors() {
        let mut func = Function::new();
        let ebb0 = func.dfg.make_ebb();
        let ebb1 = func.dfg.make_ebb();
        let ebb2 = func.dfg.make_ebb();
        func.layout.append_ebb(ebb0);
        func.layout.append_ebb(ebb1);
        func.layout.append_ebb(ebb2);

        let cfg = ControlFlowGraph::with_function(&func);

        let mut fun_ebbs = func.layout.ebbs();
        for ebb in func.layout.ebbs() {
            assert_eq!(ebb, fun_ebbs.next().unwrap());
            assert_eq!(cfg.pred_iter(ebb).count(), 0);
            assert_eq!(cfg.succ_iter(ebb).count(), 0);
        }
    }

    // Full edge bookkeeping: conditional branches and jumps produce the expected
    // predecessor/successor sets, and `recompute_ebb` updates them after edits.
    #[test]
    fn branches_and_jumps() {
        let mut func = Function::new();
        let ebb0 = func.dfg.make_ebb();
        let cond = func.dfg.append_ebb_param(ebb0, types::I32);
        let ebb1 = func.dfg.make_ebb();
        let ebb2 = func.dfg.make_ebb();

        let br_ebb0_ebb2;
        let br_ebb1_ebb1;
        let jmp_ebb0_ebb1;
        let jmp_ebb1_ebb2;

        {
            let mut cur = FuncCursor::new(&mut func);

            cur.insert_ebb(ebb0);
            br_ebb0_ebb2 = cur.ins().brnz(cond, ebb2, &[]);
            jmp_ebb0_ebb1 = cur.ins().jump(ebb1, &[]);

            cur.insert_ebb(ebb1);
            br_ebb1_ebb1 = cur.ins().brnz(cond, ebb1, &[]);
            jmp_ebb1_ebb2 = cur.ins().jump(ebb2, &[]);

            cur.insert_ebb(ebb2);
        }

        let mut cfg = ControlFlowGraph::with_function(&func);

        {
            let ebb0_predecessors = cfg.pred_iter(ebb0).collect::<Vec<_>>();
            let ebb1_predecessors = cfg.pred_iter(ebb1).collect::<Vec<_>>();
            let ebb2_predecessors = cfg.pred_iter(ebb2).collect::<Vec<_>>();

            let ebb0_successors = cfg.succ_iter(ebb0).collect::<Vec<_>>();
            let ebb1_successors = cfg.succ_iter(ebb1).collect::<Vec<_>>();
            let ebb2_successors = cfg.succ_iter(ebb2).collect::<Vec<_>>();

            assert_eq!(ebb0_predecessors.len(), 0);
            assert_eq!(ebb1_predecessors.len(), 2);
            assert_eq!(ebb2_predecessors.len(), 2);

            assert_eq!(ebb1_predecessors.contains(&(ebb0, jmp_ebb0_ebb1)), true);
            assert_eq!(ebb1_predecessors.contains(&(ebb1, br_ebb1_ebb1)), true);
            assert_eq!(ebb2_predecessors.contains(&(ebb0, br_ebb0_ebb2)), true);
            assert_eq!(ebb2_predecessors.contains(&(ebb1, jmp_ebb1_ebb2)), true);

            // Successor sets are ordered by EBB number.
            assert_eq!(ebb0_successors, [ebb1, ebb2]);
            assert_eq!(ebb1_successors, [ebb1, ebb2]);
            assert_eq!(ebb2_successors, []);
        }

        // Change some instructions and recompute ebb0
        func.dfg.replace(br_ebb0_ebb2).brnz(cond, ebb1, &[]);
        func.dfg.replace(jmp_ebb0_ebb1).return_(&[]);
        cfg.recompute_ebb(&mut func, ebb0);
        // Same `Inst` entity, new destination after the replace above.
        let br_ebb0_ebb1 = br_ebb0_ebb2;

        {
            let ebb0_predecessors = cfg.pred_iter(ebb0).collect::<Vec<_>>();
            let ebb1_predecessors = cfg.pred_iter(ebb1).collect::<Vec<_>>();
            let ebb2_predecessors = cfg.pred_iter(ebb2).collect::<Vec<_>>();

            let ebb0_successors = cfg.succ_iter(ebb0);
            let ebb1_successors = cfg.succ_iter(ebb1);
            let ebb2_successors = cfg.succ_iter(ebb2);

            assert_eq!(ebb0_predecessors.len(), 0);
            assert_eq!(ebb1_predecessors.len(), 2);
            assert_eq!(ebb2_predecessors.len(), 1);

            assert_eq!(ebb1_predecessors.contains(&(ebb0, br_ebb0_ebb1)), true);
            assert_eq!(ebb1_predecessors.contains(&(ebb1, br_ebb1_ebb1)), true);
            assert_eq!(ebb2_predecessors.contains(&(ebb0, br_ebb0_ebb2)), false);
            assert_eq!(ebb2_predecessors.contains(&(ebb1, jmp_ebb1_ebb2)), true);

            assert_eq!(ebb0_successors.collect::<Vec<_>>(), [ebb1]);
            assert_eq!(ebb1_successors.collect::<Vec<_>>(), [ebb1, ebb2]);
            assert_eq!(ebb2_successors.collect::<Vec<_>>(), []);
        }
    }
}
|
||||
266
lib/codegen/src/ir/builder.rs
Normal file
266
lib/codegen/src/ir/builder.rs
Normal file
@@ -0,0 +1,266 @@
|
||||
//! Cretonne instruction builder.
|
||||
//!
|
||||
//! A `Builder` provides a convenient interface for inserting instructions into a Cretonne
|
||||
//! function. Many of its methods are generated from the meta language instruction definitions.
|
||||
|
||||
use ir;
|
||||
use ir::types;
|
||||
use ir::{DataFlowGraph, InstructionData};
|
||||
use ir::{Inst, Opcode, Type, Value};
|
||||
use isa;
|
||||
|
||||
/// Base trait for instruction builders.
///
/// The `InstBuilderBase` trait provides the basic functionality required by the methods of the
/// generated `InstBuilder` trait. These methods should not normally be used directly. Use the
/// methods in the `InstBuilder` trait instead.
///
/// Any data type that implements `InstBuilderBase` also gets all the methods of the `InstBuilder`
/// trait.
pub trait InstBuilderBase<'f>: Sized {
    /// Get an immutable reference to the data flow graph that will hold the constructed
    /// instructions.
    fn data_flow_graph(&self) -> &DataFlowGraph;
    /// Get a mutable reference to the data flow graph that will hold the constructed
    /// instructions.
    fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph;

    /// Insert an instruction and return a reference to it, consuming the builder.
    ///
    /// The result types may depend on a controlling type variable. For non-polymorphic
    /// instructions with multiple results, pass `VOID` for the `ctrl_typevar` argument.
    ///
    /// Returns the new instruction along with a mutable reference to the data flow graph so
    /// callers can continue working with the DFG after the builder is consumed.
    fn build(self, data: InstructionData, ctrl_typevar: Type) -> (Inst, &'f mut DataFlowGraph);
}
|
||||
|
||||
// Include trait code generated by `lib/codegen/meta/gen_instr.py`.
//
// This file defines the `InstBuilder` trait as an extension of `InstBuilderBase` with methods per
// instruction format and per opcode.
include!(concat!(env!("OUT_DIR"), "/inst_builder.rs"));

/// Any type implementing `InstBuilderBase` gets all the `InstBuilder` methods for free.
// Blanket impl with an empty body: all `InstBuilder` methods are provided in terms of
// `InstBuilderBase` by the generated code above.
impl<'f, T: InstBuilderBase<'f>> InstBuilder<'f> for T {}
|
||||
|
||||
/// Base trait for instruction inserters.
///
/// This is an alternative base trait for an instruction builder to implement.
///
/// An instruction inserter can be adapted into an instruction builder by wrapping it in an
/// `InsertBuilder`. This provides some common functionality for instruction builders that insert
/// new instructions, as opposed to the `ReplaceBuilder` which overwrites existing instructions.
pub trait InstInserterBase<'f>: Sized {
    /// Get an immutable reference to the data flow graph.
    fn data_flow_graph(&self) -> &DataFlowGraph;

    /// Get a mutable reference to the data flow graph.
    fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph;

    /// Insert a new instruction which belongs to the DFG.
    ///
    /// Consumes the inserter and hands back the data flow graph reference.
    fn insert_built_inst(self, inst: Inst, ctrl_typevar: Type) -> &'f mut DataFlowGraph;
}
|
||||
|
||||
use std::marker::PhantomData;

/// Builder that inserts an instruction at the current position.
///
/// An `InsertBuilder` is a wrapper for an `InstInserterBase` that turns it into an instruction
/// builder with some additional facilities for creating instructions that reuse existing values as
/// their results.
pub struct InsertBuilder<'f, IIB: InstInserterBase<'f>> {
    inserter: IIB,
    // `PhantomData` anchors the `'f` lifetime parameter, which is otherwise unused in the
    // struct fields themselves.
    unused: PhantomData<&'f u32>,
}
|
||||
|
||||
impl<'f, IIB: InstInserterBase<'f>> InsertBuilder<'f, IIB> {
    /// Create a new builder which inserts instructions at `pos`.
    /// The `dfg` and `pos.layout` references should be from the same `Function`.
    pub fn new(inserter: IIB) -> InsertBuilder<'f, IIB> {
        InsertBuilder {
            inserter,
            unused: PhantomData,
        }
    }

    /// Reuse result values in `reuse`.
    ///
    /// Convert this builder into one that will reuse the provided result values instead of
    /// allocating new ones. The provided values for reuse must not be attached to anything. Any
    /// missing result values will be allocated as normal.
    ///
    /// The `reuse` argument is expected to be an array of `Option<Value>`.
    pub fn with_results<Array>(self, reuse: Array) -> InsertReuseBuilder<'f, IIB, Array>
    where
        Array: AsRef<[Option<Value>]>,
    {
        InsertReuseBuilder {
            inserter: self.inserter,
            reuse,
            unused: PhantomData,
        }
    }

    /// Reuse a single result value.
    ///
    /// Convert this into a builder that will reuse `v` as the single result value. The reused
    /// result value `v` must not be attached to anything.
    ///
    /// This method should only be used when building an instruction with exactly one result. Use
    /// `with_results()` for the more general case.
    pub fn with_result(self, v: Value) -> InsertReuseBuilder<'f, IIB, [Option<Value>; 1]> {
        // TODO: Specialize this to return a different builder that just attaches `v` instead of
        // calling `make_inst_results_reusing()`.
        self.with_results([Some(v)])
    }
}
|
||||
|
||||
impl<'f, IIB: InstInserterBase<'f>> InstBuilderBase<'f> for InsertBuilder<'f, IIB> {
|
||||
fn data_flow_graph(&self) -> &DataFlowGraph {
|
||||
self.inserter.data_flow_graph()
|
||||
}
|
||||
|
||||
fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph {
|
||||
self.inserter.data_flow_graph_mut()
|
||||
}
|
||||
|
||||
fn build(mut self, data: InstructionData, ctrl_typevar: Type) -> (Inst, &'f mut DataFlowGraph) {
|
||||
let inst;
|
||||
{
|
||||
let dfg = self.inserter.data_flow_graph_mut();
|
||||
inst = dfg.make_inst(data);
|
||||
dfg.make_inst_results(inst, ctrl_typevar);
|
||||
}
|
||||
(inst, self.inserter.insert_built_inst(inst, ctrl_typevar))
|
||||
}
|
||||
}
|
||||
|
||||
/// Builder that inserts a new instruction like `InsertBuilder`, but reusing result values.
pub struct InsertReuseBuilder<'f, IIB, Array>
where
    IIB: InstInserterBase<'f>,
    Array: AsRef<[Option<Value>]>,
{
    inserter: IIB,
    // Values to reuse as instruction results; `None` entries are allocated as normal.
    reuse: Array,
    // Anchors the otherwise unused `'f` lifetime parameter.
    unused: PhantomData<&'f u32>,
}
|
||||
|
||||
impl<'f, IIB, Array> InstBuilderBase<'f> for InsertReuseBuilder<'f, IIB, Array>
where
    IIB: InstInserterBase<'f>,
    Array: AsRef<[Option<Value>]>,
{
    fn data_flow_graph(&self) -> &DataFlowGraph {
        self.inserter.data_flow_graph()
    }

    fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph {
        self.inserter.data_flow_graph_mut()
    }

    fn build(mut self, data: InstructionData, ctrl_typevar: Type) -> (Inst, &'f mut DataFlowGraph) {
        let inst;
        {
            let dfg = self.inserter.data_flow_graph_mut();
            inst = dfg.make_inst(data);
            // Make an `Iterator<Item = Option<Value>>`.
            let ru = self.reuse.as_ref().iter().cloned();
            dfg.make_inst_results_reusing(inst, ctrl_typevar, ru);
        }
        (inst, self.inserter.insert_built_inst(inst, ctrl_typevar))
    }
}
|
||||
|
||||
/// Instruction builder that replaces an existing instruction.
///
/// The inserted instruction will have the same `Inst` number as the old one.
///
/// If the old instruction still has result values attached, it is assumed that the new instruction
/// produces the same number and types of results. The old result values are preserved. If the
/// replacement instruction format does not support multiple results, the builder panics. It is a
/// bug to leave result values dangling.
pub struct ReplaceBuilder<'f> {
    dfg: &'f mut DataFlowGraph,
    // The existing instruction to overwrite.
    inst: Inst,
}

impl<'f> ReplaceBuilder<'f> {
    /// Create a `ReplaceBuilder` that will overwrite `inst`.
    pub fn new(dfg: &'f mut DataFlowGraph, inst: Inst) -> ReplaceBuilder {
        ReplaceBuilder { dfg, inst }
    }
}
|
||||
|
||||
impl<'f> InstBuilderBase<'f> for ReplaceBuilder<'f> {
    fn data_flow_graph(&self) -> &DataFlowGraph {
        self.dfg
    }

    fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph {
        self.dfg
    }

    fn build(self, data: InstructionData, ctrl_typevar: Type) -> (Inst, &'f mut DataFlowGraph) {
        // Splat the new instruction on top of the old one.
        self.dfg[self.inst] = data;

        if !self.dfg.has_results(self.inst) {
            // The old result values were either detached or non-existent.
            // Construct new ones.
            self.dfg.make_inst_results(self.inst, ctrl_typevar);
        }

        // The replaced instruction keeps its original `Inst` number.
        (self.inst, self.dfg)
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use cursor::{Cursor, FuncCursor};
    use ir::condcodes::*;
    use ir::types::*;
    use ir::{Function, InstBuilder, ValueDef};

    // Result types are assigned correctly whether given explicitly, inferred from inputs,
    // or computed by a type formula (e.g. `icmp` produces `B1`).
    #[test]
    fn types() {
        let mut func = Function::new();
        let ebb0 = func.dfg.make_ebb();
        let arg0 = func.dfg.append_ebb_param(ebb0, I32);
        let mut pos = FuncCursor::new(&mut func);
        pos.insert_ebb(ebb0);

        // Explicit types.
        let v0 = pos.ins().iconst(I32, 3);
        assert_eq!(pos.func.dfg.value_type(v0), I32);

        // Inferred from inputs.
        let v1 = pos.ins().iadd(arg0, v0);
        assert_eq!(pos.func.dfg.value_type(v1), I32);

        // Formula.
        let cmp = pos.ins().icmp(IntCC::Equal, arg0, v0);
        assert_eq!(pos.func.dfg.value_type(cmp), B1);
    }

    // `with_result()` lets a new instruction take over a detached result value.
    #[test]
    fn reuse_results() {
        let mut func = Function::new();
        let ebb0 = func.dfg.make_ebb();
        let arg0 = func.dfg.append_ebb_param(ebb0, I32);
        let mut pos = FuncCursor::new(&mut func);
        pos.insert_ebb(ebb0);

        let v0 = pos.ins().iadd_imm(arg0, 17);
        assert_eq!(pos.func.dfg.value_type(v0), I32);
        let iadd = pos.prev_inst().unwrap();
        assert_eq!(pos.func.dfg.value_def(v0), ValueDef::Result(iadd, 0));

        // Detach v0 and reuse it for a different instruction.
        pos.func.dfg.clear_results(iadd);
        let v0b = pos.ins().with_result(v0).iconst(I32, 3);
        assert_eq!(v0, v0b);
        assert_eq!(pos.current_inst(), Some(iadd));
        let iconst = pos.prev_inst().unwrap();
        assert!(iadd != iconst);
        assert_eq!(pos.func.dfg.value_def(v0), ValueDef::Result(iconst, 0));
    }
}
|
||||
358
lib/codegen/src/ir/condcodes.rs
Normal file
358
lib/codegen/src/ir/condcodes.rs
Normal file
@@ -0,0 +1,358 @@
|
||||
//! Condition codes for the Cretonne code generator.
|
||||
//!
|
||||
//! A condition code here is an enumerated type that determined how to compare two numbers. There
|
||||
//! are different rules for comparing integers and floating point numbers, so they use different
|
||||
//! condition codes.
|
||||
|
||||
use std::fmt::{self, Display, Formatter};
|
||||
use std::str::FromStr;
|
||||
|
||||
/// Common traits of condition codes.
pub trait CondCode: Copy {
    /// Get the inverse condition code of `self`.
    ///
    /// The inverse condition code produces the opposite result for all comparisons.
    /// That is, `cmp CC, x, y` is true if and only if `cmp CC.inverse(), x, y` is false.
    #[must_use]
    fn inverse(self) -> Self;

    /// Get the reversed condition code for `self`.
    ///
    /// The reversed condition code produces the same result as swapping `x` and `y` in the
    /// comparison. That is, `cmp CC, x, y` is the same as `cmp CC.reverse(), y, x`.
    #[must_use]
    fn reverse(self) -> Self;
}

/// Condition code for comparing integers.
///
/// This condition code is used by the `icmp` instruction to compare integer values. There are
/// separate codes for comparing the integers as signed or unsigned numbers where it makes a
/// difference.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum IntCC {
    /// `==`.
    Equal,
    /// `!=`.
    NotEqual,
    /// Signed `<`.
    SignedLessThan,
    /// Signed `>=`.
    SignedGreaterThanOrEqual,
    /// Signed `>`.
    SignedGreaterThan,
    /// Signed `<=`.
    SignedLessThanOrEqual,
    /// Unsigned `<`.
    UnsignedLessThan,
    /// Unsigned `>=`.
    UnsignedGreaterThanOrEqual,
    /// Unsigned `>`.
    UnsignedGreaterThan,
    /// Unsigned `<=`.
    UnsignedLessThanOrEqual,
}

impl CondCode for IntCC {
    fn inverse(self) -> Self {
        use self::IntCC::*;
        // Logical negation: `<` pairs with `>=`, `>` pairs with `<=`, and `==` with `!=`,
        // within the same signedness.
        match self {
            Equal => NotEqual,
            NotEqual => Equal,
            SignedLessThan => SignedGreaterThanOrEqual,
            SignedGreaterThanOrEqual => SignedLessThan,
            SignedGreaterThan => SignedLessThanOrEqual,
            SignedLessThanOrEqual => SignedGreaterThan,
            UnsignedLessThan => UnsignedGreaterThanOrEqual,
            UnsignedGreaterThanOrEqual => UnsignedLessThan,
            UnsignedGreaterThan => UnsignedLessThanOrEqual,
            UnsignedLessThanOrEqual => UnsignedGreaterThan,
        }
    }

    fn reverse(self) -> Self {
        use self::IntCC::*;
        // Operand swap: `<` becomes `>`, `<=` becomes `>=`, and the symmetric codes
        // (`==`, `!=`) map to themselves.
        match self {
            Equal => Equal,
            NotEqual => NotEqual,
            SignedLessThan => SignedGreaterThan,
            SignedGreaterThanOrEqual => SignedLessThanOrEqual,
            SignedGreaterThan => SignedLessThan,
            SignedLessThanOrEqual => SignedGreaterThanOrEqual,
            UnsignedLessThan => UnsignedGreaterThan,
            UnsignedGreaterThanOrEqual => UnsignedLessThanOrEqual,
            UnsignedGreaterThan => UnsignedLessThan,
            UnsignedLessThanOrEqual => UnsignedGreaterThanOrEqual,
        }
    }
}

impl Display for IntCC {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        use self::IntCC::*;
        // Textual mnemonics as used in the textual IR format.
        let mnemonic = match *self {
            Equal => "eq",
            NotEqual => "ne",
            SignedLessThan => "slt",
            SignedGreaterThanOrEqual => "sge",
            SignedGreaterThan => "sgt",
            SignedLessThanOrEqual => "sle",
            UnsignedLessThan => "ult",
            UnsignedGreaterThanOrEqual => "uge",
            UnsignedGreaterThan => "ugt",
            UnsignedLessThanOrEqual => "ule",
        };
        f.write_str(mnemonic)
    }
}

impl FromStr for IntCC {
    type Err = ();

    fn from_str(mnemonic: &str) -> Result<Self, Self::Err> {
        use self::IntCC::*;
        // Exact inverse of the `Display` mapping above.
        let parsed = match mnemonic {
            "eq" => Equal,
            "ne" => NotEqual,
            "slt" => SignedLessThan,
            "sge" => SignedGreaterThanOrEqual,
            "sgt" => SignedGreaterThan,
            "sle" => SignedLessThanOrEqual,
            "ult" => UnsignedLessThan,
            "uge" => UnsignedGreaterThanOrEqual,
            "ugt" => UnsignedGreaterThan,
            "ule" => UnsignedLessThanOrEqual,
            _ => return Err(()),
        };
        Ok(parsed)
    }
}
|
||||
|
||||
/// Condition code for comparing floating point numbers.
///
/// This condition code is used by the `fcmp` instruction to compare floating point values. Two
/// IEEE floating point values relate in exactly one of four ways:
///
/// 1. `UN` - unordered when either value is NaN.
/// 2. `EQ` - equal numerical value.
/// 3. `LT` - `x` is less than `y`.
/// 4. `GT` - `x` is greater than `y`.
///
/// Note that `0.0` and `-0.0` relate as `EQ` because they both represent the number 0.
///
/// The condition codes described here are used to produce a single boolean value from the
/// comparison. The 14 condition codes here cover every possible combination of the relation above
/// except the impossible `!UN & !EQ & !LT & !GT` and the always true `UN | EQ | LT | GT`.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum FloatCC {
    /// EQ | LT | GT
    Ordered,
    /// UN
    Unordered,

    /// EQ
    Equal,
    /// The C '!=' operator is the inverse of '==': `NotEqual`.
    /// UN | LT | GT
    NotEqual,
    /// LT | GT
    OrderedNotEqual,
    /// UN | EQ
    UnorderedOrEqual,

    /// LT
    LessThan,
    /// LT | EQ
    LessThanOrEqual,
    /// GT
    GreaterThan,
    /// GT | EQ
    GreaterThanOrEqual,

    /// UN | LT
    UnorderedOrLessThan,
    /// UN | LT | EQ
    UnorderedOrLessThanOrEqual,
    /// UN | GT
    UnorderedOrGreaterThan,
    /// UN | GT | EQ
    UnorderedOrGreaterThanOrEqual,
}

impl CondCode for FloatCC {
    // The inverse contains exactly the relations (UN/EQ/LT/GT) that `self` does not.
    fn inverse(self) -> Self {
        use self::FloatCC::*;
        match self {
            Ordered => Unordered,
            Unordered => Ordered,
            Equal => NotEqual,
            NotEqual => Equal,
            OrderedNotEqual => UnorderedOrEqual,
            UnorderedOrEqual => OrderedNotEqual,
            LessThan => UnorderedOrGreaterThanOrEqual,
            LessThanOrEqual => UnorderedOrGreaterThan,
            GreaterThan => UnorderedOrLessThanOrEqual,
            GreaterThanOrEqual => UnorderedOrLessThan,
            UnorderedOrLessThan => GreaterThanOrEqual,
            UnorderedOrLessThanOrEqual => GreaterThan,
            UnorderedOrGreaterThan => LessThanOrEqual,
            UnorderedOrGreaterThanOrEqual => LessThan,
        }
    }
    // Swapping operands exchanges LT and GT while UN and EQ are unaffected.
    fn reverse(self) -> Self {
        use self::FloatCC::*;
        match self {
            Ordered => Ordered,
            Unordered => Unordered,
            Equal => Equal,
            NotEqual => NotEqual,
            OrderedNotEqual => OrderedNotEqual,
            UnorderedOrEqual => UnorderedOrEqual,
            LessThan => GreaterThan,
            LessThanOrEqual => GreaterThanOrEqual,
            GreaterThan => LessThan,
            GreaterThanOrEqual => LessThanOrEqual,
            UnorderedOrLessThan => UnorderedOrGreaterThan,
            UnorderedOrLessThanOrEqual => UnorderedOrGreaterThanOrEqual,
            UnorderedOrGreaterThan => UnorderedOrLessThan,
            UnorderedOrGreaterThanOrEqual => UnorderedOrLessThanOrEqual,
        }
    }
}

impl Display for FloatCC {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        use self::FloatCC::*;
        // Textual mnemonics as used in the textual IR format.
        f.write_str(match *self {
            Ordered => "ord",
            Unordered => "uno",
            Equal => "eq",
            NotEqual => "ne",
            OrderedNotEqual => "one",
            UnorderedOrEqual => "ueq",
            LessThan => "lt",
            LessThanOrEqual => "le",
            GreaterThan => "gt",
            GreaterThanOrEqual => "ge",
            UnorderedOrLessThan => "ult",
            UnorderedOrLessThanOrEqual => "ule",
            UnorderedOrGreaterThan => "ugt",
            UnorderedOrGreaterThanOrEqual => "uge",
        })
    }
}

impl FromStr for FloatCC {
    type Err = ();

    // Exact inverse of the `Display` mapping above.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        use self::FloatCC::*;
        match s {
            "ord" => Ok(Ordered),
            "uno" => Ok(Unordered),
            "eq" => Ok(Equal),
            "ne" => Ok(NotEqual),
            "one" => Ok(OrderedNotEqual),
            "ueq" => Ok(UnorderedOrEqual),
            "lt" => Ok(LessThan),
            "le" => Ok(LessThanOrEqual),
            "gt" => Ok(GreaterThan),
            "ge" => Ok(GreaterThanOrEqual),
            "ult" => Ok(UnorderedOrLessThan),
            "ule" => Ok(UnorderedOrLessThanOrEqual),
            "ugt" => Ok(UnorderedOrGreaterThan),
            "uge" => Ok(UnorderedOrGreaterThanOrEqual),
            _ => Err(()),
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::string::ToString;

    // Every `IntCC` variant, for exhaustive round-trip checks.
    static INT_ALL: [IntCC; 10] = [
        IntCC::Equal,
        IntCC::NotEqual,
        IntCC::SignedLessThan,
        IntCC::SignedGreaterThanOrEqual,
        IntCC::SignedGreaterThan,
        IntCC::SignedLessThanOrEqual,
        IntCC::UnsignedLessThan,
        IntCC::UnsignedGreaterThanOrEqual,
        IntCC::UnsignedGreaterThan,
        IntCC::UnsignedLessThanOrEqual,
    ];

    // `inverse()` is an involution and never maps a code to itself.
    #[test]
    fn int_inverse() {
        for r in &INT_ALL {
            let cc = *r;
            let inv = cc.inverse();
            assert!(cc != inv);
            assert_eq!(inv.inverse(), cc);
        }
    }

    // `reverse()` is an involution (it may map a symmetric code to itself).
    #[test]
    fn int_reverse() {
        for r in &INT_ALL {
            let cc = *r;
            let rev = cc.reverse();
            assert_eq!(rev.reverse(), cc);
        }
    }

    // `Display` and `FromStr` are exact inverses; unknown mnemonics fail.
    #[test]
    fn int_display() {
        for r in &INT_ALL {
            let cc = *r;
            assert_eq!(cc.to_string().parse(), Ok(cc));
        }
        assert_eq!("bogus".parse::<IntCC>(), Err(()));
    }

    // Every `FloatCC` variant, for exhaustive round-trip checks.
    static FLOAT_ALL: [FloatCC; 14] = [
        FloatCC::Ordered,
        FloatCC::Unordered,
        FloatCC::Equal,
        FloatCC::NotEqual,
        FloatCC::OrderedNotEqual,
        FloatCC::UnorderedOrEqual,
        FloatCC::LessThan,
        FloatCC::LessThanOrEqual,
        FloatCC::GreaterThan,
        FloatCC::GreaterThanOrEqual,
        FloatCC::UnorderedOrLessThan,
        FloatCC::UnorderedOrLessThanOrEqual,
        FloatCC::UnorderedOrGreaterThan,
        FloatCC::UnorderedOrGreaterThanOrEqual,
    ];

    // `inverse()` is an involution and never maps a code to itself.
    #[test]
    fn float_inverse() {
        for r in &FLOAT_ALL {
            let cc = *r;
            let inv = cc.inverse();
            assert!(cc != inv);
            assert_eq!(inv.inverse(), cc);
        }
    }

    // `reverse()` is an involution (it may map a symmetric code to itself).
    #[test]
    fn float_reverse() {
        for r in &FLOAT_ALL {
            let cc = *r;
            let rev = cc.reverse();
            assert_eq!(rev.reverse(), cc);
        }
    }

    // `Display` and `FromStr` are exact inverses; unknown mnemonics fail.
    #[test]
    fn float_display() {
        for r in &FLOAT_ALL {
            let cc = *r;
            assert_eq!(cc.to_string().parse(), Ok(cc));
        }
        assert_eq!("bogus".parse::<FloatCC>(), Err(()));
    }
}
|
||||
1191
lib/codegen/src/ir/dfg.rs
Normal file
1191
lib/codegen/src/ir/dfg.rs
Normal file
File diff suppressed because it is too large
Load Diff
286
lib/codegen/src/ir/entities.rs
Normal file
286
lib/codegen/src/ir/entities.rs
Normal file
@@ -0,0 +1,286 @@
|
||||
//! Cretonne IR entity references.
|
||||
//!
|
||||
//! Instructions in Cretonne IR need to reference other entities in the function. This can be other
|
||||
//! parts of the function like extended basic blocks or stack slots, or it can be external entities
|
||||
//! that are declared in the function preamble in the text format.
|
||||
//!
|
||||
//! These entity references in instruction operands are not implemented as Rust references both
|
||||
//! because Rust's ownership and mutability rules make it difficult, and because 64-bit pointers
|
||||
//! take up a lot of space, and we want a compact in-memory representation. Instead, entity
|
||||
//! references are structs wrapping a `u32` index into a table in the `Function` main data
|
||||
//! structure. There is a separate index type for each entity type, so we don't lose type safety.
|
||||
//!
|
||||
//! The `entities` module defines public types for the entity references along with constants
|
||||
//! representing an invalid reference. We prefer to use `Option<EntityRef>` whenever possible, but
|
||||
//! unfortunately that type is twice as large as the 32-bit index type on its own. Thus, compact
|
||||
//! data structures use the `PackedOption<EntityRef>` representation, while function arguments and
|
||||
//! return values prefer the more Rust-like `Option<EntityRef>` variant.
|
||||
//!
|
||||
//! The entity references all implement the `Display` trait in a way that matches the textual IR
|
||||
//! format.
|
||||
|
||||
use std::fmt;
|
||||
use std::u32;
|
||||
|
||||
/// An opaque reference to an extended basic block in a function.
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||
pub struct Ebb(u32);
|
||||
entity_impl!(Ebb, "ebb");
|
||||
|
||||
impl Ebb {
|
||||
/// Create a new EBB reference from its number. This corresponds to the `ebbNN` representation.
|
||||
///
|
||||
/// This method is for use by the parser.
|
||||
pub fn with_number(n: u32) -> Option<Ebb> {
|
||||
if n < u32::MAX { Some(Ebb(n)) } else { None }
|
||||
}
|
||||
}
|
||||
|
||||
/// An opaque reference to an SSA value.
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||
pub struct Value(u32);
|
||||
entity_impl!(Value, "v");
|
||||
|
||||
impl Value {
|
||||
/// Create a value from its number representation.
|
||||
/// This is the number in the `vNN` notation.
|
||||
///
|
||||
/// This method is for use by the parser.
|
||||
pub fn with_number(n: u32) -> Option<Value> {
|
||||
if n < u32::MAX / 2 {
|
||||
Some(Value(n))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An opaque reference to an instruction in a function.
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||
pub struct Inst(u32);
|
||||
entity_impl!(Inst, "inst");
|
||||
|
||||
/// An opaque reference to a stack slot.
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct StackSlot(u32);
|
||||
entity_impl!(StackSlot, "ss");
|
||||
|
||||
impl StackSlot {
|
||||
/// Create a new stack slot reference from its number.
|
||||
///
|
||||
/// This method is for use by the parser.
|
||||
pub fn with_number(n: u32) -> Option<StackSlot> {
|
||||
if n < u32::MAX {
|
||||
Some(StackSlot(n))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An opaque reference to a global variable.
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct GlobalVar(u32);
|
||||
entity_impl!(GlobalVar, "gv");
|
||||
|
||||
impl GlobalVar {
|
||||
/// Create a new global variable reference from its number.
|
||||
///
|
||||
/// This method is for use by the parser.
|
||||
pub fn with_number(n: u32) -> Option<GlobalVar> {
|
||||
if n < u32::MAX {
|
||||
Some(GlobalVar(n))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An opaque reference to a jump table.
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct JumpTable(u32);
|
||||
entity_impl!(JumpTable, "jt");
|
||||
|
||||
impl JumpTable {
|
||||
/// Create a new jump table reference from its number.
|
||||
///
|
||||
/// This method is for use by the parser.
|
||||
pub fn with_number(n: u32) -> Option<JumpTable> {
|
||||
if n < u32::MAX {
|
||||
Some(JumpTable(n))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A reference to an external function.
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct FuncRef(u32);
|
||||
entity_impl!(FuncRef, "fn");
|
||||
|
||||
impl FuncRef {
|
||||
/// Create a new external function reference from its number.
|
||||
///
|
||||
/// This method is for use by the parser.
|
||||
pub fn with_number(n: u32) -> Option<FuncRef> {
|
||||
if n < u32::MAX { Some(FuncRef(n)) } else { None }
|
||||
}
|
||||
}
|
||||
|
||||
/// A reference to a function signature.
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct SigRef(u32);
|
||||
entity_impl!(SigRef, "sig");
|
||||
|
||||
impl SigRef {
|
||||
/// Create a new function signature reference from its number.
|
||||
///
|
||||
/// This method is for use by the parser.
|
||||
pub fn with_number(n: u32) -> Option<SigRef> {
|
||||
if n < u32::MAX { Some(SigRef(n)) } else { None }
|
||||
}
|
||||
}
|
||||
|
||||
/// A reference to a heap.
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct Heap(u32);
|
||||
entity_impl!(Heap, "heap");
|
||||
|
||||
impl Heap {
|
||||
/// Create a new heap reference from its number.
|
||||
///
|
||||
/// This method is for use by the parser.
|
||||
pub fn with_number(n: u32) -> Option<Heap> {
|
||||
if n < u32::MAX { Some(Heap(n)) } else { None }
|
||||
}
|
||||
}
|
||||
|
||||
/// A reference to any of the entities defined in this module.
///
/// Useful for error reporting and other contexts that need to point at an
/// arbitrary part of a function.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub enum AnyEntity {
    /// The whole function.
    Function,
    /// An extended basic block.
    Ebb(Ebb),
    /// An instruction.
    Inst(Inst),
    /// An SSA value.
    Value(Value),
    /// A stack slot.
    StackSlot(StackSlot),
    /// A global variable.
    GlobalVar(GlobalVar),
    /// A jump table.
    JumpTable(JumpTable),
    /// An external function.
    FuncRef(FuncRef),
    /// A function call signature.
    SigRef(SigRef),
    /// A heap.
    Heap(Heap),
}
|
||||
|
||||
impl fmt::Display for AnyEntity {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match *self {
|
||||
AnyEntity::Function => write!(f, "function"),
|
||||
AnyEntity::Ebb(r) => r.fmt(f),
|
||||
AnyEntity::Inst(r) => r.fmt(f),
|
||||
AnyEntity::Value(r) => r.fmt(f),
|
||||
AnyEntity::StackSlot(r) => r.fmt(f),
|
||||
AnyEntity::GlobalVar(r) => r.fmt(f),
|
||||
AnyEntity::JumpTable(r) => r.fmt(f),
|
||||
AnyEntity::FuncRef(r) => r.fmt(f),
|
||||
AnyEntity::SigRef(r) => r.fmt(f),
|
||||
AnyEntity::Heap(r) => r.fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for AnyEntity {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
(self as &fmt::Display).fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Ebb> for AnyEntity {
|
||||
fn from(r: Ebb) -> AnyEntity {
|
||||
AnyEntity::Ebb(r)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Inst> for AnyEntity {
|
||||
fn from(r: Inst) -> AnyEntity {
|
||||
AnyEntity::Inst(r)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Value> for AnyEntity {
|
||||
fn from(r: Value) -> AnyEntity {
|
||||
AnyEntity::Value(r)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<StackSlot> for AnyEntity {
|
||||
fn from(r: StackSlot) -> AnyEntity {
|
||||
AnyEntity::StackSlot(r)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<GlobalVar> for AnyEntity {
|
||||
fn from(r: GlobalVar) -> AnyEntity {
|
||||
AnyEntity::GlobalVar(r)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<JumpTable> for AnyEntity {
|
||||
fn from(r: JumpTable) -> AnyEntity {
|
||||
AnyEntity::JumpTable(r)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<FuncRef> for AnyEntity {
|
||||
fn from(r: FuncRef) -> AnyEntity {
|
||||
AnyEntity::FuncRef(r)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SigRef> for AnyEntity {
|
||||
fn from(r: SigRef) -> AnyEntity {
|
||||
AnyEntity::SigRef(r)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Heap> for AnyEntity {
|
||||
fn from(r: Heap) -> AnyEntity {
|
||||
AnyEntity::Heap(r)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::string::ToString;
    use std::u32;

    #[test]
    fn value_with_number() {
        // Value references display in `vNN` form.
        assert_eq!(Value::with_number(0).unwrap().to_string(), "v0");
        assert_eq!(Value::with_number(1).unwrap().to_string(), "v1");

        // Only numbers below `u32::MAX / 2` are valid value references.
        assert_eq!(Value::with_number(u32::MAX / 2), None);
        assert!(Value::with_number(u32::MAX / 2 - 1).is_some());
    }

    #[test]
    fn memory() {
        use packed_option::PackedOption;
        use std::mem;
        // This is the whole point of `PackedOption`: the `None` case fits in
        // the reserved index, so the option adds no size.
        assert_eq!(
            mem::size_of::<Value>(),
            mem::size_of::<PackedOption<Value>>()
        );
    }
}
|
||||
456
lib/codegen/src/ir/extfunc.rs
Normal file
456
lib/codegen/src/ir/extfunc.rs
Normal file
@@ -0,0 +1,456 @@
|
||||
//! External function calls.
|
||||
//!
|
||||
//! To a Cretonne function, all functions are "external". Directly called functions must be
|
||||
//! declared in the preamble, and all function calls must have a signature.
|
||||
//!
|
||||
//! This module declares the data types used to represent external functions and call signatures.
|
||||
|
||||
use ir::{ArgumentLoc, ExternalName, SigRef, Type};
|
||||
use isa::{RegInfo, RegUnit};
|
||||
use std::cmp;
|
||||
use std::fmt;
|
||||
use std::str::FromStr;
|
||||
use std::vec::Vec;
|
||||
|
||||
/// Function signature.
///
/// The function signature describes the types of formal parameters and return values along with
/// other details that are needed to call a function correctly.
///
/// A signature can optionally include ISA-specific ABI information which specifies exactly how
/// arguments and return values are passed.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Signature {
    /// The arguments passed to the function.
    pub params: Vec<AbiParam>,
    /// Values returned from the function.
    pub returns: Vec<AbiParam>,

    /// Calling convention.
    pub call_conv: CallConv,

    /// When the signature has been legalized to a specific ISA, this holds the size of the
    /// argument array on the stack. Before legalization, this is `None`.
    ///
    /// This can be computed from the legalized `params` array as the maximum (offset plus
    /// byte size) of the `ArgumentLoc::Stack(offset)` argument.
    pub argument_bytes: Option<u32>,
}
|
||||
|
||||
impl Signature {
|
||||
/// Create a new blank signature.
|
||||
pub fn new(call_conv: CallConv) -> Self {
|
||||
Self {
|
||||
params: Vec::new(),
|
||||
returns: Vec::new(),
|
||||
call_conv,
|
||||
argument_bytes: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Clear the signature so it is identical to a fresh one returned by `new()`.
|
||||
pub fn clear(&mut self, call_conv: CallConv) {
|
||||
self.params.clear();
|
||||
self.returns.clear();
|
||||
self.call_conv = call_conv;
|
||||
self.argument_bytes = None;
|
||||
}
|
||||
|
||||
/// Compute the size of the stack arguments and mark signature as legalized.
|
||||
///
|
||||
/// Even if there are no stack arguments, this will set `params` to `Some(0)` instead
|
||||
/// of `None`. This indicates that the signature has been legalized.
|
||||
pub fn compute_argument_bytes(&mut self) {
|
||||
let bytes = self.params
|
||||
.iter()
|
||||
.filter_map(|arg| match arg.location {
|
||||
ArgumentLoc::Stack(offset) if offset >= 0 => {
|
||||
Some(offset as u32 + arg.value_type.bytes())
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
.fold(0, cmp::max);
|
||||
self.argument_bytes = Some(bytes);
|
||||
}
|
||||
|
||||
/// Return an object that can display `self` with correct register names.
|
||||
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&'a self, regs: R) -> DisplaySignature<'a> {
|
||||
DisplaySignature(self, regs.into())
|
||||
}
|
||||
|
||||
/// Find the index of a presumed unique special-purpose parameter.
|
||||
pub fn special_param_index(&self, purpose: ArgumentPurpose) -> Option<usize> {
|
||||
self.params.iter().rposition(|arg| arg.purpose == purpose)
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper type capable of displaying a `Signature` with correct register names.
|
||||
pub struct DisplaySignature<'a>(&'a Signature, Option<&'a RegInfo>);
|
||||
|
||||
fn write_list(f: &mut fmt::Formatter, args: &[AbiParam], regs: Option<&RegInfo>) -> fmt::Result {
|
||||
match args.split_first() {
|
||||
None => {}
|
||||
Some((first, rest)) => {
|
||||
write!(f, "{}", first.display(regs))?;
|
||||
for arg in rest {
|
||||
write!(f, ", {}", arg.display(regs))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
impl<'a> fmt::Display for DisplaySignature<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "(")?;
|
||||
write_list(f, &self.0.params, self.1)?;
|
||||
write!(f, ")")?;
|
||||
if !self.0.returns.is_empty() {
|
||||
write!(f, " -> ")?;
|
||||
write_list(f, &self.0.returns, self.1)?;
|
||||
}
|
||||
write!(f, " {}", self.0.call_conv)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Signature {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
self.display(None).fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
/// Function parameter or return value descriptor.
///
/// This describes the value type being passed to or from a function along with flags that affect
/// how the argument is passed.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub struct AbiParam {
    /// Type of the argument value.
    pub value_type: Type,
    /// Special purpose of argument, or `Normal`.
    pub purpose: ArgumentPurpose,
    /// Method for extending argument to a full register.
    pub extension: ArgumentExtension,

    /// ABI-specific location of this argument, or `Unassigned` for arguments that have not yet
    /// been legalized.
    pub location: ArgumentLoc,
}
|
||||
|
||||
impl AbiParam {
|
||||
/// Create a parameter with default flags.
|
||||
pub fn new(vt: Type) -> Self {
|
||||
Self {
|
||||
value_type: vt,
|
||||
extension: ArgumentExtension::None,
|
||||
purpose: ArgumentPurpose::Normal,
|
||||
location: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a special-purpose parameter that is not (yet) bound to a specific register.
|
||||
pub fn special(vt: Type, purpose: ArgumentPurpose) -> Self {
|
||||
Self {
|
||||
value_type: vt,
|
||||
extension: ArgumentExtension::None,
|
||||
purpose,
|
||||
location: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a parameter for a special-purpose register.
|
||||
pub fn special_reg(vt: Type, purpose: ArgumentPurpose, regunit: RegUnit) -> Self {
|
||||
Self {
|
||||
value_type: vt,
|
||||
extension: ArgumentExtension::None,
|
||||
purpose,
|
||||
location: ArgumentLoc::Reg(regunit),
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert `self` to a parameter with the `uext` flag set.
|
||||
pub fn uext(self) -> Self {
|
||||
debug_assert!(self.value_type.is_int(), "uext on {} arg", self.value_type);
|
||||
Self {
|
||||
extension: ArgumentExtension::Uext,
|
||||
..self
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert `self` to a parameter type with the `sext` flag set.
|
||||
pub fn sext(self) -> Self {
|
||||
debug_assert!(self.value_type.is_int(), "sext on {} arg", self.value_type);
|
||||
Self {
|
||||
extension: ArgumentExtension::Sext,
|
||||
..self
|
||||
}
|
||||
}
|
||||
|
||||
/// Return an object that can display `self` with correct register names.
|
||||
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&'a self, regs: R) -> DisplayAbiParam<'a> {
|
||||
DisplayAbiParam(self, regs.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper type capable of displaying a `AbiParam` with correct register names.
|
||||
pub struct DisplayAbiParam<'a>(&'a AbiParam, Option<&'a RegInfo>);
|
||||
|
||||
impl<'a> fmt::Display for DisplayAbiParam<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}", self.0.value_type)?;
|
||||
match self.0.extension {
|
||||
ArgumentExtension::None => {}
|
||||
ArgumentExtension::Uext => write!(f, " uext")?,
|
||||
ArgumentExtension::Sext => write!(f, " sext")?,
|
||||
}
|
||||
if self.0.purpose != ArgumentPurpose::Normal {
|
||||
write!(f, " {}", self.0.purpose)?;
|
||||
}
|
||||
|
||||
if self.0.location.is_assigned() {
|
||||
write!(f, " [{}]", self.0.location.display(self.1))?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for AbiParam {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
self.display(None).fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
/// Function argument extension options.
///
/// On some architectures, small integer function arguments are extended to the width of a
/// general-purpose register.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
pub enum ArgumentExtension {
    /// No extension, high bits are indeterminate.
    None,
    /// Unsigned extension: high bits in register are 0.
    Uext,
    /// Signed extension: high bits in register replicate sign bit.
    Sext,
}
|
||||
|
||||
/// The special purpose of a function argument.
///
/// Function arguments and return values are used to pass user program values between functions,
/// but they are also used to represent special registers with significance to the ABI such as
/// frame pointers and callee-saved registers.
///
/// The argument purpose is used to indicate any special meaning of an argument or return value.
///
/// NOTE: the `Display` impl indexes `PURPOSE_NAMES` by discriminant, so variant
/// order here must stay in sync with that array.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
pub enum ArgumentPurpose {
    /// A normal user program value passed to or from a function.
    Normal,

    /// Struct return pointer.
    ///
    /// When a function needs to return more data than will fit in registers, the caller passes a
    /// pointer to a memory location where the return value can be written. In some ABIs, this
    /// struct return pointer is passed in a specific register.
    ///
    /// This argument kind can also appear as a return value for ABIs that require a function with
    /// a `StructReturn` pointer argument to also return that pointer in a register.
    StructReturn,

    /// The link register.
    ///
    /// Most RISC architectures implement calls by saving the return address in a designated
    /// register rather than pushing it on the stack. This is represented with a `Link` argument.
    ///
    /// Similarly, some return instructions expect the return address in a register represented as
    /// a `Link` return value.
    Link,

    /// The frame pointer.
    ///
    /// This indicates the frame pointer register which has a special meaning in some ABIs.
    ///
    /// The frame pointer appears as an argument and as a return value since it is a callee-saved
    /// register.
    FramePointer,

    /// A callee-saved register.
    ///
    /// Some calling conventions have registers that must be saved by the callee. These registers
    /// are represented as `CalleeSaved` arguments and return values.
    CalleeSaved,

    /// A VM context pointer.
    ///
    /// This is a pointer to a context struct containing details about the current sandbox. It is
    /// used as a base pointer for `vmctx` global variables.
    VMContext,

    /// A signature identifier.
    ///
    /// This is a special-purpose argument used to identify the calling convention expected by the
    /// caller in an indirect call. The callee can verify that the expected signature ID matches.
    SignatureId,
}

/// Text format names of the `ArgumentPurpose` variants, indexed by discriminant.
static PURPOSE_NAMES: [&str; 7] = ["normal", "sret", "link", "fp", "csr", "vmctx", "sigid"];
|
||||
|
||||
impl fmt::Display for ArgumentPurpose {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
f.write_str(PURPOSE_NAMES[*self as usize])
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for ArgumentPurpose {
|
||||
type Err = ();
|
||||
fn from_str(s: &str) -> Result<ArgumentPurpose, ()> {
|
||||
match s {
|
||||
"normal" => Ok(ArgumentPurpose::Normal),
|
||||
"sret" => Ok(ArgumentPurpose::StructReturn),
|
||||
"link" => Ok(ArgumentPurpose::Link),
|
||||
"fp" => Ok(ArgumentPurpose::FramePointer),
|
||||
"csr" => Ok(ArgumentPurpose::CalleeSaved),
|
||||
"vmctx" => Ok(ArgumentPurpose::VMContext),
|
||||
"sigid" => Ok(ArgumentPurpose::SignatureId),
|
||||
_ => Err(()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An external function.
|
||||
///
|
||||
/// Information about a function that can be called directly with a direct `call` instruction.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ExtFuncData {
|
||||
/// Name of the external function.
|
||||
pub name: ExternalName,
|
||||
/// Call signature of function.
|
||||
pub signature: SigRef,
|
||||
/// Will this function be defined nearby, such that it will always be a certain distance away,
|
||||
/// after linking? If so, references to it can avoid going through a GOT or PLT. Note that
|
||||
/// symbols meant to be preemptible cannot be considered colocated.
|
||||
pub colocated: bool,
|
||||
}
|
||||
|
||||
impl fmt::Display for ExtFuncData {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
if self.colocated {
|
||||
write!(f, "colocated ")?;
|
||||
}
|
||||
write!(f, "{} {}", self.name, self.signature)
|
||||
}
|
||||
}
|
||||
|
||||
/// A calling convention.
///
/// A function's calling convention determines exactly how arguments and return values are passed,
/// and how stack frames are managed. Since all of these details depend on both the instruction set
/// architecture and possibly the operating system, a function's calling convention is only fully
/// determined by a `(TargetIsa, CallConv)` tuple.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum CallConv {
    /// The System V-style calling convention.
    ///
    /// This is the System V-style calling convention that a C compiler would
    /// use on many platforms.
    SystemV,

    /// A JIT-compiled WebAssembly function in the SpiderMonkey VM.
    SpiderWASM,
}

impl fmt::Display for CallConv {
    /// Display the textual name used in the IR format.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let name = match *self {
            CallConv::SystemV => "system_v",
            CallConv::SpiderWASM => "spiderwasm",
        };
        f.write_str(name)
    }
}

impl FromStr for CallConv {
    type Err = ();

    /// Parse a calling convention from its text name; unknown names yield `Err(())`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "system_v" => Ok(CallConv::SystemV),
            "spiderwasm" => Ok(CallConv::SpiderWASM),
            _ => Err(()),
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use ir::types::{B8, F32, I32};
    use std::string::ToString;

    #[test]
    fn argument_type() {
        let t = AbiParam::new(I32);
        assert_eq!(t.to_string(), "i32");
        let mut t = t.uext();
        assert_eq!(t.to_string(), "i32 uext");
        assert_eq!(t.sext().to_string(), "i32 sext");
        t.purpose = ArgumentPurpose::StructReturn;
        assert_eq!(t.to_string(), "i32 uext sret");
    }

    #[test]
    fn argument_purpose() {
        // Every variant must round-trip through its text name. This list must be
        // in the same order as `PURPOSE_NAMES`; the length check guards against
        // a new variant being added to only one of the two (previously
        // `SignatureId` was missing here, so `zip` silently skipped it).
        let all_purpose = [
            ArgumentPurpose::Normal,
            ArgumentPurpose::StructReturn,
            ArgumentPurpose::Link,
            ArgumentPurpose::FramePointer,
            ArgumentPurpose::CalleeSaved,
            ArgumentPurpose::VMContext,
            ArgumentPurpose::SignatureId,
        ];
        assert_eq!(all_purpose.len(), PURPOSE_NAMES.len());
        for (&e, &n) in all_purpose.iter().zip(PURPOSE_NAMES.iter()) {
            assert_eq!(e.to_string(), n);
            assert_eq!(Ok(e), n.parse());
        }
    }

    #[test]
    fn call_conv() {
        for &cc in &[CallConv::SystemV, CallConv::SpiderWASM] {
            assert_eq!(Ok(cc), cc.to_string().parse())
        }
    }

    #[test]
    fn signatures() {
        let mut sig = Signature::new(CallConv::SpiderWASM);
        assert_eq!(sig.to_string(), "() spiderwasm");
        sig.params.push(AbiParam::new(I32));
        assert_eq!(sig.to_string(), "(i32) spiderwasm");
        sig.returns.push(AbiParam::new(F32));
        assert_eq!(sig.to_string(), "(i32) -> f32 spiderwasm");
        sig.params.push(AbiParam::new(I32.by(4).unwrap()));
        assert_eq!(sig.to_string(), "(i32, i32x4) -> f32 spiderwasm");
        sig.returns.push(AbiParam::new(B8));
        assert_eq!(sig.to_string(), "(i32, i32x4) -> f32, b8 spiderwasm");

        // Test the offset computation algorithm.
        assert_eq!(sig.argument_bytes, None);
        sig.params[1].location = ArgumentLoc::Stack(8);
        sig.compute_argument_bytes();
        // An `i32x4` at offset 8 requires a 24-byte argument array.
        assert_eq!(sig.argument_bytes, Some(24));
        // Order does not matter.
        sig.params[0].location = ArgumentLoc::Stack(24);
        sig.compute_argument_bytes();
        assert_eq!(sig.argument_bytes, Some(28));

        // Writing ABI-annotated signatures.
        assert_eq!(
            sig.to_string(),
            "(i32 [24], i32x4 [8]) -> f32, b8 spiderwasm"
        );
    }
}
|
||||
165
lib/codegen/src/ir/extname.rs
Normal file
165
lib/codegen/src/ir/extname.rs
Normal file
@@ -0,0 +1,165 @@
|
||||
//! External names.
|
||||
//!
|
||||
//! These are identifiers for declaring entities defined outside the current
|
||||
//! function. The name of an external declaration doesn't have any meaning to
|
||||
//! Cretonne, which compiles functions independently.
|
||||
|
||||
use ir::LibCall;
|
||||
use std::cmp;
|
||||
use std::fmt::{self, Write};
|
||||
use std::str::FromStr;
|
||||
|
||||
// Maximum number of ascii bytes stored inline in a `TestCase` name.
const TESTCASE_NAME_LENGTH: usize = 16;

/// The name of an external is either a reference to a user-defined symbol
/// table, or a short sequence of ascii bytes so that test cases do not have
/// to keep track of a symbol table.
///
/// External names are primarily used as keys by code using Cretonne to map
/// from a `cretonne_codegen::ir::FuncRef` or similar to additional associated
/// data.
///
/// External names can also serve as a primitive testing and debugging tool.
/// In particular, many `.cton` test files use function names to identify
/// functions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ExternalName {
    /// A name in a user-defined symbol table. Cretonne does not interpret
    /// these numbers in any way.
    User {
        /// Arbitrary.
        namespace: u32,
        /// Arbitrary.
        index: u32,
    },
    /// A test case function name of up to 16 ascii characters. This is
    /// not intended to be used outside test cases.
    TestCase {
        /// How many of the bytes in `ascii` are valid?
        length: u8,
        /// Ascii bytes of the name.
        ascii: [u8; TESTCASE_NAME_LENGTH],
    },
    /// A well-known runtime library function.
    LibCall(LibCall),
}
|
||||
|
||||
impl ExternalName {
|
||||
/// Creates a new external name from a sequence of bytes. Caller is expected
|
||||
/// to guarantee bytes are only ascii alphanumeric or `_`.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rust
|
||||
/// # use cretonne_codegen::ir::ExternalName;
|
||||
/// // Create `ExternalName` from a string.
|
||||
/// let name = ExternalName::testcase("hello");
|
||||
/// assert_eq!(name.to_string(), "%hello");
|
||||
/// ```
|
||||
pub fn testcase<T: AsRef<[u8]>>(v: T) -> ExternalName {
|
||||
let vec = v.as_ref();
|
||||
let len = cmp::min(vec.len(), TESTCASE_NAME_LENGTH);
|
||||
let mut bytes = [0u8; TESTCASE_NAME_LENGTH];
|
||||
bytes[0..len].copy_from_slice(&vec[0..len]);
|
||||
|
||||
ExternalName::TestCase {
|
||||
length: len as u8,
|
||||
ascii: bytes,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new external name from user-provided integer indicies.
|
||||
///
|
||||
/// # Examples
|
||||
/// ```rust
|
||||
/// # use cretonne_codegen::ir::ExternalName;
|
||||
/// // Create `ExternalName` from integer indicies
|
||||
/// let name = ExternalName::user(123, 456);
|
||||
/// assert_eq!(name.to_string(), "u123:456");
|
||||
/// ```
|
||||
pub fn user(namespace: u32, index: u32) -> ExternalName {
|
||||
ExternalName::User {
|
||||
namespace: namespace,
|
||||
index: index,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for ExternalName {
|
||||
fn default() -> ExternalName {
|
||||
ExternalName::user(0, 0)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for ExternalName {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match *self {
|
||||
ExternalName::User { namespace, index } => write!(f, "u{}:{}", namespace, index),
|
||||
ExternalName::TestCase { length, ascii } => {
|
||||
f.write_char('%')?;
|
||||
for byte in ascii.iter().take(length as usize) {
|
||||
f.write_char(*byte as char)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
ExternalName::LibCall(lc) => write!(f, "%{}", lc),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for ExternalName {
|
||||
type Err = ();
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
// Try to parse as a libcall name, otherwise it's a test case.
|
||||
match s.parse() {
|
||||
Ok(lc) => Ok(ExternalName::LibCall(lc)),
|
||||
Err(_) => Ok(ExternalName::testcase(s.as_bytes())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::ExternalName;
    use ir::LibCall;
    use std::string::ToString;

    #[test]
    fn display_testcase() {
        assert_eq!(ExternalName::testcase("").to_string(), "%");
        assert_eq!(ExternalName::testcase("x").to_string(), "%x");
        assert_eq!(ExternalName::testcase("x_1").to_string(), "%x_1");
        assert_eq!(
            ExternalName::testcase("longname12345678").to_string(),
            "%longname12345678"
        );
        // Constructor will silently drop bytes beyond the 16th
        assert_eq!(
            ExternalName::testcase("longname123456789").to_string(),
            "%longname12345678"
        );
    }

    #[test]
    fn display_user() {
        assert_eq!(ExternalName::user(0, 0).to_string(), "u0:0");
        assert_eq!(ExternalName::user(1, 1).to_string(), "u1:1");
        // Extreme values must format without overflow or truncation.
        assert_eq!(
            ExternalName::user(::std::u32::MAX, ::std::u32::MAX).to_string(),
            "u4294967295:4294967295"
        );
    }

    #[test]
    fn parsing() {
        // Known libcall names parse to `LibCall` and display with a `%` prefix.
        assert_eq!(
            "FloorF32".parse(),
            Ok(ExternalName::LibCall(LibCall::FloorF32))
        );
        assert_eq!(
            ExternalName::LibCall(LibCall::FloorF32).to_string(),
            "%FloorF32"
        );
    }
}
|
||||
232
lib/codegen/src/ir/function.rs
Normal file
232
lib/codegen/src/ir/function.rs
Normal file
@@ -0,0 +1,232 @@
|
||||
//! Intermediate representation of a function.
|
||||
//!
|
||||
//! The `Function` struct defined in this module owns all of its extended basic blocks and
|
||||
//! instructions.
|
||||
|
||||
use binemit::CodeOffset;
|
||||
use entity::{EntityMap, PrimaryMap};
|
||||
use ir;
|
||||
use ir::{CallConv, DataFlowGraph, ExternalName, Layout, Signature};
|
||||
use ir::{Ebb, ExtFuncData, FuncRef, GlobalVar, GlobalVarData, Heap, HeapData, JumpTable,
|
||||
JumpTableData, SigRef, StackSlot, StackSlotData};
|
||||
use ir::{EbbOffsets, InstEncodings, JumpTables, SourceLocs, StackSlots, ValueLocations};
|
||||
use isa::{EncInfo, Legalize, TargetIsa, Encoding};
|
||||
use std::fmt;
|
||||
use write::write_function;
|
||||
|
||||
/// A function.
///
/// Functions can be cloned, but it is not a very fast operation.
/// The clone will have all the same entity numbers as the original.
#[derive(Clone)]
pub struct Function {
    /// Name of this function. Mostly used by `.cton` files.
    pub name: ExternalName,

    /// Signature of this function.
    pub signature: Signature,

    /// Stack slots allocated in this function.
    pub stack_slots: StackSlots,

    /// Global variables referenced.
    pub global_vars: PrimaryMap<ir::GlobalVar, ir::GlobalVarData>,

    /// Heaps referenced.
    pub heaps: PrimaryMap<ir::Heap, ir::HeapData>,

    /// Jump tables used in this function.
    pub jump_tables: JumpTables,

    /// Data flow graph containing the primary definition of all instructions, EBBs and values.
    pub dfg: DataFlowGraph,

    /// Layout of EBBs and instructions in the function body.
    pub layout: Layout,

    /// Encoding recipe and bits for the legal instructions.
    /// Illegal instructions have the `Encoding::default()` value.
    pub encodings: InstEncodings,

    /// Location assigned to every value.
    pub locations: ValueLocations,

    /// Code offsets of the EBB headers.
    ///
    /// This information is only transiently available after the `binemit::relax_branches` function
    /// computes it, and it can easily be recomputed by calling that function. It is not included
    /// in the textual IR format.
    pub offsets: EbbOffsets,

    /// Source locations.
    ///
    /// Track the original source location for each instruction. The source locations are not
    /// interpreted by Cretonne, only preserved.
    pub srclocs: SourceLocs,
}
|
||||
|
||||
impl Function {
|
||||
/// Create a function with the given name and signature.
|
||||
pub fn with_name_signature(name: ExternalName, sig: Signature) -> Self {
|
||||
Self {
|
||||
name,
|
||||
signature: sig,
|
||||
stack_slots: StackSlots::new(),
|
||||
global_vars: PrimaryMap::new(),
|
||||
heaps: PrimaryMap::new(),
|
||||
jump_tables: PrimaryMap::new(),
|
||||
dfg: DataFlowGraph::new(),
|
||||
layout: Layout::new(),
|
||||
encodings: EntityMap::new(),
|
||||
locations: EntityMap::new(),
|
||||
offsets: EntityMap::new(),
|
||||
srclocs: EntityMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Clear all data structures in this function.
|
||||
pub fn clear(&mut self) {
|
||||
self.signature.clear(ir::CallConv::SystemV);
|
||||
self.stack_slots.clear();
|
||||
self.global_vars.clear();
|
||||
self.heaps.clear();
|
||||
self.jump_tables.clear();
|
||||
self.dfg.clear();
|
||||
self.layout.clear();
|
||||
self.encodings.clear();
|
||||
self.locations.clear();
|
||||
self.offsets.clear();
|
||||
self.srclocs.clear();
|
||||
}
|
||||
|
||||
/// Create a new empty, anonymous function with a SystemV calling convention.
|
||||
pub fn new() -> Self {
|
||||
Self::with_name_signature(ExternalName::default(), Signature::new(CallConv::SystemV))
|
||||
}
|
||||
|
||||
/// Creates a jump table in the function, to be used by `br_table` instructions.
|
||||
pub fn create_jump_table(&mut self, data: JumpTableData) -> JumpTable {
|
||||
self.jump_tables.push(data)
|
||||
}
|
||||
|
||||
/// Inserts an entry in a previously declared jump table.
|
||||
pub fn insert_jump_table_entry(&mut self, jt: JumpTable, index: usize, ebb: Ebb) {
|
||||
self.jump_tables[jt].set_entry(index, ebb);
|
||||
}
|
||||
|
||||
/// Creates a stack slot in the function, to be used by `stack_load`, `stack_store` and
|
||||
/// `stack_addr` instructions.
|
||||
pub fn create_stack_slot(&mut self, data: StackSlotData) -> StackSlot {
|
||||
self.stack_slots.push(data)
|
||||
}
|
||||
|
||||
/// Adds a signature which can later be used to declare an external function import.
|
||||
pub fn import_signature(&mut self, signature: Signature) -> SigRef {
|
||||
self.dfg.signatures.push(signature)
|
||||
}
|
||||
|
||||
/// Declare an external function import.
|
||||
pub fn import_function(&mut self, data: ExtFuncData) -> FuncRef {
|
||||
self.dfg.ext_funcs.push(data)
|
||||
}
|
||||
|
||||
/// Declares a global variable accessible to the function.
|
||||
pub fn create_global_var(&mut self, data: GlobalVarData) -> GlobalVar {
|
||||
self.global_vars.push(data)
|
||||
}
|
||||
|
||||
/// Declares a heap accessible to the function.
|
||||
pub fn create_heap(&mut self, data: HeapData) -> Heap {
|
||||
self.heaps.push(data)
|
||||
}
|
||||
|
||||
/// Return an object that can display this function with correct ISA-specific annotations.
|
||||
pub fn display<'a, I: Into<Option<&'a TargetIsa>>>(&'a self, isa: I) -> DisplayFunction<'a> {
|
||||
DisplayFunction(self, isa.into())
|
||||
}
|
||||
|
||||
/// Find a presumed unique special-purpose function parameter value.
|
||||
///
|
||||
/// Returns the value of the last `purpose` parameter, or `None` if no such parameter exists.
|
||||
pub fn special_param(&self, purpose: ir::ArgumentPurpose) -> Option<ir::Value> {
|
||||
let entry = self.layout.entry_block().expect("Function is empty");
|
||||
self.signature.special_param_index(purpose).map(|i| {
|
||||
self.dfg.ebb_params(entry)[i]
|
||||
})
|
||||
}
|
||||
|
||||
/// Get an iterator over the instructions in `ebb`, including offsets and encoded instruction
|
||||
/// sizes.
|
||||
///
|
||||
/// The iterator returns `(offset, inst, size)` tuples, where `offset` if the offset in bytes
|
||||
/// from the beginning of the function to the instruction, and `size` is the size of the
|
||||
/// instruction in bytes, or 0 for unencoded instructions.
|
||||
///
|
||||
/// This function can only be used after the code layout has been computed by the
|
||||
/// `binemit::relax_branches()` function.
|
||||
pub fn inst_offsets<'a>(&'a self, ebb: Ebb, encinfo: &EncInfo) -> InstOffsetIter<'a> {
|
||||
assert!(
|
||||
!self.offsets.is_empty(),
|
||||
"Code layout must be computed first"
|
||||
);
|
||||
InstOffsetIter {
|
||||
encinfo: encinfo.clone(),
|
||||
encodings: &self.encodings,
|
||||
offset: self.offsets[ebb],
|
||||
iter: self.layout.ebb_insts(ebb),
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper around `encode` which assigns `inst` the resulting encoding.
|
||||
pub fn update_encoding(&mut self, inst: ir::Inst, isa: &TargetIsa) -> Result<(), Legalize> {
|
||||
self.encode(inst, isa).map(|e| self.encodings[inst] = e)
|
||||
}
|
||||
|
||||
/// Wrapper around `TargetIsa::encode` for encoding an existing instruction
|
||||
/// in the `Function`.
|
||||
pub fn encode(&self, inst: ir::Inst, isa: &TargetIsa) -> Result<Encoding, Legalize> {
|
||||
isa.encode(&self, &self.dfg[inst], self.dfg.ctrl_typevar(inst))
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper type capable of displaying a `Function` with correct ISA annotations.
///
/// Created by `Function::display`; the second field is the optional ISA.
pub struct DisplayFunction<'a>(&'a Function, Option<&'a TargetIsa>);
|
||||
|
||||
impl<'a> fmt::Display for DisplayFunction<'a> {
    /// Delegate to the plain-text writer, forwarding the optional ISA for
    /// ISA-specific annotations.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write_function(fmt, self.0, self.1)
    }
}
|
||||
|
||||
impl fmt::Display for Function {
    /// Print the function without ISA-specific annotations (no ISA context).
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write_function(fmt, self, None)
    }
}
|
||||
|
||||
impl fmt::Debug for Function {
    /// Debug output is identical to `Display`: the textual IR form.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write_function(fmt, self, None)
    }
}
|
||||
|
||||
/// Iterator returning instruction offsets and sizes: `(offset, inst, size)`.
pub struct InstOffsetIter<'a> {
    // Cloned encoding info used to look up the byte size of each encoding.
    encinfo: EncInfo,
    // Per-instruction encodings of the function being iterated.
    encodings: &'a InstEncodings,
    // Running byte offset of the next instruction from the function start.
    offset: CodeOffset,
    // Underlying iterator over the EBB's instructions in layout order.
    iter: ir::layout::Insts<'a>,
}
|
||||
|
||||
impl<'a> Iterator for InstOffsetIter<'a> {
|
||||
type Item = (CodeOffset, ir::Inst, CodeOffset);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.iter.next().map(|inst| {
|
||||
let size = self.encinfo.bytes(self.encodings[inst]);
|
||||
let offset = self.offset;
|
||||
self.offset += size;
|
||||
(offset, inst, size)
|
||||
})
|
||||
}
|
||||
}
|
||||
70
lib/codegen/src/ir/globalvar.rs
Normal file
70
lib/codegen/src/ir/globalvar.rs
Normal file
@@ -0,0 +1,70 @@
|
||||
//! Global variables.
|
||||
|
||||
use ir::immediates::Offset32;
|
||||
use ir::{ExternalName, GlobalVar};
|
||||
use std::fmt;
|
||||
|
||||
/// Information about a global variable declaration.
#[derive(Clone)]
pub enum GlobalVarData {
    /// Variable is part of the VM context struct, its address is a constant offset from the VM
    /// context pointer.
    VMContext {
        /// Offset from the `vmctx` pointer to this global.
        offset: Offset32,
    },

    /// Variable is part of a struct pointed to by another global variable.
    ///
    /// The `base` global variable is assumed to contain a pointer to a struct. This global
    /// variable lives at an offset into the struct. The memory must be accessible, and
    /// naturally aligned to hold a pointer value.
    Deref {
        /// The base pointer global variable.
        base: GlobalVar,

        /// Byte offset to be added to the pointer loaded from `base`.
        offset: Offset32,
    },

    /// Variable is at an address identified by a symbolic name. Cretonne itself
    /// does not interpret this name; it's used by embedders to link with other
    /// data structures.
    Sym {
        /// The symbolic name.
        name: ExternalName,

        /// Will this variable be defined nearby, such that it will always be a certain distance
        /// away, after linking? If so, references to it can avoid going through a GOT. Note that
        /// symbols meant to be preemptible cannot be colocated.
        colocated: bool,
    },
}
|
||||
|
||||
impl GlobalVarData {
|
||||
/// Assume that `self` is an `GlobalVarData::Sym` and return its name.
|
||||
pub fn symbol_name(&self) -> &ExternalName {
|
||||
match *self {
|
||||
GlobalVarData::Sym { ref name, .. } => name,
|
||||
_ => panic!("only symbols have names"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for GlobalVarData {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match *self {
|
||||
GlobalVarData::VMContext { offset } => write!(f, "vmctx{}", offset),
|
||||
GlobalVarData::Deref { base, offset } => write!(f, "deref({}){}", base, offset),
|
||||
GlobalVarData::Sym {
|
||||
ref name,
|
||||
colocated,
|
||||
} => {
|
||||
if colocated {
|
||||
write!(f, "colocated ")?;
|
||||
}
|
||||
write!(f, "globalsym {}", name)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
74
lib/codegen/src/ir/heap.rs
Normal file
74
lib/codegen/src/ir/heap.rs
Normal file
@@ -0,0 +1,74 @@
|
||||
//! Heaps.
|
||||
|
||||
use ir::GlobalVar;
|
||||
use ir::immediates::Imm64;
|
||||
use std::fmt;
|
||||
|
||||
/// Information about a heap declaration.
#[derive(Clone)]
pub struct HeapData {
    /// Method for determining the heap base address.
    pub base: HeapBase,

    /// Guaranteed minimum heap size in bytes. Heap accesses before `min_size` don't need bounds
    /// checking.
    pub min_size: Imm64,

    /// Size in bytes of the guard pages following the heap.
    pub guard_size: Imm64,

    /// Heap style, with additional style-specific info.
    pub style: HeapStyle,
}
|
||||
|
||||
/// Method for determining the base address of a heap.
#[derive(Clone)]
pub enum HeapBase {
    /// The heap base lives in a reserved register.
    ///
    /// This feature is not yet implemented.
    ReservedReg,

    /// The heap base is in a global variable. The variable must be accessible and naturally
    /// aligned for a pointer.
    GlobalVar(GlobalVar),
}
|
||||
|
||||
/// Style of heap including style-specific information.
#[derive(Clone)]
pub enum HeapStyle {
    /// A dynamic heap can be relocated to a different base address when it is grown.
    Dynamic {
        /// Global variable holding the current bound of the heap in bytes. It is
        /// required to be accessible and naturally aligned for a pointer-sized integer.
        bound_gv: GlobalVar,
    },

    /// A static heap has a fixed base address and a number of not-yet-allocated pages before the
    /// guard pages.
    Static {
        /// Heap bound in bytes. The guard pages are allocated after the bound.
        bound: Imm64,
    },
}
|
||||
|
||||
impl fmt::Display for HeapData {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
f.write_str(match self.style {
|
||||
HeapStyle::Dynamic { .. } => "dynamic",
|
||||
HeapStyle::Static { .. } => "static",
|
||||
})?;
|
||||
|
||||
match self.base {
|
||||
HeapBase::ReservedReg => write!(f, " reserved_reg")?,
|
||||
HeapBase::GlobalVar(gv) => write!(f, " {}", gv)?,
|
||||
}
|
||||
|
||||
write!(f, ", min {}", self.min_size)?;
|
||||
match self.style {
|
||||
HeapStyle::Dynamic { bound_gv } => write!(f, ", bound {}", bound_gv)?,
|
||||
HeapStyle::Static { bound } => write!(f, ", bound {}", bound)?,
|
||||
}
|
||||
write!(f, ", guard {}", self.guard_size)
|
||||
}
|
||||
}
|
||||
1031
lib/codegen/src/ir/immediates.rs
Normal file
1031
lib/codegen/src/ir/immediates.rs
Normal file
File diff suppressed because it is too large
Load Diff
680
lib/codegen/src/ir/instructions.rs
Normal file
680
lib/codegen/src/ir/instructions.rs
Normal file
@@ -0,0 +1,680 @@
|
||||
//! Instruction formats and opcodes.
|
||||
//!
|
||||
//! The `instructions` module contains definitions for instruction formats, opcodes, and the
|
||||
//! in-memory representation of IR instructions.
|
||||
//!
|
||||
//! A large part of this module is auto-generated from the instruction descriptions in the meta
|
||||
//! directory.
|
||||
|
||||
use std::fmt::{self, Display, Formatter};
|
||||
use std::ops::{Deref, DerefMut};
|
||||
use std::str::FromStr;
|
||||
use std::vec::Vec;
|
||||
|
||||
use ir;
|
||||
use ir::types;
|
||||
use ir::{Ebb, FuncRef, JumpTable, SigRef, Type, Value};
|
||||
use isa;
|
||||
|
||||
use bitset::BitSet;
|
||||
use entity;
|
||||
use ref_slice::{ref_slice, ref_slice_mut};
|
||||
|
||||
/// Some instructions use an external list of argument values because there is not enough space in
|
||||
/// the 16-byte `InstructionData` struct. These value lists are stored in a memory pool in
|
||||
/// `dfg.value_lists`.
|
||||
pub type ValueList = entity::EntityList<Value>;
|
||||
|
||||
/// Memory pool for holding value lists. See `ValueList`.
|
||||
pub type ValueListPool = entity::ListPool<Value>;
|
||||
|
||||
// Include code generated by `lib/codegen/meta/gen_instr.py`. This file contains:
|
||||
//
|
||||
// - The `pub enum InstructionFormat` enum with all the instruction formats.
|
||||
// - The `pub enum InstructionData` enum with all the instruction data fields.
|
||||
// - The `pub enum Opcode` definition with all known opcodes,
|
||||
// - The `const OPCODE_FORMAT: [InstructionFormat; N]` table.
|
||||
// - The private `fn opcode_name(Opcode) -> &'static str` function, and
|
||||
// - The hash table `const OPCODE_HASH_TABLE: [Opcode; N]`.
|
||||
//
|
||||
// For value type constraints:
|
||||
//
|
||||
// - The `const OPCODE_CONSTRAINTS : [OpcodeConstraints; N]` table.
|
||||
// - The `const TYPE_SETS : [ValueTypeSet; N]` table.
|
||||
// - The `const OPERAND_CONSTRAINTS : [OperandConstraint; N]` table.
|
||||
//
|
||||
include!(concat!(env!("OUT_DIR"), "/opcodes.rs"));
|
||||
|
||||
impl Display for Opcode {
|
||||
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||||
write!(f, "{}", opcode_name(*self))
|
||||
}
|
||||
}
|
||||
|
||||
impl Opcode {
    /// Get the instruction format for this opcode.
    ///
    /// NOTE(review): the `- 1` suggests discriminant 0 is reserved for an
    /// invalid opcode in the generated tables — confirm in gen_instr.py.
    pub fn format(self) -> InstructionFormat {
        OPCODE_FORMAT[self as usize - 1]
    }

    /// Get the constraint descriptor for this opcode.
    /// Panic if this is called on `NotAnOpcode`.
    pub fn constraints(self) -> OpcodeConstraints {
        OPCODE_CONSTRAINTS[self as usize - 1]
    }
}
|
||||
|
||||
// This trait really belongs in lib/reader where it is used by the `.cton` file parser, but since
// it critically depends on the `opcode_name()` function which is needed here anyway, it lives in
// this module. This also saves us from running the build script twice to generate code for the two
// separate crates.
impl FromStr for Opcode {
    type Err = &'static str;

    /// Parse an Opcode name from a string.
    fn from_str(s: &str) -> Result<Opcode, &'static str> {
        use constant_hash::{probe, simple_hash, Table};

        // Adapt the generated `OPCODE_HASH_TABLE` slice to the `Table` trait
        // expected by `probe()`.
        impl<'a> Table<&'a str> for [Option<Opcode>] {
            fn len(&self) -> usize {
                // Resolves to the inherent slice `len`, not this trait method,
                // so this is not a recursive call.
                self.len()
            }

            fn key(&self, idx: usize) -> Option<&'a str> {
                self[idx].map(opcode_name)
            }
        }

        match probe::<&str, [Option<Opcode>]>(&OPCODE_HASH_TABLE, s, simple_hash(s)) {
            Err(_) => Err("Unknown opcode"),
            // We unwrap here because probe() should have ensured that the entry
            // at this index is not None.
            Ok(i) => Ok(OPCODE_HASH_TABLE[i].unwrap()),
        }
    }
}
|
||||
|
||||
/// A variable list of `Value` operands used for function call arguments and passing arguments to
/// basic blocks.
#[derive(Clone, Debug)]
pub struct VariableArgs(Vec<Value>);
|
||||
|
||||
impl VariableArgs {
|
||||
/// Create an empty argument list.
|
||||
pub fn new() -> Self {
|
||||
VariableArgs(Vec::new())
|
||||
}
|
||||
|
||||
/// Add an argument to the end.
|
||||
pub fn push(&mut self, v: Value) {
|
||||
self.0.push(v)
|
||||
}
|
||||
|
||||
/// Check if the list is empty.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.0.is_empty()
|
||||
}
|
||||
|
||||
/// Convert this to a value list in `pool` with `fixed` prepended.
|
||||
pub fn into_value_list(self, fixed: &[Value], pool: &mut ValueListPool) -> ValueList {
|
||||
let mut vlist = ValueList::default();
|
||||
vlist.extend(fixed.iter().cloned(), pool);
|
||||
vlist.extend(self.0, pool);
|
||||
vlist
|
||||
}
|
||||
}
|
||||
|
||||
// Coerce `VariableArgs` into a `&[Value]` slice.
|
||||
impl Deref for VariableArgs {
|
||||
type Target = [Value];
|
||||
|
||||
fn deref(&self) -> &[Value] {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl DerefMut for VariableArgs {
|
||||
fn deref_mut(&mut self) -> &mut [Value] {
|
||||
&mut self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for VariableArgs {
|
||||
fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
|
||||
for (i, val) in self.0.iter().enumerate() {
|
||||
if i == 0 {
|
||||
write!(fmt, "{}", val)?;
|
||||
} else {
|
||||
write!(fmt, ", {}", val)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for VariableArgs {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Analyzing an instruction.
///
/// Avoid large matches on instruction formats by using the methods defined here to examine
/// instructions.
impl InstructionData {
    /// Return information about the destination of a branch or jump instruction.
    ///
    /// Any instruction that can transfer control to another EBB reveals its possible destinations
    /// here.
    pub fn analyze_branch<'a>(&'a self, pool: &'a ValueListPool) -> BranchInfo<'a> {
        match *self {
            // A jump passes its entire value list to the destination.
            InstructionData::Jump {
                destination,
                ref args,
                ..
            } => BranchInfo::SingleDest(destination, args.as_slice(pool)),
            // NOTE(review): `[1..]`/`[2..]` below skip fixed operands stored at
            // the front of the value list — presumably one tested value for
            // `Branch`/`BranchInt`/`BranchFloat` and two compared values for
            // `BranchIcmp`; confirm against the instruction definitions.
            InstructionData::BranchInt {
                destination,
                ref args,
                ..
            } |
            InstructionData::BranchFloat {
                destination,
                ref args,
                ..
            } |
            InstructionData::Branch {
                destination,
                ref args,
                ..
            } => BranchInfo::SingleDest(destination, &args.as_slice(pool)[1..]),
            InstructionData::BranchIcmp {
                destination,
                ref args,
                ..
            } => BranchInfo::SingleDest(destination, &args.as_slice(pool)[2..]),
            InstructionData::BranchTable { table, .. } => BranchInfo::Table(table),
            _ => {
                // Any other format must not be a branch opcode.
                debug_assert!(!self.opcode().is_branch());
                BranchInfo::NotABranch
            }
        }
    }

    /// Get the single destination of this branch instruction, if it is a single destination
    /// branch or jump.
    ///
    /// Multi-destination branches like `br_table` return `None`.
    pub fn branch_destination(&self) -> Option<Ebb> {
        match *self {
            InstructionData::Jump { destination, .. } |
            InstructionData::Branch { destination, .. } |
            InstructionData::BranchInt { destination, .. } |
            InstructionData::BranchFloat { destination, .. } |
            InstructionData::BranchIcmp { destination, .. } => Some(destination),
            InstructionData::BranchTable { .. } => None,
            _ => {
                debug_assert!(!self.opcode().is_branch());
                None
            }
        }
    }

    /// Get a mutable reference to the single destination of this branch instruction, if it is a
    /// single destination branch or jump.
    ///
    /// Multi-destination branches like `br_table` return `None`.
    pub fn branch_destination_mut(&mut self) -> Option<&mut Ebb> {
        match *self {
            InstructionData::Jump { ref mut destination, .. } |
            InstructionData::Branch { ref mut destination, .. } |
            InstructionData::BranchInt { ref mut destination, .. } |
            InstructionData::BranchFloat { ref mut destination, .. } |
            InstructionData::BranchIcmp { ref mut destination, .. } => Some(destination),
            InstructionData::BranchTable { .. } => None,
            _ => {
                debug_assert!(!self.opcode().is_branch());
                None
            }
        }
    }

    /// Return information about a call instruction.
    ///
    /// Any instruction that can call another function reveals its call signature here.
    pub fn analyze_call<'a>(&'a self, pool: &'a ValueListPool) -> CallInfo<'a> {
        match *self {
            InstructionData::Call { func_ref, ref args, .. } => {
                CallInfo::Direct(func_ref, args.as_slice(pool))
            }
            // NOTE(review): `[1..]` presumably skips the callee pointer stored
            // first in the value list — confirm against the CallIndirect format.
            InstructionData::CallIndirect { sig_ref, ref args, .. } => {
                CallInfo::Indirect(sig_ref, &args.as_slice(pool)[1..])
            }
            _ => {
                debug_assert!(!self.opcode().is_call());
                CallInfo::NotACall
            }
        }
    }
}
|
||||
|
||||
/// Information about branch and jump instructions.
pub enum BranchInfo<'a> {
    /// This is not a branch or jump instruction.
    /// This instruction will not transfer control to another EBB in the function, but it may still
    /// affect control flow by returning or trapping.
    NotABranch,

    /// This is a branch or jump to a single destination EBB, possibly taking value arguments.
    SingleDest(Ebb, &'a [Value]),

    /// This is a jump table branch which can have many destination EBBs.
    Table(JumpTable),
}
|
||||
|
||||
/// Information about call instructions.
pub enum CallInfo<'a> {
    /// This is not a call instruction.
    NotACall,

    /// This is a direct call to an external function declared in the preamble. See
    /// `DataFlowGraph.ext_funcs`.
    Direct(FuncRef, &'a [Value]),

    /// This is an indirect call with the specified signature. See `DataFlowGraph.signatures`.
    Indirect(SigRef, &'a [Value]),
}
|
||||
|
||||
/// Value type constraints for a given opcode.
///
/// The `InstructionFormat` determines the constraints on most operands, but `Value` operands and
/// results are not determined by the format. Every `Opcode` has an associated
/// `OpcodeConstraints` object that provides the missing details.
#[derive(Clone, Copy)]
pub struct OpcodeConstraints {
    /// Flags for this opcode encoded as a bit field:
    ///
    /// Bits 0-2:
    /// Number of fixed result values. This does not include `variable_args` results as are
    /// produced by call instructions.
    ///
    /// Bit 3:
    /// This opcode is polymorphic and the controlling type variable can be inferred from the
    /// designated input operand. This is the `typevar_operand` index given to the
    /// `InstructionFormat` meta language object. When this bit is not set, the controlling
    /// type variable must be the first output value instead.
    ///
    /// Bit 4:
    /// This opcode is polymorphic and the controlling type variable does *not* appear as the
    /// first result type.
    ///
    /// Bits 5-7:
    /// Number of fixed value arguments. The minimum required number of value operands.
    flags: u8,

    /// Permitted set of types for the controlling type variable as an index into `TYPE_SETS`.
    /// An out-of-range index means the opcode is not polymorphic (see `typeset_offset`).
    typeset_offset: u8,

    /// Offset into `OPERAND_CONSTRAINT` table of the descriptors for this opcode. The first
    /// `fixed_results()` entries describe the result constraints, then follows constraints for the
    /// fixed `Value` input operands. (`fixed_value_arguments()` of them).
    constraint_offset: u16,
}
|
||||
|
||||
impl OpcodeConstraints {
    /// Can the controlling type variable for this opcode be inferred from the designated value
    /// input operand?
    /// This also implies that this opcode is polymorphic.
    pub fn use_typevar_operand(self) -> bool {
        // Bit 3 of the flags field.
        (self.flags & 0x8) != 0
    }

    /// Is it necessary to look at the designated value input operand in order to determine the
    /// controlling type variable, or is it good enough to use the first return type?
    ///
    /// Most polymorphic instructions produce a single result with the type of the controlling type
    /// variable. A few polymorphic instructions either don't produce any results, or produce
    /// results with a fixed type. These instructions return `true`.
    pub fn requires_typevar_operand(self) -> bool {
        // Bit 4 of the flags field.
        (self.flags & 0x10) != 0
    }

    /// Get the number of *fixed* result values produced by this opcode.
    /// This does not include `variable_args` produced by calls.
    pub fn fixed_results(self) -> usize {
        // Bits 0-2 of the flags field.
        (self.flags & 0x7) as usize
    }

    /// Get the number of *fixed* input values required by this opcode.
    ///
    /// This does not include `variable_args` arguments on call and branch instructions.
    ///
    /// The number of fixed input values is usually implied by the instruction format, but
    /// instruction formats that use a `ValueList` put both fixed and variable arguments in the
    /// list. This method returns the *minimum* number of values required in the value list.
    pub fn fixed_value_arguments(self) -> usize {
        // Bits 5-7 of the flags field.
        ((self.flags >> 5) & 0x7) as usize
    }

    /// Get the offset into `TYPE_SETS` for the controlling type variable.
    /// Returns `None` if the instruction is not polymorphic.
    fn typeset_offset(self) -> Option<usize> {
        // An out-of-range offset encodes "not polymorphic".
        let offset = usize::from(self.typeset_offset);
        if offset < TYPE_SETS.len() {
            Some(offset)
        } else {
            None
        }
    }

    /// Get the offset into OPERAND_CONSTRAINTS where the descriptors for this opcode begin.
    fn constraint_offset(self) -> usize {
        self.constraint_offset as usize
    }

    /// Get the value type of result number `n`, having resolved the controlling type variable to
    /// `ctrl_type`.
    ///
    /// Panics if `n` is out of range or the result constraint is `Free`.
    pub fn result_type(self, n: usize, ctrl_type: Type) -> Type {
        debug_assert!(n < self.fixed_results(), "Invalid result index");
        if let ResolvedConstraint::Bound(t) =
            OPERAND_CONSTRAINTS[self.constraint_offset() + n].resolve(ctrl_type)
        {
            t
        } else {
            panic!("Result constraints can't be free");
        }
    }

    /// Get the value type of input value number `n`, having resolved the controlling type variable
    /// to `ctrl_type`.
    ///
    /// Unlike results, it is possible for some input values to vary freely within a specific
    /// `ValueTypeSet`. This is represented with the `ArgumentConstraint::Free` variant.
    pub fn value_argument_constraint(self, n: usize, ctrl_type: Type) -> ResolvedConstraint {
        debug_assert!(
            n < self.fixed_value_arguments(),
            "Invalid value argument index"
        );
        // Argument constraints follow the result constraints in the table.
        let offset = self.constraint_offset() + self.fixed_results();
        OPERAND_CONSTRAINTS[offset + n].resolve(ctrl_type)
    }

    /// Get the typeset of allowed types for the controlling type variable in a polymorphic
    /// instruction.
    pub fn ctrl_typeset(self) -> Option<ValueTypeSet> {
        self.typeset_offset().map(|offset| TYPE_SETS[offset])
    }

    /// Is this instruction polymorphic?
    pub fn is_polymorphic(self) -> bool {
        self.ctrl_typeset().is_some()
    }
}
|
||||
|
||||
// Compact bit sets indexed by log2 of a size (see `ValueTypeSet::contains`
// and `is_base_type` for how the indices are computed).
type BitSet8 = BitSet<u8>;
type BitSet16 = BitSet<u16>;

/// A value type set describes the permitted set of types for a type variable.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ValueTypeSet {
    /// Allowed lane sizes
    /// (indexed by log2 of the lane count, per `contains`).
    pub lanes: BitSet16,
    /// Allowed int widths
    /// (indexed by log2 of the bit width, per `is_base_type`).
    pub ints: BitSet8,
    /// Allowed float widths
    pub floats: BitSet8,
    /// Allowed bool widths
    pub bools: BitSet8,
}
|
||||
|
||||
impl ValueTypeSet {
    /// Is `scalar` part of the base type set?
    ///
    /// Note that the base type set does not have to be included in the type set proper.
    fn is_base_type(&self, scalar: Type) -> bool {
        // The per-class bit sets are indexed by log2 of the scalar's bit width.
        let l2b = scalar.log2_lane_bits();
        if scalar.is_int() {
            self.ints.contains(l2b)
        } else if scalar.is_float() {
            self.floats.contains(l2b)
        } else if scalar.is_bool() {
            self.bools.contains(l2b)
        } else {
            false
        }
    }

    /// Does `typ` belong to this set?
    pub fn contains(&self, typ: Type) -> bool {
        // Both the lane count and the lane (scalar) type must be permitted.
        let l2l = typ.log2_lane_count();
        self.lanes.contains(l2l) && self.is_base_type(typ.lane_type())
    }

    /// Get an example member of this type set.
    ///
    /// This is used for error messages to avoid suggesting invalid types.
    pub fn example(&self) -> Type {
        // NOTE(review): `> 5` appears to test whether the set permits widths
        // beyond 32 bits (log2(32) == 5), in which case the 32-bit member of
        // that class is used as the example — confirm against the BitSet
        // encoding used by the meta generator.
        let t = if self.ints.max().unwrap_or(0) > 5 {
            types::I32
        } else if self.floats.max().unwrap_or(0) > 5 {
            types::F32
        } else if self.bools.max().unwrap_or(0) > 5 {
            types::B32
        } else {
            types::B1
        };
        // Panics if the lane set is empty or the resulting vector type is
        // invalid for `t` — callers are expected to pass well-formed sets.
        t.by(1 << self.lanes.min().unwrap()).unwrap()
    }
}
|
||||
|
||||
/// Operand constraints. This describes the value type constraints on a single `Value` operand.
enum OperandConstraint {
    /// This operand has a concrete value type.
    Concrete(Type),

    /// This operand can vary freely within the given type set.
    /// The type set is identified by its index into the TYPE_SETS constant table.
    Free(u8),

    /// This operand is the same type as the controlling type variable.
    Same,

    /// This operand is `ctrlType.lane_type()`.
    LaneOf,

    /// This operand is `ctrlType.as_bool()`.
    AsBool,

    /// This operand is `ctrlType.half_width()`.
    HalfWidth,

    /// This operand is `ctrlType.double_width()`.
    DoubleWidth,

    /// This operand is `ctrlType.half_vector()`.
    HalfVector,

    /// This operand is `ctrlType.double_vector()`.
    DoubleVector,
}
|
||||
|
||||
impl OperandConstraint {
    /// Resolve this operand constraint into a concrete value type, given the value of the
    /// controlling type variable.
    ///
    /// Most variants map to a single `Bound` type derived from `ctrl_type`; the `Free`
    /// variant instead exposes the whole permitted `ValueTypeSet`.
    ///
    /// # Panics
    ///
    /// Panics (via `expect`) when the derived type doesn't exist for `ctrl_type`, e.g.
    /// halving the width of a type that has no narrower version.
    pub fn resolve(&self, ctrl_type: Type) -> ResolvedConstraint {
        use self::OperandConstraint::*;
        use self::ResolvedConstraint::Bound;
        match *self {
            Concrete(t) => Bound(t),
            // Look up the referenced type set by index; `vts` must be in bounds.
            Free(vts) => ResolvedConstraint::Free(TYPE_SETS[vts as usize]),
            Same => Bound(ctrl_type),
            LaneOf => Bound(ctrl_type.lane_type()),
            AsBool => Bound(ctrl_type.as_bool()),
            HalfWidth => Bound(ctrl_type.half_width().expect("invalid type for half_width")),
            DoubleWidth => Bound(ctrl_type.double_width().expect(
                "invalid type for double_width",
            )),
            HalfVector => Bound(ctrl_type.half_vector().expect(
                "invalid type for half_vector",
            )),
            // NOTE(review): doubles the lane count via `by(2)` rather than a `double_vector()`
            // method — presumably equivalent to the variant's documented meaning; confirm.
            DoubleVector => Bound(ctrl_type.by(2).expect("invalid type for double_vector")),
        }
    }
}
|
||||
|
||||
/// The type constraint on a value argument once the controlling type variable is known.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ResolvedConstraint {
    /// The operand is bound to a known type.
    Bound(Type),
    /// The operand type can vary freely within the given set.
    Free(ValueTypeSet),
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::string::ToString;

    /// Basic `Opcode` properties: equality, `Debug`/`Display` formatting, the string
    /// matcher, and the size of `Option<Opcode>`.
    #[test]
    fn opcodes() {
        use std::mem;

        let x = Opcode::Iadd;
        let mut y = Opcode::Isub;

        assert!(x != y);
        y = Opcode::Iadd;
        assert_eq!(x, y);
        assert_eq!(x.format(), InstructionFormat::Binary);

        // `Debug` uses the variant name, `Display` the snake_case mnemonic.
        assert_eq!(format!("{:?}", Opcode::IaddImm), "IaddImm");
        assert_eq!(Opcode::IaddImm.to_string(), "iadd_imm");

        // Check the matcher.
        assert_eq!("iadd".parse::<Opcode>(), Ok(Opcode::Iadd));
        assert_eq!("iadd_imm".parse::<Opcode>(), Ok(Opcode::IaddImm));
        // Embedded NULs and empty strings must be rejected, not mis-matched.
        assert_eq!("iadd\0".parse::<Opcode>(), Err("Unknown opcode"));
        assert_eq!("".parse::<Opcode>(), Err("Unknown opcode"));
        assert_eq!("\0".parse::<Opcode>(), Err("Unknown opcode"));

        // Opcode is a single byte, and because Option<Opcode> originally came to 2 bytes, early on
        // Opcode included a variant NotAnOpcode to avoid the unnecessary bloat. Since then the Rust
        // compiler has brought in NonZero optimization, meaning that an enum not using the 0 value
        // can be optional for no size cost. We want to ensure Option<Opcode> remains small.
        assert_eq!(mem::size_of::<Opcode>(), mem::size_of::<Option<Opcode>>());
    }

    /// Guard against accidental growth of `InstructionData`.
    #[test]
    fn instruction_data() {
        use std::mem;
        // The size of the `InstructionData` enum is important for performance. It should not
        // exceed 16 bytes. Use `Box<FooData>` out-of-line payloads for instruction formats that
        // require more space than that. It would be fine with a data structure smaller than 16
        // bytes, but what are the odds of that?
        assert_eq!(mem::size_of::<InstructionData>(), 16);
    }

    /// Exercise `OpcodeConstraints` for a few representative opcodes.
    #[test]
    fn constraints() {
        // `iadd`: typevar-controlled binary op, result and both args bound to the typevar.
        let a = Opcode::Iadd.constraints();
        assert!(a.use_typevar_operand());
        assert!(!a.requires_typevar_operand());
        assert_eq!(a.fixed_results(), 1);
        assert_eq!(a.fixed_value_arguments(), 2);
        assert_eq!(a.result_type(0, types::I32), types::I32);
        assert_eq!(a.result_type(0, types::I8), types::I8);
        assert_eq!(
            a.value_argument_constraint(0, types::I32),
            ResolvedConstraint::Bound(types::I32)
        );
        assert_eq!(
            a.value_argument_constraint(1, types::I32),
            ResolvedConstraint::Bound(types::I32)
        );

        // `bitcast`: the input operand is free within a type set, not bound to the typevar.
        let b = Opcode::Bitcast.constraints();
        assert!(!b.use_typevar_operand());
        assert!(!b.requires_typevar_operand());
        assert_eq!(b.fixed_results(), 1);
        assert_eq!(b.fixed_value_arguments(), 1);
        assert_eq!(b.result_type(0, types::I32), types::I32);
        assert_eq!(b.result_type(0, types::I8), types::I8);
        match b.value_argument_constraint(0, types::I32) {
            ResolvedConstraint::Free(vts) => assert!(vts.contains(types::F32)),
            _ => panic!("Unexpected constraint from value_argument_constraint"),
        }

        // `call`: all results/arguments come from the signature, none are fixed.
        let c = Opcode::Call.constraints();
        assert_eq!(c.fixed_results(), 0);
        assert_eq!(c.fixed_value_arguments(), 0);

        // `call_indirect`: one fixed argument (the callee pointer).
        let i = Opcode::CallIndirect.constraints();
        assert_eq!(i.fixed_results(), 0);
        assert_eq!(i.fixed_value_arguments(), 1);

        // `icmp`: the typevar can only be inferred from an operand.
        let cmp = Opcode::Icmp.constraints();
        assert!(cmp.use_typevar_operand());
        assert!(cmp.requires_typevar_operand());
        assert_eq!(cmp.fixed_results(), 1);
        assert_eq!(cmp.fixed_value_arguments(), 2);
    }

    /// Membership and `example()` behavior of `ValueTypeSet` for hand-built sets.
    #[test]
    fn value_set() {
        use ir::types::*;

        // Int/bool lanes from 16/8 bits up, any lane count up to 128.
        let vts = ValueTypeSet {
            lanes: BitSet16::from_range(0, 8),
            ints: BitSet8::from_range(4, 7),
            floats: BitSet8::from_range(0, 0),
            bools: BitSet8::from_range(3, 7),
        };
        assert!(!vts.contains(I8));
        assert!(vts.contains(I32));
        assert!(vts.contains(I64));
        assert!(vts.contains(I32X4));
        assert!(!vts.contains(F32));
        assert!(!vts.contains(B1));
        assert!(vts.contains(B8));
        assert!(vts.contains(B64));
        assert_eq!(vts.example().to_string(), "i32");

        // Ints excluded: the example falls through to a float.
        let vts = ValueTypeSet {
            lanes: BitSet16::from_range(0, 8),
            ints: BitSet8::from_range(0, 0),
            floats: BitSet8::from_range(5, 7),
            bools: BitSet8::from_range(3, 7),
        };
        assert_eq!(vts.example().to_string(), "f32");

        // Minimum lane count of 2: the example is a vector.
        let vts = ValueTypeSet {
            lanes: BitSet16::from_range(1, 8),
            ints: BitSet8::from_range(0, 0),
            floats: BitSet8::from_range(5, 7),
            bools: BitSet8::from_range(3, 7),
        };
        assert_eq!(vts.example().to_string(), "f32x2");

        // Bools only, minimum of 4 lanes.
        let vts = ValueTypeSet {
            lanes: BitSet16::from_range(2, 8),
            ints: BitSet8::from_range(0, 0),
            floats: BitSet8::from_range(0, 0),
            bools: BitSet8::from_range(3, 7),
        };
        assert!(!vts.contains(B32X2));
        assert!(vts.contains(B32X4));
        assert_eq!(vts.example().to_string(), "b32x4");

        let vts = ValueTypeSet {
            // TypeSet(lanes=(1, 256), ints=(8, 64))
            lanes: BitSet16::from_range(0, 9),
            ints: BitSet8::from_range(3, 7),
            floats: BitSet8::from_range(0, 0),
            bools: BitSet8::from_range(0, 0),
        };
        assert!(vts.contains(I32));
        assert!(vts.contains(I32X4));
    }
}
|
||||
180
lib/codegen/src/ir/jumptable.rs
Normal file
180
lib/codegen/src/ir/jumptable.rs
Normal file
@@ -0,0 +1,180 @@
|
||||
//! Jump table representation.
|
||||
//!
|
||||
//! Jump tables are declared in the preamble and assigned an `ir::entities::JumpTable` reference.
|
||||
//! The actual table of destinations is stored in a `JumpTableData` struct defined in this module.
|
||||
|
||||
use ir::entities::Ebb;
|
||||
use packed_option::PackedOption;
|
||||
use std::fmt::{self, Display, Formatter};
|
||||
use std::iter;
|
||||
use std::slice;
|
||||
use std::vec::Vec;
|
||||
|
||||
/// Contents of a jump table.
///
/// All jump tables use 0-based indexing and are expected to be densely populated. They don't need
/// to be completely populated, though. Individual entries can be missing.
#[derive(Clone)]
pub struct JumpTableData {
    // Table entries, using `None` as a placeholder for missing entries.
    table: Vec<PackedOption<Ebb>>,

    // How many `None` holes in table?
    // Kept in sync by `set_entry` / `clear_entry` so hole counting is O(1).
    holes: usize,
}
|
||||
|
||||
impl JumpTableData {
|
||||
/// Create a new empty jump table.
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
table: Vec::new(),
|
||||
holes: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new empty jump table with the specified capacity.
|
||||
pub fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
table: Vec::with_capacity(capacity),
|
||||
holes: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the number of table entries.
|
||||
pub fn len(&self) -> usize {
|
||||
self.table.len()
|
||||
}
|
||||
|
||||
/// Set a table entry.
|
||||
///
|
||||
/// The table will grow as needed to fit `idx`.
|
||||
pub fn set_entry(&mut self, idx: usize, dest: Ebb) {
|
||||
// Resize table to fit `idx`.
|
||||
if idx >= self.table.len() {
|
||||
self.holes += idx - self.table.len();
|
||||
self.table.resize(idx + 1, None.into());
|
||||
} else if self.table[idx].is_none() {
|
||||
// We're filling in an existing hole.
|
||||
self.holes -= 1;
|
||||
}
|
||||
self.table[idx] = dest.into();
|
||||
}
|
||||
|
||||
/// Append a table entry.
|
||||
pub fn push_entry(&mut self, dest: Ebb) {
|
||||
self.table.push(dest.into())
|
||||
}
|
||||
|
||||
/// Clear a table entry.
|
||||
///
|
||||
/// The `br_table` instruction will fall through if given an index corresponding to a cleared
|
||||
/// table entry.
|
||||
pub fn clear_entry(&mut self, idx: usize) {
|
||||
if idx < self.table.len() && self.table[idx].is_some() {
|
||||
self.holes += 1;
|
||||
self.table[idx] = None.into();
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the entry for `idx`, or `None`.
|
||||
pub fn get_entry(&self, idx: usize) -> Option<Ebb> {
|
||||
self.table.get(idx).and_then(|e| e.expand())
|
||||
}
|
||||
|
||||
/// Enumerate over all `(idx, dest)` pairs in the table in order.
|
||||
///
|
||||
/// This returns an iterator that skips any empty slots in the table.
|
||||
pub fn entries(&self) -> Entries {
|
||||
Entries(self.table.iter().cloned().enumerate())
|
||||
}
|
||||
|
||||
/// Checks if any of the entries branch to `ebb`.
|
||||
pub fn branches_to(&self, ebb: Ebb) -> bool {
|
||||
self.table.iter().any(|target_ebb| {
|
||||
target_ebb.expand() == Some(ebb)
|
||||
})
|
||||
}
|
||||
|
||||
/// Access the whole table as a mutable slice.
|
||||
pub fn as_mut_slice(&mut self) -> &mut [PackedOption<Ebb>] {
|
||||
self.table.as_mut_slice()
|
||||
}
|
||||
}
|
||||
|
||||
/// Enumerate `(idx, dest)` pairs in order.
|
||||
pub struct Entries<'a>(iter::Enumerate<iter::Cloned<slice::Iter<'a, PackedOption<Ebb>>>>);
|
||||
|
||||
impl<'a> Iterator for Entries<'a> {
|
||||
type Item = (usize, Ebb);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
loop {
|
||||
if let Some((idx, dest)) = self.0.next() {
|
||||
if let Some(ebb) = dest.expand() {
|
||||
return Some((idx, ebb));
|
||||
}
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for JumpTableData {
    /// Write the textual IR form, e.g. `jump_table ebb2, 0, ebb1`.
    ///
    /// Empty slots are printed as `0`; an empty table prints as `jump_table 0`
    /// (the missing first entry is rendered the same way as a hole).
    fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
        // The first entry has no leading comma, so it is handled separately.
        match self.table.first().and_then(|e| e.expand()) {
            None => write!(fmt, "jump_table 0")?,
            Some(first) => write!(fmt, "jump_table {}", first)?,
        }

        for dest in self.table.iter().skip(1).map(|e| e.expand()) {
            match dest {
                None => write!(fmt, ", 0")?,
                Some(ebb) => write!(fmt, ", {}", ebb)?,
            }
        }
        Ok(())
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::JumpTableData;
    use entity::EntityRef;
    use ir::Ebb;
    use std::string::ToString;
    use std::vec::Vec;

    /// An empty table: lookups return `None`, display shows a single `0`,
    /// and the entries iterator is empty.
    #[test]
    fn empty() {
        let jt = JumpTableData::new();

        assert_eq!(jt.get_entry(0), None);
        assert_eq!(jt.get_entry(10), None);

        assert_eq!(jt.to_string(), "jump_table 0");

        let v: Vec<(usize, Ebb)> = jt.entries().collect();
        assert_eq!(v, []);
    }

    /// `set_entry` overwrites existing slots and grows the table, creating holes.
    #[test]
    fn insert() {
        let e1 = Ebb::new(1);
        let e2 = Ebb::new(2);

        let mut jt = JumpTableData::new();

        jt.set_entry(0, e1);
        // Overwrites the previous entry at index 0.
        jt.set_entry(0, e2);
        // Grows the table, leaving holes at indices 1..10.
        jt.set_entry(10, e1);

        assert_eq!(
            jt.to_string(),
            "jump_table ebb2, 0, 0, 0, 0, 0, 0, 0, 0, 0, ebb1"
        );

        // The entries iterator skips the holes.
        let v: Vec<(usize, Ebb)> = jt.entries().collect();
        assert_eq!(v, [(0, e2), (10, e1)]);
    }
}
|
||||
1173
lib/codegen/src/ir/layout.rs
Normal file
1173
lib/codegen/src/ir/layout.rs
Normal file
File diff suppressed because it is too large
Load Diff
115
lib/codegen/src/ir/libcall.rs
Normal file
115
lib/codegen/src/ir/libcall.rs
Normal file
@@ -0,0 +1,115 @@
|
||||
//! Naming well-known routines in the runtime library.
|
||||
|
||||
use ir::{types, Opcode, Type};
|
||||
use std::fmt;
|
||||
use std::str::FromStr;
|
||||
|
||||
/// The name of a runtime library routine.
///
/// Runtime library calls are generated for Cretonne IR instructions that don't have an equivalent
/// ISA instruction or an easy macro expansion. A `LibCall` is used as a well-known name to refer to
/// the runtime library routine. This way, Cretonne doesn't have to know about the naming
/// convention in the embedding VM's runtime library.
///
/// This list is likely to grow over time.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum LibCall {
    /// ceil.f32
    CeilF32,
    /// ceil.f64
    CeilF64,
    /// floor.f32
    FloorF32,
    /// floor.f64
    FloorF64,
    /// trunc.f32
    TruncF32,
    /// trunc.f64
    TruncF64,
    /// nearest.f32
    NearestF32,
    /// nearest.f64
    NearestF64,
}

/// Routine names, indexed by the `LibCall` discriminant.
/// Must stay in the same order as the enum variants above.
const NAME: [&str; 8] = [
    "CeilF32",
    "CeilF64",
    "FloorF32",
    "FloorF64",
    "TruncF32",
    "TruncF64",
    "NearestF32",
    "NearestF64",
];

impl fmt::Display for LibCall {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // The discriminant doubles as an index into the parallel `NAME` table.
        f.write_str(NAME[*self as usize])
    }
}

impl FromStr for LibCall {
    type Err = ();

    /// Parse a `LibCall` from its `Display` name; unknown names yield `Err(())`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "CeilF32" => Ok(LibCall::CeilF32),
            "CeilF64" => Ok(LibCall::CeilF64),
            "FloorF32" => Ok(LibCall::FloorF32),
            "FloorF64" => Ok(LibCall::FloorF64),
            "TruncF32" => Ok(LibCall::TruncF32),
            "TruncF64" => Ok(LibCall::TruncF64),
            "NearestF32" => Ok(LibCall::NearestF32),
            "NearestF64" => Ok(LibCall::NearestF64),
            _ => Err(()),
        }
    }
}
|
||||
|
||||
impl LibCall {
    /// Get the well-known library call name to use as a replacement for an instruction with the
    /// given opcode and controlling type variable.
    ///
    /// Returns `None` if no well-known library routine name exists for that instruction.
    ///
    /// Only the float rounding opcodes (`ceil`, `floor`, `trunc`, `nearest`) on `f32`/`f64`
    /// currently map to library calls; everything else yields `None`.
    pub fn for_inst(opcode: Opcode, ctrl_type: Type) -> Option<LibCall> {
        Some(match ctrl_type {
            types::F32 => {
                match opcode {
                    Opcode::Ceil => LibCall::CeilF32,
                    Opcode::Floor => LibCall::FloorF32,
                    Opcode::Trunc => LibCall::TruncF32,
                    Opcode::Nearest => LibCall::NearestF32,
                    _ => return None,
                }
            }
            types::F64 => {
                match opcode {
                    Opcode::Ceil => LibCall::CeilF64,
                    Opcode::Floor => LibCall::FloorF64,
                    Opcode::Trunc => LibCall::TruncF64,
                    Opcode::Nearest => LibCall::NearestF64,
                    _ => return None,
                }
            }
            _ => return None,
        })
    }
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use std::string::ToString;

    /// `Display` uses the variant name verbatim.
    #[test]
    fn display() {
        assert_eq!(LibCall::CeilF32.to_string(), "CeilF32");
        assert_eq!(LibCall::NearestF64.to_string(), "NearestF64");
    }

    /// `FromStr` round-trips the `Display` name.
    #[test]
    fn parsing() {
        assert_eq!("FloorF32".parse(), Ok(LibCall::FloorF32));
    }
}
|
||||
93
lib/codegen/src/ir/memflags.rs
Normal file
93
lib/codegen/src/ir/memflags.rs
Normal file
@@ -0,0 +1,93 @@
|
||||
//! Memory operation flags.
|
||||
|
||||
use std::fmt;
|
||||
|
||||
/// Bit position of each flag inside `MemFlags::bits`.
enum FlagBit {
    Notrap,
    Aligned,
}

/// Textual flag names, indexed by the corresponding `FlagBit` discriminant.
const NAMES: [&str; 2] = ["notrap", "aligned"];

/// Flags for memory operations like load/store.
///
/// Each of these flags introduce a limited form of undefined behavior. The flags each enable
/// certain optimizations that need to make additional assumptions. Generally, the semantics of a
/// program does not change when a flag is removed, but adding a flag will.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub struct MemFlags {
    bits: u8,
}

impl MemFlags {
    /// Create a new empty set of flags.
    pub fn new() -> Self {
        Self { bits: 0 }
    }

    /// Read a flag bit.
    fn read(self, bit: FlagBit) -> bool {
        self.bits & (1 << bit as usize) != 0
    }

    /// Set a flag bit.
    fn set(&mut self, bit: FlagBit) {
        self.bits |= 1 << bit as usize
    }

    /// Set a flag bit by name.
    ///
    /// Returns true if the flag was found and set, false for an unknown flag name.
    pub fn set_by_name(&mut self, name: &str) -> bool {
        // The position of `name` in `NAMES` is the bit number, by construction.
        match NAMES.iter().position(|&s| s == name) {
            Some(bit) => {
                self.bits |= 1 << bit;
                true
            }
            None => false,
        }
    }

    /// Test if the `notrap` flag is set.
    ///
    /// Normally, trapping is part of the semantics of a load/store operation. If the platform
    /// would cause a trap when accessing the effective address, the Cretonne memory operation is
    /// also required to trap.
    ///
    /// The `notrap` flag tells Cretonne that the memory is *accessible*, which means that
    /// accesses will not trap. This makes it possible to delete an unused load or a dead store
    /// instruction.
    pub fn notrap(self) -> bool {
        self.read(FlagBit::Notrap)
    }

    /// Set the `notrap` flag.
    pub fn set_notrap(&mut self) {
        self.set(FlagBit::Notrap)
    }

    /// Test if the `aligned` flag is set.
    ///
    /// By default, Cretonne memory instructions work with any unaligned effective address. If the
    /// `aligned` flag is set, the instruction is permitted to trap or return a wrong result if the
    /// effective address is misaligned.
    pub fn aligned(self) -> bool {
        self.read(FlagBit::Aligned)
    }

    /// Set the `aligned` flag.
    pub fn set_aligned(&mut self) {
        self.set(FlagBit::Aligned)
    }
}

impl Default for MemFlags {
    /// An empty flag set, identical to `MemFlags::new()`.
    ///
    /// Provided so `MemFlags` follows the standard convention for types with a
    /// no-argument constructor (clippy `new_without_default`).
    fn default() -> Self {
        Self::new()
    }
}

impl fmt::Display for MemFlags {
    /// Write each set flag preceded by a space, e.g. " notrap aligned".
    /// An empty set writes nothing.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        for (i, n) in NAMES.iter().enumerate() {
            if self.bits & (1 << i) != 0 {
                write!(f, " {}", n)?;
            }
        }
        Ok(())
    }
}
|
||||
63
lib/codegen/src/ir/mod.rs
Normal file
63
lib/codegen/src/ir/mod.rs
Normal file
@@ -0,0 +1,63 @@
|
||||
//! Representation of Cretonne IR functions.
|
||||
|
||||
mod builder;
|
||||
pub mod condcodes;
|
||||
pub mod dfg;
|
||||
pub mod entities;
|
||||
mod extfunc;
|
||||
mod extname;
|
||||
pub mod function;
|
||||
mod globalvar;
|
||||
mod heap;
|
||||
pub mod immediates;
|
||||
pub mod instructions;
|
||||
pub mod jumptable;
|
||||
pub mod layout;
|
||||
mod libcall;
|
||||
mod memflags;
|
||||
mod progpoint;
|
||||
mod sourceloc;
|
||||
pub mod stackslot;
|
||||
mod trapcode;
|
||||
pub mod types;
|
||||
mod valueloc;
|
||||
|
||||
pub use ir::builder::{InsertBuilder, InstBuilder, InstBuilderBase, InstInserterBase};
|
||||
pub use ir::dfg::{DataFlowGraph, ValueDef};
|
||||
pub use ir::entities::{Ebb, FuncRef, GlobalVar, Heap, Inst, JumpTable, SigRef, StackSlot, Value};
|
||||
pub use ir::extfunc::{AbiParam, ArgumentExtension, ArgumentPurpose, CallConv, ExtFuncData,
|
||||
Signature};
|
||||
pub use ir::extname::ExternalName;
|
||||
pub use ir::function::Function;
|
||||
pub use ir::globalvar::GlobalVarData;
|
||||
pub use ir::heap::{HeapBase, HeapData, HeapStyle};
|
||||
pub use ir::instructions::{InstructionData, Opcode, ValueList, ValueListPool, VariableArgs};
|
||||
pub use ir::jumptable::JumpTableData;
|
||||
pub use ir::layout::Layout;
|
||||
pub use ir::libcall::LibCall;
|
||||
pub use ir::memflags::MemFlags;
|
||||
pub use ir::progpoint::{ExpandedProgramPoint, ProgramOrder, ProgramPoint};
|
||||
pub use ir::sourceloc::SourceLoc;
|
||||
pub use ir::stackslot::{StackSlotData, StackSlotKind, StackSlots};
|
||||
pub use ir::trapcode::TrapCode;
|
||||
pub use ir::types::Type;
|
||||
pub use ir::valueloc::{ArgumentLoc, ValueLoc};
|
||||
|
||||
use binemit;
|
||||
use entity::{EntityMap, PrimaryMap};
|
||||
use isa;
|
||||
|
||||
/// Map of value locations.
|
||||
pub type ValueLocations = EntityMap<Value, ValueLoc>;
|
||||
|
||||
/// Map of jump tables.
|
||||
pub type JumpTables = PrimaryMap<JumpTable, JumpTableData>;
|
||||
|
||||
/// Map of instruction encodings.
|
||||
pub type InstEncodings = EntityMap<Inst, isa::Encoding>;
|
||||
|
||||
/// Code offsets for EBBs.
|
||||
pub type EbbOffsets = EntityMap<Ebb, binemit::CodeOffset>;
|
||||
|
||||
/// Source locations for instructions.
|
||||
pub type SourceLocs = EntityMap<Inst, SourceLoc>;
|
||||
164
lib/codegen/src/ir/progpoint.rs
Normal file
164
lib/codegen/src/ir/progpoint.rs
Normal file
@@ -0,0 +1,164 @@
|
||||
//! Program points.
|
||||
|
||||
use entity::EntityRef;
|
||||
use ir::{Ebb, Inst, ValueDef};
|
||||
use std::cmp;
|
||||
use std::fmt;
|
||||
use std::u32;
|
||||
|
||||
/// A `ProgramPoint` represents a position in a function where the live range of an SSA value can
/// begin or end. It can be either:
///
/// 1. An instruction or
/// 2. An EBB header.
///
/// This corresponds more or less to the lines in the textual form of Cretonne IR.
// Encoding: an instruction index `i` is stored as `2 * i`, an EBB index `e` as `2 * e + 1`,
// so the low bit is the variant tag (decoded in `From<ProgramPoint> for ExpandedProgramPoint`).
#[derive(PartialEq, Eq, Clone, Copy)]
pub struct ProgramPoint(u32);

impl From<Inst> for ProgramPoint {
    fn from(inst: Inst) -> ProgramPoint {
        let idx = inst.index();
        // The index must fit in 31 bits so the doubling plus tag bit cannot overflow.
        debug_assert!(idx < (u32::MAX / 2) as usize);
        ProgramPoint((idx * 2) as u32)
    }
}

impl From<Ebb> for ProgramPoint {
    fn from(ebb: Ebb) -> ProgramPoint {
        let idx = ebb.index();
        // Same 31-bit bound as above; the `+ 1` sets the EBB tag bit.
        debug_assert!(idx < (u32::MAX / 2) as usize);
        ProgramPoint((idx * 2 + 1) as u32)
    }
}

impl From<ValueDef> for ProgramPoint {
    /// A value defined by an instruction maps to that instruction's point;
    /// an EBB parameter maps to its EBB header's point.
    fn from(def: ValueDef) -> ProgramPoint {
        match def {
            ValueDef::Result(inst, _) => inst.into(),
            ValueDef::Param(ebb, _) => ebb.into(),
        }
    }
}
|
||||
|
||||
/// An expanded program point directly exposes the variants, but takes twice the space to
/// represent.
#[derive(PartialEq, Eq, Clone, Copy)]
pub enum ExpandedProgramPoint {
    /// An instruction in the function.
    Inst(Inst),
    /// An EBB header.
    Ebb(Ebb),
}

impl ExpandedProgramPoint {
    /// Get the instruction we know is inside.
    ///
    /// # Panics
    ///
    /// Panics if this program point is actually an EBB header.
    pub fn unwrap_inst(self) -> Inst {
        match self {
            ExpandedProgramPoint::Inst(x) => x,
            ExpandedProgramPoint::Ebb(x) => panic!("expected inst: {}", x),
        }
    }
}
|
||||
|
||||
impl From<Inst> for ExpandedProgramPoint {
    fn from(inst: Inst) -> ExpandedProgramPoint {
        ExpandedProgramPoint::Inst(inst)
    }
}

impl From<Ebb> for ExpandedProgramPoint {
    fn from(ebb: Ebb) -> ExpandedProgramPoint {
        ExpandedProgramPoint::Ebb(ebb)
    }
}

impl From<ValueDef> for ExpandedProgramPoint {
    /// Same mapping as `From<ValueDef> for ProgramPoint`: instruction results map to the
    /// instruction, EBB parameters to the EBB header.
    fn from(def: ValueDef) -> ExpandedProgramPoint {
        match def {
            ValueDef::Result(inst, _) => inst.into(),
            ValueDef::Param(ebb, _) => ebb.into(),
        }
    }
}

impl From<ProgramPoint> for ExpandedProgramPoint {
    /// Decode the packed representation: the low bit is the tag (0 = instruction,
    /// 1 = EBB header) and the remaining bits are the entity index.
    fn from(pp: ProgramPoint) -> ExpandedProgramPoint {
        if pp.0 & 1 == 0 {
            ExpandedProgramPoint::Inst(Inst::new((pp.0 / 2) as usize))
        } else {
            ExpandedProgramPoint::Ebb(Ebb::new((pp.0 / 2) as usize))
        }
    }
}
|
||||
|
||||
impl fmt::Display for ExpandedProgramPoint {
    /// Delegate to the inner entity's `Display` (e.g. "inst5" or "ebb3").
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            ExpandedProgramPoint::Inst(x) => write!(f, "{}", x),
            ExpandedProgramPoint::Ebb(x) => write!(f, "{}", x),
        }
    }
}

impl fmt::Display for ProgramPoint {
    /// Expand the packed point first, then use the expanded form's `Display`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let epp: ExpandedProgramPoint = (*self).into();
        epp.fmt(f)
    }
}

impl fmt::Debug for ExpandedProgramPoint {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "ExpandedProgramPoint({})", self)
    }
}

impl fmt::Debug for ProgramPoint {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "ProgramPoint({})", self)
    }
}
|
||||
|
||||
/// Context for ordering program points.
///
/// `ProgramPoint` objects don't carry enough information to be ordered independently, they need a
/// context providing the program order.
pub trait ProgramOrder {
    /// Compare the program points `a` and `b` relative to this program order.
    ///
    /// Return `Less` if `a` appears in the program before `b`.
    ///
    /// This is declared as a generic such that it can be called with `Inst` and `Ebb` arguments
    /// directly. Depending on the implementation, there is a good chance performance will be
    /// improved for those cases where the type of either argument is known statically.
    fn cmp<A, B>(&self, a: A, b: B) -> cmp::Ordering
    where
        A: Into<ExpandedProgramPoint>,
        B: Into<ExpandedProgramPoint>;

    /// Is the range from `inst` to `ebb` just the gap between consecutive EBBs?
    ///
    /// This returns true if `inst` is the terminator in the EBB immediately before `ebb`.
    fn is_ebb_gap(&self, inst: Inst, ebb: Ebb) -> bool;
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use entity::EntityRef;
    use ir::{Ebb, Inst};
    use std::string::ToString;

    /// Converting entities into `ProgramPoint` preserves their display identity.
    #[test]
    fn convert() {
        let i5 = Inst::new(5);
        let b3 = Ebb::new(3);

        let pp1: ProgramPoint = i5.into();
        let pp2: ProgramPoint = b3.into();

        // The packed encoding round-trips through Display via ExpandedProgramPoint.
        assert_eq!(pp1.to_string(), "inst5");
        assert_eq!(pp2.to_string(), "ebb3");
    }
}
|
||||
63
lib/codegen/src/ir/sourceloc.rs
Normal file
63
lib/codegen/src/ir/sourceloc.rs
Normal file
@@ -0,0 +1,63 @@
|
||||
//! Source locations.
|
||||
//!
|
||||
//! Cretonne tracks the original source location of each instruction, and preserves the source
|
||||
//! location when instructions are transformed.
|
||||
|
||||
use std::fmt;
|
||||
|
||||
/// A source location.
///
/// An opaque 32-bit tag attached to each Cretonne IR instruction. Cretonne never interprets
/// these values; they are carried from the input through to the output unchanged.
///
/// The default source location is the all-ones bit pattern `!0`, reserved for instructions
/// that have no real source location.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct SourceLoc(u32);

impl SourceLoc {
    /// Create a new source location with the given bits.
    pub fn new(bits: u32) -> SourceLoc {
        SourceLoc(bits)
    }

    /// Is this the default (all-ones) source location?
    pub fn is_default(self) -> bool {
        self == Default::default()
    }

    /// Read the bits of this source location.
    pub fn bits(self) -> u32 {
        self.0
    }
}

impl Default for SourceLoc {
    /// The reserved "no location" value, `!0`.
    fn default() -> Self {
        SourceLoc(!0)
    }
}

impl fmt::Display for SourceLoc {
    /// Print as `@-` for the default location, otherwise `@` followed by at
    /// least four lowercase hex digits.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if self.is_default() {
            f.write_str("@-")
        } else {
            write!(f, "@{:04x}", self.0)
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use ir::SourceLoc;
    use std::string::ToString;

    /// The default location prints `@-`; real locations print zero-padded hex.
    #[test]
    fn display() {
        assert_eq!(SourceLoc::default().to_string(), "@-");
        assert_eq!(SourceLoc::new(0).to_string(), "@0000");
        assert_eq!(SourceLoc::new(16).to_string(), "@0010");
        // Values wider than four hex digits are printed in full.
        assert_eq!(SourceLoc::new(0xabcdef).to_string(), "@abcdef");
    }
}
|
||||
425
lib/codegen/src/ir/stackslot.rs
Normal file
425
lib/codegen/src/ir/stackslot.rs
Normal file
@@ -0,0 +1,425 @@
|
||||
//! Stack slots.
|
||||
//!
|
||||
//! The `StackSlotData` struct keeps track of a single stack slot in a function.
|
||||
//!
|
||||
|
||||
use entity::{Iter, IterMut, Keys, PrimaryMap};
|
||||
use ir::{StackSlot, Type};
|
||||
use packed_option::PackedOption;
|
||||
use std::cmp;
|
||||
use std::fmt;
|
||||
use std::ops::{Index, IndexMut};
|
||||
use std::slice;
|
||||
use std::str::FromStr;
|
||||
use std::vec::Vec;
|
||||
|
||||
/// The size of an object on the stack, or the size of a stack frame.
///
/// We don't use `usize` to represent object sizes on the target platform because Cretonne supports
/// cross-compilation, and `usize` is a type that depends on the host platform, not the target
/// platform.
pub type StackSize = u32;

/// A stack offset.
///
/// The location of a stack offset relative to a stack pointer or frame pointer.
pub type StackOffset = i32;

/// The minimum size of a spill slot in bytes.
///
/// ISA implementations are allowed to assume that small types like `b1` and `i8` get a full 4-byte
/// spill slot.
const MIN_SPILL_SLOT_SIZE: StackSize = 4;

/// Get the spill slot size to use for `ty`.
///
/// This is the type's byte size, rounded up to the 4-byte minimum.
fn spill_size(ty: Type) -> StackSize {
    cmp::max(MIN_SPILL_SLOT_SIZE, ty.bytes())
}
|
||||
|
||||
/// The kind of a stack slot.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum StackSlotKind {
    /// A spill slot. This is a stack slot created by the register allocator.
    SpillSlot,

    /// An explicit stack slot. This is a chunk of stack memory for use by the `stack_load`
    /// and `stack_store` instructions.
    ExplicitSlot,

    /// An incoming function argument.
    ///
    /// If the current function has more arguments than fits in registers, the remaining arguments
    /// are passed on the stack by the caller. These incoming arguments are represented as SSA
    /// values assigned to incoming stack slots.
    IncomingArg,

    /// An outgoing function argument.
    ///
    /// When preparing to call a function whose arguments don't fit in registers, outgoing argument
    /// stack slots are used to represent individual arguments in the outgoing call frame. These
    /// stack slots are only valid while setting up a call.
    OutgoingArg,

    /// An emergency spill slot.
    ///
    /// Emergency slots are allocated late when the register's constraint solver needs extra space
    /// to shuffle registers around. The are only used briefly, and can be reused.
    EmergencySlot,
}
|
||||
|
||||
impl FromStr for StackSlotKind {
    type Err = ();

    /// Parse the snake_case keyword used in the textual IR; the accepted strings
    /// mirror the `Display` impl below. Unknown strings yield `Err(())`.
    fn from_str(s: &str) -> Result<StackSlotKind, ()> {
        use self::StackSlotKind::*;
        match s {
            "explicit_slot" => Ok(ExplicitSlot),
            "spill_slot" => Ok(SpillSlot),
            "incoming_arg" => Ok(IncomingArg),
            "outgoing_arg" => Ok(OutgoingArg),
            "emergency_slot" => Ok(EmergencySlot),
            _ => Err(()),
        }
    }
}
|
||||
|
||||
impl fmt::Display for StackSlotKind {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
use self::StackSlotKind::*;
|
||||
f.write_str(match *self {
|
||||
ExplicitSlot => "explicit_slot",
|
||||
SpillSlot => "spill_slot",
|
||||
IncomingArg => "incoming_arg",
|
||||
OutgoingArg => "outgoing_arg",
|
||||
EmergencySlot => "emergency_slot",
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Contents of a stack slot.
#[derive(Clone, Debug)]
pub struct StackSlotData {
    /// The kind of stack slot.
    pub kind: StackSlotKind,

    /// Size of stack slot in bytes.
    pub size: StackSize,

    /// Offset of stack slot relative to the stack pointer in the caller.
    ///
    /// On x86, the base address is the stack pointer *before* the return address was pushed. On
    /// RISC ISAs, the base address is the value of the stack pointer on entry to the function.
    ///
    /// For `OutgoingArg` stack slots, the offset is relative to the current function's stack
    /// pointer immediately before the call.
    ///
    /// `None` until an offset is assigned (see `StackSlots::set_offset`).
    pub offset: Option<StackOffset>,
}
impl StackSlotData {
|
||||
/// Create a stack slot with the specified byte size.
|
||||
pub fn new(kind: StackSlotKind, size: StackSize) -> StackSlotData {
|
||||
StackSlotData {
|
||||
kind,
|
||||
size,
|
||||
offset: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the alignment in bytes of this stack slot given the stack pointer alignment.
|
||||
pub fn alignment(&self, max_align: StackSize) -> StackSize {
|
||||
debug_assert!(max_align.is_power_of_two());
|
||||
// We want to find the largest power of two that divides both `self.size` and `max_align`.
|
||||
// That is the same as isolating the rightmost bit in `x`.
|
||||
let x = self.size | max_align;
|
||||
// C.f. Hacker's delight.
|
||||
x & x.wrapping_neg()
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for StackSlotData {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{} {}", self.kind, self.size)?;
|
||||
if let Some(offset) = self.offset {
|
||||
write!(f, ", offset {}", offset)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Stack frame manager.
///
/// Keep track of all the stack slots used by a function.
#[derive(Clone, Debug)]
pub struct StackSlots {
    /// All allocated stack slots.
    slots: PrimaryMap<StackSlot, StackSlotData>,

    /// All the outgoing stack slots, ordered by offset.
    ///
    /// Kept sorted by `(offset, size)` so `get_outgoing_arg` can binary search it.
    outgoing: Vec<StackSlot>,

    /// All the emergency slots.
    emergency: Vec<StackSlot>,

    /// The total size of the stack frame.
    ///
    /// This is the distance from the stack pointer in the current function to the stack pointer in
    /// the calling function, so it includes a pushed return address as well as space for outgoing
    /// call arguments.
    ///
    /// This is computed by the `layout()` method.
    pub frame_size: Option<StackSize>,
}
/// Stack slot manager functions that behave mostly like an entity map.
impl StackSlots {
    /// Create an empty stack slot manager.
    pub fn new() -> Self {
        Self {
            slots: PrimaryMap::new(),
            outgoing: Vec::new(),
            emergency: Vec::new(),
            frame_size: None,
        }
    }

    /// Clear out everything.
    ///
    /// Resets all slot storage and the computed frame size.
    pub fn clear(&mut self) {
        self.slots.clear();
        self.outgoing.clear();
        self.emergency.clear();
        self.frame_size = None;
    }

    /// Allocate a new stack slot.
    ///
    /// This function should be primarily used by the text format parser. There are more convenient
    /// functions for creating specific kinds of stack slots below.
    pub fn push(&mut self, data: StackSlotData) -> StackSlot {
        self.slots.push(data)
    }

    /// Check if `ss` is a valid stack slot reference.
    pub fn is_valid(&self, ss: StackSlot) -> bool {
        self.slots.is_valid(ss)
    }

    /// Set the offset of a stack slot.
    pub fn set_offset(&mut self, ss: StackSlot, offset: StackOffset) {
        self.slots[ss].offset = Some(offset);
    }

    /// Get an iterator over all the stack slot keys.
    pub fn iter(&self) -> Iter<StackSlot, StackSlotData> {
        self.slots.iter()
    }

    /// Get an iterator over all the stack slot keys, mutable edition.
    pub fn iter_mut(&mut self) -> IterMut<StackSlot, StackSlotData> {
        self.slots.iter_mut()
    }

    /// Get an iterator over all the stack slot records.
    pub fn values(&self) -> slice::Iter<StackSlotData> {
        self.slots.values()
    }

    /// Get an iterator over all the stack slot records, mutable edition.
    pub fn values_mut(&mut self) -> slice::IterMut<StackSlotData> {
        self.slots.values_mut()
    }

    /// Get an iterator over all the stack slot keys.
    pub fn keys(&self) -> Keys<StackSlot> {
        self.slots.keys()
    }

    /// Get a reference to the next stack slot that would be created by `push()`.
    ///
    /// This should just be used by the parser.
    pub fn next_key(&self) -> StackSlot {
        self.slots.next_key()
    }
}
impl Index<StackSlot> for StackSlots {
    type Output = StackSlotData;

    /// Immutable access to a slot's data via `stack_slots[ss]`.
    fn index(&self, ss: StackSlot) -> &StackSlotData {
        &self.slots[ss]
    }
}
impl IndexMut<StackSlot> for StackSlots {
    /// Mutable access to a slot's data via `stack_slots[ss]`.
    fn index_mut(&mut self, ss: StackSlot) -> &mut StackSlotData {
        &mut self.slots[ss]
    }
}
/// Higher-level stack frame manipulation functions.
|
||||
impl StackSlots {
|
||||
/// Create a new spill slot for spilling values of type `ty`.
|
||||
pub fn make_spill_slot(&mut self, ty: Type) -> StackSlot {
|
||||
self.push(StackSlotData::new(StackSlotKind::SpillSlot, spill_size(ty)))
|
||||
}
|
||||
|
||||
/// Create a stack slot representing an incoming function argument.
|
||||
pub fn make_incoming_arg(&mut self, ty: Type, offset: StackOffset) -> StackSlot {
|
||||
let mut data = StackSlotData::new(StackSlotKind::IncomingArg, ty.bytes());
|
||||
debug_assert!(offset <= StackOffset::max_value() - data.size as StackOffset);
|
||||
data.offset = Some(offset);
|
||||
self.push(data)
|
||||
}
|
||||
|
||||
/// Get a stack slot representing an outgoing argument.
|
||||
///
|
||||
/// This may create a new stack slot, or reuse an existing outgoing stack slot with the
|
||||
/// requested offset and size.
|
||||
///
|
||||
/// The requested offset is relative to this function's stack pointer immediately before making
|
||||
/// the call.
|
||||
pub fn get_outgoing_arg(&mut self, ty: Type, offset: StackOffset) -> StackSlot {
|
||||
let size = ty.bytes();
|
||||
|
||||
// Look for an existing outgoing stack slot with the same offset and size.
|
||||
let inspos = match self.outgoing.binary_search_by_key(&(offset, size), |&ss| {
|
||||
(self[ss].offset.unwrap(), self[ss].size)
|
||||
}) {
|
||||
Ok(idx) => return self.outgoing[idx],
|
||||
Err(idx) => idx,
|
||||
};
|
||||
|
||||
// No existing slot found. Make one and insert it into `outgoing`.
|
||||
let mut data = StackSlotData::new(StackSlotKind::OutgoingArg, size);
|
||||
debug_assert!(offset <= StackOffset::max_value() - size as StackOffset);
|
||||
data.offset = Some(offset);
|
||||
let ss = self.slots.push(data);
|
||||
self.outgoing.insert(inspos, ss);
|
||||
ss
|
||||
}
|
||||
|
||||
/// Get an emergency spill slot that can be used to store a `ty` value.
|
||||
///
|
||||
/// This may allocate a new slot, or it may reuse an existing emergency spill slot, excluding
|
||||
/// any slots in the `in_use` list.
|
||||
pub fn get_emergency_slot(
|
||||
&mut self,
|
||||
ty: Type,
|
||||
in_use: &[PackedOption<StackSlot>],
|
||||
) -> StackSlot {
|
||||
let size = spill_size(ty);
|
||||
|
||||
// Find the smallest existing slot that can fit the type.
|
||||
if let Some(&ss) = self.emergency
|
||||
.iter()
|
||||
.filter(|&&ss| self[ss].size >= size && !in_use.contains(&ss.into()))
|
||||
.min_by_key(|&&ss| self[ss].size)
|
||||
{
|
||||
return ss;
|
||||
}
|
||||
|
||||
// Alternatively, use the largest available slot and make it larger.
|
||||
if let Some(&ss) = self.emergency
|
||||
.iter()
|
||||
.filter(|&&ss| !in_use.contains(&ss.into()))
|
||||
.max_by_key(|&&ss| self[ss].size)
|
||||
{
|
||||
self.slots[ss].size = size;
|
||||
return ss;
|
||||
}
|
||||
|
||||
// No existing slot found. Make one and insert it into `emergency`.
|
||||
let data = StackSlotData::new(StackSlotKind::EmergencySlot, size);
|
||||
let ss = self.slots.push(data);
|
||||
self.emergency.push(ss);
|
||||
ss
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use ir::Function;
    use ir::types;
    use std::string::ToString;

    #[test]
    fn stack_slot() {
        let mut func = Function::new();

        let arg = func.create_stack_slot(StackSlotData::new(StackSlotKind::IncomingArg, 4));
        let spill = func.create_stack_slot(StackSlotData::new(StackSlotKind::SpillSlot, 8));

        // Slots are numbered in creation order.
        assert_eq!(arg.to_string(), "ss0");
        assert_eq!(spill.to_string(), "ss1");

        assert_eq!(func.stack_slots[arg].size, 4);
        assert_eq!(func.stack_slots[spill].size, 8);

        assert_eq!(func.stack_slots[arg].to_string(), "incoming_arg 4");
        assert_eq!(func.stack_slots[spill].to_string(), "spill_slot 8");
    }

    #[test]
    fn outgoing() {
        let mut sss = StackSlots::new();

        let ss0 = sss.get_outgoing_arg(types::I32, 8);
        let ss1 = sss.get_outgoing_arg(types::I32, 4);
        let ss2 = sss.get_outgoing_arg(types::I64, 8);

        assert_eq!(sss[ss0].offset, Some(8));
        assert_eq!(sss[ss0].size, 4);

        assert_eq!(sss[ss1].offset, Some(4));
        assert_eq!(sss[ss1].size, 4);

        assert_eq!(sss[ss2].offset, Some(8));
        assert_eq!(sss[ss2].size, 8);

        // Asking again with the same offset and size reuses the existing slots.
        assert_eq!(sss.get_outgoing_arg(types::I32, 8), ss0);
        assert_eq!(sss.get_outgoing_arg(types::I32, 4), ss1);
        assert_eq!(sss.get_outgoing_arg(types::I64, 8), ss2);
    }

    #[test]
    fn alignment() {
        // An 8-byte slot aligns up to the SP alignment, but never past its own size.
        let spill = StackSlotData::new(StackSlotKind::SpillSlot, 8);

        assert_eq!(spill.alignment(4), 4);
        assert_eq!(spill.alignment(8), 8);
        assert_eq!(spill.alignment(16), 8);

        // 24 = 8 * 3, so the largest power of two dividing it is 8.
        let explicit = StackSlotData::new(StackSlotKind::ExplicitSlot, 24);

        assert_eq!(explicit.alignment(4), 4);
        assert_eq!(explicit.alignment(8), 8);
        assert_eq!(explicit.alignment(16), 8);
        assert_eq!(explicit.alignment(32), 8);
    }

    #[test]
    fn emergency() {
        let mut sss = StackSlots::new();

        let ss0 = sss.get_emergency_slot(types::I32, &[]);
        assert_eq!(sss[ss0].size, 4);

        // When a smaller size is requested, we should simply get the same slot back.
        assert_eq!(sss.get_emergency_slot(types::I8, &[]), ss0);
        assert_eq!(sss[ss0].size, 4);
        assert_eq!(sss.get_emergency_slot(types::F32, &[]), ss0);
        assert_eq!(sss[ss0].size, 4);

        // Ask for a larger size and the slot should grow.
        assert_eq!(sss.get_emergency_slot(types::F64, &[]), ss0);
        assert_eq!(sss[ss0].size, 8);

        // When one slot is in use, we should get a new one.
        let ss1 = sss.get_emergency_slot(types::I32, &[None.into(), ss0.into()]);
        assert_eq!(sss[ss0].size, 8);
        assert_eq!(sss[ss1].size, 4);

        // Now we should get the smallest fit of the two available slots.
        assert_eq!(sss.get_emergency_slot(types::F32, &[]), ss1);
        assert_eq!(sss.get_emergency_slot(types::F64, &[]), ss0);
    }
}
120
lib/codegen/src/ir/trapcode.rs
Normal file
120
lib/codegen/src/ir/trapcode.rs
Normal file
@@ -0,0 +1,120 @@
|
||||
//! Trap codes describing the reason for a trap.
|
||||
|
||||
use std::fmt::{self, Display, Formatter};
|
||||
use std::str::FromStr;
|
||||
|
||||
/// A trap code describing the reason for a trap.
///
/// All trap instructions have an explicit trap code.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum TrapCode {
    /// The current stack space was exhausted.
    ///
    /// On some platforms, a stack overflow may also be indicated by a segmentation fault from the
    /// stack guard page.
    StackOverflow,

    /// A `heap_addr` instruction detected an out-of-bounds error.
    ///
    /// Some out-of-bounds heap accesses are detected by a segmentation fault on the heap guard
    /// pages.
    HeapOutOfBounds,

    /// Other bounds checking error.
    OutOfBounds,

    /// Indirect call to a null table entry.
    IndirectCallToNull,

    /// Signature mismatch on indirect call.
    BadSignature,

    /// An integer arithmetic operation caused an overflow.
    IntegerOverflow,

    /// An integer division by zero.
    IntegerDivisionByZero,

    /// Failed float-to-int conversion.
    BadConversionToInteger,

    /// Execution has potentially run too long and may be interrupted.
    /// This trap is resumable.
    Interrupt,

    /// A user-defined trap code.
    User(u16),
}

impl Display for TrapCode {
    /// Write the identifier accepted back by `FromStr`.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        use self::TrapCode::*;
        // User codes carry a numeric payload and are formatted separately.
        if let User(x) = *self {
            return write!(f, "user{}", x);
        }
        f.write_str(match *self {
            StackOverflow => "stk_ovf",
            HeapOutOfBounds => "heap_oob",
            OutOfBounds => "oob",
            IndirectCallToNull => "icall_null",
            BadSignature => "bad_sig",
            IntegerOverflow => "int_ovf",
            IntegerDivisionByZero => "int_divz",
            BadConversionToInteger => "bad_toint",
            Interrupt => "interrupt",
            User(_) => unreachable!(),
        })
    }
}

impl FromStr for TrapCode {
    type Err = ();

    /// Parse the identifiers produced by `Display`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        use self::TrapCode::*;
        match s {
            "stk_ovf" => Ok(StackOverflow),
            "heap_oob" => Ok(HeapOutOfBounds),
            "oob" => Ok(OutOfBounds),
            "icall_null" => Ok(IndirectCallToNull),
            "bad_sig" => Ok(BadSignature),
            "int_ovf" => Ok(IntegerOverflow),
            "int_divz" => Ok(IntegerDivisionByZero),
            "bad_toint" => Ok(BadConversionToInteger),
            "interrupt" => Ok(Interrupt),
            _ => {
                // `userNNN` parses the numeric payload; anything else is an error.
                if s.starts_with("user") {
                    s[4..].parse().map(User).map_err(|_| ())
                } else {
                    Err(())
                }
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::string::ToString;

    // Everything but user-defined codes.
    const CODES: [TrapCode; 8] = [
        TrapCode::StackOverflow,
        TrapCode::HeapOutOfBounds,
        TrapCode::OutOfBounds,
        TrapCode::IndirectCallToNull,
        TrapCode::BadSignature,
        TrapCode::IntegerOverflow,
        TrapCode::IntegerDivisionByZero,
        TrapCode::BadConversionToInteger,
    ];

    #[test]
    fn display() {
        // Every non-user code round-trips through Display and FromStr.
        for code in CODES.iter() {
            assert_eq!(code.to_string().parse(), Ok(*code));
        }
        assert_eq!("bogus".parse::<TrapCode>(), Err(()));

        // User codes round-trip too, but malformed payloads must fail.
        assert_eq!(TrapCode::User(17).to_string(), "user17");
        assert_eq!("user22".parse(), Ok(TrapCode::User(22)));
        assert_eq!("user".parse::<TrapCode>(), Err(()));
        assert_eq!("user-1".parse::<TrapCode>(), Err(()));
        assert_eq!("users".parse::<TrapCode>(), Err(()));
    }
}
456
lib/codegen/src/ir/types.rs
Normal file
456
lib/codegen/src/ir/types.rs
Normal file
@@ -0,0 +1,456 @@
|
||||
//! Common types for the Cretonne code generator.
|
||||
|
||||
use std::default::Default;
|
||||
use std::fmt::{self, Debug, Display, Formatter};
|
||||
|
||||
/// The type of an SSA value.
///
/// The `VOID` type is only used for instructions that produce no value. It can't be part of a SIMD
/// vector.
///
/// Basic integer types: `I8`, `I16`, `I32`, and `I64`. These types are sign-agnostic.
///
/// Basic floating point types: `F32` and `F64`. IEEE single and double precision.
///
/// Boolean types: `B1`, `B8`, `B16`, `B32`, and `B64`. These all encode 'true' or 'false'. The
/// larger types use redundant bits.
///
/// SIMD vector types have power-of-two lanes, up to 256. Lanes can be any int/float/bool type.
///
/// Encoded as a single byte: the low nibble selects the lane type, and the high nibble (offset by
/// `LANE_BASE`) encodes log2 of the lane count.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct Type(u8);

/// No type. Used for functions without a return value. Can't be loaded or stored. Can't be part of
/// a SIMD vector.
pub const VOID: Type = Type(0);

/// Start of the lane types. See also `meta/cdsl.types.py`.
const LANE_BASE: u8 = 0x70;

/// Start of the 2-lane vector types.
const VECTOR_BASE: u8 = LANE_BASE + 16;

// Include code generated by `lib/codegen/meta/gen_types.py`. This file contains constant
// definitions for all the scalar types as well as common vector types for 64, 128, 256, and
// 512-bit SIMD vectors.
include!(concat!(env!("OUT_DIR"), "/types.rs"));
impl Type {
|
||||
/// Get the lane type of this SIMD vector type.
|
||||
///
|
||||
/// A lane type is the same as a SIMD vector type with one lane, so it returns itself.
|
||||
pub fn lane_type(self) -> Type {
|
||||
if self.0 < VECTOR_BASE {
|
||||
self
|
||||
} else {
|
||||
Type(LANE_BASE | (self.0 & 0x0f))
|
||||
}
|
||||
}
|
||||
|
||||
/// Get log_2 of the number of bits in a lane.
|
||||
pub fn log2_lane_bits(self) -> u8 {
|
||||
match self.lane_type() {
|
||||
B1 => 0,
|
||||
B8 | I8 => 3,
|
||||
B16 | I16 => 4,
|
||||
B32 | I32 | F32 => 5,
|
||||
B64 | I64 | F64 => 6,
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the number of bits in a lane.
|
||||
pub fn lane_bits(self) -> u8 {
|
||||
match self.lane_type() {
|
||||
B1 => 1,
|
||||
B8 | I8 => 8,
|
||||
B16 | I16 => 16,
|
||||
B32 | I32 | F32 => 32,
|
||||
B64 | I64 | F64 => 64,
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get an integer type with the requested number of bits.
|
||||
pub fn int(bits: u16) -> Option<Type> {
|
||||
match bits {
|
||||
8 => Some(I8),
|
||||
16 => Some(I16),
|
||||
32 => Some(I32),
|
||||
64 => Some(I64),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a type with the same number of lanes as `self`, but using `lane` as the lane type.
|
||||
fn replace_lanes(self, lane: Type) -> Type {
|
||||
debug_assert!(lane.is_lane() && !self.is_special());
|
||||
Type((lane.0 & 0x0f) | (self.0 & 0xf0))
|
||||
}
|
||||
|
||||
/// Get a type with the same number of lanes as this type, but with the lanes replaced by
|
||||
/// booleans of the same size.
|
||||
///
|
||||
/// Scalar types are treated as vectors with one lane, so they are converted to the multi-bit
|
||||
/// boolean types.
|
||||
pub fn as_bool_pedantic(self) -> Type {
|
||||
// Replace the low 4 bits with the boolean version, preserve the high 4 bits.
|
||||
self.replace_lanes(match self.lane_type() {
|
||||
B8 | I8 => B8,
|
||||
B16 | I16 => B16,
|
||||
B32 | I32 | F32 => B32,
|
||||
B64 | I64 | F64 => B64,
|
||||
_ => B1,
|
||||
})
|
||||
}
|
||||
|
||||
/// Get a type with the same number of lanes as this type, but with the lanes replaced by
|
||||
/// booleans of the same size.
|
||||
///
|
||||
/// Scalar types are all converted to `b1` which is usually what you want.
|
||||
pub fn as_bool(self) -> Type {
|
||||
if !self.is_vector() {
|
||||
B1
|
||||
} else {
|
||||
self.as_bool_pedantic()
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a type with the same number of lanes as this type, but with lanes that are half the
|
||||
/// number of bits.
|
||||
pub fn half_width(self) -> Option<Type> {
|
||||
Some(self.replace_lanes(match self.lane_type() {
|
||||
I16 => I8,
|
||||
I32 => I16,
|
||||
I64 => I32,
|
||||
F64 => F32,
|
||||
B16 => B8,
|
||||
B32 => B16,
|
||||
B64 => B32,
|
||||
_ => return None,
|
||||
}))
|
||||
}
|
||||
|
||||
/// Get a type with the same number of lanes as this type, but with lanes that are twice the
|
||||
/// number of bits.
|
||||
pub fn double_width(self) -> Option<Type> {
|
||||
Some(self.replace_lanes(match self.lane_type() {
|
||||
I8 => I16,
|
||||
I16 => I32,
|
||||
I32 => I64,
|
||||
F32 => F64,
|
||||
B8 => B16,
|
||||
B16 => B32,
|
||||
B32 => B64,
|
||||
_ => return None,
|
||||
}))
|
||||
}
|
||||
|
||||
/// Is this the VOID type?
|
||||
pub fn is_void(self) -> bool {
|
||||
self == VOID
|
||||
}
|
||||
|
||||
/// Is this a special type?
|
||||
pub fn is_special(self) -> bool {
|
||||
self.0 < LANE_BASE
|
||||
}
|
||||
|
||||
/// Is this a lane type?
|
||||
///
|
||||
/// This is a scalar type that can also appear as the lane type of a SIMD vector.
|
||||
pub fn is_lane(self) -> bool {
|
||||
LANE_BASE <= self.0 && self.0 < VECTOR_BASE
|
||||
}
|
||||
|
||||
/// Is this a SIMD vector type?
|
||||
///
|
||||
/// A vector type has 2 or more lanes.
|
||||
pub fn is_vector(self) -> bool {
|
||||
self.0 >= VECTOR_BASE
|
||||
}
|
||||
|
||||
/// Is this a scalar boolean type?
|
||||
pub fn is_bool(self) -> bool {
|
||||
match self {
|
||||
B1 | B8 | B16 | B32 | B64 => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is this a scalar integer type?
|
||||
pub fn is_int(self) -> bool {
|
||||
match self {
|
||||
I8 | I16 | I32 | I64 => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is this a scalar floating point type?
|
||||
pub fn is_float(self) -> bool {
|
||||
match self {
|
||||
F32 | F64 => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is this a CPU flags type?
|
||||
pub fn is_flags(self) -> bool {
|
||||
match self {
|
||||
IFLAGS | FFLAGS => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get log_2 of the number of lanes in this SIMD vector type.
|
||||
///
|
||||
/// All SIMD types have a lane count that is a power of two and no larger than 256, so this
|
||||
/// will be a number in the range 0-8.
|
||||
///
|
||||
/// A scalar type is the same as a SIMD vector type with one lane, so it returns 0.
|
||||
pub fn log2_lane_count(self) -> u8 {
|
||||
self.0.saturating_sub(LANE_BASE) >> 4
|
||||
}
|
||||
|
||||
/// Get the number of lanes in this SIMD vector type.
|
||||
///
|
||||
/// A scalar type is the same as a SIMD vector type with one lane, so it returns 1.
|
||||
pub fn lane_count(self) -> u16 {
|
||||
1 << self.log2_lane_count()
|
||||
}
|
||||
|
||||
/// Get the total number of bits used to represent this type.
|
||||
pub fn bits(self) -> u16 {
|
||||
u16::from(self.lane_bits()) * self.lane_count()
|
||||
}
|
||||
|
||||
/// Get the number of bytes used to store this type in memory.
|
||||
pub fn bytes(self) -> u32 {
|
||||
(u32::from(self.bits()) + 7) / 8
|
||||
}
|
||||
|
||||
/// Get a SIMD vector type with `n` times more lanes than this one.
|
||||
///
|
||||
/// If this is a scalar type, this produces a SIMD type with this as a lane type and `n` lanes.
|
||||
///
|
||||
/// If this is already a SIMD vector type, this produces a SIMD vector type with `n *
|
||||
/// self.lane_count()` lanes.
|
||||
pub fn by(self, n: u16) -> Option<Type> {
|
||||
if self.lane_bits() == 0 || !n.is_power_of_two() {
|
||||
return None;
|
||||
}
|
||||
let log2_lanes: u32 = n.trailing_zeros();
|
||||
let new_type = u32::from(self.0) + (log2_lanes << 4);
|
||||
if new_type < 0x100 {
|
||||
Some(Type(new_type as u8))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a SIMD vector with half the number of lanes.
|
||||
///
|
||||
/// There is no `double_vector()` method. Use `t.by(2)` instead.
|
||||
pub fn half_vector(self) -> Option<Type> {
|
||||
if self.is_vector() {
|
||||
Some(Type(self.0 - 0x10))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Index of this type, for use with hash tables etc.
|
||||
pub fn index(self) -> usize {
|
||||
usize::from(self.0)
|
||||
}
|
||||
|
||||
/// True iff:
|
||||
///
|
||||
/// 1. `self.lane_count() == other.lane_count()` and
|
||||
/// 2. `self.lane_bits() >= other.lane_bits()`
|
||||
pub fn wider_or_equal(self, other: Type) -> bool {
|
||||
self.lane_count() == other.lane_count() && self.lane_bits() >= other.lane_bits()
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Type {
|
||||
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||||
if self.is_bool() {
|
||||
write!(f, "b{}", self.lane_bits())
|
||||
} else if self.is_int() {
|
||||
write!(f, "i{}", self.lane_bits())
|
||||
} else if self.is_float() {
|
||||
write!(f, "f{}", self.lane_bits())
|
||||
} else if self.is_vector() {
|
||||
write!(f, "{}x{}", self.lane_type(), self.lane_count())
|
||||
} else {
|
||||
f.write_str(match *self {
|
||||
VOID => "void",
|
||||
IFLAGS => "iflags",
|
||||
FFLAGS => "fflags",
|
||||
_ => panic!("Invalid Type(0x{:x})", self.0),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for Type {
|
||||
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||||
if self.is_bool() {
|
||||
write!(f, "types::B{}", self.lane_bits())
|
||||
} else if self.is_int() {
|
||||
write!(f, "types::I{}", self.lane_bits())
|
||||
} else if self.is_float() {
|
||||
write!(f, "types::F{}", self.lane_bits())
|
||||
} else if self.is_vector() {
|
||||
write!(f, "{:?}X{}", self.lane_type(), self.lane_count())
|
||||
} else {
|
||||
match *self {
|
||||
VOID => write!(f, "types::VOID"),
|
||||
IFLAGS => write!(f, "types::IFLAGS"),
|
||||
FFLAGS => write!(f, "types::FFLAGS"),
|
||||
_ => write!(f, "Type(0x{:x})", self.0),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Type {
    /// The default type is `VOID` (`Type(0)`).
    fn default() -> Self {
        VOID
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::string::ToString;

    #[test]
    fn basic_scalars() {
        // Every scalar is its own lane type.
        let scalars = [
            VOID, IFLAGS, FFLAGS, B1, B8, B16, B32, B64, I8, I16, I32, I64, F32, F64,
        ];
        for &t in &scalars {
            assert_eq!(t, t.lane_type());
        }

        // Special types occupy no bits.
        assert_eq!(VOID.bits(), 0);
        assert_eq!(IFLAGS.bits(), 0);
        assert_eq!(FFLAGS.bits(), 0);

        // Expected lane widths for every scalar.
        let widths = [
            (VOID, 0),
            (IFLAGS, 0),
            (FFLAGS, 0),
            (B1, 1),
            (B8, 8),
            (B16, 16),
            (B32, 32),
            (B64, 64),
            (I8, 8),
            (I16, 16),
            (I32, 32),
            (I64, 64),
            (F32, 32),
            (F64, 64),
        ];
        for &(t, bits) in &widths {
            assert_eq!(t.lane_bits(), bits);
        }
    }

    #[test]
    fn typevar_functions() {
        // (type, expected half_width, expected double_width)
        let table = [
            (VOID, None, None),
            (IFLAGS, None, None),
            (FFLAGS, None, None),
            (B1, None, None),
            (B8, None, Some(B16)),
            (B16, Some(B8), Some(B32)),
            (B32, Some(B16), Some(B64)),
            (B64, Some(B32), None),
            (I8, None, Some(I16)),
            (I16, Some(I8), Some(I32)),
            (I32, Some(I16), Some(I64)),
            (I32X4, Some(I16X4), Some(I64X4)),
            (I64, Some(I32), None),
            (F32, None, Some(F64)),
            (F64, Some(F32), None),
        ];
        for &(t, half, double) in &table {
            assert_eq!(t.half_width(), half);
            assert_eq!(t.double_width(), double);
        }
    }

    #[test]
    fn vectors() {
        let big = F64.by(256).unwrap();
        assert_eq!(big.lane_bits(), 64);
        assert_eq!(big.lane_count(), 256);
        assert_eq!(big.bits(), 64 * 256);

        assert_eq!(big.half_vector().unwrap().to_string(), "f64x128");
        assert_eq!(B1.by(2).unwrap().half_vector().unwrap().to_string(), "b1");
        assert_eq!(I32.half_vector(), None);
        assert_eq!(VOID.half_vector(), None);

        // Check that the generated constants match the computed vector types.
        assert_eq!(I32.by(4), Some(I32X4));
        assert_eq!(F64.by(8), Some(F64X8));
    }

    #[test]
    fn format_scalars() {
        let expected = [
            (VOID, "void"),
            (IFLAGS, "iflags"),
            (FFLAGS, "fflags"),
            (B1, "b1"),
            (B8, "b8"),
            (B16, "b16"),
            (B32, "b32"),
            (B64, "b64"),
            (I8, "i8"),
            (I16, "i16"),
            (I32, "i32"),
            (I64, "i64"),
            (F32, "f32"),
            (F64, "f64"),
        ];
        for &(t, text) in &expected {
            assert_eq!(t.to_string(), text);
        }
    }

    #[test]
    fn format_vectors() {
        assert_eq!(B1.by(8).unwrap().to_string(), "b1x8");
        assert_eq!(B8.by(1).unwrap().to_string(), "b8");
        assert_eq!(B16.by(256).unwrap().to_string(), "b16x256");
        assert_eq!(B32.by(4).unwrap().by(2).unwrap().to_string(), "b32x8");
        assert_eq!(B64.by(8).unwrap().to_string(), "b64x8");
        assert_eq!(I8.by(64).unwrap().to_string(), "i8x64");
        assert_eq!(F64.by(2).unwrap().to_string(), "f64x2");

        // Non-power-of-two or oversized lane counts produce no type; void has no lanes.
        assert_eq!(I8.by(3), None);
        assert_eq!(I8.by(512), None);
        assert_eq!(VOID.by(4), None);
    }

    #[test]
    fn as_bool() {
        assert_eq!(I32X4.as_bool(), B32X4);
        assert_eq!(I32.as_bool(), B1);
        assert_eq!(I32X4.as_bool_pedantic(), B32X4);
        assert_eq!(I32.as_bool_pedantic(), B32);
    }
}
165
lib/codegen/src/ir/valueloc.rs
Normal file
165
lib/codegen/src/ir/valueloc.rs
Normal file
@@ -0,0 +1,165 @@
|
||||
//! Value locations.
|
||||
//!
|
||||
//! The register allocator assigns every SSA value to either a register or a stack slot. This
|
||||
//! assignment is represented by a `ValueLoc` object.
|
||||
|
||||
use ir::StackSlot;
|
||||
use isa::{RegInfo, RegUnit};
|
||||
use std::fmt;
|
||||
|
||||
/// Value location.
///
/// Where the register allocator has placed an SSA value: nowhere yet, in a register unit,
/// or in a stack slot.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ValueLoc {
    /// This value has not been assigned to a location yet.
    Unassigned,
    /// Value is assigned to a register.
    Reg(RegUnit),
    /// Value is assigned to a stack slot.
    Stack(StackSlot),
}
impl Default for ValueLoc {
|
||||
fn default() -> Self {
|
||||
ValueLoc::Unassigned
|
||||
}
|
||||
}
|
||||
|
||||
impl ValueLoc {
|
||||
/// Is this an assigned location? (That is, not `Unassigned`).
|
||||
pub fn is_assigned(&self) -> bool {
|
||||
match *self {
|
||||
ValueLoc::Unassigned => false,
|
||||
_ => true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the register unit of this location, or panic.
|
||||
pub fn unwrap_reg(self) -> RegUnit {
|
||||
match self {
|
||||
ValueLoc::Reg(ru) => ru,
|
||||
_ => panic!("Expected register: {:?}", self),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the stack slot of this location, or panic.
|
||||
pub fn unwrap_stack(self) -> StackSlot {
|
||||
match self {
|
||||
ValueLoc::Stack(ss) => ss,
|
||||
_ => panic!("Expected stack slot: {:?}", self),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return an object that can display this value location, using the register info from the
|
||||
/// target ISA.
|
||||
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(self, regs: R) -> DisplayValueLoc<'a> {
|
||||
DisplayValueLoc(self, regs.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Displaying a `ValueLoc` correctly requires the associated `RegInfo` from the target ISA.
|
||||
/// Without the register info, register units are simply show as numbers.
|
||||
///
|
||||
/// The `DisplayValueLoc` type can display the contained `ValueLoc`.
|
||||
pub struct DisplayValueLoc<'a>(ValueLoc, Option<&'a RegInfo>);
|
||||
|
||||
impl<'a> fmt::Display for DisplayValueLoc<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self.0 {
|
||||
ValueLoc::Unassigned => write!(f, "-"),
|
||||
ValueLoc::Reg(ru) => {
|
||||
match self.1 {
|
||||
Some(regs) => write!(f, "{}", regs.display_regunit(ru)),
|
||||
None => write!(f, "%{}", ru),
|
||||
}
|
||||
}
|
||||
ValueLoc::Stack(ss) => write!(f, "{}", ss),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Function argument location.
|
||||
///
|
||||
/// The ABI specifies how arguments are passed to a function, and where return values appear after
|
||||
/// the call. Just like a `ValueLoc`, function arguments can be passed in registers or on the
|
||||
/// stack.
|
||||
///
|
||||
/// Function arguments on the stack are accessed differently for the incoming arguments to the
|
||||
/// current function and the outgoing arguments to a called external function. For this reason,
|
||||
/// the location of stack arguments is described as an offset into the array of function arguments
|
||||
/// on the stack.
|
||||
///
|
||||
/// An `ArgumentLoc` can be translated to a `ValueLoc` only when we know if we're talking about an
|
||||
/// incoming argument or an outgoing argument.
|
||||
///
|
||||
/// - For stack arguments, different `StackSlot` entities are used to represent incoming and
|
||||
/// outgoing arguments.
|
||||
/// - For register arguments, there is usually no difference, but if we ever add support for a
|
||||
/// register-window ISA like SPARC, register arguments would also need to be translated.
|
||||
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
|
||||
pub enum ArgumentLoc {
|
||||
/// This argument has not been assigned to a location yet.
|
||||
Unassigned,
|
||||
/// Argument is passed in a register.
|
||||
Reg(RegUnit),
|
||||
/// Argument is passed on the stack, at the given byte offset into the argument array.
|
||||
Stack(i32),
|
||||
}
|
||||
|
||||
impl Default for ArgumentLoc {
|
||||
fn default() -> Self {
|
||||
ArgumentLoc::Unassigned
|
||||
}
|
||||
}
|
||||
|
||||
impl ArgumentLoc {
|
||||
/// Is this an assigned location? (That is, not `Unassigned`).
|
||||
pub fn is_assigned(&self) -> bool {
|
||||
match *self {
|
||||
ArgumentLoc::Unassigned => false,
|
||||
_ => true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is this a register location?
|
||||
pub fn is_reg(&self) -> bool {
|
||||
match *self {
|
||||
ArgumentLoc::Reg(_) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is this a stack location?
|
||||
pub fn is_stack(&self) -> bool {
|
||||
match *self {
|
||||
ArgumentLoc::Stack(_) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return an object that can display this argument location, using the register info from the
|
||||
/// target ISA.
|
||||
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(self, regs: R) -> DisplayArgumentLoc<'a> {
|
||||
DisplayArgumentLoc(self, regs.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Displaying a `ArgumentLoc` correctly requires the associated `RegInfo` from the target ISA.
|
||||
/// Without the register info, register units are simply show as numbers.
|
||||
///
|
||||
/// The `DisplayArgumentLoc` type can display the contained `ArgumentLoc`.
|
||||
pub struct DisplayArgumentLoc<'a>(ArgumentLoc, Option<&'a RegInfo>);
|
||||
|
||||
impl<'a> fmt::Display for DisplayArgumentLoc<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self.0 {
|
||||
ArgumentLoc::Unassigned => write!(f, "-"),
|
||||
ArgumentLoc::Reg(ru) => {
|
||||
match self.1 {
|
||||
Some(regs) => write!(f, "{}", regs.display_regunit(ru)),
|
||||
None => write!(f, "%{}", ru),
|
||||
}
|
||||
}
|
||||
ArgumentLoc::Stack(offset) => write!(f, "{}", offset),
|
||||
}
|
||||
}
|
||||
}
|
||||
35
lib/codegen/src/isa/arm32/abi.rs
Normal file
35
lib/codegen/src/isa/arm32/abi.rs
Normal file
@@ -0,0 +1,35 @@
|
||||
//! ARM ABI implementation.
|
||||
|
||||
use super::registers::{D, GPR, Q, S};
|
||||
use ir;
|
||||
use isa::RegClass;
|
||||
use regalloc::RegisterSet;
|
||||
use settings as shared_settings;
|
||||
|
||||
/// Legalize `sig`.
|
||||
pub fn legalize_signature(
|
||||
_sig: &mut ir::Signature,
|
||||
_flags: &shared_settings::Flags,
|
||||
_current: bool,
|
||||
) {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Get register class for a type appearing in a legalized signature.
|
||||
pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
|
||||
if ty.is_int() {
|
||||
GPR
|
||||
} else {
|
||||
match ty.bits() {
|
||||
32 => S,
|
||||
64 => D,
|
||||
128 => Q,
|
||||
_ => panic!("Unexpected {} ABI type for arm32", ty),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the set of allocatable registers for `func`.
|
||||
pub fn allocatable_registers(_func: &ir::Function) -> RegisterSet {
|
||||
unimplemented!()
|
||||
}
|
||||
7
lib/codegen/src/isa/arm32/binemit.rs
Normal file
7
lib/codegen/src/isa/arm32/binemit.rs
Normal file
@@ -0,0 +1,7 @@
|
||||
//! Emitting binary ARM32 machine code.
|
||||
|
||||
use binemit::{bad_encoding, CodeSink};
|
||||
use ir::{Function, Inst};
|
||||
use regalloc::RegDiversions;
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/binemit-arm32.rs"));
|
||||
10
lib/codegen/src/isa/arm32/enc_tables.rs
Normal file
10
lib/codegen/src/isa/arm32/enc_tables.rs
Normal file
@@ -0,0 +1,10 @@
|
||||
//! Encoding tables for ARM32 ISA.
|
||||
|
||||
use ir;
|
||||
use isa;
|
||||
use isa::constraints::*;
|
||||
use isa::enc_tables::*;
|
||||
use isa::encoding::RecipeSizing;
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/encoding-arm32.rs"));
|
||||
include!(concat!(env!("OUT_DIR"), "/legalize-arm32.rs"));
|
||||
118
lib/codegen/src/isa/arm32/mod.rs
Normal file
118
lib/codegen/src/isa/arm32/mod.rs
Normal file
@@ -0,0 +1,118 @@
|
||||
//! ARM 32-bit Instruction Set Architecture.
|
||||
|
||||
mod abi;
|
||||
mod binemit;
|
||||
mod enc_tables;
|
||||
mod registers;
|
||||
pub mod settings;
|
||||
|
||||
use super::super::settings as shared_settings;
|
||||
use binemit::{emit_function, CodeSink, MemoryCodeSink};
|
||||
use ir;
|
||||
use isa::Builder as IsaBuilder;
|
||||
use isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings};
|
||||
use isa::{EncInfo, RegClass, RegInfo, TargetIsa};
|
||||
use regalloc;
|
||||
use std::boxed::Box;
|
||||
use std::fmt;
|
||||
|
||||
#[allow(dead_code)]
|
||||
struct Isa {
|
||||
shared_flags: shared_settings::Flags,
|
||||
isa_flags: settings::Flags,
|
||||
cpumode: &'static [shared_enc_tables::Level1Entry<u16>],
|
||||
}
|
||||
|
||||
/// Get an ISA builder for creating ARM32 targets.
|
||||
pub fn isa_builder() -> IsaBuilder {
|
||||
IsaBuilder {
|
||||
setup: settings::builder(),
|
||||
constructor: isa_constructor,
|
||||
}
|
||||
}
|
||||
|
||||
fn isa_constructor(
|
||||
shared_flags: shared_settings::Flags,
|
||||
builder: &shared_settings::Builder,
|
||||
) -> Box<TargetIsa> {
|
||||
let level1 = if shared_flags.is_compressed() {
|
||||
&enc_tables::LEVEL1_T32[..]
|
||||
} else {
|
||||
&enc_tables::LEVEL1_A32[..]
|
||||
};
|
||||
Box::new(Isa {
|
||||
isa_flags: settings::Flags::new(&shared_flags, builder),
|
||||
shared_flags,
|
||||
cpumode: level1,
|
||||
})
|
||||
}
|
||||
|
||||
impl TargetIsa for Isa {
|
||||
fn name(&self) -> &'static str {
|
||||
"arm32"
|
||||
}
|
||||
|
||||
fn flags(&self) -> &shared_settings::Flags {
|
||||
&self.shared_flags
|
||||
}
|
||||
|
||||
fn register_info(&self) -> RegInfo {
|
||||
registers::INFO.clone()
|
||||
}
|
||||
|
||||
fn encoding_info(&self) -> EncInfo {
|
||||
enc_tables::INFO.clone()
|
||||
}
|
||||
|
||||
fn legal_encodings<'a>(
|
||||
&'a self,
|
||||
func: &'a ir::Function,
|
||||
inst: &'a ir::InstructionData,
|
||||
ctrl_typevar: ir::Type,
|
||||
) -> Encodings<'a> {
|
||||
lookup_enclist(
|
||||
ctrl_typevar,
|
||||
inst,
|
||||
func,
|
||||
self.cpumode,
|
||||
&enc_tables::LEVEL2[..],
|
||||
&enc_tables::ENCLISTS[..],
|
||||
&enc_tables::LEGALIZE_ACTIONS[..],
|
||||
&enc_tables::RECIPE_PREDICATES[..],
|
||||
&enc_tables::INST_PREDICATES[..],
|
||||
self.isa_flags.predicate_view(),
|
||||
)
|
||||
}
|
||||
|
||||
fn legalize_signature(&self, sig: &mut ir::Signature, current: bool) {
|
||||
abi::legalize_signature(sig, &self.shared_flags, current)
|
||||
}
|
||||
|
||||
fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
|
||||
abi::regclass_for_abi_type(ty)
|
||||
}
|
||||
|
||||
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
|
||||
abi::allocatable_registers(func)
|
||||
}
|
||||
|
||||
fn emit_inst(
|
||||
&self,
|
||||
func: &ir::Function,
|
||||
inst: ir::Inst,
|
||||
divert: &mut regalloc::RegDiversions,
|
||||
sink: &mut CodeSink,
|
||||
) {
|
||||
binemit::emit_inst(func, inst, divert, sink)
|
||||
}
|
||||
|
||||
fn emit_function(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
|
||||
emit_function(func, binemit::emit_inst, sink)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Isa {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}\n{}", self.shared_flags, self.isa_flags)
|
||||
}
|
||||
}
|
||||
68
lib/codegen/src/isa/arm32/registers.rs
Normal file
68
lib/codegen/src/isa/arm32/registers.rs
Normal file
@@ -0,0 +1,68 @@
|
||||
//! ARM32 register descriptions.
|
||||
|
||||
use isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit};
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/registers-arm32.rs"));
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{D, GPR, INFO, S};
    use isa::RegUnit;
    use std::string::{String, ToString};

    #[test]
    fn unit_encodings() {
        // Register names parse to their flat register-unit numbers.
        let cases: [(&str, RegUnit); 5] =
            [("s0", 0), ("s31", 31), ("s32", 32), ("r0", 64), ("r15", 79)];
        for &(name, unit) in &cases {
            assert_eq!(INFO.parse_regunit(name), Some(unit));
        }
    }

    #[test]
    fn unit_names() {
        fn uname(ru: RegUnit) -> String {
            INFO.display_regunit(ru).to_string()
        }

        assert_eq!(uname(0), "%s0");
        assert_eq!(uname(1), "%s1");
        assert_eq!(uname(31), "%s31");
        assert_eq!(uname(64), "%r0");
    }

    #[test]
    fn overlaps() {
        // arm32 has the most interesting register geometries, so test `regs_overlap()` here.
        use isa::regs_overlap;

        // GPRs only overlap themselves.
        let r0 = GPR.unit(0);
        let r1 = GPR.unit(1);
        let r2 = GPR.unit(2);

        assert!(regs_overlap(GPR, r0, GPR, r0));
        assert!(regs_overlap(GPR, r2, GPR, r2));
        assert!(!regs_overlap(GPR, r0, GPR, r1));
        assert!(!regs_overlap(GPR, r1, GPR, r0));
        assert!(!regs_overlap(GPR, r2, GPR, r1));
        assert!(!regs_overlap(GPR, r1, GPR, r2));

        // Each D register aliases a pair of S registers.
        let s0 = S.unit(0);
        let s1 = S.unit(1);
        let s2 = S.unit(2);
        let s3 = S.unit(3);
        let d0 = D.unit(0);
        let d1 = D.unit(1);

        assert!(regs_overlap(S, s0, D, d0));
        assert!(regs_overlap(S, s1, D, d0));
        assert!(!regs_overlap(S, s0, D, d1));
        assert!(!regs_overlap(S, s1, D, d1));
        assert!(regs_overlap(S, s2, D, d1));
        assert!(regs_overlap(S, s3, D, d1));
        assert!(!regs_overlap(D, d1, S, s1));
        assert!(regs_overlap(D, d1, S, s2));
        assert!(!regs_overlap(D, d0, D, d1));
        assert!(regs_overlap(D, d1, D, d1));
    }
}
|
||||
9
lib/codegen/src/isa/arm32/settings.rs
Normal file
9
lib/codegen/src/isa/arm32/settings.rs
Normal file
@@ -0,0 +1,9 @@
|
||||
//! ARM32 Settings.
|
||||
|
||||
use settings::{self, detail, Builder};
|
||||
use std::fmt;
|
||||
|
||||
// Include code generated by `lib/codegen/meta/gen_settings.py`. This file contains a public
|
||||
// `Flags` struct with an impl for all of the settings defined in
|
||||
// `lib/codegen/meta/isa/arm32/settings.py`.
|
||||
include!(concat!(env!("OUT_DIR"), "/settings-arm32.rs"));
|
||||
26
lib/codegen/src/isa/arm64/abi.rs
Normal file
26
lib/codegen/src/isa/arm64/abi.rs
Normal file
@@ -0,0 +1,26 @@
|
||||
//! ARM 64 ABI implementation.
|
||||
|
||||
use super::registers::{FPR, GPR};
|
||||
use ir;
|
||||
use isa::RegClass;
|
||||
use regalloc::RegisterSet;
|
||||
use settings as shared_settings;
|
||||
|
||||
/// Legalize `sig`.
|
||||
pub fn legalize_signature(
|
||||
_sig: &mut ir::Signature,
|
||||
_flags: &shared_settings::Flags,
|
||||
_current: bool,
|
||||
) {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Get register class for a type appearing in a legalized signature.
|
||||
pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
|
||||
if ty.is_int() { GPR } else { FPR }
|
||||
}
|
||||
|
||||
/// Get the set of allocatable registers for `func`.
|
||||
pub fn allocatable_registers(_func: &ir::Function) -> RegisterSet {
|
||||
unimplemented!()
|
||||
}
|
||||
7
lib/codegen/src/isa/arm64/binemit.rs
Normal file
7
lib/codegen/src/isa/arm64/binemit.rs
Normal file
@@ -0,0 +1,7 @@
|
||||
//! Emitting binary ARM64 machine code.
|
||||
|
||||
use binemit::{bad_encoding, CodeSink};
|
||||
use ir::{Function, Inst};
|
||||
use regalloc::RegDiversions;
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/binemit-arm64.rs"));
|
||||
10
lib/codegen/src/isa/arm64/enc_tables.rs
Normal file
10
lib/codegen/src/isa/arm64/enc_tables.rs
Normal file
@@ -0,0 +1,10 @@
|
||||
//! Encoding tables for ARM64 ISA.
|
||||
|
||||
use ir;
|
||||
use isa;
|
||||
use isa::constraints::*;
|
||||
use isa::enc_tables::*;
|
||||
use isa::encoding::RecipeSizing;
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/encoding-arm64.rs"));
|
||||
include!(concat!(env!("OUT_DIR"), "/legalize-arm64.rs"));
|
||||
111
lib/codegen/src/isa/arm64/mod.rs
Normal file
111
lib/codegen/src/isa/arm64/mod.rs
Normal file
@@ -0,0 +1,111 @@
|
||||
//! ARM 64-bit Instruction Set Architecture.
|
||||
|
||||
mod abi;
|
||||
mod binemit;
|
||||
mod enc_tables;
|
||||
mod registers;
|
||||
pub mod settings;
|
||||
|
||||
use super::super::settings as shared_settings;
|
||||
use binemit::{emit_function, CodeSink, MemoryCodeSink};
|
||||
use ir;
|
||||
use isa::Builder as IsaBuilder;
|
||||
use isa::enc_tables::{lookup_enclist, Encodings};
|
||||
use isa::{EncInfo, RegClass, RegInfo, TargetIsa};
|
||||
use regalloc;
|
||||
use std::boxed::Box;
|
||||
use std::fmt;
|
||||
|
||||
#[allow(dead_code)]
|
||||
struct Isa {
|
||||
shared_flags: shared_settings::Flags,
|
||||
isa_flags: settings::Flags,
|
||||
}
|
||||
|
||||
/// Get an ISA builder for creating ARM64 targets.
|
||||
pub fn isa_builder() -> IsaBuilder {
|
||||
IsaBuilder {
|
||||
setup: settings::builder(),
|
||||
constructor: isa_constructor,
|
||||
}
|
||||
}
|
||||
|
||||
fn isa_constructor(
|
||||
shared_flags: shared_settings::Flags,
|
||||
builder: &shared_settings::Builder,
|
||||
) -> Box<TargetIsa> {
|
||||
Box::new(Isa {
|
||||
isa_flags: settings::Flags::new(&shared_flags, builder),
|
||||
shared_flags,
|
||||
})
|
||||
}
|
||||
|
||||
impl TargetIsa for Isa {
|
||||
fn name(&self) -> &'static str {
|
||||
"arm64"
|
||||
}
|
||||
|
||||
fn flags(&self) -> &shared_settings::Flags {
|
||||
&self.shared_flags
|
||||
}
|
||||
|
||||
fn register_info(&self) -> RegInfo {
|
||||
registers::INFO.clone()
|
||||
}
|
||||
|
||||
fn encoding_info(&self) -> EncInfo {
|
||||
enc_tables::INFO.clone()
|
||||
}
|
||||
|
||||
fn legal_encodings<'a>(
|
||||
&'a self,
|
||||
func: &'a ir::Function,
|
||||
inst: &'a ir::InstructionData,
|
||||
ctrl_typevar: ir::Type,
|
||||
) -> Encodings<'a> {
|
||||
lookup_enclist(
|
||||
ctrl_typevar,
|
||||
inst,
|
||||
func,
|
||||
&enc_tables::LEVEL1_A64[..],
|
||||
&enc_tables::LEVEL2[..],
|
||||
&enc_tables::ENCLISTS[..],
|
||||
&enc_tables::LEGALIZE_ACTIONS[..],
|
||||
&enc_tables::RECIPE_PREDICATES[..],
|
||||
&enc_tables::INST_PREDICATES[..],
|
||||
self.isa_flags.predicate_view(),
|
||||
)
|
||||
}
|
||||
|
||||
fn legalize_signature(&self, sig: &mut ir::Signature, current: bool) {
|
||||
abi::legalize_signature(sig, &self.shared_flags, current)
|
||||
}
|
||||
|
||||
fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
|
||||
abi::regclass_for_abi_type(ty)
|
||||
}
|
||||
|
||||
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
|
||||
abi::allocatable_registers(func)
|
||||
}
|
||||
|
||||
fn emit_inst(
|
||||
&self,
|
||||
func: &ir::Function,
|
||||
inst: ir::Inst,
|
||||
divert: &mut regalloc::RegDiversions,
|
||||
sink: &mut CodeSink,
|
||||
) {
|
||||
binemit::emit_inst(func, inst, divert, sink)
|
||||
}
|
||||
|
||||
fn emit_function(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
|
||||
emit_function(func, binemit::emit_inst, sink)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Isa {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}\n{}", self.shared_flags, self.isa_flags)
|
||||
}
|
||||
}
|
||||
39
lib/codegen/src/isa/arm64/registers.rs
Normal file
39
lib/codegen/src/isa/arm64/registers.rs
Normal file
@@ -0,0 +1,39 @@
|
||||
//! ARM64 register descriptions.
|
||||
|
||||
use isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit};
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/registers-arm64.rs"));
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::INFO;
    use isa::RegUnit;
    use std::string::{String, ToString};

    #[test]
    fn unit_encodings() {
        // Valid register names parse to their flat register-unit numbers.
        let valid: [(&str, RegUnit); 4] = [("x0", 0), ("x31", 31), ("v0", 32), ("v31", 63)];
        for &(name, unit) in &valid {
            assert_eq!(INFO.parse_regunit(name), Some(unit));
        }

        // Out-of-range indices are rejected.
        assert_eq!(INFO.parse_regunit("x32"), None);
        assert_eq!(INFO.parse_regunit("v32"), None);
    }

    #[test]
    fn unit_names() {
        fn uname(ru: RegUnit) -> String {
            INFO.display_regunit(ru).to_string()
        }

        assert_eq!(uname(0), "%x0");
        assert_eq!(uname(1), "%x1");
        assert_eq!(uname(31), "%x31");
        assert_eq!(uname(32), "%v0");
        assert_eq!(uname(33), "%v1");
        assert_eq!(uname(63), "%v31");
        // Unit 64 is the flags register; anything past it is invalid.
        assert_eq!(uname(64), "%nzcv");
        assert_eq!(uname(65), "%INVALID65");
    }
}
|
||||
9
lib/codegen/src/isa/arm64/settings.rs
Normal file
9
lib/codegen/src/isa/arm64/settings.rs
Normal file
@@ -0,0 +1,9 @@
|
||||
//! ARM64 Settings.
|
||||
|
||||
use settings::{self, detail, Builder};
|
||||
use std::fmt;
|
||||
|
||||
// Include code generated by `lib/codegen/meta/gen_settings.py`. This file contains a public
|
||||
// `Flags` struct with an impl for all of the settings defined in
|
||||
// `lib/codegen/meta/isa/arm64/settings.py`.
|
||||
include!(concat!(env!("OUT_DIR"), "/settings-arm64.rs"));
|
||||
209
lib/codegen/src/isa/constraints.rs
Normal file
209
lib/codegen/src/isa/constraints.rs
Normal file
@@ -0,0 +1,209 @@
|
||||
//! Register constraints for instruction operands.
|
||||
//!
|
||||
//! An encoding recipe specifies how an instruction is encoded as binary machine code, but it only
|
||||
//! works if the operands and results satisfy certain constraints. Constraints on immediate
|
||||
//! operands are checked by instruction predicates when the recipe is chosen.
|
||||
//!
|
||||
//! It is the register allocator's job to make sure that the register constraints on value operands
|
||||
//! are satisfied.
|
||||
|
||||
use binemit::CodeOffset;
|
||||
use ir::{Function, Inst, ValueLoc};
|
||||
use isa::{RegClass, RegUnit};
|
||||
use regalloc::RegDiversions;
|
||||
|
||||
/// Register constraint for a single value operand or instruction result.
|
||||
#[derive(PartialEq, Debug)]
|
||||
pub struct OperandConstraint {
|
||||
/// The kind of constraint.
|
||||
pub kind: ConstraintKind,
|
||||
|
||||
/// The register class of the operand.
|
||||
///
|
||||
/// This applies to all kinds of constraints, but with slightly different meaning.
|
||||
pub regclass: RegClass,
|
||||
}
|
||||
|
||||
impl OperandConstraint {
|
||||
/// Check if this operand constraint is satisfied by the given value location.
|
||||
/// For tied constraints, this only checks the register class, not that the
|
||||
/// counterpart operand has the same value location.
|
||||
pub fn satisfied(&self, loc: ValueLoc) -> bool {
|
||||
match self.kind {
|
||||
ConstraintKind::Reg |
|
||||
ConstraintKind::Tied(_) => {
|
||||
if let ValueLoc::Reg(reg) = loc {
|
||||
self.regclass.contains(reg)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
ConstraintKind::FixedReg(reg) |
|
||||
ConstraintKind::FixedTied(reg) => {
|
||||
loc == ValueLoc::Reg(reg) && self.regclass.contains(reg)
|
||||
}
|
||||
ConstraintKind::Stack => {
|
||||
if let ValueLoc::Stack(_) = loc {
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The different kinds of operand constraints.
|
||||
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
|
||||
pub enum ConstraintKind {
|
||||
/// This operand or result must be a register from the given register class.
|
||||
Reg,
|
||||
|
||||
/// This operand or result must be a fixed register.
|
||||
///
|
||||
/// The constraint's `regclass` field is the top-level register class containing the fixed
|
||||
/// register.
|
||||
FixedReg(RegUnit),
|
||||
|
||||
/// This result value must use the same register as an input value operand.
|
||||
///
|
||||
/// The associated number is the index of the input value operand this result is tied to. The
|
||||
/// constraint's `regclass` field is the same as the tied operand's register class.
|
||||
///
|
||||
/// When an (in, out) operand pair is tied, this constraint kind appears in both the `ins` and
|
||||
/// the `outs` arrays. The constraint for the in operand is `Tied(out)`, and the constraint for
|
||||
/// the out operand is `Tied(in)`.
|
||||
Tied(u8),
|
||||
|
||||
/// This operand must be a fixed register, and it has a tied counterpart.
|
||||
///
|
||||
/// This works just like `FixedReg`, but additionally indicates that there are identical
|
||||
/// input/output operands for this fixed register. For an input operand, this means that the
|
||||
/// value will be clobbered by the instruction
|
||||
FixedTied(RegUnit),
|
||||
|
||||
/// This operand must be a value in a stack slot.
|
||||
///
|
||||
/// The constraint's `regclass` field is the register class that would normally be used to load
|
||||
/// and store values of this type.
|
||||
Stack,
|
||||
}
|
||||
|
||||
/// Value operand constraints for an encoding recipe.
|
||||
#[derive(PartialEq, Clone)]
|
||||
pub struct RecipeConstraints {
|
||||
/// Constraints for the instruction's fixed value operands.
|
||||
///
|
||||
/// If the instruction takes a variable number of operands, the register constraints for those
|
||||
/// operands must be computed dynamically.
|
||||
///
|
||||
/// - For branches and jumps, EBB arguments must match the expectations of the destination EBB.
|
||||
/// - For calls and returns, the calling convention ABI specifies constraints.
|
||||
pub ins: &'static [OperandConstraint],
|
||||
|
||||
/// Constraints for the instruction's fixed results.
|
||||
///
|
||||
/// If the instruction produces a variable number of results, it's probably a call and the
|
||||
/// constraints must be derived from the calling convention ABI.
|
||||
pub outs: &'static [OperandConstraint],
|
||||
|
||||
/// Are any of the input constraints `FixedReg`?
|
||||
pub fixed_ins: bool,
|
||||
|
||||
/// Are any of the output constraints `FixedReg`?
|
||||
pub fixed_outs: bool,
|
||||
|
||||
/// Are there any tied operands?
|
||||
pub tied_ops: bool,
|
||||
|
||||
/// Does this instruction clobber the CPU flags?
|
||||
///
|
||||
/// When true, SSA values of type `iflags` or `fflags` can not be live across the instruction.
|
||||
pub clobbers_flags: bool,
|
||||
}
|
||||
|
||||
impl RecipeConstraints {
|
||||
/// Check that these constraints are satisfied by the operands on `inst`.
|
||||
pub fn satisfied(&self, inst: Inst, divert: &RegDiversions, func: &Function) -> bool {
|
||||
for (&arg, constraint) in func.dfg.inst_args(inst).iter().zip(self.ins) {
|
||||
let loc = divert.get(arg, &func.locations);
|
||||
|
||||
if let ConstraintKind::Tied(out_index) = constraint.kind {
|
||||
let out_val = func.dfg.inst_results(inst)[out_index as usize];
|
||||
let out_loc = func.locations[out_val];
|
||||
if loc != out_loc {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if !constraint.satisfied(loc) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
for (&arg, constraint) in func.dfg.inst_results(inst).iter().zip(self.outs) {
|
||||
let loc = divert.get(arg, &func.locations);
|
||||
if !constraint.satisfied(loc) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
/// Constraints on the range of a branch instruction.
|
||||
///
|
||||
/// A branch instruction usually encodes its destination as a signed n-bit offset from an origin.
|
||||
/// The origin depends on the ISA and the specific instruction:
|
||||
///
|
||||
/// - RISC-V and ARM Aarch64 use the address of the branch instruction, `origin = 0`.
|
||||
/// - x86 uses the address of the instruction following the branch, `origin = 2` for a 2-byte
|
||||
/// branch instruction.
|
||||
/// - ARM's A32 encoding uses the address of the branch instruction + 8 bytes, `origin = 8`.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct BranchRange {
|
||||
/// Offset in bytes from the address of the branch instruction to the origin used for computing
|
||||
/// the branch displacement. This is the destination of a branch that encodes a 0 displacement.
|
||||
pub origin: u8,
|
||||
|
||||
/// Number of bits in the signed byte displacement encoded in the instruction. This does not
|
||||
/// account for branches that can only target aligned addresses.
|
||||
pub bits: u8,
|
||||
}
|
||||
|
||||
impl BranchRange {
|
||||
/// Determine if this branch range can represent the range from `branch` to `dest`, where
|
||||
/// `branch` is the code offset of the branch instruction itself and `dest` is the code offset
|
||||
/// of the destination EBB header.
|
||||
///
|
||||
/// This method does not detect if the range is larger than 2 GB.
|
||||
pub fn contains(self, branch: CodeOffset, dest: CodeOffset) -> bool {
|
||||
let d = dest.wrapping_sub(branch + CodeOffset::from(self.origin)) as i32;
|
||||
let s = 32 - self.bits;
|
||||
d == d << s >> s
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn branch_range() {
        // ARM T1 branch: origin 4 bytes past the branch, 9-bit signed displacement.
        let t1 = BranchRange { origin: 4, bits: 9 };

        // Short hops in either direction are in range.
        assert!(t1.contains(0, 0));
        assert!(t1.contains(0, 2));
        assert!(t1.contains(2, 0));
        assert!(t1.contains(1000, 1000));

        // Forward limit: the largest representable positive displacement.
        assert!(t1.contains(1000, 1258));
        assert!(!t1.contains(1000, 1260));

        // Backward limit: the most negative representable displacement.
        assert!(t1.contains(1000, 748));
        assert!(!t1.contains(1000, 746));
    }
}
|
||||
292
lib/codegen/src/isa/enc_tables.rs
Normal file
292
lib/codegen/src/isa/enc_tables.rs
Normal file
@@ -0,0 +1,292 @@
|
||||
//! Support types for generated encoding tables.
|
||||
//!
|
||||
//! This module contains types and functions for working with the encoding tables generated by
|
||||
//! `lib/codegen/meta/gen_encoding.py`.
|
||||
|
||||
use constant_hash::{probe, Table};
|
||||
use ir::{Function, InstructionData, Opcode, Type};
|
||||
use isa::{Encoding, Legalize};
|
||||
use settings::PredicateView;
|
||||
use std::ops::Range;
|
||||
|
||||
/// A recipe predicate.
|
||||
///
|
||||
/// This is a predicate function capable of testing ISA and instruction predicates simultaneously.
|
||||
///
|
||||
/// A None predicate is always satisfied.
|
||||
pub type RecipePredicate = Option<fn(PredicateView, &InstructionData) -> bool>;
|
||||
|
||||
/// An instruction predicate.
|
||||
///
|
||||
/// This is a predicate function that needs to be tested in addition to the recipe predicate. It
|
||||
/// can't depend on ISA settings.
|
||||
pub type InstPredicate = fn(&Function, &InstructionData) -> bool;
|
||||
|
||||
/// Legalization action to perform when no encoding can be found for an instruction.
|
||||
///
|
||||
/// This is an index into an ISA-specific table of legalization actions.
|
||||
pub type LegalizeCode = u8;
|
||||
|
||||
/// Level 1 hash table entry.
|
||||
///
|
||||
/// One level 1 hash table is generated per CPU mode. This table is keyed by the controlling type
|
||||
/// variable, using `VOID` for non-polymorphic instructions.
|
||||
///
|
||||
/// The hash table values are references to level 2 hash tables, encoded as an offset in `LEVEL2`
|
||||
/// where the table begins, and the binary logarithm of its length. All the level 2 hash tables
|
||||
/// have a power-of-two size.
|
||||
///
|
||||
/// Entries are generic over the offset type. It will typically be `u32` or `u16`, depending on the
|
||||
/// size of the `LEVEL2` table.
|
||||
///
|
||||
/// Empty entries are encoded with a `!0` value for `log2len` which will always be out of range.
|
||||
/// Entries that have a `legalize` value but no level 2 table have an `offset` field that is out f
|
||||
/// bounds.
|
||||
pub struct Level1Entry<OffT: Into<u32> + Copy> {
|
||||
pub ty: Type,
|
||||
pub log2len: u8,
|
||||
pub legalize: LegalizeCode,
|
||||
pub offset: OffT,
|
||||
}
|
||||
|
||||
impl<OffT: Into<u32> + Copy> Level1Entry<OffT> {
|
||||
/// Get the level 2 table range indicated by this entry.
|
||||
fn range(&self) -> Range<usize> {
|
||||
let b = self.offset.into() as usize;
|
||||
b..b + (1 << self.log2len)
|
||||
}
|
||||
}
|
||||
|
||||
impl<OffT: Into<u32> + Copy> Table<Type> for [Level1Entry<OffT>] {
    fn len(&self) -> usize {
        // Inherent slice `len`, not recursion.
        self.len()
    }

    fn key(&self, idx: usize) -> Option<Type> {
        let entry = &self[idx];
        // `!0` in `log2len` marks an empty (unoccupied) hash table slot.
        if entry.log2len == !0 {
            None
        } else {
            Some(entry.ty)
        }
    }
}
|
||||
|
||||
/// Level 2 hash table entry.
///
/// The second level hash tables are keyed by `Opcode`, and contain an offset into the `ENCLISTS`
/// table where the encoding recipes for the instruction are stored.
///
/// Entries are generic over the offset type which depends on the size of `ENCLISTS`. A `u16`
/// offset allows the entries to be only 32 bits each. There is no benefit to dropping down to `u8`
/// for tiny ISAs. The entries won't shrink below 32 bits since the opcode is expected to be 16
/// bits.
///
/// Empty entries are encoded with a `NotAnOpcode` `opcode` field.
pub struct Level2Entry<OffT: Into<u32> + Copy> {
    /// Opcode this entry is keyed by; `None` marks an empty slot.
    pub opcode: Option<Opcode>,
    /// Offset into `ENCLISTS` where this instruction's encoding list begins.
    pub offset: OffT,
}
impl<OffT: Into<u32> + Copy> Table<Opcode> for [Level2Entry<OffT>] {
    fn len(&self) -> usize {
        // Resolves to the inherent `<[T]>::len`, not this trait method, so no recursion.
        self.len()
    }

    fn key(&self, idx: usize) -> Option<Opcode> {
        // `None` marks an empty hash table slot.
        self[idx].opcode
    }
}
/// Two-level hash table lookup and iterator construction.
///
/// Given the controlling type variable and instruction opcode, find the corresponding encoding
/// list.
///
/// The lookup first probes `level1_table` by controlling type, then the type's level 2 table by
/// opcode. Either probe may miss, in which case the returned iterator is empty but still carries
/// a valid legalization code.
///
/// Returns an iterator that produces legal encodings for `inst`.
pub fn lookup_enclist<'a, OffT1, OffT2>(
    ctrl_typevar: Type,
    inst: &'a InstructionData,
    func: &'a Function,
    level1_table: &'static [Level1Entry<OffT1>],
    level2_table: &'static [Level2Entry<OffT2>],
    enclist: &'static [EncListEntry],
    legalize_actions: &'static [Legalize],
    recipe_preds: &'static [RecipePredicate],
    inst_preds: &'static [InstPredicate],
    isa_preds: PredicateView<'a>,
) -> Encodings<'a>
where
    OffT1: Into<u32> + Copy,
    OffT2: Into<u32> + Copy,
{
    let (offset, legalize) = match probe(level1_table, ctrl_typevar, ctrl_typevar.index()) {
        Err(l1idx) => {
            // No level 1 entry found for the type.
            // We have a sentinel entry with the default legalization code.
            (!0, level1_table[l1idx].legalize)
        }
        Ok(l1idx) => {
            // We have a valid level 1 entry for this type.
            let l1ent = &level1_table[l1idx];
            let offset = match level2_table.get(l1ent.range()) {
                Some(l2tab) => {
                    let opcode = inst.opcode();
                    match probe(l2tab, opcode, opcode as usize) {
                        Ok(l2idx) => l2tab[l2idx].offset.into() as usize,
                        // Opcode not present in the level 2 table: no encodings.
                        Err(_) => !0,
                    }
                }
                // The l1ent range is invalid. This means that we just have a customized
                // legalization code for this type. The level 2 table is empty.
                None => !0,
            };
            (offset, l1ent.legalize)
        }
    };

    // Now we have an offset into `enclist` that is `!0` when no encoding list could be found.
    // The default legalization code is always valid.
    Encodings::new(
        offset,
        legalize,
        inst,
        func,
        enclist,
        legalize_actions,
        recipe_preds,
        inst_preds,
        isa_preds,
    )
}
/// Encoding list entry.
///
/// Encoding lists are represented as sequences of u16 words.
pub type EncListEntry = u16;

/// Number of bits used to represent a predicate. c.f. `meta/gen_encoding.py`.
const PRED_BITS: u8 = 12;
/// Mask extracting the predicate number from a predicate code word.
const PRED_MASK: usize = (1 << PRED_BITS) - 1;
/// First code word representing a predicate check. c.f. `meta/gen_encoding.py`.
/// Entries below this value are either "recipe+bits" or "stop with legalize" words.
const PRED_START: usize = 0x1000;
/// An iterator over legal encodings for the instruction.
pub struct Encodings<'a> {
    // Current offset into `enclist`, or out of bounds (`!0`) after we've reached the end.
    offset: usize,
    // Legalization code to use if no encoding is found.
    legalize: LegalizeCode,
    // Instruction being encoded and its enclosing function (needed by instruction predicates).
    inst: &'a InstructionData,
    func: &'a Function,
    // Static ISA tables: encoding lists, legalization actions, and predicate tables.
    enclist: &'static [EncListEntry],
    legalize_actions: &'static [Legalize],
    recipe_preds: &'static [RecipePredicate],
    inst_preds: &'static [InstPredicate],
    // Dynamic view of the ISA predicate values for this compilation.
    isa_preds: PredicateView<'a>,
}
impl<'a> Encodings<'a> {
|
||||
/// Creates a new instance of `Encodings`.
|
||||
///
|
||||
/// This iterator provides search for encodings that applies to the given instruction. The
|
||||
/// encoding lists are laid out such that first call to `next` returns valid entry in the list
|
||||
/// or `None`.
|
||||
pub fn new(
|
||||
offset: usize,
|
||||
legalize: LegalizeCode,
|
||||
inst: &'a InstructionData,
|
||||
func: &'a Function,
|
||||
enclist: &'static [EncListEntry],
|
||||
legalize_actions: &'static [Legalize],
|
||||
recipe_preds: &'static [RecipePredicate],
|
||||
inst_preds: &'static [InstPredicate],
|
||||
isa_preds: PredicateView<'a>,
|
||||
) -> Self {
|
||||
Encodings {
|
||||
offset,
|
||||
inst,
|
||||
func,
|
||||
legalize,
|
||||
isa_preds,
|
||||
recipe_preds,
|
||||
inst_preds,
|
||||
enclist,
|
||||
legalize_actions,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the legalization action that caused the enumeration of encodings to stop.
|
||||
/// This can be the default legalization action for the type or a custom code for the
|
||||
/// instruction.
|
||||
///
|
||||
/// This method must only be called after the iterator returns `None`.
|
||||
pub fn legalize(&self) -> Legalize {
|
||||
debug_assert_eq!(self.offset, !0, "Premature Encodings::legalize()");
|
||||
self.legalize_actions[self.legalize as usize]
|
||||
}
|
||||
|
||||
/// Check if the `rpred` recipe predicate is satisfied.
|
||||
fn check_recipe(&self, rpred: RecipePredicate) -> bool {
|
||||
match rpred {
|
||||
Some(p) => p(self.isa_preds, self.inst),
|
||||
None => true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Check an instruction or isa predicate.
|
||||
fn check_pred(&self, pred: usize) -> bool {
|
||||
if let Some(&p) = self.inst_preds.get(pred) {
|
||||
p(self.func, self.inst)
|
||||
} else {
|
||||
let pred = pred - self.inst_preds.len();
|
||||
self.isa_preds.test(pred)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for Encodings<'a> {
    type Item = Encoding;

    fn next(&mut self) -> Option<Encoding> {
        // Walk the encoding list until we produce an encoding, hit a terminator, or run off the
        // end. `self.offset` is set to `!0` to mark the iterator as finished.
        while let Some(entryref) = self.enclist.get(self.offset) {
            let entry = *entryref as usize;

            // Check for "recipe+bits".
            let recipe = entry >> 1;
            if let Some(&rpred) = self.recipe_preds.get(recipe) {
                // The word after the recipe word holds the encoding bits.
                let bits = self.offset + 1;
                // The low bit of the entry distinguishes "more entries follow" (0) from
                // "this is the last entry" (1).
                if entry & 1 == 0 {
                    self.offset += 2; // Next entry.
                } else {
                    self.offset = !0; // Stop.
                }
                if self.check_recipe(rpred) {
                    return Some(Encoding::new(recipe as u16, self.enclist[bits]));
                }
                continue;
            }

            // Check for "stop with legalize".
            // These words are numbered directly after the 2-word recipe entries.
            if entry < PRED_START {
                self.legalize = (entry - 2 * self.recipe_preds.len()) as LegalizeCode;
                self.offset = !0; // Stop.
                return None;
            }

            // Finally, this must be a predicate entry.
            // The high bits hold a skip count; the low `PRED_BITS` hold the predicate number.
            let pred_entry = entry - PRED_START;
            let skip = pred_entry >> PRED_BITS;
            let pred = pred_entry & PRED_MASK;

            if self.check_pred(pred) {
                self.offset += 1;
            } else if skip == 0 {
                self.offset = !0; // Stop.
                return None;
            } else {
                self.offset += 1 + skip;
            }
        }
        None
    }
}
137
lib/codegen/src/isa/encoding.rs
Normal file
137
lib/codegen/src/isa/encoding.rs
Normal file
@@ -0,0 +1,137 @@
|
||||
//! The `Encoding` struct.
|
||||
|
||||
use binemit::CodeOffset;
|
||||
use isa::constraints::{BranchRange, RecipeConstraints};
|
||||
use std::fmt;
|
||||
|
||||
/// Bits needed to encode an instruction as binary machine code.
///
/// The encoding consists of two parts, both specific to the target ISA: An encoding *recipe*, and
/// encoding *bits*. The recipe determines the native instruction format and the mapping of
/// operands to encoded bits. The encoding bits provide additional information to the recipe,
/// typically parts of the opcode.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Encoding {
    recipe: u16,
    bits: u16,
}

impl Encoding {
    /// Create a new `Encoding` containing `(recipe, bits)`.
    pub fn new(recipe: u16, bits: u16) -> Encoding {
        Encoding { recipe, bits }
    }

    /// Get the recipe number in this encoding.
    pub fn recipe(self) -> usize {
        usize::from(self.recipe)
    }

    /// Get the recipe-specific encoding bits.
    pub fn bits(self) -> u16 {
        self.bits
    }

    /// Is this a legal encoding, or the default placeholder?
    pub fn is_legal(self) -> bool {
        self != Self::default()
    }
}

/// The default encoding is the illegal one.
impl Default for Encoding {
    fn default() -> Self {
        Encoding {
            recipe: 0xffff,
            bits: 0xffff,
        }
    }
}

/// ISA-independent display of an encoding.
impl fmt::Display for Encoding {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if !self.is_legal() {
            return write!(f, "-");
        }
        write!(f, "{}#{:02x}", self.recipe, self.bits)
    }
}
/// Temporary object that holds enough context to properly display an encoding.
|
||||
/// This is meant to be created by `EncInfo::display()`.
|
||||
pub struct DisplayEncoding {
|
||||
pub encoding: Encoding,
|
||||
pub recipe_names: &'static [&'static str],
|
||||
}
|
||||
|
||||
impl fmt::Display for DisplayEncoding {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
if self.encoding.is_legal() {
|
||||
write!(
|
||||
f,
|
||||
"{}#{:02x}",
|
||||
self.recipe_names[self.encoding.recipe()],
|
||||
self.encoding.bits
|
||||
)
|
||||
} else {
|
||||
write!(f, "-")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Code size information for an encoding recipe.
///
/// All encoding recipes correspond to an exact instruction size.
pub struct RecipeSizing {
    /// Size in bytes of instructions encoded with this recipe.
    pub bytes: u8,

    /// Allowed branch range in this recipe, if any.
    ///
    /// All encoding recipes for branches have exact branch range information.
    pub branch_range: Option<BranchRange>,
}
/// Information about all the encodings in this ISA.
///
/// All three tables are indexed by recipe number, so they must have the same length.
#[derive(Clone)]
pub struct EncInfo {
    /// Constraints on value operands per recipe.
    pub constraints: &'static [RecipeConstraints],

    /// Code size information per recipe.
    pub sizing: &'static [RecipeSizing],

    /// Names of encoding recipes.
    pub names: &'static [&'static str],
}
impl EncInfo {
|
||||
/// Get the value operand constraints for `enc` if it is a legal encoding.
|
||||
pub fn operand_constraints(&self, enc: Encoding) -> Option<&'static RecipeConstraints> {
|
||||
self.constraints.get(enc.recipe())
|
||||
}
|
||||
|
||||
/// Create an object that can display an ISA-dependent encoding properly.
|
||||
pub fn display(&self, enc: Encoding) -> DisplayEncoding {
|
||||
DisplayEncoding {
|
||||
encoding: enc,
|
||||
recipe_names: self.names,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the exact size in bytes of instructions encoded with `enc`.
|
||||
///
|
||||
/// Returns 0 for illegal encodings.
|
||||
pub fn bytes(&self, enc: Encoding) -> CodeOffset {
|
||||
self.sizing
|
||||
.get(enc.recipe())
|
||||
.map(|s| CodeOffset::from(s.bytes))
|
||||
.unwrap_or(0)
|
||||
}
|
||||
|
||||
/// Get the branch range that is supported by `enc`, if any.
|
||||
///
|
||||
/// This will never return `None` for a legal branch encoding.
|
||||
pub fn branch_range(&self, enc: Encoding) -> Option<BranchRange> {
|
||||
self.sizing.get(enc.recipe()).and_then(|s| s.branch_range)
|
||||
}
|
||||
}
|
||||
282
lib/codegen/src/isa/mod.rs
Normal file
282
lib/codegen/src/isa/mod.rs
Normal file
@@ -0,0 +1,282 @@
|
||||
//! Instruction Set Architectures.
|
||||
//!
|
||||
//! The `isa` module provides a `TargetIsa` trait which provides the behavior specialization needed
|
||||
//! by the ISA-independent code generator. The sub-modules of this module provide definitions for
|
||||
//! the instruction sets that Cretonne can target. Each sub-module has its own implementation of
|
||||
//! `TargetIsa`.
|
||||
//!
|
||||
//! # Constructing a `TargetIsa` instance
|
||||
//!
|
||||
//! The target ISA is built from the following information:
|
||||
//!
|
||||
//! - The name of the target ISA as a string. Cretonne is a cross-compiler, so the ISA to target
|
||||
//! can be selected dynamically. Individual ISAs can be left out when Cretonne is compiled, so a
|
||||
//! string is used to identify the proper sub-module.
|
||||
//! - Values for settings that apply to all ISAs. This is represented by a `settings::Flags`
|
||||
//! instance.
|
||||
//! - Values for ISA-specific settings.
|
||||
//!
|
||||
//! The `isa::lookup()` function is the main entry point which returns an `isa::Builder`
|
||||
//! appropriate for the requested ISA:
|
||||
//!
|
||||
//! ```
|
||||
//! use cretonne_codegen::settings::{self, Configurable};
|
||||
//! use cretonne_codegen::isa;
|
||||
//!
|
||||
//! let shared_builder = settings::builder();
|
||||
//! let shared_flags = settings::Flags::new(&shared_builder);
|
||||
//!
|
||||
//! match isa::lookup("riscv") {
|
||||
//! Err(_) => {
|
||||
//! // The RISC-V target ISA is not available.
|
||||
//! }
|
||||
//! Ok(mut isa_builder) => {
|
||||
//! isa_builder.set("supports_m", "on");
|
||||
//! let isa = isa_builder.finish(shared_flags);
|
||||
//! }
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! The configured target ISA trait object is a `Box<TargetIsa>` which can be used for multiple
|
||||
//! concurrent function compilations.
|
||||
|
||||
pub use isa::constraints::{BranchRange, ConstraintKind, OperandConstraint, RecipeConstraints};
|
||||
pub use isa::encoding::{EncInfo, Encoding};
|
||||
pub use isa::registers::{regs_overlap, RegClass, RegClassIndex, RegInfo, RegUnit};
|
||||
pub use isa::stack::{StackBase, StackBaseMask, StackRef};
|
||||
|
||||
use binemit;
|
||||
use flowgraph;
|
||||
use ir;
|
||||
use isa::enc_tables::Encodings;
|
||||
use regalloc;
|
||||
use result;
|
||||
use settings;
|
||||
use std::boxed::Box;
|
||||
use std::fmt;
|
||||
use timing;
|
||||
|
||||
#[cfg(build_riscv)]
|
||||
mod riscv;
|
||||
|
||||
#[cfg(build_x86)]
|
||||
mod x86;
|
||||
|
||||
#[cfg(build_arm32)]
|
||||
mod arm32;
|
||||
|
||||
#[cfg(build_arm64)]
|
||||
mod arm64;
|
||||
|
||||
mod constraints;
|
||||
mod enc_tables;
|
||||
mod encoding;
|
||||
pub mod registers;
|
||||
mod stack;
|
||||
|
||||
/// Returns a builder that can create a corresponding `TargetIsa`
/// or `Err(LookupError::Unsupported)` if not enabled.
///
/// `$module` is the ISA sub-module providing `isa_builder()`; `$name` is the cfg flag that
/// controls whether that ISA was compiled in (e.g. `build_riscv`).
macro_rules! isa_builder {
    ($module:ident, $name:ident) => {{
        // When the ISA is enabled, forward to its builder.
        #[cfg($name)]
        fn $name() -> Result<Builder, LookupError> {
            Ok($module::isa_builder())
        }
        // When the ISA is compiled out, report it as known-but-unsupported.
        #[cfg(not($name))]
        fn $name() -> Result<Builder, LookupError> {
            Err(LookupError::Unsupported)
        }
        $name()
    }};
}
/// Look for a supported ISA with the given `name`.
/// Return a builder that can create a corresponding `TargetIsa`.
///
/// Returns `Err(LookupError::Unknown)` for unrecognized names and
/// `Err(LookupError::Unsupported)` for ISAs that were not compiled in.
pub fn lookup(name: &str) -> Result<Builder, LookupError> {
    match name {
        "riscv" => isa_builder!(riscv, build_riscv),
        "x86" => isa_builder!(x86, build_x86),
        "arm32" => isa_builder!(arm32, build_arm32),
        "arm64" => isa_builder!(arm64, build_arm64),
        _ => Err(LookupError::Unknown),
    }
}
/// Describes reason for target lookup failure
#[derive(PartialEq, Eq, Copy, Clone, Debug)]
pub enum LookupError {
    /// Unknown Target
    Unknown,

    /// Target known but not built and thus not supported
    Unsupported,
}
/// Builder for a `TargetIsa`.
/// Modify the ISA-specific settings before creating the `TargetIsa` trait object with `finish`.
pub struct Builder {
    // ISA-specific settings being configured before `finish`.
    setup: settings::Builder,
    // ISA-provided function that combines shared flags and ISA settings into a `TargetIsa`.
    constructor: fn(settings::Flags, &settings::Builder) -> Box<TargetIsa>,
}
impl Builder {
    /// Combine the ISA-specific settings with the provided ISA-independent settings and allocate a
    /// fully configured `TargetIsa` trait object.
    pub fn finish(self, shared_flags: settings::Flags) -> Box<TargetIsa> {
        (self.constructor)(shared_flags, &self.setup)
    }
}
/// Forward setting configuration to the ISA-specific settings builder.
impl settings::Configurable for Builder {
    fn set(&mut self, name: &str, value: &str) -> settings::Result<()> {
        self.setup.set(name, value)
    }

    fn enable(&mut self, name: &str) -> settings::Result<()> {
        self.setup.enable(name)
    }
}
/// After determining that an instruction doesn't have an encoding, how should we proceed to
|
||||
/// legalize it?
|
||||
///
|
||||
/// The `Encodings` iterator returns a legalization function to call.
|
||||
pub type Legalize = fn(ir::Inst,
|
||||
&mut ir::Function,
|
||||
&mut flowgraph::ControlFlowGraph,
|
||||
&TargetIsa)
|
||||
-> bool;
|
||||
|
||||
/// Methods that are specialized to a target ISA. Implies a Display trait that shows the
/// shared flags, as well as any isa-specific flags.
pub trait TargetIsa: fmt::Display {
    /// Get the name of this ISA.
    fn name(&self) -> &'static str;

    /// Get the ISA-independent flags that were used to make this trait object.
    fn flags(&self) -> &settings::Flags;

    /// Does the CPU implement scalar comparisons using a CPU flags register?
    fn uses_cpu_flags(&self) -> bool {
        false
    }

    /// Get a data structure describing the registers in this ISA.
    fn register_info(&self) -> RegInfo;

    /// Returns an iterator over legal encodings for the instruction.
    fn legal_encodings<'a>(
        &'a self,
        func: &'a ir::Function,
        inst: &'a ir::InstructionData,
        ctrl_typevar: ir::Type,
    ) -> Encodings<'a>;

    /// Encode an instruction after determining it is legal.
    ///
    /// If `inst` can legally be encoded in this ISA, produce the corresponding `Encoding` object.
    /// Otherwise, return `Legalize` action.
    ///
    /// This is also the main entry point for determining if an instruction is legal.
    fn encode(
        &self,
        func: &ir::Function,
        inst: &ir::InstructionData,
        ctrl_typevar: ir::Type,
    ) -> Result<Encoding, Legalize> {
        let mut iter = self.legal_encodings(func, inst, ctrl_typevar);
        iter.next().ok_or_else(|| iter.legalize())
    }

    /// Get a data structure describing the instruction encodings in this ISA.
    fn encoding_info(&self) -> EncInfo;

    /// Legalize a function signature.
    ///
    /// This is used to legalize both the signature of the function being compiled and any called
    /// functions. The signature should be modified by adding `ArgumentLoc` annotations to all
    /// arguments and return values.
    ///
    /// Arguments with types that are not supported by the ABI can be expanded into multiple
    /// arguments:
    ///
    /// - Integer types that are too large to fit in a register can be broken into multiple
    ///   arguments of a smaller integer type.
    /// - Floating point types can be bit-cast to an integer type of the same size, and possibly
    ///   broken into smaller integer types.
    /// - Vector types can be bit-cast and broken down into smaller vectors or scalars.
    ///
    /// The legalizer will adapt argument and return values as necessary at all ABI boundaries.
    ///
    /// When this function is called to legalize the signature of the function currently being
    /// compiled, `current` is true. The legalized signature can then also contain special purpose
    /// arguments and return values such as:
    ///
    /// - A `link` argument representing the link registers on RISC architectures that don't push
    ///   the return address on the stack.
    /// - A `link` return value which will receive the value that was passed to the `link`
    ///   argument.
    /// - An `sret` argument can be added if one wasn't present already. This is necessary if the
    ///   signature returns more values than registers are available for returning values.
    /// - An `sret` return value can be added if the ABI requires a function to return its `sret`
    ///   argument in a register.
    ///
    /// Arguments and return values for the caller's frame pointer and other callee-saved registers
    /// should not be added by this function. These arguments are not added until after register
    /// allocation.
    fn legalize_signature(&self, sig: &mut ir::Signature, current: bool);

    /// Get the register class that should be used to represent an ABI argument or return value of
    /// type `ty`. This should be the top-level register class that contains the argument
    /// registers.
    ///
    /// This function can assume that it will only be asked to provide register classes for types
    /// that `legalize_signature()` produces in `ArgumentLoc::Reg` entries.
    fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass;

    /// Get the set of allocatable registers that can be used when compiling `func`.
    ///
    /// This set excludes reserved registers like the stack pointer and other special-purpose
    /// registers.
    fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet;

    /// Compute the stack layout and insert prologue and epilogue code into `func`.
    ///
    /// Return an error if the stack frame is too large.
    fn prologue_epilogue(&self, func: &mut ir::Function) -> result::CtonResult {
        let _tt = timing::prologue_epilogue();
        // This default implementation is unlikely to be good enough.
        use ir::stackslot::{StackOffset, StackSize};
        use stack_layout::layout_stack;

        let word_size = if self.flags().is_64bit() { 8 } else { 4 };

        // Account for the SpiderMonkey standard prologue pushes.
        if func.signature.call_conv == ir::CallConv::SpiderWASM {
            let bytes = StackSize::from(self.flags().spiderwasm_prologue_words()) * word_size;
            let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes);
            ss.offset = Some(-(bytes as StackOffset));
            func.stack_slots.push(ss);
        }

        layout_stack(&mut func.stack_slots, word_size)?;
        Ok(())
    }

    /// Emit binary machine code for a single instruction into the `sink` trait object.
    ///
    /// Note that this will call `put*` methods on the trait object via its vtable which is not the
    /// fastest way of emitting code.
    fn emit_inst(
        &self,
        func: &ir::Function,
        inst: ir::Inst,
        divert: &mut regalloc::RegDiversions,
        sink: &mut binemit::CodeSink,
    );

    /// Emit a whole function into memory.
    ///
    /// This is more performant than calling `emit_inst` for each instruction.
    fn emit_function(&self, func: &ir::Function, sink: &mut binemit::MemoryCodeSink);
}
322
lib/codegen/src/isa/registers.rs
Normal file
322
lib/codegen/src/isa/registers.rs
Normal file
@@ -0,0 +1,322 @@
|
||||
//! Data structures describing the registers in an ISA.
|
||||
|
||||
use entity::EntityRef;
|
||||
use std::fmt;
|
||||
|
||||
/// Register units are the smallest units of register allocation.
///
/// Normally there is a 1-1 correspondence between registers and register units, but when an ISA
/// has aliasing registers, the aliasing can be modeled with registers that cover multiple
/// register units.
///
/// The register allocator will enforce that each register unit only gets used for one thing.
pub type RegUnit = u16;

/// A bit mask indexed by register units.
///
/// The size of this type is determined by the target ISA that has the most register units defined.
/// Currently that is arm32 which has 64+16 units.
///
/// This type should be coordinated with meta/cdsl/registers.py.
pub type RegUnitMask = [u32; 3];

/// A bit mask indexed by register classes.
///
/// The size of this type is determined by the ISA with the most register classes.
///
/// This type should be coordinated with meta/cdsl/isa.py.
pub type RegClassMask = u32;

/// Guaranteed maximum number of top-level register classes with pressure tracking in any ISA.
///
/// This can be increased, but should be coordinated with meta/cdsl/isa.py.
pub const MAX_TRACKED_TOPRCS: usize = 4;
/// The register units in a target ISA are divided into disjoint register banks. Each bank covers a
/// contiguous range of register units.
///
/// The `RegBank` struct provides a static description of a register bank.
pub struct RegBank {
    /// The name of this register bank as defined in the ISA's `registers.py` file.
    pub name: &'static str,

    /// The first register unit in this bank.
    pub first_unit: RegUnit,

    /// The total number of register units in this bank.
    pub units: RegUnit,

    /// Array of specially named register units. This array can be shorter than the number of units
    /// in the bank.
    pub names: &'static [&'static str],

    /// Name prefix to use for those register units in the bank not covered by the `names` array.
    /// The remaining register units will be named this prefix followed by their decimal offset in
    /// the bank. So with a prefix `r`, registers will be named `r8`, `r9`, ...
    pub prefix: &'static str,

    /// Index of the first top-level register class in this bank.
    pub first_toprc: usize,

    /// Number of top-level register classes in this bank.
    ///
    /// The top-level register classes in a bank are guaranteed to be numbered sequentially from
    /// `first_toprc`, and all top-level register classes across banks come before any sub-classes.
    pub num_toprcs: usize,

    /// Is register pressure tracking enabled for this bank?
    pub pressure_tracking: bool,
}
impl RegBank {
|
||||
/// Does this bank contain `regunit`?
|
||||
fn contains(&self, regunit: RegUnit) -> bool {
|
||||
regunit >= self.first_unit && regunit - self.first_unit < self.units
|
||||
}
|
||||
|
||||
/// Try to parse a regunit name. The name is not expected to begin with `%`.
|
||||
fn parse_regunit(&self, name: &str) -> Option<RegUnit> {
|
||||
match self.names.iter().position(|&x| x == name) {
|
||||
Some(offset) => {
|
||||
// This is one of the special-cased names.
|
||||
Some(offset as RegUnit)
|
||||
}
|
||||
None => {
|
||||
// Try a regular prefixed name.
|
||||
if name.starts_with(self.prefix) {
|
||||
name[self.prefix.len()..].parse().ok()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}.and_then(|offset| if offset < self.units {
|
||||
Some(offset + self.first_unit)
|
||||
} else {
|
||||
None
|
||||
})
|
||||
}
|
||||
|
||||
/// Write `regunit` to `w`, assuming that it belongs to this bank.
|
||||
/// All regunits are written with a `%` prefix.
|
||||
fn write_regunit(&self, f: &mut fmt::Formatter, regunit: RegUnit) -> fmt::Result {
|
||||
let offset = regunit - self.first_unit;
|
||||
assert!(offset < self.units);
|
||||
if (offset as usize) < self.names.len() {
|
||||
write!(f, "%{}", self.names[offset as usize])
|
||||
} else {
|
||||
write!(f, "%{}{}", self.prefix, offset)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A register class reference.
///
/// All register classes are statically defined in tables generated from the meta descriptions,
/// so a class is referenced simply as a `'static` borrow of its data.
pub type RegClass = &'static RegClassData;
/// Data about a register class.
///
/// A register class represents a subset of the registers in a bank. It describes the set of
/// permitted registers for a register operand in a given encoding of an instruction.
///
/// A register class can be a subset of another register class. The top-level register classes are
/// disjoint.
pub struct RegClassData {
    /// The name of the register class.
    pub name: &'static str,

    /// The index of this class in the ISA's RegInfo description.
    pub index: u8,

    /// How many register units to allocate per register.
    pub width: u8,

    /// Index of the register bank this class belongs to.
    pub bank: u8,

    /// Index of the top-level register class that contains this one.
    pub toprc: u8,

    /// The first register unit in this class.
    pub first: RegUnit,

    /// Bit-mask of sub-classes of this register class, including itself.
    ///
    /// Bits correspond to RC indexes.
    pub subclasses: RegClassMask,

    /// Mask of register units in the class. If `width > 1`, the mask only has a bit set for the
    /// first register unit in each allocatable register.
    pub mask: RegUnitMask,

    /// The global `RegInfo` instance that contains this register class.
    pub info: &'static RegInfo,
}
impl RegClassData {
|
||||
/// Get the register class index corresponding to the intersection of `self` and `other`.
|
||||
///
|
||||
/// This register class is guaranteed to exist if the register classes overlap. If the register
|
||||
/// classes don't overlap, returns `None`.
|
||||
pub fn intersect_index(&self, other: RegClass) -> Option<RegClassIndex> {
|
||||
// Compute the set of common subclasses.
|
||||
let mask = self.subclasses & other.subclasses;
|
||||
|
||||
if mask == 0 {
|
||||
// No overlap.
|
||||
None
|
||||
} else {
|
||||
// Register class indexes are topologically ordered, so the largest common subclass has
|
||||
// the smallest index.
|
||||
Some(RegClassIndex(mask.trailing_zeros() as u8))
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the intersection of `self` and `other`.
|
||||
pub fn intersect(&self, other: RegClass) -> Option<RegClass> {
|
||||
self.intersect_index(other).map(|rci| self.info.rc(rci))
|
||||
}
|
||||
|
||||
/// Returns true if `other` is a subclass of this register class.
|
||||
/// A register class is considered to be a subclass of itself.
|
||||
pub fn has_subclass<RCI: Into<RegClassIndex>>(&self, other: RCI) -> bool {
|
||||
self.subclasses & (1 << other.into().0) != 0
|
||||
}
|
||||
|
||||
/// Get the top-level register class containing this class.
|
||||
pub fn toprc(&self) -> RegClass {
|
||||
self.info.rc(RegClassIndex(self.toprc))
|
||||
}
|
||||
|
||||
/// Get a specific register unit in this class.
|
||||
pub fn unit(&self, offset: usize) -> RegUnit {
|
||||
let uoffset = offset * usize::from(self.width);
|
||||
self.first + uoffset as RegUnit
|
||||
}
|
||||
|
||||
/// Does this register class contain `regunit`?
|
||||
pub fn contains(&self, regunit: RegUnit) -> bool {
|
||||
self.mask[(regunit / 32) as usize] & (1u32 << (regunit % 32)) != 0
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for RegClassData {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
f.write_str(self.name)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for RegClassData {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
f.write_str(self.name)
|
||||
}
|
||||
}
|
||||
|
||||
/// Within an ISA, register classes are uniquely identified by their index.
impl PartialEq for RegClassData {
    fn eq(&self, other: &Self) -> bool {
        // Comparing the index is sufficient; no two classes share one.
        self.index == other.index
    }
}
|
||||
|
||||
/// A small reference to a register class.
///
/// Use this when storing register classes in compact data structures. The `RegInfo::rc()` method
/// can be used to get the real register class reference back.
// The wrapped `u8` is the class's position in `RegInfo::classes`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct RegClassIndex(u8);
|
||||
|
||||
impl EntityRef for RegClassIndex {
    // Wrap a raw index. `idx` is assumed to fit in a `u8`; ISAs define only a
    // handful of register classes.
    fn new(idx: usize) -> Self {
        RegClassIndex(idx as u8)
    }

    // Recover the raw index for table lookups.
    fn index(self) -> usize {
        usize::from(self.0)
    }
}
|
||||
|
||||
impl From<RegClass> for RegClassIndex {
    // A full register class reference carries its own index, so the
    // conversion is just a field read.
    fn from(rc: RegClass) -> Self {
        RegClassIndex(rc.index)
    }
}
|
||||
|
||||
impl fmt::Display for RegClassIndex {
    // Displays as e.g. `rci3`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "rci{}", self.0)
    }
}
|
||||
|
||||
/// Test if two registers overlap.
///
/// A register is identified as a `(RegClass, RegUnit)` pair. The register class is needed to
/// determine the width (in regunits) of the register.
pub fn regs_overlap(rc1: RegClass, reg1: RegUnit, rc2: RegClass, reg2: RegUnit) -> bool {
    // Each register covers units `[reg, reg + width)`. Two half-open ranges
    // overlap unless one ends at or before the other begins.
    let end1 = reg1 + RegUnit::from(rc1.width);
    let end2 = reg2 + RegUnit::from(rc2.width);
    !(end1 <= reg2 || end2 <= reg1)
}
|
||||
|
||||
/// Information about the registers in an ISA.
///
/// The `RegInfo` data structure collects all relevant static information about the registers in
/// an ISA.
#[derive(Clone)]
pub struct RegInfo {
    /// All register banks, ordered by their `first_unit`. The register banks are disjoint, but
    /// there may be holes of unused register unit numbers between banks due to alignment.
    pub banks: &'static [RegBank],

    /// All register classes ordered topologically so a sub-class always follows its parent.
    pub classes: &'static [RegClass],
}
|
||||
|
||||
impl RegInfo {
|
||||
/// Get the register bank holding `regunit`.
|
||||
pub fn bank_containing_regunit(&self, regunit: RegUnit) -> Option<&RegBank> {
|
||||
// We could do a binary search, but most ISAs have only two register banks...
|
||||
self.banks.iter().find(|b| b.contains(regunit))
|
||||
}
|
||||
|
||||
/// Try to parse a regunit name. The name is not expected to begin with `%`.
|
||||
pub fn parse_regunit(&self, name: &str) -> Option<RegUnit> {
|
||||
self.banks
|
||||
.iter()
|
||||
.filter_map(|b| b.parse_regunit(name))
|
||||
.next()
|
||||
}
|
||||
|
||||
/// Make a temporary object that can display a register unit.
|
||||
pub fn display_regunit(&self, regunit: RegUnit) -> DisplayRegUnit {
|
||||
DisplayRegUnit {
|
||||
regunit,
|
||||
reginfo: self,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the register class corresponding to `idx`.
|
||||
pub fn rc(&self, idx: RegClassIndex) -> RegClass {
|
||||
self.classes[idx.index()]
|
||||
}
|
||||
|
||||
/// Get the top-level register class containing the `idx` class.
|
||||
pub fn toprc(&self, idx: RegClassIndex) -> RegClass {
|
||||
self.classes[self.rc(idx).toprc as usize]
|
||||
}
|
||||
}
|
||||
|
||||
/// Temporary object that holds enough information to print a register unit.
pub struct DisplayRegUnit<'a> {
    // The register unit to display.
    regunit: RegUnit,
    // Register description used to resolve the unit's bank and name.
    reginfo: &'a RegInfo,
}
|
||||
|
||||
impl<'a> fmt::Display for DisplayRegUnit<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self.reginfo.bank_containing_regunit(self.regunit) {
|
||||
Some(b) => b.write_regunit(f, self.regunit),
|
||||
None => write!(f, "%INVALID{}", self.regunit),
|
||||
}
|
||||
}
|
||||
}
|
||||
140
lib/codegen/src/isa/riscv/abi.rs
Normal file
140
lib/codegen/src/isa/riscv/abi.rs
Normal file
@@ -0,0 +1,140 @@
|
||||
//! RISC-V ABI implementation.
|
||||
//!
|
||||
//! This module implements the RISC-V calling convention through the primary `legalize_signature()`
|
||||
//! entry point.
|
||||
//!
|
||||
//! This doesn't support the soft-float ABI at the moment.
|
||||
|
||||
use super::registers::{FPR, GPR};
|
||||
use super::settings;
|
||||
use abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion};
|
||||
use ir::{self, AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, Type};
|
||||
use isa::RegClass;
|
||||
use regalloc::RegisterSet;
|
||||
use settings as shared_settings;
|
||||
use std::i32;
|
||||
|
||||
// State for assigning RISC-V argument locations.
struct Args {
    // Size of a pointer register in bits.
    pointer_bits: u16,
    // Size of a pointer register in bytes.
    pointer_bytes: u32,
    // Native pointer-sized integer type.
    pointer_type: Type,
    // Number of argument registers assigned so far.
    regs: u32,
    // Total number of argument registers available (fewer with RV32E).
    reg_limit: u32,
    // Current stack offset for arguments passed on the stack.
    offset: u32,
}
|
||||
|
||||
impl Args {
|
||||
fn new(bits: u16, enable_e: bool) -> Args {
|
||||
Args {
|
||||
pointer_bits: bits,
|
||||
pointer_bytes: u32::from(bits) / 8,
|
||||
pointer_type: Type::int(bits).unwrap(),
|
||||
regs: 0,
|
||||
reg_limit: if enable_e { 6 } else { 8 },
|
||||
offset: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ArgAssigner for Args {
|
||||
fn assign(&mut self, arg: &AbiParam) -> ArgAction {
|
||||
fn align(value: u32, to: u32) -> u32 {
|
||||
(value + to - 1) & !(to - 1)
|
||||
}
|
||||
|
||||
let ty = arg.value_type;
|
||||
|
||||
// Check for a legal type.
|
||||
// RISC-V doesn't have SIMD at all, so break all vectors down.
|
||||
if ty.is_vector() {
|
||||
return ValueConversion::VectorSplit.into();
|
||||
}
|
||||
|
||||
// Large integers and booleans are broken down to fit in a register.
|
||||
if !ty.is_float() && ty.bits() > self.pointer_bits {
|
||||
// Align registers and stack to a multiple of two pointers.
|
||||
self.regs = align(self.regs, 2);
|
||||
self.offset = align(self.offset, 2 * self.pointer_bytes);
|
||||
return ValueConversion::IntSplit.into();
|
||||
}
|
||||
|
||||
// Small integers are extended to the size of a pointer register.
|
||||
if ty.is_int() && ty.bits() < self.pointer_bits {
|
||||
match arg.extension {
|
||||
ArgumentExtension::None => {}
|
||||
ArgumentExtension::Uext => return ValueConversion::Uext(self.pointer_type).into(),
|
||||
ArgumentExtension::Sext => return ValueConversion::Sext(self.pointer_type).into(),
|
||||
}
|
||||
}
|
||||
|
||||
if self.regs < self.reg_limit {
|
||||
// Assign to a register.
|
||||
let reg = if ty.is_float() {
|
||||
FPR.unit(10 + self.regs as usize)
|
||||
} else {
|
||||
GPR.unit(10 + self.regs as usize)
|
||||
};
|
||||
self.regs += 1;
|
||||
ArgumentLoc::Reg(reg).into()
|
||||
} else {
|
||||
// Assign a stack location.
|
||||
let loc = ArgumentLoc::Stack(self.offset as i32);
|
||||
self.offset += self.pointer_bytes;
|
||||
debug_assert!(self.offset <= i32::MAX as u32);
|
||||
loc.into()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Legalize `sig` for RISC-V.
|
||||
pub fn legalize_signature(
|
||||
sig: &mut ir::Signature,
|
||||
flags: &shared_settings::Flags,
|
||||
isa_flags: &settings::Flags,
|
||||
current: bool,
|
||||
) {
|
||||
let bits = if flags.is_64bit() { 64 } else { 32 };
|
||||
|
||||
let mut args = Args::new(bits, isa_flags.enable_e());
|
||||
legalize_args(&mut sig.params, &mut args);
|
||||
|
||||
let mut rets = Args::new(bits, isa_flags.enable_e());
|
||||
legalize_args(&mut sig.returns, &mut rets);
|
||||
|
||||
if current {
|
||||
let ptr = Type::int(bits).unwrap();
|
||||
|
||||
// Add the link register as an argument and return value.
|
||||
//
|
||||
// The `jalr` instruction implementing a return can technically accept the return address
|
||||
// in any register, but a micro-architecture with a return address predictor will only
|
||||
// recognize it as a return if the address is in `x1`.
|
||||
let link = AbiParam::special_reg(ptr, ArgumentPurpose::Link, GPR.unit(1));
|
||||
sig.params.push(link);
|
||||
sig.returns.push(link);
|
||||
}
|
||||
}
|
||||
|
||||
/// Get register class for a type appearing in a legalized signature.
|
||||
pub fn regclass_for_abi_type(ty: Type) -> RegClass {
|
||||
if ty.is_float() { FPR } else { GPR }
|
||||
}
|
||||
|
||||
pub fn allocatable_registers(_func: &ir::Function, isa_flags: &settings::Flags) -> RegisterSet {
|
||||
let mut regs = RegisterSet::new();
|
||||
regs.take(GPR, GPR.unit(0)); // Hard-wired 0.
|
||||
// %x1 is the link register which is available for allocation.
|
||||
regs.take(GPR, GPR.unit(2)); // Stack pointer.
|
||||
regs.take(GPR, GPR.unit(3)); // Global pointer.
|
||||
regs.take(GPR, GPR.unit(4)); // Thread pointer.
|
||||
// TODO: %x8 is the frame pointer. Reserve it?
|
||||
|
||||
// Remove %x16 and up for RV32E.
|
||||
if isa_flags.enable_e() {
|
||||
for u in 16..32 {
|
||||
regs.take(GPR, GPR.unit(u));
|
||||
}
|
||||
}
|
||||
|
||||
regs
|
||||
}
|
||||
182
lib/codegen/src/isa/riscv/binemit.rs
Normal file
182
lib/codegen/src/isa/riscv/binemit.rs
Normal file
@@ -0,0 +1,182 @@
|
||||
//! Emitting binary RISC-V machine code.
|
||||
|
||||
use binemit::{bad_encoding, CodeSink, Reloc};
|
||||
use ir::{Function, Inst, InstructionData};
|
||||
use isa::{RegUnit, StackBaseMask, StackRef};
|
||||
use predicates::is_signed_int;
|
||||
use regalloc::RegDiversions;
|
||||
use std::u32;
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/binemit-riscv.rs"));
|
||||
|
||||
/// R-type instructions.
|
||||
///
|
||||
/// 31 24 19 14 11 6
|
||||
/// funct7 rs2 rs1 funct3 rd opcode
|
||||
/// 25 20 15 12 7 0
|
||||
///
|
||||
/// Encoding bits: `opcode[6:2] | (funct3 << 5) | (funct7 << 8)`.
|
||||
fn put_r<CS: CodeSink + ?Sized>(bits: u16, rs1: RegUnit, rs2: RegUnit, rd: RegUnit, sink: &mut CS) {
|
||||
let bits = u32::from(bits);
|
||||
let opcode5 = bits & 0x1f;
|
||||
let funct3 = (bits >> 5) & 0x7;
|
||||
let funct7 = (bits >> 8) & 0x7f;
|
||||
let rs1 = u32::from(rs1) & 0x1f;
|
||||
let rs2 = u32::from(rs2) & 0x1f;
|
||||
let rd = u32::from(rd) & 0x1f;
|
||||
|
||||
// 0-6: opcode
|
||||
let mut i = 0x3;
|
||||
i |= opcode5 << 2;
|
||||
i |= rd << 7;
|
||||
i |= funct3 << 12;
|
||||
i |= rs1 << 15;
|
||||
i |= rs2 << 20;
|
||||
i |= funct7 << 25;
|
||||
|
||||
sink.put4(i);
|
||||
}
|
||||
|
||||
/// R-type instructions with a shift amount instead of rs2.
|
||||
///
|
||||
/// 31 25 19 14 11 6
|
||||
/// funct7 shamt rs1 funct3 rd opcode
|
||||
/// 25 20 15 12 7 0
|
||||
///
|
||||
/// Both funct7 and shamt contribute to bit 25. In RV64, shamt uses it for shifts > 31.
|
||||
///
|
||||
/// Encoding bits: `opcode[6:2] | (funct3 << 5) | (funct7 << 8)`.
|
||||
fn put_rshamt<CS: CodeSink + ?Sized>(
|
||||
bits: u16,
|
||||
rs1: RegUnit,
|
||||
shamt: i64,
|
||||
rd: RegUnit,
|
||||
sink: &mut CS,
|
||||
) {
|
||||
let bits = u32::from(bits);
|
||||
let opcode5 = bits & 0x1f;
|
||||
let funct3 = (bits >> 5) & 0x7;
|
||||
let funct7 = (bits >> 8) & 0x7f;
|
||||
let rs1 = u32::from(rs1) & 0x1f;
|
||||
let shamt = shamt as u32 & 0x3f;
|
||||
let rd = u32::from(rd) & 0x1f;
|
||||
|
||||
// 0-6: opcode
|
||||
let mut i = 0x3;
|
||||
i |= opcode5 << 2;
|
||||
i |= rd << 7;
|
||||
i |= funct3 << 12;
|
||||
i |= rs1 << 15;
|
||||
i |= shamt << 20;
|
||||
i |= funct7 << 25;
|
||||
|
||||
sink.put4(i);
|
||||
}
|
||||
|
||||
/// I-type instructions.
|
||||
///
|
||||
/// 31 19 14 11 6
|
||||
/// imm rs1 funct3 rd opcode
|
||||
/// 20 15 12 7 0
|
||||
///
|
||||
/// Encoding bits: `opcode[6:2] | (funct3 << 5)`
|
||||
fn put_i<CS: CodeSink + ?Sized>(bits: u16, rs1: RegUnit, imm: i64, rd: RegUnit, sink: &mut CS) {
|
||||
let bits = u32::from(bits);
|
||||
let opcode5 = bits & 0x1f;
|
||||
let funct3 = (bits >> 5) & 0x7;
|
||||
let rs1 = u32::from(rs1) & 0x1f;
|
||||
let rd = u32::from(rd) & 0x1f;
|
||||
|
||||
// 0-6: opcode
|
||||
let mut i = 0x3;
|
||||
i |= opcode5 << 2;
|
||||
i |= rd << 7;
|
||||
i |= funct3 << 12;
|
||||
i |= rs1 << 15;
|
||||
i |= (imm << 20) as u32;
|
||||
|
||||
sink.put4(i);
|
||||
}
|
||||
|
||||
/// U-type instructions.
|
||||
///
|
||||
/// 31 11 6
|
||||
/// imm rd opcode
|
||||
/// 12 7 0
|
||||
///
|
||||
/// Encoding bits: `opcode[6:2] | (funct3 << 5)`
|
||||
fn put_u<CS: CodeSink + ?Sized>(bits: u16, imm: i64, rd: RegUnit, sink: &mut CS) {
|
||||
let bits = u32::from(bits);
|
||||
let opcode5 = bits & 0x1f;
|
||||
let rd = u32::from(rd) & 0x1f;
|
||||
|
||||
// 0-6: opcode
|
||||
let mut i = 0x3;
|
||||
i |= opcode5 << 2;
|
||||
i |= rd << 7;
|
||||
i |= imm as u32 & 0xfffff000;
|
||||
|
||||
sink.put4(i);
|
||||
}
|
||||
|
||||
/// SB-type branch instructions.
|
||||
///
|
||||
/// 31 24 19 14 11 6
|
||||
/// imm rs2 rs1 funct3 imm opcode
|
||||
/// 25 20 15 12 7 0
|
||||
///
|
||||
/// Encoding bits: `opcode[6:2] | (funct3 << 5)`
|
||||
fn put_sb<CS: CodeSink + ?Sized>(bits: u16, imm: i64, rs1: RegUnit, rs2: RegUnit, sink: &mut CS) {
|
||||
let bits = u32::from(bits);
|
||||
let opcode5 = bits & 0x1f;
|
||||
let funct3 = (bits >> 5) & 0x7;
|
||||
let rs1 = u32::from(rs1) & 0x1f;
|
||||
let rs2 = u32::from(rs2) & 0x1f;
|
||||
|
||||
debug_assert!(is_signed_int(imm, 13, 1), "SB out of range {:#x}", imm);
|
||||
let imm = imm as u32;
|
||||
|
||||
// 0-6: opcode
|
||||
let mut i = 0x3;
|
||||
i |= opcode5 << 2;
|
||||
i |= funct3 << 12;
|
||||
i |= rs1 << 15;
|
||||
i |= rs2 << 20;
|
||||
|
||||
// The displacement is completely hashed up.
|
||||
i |= ((imm >> 11) & 0x1) << 7;
|
||||
i |= ((imm >> 1) & 0xf) << 8;
|
||||
i |= ((imm >> 5) & 0x3f) << 25;
|
||||
i |= ((imm >> 12) & 0x1) << 31;
|
||||
|
||||
sink.put4(i);
|
||||
}
|
||||
|
||||
/// UJ-type jump instructions.
|
||||
///
|
||||
/// 31 11 6
|
||||
/// imm rd opcode
|
||||
/// 12 7 0
|
||||
///
|
||||
/// Encoding bits: `opcode[6:2]`
|
||||
fn put_uj<CS: CodeSink + ?Sized>(bits: u16, imm: i64, rd: RegUnit, sink: &mut CS) {
|
||||
let bits = u32::from(bits);
|
||||
let opcode5 = bits & 0x1f;
|
||||
let rd = u32::from(rd) & 0x1f;
|
||||
|
||||
debug_assert!(is_signed_int(imm, 21, 1), "UJ out of range {:#x}", imm);
|
||||
let imm = imm as u32;
|
||||
|
||||
// 0-6: opcode
|
||||
let mut i = 0x3;
|
||||
i |= opcode5 << 2;
|
||||
i |= rd << 7;
|
||||
|
||||
// The displacement is completely hashed up.
|
||||
i |= imm & 0xff000;
|
||||
i |= ((imm >> 11) & 0x1) << 20;
|
||||
i |= ((imm >> 1) & 0x3ff) << 21;
|
||||
i |= ((imm >> 20) & 0x1) << 31;
|
||||
|
||||
sink.put4(i);
|
||||
}
|
||||
18
lib/codegen/src/isa/riscv/enc_tables.rs
Normal file
18
lib/codegen/src/isa/riscv/enc_tables.rs
Normal file
@@ -0,0 +1,18 @@
|
||||
//! Encoding tables for RISC-V.
|
||||
|
||||
use super::registers::*;
|
||||
use ir;
|
||||
use isa;
|
||||
use isa::constraints::*;
|
||||
use isa::enc_tables::*;
|
||||
use isa::encoding::RecipeSizing;
|
||||
use predicates;
|
||||
|
||||
// Include the generated encoding tables:
|
||||
// - `LEVEL1_RV32`
|
||||
// - `LEVEL1_RV64`
|
||||
// - `LEVEL2`
|
||||
// - `ENCLISTS`
|
||||
// - `INFO`
|
||||
include!(concat!(env!("OUT_DIR"), "/encoding-riscv.rs"));
|
||||
include!(concat!(env!("OUT_DIR"), "/legalize-riscv.rs"));
|
||||
266
lib/codegen/src/isa/riscv/mod.rs
Normal file
266
lib/codegen/src/isa/riscv/mod.rs
Normal file
@@ -0,0 +1,266 @@
|
||||
//! RISC-V Instruction Set Architecture.
|
||||
|
||||
mod abi;
|
||||
mod binemit;
|
||||
mod enc_tables;
|
||||
mod registers;
|
||||
pub mod settings;
|
||||
|
||||
use super::super::settings as shared_settings;
|
||||
use binemit::{emit_function, CodeSink, MemoryCodeSink};
|
||||
use ir;
|
||||
use isa::Builder as IsaBuilder;
|
||||
use isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings};
|
||||
use isa::{EncInfo, RegClass, RegInfo, TargetIsa};
|
||||
use regalloc;
|
||||
use std::boxed::Box;
|
||||
use std::fmt;
|
||||
|
||||
// The RISC-V target ISA object handed out by `isa_builder()`.
#[allow(dead_code)]
struct Isa {
    // Flags shared by all target ISAs.
    shared_flags: shared_settings::Flags,
    // RISC-V specific flags.
    isa_flags: settings::Flags,
    // Level-1 encoding table for the selected RV32/RV64 mode.
    cpumode: &'static [shared_enc_tables::Level1Entry<u16>],
}
|
||||
|
||||
/// Get an ISA builder for creating RISC-V targets.
pub fn isa_builder() -> IsaBuilder {
    IsaBuilder {
        // Start from the default RISC-V settings; `isa_constructor` finishes
        // construction once the shared flags are known.
        setup: settings::builder(),
        constructor: isa_constructor,
    }
}
|
||||
|
||||
fn isa_constructor(
|
||||
shared_flags: shared_settings::Flags,
|
||||
builder: &shared_settings::Builder,
|
||||
) -> Box<TargetIsa> {
|
||||
let level1 = if shared_flags.is_64bit() {
|
||||
&enc_tables::LEVEL1_RV64[..]
|
||||
} else {
|
||||
&enc_tables::LEVEL1_RV32[..]
|
||||
};
|
||||
Box::new(Isa {
|
||||
isa_flags: settings::Flags::new(&shared_flags, builder),
|
||||
shared_flags,
|
||||
cpumode: level1,
|
||||
})
|
||||
}
|
||||
|
||||
impl TargetIsa for Isa {
    // ISA name as used by `isa::lookup()`.
    fn name(&self) -> &'static str {
        "riscv"
    }

    fn flags(&self) -> &shared_settings::Flags {
        &self.shared_flags
    }

    fn register_info(&self) -> RegInfo {
        registers::INFO.clone()
    }

    fn encoding_info(&self) -> EncInfo {
        enc_tables::INFO.clone()
    }

    // Enumerate legal encodings of `inst` by consulting the generated
    // encoding tables for the selected RV32/RV64 mode.
    fn legal_encodings<'a>(
        &'a self,
        func: &'a ir::Function,
        inst: &'a ir::InstructionData,
        ctrl_typevar: ir::Type,
    ) -> Encodings<'a> {
        lookup_enclist(
            ctrl_typevar,
            inst,
            func,
            self.cpumode,
            &enc_tables::LEVEL2[..],
            &enc_tables::ENCLISTS[..],
            &enc_tables::LEGALIZE_ACTIONS[..],
            &enc_tables::RECIPE_PREDICATES[..],
            &enc_tables::INST_PREDICATES[..],
            self.isa_flags.predicate_view(),
        )
    }

    // Delegate to the RISC-V ABI implementation.
    fn legalize_signature(&self, sig: &mut ir::Signature, current: bool) {
        abi::legalize_signature(sig, &self.shared_flags, &self.isa_flags, current)
    }

    fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
        abi::regclass_for_abi_type(ty)
    }

    fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
        abi::allocatable_registers(func, &self.isa_flags)
    }

    // Delegate binary emission to the generated `binemit` module.
    fn emit_inst(
        &self,
        func: &ir::Function,
        inst: ir::Inst,
        divert: &mut regalloc::RegDiversions,
        sink: &mut CodeSink,
    ) {
        binemit::emit_inst(func, inst, divert, sink)
    }

    fn emit_function(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
        emit_function(func, binemit::emit_inst, sink)
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use ir::{Function, InstructionData, Opcode};
    use ir::{immediates, types};
    use isa;
    use settings::{self, Configurable};
    use std::string::{String, ToString};

    // Render an encoding result compactly, or "no encoding" on failure.
    fn encstr(isa: &isa::TargetIsa, enc: Result<isa::Encoding, isa::Legalize>) -> String {
        match enc {
            Ok(e) => isa.encoding_info().display(e).to_string(),
            Err(_) => "no encoding".to_string(),
        }
    }

    #[test]
    fn test_64bitenc() {
        let mut shared_builder = settings::builder();
        shared_builder.enable("is_64bit").unwrap();
        let shared_flags = settings::Flags::new(&shared_builder);
        let isa = isa::lookup("riscv").unwrap().finish(shared_flags);

        let mut func = Function::new();
        let ebb = func.dfg.make_ebb();
        let arg64 = func.dfg.append_ebb_param(ebb, types::I64);
        let arg32 = func.dfg.append_ebb_param(ebb, types::I32);

        // Try to encode iadd_imm.i64 v1, -10.
        let inst64 = InstructionData::BinaryImm {
            opcode: Opcode::IaddImm,
            arg: arg64,
            imm: immediates::Imm64::new(-10),
        };

        // ADDI is I/0b00100
        assert_eq!(
            encstr(&*isa, isa.encode(&func, &inst64, types::I64)),
            "Ii#04"
        );

        // Try to encode iadd_imm.i64 v1, -10000.
        let inst64_large = InstructionData::BinaryImm {
            opcode: Opcode::IaddImm,
            arg: arg64,
            imm: immediates::Imm64::new(-10000),
        };

        // Immediate is out of range for ADDI.
        assert!(isa.encode(&func, &inst64_large, types::I64).is_err());

        // Create an iadd_imm.i32 which is encodable in RV64.
        let inst32 = InstructionData::BinaryImm {
            opcode: Opcode::IaddImm,
            arg: arg32,
            imm: immediates::Imm64::new(10),
        };

        // ADDIW is I/0b00110
        assert_eq!(
            encstr(&*isa, isa.encode(&func, &inst32, types::I32)),
            "Ii#06"
        );
    }

    // Same as above, but for RV32.
    #[test]
    fn test_32bitenc() {
        let mut shared_builder = settings::builder();
        shared_builder.set("is_64bit", "false").unwrap();
        let shared_flags = settings::Flags::new(&shared_builder);
        let isa = isa::lookup("riscv").unwrap().finish(shared_flags);

        let mut func = Function::new();
        let ebb = func.dfg.make_ebb();
        let arg64 = func.dfg.append_ebb_param(ebb, types::I64);
        let arg32 = func.dfg.append_ebb_param(ebb, types::I32);

        // Try to encode iadd_imm.i64 v1, -10.
        let inst64 = InstructionData::BinaryImm {
            opcode: Opcode::IaddImm,
            arg: arg64,
            imm: immediates::Imm64::new(-10),
        };

        // In 32-bit mode, an i64 bit add should be narrowed.
        assert!(isa.encode(&func, &inst64, types::I64).is_err());

        // Try to encode iadd_imm.i64 v1, -10000.
        let inst64_large = InstructionData::BinaryImm {
            opcode: Opcode::IaddImm,
            arg: arg64,
            imm: immediates::Imm64::new(-10000),
        };

        // In 32-bit mode, an i64 bit add should be narrowed.
        assert!(isa.encode(&func, &inst64_large, types::I64).is_err());

        // Create an iadd_imm.i32 which is encodable in RV32.
        let inst32 = InstructionData::BinaryImm {
            opcode: Opcode::IaddImm,
            arg: arg32,
            imm: immediates::Imm64::new(10),
        };

        // ADDI is I/0b00100
        assert_eq!(
            encstr(&*isa, isa.encode(&func, &inst32, types::I32)),
            "Ii#04"
        );

        // Create an imul.i32 which is encodable in RV32, but only when use_m is true.
        let mul32 = InstructionData::Binary {
            opcode: Opcode::Imul,
            args: [arg32, arg32],
        };

        // supports_m is off by default, so imul has no encoding.
        assert!(isa.encode(&func, &mul32, types::I32).is_err());
    }

    #[test]
    fn test_rv32m() {
        let mut shared_builder = settings::builder();
        shared_builder.set("is_64bit", "false").unwrap();
        let shared_flags = settings::Flags::new(&shared_builder);

        // Set the supports_m setting which in turn enables the use_m predicate that unlocks
        // encodings for imul.
        let mut isa_builder = isa::lookup("riscv").unwrap();
        isa_builder.enable("supports_m").unwrap();

        let isa = isa_builder.finish(shared_flags);

        let mut func = Function::new();
        let ebb = func.dfg.make_ebb();
        let arg32 = func.dfg.append_ebb_param(ebb, types::I32);

        // Create an imul.i32 which is encodable in RV32M.
        let mul32 = InstructionData::Binary {
            opcode: Opcode::Imul,
            args: [arg32, arg32],
        };
        assert_eq!(
            encstr(&*isa, isa.encode(&func, &mul32, types::I32)),
            "R#10c"
        );
    }
}
|
||||
|
||||
impl fmt::Display for Isa {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}\n{}", self.shared_flags, self.isa_flags)
|
||||
}
|
||||
}
|
||||
50
lib/codegen/src/isa/riscv/registers.rs
Normal file
50
lib/codegen/src/isa/riscv/registers.rs
Normal file
@@ -0,0 +1,50 @@
|
||||
//! RISC-V register descriptions.
|
||||
|
||||
use isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit};
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/registers-riscv.rs"));
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{FPR, GPR, INFO};
    use isa::RegUnit;
    use std::string::{String, ToString};

    #[test]
    fn unit_encodings() {
        // Integer registers occupy units 0-31, float registers units 32-63.
        assert_eq!(INFO.parse_regunit("x0"), Some(0));
        assert_eq!(INFO.parse_regunit("x31"), Some(31));
        assert_eq!(INFO.parse_regunit("f0"), Some(32));
        assert_eq!(INFO.parse_regunit("f31"), Some(63));

        // Register numbers past each bank's end don't parse.
        assert_eq!(INFO.parse_regunit("x32"), None);
        assert_eq!(INFO.parse_regunit("f32"), None);
    }

    #[test]
    fn unit_names() {
        // Render a register unit through `RegInfo::display_regunit`.
        fn uname(ru: RegUnit) -> String {
            INFO.display_regunit(ru).to_string()
        }

        assert_eq!(uname(0), "%x0");
        assert_eq!(uname(1), "%x1");
        assert_eq!(uname(31), "%x31");
        assert_eq!(uname(32), "%f0");
        assert_eq!(uname(33), "%f1");
        assert_eq!(uname(63), "%f31");
        // Units outside every bank display as invalid.
        assert_eq!(uname(64), "%INVALID64");
    }

    #[test]
    fn classes() {
        // GPR and FPR are disjoint register classes.
        assert!(GPR.contains(GPR.unit(0)));
        assert!(GPR.contains(GPR.unit(31)));
        assert!(!FPR.contains(GPR.unit(0)));
        assert!(!FPR.contains(GPR.unit(31)));
        assert!(!GPR.contains(FPR.unit(0)));
        assert!(!GPR.contains(FPR.unit(31)));
        assert!(FPR.contains(FPR.unit(0)));
        assert!(FPR.contains(FPR.unit(31)));
    }
}
|
||||
54
lib/codegen/src/isa/riscv/settings.rs
Normal file
54
lib/codegen/src/isa/riscv/settings.rs
Normal file
@@ -0,0 +1,54 @@
|
||||
//! RISC-V Settings.
|
||||
|
||||
use settings::{self, detail, Builder};
|
||||
use std::fmt;
|
||||
|
||||
// Include code generated by `lib/codegen/meta/gen_settings.py`. This file contains a public
|
||||
// `Flags` struct with an impl for all of the settings defined in
|
||||
// `lib/codegen/meta/isa/riscv/settings.py`.
|
||||
include!(concat!(env!("OUT_DIR"), "/settings-riscv.rs"));
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{builder, Flags};
    use settings::{self, Configurable};
    use std::string::ToString;

    #[test]
    fn display_default() {
        let shared = settings::Flags::new(&settings::builder());
        let b = builder();
        let f = Flags::new(&shared, &b);
        // Default RISC-V flag values as rendered by the generated Display impl.
        assert_eq!(
            f.to_string(),
            "[riscv]\n\
             supports_m = false\n\
             supports_a = false\n\
             supports_f = false\n\
             supports_d = false\n\
             enable_m = true\n\
             enable_e = false\n"
        );
        // Predicates are not part of the Display output.
        assert_eq!(f.full_float(), false);
    }

    #[test]
    fn predicates() {
        // full_float becomes true when both supports_f and supports_d are set...
        let shared = settings::Flags::new(&settings::builder());
        let mut b = builder();
        b.enable("supports_f").unwrap();
        b.enable("supports_d").unwrap();
        let f = Flags::new(&shared, &b);
        assert_eq!(f.full_float(), true);

        // ...but disabling the shared enable_simd flag turns it off again.
        let mut sb = settings::builder();
        sb.set("enable_simd", "false").unwrap();
        let shared = settings::Flags::new(&sb);
        let mut b = builder();
        b.enable("supports_f").unwrap();
        b.enable("supports_d").unwrap();
        let f = Flags::new(&shared, &b);
        assert_eq!(f.full_float(), false);
    }
}
|
||||
94
lib/codegen/src/isa/stack.rs
Normal file
94
lib/codegen/src/isa/stack.rs
Normal file
@@ -0,0 +1,94 @@
|
||||
//! Low-level details of stack accesses.
|
||||
//!
|
||||
//! The `ir::StackSlots` type deals with stack slots and stack frame layout. The `StackRef` type
|
||||
//! defined in this module expresses the low-level details of accessing a stack slot from an
|
||||
//! encoded instruction.
|
||||
|
||||
use ir::StackSlot;
|
||||
use ir::stackslot::{StackOffset, StackSlotKind, StackSlots};
|
||||
|
||||
/// A method for referencing a stack slot in the current stack frame.
///
/// Stack slots are addressed with a constant offset from a base register. The base can be the
/// stack pointer, the frame pointer, or (in the future) a zone register pointing to an inner zone
/// of a large stack frame.
#[derive(Clone, Copy, Debug)]
pub struct StackRef {
    /// The base register to use for addressing.
    pub base: StackBase,

    /// Immediate offset from the base register to the first byte of the stack slot.
    pub offset: StackOffset,
}
|
||||
|
||||
impl StackRef {
|
||||
/// Get a reference to the stack slot `ss` using one of the base pointers in `mask`.
|
||||
pub fn masked(ss: StackSlot, mask: StackBaseMask, frame: &StackSlots) -> Option<StackRef> {
|
||||
// Try an SP-relative reference.
|
||||
if mask.contains(StackBase::SP) {
|
||||
return Some(StackRef::sp(ss, frame));
|
||||
}
|
||||
|
||||
// No reference possible with this mask.
|
||||
None
|
||||
}
|
||||
|
||||
/// Get a reference to `ss` using the stack pointer as a base.
|
||||
pub fn sp(ss: StackSlot, frame: &StackSlots) -> StackRef {
|
||||
let size = frame.frame_size.expect(
|
||||
"Stack layout must be computed before referencing stack slots",
|
||||
);
|
||||
let slot = &frame[ss];
|
||||
let offset = if slot.kind == StackSlotKind::OutgoingArg {
|
||||
// Outgoing argument slots have offsets relative to our stack pointer.
|
||||
slot.offset.unwrap()
|
||||
} else {
|
||||
// All other slots have offsets relative to our caller's stack frame.
|
||||
// Offset where SP is pointing. (All ISAs have stacks growing downwards.)
|
||||
let sp_offset = -(size as StackOffset);
|
||||
slot.offset.unwrap() - sp_offset
|
||||
};
|
||||
StackRef {
|
||||
base: StackBase::SP,
|
||||
offset,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Generic base register for referencing stack slots.
///
/// Most ISAs have a stack pointer and an optional frame pointer, so provide generic names for
/// those two base pointers.
// The explicit discriminants double as bit positions in `StackBaseMask`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum StackBase {
    /// Use the stack pointer.
    SP = 0,

    /// Use the frame pointer (if one is present).
    FP = 1,

    /// Use an explicit zone pointer in a general-purpose register.
    ///
    /// This feature is not yet implemented.
    Zone = 2,
}
|
||||
|
||||
/// Bit mask of supported stack bases.
///
/// Many instruction encodings can use different base registers while others only work with the
/// stack pointer, say. A `StackBaseMask` is a bit mask of supported stack bases for a given
/// instruction encoding.
///
/// This behaves like a set of `StackBase` variants.
///
/// The internal representation as a `u8` is public because stack base masks are used in constant
/// tables generated from the Python encoding definitions.
// Bit `n` of the `u8` corresponds to the `StackBase` variant with discriminant `n`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct StackBaseMask(pub u8);
|
||||
|
||||
impl StackBaseMask {
|
||||
/// Check if this mask contains the `base` variant.
|
||||
pub fn contains(self, base: StackBase) -> bool {
|
||||
self.0 & (1 << base as usize) != 0
|
||||
}
|
||||
}
|
||||
371
lib/codegen/src/isa/x86/abi.rs
Normal file
371
lib/codegen/src/isa/x86/abi.rs
Normal file
@@ -0,0 +1,371 @@
|
||||
//! x86 ABI implementation.
|
||||
|
||||
use super::registers::{FPR, GPR, RU};
|
||||
use abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion};
|
||||
use cursor::{Cursor, CursorPosition, EncCursor};
|
||||
use ir;
|
||||
use ir::immediates::Imm64;
|
||||
use ir::stackslot::{StackOffset, StackSize};
|
||||
use ir::{AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, CallConv, InstBuilder,
|
||||
ValueLoc};
|
||||
use isa::{RegClass, RegUnit, TargetIsa};
|
||||
use regalloc::RegisterSet;
|
||||
use result;
|
||||
use settings as shared_settings;
|
||||
use stack_layout::layout_stack;
|
||||
use std::i32;
|
||||
|
||||
/// Argument registers for x86-64
|
||||
static ARG_GPRS: [RU; 6] = [RU::rdi, RU::rsi, RU::rdx, RU::rcx, RU::r8, RU::r9];
|
||||
|
||||
/// Return value registers.
|
||||
static RET_GPRS: [RU; 3] = [RU::rax, RU::rdx, RU::rcx];
|
||||
|
||||
// State for assigning x86 argument locations.
struct Args {
    // Size of a pointer register in bytes.
    pointer_bytes: u32,
    // Size of a pointer register in bits.
    pointer_bits: u16,
    // Native pointer-sized integer type.
    pointer_type: ir::Type,
    // General-purpose argument registers, in assignment order (e.g. ARG_GPRS).
    gpr: &'static [RU],
    // Number of entries of `gpr` consumed so far.
    gpr_used: usize,
    // Maximum number of floating-point registers to use.
    // NOTE(review): consumed past the visible code — confirm at use sites.
    fpr_limit: usize,
    // Number of floating-point registers consumed so far.
    fpr_used: usize,
    // Current stack offset for arguments passed on the stack.
    offset: u32,
    // Calling convention being lowered (affects special-purpose arguments).
    call_conv: CallConv,
}
|
||||
|
||||
impl Args {
|
||||
fn new(bits: u16, gpr: &'static [RU], fpr_limit: usize, call_conv: CallConv) -> Args {
|
||||
Args {
|
||||
pointer_bytes: u32::from(bits) / 8,
|
||||
pointer_bits: bits,
|
||||
pointer_type: ir::Type::int(bits).unwrap(),
|
||||
gpr,
|
||||
gpr_used: 0,
|
||||
fpr_limit,
|
||||
fpr_used: 0,
|
||||
offset: 0,
|
||||
call_conv: call_conv,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ArgAssigner for Args {
|
||||
fn assign(&mut self, arg: &AbiParam) -> ArgAction {
|
||||
let ty = arg.value_type;
|
||||
|
||||
// Check for a legal type.
|
||||
// We don't support SIMD yet, so break all vectors down.
|
||||
if ty.is_vector() {
|
||||
return ValueConversion::VectorSplit.into();
|
||||
}
|
||||
|
||||
// Large integers and booleans are broken down to fit in a register.
|
||||
if !ty.is_float() && ty.bits() > self.pointer_bits {
|
||||
return ValueConversion::IntSplit.into();
|
||||
}
|
||||
|
||||
// Small integers are extended to the size of a pointer register.
|
||||
if ty.is_int() && ty.bits() < self.pointer_bits {
|
||||
match arg.extension {
|
||||
ArgumentExtension::None => {}
|
||||
ArgumentExtension::Uext => return ValueConversion::Uext(self.pointer_type).into(),
|
||||
ArgumentExtension::Sext => return ValueConversion::Sext(self.pointer_type).into(),
|
||||
}
|
||||
}
|
||||
|
||||
// Handle special-purpose arguments.
|
||||
if ty.is_int() && self.call_conv == CallConv::SpiderWASM {
|
||||
match arg.purpose {
|
||||
// This is SpiderMonkey's `WasmTlsReg`.
|
||||
ArgumentPurpose::VMContext => {
|
||||
return ArgumentLoc::Reg(if self.pointer_bits == 64 {
|
||||
RU::r14
|
||||
} else {
|
||||
RU::rsi
|
||||
} as RegUnit).into()
|
||||
}
|
||||
// This is SpiderMonkey's `WasmTableCallSigReg`.
|
||||
ArgumentPurpose::SignatureId => return ArgumentLoc::Reg(RU::rbx as RegUnit).into(),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
// Try to use a GPR.
|
||||
if !ty.is_float() && self.gpr_used < self.gpr.len() {
|
||||
let reg = self.gpr[self.gpr_used] as RegUnit;
|
||||
self.gpr_used += 1;
|
||||
return ArgumentLoc::Reg(reg).into();
|
||||
}
|
||||
|
||||
// Try to use an FPR.
|
||||
if ty.is_float() && self.fpr_used < self.fpr_limit {
|
||||
let reg = FPR.unit(self.fpr_used);
|
||||
self.fpr_used += 1;
|
||||
return ArgumentLoc::Reg(reg).into();
|
||||
}
|
||||
|
||||
// Assign a stack location.
|
||||
let loc = ArgumentLoc::Stack(self.offset as i32);
|
||||
self.offset += self.pointer_bytes;
|
||||
debug_assert!(self.offset <= i32::MAX as u32);
|
||||
loc.into()
|
||||
}
|
||||
}
|
||||
|
||||
/// Legalize `sig`.
|
||||
pub fn legalize_signature(sig: &mut ir::Signature, flags: &shared_settings::Flags, _current: bool) {
|
||||
let bits;
|
||||
let mut args;
|
||||
|
||||
if flags.is_64bit() {
|
||||
bits = 64;
|
||||
args = Args::new(bits, &ARG_GPRS, 8, sig.call_conv);
|
||||
} else {
|
||||
bits = 32;
|
||||
args = Args::new(bits, &[], 0, sig.call_conv);
|
||||
}
|
||||
|
||||
legalize_args(&mut sig.params, &mut args);
|
||||
|
||||
let mut rets = Args::new(bits, &RET_GPRS, 2, sig.call_conv);
|
||||
legalize_args(&mut sig.returns, &mut rets);
|
||||
}
|
||||
|
||||
/// Get register class for a type appearing in a legalized signature.
|
||||
pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
|
||||
if ty.is_int() || ty.is_bool() {
|
||||
GPR
|
||||
} else {
|
||||
FPR
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the set of allocatable registers for `func`.
|
||||
pub fn allocatable_registers(_func: &ir::Function, flags: &shared_settings::Flags) -> RegisterSet {
|
||||
let mut regs = RegisterSet::new();
|
||||
regs.take(GPR, RU::rsp as RegUnit);
|
||||
regs.take(GPR, RU::rbp as RegUnit);
|
||||
|
||||
// 32-bit arch only has 8 registers.
|
||||
if !flags.is_64bit() {
|
||||
for i in 8..16 {
|
||||
regs.take(GPR, GPR.unit(i));
|
||||
regs.take(FPR, FPR.unit(i));
|
||||
}
|
||||
}
|
||||
|
||||
regs
|
||||
}
|
||||
|
||||
/// Get the set of callee-saved registers.
|
||||
fn callee_saved_gprs(flags: &shared_settings::Flags) -> &'static [RU] {
|
||||
if flags.is_64bit() {
|
||||
&[RU::rbx, RU::r12, RU::r13, RU::r14, RU::r15]
|
||||
} else {
|
||||
&[RU::rbx, RU::rsi, RU::rdi]
|
||||
}
|
||||
}
|
||||
|
||||
fn callee_saved_gprs_used(flags: &shared_settings::Flags, func: &ir::Function) -> RegisterSet {
|
||||
let mut all_callee_saved = RegisterSet::empty();
|
||||
for reg in callee_saved_gprs(flags) {
|
||||
all_callee_saved.free(GPR, *reg as RegUnit);
|
||||
}
|
||||
|
||||
let mut used = RegisterSet::empty();
|
||||
for value_loc in func.locations.values() {
|
||||
// Note that `value_loc` here contains only a single unit of a potentially multi-unit
|
||||
// register. We don't use registers that overlap each other in the x86 ISA, but in others
|
||||
// we do. So this should not be blindly reused.
|
||||
if let ValueLoc::Reg(ru) = *value_loc {
|
||||
if !used.is_avail(GPR, ru) {
|
||||
used.free(GPR, ru);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// regmove and regfill instructions may temporarily divert values into other registers,
|
||||
// and these are not reflected in `func.locations`. Scan the function for such instructions
|
||||
// and note which callee-saved registers they use.
|
||||
//
|
||||
// TODO: Consider re-evaluating how regmove/regfill/regspill work and whether it's possible
|
||||
// to avoid this step.
|
||||
for ebb in &func.layout {
|
||||
for inst in func.layout.ebb_insts(ebb) {
|
||||
match func.dfg[inst] {
|
||||
ir::instructions::InstructionData::RegMove { dst, .. } |
|
||||
ir::instructions::InstructionData::RegFill { dst, .. } => {
|
||||
if !used.is_avail(GPR, dst) {
|
||||
used.free(GPR, dst);
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
used.intersect(&all_callee_saved);
|
||||
return used;
|
||||
}
|
||||
|
||||
pub fn prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> result::CtonResult {
|
||||
match func.signature.call_conv {
|
||||
ir::CallConv::SystemV => system_v_prologue_epilogue(func, isa),
|
||||
ir::CallConv::SpiderWASM => spiderwasm_prologue_epilogue(func, isa),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn spiderwasm_prologue_epilogue(
|
||||
func: &mut ir::Function,
|
||||
isa: &TargetIsa,
|
||||
) -> result::CtonResult {
|
||||
// Spiderwasm on 32-bit x86 always aligns its stack pointer to 16 bytes.
|
||||
let stack_align = 16;
|
||||
let word_size = if isa.flags().is_64bit() { 8 } else { 4 };
|
||||
let bytes = StackSize::from(isa.flags().spiderwasm_prologue_words()) * word_size;
|
||||
|
||||
let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes);
|
||||
ss.offset = Some(-(bytes as StackOffset));
|
||||
func.stack_slots.push(ss);
|
||||
|
||||
layout_stack(&mut func.stack_slots, stack_align)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Insert a System V-compatible prologue and epilogue.
|
||||
pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> result::CtonResult {
|
||||
// The original 32-bit x86 ELF ABI had a 4-byte aligned stack pointer, but
|
||||
// newer versions use a 16-byte aligned stack pointer.
|
||||
let stack_align = 16;
|
||||
let word_size = if isa.flags().is_64bit() { 8 } else { 4 };
|
||||
let csr_type = if isa.flags().is_64bit() {
|
||||
ir::types::I64
|
||||
} else {
|
||||
ir::types::I32
|
||||
};
|
||||
|
||||
let csrs = callee_saved_gprs_used(isa.flags(), func);
|
||||
|
||||
// The reserved stack area is composed of:
|
||||
// return address + frame pointer + all callee-saved registers
|
||||
//
|
||||
// Pushing the return address is an implicit function of the `call`
|
||||
// instruction. Each of the others we will then push explicitly. Then we
|
||||
// will adjust the stack pointer to make room for the rest of the required
|
||||
// space for this frame.
|
||||
let csr_stack_size = ((csrs.iter(GPR).len() + 2) * word_size as usize) as i32;
|
||||
func.create_stack_slot(ir::StackSlotData {
|
||||
kind: ir::StackSlotKind::IncomingArg,
|
||||
size: csr_stack_size as u32,
|
||||
offset: Some(-csr_stack_size),
|
||||
});
|
||||
|
||||
let total_stack_size = layout_stack(&mut func.stack_slots, stack_align)? as i32;
|
||||
let local_stack_size = i64::from(total_stack_size - csr_stack_size);
|
||||
|
||||
// Add CSRs to function signature
|
||||
let fp_arg = ir::AbiParam::special_reg(
|
||||
csr_type,
|
||||
ir::ArgumentPurpose::FramePointer,
|
||||
RU::rbp as RegUnit,
|
||||
);
|
||||
func.signature.params.push(fp_arg);
|
||||
func.signature.returns.push(fp_arg);
|
||||
|
||||
for csr in csrs.iter(GPR) {
|
||||
let csr_arg = ir::AbiParam::special_reg(csr_type, ir::ArgumentPurpose::CalleeSaved, csr);
|
||||
func.signature.params.push(csr_arg);
|
||||
func.signature.returns.push(csr_arg);
|
||||
}
|
||||
|
||||
// Set up the cursor and insert the prologue
|
||||
let entry_ebb = func.layout.entry_block().expect("missing entry block");
|
||||
let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_ebb);
|
||||
insert_system_v_prologue(&mut pos, local_stack_size, csr_type, &csrs);
|
||||
|
||||
// Reset the cursor and insert the epilogue
|
||||
let mut pos = pos.at_position(CursorPosition::Nowhere);
|
||||
insert_system_v_epilogues(&mut pos, local_stack_size, csr_type, &csrs);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Insert the prologue for a given function.
|
||||
fn insert_system_v_prologue(
|
||||
pos: &mut EncCursor,
|
||||
stack_size: i64,
|
||||
csr_type: ir::types::Type,
|
||||
csrs: &RegisterSet,
|
||||
) {
|
||||
// Append param to entry EBB
|
||||
let ebb = pos.current_ebb().expect("missing ebb under cursor");
|
||||
let fp = pos.func.dfg.append_ebb_param(ebb, csr_type);
|
||||
pos.func.locations[fp] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
|
||||
|
||||
pos.ins().x86_push(fp);
|
||||
pos.ins().copy_special(
|
||||
RU::rsp as RegUnit,
|
||||
RU::rbp as RegUnit,
|
||||
);
|
||||
|
||||
for reg in csrs.iter(GPR) {
|
||||
// Append param to entry EBB
|
||||
let csr_arg = pos.func.dfg.append_ebb_param(ebb, csr_type);
|
||||
|
||||
// Assign it a location
|
||||
pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
|
||||
|
||||
// Remember it so we can push it momentarily
|
||||
pos.ins().x86_push(csr_arg);
|
||||
}
|
||||
|
||||
if stack_size > 0 {
|
||||
pos.ins().adjust_sp_imm(Imm64::new(-stack_size));
|
||||
}
|
||||
}
|
||||
|
||||
/// Find all `return` instructions and insert epilogues before them.
|
||||
fn insert_system_v_epilogues(
|
||||
pos: &mut EncCursor,
|
||||
stack_size: i64,
|
||||
csr_type: ir::types::Type,
|
||||
csrs: &RegisterSet,
|
||||
) {
|
||||
while let Some(ebb) = pos.next_ebb() {
|
||||
pos.goto_last_inst(ebb);
|
||||
if let Some(inst) = pos.current_inst() {
|
||||
if pos.func.dfg[inst].opcode().is_return() {
|
||||
insert_system_v_epilogue(inst, stack_size, pos, csr_type, csrs);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Insert an epilogue given a specific `return` instruction.
|
||||
fn insert_system_v_epilogue(
|
||||
inst: ir::Inst,
|
||||
stack_size: i64,
|
||||
pos: &mut EncCursor,
|
||||
csr_type: ir::types::Type,
|
||||
csrs: &RegisterSet,
|
||||
) {
|
||||
if stack_size > 0 {
|
||||
pos.ins().adjust_sp_imm(Imm64::new(stack_size));
|
||||
}
|
||||
|
||||
// Pop all the callee-saved registers, stepping backward each time to
|
||||
// preserve the correct order.
|
||||
let fp_ret = pos.ins().x86_pop(csr_type);
|
||||
pos.prev_inst();
|
||||
|
||||
pos.func.locations[fp_ret] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
|
||||
pos.func.dfg.append_inst_arg(inst, fp_ret);
|
||||
|
||||
for reg in csrs.iter(GPR) {
|
||||
let csr_ret = pos.ins().x86_pop(csr_type);
|
||||
pos.prev_inst();
|
||||
|
||||
pos.func.locations[csr_ret] = ir::ValueLoc::Reg(reg);
|
||||
pos.func.dfg.append_inst_arg(inst, csr_ret);
|
||||
}
|
||||
}
|
||||
300
lib/codegen/src/isa/x86/binemit.rs
Normal file
300
lib/codegen/src/isa/x86/binemit.rs
Normal file
@@ -0,0 +1,300 @@
|
||||
//! Emitting binary x86 machine code.
|
||||
|
||||
use super::registers::RU;
|
||||
use binemit::{bad_encoding, CodeSink, Reloc};
|
||||
use ir::condcodes::{CondCode, FloatCC, IntCC};
|
||||
use ir::{Ebb, Function, Inst, InstructionData, Opcode, TrapCode};
|
||||
use isa::{RegUnit, StackBase, StackBaseMask, StackRef};
|
||||
use regalloc::RegDiversions;
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/binemit-x86.rs"));
|
||||
|
||||
// Convert a stack base to the corresponding register.
|
||||
fn stk_base(base: StackBase) -> RegUnit {
|
||||
let ru = match base {
|
||||
StackBase::SP => RU::rsp,
|
||||
StackBase::FP => RU::rbp,
|
||||
StackBase::Zone => unimplemented!(),
|
||||
};
|
||||
ru as RegUnit
|
||||
}
|
||||
|
||||
// Mandatory prefix bytes for Mp* opcodes, indexed by `pp - 1` (pp = 1, 2, 3).
const PREFIX: [u8; 3] = [0x66, 0xf3, 0xf2];

// Second byte of a three-byte opcode, indexed by `mm - 2` (mm = 0b10, 0b11).
const OP3_BYTE2: [u8; 2] = [0x38, 0x3a];

// A REX prefix with no bits set: 0b0100WRXB.
const BASE_REX: u8 = 0b0100_0000;
|
||||
|
||||
// Create a single-register REX prefix, setting the B bit to bit 3 of the register.
|
||||
// This is used for instructions that encode a register in the low 3 bits of the opcode and for
|
||||
// instructions that use the ModR/M `reg` field for something else.
|
||||
fn rex1(reg_b: RegUnit) -> u8 {
|
||||
let b = ((reg_b >> 3) & 1) as u8;
|
||||
BASE_REX | b
|
||||
}
|
||||
|
||||
// Create a dual-register REX prefix, setting:
|
||||
//
|
||||
// REX.B = bit 3 of r/m register, or SIB base register when a SIB byte is present.
|
||||
// REX.R = bit 3 of reg register.
|
||||
fn rex2(rm: RegUnit, reg: RegUnit) -> u8 {
|
||||
let b = ((rm >> 3) & 1) as u8;
|
||||
let r = ((reg >> 3) & 1) as u8;
|
||||
BASE_REX | b | (r << 2)
|
||||
}
|
||||
|
||||
// Emit a REX prefix.
|
||||
//
|
||||
// The R, X, and B bits are computed from registers using the functions above. The W bit is
|
||||
// extracted from `bits`.
|
||||
fn rex_prefix<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(rex & 0xf8, BASE_REX);
|
||||
let w = ((bits >> 15) & 1) as u8;
|
||||
sink.put1(rex | (w << 3));
|
||||
}
|
||||
|
||||
// Emit a single-byte opcode with no REX prefix.
|
||||
fn put_op1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x8f00, 0, "Invalid encoding bits for Op1*");
|
||||
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op1 encoding");
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit a single-byte opcode with REX prefix.
|
||||
fn put_rexop1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for Op1*");
|
||||
rex_prefix(bits, rex, sink);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit two-byte opcode: 0F XX
|
||||
fn put_op2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x8f00, 0x0400, "Invalid encoding bits for Op2*");
|
||||
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op2 encoding");
|
||||
sink.put1(0x0f);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit two-byte opcode: 0F XX with REX prefix.
|
||||
fn put_rexop2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x0f00, 0x0400, "Invalid encoding bits for RexOp2*");
|
||||
rex_prefix(bits, rex, sink);
|
||||
sink.put1(0x0f);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit single-byte opcode with mandatory prefix.
|
||||
fn put_mp1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x8c00, 0, "Invalid encoding bits for Mp1*");
|
||||
let pp = (bits >> 8) & 3;
|
||||
sink.put1(PREFIX[(pp - 1) as usize]);
|
||||
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp1 encoding");
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit single-byte opcode with mandatory prefix and REX.
|
||||
fn put_rexmp1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x0c00, 0, "Invalid encoding bits for Mp1*");
|
||||
let pp = (bits >> 8) & 3;
|
||||
sink.put1(PREFIX[(pp - 1) as usize]);
|
||||
rex_prefix(bits, rex, sink);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit two-byte opcode (0F XX) with mandatory prefix.
|
||||
fn put_mp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x8c00, 0x0400, "Invalid encoding bits for Mp2*");
|
||||
let pp = (bits >> 8) & 3;
|
||||
sink.put1(PREFIX[(pp - 1) as usize]);
|
||||
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp2 encoding");
|
||||
sink.put1(0x0f);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit two-byte opcode (0F XX) with mandatory prefix and REX.
|
||||
fn put_rexmp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x0c00, 0x0400, "Invalid encoding bits for Mp2*");
|
||||
let pp = (bits >> 8) & 3;
|
||||
sink.put1(PREFIX[(pp - 1) as usize]);
|
||||
rex_prefix(bits, rex, sink);
|
||||
sink.put1(0x0f);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix.
|
||||
fn put_mp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x8800, 0x0800, "Invalid encoding bits for Mp3*");
|
||||
let pp = (bits >> 8) & 3;
|
||||
sink.put1(PREFIX[(pp - 1) as usize]);
|
||||
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp3 encoding");
|
||||
let mm = (bits >> 10) & 3;
|
||||
sink.put1(0x0f);
|
||||
sink.put1(OP3_BYTE2[(mm - 2) as usize]);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix and REX
|
||||
fn put_rexmp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x0800, 0x0800, "Invalid encoding bits for Mp3*");
|
||||
let pp = (bits >> 8) & 3;
|
||||
sink.put1(PREFIX[(pp - 1) as usize]);
|
||||
rex_prefix(bits, rex, sink);
|
||||
let mm = (bits >> 10) & 3;
|
||||
sink.put1(0x0f);
|
||||
sink.put1(OP3_BYTE2[(mm - 2) as usize]);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
/// Emit a ModR/M byte for reg-reg operands.
|
||||
fn modrm_rr<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
|
||||
let reg = reg as u8 & 7;
|
||||
let rm = rm as u8 & 7;
|
||||
let mut b = 0b11000000;
|
||||
b |= reg << 3;
|
||||
b |= rm;
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Emit a ModR/M byte where the reg bits are part of the opcode.
|
||||
fn modrm_r_bits<CS: CodeSink + ?Sized>(rm: RegUnit, bits: u16, sink: &mut CS) {
|
||||
let reg = (bits >> 12) as u8 & 7;
|
||||
let rm = rm as u8 & 7;
|
||||
let mut b = 0b11000000;
|
||||
b |= reg << 3;
|
||||
b |= rm;
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Emit a mode 00 ModR/M byte. This is a register-indirect addressing mode with no offset.
|
||||
/// Registers %rsp and %rbp are invalid for `rm`, %rsp indicates a SIB byte, and %rbp indicates an
|
||||
/// absolute immediate 32-bit address.
|
||||
fn modrm_rm<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
|
||||
let reg = reg as u8 & 7;
|
||||
let rm = rm as u8 & 7;
|
||||
let mut b = 0b00000000;
|
||||
b |= reg << 3;
|
||||
b |= rm;
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Emit a mode 00 Mod/RM byte, with a rip-relative displacement in 64-bit mode. Effective address
|
||||
/// is calculated by adding displacement to 64-bit rip of next instruction. See intel Sw dev manual
|
||||
/// section 2.2.1.6.
|
||||
fn modrm_riprel<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
|
||||
modrm_rm(0b101, reg, sink)
|
||||
}
|
||||
|
||||
/// Emit a mode 01 ModR/M byte. This is a register-indirect addressing mode with 8-bit
|
||||
/// displacement.
|
||||
/// Register %rsp is invalid for `rm`. It indicates the presence of a SIB byte.
|
||||
fn modrm_disp8<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
|
||||
let reg = reg as u8 & 7;
|
||||
let rm = rm as u8 & 7;
|
||||
let mut b = 0b01000000;
|
||||
b |= reg << 3;
|
||||
b |= rm;
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Emit a mode 10 ModR/M byte. This is a register-indirect addressing mode with 32-bit
|
||||
/// displacement.
|
||||
/// Register %rsp is invalid for `rm`. It indicates the presence of a SIB byte.
|
||||
fn modrm_disp32<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
|
||||
let reg = reg as u8 & 7;
|
||||
let rm = rm as u8 & 7;
|
||||
let mut b = 0b10000000;
|
||||
b |= reg << 3;
|
||||
b |= rm;
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Emit a mode 10 ModR/M byte indicating that a SIB byte is present.
|
||||
fn modrm_sib_disp32<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
|
||||
modrm_disp32(0b100, reg, sink);
|
||||
}
|
||||
|
||||
/// Emit a SIB byte with a base register and no scale+index.
|
||||
fn sib_noindex<CS: CodeSink + ?Sized>(base: RegUnit, sink: &mut CS) {
|
||||
let base = base as u8 & 7;
|
||||
// SIB SS_III_BBB.
|
||||
let mut b = 0b00_100_000;
|
||||
b |= base;
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Get the low 4 bits of an opcode for an integer condition code.
|
||||
///
|
||||
/// Add this offset to a base opcode for:
|
||||
///
|
||||
/// ---- 0x70: Short conditional branch.
|
||||
/// 0x0f 0x80: Long conditional branch.
|
||||
/// 0x0f 0x90: SetCC.
|
||||
///
|
||||
fn icc2opc(cond: IntCC) -> u16 {
|
||||
use ir::condcodes::IntCC::*;
|
||||
match cond {
|
||||
// 0x0 = Overflow.
|
||||
// 0x1 = !Overflow.
|
||||
UnsignedLessThan => 0x2,
|
||||
UnsignedGreaterThanOrEqual => 0x3,
|
||||
Equal => 0x4,
|
||||
NotEqual => 0x5,
|
||||
UnsignedLessThanOrEqual => 0x6,
|
||||
UnsignedGreaterThan => 0x7,
|
||||
// 0x8 = Sign.
|
||||
// 0x9 = !Sign.
|
||||
// 0xa = Parity even.
|
||||
// 0xb = Parity odd.
|
||||
SignedLessThan => 0xc,
|
||||
SignedGreaterThanOrEqual => 0xd,
|
||||
SignedLessThanOrEqual => 0xe,
|
||||
SignedGreaterThan => 0xf,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the low 4 bits of an opcode for a floating point condition code.
|
||||
///
|
||||
/// The ucomiss/ucomisd instructions set the FLAGS bits CF/PF/CF like this:
|
||||
///
|
||||
/// ZPC OSA
|
||||
/// UN 111 000
|
||||
/// GT 000 000
|
||||
/// LT 001 000
|
||||
/// EQ 100 000
|
||||
///
|
||||
/// Not all floating point condition codes are supported.
|
||||
fn fcc2opc(cond: FloatCC) -> u16 {
|
||||
use ir::condcodes::FloatCC::*;
|
||||
match cond {
|
||||
Ordered => 0xb, // EQ|LT|GT => *np (P=0)
|
||||
Unordered => 0xa, // UN => *p (P=1)
|
||||
OrderedNotEqual => 0x5, // LT|GT => *ne (Z=0),
|
||||
UnorderedOrEqual => 0x4, // UN|EQ => *e (Z=1)
|
||||
GreaterThan => 0x7, // GT => *a (C=0&Z=0)
|
||||
GreaterThanOrEqual => 0x3, // GT|EQ => *ae (C=0)
|
||||
UnorderedOrLessThan => 0x2, // UN|LT => *b (C=1)
|
||||
UnorderedOrLessThanOrEqual => 0x6, // UN|LT|EQ => *be (Z=1|C=1)
|
||||
Equal | // EQ
|
||||
NotEqual | // UN|LT|GT
|
||||
LessThan | // LT
|
||||
LessThanOrEqual | // LT|EQ
|
||||
UnorderedOrGreaterThan | // UN|GT
|
||||
UnorderedOrGreaterThanOrEqual // UN|GT|EQ
|
||||
=> panic!("{} not supported", cond),
|
||||
}
|
||||
}
|
||||
|
||||
/// Emit a single-byte branch displacement to `destination`.
|
||||
fn disp1<CS: CodeSink + ?Sized>(destination: Ebb, func: &Function, sink: &mut CS) {
|
||||
let delta = func.offsets[destination].wrapping_sub(sink.offset() + 1);
|
||||
sink.put1(delta as u8);
|
||||
}
|
||||
|
||||
/// Emit a single-byte branch displacement to `destination`.
|
||||
fn disp4<CS: CodeSink + ?Sized>(destination: Ebb, func: &Function, sink: &mut CS) {
|
||||
let delta = func.offsets[destination].wrapping_sub(sink.offset() + 4);
|
||||
sink.put4(delta);
|
||||
}
|
||||
509
lib/codegen/src/isa/x86/enc_tables.rs
Normal file
509
lib/codegen/src/isa/x86/enc_tables.rs
Normal file
@@ -0,0 +1,509 @@
|
||||
//! Encoding tables for x86 ISAs.
|
||||
|
||||
use super::registers::*;
|
||||
use bitset::BitSet;
|
||||
use cursor::{Cursor, FuncCursor};
|
||||
use flowgraph::ControlFlowGraph;
|
||||
use ir::condcodes::IntCC;
|
||||
use ir::{self, InstBuilder};
|
||||
use isa;
|
||||
use isa::constraints::*;
|
||||
use isa::enc_tables::*;
|
||||
use isa::encoding::RecipeSizing;
|
||||
use predicates;
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/encoding-x86.rs"));
|
||||
include!(concat!(env!("OUT_DIR"), "/legalize-x86.rs"));
|
||||
|
||||
/// Expand the `sdiv` and `srem` instructions using `x86_sdivmodx`.
|
||||
fn expand_sdivrem(
|
||||
inst: ir::Inst,
|
||||
func: &mut ir::Function,
|
||||
cfg: &mut ControlFlowGraph,
|
||||
isa: &isa::TargetIsa,
|
||||
) {
|
||||
let (x, y, is_srem) = match func.dfg[inst] {
|
||||
ir::InstructionData::Binary {
|
||||
opcode: ir::Opcode::Sdiv,
|
||||
args,
|
||||
} => (args[0], args[1], false),
|
||||
ir::InstructionData::Binary {
|
||||
opcode: ir::Opcode::Srem,
|
||||
args,
|
||||
} => (args[0], args[1], true),
|
||||
_ => panic!("Need sdiv/srem: {}", func.dfg.display_inst(inst, None)),
|
||||
};
|
||||
let avoid_div_traps = isa.flags().avoid_div_traps();
|
||||
let old_ebb = func.layout.pp_ebb(inst);
|
||||
let result = func.dfg.first_result(inst);
|
||||
let ty = func.dfg.value_type(result);
|
||||
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
pos.func.dfg.clear_results(inst);
|
||||
|
||||
// If we can tolerate native division traps, sdiv doesn't need branching.
|
||||
if !avoid_div_traps && !is_srem {
|
||||
let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
|
||||
pos.ins().with_result(result).x86_sdivmodx(x, xhi, y);
|
||||
pos.remove_inst();
|
||||
return;
|
||||
}
|
||||
|
||||
// EBB handling the -1 divisor case.
|
||||
let minus_one = pos.func.dfg.make_ebb();
|
||||
|
||||
// Final EBB with one argument representing the final result value.
|
||||
let done = pos.func.dfg.make_ebb();
|
||||
|
||||
// Move the `inst` result value onto the `done` EBB.
|
||||
pos.func.dfg.attach_ebb_param(done, result);
|
||||
|
||||
// Start by checking for a -1 divisor which needs to be handled specially.
|
||||
let is_m1 = pos.ins().ifcmp_imm(y, -1);
|
||||
pos.ins().brif(IntCC::Equal, is_m1, minus_one, &[]);
|
||||
|
||||
// Put in an explicit division-by-zero trap if the environment requires it.
|
||||
if avoid_div_traps {
|
||||
pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero);
|
||||
}
|
||||
|
||||
// Now it is safe to execute the `x86_sdivmodx` instruction which will still trap on division
|
||||
// by zero.
|
||||
let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
|
||||
let (quot, rem) = pos.ins().x86_sdivmodx(x, xhi, y);
|
||||
let divres = if is_srem { rem } else { quot };
|
||||
pos.ins().jump(done, &[divres]);
|
||||
|
||||
// Now deal with the -1 divisor case.
|
||||
pos.insert_ebb(minus_one);
|
||||
let m1_result = if is_srem {
|
||||
// x % -1 = 0.
|
||||
pos.ins().iconst(ty, 0)
|
||||
} else {
|
||||
// Explicitly check for overflow: Trap when x == INT_MIN.
|
||||
debug_assert!(avoid_div_traps, "Native trapping divide handled above");
|
||||
let f = pos.ins().ifcmp_imm(x, -1 << (ty.lane_bits() - 1));
|
||||
pos.ins().trapif(
|
||||
IntCC::Equal,
|
||||
f,
|
||||
ir::TrapCode::IntegerOverflow,
|
||||
);
|
||||
// x / -1 = -x.
|
||||
pos.ins().irsub_imm(x, 0)
|
||||
};
|
||||
|
||||
// Recycle the original instruction as a jump.
|
||||
pos.func.dfg.replace(inst).jump(done, &[m1_result]);
|
||||
|
||||
// Finally insert a label for the completion.
|
||||
pos.next_inst();
|
||||
pos.insert_ebb(done);
|
||||
|
||||
cfg.recompute_ebb(pos.func, old_ebb);
|
||||
cfg.recompute_ebb(pos.func, minus_one);
|
||||
cfg.recompute_ebb(pos.func, done);
|
||||
}
|
||||
|
||||
/// Expand the `udiv` and `urem` instructions using `x86_udivmodx`.
|
||||
fn expand_udivrem(
|
||||
inst: ir::Inst,
|
||||
func: &mut ir::Function,
|
||||
_cfg: &mut ControlFlowGraph,
|
||||
isa: &isa::TargetIsa,
|
||||
) {
|
||||
let (x, y, is_urem) = match func.dfg[inst] {
|
||||
ir::InstructionData::Binary {
|
||||
opcode: ir::Opcode::Udiv,
|
||||
args,
|
||||
} => (args[0], args[1], false),
|
||||
ir::InstructionData::Binary {
|
||||
opcode: ir::Opcode::Urem,
|
||||
args,
|
||||
} => (args[0], args[1], true),
|
||||
_ => panic!("Need udiv/urem: {}", func.dfg.display_inst(inst, None)),
|
||||
};
|
||||
let avoid_div_traps = isa.flags().avoid_div_traps();
|
||||
let result = func.dfg.first_result(inst);
|
||||
let ty = func.dfg.value_type(result);
|
||||
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
pos.func.dfg.clear_results(inst);
|
||||
|
||||
// Put in an explicit division-by-zero trap if the environment requires it.
|
||||
if avoid_div_traps {
|
||||
pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero);
|
||||
}
|
||||
|
||||
// Now it is safe to execute the `x86_udivmodx` instruction.
|
||||
let xhi = pos.ins().iconst(ty, 0);
|
||||
let reuse = if is_urem {
|
||||
[None, Some(result)]
|
||||
} else {
|
||||
[Some(result), None]
|
||||
};
|
||||
pos.ins().with_results(reuse).x86_udivmodx(x, xhi, y);
|
||||
pos.remove_inst();
|
||||
}
|
||||
|
||||
/// Expand the `fmin` and `fmax` instructions using the x86 `x86_fmin` and `x86_fmax`
|
||||
/// instructions.
|
||||
fn expand_minmax(
|
||||
inst: ir::Inst,
|
||||
func: &mut ir::Function,
|
||||
cfg: &mut ControlFlowGraph,
|
||||
_isa: &isa::TargetIsa,
|
||||
) {
|
||||
use ir::condcodes::FloatCC;
|
||||
|
||||
let (x, y, x86_opc, bitwise_opc) = match func.dfg[inst] {
|
||||
ir::InstructionData::Binary {
|
||||
opcode: ir::Opcode::Fmin,
|
||||
args,
|
||||
} => (args[0], args[1], ir::Opcode::X86Fmin, ir::Opcode::Bor),
|
||||
ir::InstructionData::Binary {
|
||||
opcode: ir::Opcode::Fmax,
|
||||
args,
|
||||
} => (args[0], args[1], ir::Opcode::X86Fmax, ir::Opcode::Band),
|
||||
_ => panic!("Expected fmin/fmax: {}", func.dfg.display_inst(inst, None)),
|
||||
};
|
||||
let old_ebb = func.layout.pp_ebb(inst);
|
||||
|
||||
// We need to handle the following conditions, depending on how x and y compare:
|
||||
//
|
||||
// 1. LT or GT: The native `x86_opc` min/max instruction does what we need.
|
||||
// 2. EQ: We need to use `bitwise_opc` to make sure that
|
||||
// fmin(0.0, -0.0) -> -0.0 and fmax(0.0, -0.0) -> 0.0.
|
||||
// 3. UN: We need to produce a quiet NaN that is canonical if the inputs are canonical.
|
||||
|
||||
// EBB handling case 3) where one operand is NaN.
|
||||
let uno_ebb = func.dfg.make_ebb();
|
||||
|
||||
// EBB that handles the unordered or equal cases 2) and 3).
|
||||
let ueq_ebb = func.dfg.make_ebb();
|
||||
|
||||
// Final EBB with one argument representing the final result value.
|
||||
let done = func.dfg.make_ebb();
|
||||
|
||||
// The basic blocks are laid out to minimize branching for the common cases:
|
||||
//
|
||||
// 1) One branch not taken, one jump.
|
||||
// 2) One branch taken.
|
||||
// 3) Two branches taken, one jump.
|
||||
|
||||
// Move the `inst` result value onto the `done` EBB.
|
||||
let result = func.dfg.first_result(inst);
|
||||
let ty = func.dfg.value_type(result);
|
||||
func.dfg.clear_results(inst);
|
||||
func.dfg.attach_ebb_param(done, result);
|
||||
|
||||
// Test for case 1) ordered and not equal.
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
let cmp_ueq = pos.ins().fcmp(FloatCC::UnorderedOrEqual, x, y);
|
||||
pos.ins().brnz(cmp_ueq, ueq_ebb, &[]);
|
||||
|
||||
// Handle the common ordered, not equal (LT|GT) case.
|
||||
let one_inst = pos.ins().Binary(x86_opc, ty, x, y).0;
|
||||
let one_result = pos.func.dfg.first_result(one_inst);
|
||||
pos.ins().jump(done, &[one_result]);
|
||||
|
||||
// Case 3) Unordered.
|
||||
// We know that at least one operand is a NaN that needs to be propagated. We simply use an
|
||||
// `fadd` instruction which has the same NaN propagation semantics.
|
||||
pos.insert_ebb(uno_ebb);
|
||||
let uno_result = pos.ins().fadd(x, y);
|
||||
pos.ins().jump(done, &[uno_result]);
|
||||
|
||||
// Case 2) or 3).
|
||||
pos.insert_ebb(ueq_ebb);
|
||||
// Test for case 3) (UN) one value is NaN.
|
||||
// TODO: When we get support for flag values, we can reuse the above comparison.
|
||||
let cmp_uno = pos.ins().fcmp(FloatCC::Unordered, x, y);
|
||||
pos.ins().brnz(cmp_uno, uno_ebb, &[]);
|
||||
|
||||
// We are now in case 2) where x and y compare EQ.
|
||||
// We need a bitwise operation to get the sign right.
|
||||
let bw_inst = pos.ins().Binary(bitwise_opc, ty, x, y).0;
|
||||
let bw_result = pos.func.dfg.first_result(bw_inst);
|
||||
// This should become a fall-through for this second most common case.
|
||||
// Recycle the original instruction as a jump.
|
||||
pos.func.dfg.replace(inst).jump(done, &[bw_result]);
|
||||
|
||||
// Finally insert a label for the completion.
|
||||
pos.next_inst();
|
||||
pos.insert_ebb(done);
|
||||
|
||||
cfg.recompute_ebb(pos.func, old_ebb);
|
||||
cfg.recompute_ebb(pos.func, ueq_ebb);
|
||||
cfg.recompute_ebb(pos.func, uno_ebb);
|
||||
cfg.recompute_ebb(pos.func, done);
|
||||
}
|
||||
|
||||
/// x86 has no unsigned-to-float conversions. We handle the easy case of zero-extending i32 to
/// i64 with a pattern, the rest needs more code.
fn expand_fcvt_from_uint(
    inst: ir::Inst,
    func: &mut ir::Function,
    cfg: &mut ControlFlowGraph,
    _isa: &isa::TargetIsa,
) {
    use ir::condcodes::IntCC;

    // Extract the input value; this expansion only applies to `fcvt_from_uint`.
    let x;
    match func.dfg[inst] {
        ir::InstructionData::Unary {
            opcode: ir::Opcode::FcvtFromUint,
            arg,
        } => x = arg,
        _ => panic!("Need fcvt_from_uint: {}", func.dfg.display_inst(inst, None)),
    }
    let xty = func.dfg.value_type(x);
    let result = func.dfg.first_result(inst);
    let ty = func.dfg.value_type(result);
    let mut pos = FuncCursor::new(func).at_inst(inst);
    pos.use_srcloc(inst);

    // Conversion from unsigned 32-bit is easy on x86-64: zero-extend to 64 bits and use the
    // signed conversion, which is exact for all u32 values.
    // TODO: This should be guarded by an ISA check.
    if xty == ir::types::I32 {
        let wide = pos.ins().uextend(ir::types::I64, x);
        pos.func.dfg.replace(inst).fcvt_from_sint(ty, wide);
        return;
    }

    let old_ebb = pos.func.layout.pp_ebb(inst);

    // EBB handling the case where x < 0 (i.e. the top bit is set when viewed as signed).
    let neg_ebb = pos.func.dfg.make_ebb();

    // Final EBB with one argument representing the final result value.
    let done = pos.func.dfg.make_ebb();

    // Move the `inst` result value onto the `done` EBB.
    pos.func.dfg.clear_results(inst);
    pos.func.dfg.attach_ebb_param(done, result);

    // If x as a signed int is not negative, we can use the existing `fcvt_from_sint` instruction.
    let is_neg = pos.ins().icmp_imm(IntCC::SignedLessThan, x, 0);
    pos.ins().brnz(is_neg, neg_ebb, &[]);

    // Easy case: just use a signed conversion.
    let posres = pos.ins().fcvt_from_sint(ty, x);
    pos.ins().jump(done, &[posres]);

    // Now handle the negative case.
    pos.insert_ebb(neg_ebb);

    // Divide x by two to get it in range for the signed conversion, keep the LSB, and scale it
    // back up on the FP side.
    // OR-ing the shifted-out LSB back in preserves it as a sticky bit, so the final doubling via
    // `fadd` rounds the same way a direct conversion would (round-to-odd trick —
    // NOTE(review): confirm against the reference lowering).
    let ihalf = pos.ins().ushr_imm(x, 1);
    let lsb = pos.ins().band_imm(x, 1);
    let ifinal = pos.ins().bor(ihalf, lsb);
    let fhalf = pos.ins().fcvt_from_sint(ty, ifinal);
    let negres = pos.ins().fadd(fhalf, fhalf);

    // Recycle the original instruction as a jump.
    pos.func.dfg.replace(inst).jump(done, &[negres]);

    // Finally insert a label for the completion.
    pos.next_inst();
    pos.insert_ebb(done);

    // The expansion introduced new branches, so the flow graph entries for all touched EBBs must
    // be recomputed.
    cfg.recompute_ebb(pos.func, old_ebb);
    cfg.recompute_ebb(pos.func, neg_ebb);
    cfg.recompute_ebb(pos.func, done);
}
|
||||
|
||||
/// Expand `fcvt_to_sint` into `x86_cvtt2si` plus explicit checks.
///
/// The hardware conversion doesn't trap on NaN or overflow; it produces INT_MIN instead. This
/// expansion keeps the fast path (result != INT_MIN) branch-free and inserts traps with the
/// proper trap codes (`BadConversionToInteger` for NaN, `IntegerOverflow` for out-of-range) for
/// the slow path.
fn expand_fcvt_to_sint(
    inst: ir::Inst,
    func: &mut ir::Function,
    cfg: &mut ControlFlowGraph,
    _isa: &isa::TargetIsa,
) {
    use ir::condcodes::{FloatCC, IntCC};
    use ir::immediates::{Ieee32, Ieee64};

    let x;
    match func.dfg[inst] {
        ir::InstructionData::Unary {
            opcode: ir::Opcode::FcvtToSint,
            arg,
        } => x = arg,
        _ => panic!("Need fcvt_to_sint: {}", func.dfg.display_inst(inst, None)),
    }
    let old_ebb = func.layout.pp_ebb(inst);
    let xty = func.dfg.value_type(x);
    let result = func.dfg.first_result(inst);
    let ty = func.dfg.value_type(result);

    // Final EBB after the bad value checks.
    let done = func.dfg.make_ebb();

    // The `x86_cvtt2si` performs the desired conversion, but it doesn't trap on NaN or overflow.
    // It produces an INT_MIN result instead.
    func.dfg.replace(inst).x86_cvtt2si(ty, x);

    let mut pos = FuncCursor::new(func).after_inst(inst);
    pos.use_srcloc(inst);

    // Fast path: any result other than INT_MIN (== 1 << (bits-1)) is known good.
    let is_done = pos.ins().icmp_imm(
        IntCC::NotEqual,
        result,
        1 << (ty.lane_bits() - 1),
    );
    pos.ins().brnz(is_done, done, &[]);

    // We now have the following possibilities:
    //
    // 1. INT_MIN was actually the correct conversion result.
    // 2. The input was NaN -> trap bad_toint
    // 3. The input was out of range -> trap int_ovf
    //

    // Check for NaN (a value is unordered with itself iff it is NaN).
    let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x);
    pos.ins().trapnz(
        is_nan,
        ir::TrapCode::BadConversionToInteger,
    );

    // Check for case 1: INT_MIN is the correct result.
    // Determine the smallest floating point number that would convert to INT_MIN.
    let mut overflow_cc = FloatCC::LessThan;
    let output_bits = ty.lane_bits();
    let flimit = match xty {
        // An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so
        // there are values less than -2^(N-1) that convert correctly to INT_MIN.
        ir::types::F32 => {
            pos.ins().f32const(if output_bits < 32 {
                overflow_cc = FloatCC::LessThanOrEqual;
                Ieee32::fcvt_to_sint_negative_overflow(output_bits)
            } else {
                Ieee32::pow2(output_bits - 1).neg()
            })
        }
        ir::types::F64 => {
            // An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so
            // there are values less than -2^(N-1) that convert correctly to INT_MIN.
            pos.ins().f64const(if output_bits < 64 {
                overflow_cc = FloatCC::LessThanOrEqual;
                Ieee64::fcvt_to_sint_negative_overflow(output_bits)
            } else {
                Ieee64::pow2(output_bits - 1).neg()
            })
        }
        _ => panic!("Can't convert {}", xty),
    };
    let overflow = pos.ins().fcmp(overflow_cc, x, flimit);
    pos.ins().trapnz(overflow, ir::TrapCode::IntegerOverflow);

    // Finally, we could have a positive value that is too large.
    // Any x >= 0.0 reaching this point must have overflowed, since a valid non-negative input
    // would not have produced INT_MIN.
    let fzero = match xty {
        ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)),
        ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)),
        _ => panic!("Can't convert {}", xty),
    };
    let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero);
    pos.ins().trapnz(overflow, ir::TrapCode::IntegerOverflow);

    pos.ins().jump(done, &[]);
    pos.insert_ebb(done);

    cfg.recompute_ebb(pos.func, old_ebb);
    cfg.recompute_ebb(pos.func, done);
}
|
||||
|
||||
/// Expand `fcvt_to_uint`, which x86 has no native instruction for.
///
/// Inputs below 2^(N-1) are converted with the signed `x86_cvtt2si`; inputs at or above 2^(N-1)
/// are first biased down by 2^(N-1), converted, and then have the bias added back as an integer.
/// NaN traps with `BadConversionToInteger`; negative or too-large inputs trap with
/// `IntegerOverflow`.
fn expand_fcvt_to_uint(
    inst: ir::Inst,
    func: &mut ir::Function,
    cfg: &mut ControlFlowGraph,
    _isa: &isa::TargetIsa,
) {
    use ir::condcodes::{FloatCC, IntCC};
    use ir::immediates::{Ieee32, Ieee64};

    let x;
    match func.dfg[inst] {
        ir::InstructionData::Unary {
            opcode: ir::Opcode::FcvtToUint,
            arg,
        } => x = arg,
        _ => panic!("Need fcvt_to_uint: {}", func.dfg.display_inst(inst, None)),
    }
    let old_ebb = func.layout.pp_ebb(inst);
    let xty = func.dfg.value_type(x);
    let result = func.dfg.first_result(inst);
    let ty = func.dfg.value_type(result);

    // EBB handling numbers >= 2^(N-1).
    let large = func.dfg.make_ebb();

    // Final EBB after the bad value checks.
    let done = func.dfg.make_ebb();

    // Move the `inst` result value onto the `done` EBB.
    func.dfg.clear_results(inst);
    func.dfg.attach_ebb_param(done, result);

    let mut pos = FuncCursor::new(func).at_inst(inst);
    pos.use_srcloc(inst);

    // Start by materializing the floating point constant 2^(N-1) where N is the number of bits in
    // the destination integer type.
    let pow2nm1 = match xty {
        ir::types::F32 => pos.ins().f32const(Ieee32::pow2(ty.lane_bits() - 1)),
        ir::types::F64 => pos.ins().f64const(Ieee64::pow2(ty.lane_bits() - 1)),
        _ => panic!("Can't convert {}", xty),
    };
    let is_large = pos.ins().ffcmp(x, pow2nm1);
    pos.ins().brff(
        FloatCC::GreaterThanOrEqual,
        is_large,
        large,
        &[],
    );

    // We need to generate a specific trap code when `x` is NaN, so reuse the flags from the
    // previous comparison.
    pos.ins().trapff(
        FloatCC::Unordered,
        is_large,
        ir::TrapCode::BadConversionToInteger,
    );

    // Now we know that x < 2^(N-1) and not NaN.
    let sres = pos.ins().x86_cvtt2si(ty, x);
    // A negative signed result here means x was negative, which is out of range for unsigned.
    let is_neg = pos.ins().ifcmp_imm(sres, 0);
    pos.ins().brif(
        IntCC::SignedGreaterThanOrEqual,
        is_neg,
        done,
        &[sres],
    );
    pos.ins().trap(ir::TrapCode::IntegerOverflow);

    // Handle the case where x >= 2^(N-1) and not NaN.
    // Bias x down into signed range, convert, then add 2^(N-1) back as an integer.
    pos.insert_ebb(large);
    let adjx = pos.ins().fsub(x, pow2nm1);
    let lres = pos.ins().x86_cvtt2si(ty, adjx);
    // A negative converted value means the biased input was still out of range (or the
    // conversion produced the INT_MIN sentinel), so trap on overflow.
    let is_neg = pos.ins().ifcmp_imm(lres, 0);
    pos.ins().trapif(
        IntCC::SignedLessThan,
        is_neg,
        ir::TrapCode::IntegerOverflow,
    );
    let lfinal = pos.ins().iadd_imm(lres, 1 << (ty.lane_bits() - 1));

    // Recycle the original instruction as a jump.
    pos.func.dfg.replace(inst).jump(done, &[lfinal]);

    // Finally insert a label for the completion.
    pos.next_inst();
    pos.insert_ebb(done);

    cfg.recompute_ebb(pos.func, old_ebb);
    cfg.recompute_ebb(pos.func, large);
    cfg.recompute_ebb(pos.func, done);
}
|
||||
129
lib/codegen/src/isa/x86/mod.rs
Normal file
129
lib/codegen/src/isa/x86/mod.rs
Normal file
@@ -0,0 +1,129 @@
|
||||
//! x86 Instruction Set Architectures.
|
||||
|
||||
mod abi;
|
||||
mod binemit;
|
||||
mod enc_tables;
|
||||
mod registers;
|
||||
pub mod settings;
|
||||
|
||||
use super::super::settings as shared_settings;
|
||||
use binemit::{emit_function, CodeSink, MemoryCodeSink};
|
||||
use ir;
|
||||
use isa::Builder as IsaBuilder;
|
||||
use isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings};
|
||||
use isa::{EncInfo, RegClass, RegInfo, TargetIsa};
|
||||
use regalloc;
|
||||
use result;
|
||||
use std::boxed::Box;
|
||||
use std::fmt;
|
||||
use timing;
|
||||
|
||||
/// An x86 target ISA instance carrying the settings chosen at construction time.
#[allow(dead_code)]
struct Isa {
    // Settings shared by all target ISAs.
    shared_flags: shared_settings::Flags,
    // x86-specific settings (see `settings.rs`).
    isa_flags: settings::Flags,
    // Level 1 encoding table for the selected CPU mode; `isa_constructor` picks `LEVEL1_I64` or
    // `LEVEL1_I32` based on the 64-bit flag.
    cpumode: &'static [shared_enc_tables::Level1Entry<u16>],
}
|
||||
|
||||
/// Get an ISA builder for creating x86 targets.
pub fn isa_builder() -> IsaBuilder {
    IsaBuilder {
        // Start from the default x86 settings builder; `isa_constructor` is invoked once all
        // settings have been configured.
        setup: settings::builder(),
        constructor: isa_constructor,
    }
}
|
||||
|
||||
fn isa_constructor(
|
||||
shared_flags: shared_settings::Flags,
|
||||
builder: &shared_settings::Builder,
|
||||
) -> Box<TargetIsa> {
|
||||
let level1 = if shared_flags.is_64bit() {
|
||||
&enc_tables::LEVEL1_I64[..]
|
||||
} else {
|
||||
&enc_tables::LEVEL1_I32[..]
|
||||
};
|
||||
Box::new(Isa {
|
||||
isa_flags: settings::Flags::new(&shared_flags, builder),
|
||||
shared_flags,
|
||||
cpumode: level1,
|
||||
})
|
||||
}
|
||||
|
||||
impl TargetIsa for Isa {
    fn name(&self) -> &'static str {
        "x86"
    }

    fn flags(&self) -> &shared_settings::Flags {
        &self.shared_flags
    }

    // x86 legalizer expansions create CPU flag values (`ffcmp`/`brff`/`trapff` in
    // `enc_tables`), so report that this ISA uses them.
    fn uses_cpu_flags(&self) -> bool {
        true
    }

    fn register_info(&self) -> RegInfo {
        registers::INFO.clone()
    }

    fn encoding_info(&self) -> EncInfo {
        enc_tables::INFO.clone()
    }

    // Look up legal encodings for `inst` in the generated tables, starting from the level 1
    // table selected for the current CPU mode.
    fn legal_encodings<'a>(
        &'a self,
        func: &'a ir::Function,
        inst: &'a ir::InstructionData,
        ctrl_typevar: ir::Type,
    ) -> Encodings<'a> {
        lookup_enclist(
            ctrl_typevar,
            inst,
            func,
            self.cpumode,
            &enc_tables::LEVEL2[..],
            &enc_tables::ENCLISTS[..],
            &enc_tables::LEGALIZE_ACTIONS[..],
            &enc_tables::RECIPE_PREDICATES[..],
            &enc_tables::INST_PREDICATES[..],
            self.isa_flags.predicate_view(),
        )
    }

    // ABI concerns are delegated to the `abi` submodule.
    fn legalize_signature(&self, sig: &mut ir::Signature, current: bool) {
        abi::legalize_signature(sig, &self.shared_flags, current)
    }

    fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
        abi::regclass_for_abi_type(ty)
    }

    fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
        abi::allocatable_registers(func, &self.shared_flags)
    }

    // Binary emission is delegated to the `binemit` submodule.
    fn emit_inst(
        &self,
        func: &ir::Function,
        inst: ir::Inst,
        divert: &mut regalloc::RegDiversions,
        sink: &mut CodeSink,
    ) {
        binemit::emit_inst(func, inst, divert, sink)
    }

    fn emit_function(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
        emit_function(func, binemit::emit_inst, sink)
    }

    fn prologue_epilogue(&self, func: &mut ir::Function) -> result::CtonResult {
        // Record this pass in the timing statistics.
        let _tt = timing::prologue_epilogue();
        abi::prologue_epilogue(func, self)
    }
}
|
||||
|
||||
impl fmt::Display for Isa {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}\n{}", self.shared_flags, self.isa_flags)
|
||||
}
|
||||
}
|
||||
63
lib/codegen/src/isa/x86/registers.rs
Normal file
63
lib/codegen/src/isa/x86/registers.rs
Normal file
@@ -0,0 +1,63 @@
|
||||
//! x86 register descriptions.
|
||||
|
||||
use isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit};
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/registers-x86.rs"));
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use isa::RegUnit;
    use std::string::{String, ToString};

    // Register-name parsing must map to the hardware encoding numbers, not alphabetical order.
    #[test]
    fn unit_encodings() {
        // The encoding of integer registers is not alphabetical.
        assert_eq!(INFO.parse_regunit("rax"), Some(0));
        assert_eq!(INFO.parse_regunit("rbx"), Some(3));
        assert_eq!(INFO.parse_regunit("rcx"), Some(1));
        assert_eq!(INFO.parse_regunit("rdx"), Some(2));
        assert_eq!(INFO.parse_regunit("rsi"), Some(6));
        assert_eq!(INFO.parse_regunit("rdi"), Some(7));
        assert_eq!(INFO.parse_regunit("rbp"), Some(5));
        assert_eq!(INFO.parse_regunit("rsp"), Some(4));
        assert_eq!(INFO.parse_regunit("r8"), Some(8));
        assert_eq!(INFO.parse_regunit("r15"), Some(15));

        // FP registers follow the 16 GPRs.
        assert_eq!(INFO.parse_regunit("xmm0"), Some(16));
        assert_eq!(INFO.parse_regunit("xmm15"), Some(31));
    }

    // Display must be the inverse of parsing, with a `%` prefix.
    #[test]
    fn unit_names() {
        fn uname(ru: RegUnit) -> String {
            INFO.display_regunit(ru).to_string()
        }

        assert_eq!(uname(0), "%rax");
        assert_eq!(uname(3), "%rbx");
        assert_eq!(uname(1), "%rcx");
        assert_eq!(uname(2), "%rdx");
        assert_eq!(uname(6), "%rsi");
        assert_eq!(uname(7), "%rdi");
        assert_eq!(uname(5), "%rbp");
        assert_eq!(uname(4), "%rsp");
        assert_eq!(uname(8), "%r8");
        assert_eq!(uname(15), "%r15");
        assert_eq!(uname(16), "%xmm0");
        assert_eq!(uname(31), "%xmm15");
    }

    // ABCD is a subclass of GPR; FPR is disjoint from both.
    #[test]
    fn regclasses() {
        assert_eq!(GPR.intersect_index(GPR), Some(GPR.into()));
        assert_eq!(GPR.intersect_index(ABCD), Some(ABCD.into()));
        assert_eq!(GPR.intersect_index(FPR), None);
        assert_eq!(ABCD.intersect_index(GPR), Some(ABCD.into()));
        assert_eq!(ABCD.intersect_index(ABCD), Some(ABCD.into()));
        assert_eq!(ABCD.intersect_index(FPR), None);
        assert_eq!(FPR.intersect_index(FPR), Some(FPR.into()));
        assert_eq!(FPR.intersect_index(GPR), None);
        assert_eq!(FPR.intersect_index(ABCD), None);
    }
}
|
||||
52
lib/codegen/src/isa/x86/settings.rs
Normal file
52
lib/codegen/src/isa/x86/settings.rs
Normal file
@@ -0,0 +1,52 @@
|
||||
//! x86 Settings.
|
||||
|
||||
use settings::{self, detail, Builder};
|
||||
use std::fmt;
|
||||
|
||||
// Include code generated by `lib/codegen/meta/gen_settings.py`. This file contains a public
|
||||
// `Flags` struct with an impl for all of the settings defined in
|
||||
// `lib/codegen/meta/isa/x86/settings.py`.
|
||||
include!(concat!(env!("OUT_DIR"), "/settings-x86.rs"));
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{builder, Flags};
    use settings::{self, Configurable};

    // Preset CPU names must expand to the expected feature flags.
    #[test]
    fn presets() {
        let shared = settings::Flags::new(&settings::builder());

        // Nehalem has SSE4.1 but not BMI1.
        let mut b1 = builder();
        b1.enable("nehalem").unwrap();
        let f1 = Flags::new(&shared, &b1);
        assert_eq!(f1.has_sse41(), true);
        assert_eq!(f1.has_bmi1(), false);

        // Haswell has both SSE4.1 and BMI1.
        let mut b2 = builder();
        b2.enable("haswell").unwrap();
        let f2 = Flags::new(&shared, &b2);
        assert_eq!(f2.has_sse41(), true);
        assert_eq!(f2.has_bmi1(), true);
    }

    #[test]
    fn display_presets() {
        // Spot check that the flags Display impl does not cause a panic
        let shared = settings::Flags::new(&settings::builder());

        let b1 = builder();
        let f1 = Flags::new(&shared, &b1);
        let _ = format!("{}", f1);

        let mut b2 = builder();
        b2.enable("nehalem").unwrap();
        // Bug fix: previously built from `&b1`, so the nehalem flags were never displayed.
        let f2 = Flags::new(&shared, &b2);
        let _ = format!("{}", f2);

        let mut b3 = builder();
        b3.enable("haswell").unwrap();
        // Bug fix: previously built from `&b1`, so the haswell flags were never displayed.
        let f3 = Flags::new(&shared, &b3);
        let _ = format!("{}", f3);
    }
}
|
||||
98
lib/codegen/src/iterators.rs
Normal file
98
lib/codegen/src/iterators.rs
Normal file
@@ -0,0 +1,98 @@
|
||||
//! Iterator utilities.
|
||||
|
||||
/// Extra methods for iterators.
pub trait IteratorExtras: Iterator {
    /// Create an iterator that produces adjacent pairs of elements from the iterator.
    ///
    /// An iterator yielding `a, b, c` produces `(a, b), (b, c)`; fewer than two elements
    /// produce nothing.
    fn adjacent_pairs(mut self) -> AdjacentPairs<Self>
    where
        Self: Sized,
        Self::Item: Clone,
    {
        // Prime the pump so `next()` always holds the left element of the upcoming pair.
        let first = self.next();
        AdjacentPairs {
            elem: first,
            iter: self,
        }
    }
}

impl<T> IteratorExtras for T
where
    T: Iterator,
{
}

/// Adjacent pairs iterator returned by `adjacent_pairs()`.
///
/// This wraps another iterator and produces a sequence of adjacent pairs of elements.
pub struct AdjacentPairs<I>
where
    I: Iterator,
    I::Item: Clone,
{
    iter: I,
    elem: Option<I::Item>,
}

impl<I> Iterator for AdjacentPairs<I>
where
    I: Iterator,
    I::Item: Clone,
{
    type Item = (I::Item, I::Item);

    fn next(&mut self) -> Option<Self::Item> {
        // `elem` is the left half of the pair; the wrapped iterator supplies the right half,
        // which is retained as the left half of the following pair.
        match self.elem.take() {
            None => None,
            Some(prev) => {
                self.elem = self.iter.next();
                match self.elem {
                    Some(ref cur) => Some((prev, cur.clone())),
                    None => None,
                }
            }
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::vec::Vec;

    // Exercise `adjacent_pairs()` at every interesting length: many, two, one, and zero
    // elements. (A verbatim duplicate of the `[2, 3, 4]` case has been removed.)
    #[test]
    fn adjpairs() {
        use super::IteratorExtras;

        assert_eq!(
            [1, 2, 3, 4]
                .iter()
                .cloned()
                .adjacent_pairs()
                .collect::<Vec<_>>(),
            vec![(1, 2), (2, 3), (3, 4)]
        );
        assert_eq!(
            [2, 3, 4]
                .iter()
                .cloned()
                .adjacent_pairs()
                .collect::<Vec<_>>(),
            vec![(2, 3), (3, 4)]
        );
        assert_eq!(
            [3, 4].iter().cloned().adjacent_pairs().collect::<Vec<_>>(),
            vec![(3, 4)]
        );
        assert_eq!(
            [4].iter().cloned().adjacent_pairs().collect::<Vec<_>>(),
            vec![]
        );
        assert_eq!(
            []
                .iter()
                .cloned()
                .adjacent_pairs()
                .collect::<Vec<(i32, i32)>>(),
            vec![]
        );
    }
}
|
||||
683
lib/codegen/src/legalizer/boundary.rs
Normal file
683
lib/codegen/src/legalizer/boundary.rs
Normal file
@@ -0,0 +1,683 @@
|
||||
//! Legalize ABI boundaries.
|
||||
//!
|
||||
//! This legalizer sub-module contains code for dealing with ABI boundaries:
|
||||
//!
|
||||
//! - Function arguments passed to the entry block.
|
||||
//! - Function arguments passed to call instructions.
|
||||
//! - Return values from call instructions.
|
||||
//! - Return values passed to return instructions.
|
||||
//!
|
||||
//! The ABI boundary legalization happens in two phases:
|
||||
//!
|
||||
//! 1. The `legalize_signatures` function rewrites all the preamble signatures with ABI information
|
||||
//! and possibly new argument types. It also rewrites the entry block arguments to match.
|
||||
//! 2. The `handle_call_abi` and `handle_return_abi` functions rewrite call and return instructions
|
||||
//! to match the new ABI signatures.
|
||||
//!
|
||||
//! Between the two phases, preamble signatures and call/return arguments don't match. This
|
||||
//! intermediate state doesn't type check.
|
||||
|
||||
use abi::{legalize_abi_value, ValueConversion};
|
||||
use cursor::{Cursor, FuncCursor};
|
||||
use flowgraph::ControlFlowGraph;
|
||||
use ir::instructions::CallInfo;
|
||||
use ir::{AbiParam, ArgumentLoc, ArgumentPurpose, DataFlowGraph, Ebb, Function, Inst, InstBuilder,
|
||||
SigRef, Signature, Type, Value, ValueLoc};
|
||||
use isa::TargetIsa;
|
||||
use legalizer::split::{isplit, vsplit};
|
||||
use std::vec::Vec;
|
||||
|
||||
/// Legalize all the function signatures in `func`.
///
/// This changes all signatures to be ABI-compliant with full `ArgumentLoc` annotations. It doesn't
/// change the entry block arguments, calls, or return instructions, so this can leave the function
/// in a state with type discrepancies.
pub fn legalize_signatures(func: &mut Function, isa: &TargetIsa) {
    // Legalize the function's own signature (`current == true`)...
    isa.legalize_signature(&mut func.signature, true);
    func.signature.compute_argument_bytes();
    // ...and every callee signature in the preamble (`current == false`).
    for sig_data in func.dfg.signatures.values_mut() {
        isa.legalize_signature(sig_data, false);
        sig_data.compute_argument_bytes();
    }

    // Bring the entry block parameters in line with the legalized signature.
    // (`spill_entry_params` is defined elsewhere in this module.)
    if let Some(entry) = func.layout.entry_block() {
        legalize_entry_params(func, entry);
        spill_entry_params(func, entry);
    }
}
|
||||
|
||||
/// Legalize the entry block parameters after `func`'s signature has been legalized.
///
/// The legalized signature may contain more parameters than the original signature, and the
/// parameter types have been changed. This function goes through the parameters of the entry EBB
/// and replaces them with parameters of the right type for the ABI.
///
/// The original entry EBB parameters are computed from the new ABI parameters by code inserted at
/// the top of the entry block.
fn legalize_entry_params(func: &mut Function, entry: Ebb) {
    // Flags tracking which special-purpose parameters have been seen, used to catch duplicates.
    let mut has_sret = false;
    let mut has_link = false;
    let mut has_vmctx = false;
    let mut has_sigid = false;

    // Insert position for argument conversion code.
    // We want to insert instructions before the first instruction in the entry block.
    // If the entry block is empty, append instructions to it instead.
    let mut pos = FuncCursor::new(func).at_first_inst(entry);

    // Keep track of the argument types in the ABI-legalized signature.
    let mut abi_arg = 0;

    // Process the EBB parameters one at a time, possibly replacing one argument with multiple new
    // ones. We do this by detaching the entry EBB parameters first.
    let ebb_params = pos.func.dfg.detach_ebb_params(entry);
    let mut old_arg = 0;
    while let Some(arg) = ebb_params.get(old_arg, &pos.func.dfg.value_lists) {
        old_arg += 1;

        let abi_type = pos.func.signature.params[abi_arg];
        let arg_type = pos.func.dfg.value_type(arg);
        if arg_type == abi_type.value_type {
            // No value translation is necessary, this argument matches the ABI type.
            // Just use the original EBB argument value. This is the most common case.
            pos.func.dfg.attach_ebb_param(entry, arg);
            match abi_type.purpose {
                ArgumentPurpose::Normal => {}
                ArgumentPurpose::FramePointer => {}
                ArgumentPurpose::CalleeSaved => {}
                ArgumentPurpose::StructReturn => {
                    debug_assert!(!has_sret, "Multiple sret arguments found");
                    has_sret = true;
                }
                ArgumentPurpose::VMContext => {
                    debug_assert!(!has_vmctx, "Multiple vmctx arguments found");
                    has_vmctx = true;
                }
                ArgumentPurpose::SignatureId => {
                    debug_assert!(!has_sigid, "Multiple sigid arguments found");
                    has_sigid = true;
                }
                _ => panic!("Unexpected special-purpose arg {}", abi_type),
            }
            abi_arg += 1;
        } else {
            // Compute the value we want for `arg` from the legalized ABI parameters.
            // The closure hands `convert_from_abi` one ABI parameter at a time, appending each
            // to the entry EBB and advancing `abi_arg`.
            let mut get_arg = |func: &mut Function, ty| {
                let abi_type = func.signature.params[abi_arg];
                debug_assert_eq!(
                    abi_type.purpose,
                    ArgumentPurpose::Normal,
                    "Can't legalize special-purpose argument"
                );
                if ty == abi_type.value_type {
                    abi_arg += 1;
                    Ok(func.dfg.append_ebb_param(entry, ty))
                } else {
                    Err(abi_type)
                }
            };
            let converted = convert_from_abi(&mut pos, arg_type, Some(arg), &mut get_arg);
            // The old `arg` is no longer an attached EBB argument, but there are probably still
            // uses of the value.
            debug_assert_eq!(pos.func.dfg.resolve_aliases(arg), converted);
        }
    }

    // The legalized signature may contain additional parameters representing special-purpose
    // registers.
    for &arg in &pos.func.signature.params[abi_arg..] {
        match arg.purpose {
            // Any normal parameters should have been processed above.
            ArgumentPurpose::Normal => {
                panic!("Leftover arg: {}", arg);
            }
            // The callee-save parameters should not appear until after register allocation is
            // done.
            ArgumentPurpose::FramePointer |
            ArgumentPurpose::CalleeSaved => {
                panic!("Premature callee-saved arg {}", arg);
            }
            // These can be meaningfully added by `legalize_signature()`.
            ArgumentPurpose::Link => {
                debug_assert!(!has_link, "Multiple link parameters found");
                has_link = true;
            }
            ArgumentPurpose::StructReturn => {
                debug_assert!(!has_sret, "Multiple sret parameters found");
                has_sret = true;
            }
            ArgumentPurpose::VMContext => {
                debug_assert!(!has_vmctx, "Multiple vmctx parameters found");
                has_vmctx = true;
            }
            ArgumentPurpose::SignatureId => {
                debug_assert!(!has_sigid, "Multiple sigid parameters found");
                has_sigid = true;
            }
        }

        // Just create entry block values to match here. We will use them in `handle_return_abi()`
        // below.
        pos.func.dfg.append_ebb_param(entry, arg.value_type);
    }
}
|
||||
|
||||
/// Legalize the results returned from a call instruction to match the ABI signature.
///
/// The cursor `pos` points to a call instruction with at least one return value. The cursor will
/// be left pointing after the instructions inserted to convert the return values.
///
/// This function is very similar to the `legalize_entry_params` function above.
///
/// Returns the possibly new instruction representing the call.
fn legalize_inst_results<ResType>(pos: &mut FuncCursor, mut get_abi_type: ResType) -> Inst
where
    ResType: FnMut(&Function, usize) -> AbiParam,
{
    let call = pos.current_inst().expect(
        "Cursor must point to a call instruction",
    );

    // We theoretically allow for call instructions that return a number of fixed results before
    // the call return values. In practice, it doesn't happen.
    let fixed_results = pos.func.dfg[call].opcode().constraints().fixed_results();
    debug_assert_eq!(fixed_results, 0, "Fixed results on calls not supported");

    // Detach the original results so they can be reattached or rebuilt one at a time.
    let results = pos.func.dfg.detach_results(call);
    let mut next_res = 0;
    let mut abi_res = 0;

    // Point immediately after the call.
    pos.next_inst();

    while let Some(res) = results.get(next_res, &pos.func.dfg.value_lists) {
        next_res += 1;

        let res_type = pos.func.dfg.value_type(res);
        if res_type == get_abi_type(pos.func, abi_res).value_type {
            // No value translation is necessary, this result matches the ABI type.
            pos.func.dfg.attach_result(call, res);
            abi_res += 1;
        } else {
            // Rebuild `res` from one or more ABI results appended to the call; the closure
            // hands `convert_from_abi` one ABI result at a time.
            let mut get_res = |func: &mut Function, ty| {
                let abi_type = get_abi_type(func, abi_res);
                if ty == abi_type.value_type {
                    let last_res = func.dfg.append_result(call, ty);
                    abi_res += 1;
                    Ok(last_res)
                } else {
                    Err(abi_type)
                }
            };
            let v = convert_from_abi(pos, res_type, Some(res), &mut get_res);
            debug_assert_eq!(pos.func.dfg.resolve_aliases(res), v);
        }
    }

    call
}
|
||||
|
||||
/// Compute original value of type `ty` from the legalized ABI arguments.
///
/// The conversion is recursive, controlled by the `get_arg` closure which is called to retrieve an
/// ABI argument. It returns:
///
/// - `Ok(arg)` if the requested type matches the next ABI argument.
/// - `Err(arg_type)` if further conversions are needed from the ABI argument `arg_type`.
///
/// If the `into_result` value is provided, the converted result will be written into that value.
fn convert_from_abi<GetArg>(
    pos: &mut FuncCursor,
    ty: Type,
    into_result: Option<Value>,
    get_arg: &mut GetArg,
) -> Value
where
    GetArg: FnMut(&mut Function, Type) -> Result<Value, AbiParam>,
{
    // Terminate the recursion when we get the desired type.
    let arg_type = match get_arg(pos.func, ty) {
        Ok(v) => {
            debug_assert_eq!(pos.func.dfg.value_type(v), ty);
            debug_assert_eq!(into_result, None);
            return v;
        }
        Err(t) => t,
    };

    // Reconstruct how `ty` was legalized into the `arg_type` argument.
    let conversion = legalize_abi_value(ty, &arg_type);

    dbg!("convert_from_abi({}): {:?}", ty, conversion);

    // The conversion describes value to ABI argument. We implement the reverse conversion here.
    match conversion {
        // Construct a `ty` by concatenating two ABI integers.
        ValueConversion::IntSplit => {
            let abi_ty = ty.half_width().expect("Invalid type for conversion");
            let lo = convert_from_abi(pos, abi_ty, None, get_arg);
            let hi = convert_from_abi(pos, abi_ty, None, get_arg);
            dbg!(
                "intsplit {}: {}, {}: {}",
                lo,
                pos.func.dfg.value_type(lo),
                hi,
                pos.func.dfg.value_type(hi)
            );
            pos.ins().with_results([into_result]).iconcat(lo, hi)
        }
        // Construct a `ty` by concatenating two halves of a vector.
        ValueConversion::VectorSplit => {
            let abi_ty = ty.half_vector().expect("Invalid type for conversion");
            let lo = convert_from_abi(pos, abi_ty, None, get_arg);
            let hi = convert_from_abi(pos, abi_ty, None, get_arg);
            pos.ins().with_results([into_result]).vconcat(lo, hi)
        }
        // Construct a `ty` by bit-casting from an integer type.
        ValueConversion::IntBits => {
            debug_assert!(!ty.is_int());
            let abi_ty = Type::int(ty.bits()).expect("Invalid type for conversion");
            let arg = convert_from_abi(pos, abi_ty, None, get_arg);
            pos.ins().with_results([into_result]).bitcast(ty, arg)
        }
        // ABI argument is a sign-extended version of the value we want.
        ValueConversion::Sext(abi_ty) => {
            let arg = convert_from_abi(pos, abi_ty, None, get_arg);
            // TODO: Currently, we don't take advantage of the ABI argument being sign-extended.
            // We could insert an `assert_sreduce` which would fold with a following `sextend` of
            // this value.
            pos.ins().with_results([into_result]).ireduce(ty, arg)
        }
        // ABI argument is a zero-extended version of the value we want.
        ValueConversion::Uext(abi_ty) => {
            let arg = convert_from_abi(pos, abi_ty, None, get_arg);
            // TODO: Currently, we don't take advantage of the ABI argument being zero-extended.
            // We could insert an `assert_ureduce` which would fold with a following `uextend` of
            // this value.
            pos.ins().with_results([into_result]).ireduce(ty, arg)
        }
    }
}
|
||||
|
||||
/// Convert `value` to match an ABI signature by inserting instructions at `pos`.
|
||||
///
|
||||
/// This may require expanding the value to multiple ABI arguments. The conversion process is
|
||||
/// recursive and controlled by the `put_arg` closure. When a candidate argument value is presented
|
||||
/// to the closure, it will perform one of two actions:
|
||||
///
|
||||
/// 1. If the suggested argument has an acceptable value type, consume it by adding it to the list
|
||||
/// of arguments and return `Ok(())`.
|
||||
/// 2. If the suggested argument doesn't have the right value type, don't change anything, but
|
||||
/// return the `Err(AbiParam)` that is needed.
|
||||
///
|
||||
fn convert_to_abi<PutArg>(
|
||||
pos: &mut FuncCursor,
|
||||
cfg: &ControlFlowGraph,
|
||||
value: Value,
|
||||
put_arg: &mut PutArg,
|
||||
) where
|
||||
PutArg: FnMut(&mut Function, Value) -> Result<(), AbiParam>,
|
||||
{
|
||||
// Start by invoking the closure to either terminate the recursion or get the argument type
|
||||
// we're trying to match.
|
||||
let arg_type = match put_arg(pos.func, value) {
|
||||
Ok(_) => return,
|
||||
Err(t) => t,
|
||||
};
|
||||
|
||||
let ty = pos.func.dfg.value_type(value);
|
||||
match legalize_abi_value(ty, &arg_type) {
|
||||
ValueConversion::IntSplit => {
|
||||
let curpos = pos.position();
|
||||
let srcloc = pos.srcloc();
|
||||
let (lo, hi) = isplit(&mut pos.func, cfg, curpos, srcloc, value);
|
||||
convert_to_abi(pos, cfg, lo, put_arg);
|
||||
convert_to_abi(pos, cfg, hi, put_arg);
|
||||
}
|
||||
ValueConversion::VectorSplit => {
|
||||
let curpos = pos.position();
|
||||
let srcloc = pos.srcloc();
|
||||
let (lo, hi) = vsplit(&mut pos.func, cfg, curpos, srcloc, value);
|
||||
convert_to_abi(pos, cfg, lo, put_arg);
|
||||
convert_to_abi(pos, cfg, hi, put_arg);
|
||||
}
|
||||
ValueConversion::IntBits => {
|
||||
debug_assert!(!ty.is_int());
|
||||
let abi_ty = Type::int(ty.bits()).expect("Invalid type for conversion");
|
||||
let arg = pos.ins().bitcast(abi_ty, value);
|
||||
convert_to_abi(pos, cfg, arg, put_arg);
|
||||
}
|
||||
ValueConversion::Sext(abi_ty) => {
|
||||
let arg = pos.ins().sextend(abi_ty, value);
|
||||
convert_to_abi(pos, cfg, arg, put_arg);
|
||||
}
|
||||
ValueConversion::Uext(abi_ty) => {
|
||||
let arg = pos.ins().uextend(abi_ty, value);
|
||||
convert_to_abi(pos, cfg, arg, put_arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if a sequence of arguments match a desired sequence of argument types.
|
||||
fn check_arg_types(dfg: &DataFlowGraph, args: &[Value], types: &[AbiParam]) -> bool {
|
||||
let arg_types = args.iter().map(|&v| dfg.value_type(v));
|
||||
let sig_types = types.iter().map(|&at| at.value_type);
|
||||
arg_types.eq(sig_types)
|
||||
}
|
||||
|
||||
/// Check if the arguments of the call `inst` match the signature.
|
||||
///
|
||||
/// Returns `Ok(())` if the signature matches and no changes are needed, or `Err(sig_ref)` if the
|
||||
/// signature doesn't match.
|
||||
fn check_call_signature(dfg: &DataFlowGraph, inst: Inst) -> Result<(), SigRef> {
|
||||
// Extract the signature and argument values.
|
||||
let (sig_ref, args) = match dfg[inst].analyze_call(&dfg.value_lists) {
|
||||
CallInfo::Direct(func, args) => (dfg.ext_funcs[func].signature, args),
|
||||
CallInfo::Indirect(sig_ref, args) => (sig_ref, args),
|
||||
CallInfo::NotACall => panic!("Expected call, got {:?}", dfg[inst]),
|
||||
};
|
||||
let sig = &dfg.signatures[sig_ref];
|
||||
|
||||
if check_arg_types(dfg, args, &sig.params[..]) &&
|
||||
check_arg_types(dfg, dfg.inst_results(inst), &sig.returns[..])
|
||||
{
|
||||
// All types check out.
|
||||
Ok(())
|
||||
} else {
|
||||
// Call types need fixing.
|
||||
Err(sig_ref)
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if the arguments of the return `inst` match the signature.
|
||||
fn check_return_signature(dfg: &DataFlowGraph, inst: Inst, sig: &Signature) -> bool {
|
||||
check_arg_types(dfg, dfg.inst_variable_args(inst), &sig.returns)
|
||||
}
|
||||
|
||||
/// Insert ABI conversion code for the arguments to the call or return instruction at `pos`.
|
||||
///
|
||||
/// - `abi_args` is the number of arguments that the ABI signature requires.
|
||||
/// - `get_abi_type` is a closure that can provide the desired `AbiParam` for a given ABI
|
||||
/// argument number in `0..abi_args`.
|
||||
///
|
||||
fn legalize_inst_arguments<ArgType>(
|
||||
pos: &mut FuncCursor,
|
||||
cfg: &ControlFlowGraph,
|
||||
abi_args: usize,
|
||||
mut get_abi_type: ArgType,
|
||||
) where
|
||||
ArgType: FnMut(&Function, usize) -> AbiParam,
|
||||
{
|
||||
let inst = pos.current_inst().expect(
|
||||
"Cursor must point to a call instruction",
|
||||
);
|
||||
|
||||
// Lift the value list out of the call instruction so we modify it.
|
||||
let mut vlist = pos.func.dfg[inst].take_value_list().expect(
|
||||
"Call must have a value list",
|
||||
);
|
||||
|
||||
// The value list contains all arguments to the instruction, including the callee on an
|
||||
// indirect call which isn't part of the call arguments that must match the ABI signature.
|
||||
// Figure out how many fixed values are at the front of the list. We won't touch those.
|
||||
let fixed_values = pos.func.dfg[inst]
|
||||
.opcode()
|
||||
.constraints()
|
||||
.fixed_value_arguments();
|
||||
let have_args = vlist.len(&pos.func.dfg.value_lists) - fixed_values;
|
||||
|
||||
// Grow the value list to the right size and shift all the existing arguments to the right.
|
||||
// This lets us write the new argument values into the list without overwriting the old
|
||||
// arguments.
|
||||
//
|
||||
// Before:
|
||||
//
|
||||
// <--> fixed_values
|
||||
// <-----------> have_args
|
||||
// [FFFFOOOOOOOOOOOOO]
|
||||
//
|
||||
// After grow_at():
|
||||
//
|
||||
// <--> fixed_values
|
||||
// <-----------> have_args
|
||||
// <------------------> abi_args
|
||||
// [FFFF-------OOOOOOOOOOOOO]
|
||||
// ^
|
||||
// old_arg_offset
|
||||
//
|
||||
// After writing the new arguments:
|
||||
//
|
||||
// <--> fixed_values
|
||||
// <------------------> abi_args
|
||||
// [FFFFNNNNNNNNNNNNNNNNNNNN]
|
||||
//
|
||||
vlist.grow_at(
|
||||
fixed_values,
|
||||
abi_args - have_args,
|
||||
&mut pos.func.dfg.value_lists,
|
||||
);
|
||||
let old_arg_offset = fixed_values + abi_args - have_args;
|
||||
|
||||
let mut abi_arg = 0;
|
||||
for old_arg in 0..have_args {
|
||||
let old_value = vlist
|
||||
.get(old_arg_offset + old_arg, &pos.func.dfg.value_lists)
|
||||
.unwrap();
|
||||
let mut put_arg = |func: &mut Function, arg| {
|
||||
let abi_type = get_abi_type(func, abi_arg);
|
||||
if func.dfg.value_type(arg) == abi_type.value_type {
|
||||
// This is the argument type we need.
|
||||
vlist.as_mut_slice(&mut func.dfg.value_lists)[fixed_values + abi_arg] = arg;
|
||||
abi_arg += 1;
|
||||
Ok(())
|
||||
} else {
|
||||
Err(abi_type)
|
||||
}
|
||||
};
|
||||
convert_to_abi(pos, cfg, old_value, &mut put_arg);
|
||||
}
|
||||
|
||||
// Put the modified value list back.
|
||||
pos.func.dfg[inst].put_value_list(vlist);
|
||||
}
|
||||
|
||||
/// Insert ABI conversion code before and after the call instruction at `pos`.
|
||||
///
|
||||
/// Instructions inserted before the call will compute the appropriate ABI values for the
|
||||
/// callee's new ABI-legalized signature. The function call arguments are rewritten in place to
|
||||
/// match the new signature.
|
||||
///
|
||||
/// Instructions will be inserted after the call to convert returned ABI values back to the
|
||||
/// original return values. The call's result values will be adapted to match the new signature.
|
||||
///
|
||||
/// Returns `true` if any instructions were inserted.
|
||||
pub fn handle_call_abi(mut inst: Inst, func: &mut Function, cfg: &ControlFlowGraph) -> bool {
|
||||
let pos = &mut FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
// Start by checking if the argument types already match the signature.
|
||||
let sig_ref = match check_call_signature(&pos.func.dfg, inst) {
|
||||
Ok(_) => return spill_call_arguments(pos),
|
||||
Err(s) => s,
|
||||
};
|
||||
|
||||
// OK, we need to fix the call arguments to match the ABI signature.
|
||||
let abi_args = pos.func.dfg.signatures[sig_ref].params.len();
|
||||
legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| {
|
||||
func.dfg.signatures[sig_ref].params[abi_arg]
|
||||
});
|
||||
|
||||
if !pos.func.dfg.signatures[sig_ref].returns.is_empty() {
|
||||
inst = legalize_inst_results(pos, |func, abi_res| {
|
||||
func.dfg.signatures[sig_ref].returns[abi_res]
|
||||
});
|
||||
}
|
||||
|
||||
debug_assert!(
|
||||
check_call_signature(&pos.func.dfg, inst).is_ok(),
|
||||
"Signature still wrong: {}, {}{}",
|
||||
pos.func.dfg.display_inst(inst, None),
|
||||
sig_ref,
|
||||
pos.func.dfg.signatures[sig_ref]
|
||||
);
|
||||
|
||||
// Go back and insert spills for any stack arguments.
|
||||
pos.goto_inst(inst);
|
||||
spill_call_arguments(pos);
|
||||
|
||||
// Yes, we changed stuff.
|
||||
true
|
||||
}
|
||||
|
||||
/// Insert ABI conversion code before and after the return instruction at `inst`.
|
||||
///
|
||||
/// Return `true` if any instructions were inserted.
|
||||
pub fn handle_return_abi(inst: Inst, func: &mut Function, cfg: &ControlFlowGraph) -> bool {
|
||||
// Check if the returned types already match the signature.
|
||||
if check_return_signature(&func.dfg, inst, &func.signature) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Count the special-purpose return values (`link`, `sret`, and `vmctx`) that were appended to
|
||||
// the legalized signature.
|
||||
let special_args = func.signature
|
||||
.returns
|
||||
.iter()
|
||||
.rev()
|
||||
.take_while(|&rt| {
|
||||
rt.purpose == ArgumentPurpose::Link || rt.purpose == ArgumentPurpose::StructReturn ||
|
||||
rt.purpose == ArgumentPurpose::VMContext
|
||||
})
|
||||
.count();
|
||||
let abi_args = func.signature.returns.len() - special_args;
|
||||
|
||||
let pos = &mut FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| {
|
||||
func.signature.returns[abi_arg]
|
||||
});
|
||||
debug_assert_eq!(pos.func.dfg.inst_variable_args(inst).len(), abi_args);
|
||||
|
||||
// Append special return arguments for any `sret`, `link`, and `vmctx` return values added to
|
||||
// the legalized signature. These values should simply be propagated from the entry block
|
||||
// arguments.
|
||||
if special_args > 0 {
|
||||
dbg!(
|
||||
"Adding {} special-purpose arguments to {}",
|
||||
special_args,
|
||||
pos.func.dfg.display_inst(inst, None)
|
||||
);
|
||||
let mut vlist = pos.func.dfg[inst].take_value_list().unwrap();
|
||||
for arg in &pos.func.signature.returns[abi_args..] {
|
||||
match arg.purpose {
|
||||
ArgumentPurpose::Link |
|
||||
ArgumentPurpose::StructReturn |
|
||||
ArgumentPurpose::VMContext => {}
|
||||
ArgumentPurpose::Normal => panic!("unexpected return value {}", arg),
|
||||
_ => panic!("Unsupported special purpose return value {}", arg),
|
||||
}
|
||||
// A `link`/`sret`/`vmctx` return value can only appear in a signature that has a
|
||||
// unique matching argument. They are appended at the end, so search the signature from
|
||||
// the end.
|
||||
let idx = pos.func
|
||||
.signature
|
||||
.params
|
||||
.iter()
|
||||
.rposition(|t| t.purpose == arg.purpose)
|
||||
.expect("No matching special purpose argument.");
|
||||
// Get the corresponding entry block value and add it to the return instruction's
|
||||
// arguments.
|
||||
let val = pos.func.dfg.ebb_params(
|
||||
pos.func.layout.entry_block().unwrap(),
|
||||
)
|
||||
[idx];
|
||||
debug_assert_eq!(pos.func.dfg.value_type(val), arg.value_type);
|
||||
vlist.push(val, &mut pos.func.dfg.value_lists);
|
||||
}
|
||||
pos.func.dfg[inst].put_value_list(vlist);
|
||||
}
|
||||
|
||||
debug_assert!(
|
||||
check_return_signature(&pos.func.dfg, inst, &pos.func.signature),
|
||||
"Signature still wrong: {} / signature {}",
|
||||
pos.func.dfg.display_inst(inst, None),
|
||||
pos.func.signature
|
||||
);
|
||||
|
||||
// Yes, we changed stuff.
|
||||
true
|
||||
}
|
||||
|
||||
/// Assign stack slots to incoming function parameters on the stack.
|
||||
///
|
||||
/// Values that are passed into the function on the stack must be assigned to an `IncomingArg`
|
||||
/// stack slot already during legalization.
|
||||
fn spill_entry_params(func: &mut Function, entry: Ebb) {
|
||||
for (abi, &arg) in func.signature.params.iter().zip(func.dfg.ebb_params(entry)) {
|
||||
if let ArgumentLoc::Stack(offset) = abi.location {
|
||||
let ss = func.stack_slots.make_incoming_arg(abi.value_type, offset);
|
||||
func.locations[arg] = ValueLoc::Stack(ss);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Assign stack slots to outgoing function arguments on the stack.
|
||||
///
|
||||
/// Values that are passed to a called function on the stack must be assigned to a matching
|
||||
/// `OutgoingArg` stack slot. The assignment must happen immediately before the call.
|
||||
///
|
||||
/// TODO: The outgoing stack slots can be written a bit earlier, as long as there are no branches
|
||||
/// or calls between writing the stack slots and the call instruction. Writing the slots earlier
|
||||
/// could help reduce register pressure before the call.
|
||||
fn spill_call_arguments(pos: &mut FuncCursor) -> bool {
|
||||
let inst = pos.current_inst().expect(
|
||||
"Cursor must point to a call instruction",
|
||||
);
|
||||
let sig_ref = pos.func.dfg.call_signature(inst).expect(
|
||||
"Call instruction expected.",
|
||||
);
|
||||
|
||||
// Start by building a list of stack slots and arguments to be replaced.
|
||||
// This requires borrowing `pos.func.dfg`, so we can't change anything.
|
||||
let arglist = {
|
||||
let locations = &pos.func.locations;
|
||||
let stack_slots = &mut pos.func.stack_slots;
|
||||
pos.func
|
||||
.dfg
|
||||
.inst_variable_args(inst)
|
||||
.iter()
|
||||
.zip(&pos.func.dfg.signatures[sig_ref].params)
|
||||
.enumerate()
|
||||
.filter_map(|(idx, (&arg, abi))| {
|
||||
match abi.location {
|
||||
ArgumentLoc::Stack(offset) => {
|
||||
// Assign `arg` to a new stack slot, unless it's already in the correct
|
||||
// slot. The legalization needs to be idempotent, so we should see a
|
||||
// correct outgoing slot on the second pass.
|
||||
let ss = stack_slots.get_outgoing_arg(abi.value_type, offset);
|
||||
if locations[arg] != ValueLoc::Stack(ss) {
|
||||
Some((idx, arg, ss))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
};
|
||||
|
||||
if arglist.is_empty() {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Insert the spill instructions and rewrite call arguments.
|
||||
for (idx, arg, ss) in arglist {
|
||||
let stack_val = pos.ins().spill(arg);
|
||||
pos.func.locations[stack_val] = ValueLoc::Stack(ss);
|
||||
pos.func.dfg.inst_variable_args_mut(inst)[idx] = stack_val;
|
||||
}
|
||||
|
||||
// We changed stuff.
|
||||
true
|
||||
}
|
||||
60
lib/codegen/src/legalizer/call.rs
Normal file
60
lib/codegen/src/legalizer/call.rs
Normal file
@@ -0,0 +1,60 @@
|
||||
//! Legalization of calls.
|
||||
//!
|
||||
//! This module exports the `expand_call` function which transforms a `call`
|
||||
//! instruction into `func_addr` and `call_indirect` instructions.
|
||||
|
||||
use cursor::{Cursor, FuncCursor};
|
||||
use flowgraph::ControlFlowGraph;
|
||||
use ir::{self, InstBuilder};
|
||||
use isa::TargetIsa;
|
||||
|
||||
/// Expand a `call` instruction.
|
||||
pub fn expand_call(
|
||||
inst: ir::Inst,
|
||||
func: &mut ir::Function,
|
||||
_cfg: &mut ControlFlowGraph,
|
||||
isa: &TargetIsa,
|
||||
) {
|
||||
// Unpack the instruction.
|
||||
let (func_ref, old_args) = match func.dfg[inst] {
|
||||
ir::InstructionData::Call {
|
||||
opcode,
|
||||
ref args,
|
||||
func_ref,
|
||||
} => {
|
||||
debug_assert_eq!(opcode, ir::Opcode::Call);
|
||||
(func_ref, args.clone())
|
||||
}
|
||||
_ => panic!("Wanted call: {}", func.dfg.display_inst(inst, None)),
|
||||
};
|
||||
|
||||
let ptr_ty = if isa.flags().is_64bit() {
|
||||
ir::types::I64
|
||||
} else {
|
||||
ir::types::I32
|
||||
};
|
||||
|
||||
let sig = func.dfg.ext_funcs[func_ref].signature;
|
||||
|
||||
let callee = {
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
pos.ins().func_addr(ptr_ty, func_ref)
|
||||
};
|
||||
|
||||
let mut new_args = ir::ValueList::default();
|
||||
new_args.push(callee, &mut func.dfg.value_lists);
|
||||
for i in 0..old_args.len(&func.dfg.value_lists) {
|
||||
new_args.push(
|
||||
old_args.as_slice(&func.dfg.value_lists)[i],
|
||||
&mut func.dfg.value_lists,
|
||||
);
|
||||
}
|
||||
|
||||
func.dfg.replace(inst).CallIndirect(
|
||||
ir::Opcode::CallIndirect,
|
||||
ptr_ty,
|
||||
sig,
|
||||
new_args,
|
||||
);
|
||||
}
|
||||
67
lib/codegen/src/legalizer/globalvar.rs
Normal file
67
lib/codegen/src/legalizer/globalvar.rs
Normal file
@@ -0,0 +1,67 @@
|
||||
//! Legalization of global variables.
|
||||
//!
|
||||
//! This module exports the `expand_global_addr` function which transforms a `global_addr`
|
||||
//! instruction into code that depends on the kind of global variable referenced.
|
||||
|
||||
use cursor::{Cursor, FuncCursor};
|
||||
use flowgraph::ControlFlowGraph;
|
||||
use ir::{self, InstBuilder};
|
||||
use isa::TargetIsa;
|
||||
|
||||
/// Expand a `global_addr` instruction according to the definition of the global variable.
|
||||
pub fn expand_global_addr(
|
||||
inst: ir::Inst,
|
||||
func: &mut ir::Function,
|
||||
_cfg: &mut ControlFlowGraph,
|
||||
_isa: &TargetIsa,
|
||||
) {
|
||||
// Unpack the instruction.
|
||||
let gv = match func.dfg[inst] {
|
||||
ir::InstructionData::UnaryGlobalVar { opcode, global_var } => {
|
||||
debug_assert_eq!(opcode, ir::Opcode::GlobalAddr);
|
||||
global_var
|
||||
}
|
||||
_ => panic!("Wanted global_addr: {}", func.dfg.display_inst(inst, None)),
|
||||
};
|
||||
|
||||
match func.global_vars[gv] {
|
||||
ir::GlobalVarData::VMContext { offset } => vmctx_addr(inst, func, offset.into()),
|
||||
ir::GlobalVarData::Deref { base, offset } => deref_addr(inst, func, base, offset.into()),
|
||||
ir::GlobalVarData::Sym { .. } => globalsym(inst, func, gv),
|
||||
}
|
||||
}
|
||||
|
||||
/// Expand a `global_addr` instruction for a vmctx global.
|
||||
fn vmctx_addr(inst: ir::Inst, func: &mut ir::Function, offset: i64) {
|
||||
// Get the value representing the `vmctx` argument.
|
||||
let vmctx = func.special_param(ir::ArgumentPurpose::VMContext).expect(
|
||||
"Missing vmctx parameter",
|
||||
);
|
||||
|
||||
// Simply replace the `global_addr` instruction with an `iadd_imm`, reusing the result value.
|
||||
func.dfg.replace(inst).iadd_imm(vmctx, offset);
|
||||
}
|
||||
|
||||
/// Expand a `global_addr` instruction for a deref global.
|
||||
fn deref_addr(inst: ir::Inst, func: &mut ir::Function, base: ir::GlobalVar, offset: i64) {
|
||||
// We need to load a pointer from the `base` global variable, so insert a new `global_addr`
|
||||
// instruction. This depends on the iterative legalization loop. Note that the IR verifier
|
||||
// detects any cycles in the `deref` globals.
|
||||
let ptr_ty = func.dfg.value_type(func.dfg.first_result(inst));
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
let base_addr = pos.ins().global_addr(ptr_ty, base);
|
||||
let mut mflags = ir::MemFlags::new();
|
||||
// Deref globals are required to be accessible and aligned.
|
||||
mflags.set_notrap();
|
||||
mflags.set_aligned();
|
||||
let base_ptr = pos.ins().load(ptr_ty, mflags, base_addr, 0);
|
||||
pos.func.dfg.replace(inst).iadd_imm(base_ptr, offset);
|
||||
}
|
||||
|
||||
/// Expand a `global_addr` instruction for a symbolic name global.
|
||||
fn globalsym(inst: ir::Inst, func: &mut ir::Function, gv: ir::GlobalVar) {
|
||||
let ptr_ty = func.dfg.value_type(func.dfg.first_result(inst));
|
||||
func.dfg.replace(inst).globalsym_addr(ptr_ty, gv);
|
||||
}
|
||||
190
lib/codegen/src/legalizer/heap.rs
Normal file
190
lib/codegen/src/legalizer/heap.rs
Normal file
@@ -0,0 +1,190 @@
|
||||
//! Legalization of heaps.
|
||||
//!
|
||||
//! This module exports the `expand_heap_addr` function which transforms a `heap_addr`
|
||||
//! instruction into code that depends on the kind of heap referenced.
|
||||
|
||||
use cursor::{Cursor, FuncCursor};
|
||||
use flowgraph::ControlFlowGraph;
|
||||
use ir::condcodes::IntCC;
|
||||
use ir::{self, InstBuilder, MemFlags};
|
||||
use isa::TargetIsa;
|
||||
|
||||
/// Expand a `heap_addr` instruction according to the definition of the heap.
|
||||
pub fn expand_heap_addr(
|
||||
inst: ir::Inst,
|
||||
func: &mut ir::Function,
|
||||
cfg: &mut ControlFlowGraph,
|
||||
_isa: &TargetIsa,
|
||||
) {
|
||||
// Unpack the instruction.
|
||||
let (heap, offset, size) = match func.dfg[inst] {
|
||||
ir::InstructionData::HeapAddr {
|
||||
opcode,
|
||||
heap,
|
||||
arg,
|
||||
imm,
|
||||
} => {
|
||||
debug_assert_eq!(opcode, ir::Opcode::HeapAddr);
|
||||
(heap, arg, imm.into())
|
||||
}
|
||||
_ => panic!("Wanted heap_addr: {}", func.dfg.display_inst(inst, None)),
|
||||
};
|
||||
|
||||
match func.heaps[heap].style {
|
||||
ir::HeapStyle::Dynamic { bound_gv } => {
|
||||
dynamic_addr(inst, heap, offset, size, bound_gv, func)
|
||||
}
|
||||
ir::HeapStyle::Static { bound } => {
|
||||
static_addr(inst, heap, offset, size, bound.into(), func, cfg)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Expand a `heap_addr` for a dynamic heap.
|
||||
fn dynamic_addr(
|
||||
inst: ir::Inst,
|
||||
heap: ir::Heap,
|
||||
offset: ir::Value,
|
||||
size: u32,
|
||||
bound_gv: ir::GlobalVar,
|
||||
func: &mut ir::Function,
|
||||
) {
|
||||
let size = i64::from(size);
|
||||
let offset_ty = func.dfg.value_type(offset);
|
||||
let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
|
||||
let min_size = func.heaps[heap].min_size.into();
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
// Start with the bounds check. Trap if `offset + size > bound`.
|
||||
let bound_addr = pos.ins().global_addr(addr_ty, bound_gv);
|
||||
let mut mflags = MemFlags::new();
|
||||
// The bound variable is requied to be accessible and aligned.
|
||||
mflags.set_notrap();
|
||||
mflags.set_aligned();
|
||||
let bound = pos.ins().load(offset_ty, mflags, bound_addr, 0);
|
||||
|
||||
let oob;
|
||||
if size == 1 {
|
||||
// `offset > bound - 1` is the same as `offset >= bound`.
|
||||
oob = pos.ins().icmp(
|
||||
IntCC::UnsignedGreaterThanOrEqual,
|
||||
offset,
|
||||
bound,
|
||||
);
|
||||
} else if size <= min_size {
|
||||
// We know that bound >= min_size, so here we can compare `offset > bound - size` without
|
||||
// wrapping.
|
||||
let adj_bound = pos.ins().iadd_imm(bound, -size);
|
||||
oob = pos.ins().icmp(
|
||||
IntCC::UnsignedGreaterThan,
|
||||
offset,
|
||||
adj_bound,
|
||||
);
|
||||
} else {
|
||||
// We need an overflow check for the adjusted offset.
|
||||
let size_val = pos.ins().iconst(offset_ty, size);
|
||||
let (adj_offset, overflow) = pos.ins().iadd_cout(offset, size_val);
|
||||
pos.ins().trapnz(overflow, ir::TrapCode::HeapOutOfBounds);
|
||||
oob = pos.ins().icmp(
|
||||
IntCC::UnsignedGreaterThan,
|
||||
adj_offset,
|
||||
bound,
|
||||
);
|
||||
}
|
||||
pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds);
|
||||
|
||||
offset_addr(inst, heap, addr_ty, offset, offset_ty, pos.func);
|
||||
}
|
||||
|
||||
/// Expand a `heap_addr` for a static heap.
|
||||
fn static_addr(
|
||||
inst: ir::Inst,
|
||||
heap: ir::Heap,
|
||||
offset: ir::Value,
|
||||
size: u32,
|
||||
bound: i64,
|
||||
func: &mut ir::Function,
|
||||
cfg: &mut ControlFlowGraph,
|
||||
) {
|
||||
let size = i64::from(size);
|
||||
let offset_ty = func.dfg.value_type(offset);
|
||||
let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
// Start with the bounds check. Trap if `offset + size > bound`.
|
||||
if size > bound {
|
||||
// This will simply always trap since `offset >= 0`.
|
||||
pos.ins().trap(ir::TrapCode::HeapOutOfBounds);
|
||||
pos.func.dfg.replace(inst).iconst(addr_ty, 0);
|
||||
|
||||
// Split Ebb, as the trap is a terminator instruction.
|
||||
let curr_ebb = pos.current_ebb().expect("Cursor is not in an ebb");
|
||||
let new_ebb = pos.func.dfg.make_ebb();
|
||||
pos.insert_ebb(new_ebb);
|
||||
cfg.recompute_ebb(pos.func, curr_ebb);
|
||||
cfg.recompute_ebb(pos.func, new_ebb);
|
||||
return;
|
||||
}
|
||||
|
||||
// Check `offset > limit` which is now known non-negative.
|
||||
let limit = bound - size;
|
||||
|
||||
// We may be able to omit the check entirely for 32-bit offsets if the heap bound is 4 GB or
|
||||
// more.
|
||||
if offset_ty != ir::types::I32 || limit < 0xffff_ffff {
|
||||
let oob = if limit & 1 == 1 {
|
||||
// Prefer testing `offset >= limit - 1` when limit is odd because an even number is
|
||||
// likely to be a convenient constant on ARM and other RISC architectures.
|
||||
pos.ins().icmp_imm(
|
||||
IntCC::UnsignedGreaterThanOrEqual,
|
||||
offset,
|
||||
limit - 1,
|
||||
)
|
||||
} else {
|
||||
pos.ins().icmp_imm(
|
||||
IntCC::UnsignedGreaterThan,
|
||||
offset,
|
||||
limit,
|
||||
)
|
||||
};
|
||||
pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
|
||||
offset_addr(inst, heap, addr_ty, offset, offset_ty, pos.func);
|
||||
}
|
||||
|
||||
/// Emit code for the base address computation of a `heap_addr` instruction.
|
||||
///
|
||||
///
|
||||
fn offset_addr(
|
||||
inst: ir::Inst,
|
||||
heap: ir::Heap,
|
||||
addr_ty: ir::Type,
|
||||
mut offset: ir::Value,
|
||||
offset_ty: ir::Type,
|
||||
func: &mut ir::Function,
|
||||
) {
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
// Convert `offset` to `addr_ty`.
|
||||
if offset_ty != addr_ty {
|
||||
offset = pos.ins().uextend(addr_ty, offset);
|
||||
}
|
||||
|
||||
// Add the heap base address base
|
||||
match pos.func.heaps[heap].base {
|
||||
ir::HeapBase::ReservedReg => unimplemented!(),
|
||||
ir::HeapBase::GlobalVar(base_gv) => {
|
||||
let base_addr = pos.ins().global_addr(addr_ty, base_gv);
|
||||
let mut mflags = MemFlags::new();
|
||||
// The base address variable is requied to be accessible and aligned.
|
||||
mflags.set_notrap();
|
||||
mflags.set_aligned();
|
||||
let base = pos.ins().load(addr_ty, mflags, base_addr, 0);
|
||||
pos.func.dfg.replace(inst).iadd(base, offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
63
lib/codegen/src/legalizer/libcall.rs
Normal file
63
lib/codegen/src/legalizer/libcall.rs
Normal file
@@ -0,0 +1,63 @@
|
||||
//! Expanding instructions as runtime library calls.
|
||||
|
||||
use ir;
|
||||
use ir::InstBuilder;
|
||||
|
||||
/// Try to expand `inst` as a library call, returning true is successful.
|
||||
pub fn expand_as_libcall(inst: ir::Inst, func: &mut ir::Function) -> bool {
|
||||
// Does the opcode/ctrl_type combo even have a well-known runtime library name.
|
||||
let libcall =
|
||||
match ir::LibCall::for_inst(func.dfg[inst].opcode(), func.dfg.ctrl_typevar(inst)) {
|
||||
Some(lc) => lc,
|
||||
None => return false,
|
||||
};
|
||||
|
||||
let funcref = find_funcref(libcall, func).unwrap_or_else(|| make_funcref(libcall, inst, func));
|
||||
|
||||
// Now we convert `inst` to a call. First save the arguments.
|
||||
let mut args = vec![];
|
||||
args.extend_from_slice(func.dfg.inst_args(inst));
|
||||
// The replace builder will preserve the instruction result values.
|
||||
func.dfg.replace(inst).call(funcref, &args);
|
||||
|
||||
// TODO: ask the ISA to legalize the signature.
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
/// Get the existing function reference for `libcall` in `func` if it exists.
|
||||
fn find_funcref(libcall: ir::LibCall, func: &ir::Function) -> Option<ir::FuncRef> {
|
||||
// We're assuming that all libcall function decls are at the end.
|
||||
// If we get this wrong, worst case we'll have duplicate libcall decls which is harmless.
|
||||
for (fref, func_data) in func.dfg.ext_funcs.iter().rev() {
|
||||
match func_data.name {
|
||||
ir::ExternalName::LibCall(lc) => {
|
||||
if lc == libcall {
|
||||
return Some(fref);
|
||||
}
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Create a funcref for `libcall` with a signature matching `inst`.
|
||||
fn make_funcref(libcall: ir::LibCall, inst: ir::Inst, func: &mut ir::Function) -> ir::FuncRef {
|
||||
// Start with a system_v calling convention. We'll give the ISA a chance to change it.
|
||||
let mut sig = ir::Signature::new(ir::CallConv::SystemV);
|
||||
for &v in func.dfg.inst_args(inst) {
|
||||
sig.params.push(ir::AbiParam::new(func.dfg.value_type(v)));
|
||||
}
|
||||
for &v in func.dfg.inst_results(inst) {
|
||||
sig.returns.push(ir::AbiParam::new(func.dfg.value_type(v)));
|
||||
}
|
||||
let sigref = func.import_signature(sig);
|
||||
|
||||
// TODO: Can libcalls be colocated in some circumstances?
|
||||
func.import_function(ir::ExtFuncData {
|
||||
name: ir::ExternalName::LibCall(libcall),
|
||||
signature: sigref,
|
||||
colocated: false,
|
||||
})
|
||||
}
|
||||
302
lib/codegen/src/legalizer/mod.rs
Normal file
302
lib/codegen/src/legalizer/mod.rs
Normal file
@@ -0,0 +1,302 @@
|
||||
//! Legalize instructions.
|
||||
//!
|
||||
//! A legal instruction is one that can be mapped directly to a machine code instruction for the
|
||||
//! target ISA. The `legalize_function()` function takes as input any function and transforms it
|
||||
//! into an equivalent function using only legal instructions.
|
||||
//!
|
||||
//! The characteristics of legal instructions depend on the target ISA, so any given instruction
|
||||
//! can be legal for one ISA and illegal for another.
|
||||
//!
|
||||
//! Besides transforming instructions, the legalizer also fills out the `function.encodings` map
|
||||
//! which provides a legal encoding recipe for every instruction.
|
||||
//!
|
||||
//! The legalizer does not deal with register allocation constraints. These constraints are derived
|
||||
//! from the encoding recipes, and solved later by the register allocator.
|
||||
|
||||
use bitset::BitSet;
|
||||
use cursor::{Cursor, FuncCursor};
|
||||
use flowgraph::ControlFlowGraph;
|
||||
use ir::{self, InstBuilder};
|
||||
use isa::TargetIsa;
|
||||
use timing;
|
||||
|
||||
mod boundary;
|
||||
mod call;
|
||||
mod globalvar;
|
||||
mod heap;
|
||||
mod libcall;
|
||||
mod split;
|
||||
|
||||
use self::globalvar::expand_global_addr;
|
||||
use self::heap::expand_heap_addr;
|
||||
use self::call::expand_call;
|
||||
use self::libcall::expand_as_libcall;
|
||||
|
||||
/// Legalize `func` for `isa`.
///
/// - Transform any instructions that don't have a legal representation in `isa`.
/// - Fill out `func.encodings`.
///
pub fn legalize_function(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: &TargetIsa) {
    let _tt = timing::legalize();
    debug_assert!(cfg.is_valid());

    // Convert the function and call signatures to the ISA's legalized forms first; the
    // instruction loop below assumes signatures are already legal.
    boundary::legalize_signatures(func, isa);

    // Make room for an encoding entry for every instruction in the function.
    func.encodings.resize(func.dfg.num_insts());

    let mut pos = FuncCursor::new(func);

    // Process EBBs in layout order. Some legalization actions may split the current EBB or append
    // new ones to the end. We need to make sure we visit those new EBBs too.
    while let Some(_ebb) = pos.next_ebb() {
        // Keep track of the cursor position before the instruction being processed, so we can
        // double back when replacing instructions.
        let mut prev_pos = pos.position();

        while let Some(inst) = pos.next_inst() {
            let opcode = pos.func.dfg[inst].opcode();

            // Check for ABI boundaries that need to be converted to the legalized signature.
            if opcode.is_call() {
                if boundary::handle_call_abi(inst, pos.func, cfg) {
                    // Go back and legalize the inserted argument conversion instructions.
                    pos.set_position(prev_pos);
                    continue;
                }
            } else if opcode.is_return() {
                if boundary::handle_return_abi(inst, pos.func, cfg) {
                    // Go back and legalize the inserted return value conversion instructions.
                    pos.set_position(prev_pos);
                    continue;
                }
            } else if opcode.is_branch() {
                // Branch arguments may still reference split values; simplify them now that the
                // preceding instructions have been legalized.
                split::simplify_branch_arguments(&mut pos.func.dfg, inst);
            }

            match pos.func.update_encoding(inst, isa) {
                Ok(()) => {}
                Err(action) => {
                    // We should transform the instruction into legal equivalents.
                    let changed = action(inst, pos.func, cfg, isa);
                    // If the current instruction was replaced, we need to double back and revisit
                    // the expanded sequence. This is both to assign encodings and possible to
                    // expand further.
                    // There's a risk of infinite looping here if the legalization patterns are
                    // unsound. Should we attempt to detect that?
                    if changed {
                        pos.set_position(prev_pos);
                        continue;
                    }

                    // We don't have any pattern expansion for this instruction either.
                    // Try converting it to a library call as a last resort.
                    if expand_as_libcall(inst, pos.func) {
                        pos.set_position(prev_pos);
                        continue;
                    }
                }
            }

            // Remember this position in case we need to double back.
            prev_pos = pos.position();
        }
    }
}
|
||||
|
||||
// Include legalization patterns that were generated by `gen_legalizer.py` from the `XForms` in
|
||||
// `lib/codegen/meta/base/legalize.py`.
|
||||
//
|
||||
// Concretely, this defines private functions `narrow()`, and `expand()`.
|
||||
include!(concat!(env!("OUT_DIR"), "/legalizer.rs"));
|
||||
|
||||
/// Custom expansion for conditional trap instructions.
/// TODO: Add CFG support to the Python patterns so we won't have to do this.
///
/// Rewrites `trapz`/`trapnz` into a conditional branch over an unconditional `trap`,
/// splitting the EBB after the instruction.
fn expand_cond_trap(
    inst: ir::Inst,
    func: &mut ir::Function,
    cfg: &mut ControlFlowGraph,
    _isa: &TargetIsa,
) {
    // Parse the instruction.
    let trapz;
    let (arg, code) = match func.dfg[inst] {
        ir::InstructionData::CondTrap { opcode, arg, code } => {
            // We want to branch *over* an unconditional trap.
            trapz = match opcode {
                ir::Opcode::Trapz => true,
                ir::Opcode::Trapnz => false,
                _ => panic!("Expected cond trap: {}", func.dfg.display_inst(inst, None)),
            };
            (arg, code)
        }
        _ => panic!("Expected cond trap: {}", func.dfg.display_inst(inst, None)),
    };

    // Split the EBB after `inst`:
    //
    //     trapnz arg
    //
    // Becomes:
    //
    //     brz arg, new_ebb
    //     trap
    //   new_ebb:
    //
    let old_ebb = func.layout.pp_ebb(inst);
    let new_ebb = func.dfg.make_ebb();
    // The branch condition is inverted relative to the trap condition: we branch past the
    // trap when the trap should NOT fire.
    if trapz {
        func.dfg.replace(inst).brnz(arg, new_ebb, &[]);
    } else {
        func.dfg.replace(inst).brz(arg, new_ebb, &[]);
    }

    let mut pos = FuncCursor::new(func).after_inst(inst);
    // Preserve the original source location on the inserted trap.
    pos.use_srcloc(inst);
    pos.ins().trap(code);
    pos.insert_ebb(new_ebb);

    // Finally update the CFG.
    cfg.recompute_ebb(pos.func, old_ebb);
    cfg.recompute_ebb(pos.func, new_ebb);
}
|
||||
|
||||
/// Jump tables.
///
/// Expand `br_table` into a linear sequence of compare-and-branch instructions, one per
/// populated table entry, falling through when nothing matches.
fn expand_br_table(
    inst: ir::Inst,
    func: &mut ir::Function,
    cfg: &mut ControlFlowGraph,
    _isa: &TargetIsa,
) {
    use ir::condcodes::IntCC;

    let (arg, table) = match func.dfg[inst] {
        ir::InstructionData::BranchTable {
            opcode: ir::Opcode::BrTable,
            arg,
            table,
        } => (arg, table),
        _ => panic!("Expected br_table: {}", func.dfg.display_inst(inst, None)),
    };

    // This is a poor man's jump table using just a sequence of conditional branches.
    // TODO: Lower into a jump table load and indirect branch.
    let table_size = func.jump_tables[table].len();
    let mut pos = FuncCursor::new(func).at_inst(inst);
    pos.use_srcloc(inst);

    for i in 0..table_size {
        // Sparse tables may have holes; only emit a branch for populated entries.
        if let Some(dest) = pos.func.jump_tables[table].get_entry(i) {
            let t = pos.ins().icmp_imm(IntCC::Equal, arg, i as i64);
            pos.ins().brnz(t, dest, &[]);
        }
    }

    // `br_table` falls through when nothing matches.
    let ebb = pos.current_ebb().unwrap();
    pos.remove_inst();
    cfg.recompute_ebb(pos.func, ebb);
}
|
||||
|
||||
/// Expand the select instruction.
///
/// Conditional moves are available in some ISAs for some register classes. The remaining selects
/// are handled by a branch.
fn expand_select(
    inst: ir::Inst,
    func: &mut ir::Function,
    cfg: &mut ControlFlowGraph,
    _isa: &TargetIsa,
) {
    let (ctrl, tval, fval) = match func.dfg[inst] {
        ir::InstructionData::Ternary {
            opcode: ir::Opcode::Select,
            args,
        } => (args[0], args[1], args[2]),
        _ => panic!("Expected select: {}", func.dfg.display_inst(inst, None)),
    };

    // Replace `result = select ctrl, tval, fval` with:
    //
    //   brnz ctrl, new_ebb(tval)
    //   jump new_ebb(fval)
    // new_ebb(result):
    let old_ebb = func.layout.pp_ebb(inst);
    let result = func.dfg.first_result(inst);
    // Detach the result from the select so it can become the new EBB's parameter instead.
    func.dfg.clear_results(inst);
    let new_ebb = func.dfg.make_ebb();
    func.dfg.attach_ebb_param(new_ebb, result);

    func.dfg.replace(inst).brnz(ctrl, new_ebb, &[tval]);
    let mut pos = FuncCursor::new(func).after_inst(inst);
    pos.use_srcloc(inst);
    pos.ins().jump(new_ebb, &[fval]);
    pos.insert_ebb(new_ebb);

    // Both EBBs changed shape, so recompute their predecessor info.
    cfg.recompute_ebb(pos.func, new_ebb);
    cfg.recompute_ebb(pos.func, old_ebb);
}
|
||||
|
||||
/// Expand illegal `f32const` and `f64const` instructions.
///
/// The float constant is materialized as an `iconst` of the raw bit pattern followed by a
/// `bitcast` back to the float type.
fn expand_fconst(
    inst: ir::Inst,
    func: &mut ir::Function,
    _cfg: &mut ControlFlowGraph,
    _isa: &TargetIsa,
) {
    let ty = func.dfg.value_type(func.dfg.first_result(inst));
    debug_assert!(!ty.is_vector(), "Only scalar fconst supported: {}", ty);

    // In the future, we may want to generate constant pool entries for these constants, but for
    // now use an `iconst` and a bit cast.
    let mut pos = FuncCursor::new(func).at_inst(inst);
    pos.use_srcloc(inst);
    let ival = match pos.func.dfg[inst] {
        ir::InstructionData::UnaryIeee32 {
            opcode: ir::Opcode::F32const,
            imm,
        } => pos.ins().iconst(ir::types::I32, i64::from(imm.bits())),
        ir::InstructionData::UnaryIeee64 {
            opcode: ir::Opcode::F64const,
            imm,
        } => pos.ins().iconst(ir::types::I64, imm.bits() as i64),
        _ => panic!("Expected fconst: {}", pos.func.dfg.display_inst(inst, None)),
    };
    // Reinterpret the integer bits as the original float type, reusing the old result value.
    pos.func.dfg.replace(inst).bitcast(ty, ival);
}
|
||||
|
||||
/// Expand the stack check instruction.
///
/// The global variable operand holds the address of the stack limit. The expansion loads the
/// limit, compares it against the stack pointer, and traps with `StackOverflow` when the stack
/// pointer is not above the limit.
pub fn expand_stack_check(
    inst: ir::Inst,
    func: &mut ir::Function,
    _cfg: &mut ControlFlowGraph,
    isa: &TargetIsa,
) {
    use ir::condcodes::IntCC;

    let gv = match func.dfg[inst] {
        ir::InstructionData::UnaryGlobalVar { global_var, .. } => global_var,
        _ => panic!("Want stack_check: {}", func.dfg.display_inst(inst, isa)),
    };
    // Pointer width follows the target: 64-bit ISAs use I64 addresses, otherwise I32.
    let ptr_ty = if isa.flags().is_64bit() {
        ir::types::I64
    } else {
        ir::types::I32
    };

    let mut pos = FuncCursor::new(func).at_inst(inst);
    pos.use_srcloc(inst);

    let limit_addr = pos.ins().global_addr(ptr_ty, gv);

    // The limit load is marked aligned and non-trapping; the address comes from a global the
    // embedder controls.
    let mut mflags = ir::MemFlags::new();
    mflags.set_aligned();
    mflags.set_notrap();
    let limit = pos.ins().load(ptr_ty, mflags, limit_addr, 0);
    let cflags = pos.ins().ifcmp_sp(limit);
    // Trap when limit >= sp (unsigned), i.e. the stack has grown down past the limit.
    pos.func.dfg.replace(inst).trapif(
        IntCC::UnsignedGreaterThanOrEqual,
        cflags,
        ir::TrapCode::StackOverflow,
    );
}
|
||||
342
lib/codegen/src/legalizer/split.rs
Normal file
342
lib/codegen/src/legalizer/split.rs
Normal file
@@ -0,0 +1,342 @@
|
||||
//! Value splitting.
|
||||
//!
|
||||
//! Some value types are too large to fit in registers, so they need to be split into smaller parts
|
||||
//! that the ISA can operate on. There's two dimensions of splitting, represented by two
|
||||
//! complementary instruction pairs:
|
||||
//!
|
||||
//! - `isplit` and `iconcat` for splitting integer types into smaller integers.
|
||||
//! - `vsplit` and `vconcat` for splitting vector types into smaller vector types with the same
|
||||
//! lane types.
|
||||
//!
|
||||
//! There is no floating point splitting. If an ISA doesn't support `f64` values, they probably
|
||||
//! have to be bit-cast to `i64` and possibly split into two `i32` values that fit in registers.
|
||||
//! This breakdown is handled by the ABI lowering.
|
||||
//!
|
||||
//! When legalizing a single instruction, it is wrapped in splits and concatenations:
|
||||
//!
|
||||
//!```cton
|
||||
//! v1 = bxor.i64 v2, v3
|
||||
//! ```
|
||||
//!
|
||||
//! becomes:
|
||||
//!
|
||||
//!```cton
|
||||
//! v20, v21 = isplit v2
|
||||
//! v30, v31 = isplit v3
|
||||
//! v10 = bxor.i32 v20, v30
|
||||
//! v11 = bxor.i32 v21, v31
|
||||
//! v1 = iconcat v10, v11
|
||||
//! ```
|
||||
//!
|
||||
//! This local expansion approach still leaves the original `i64` values in the code as operands on
|
||||
//! the `split` and `concat` instructions. It also creates a lot of redundant code to clean up as
|
||||
//! values are constantly split and concatenated.
|
||||
//!
|
||||
//! # Optimized splitting
|
||||
//!
|
||||
//! We can eliminate a lot of the splitting code quite easily. Whenever we need to split a value,
|
||||
//! first check if the value is defined by the corresponding concatenation. If so, then just use
|
||||
//! the two concatenation inputs directly:
|
||||
//!
|
||||
//! ```cton
|
||||
//! v4 = iadd_imm.i64 v1, 1
|
||||
//! ```
|
||||
//!
|
||||
//! becomes, using the expanded code from above:
|
||||
//!
|
||||
//! ```cton
|
||||
//! v40, v5 = iadd_imm_cout.i32 v10, 1
|
||||
//! v6 = bint.i32
|
||||
//! v41 = iadd.i32 v11, v6
|
||||
//! v4 = iconcat v40, v41
|
||||
//! ```
|
||||
//!
|
||||
//! This means that the `iconcat` instructions defining `v1` and `v4` end up with no uses, so they
|
||||
//! can be trivially deleted by a dead code elimination pass.
|
||||
//!
|
||||
//! # EBB arguments
|
||||
//!
|
||||
//! If all instructions that produce an `i64` value are legalized as above, we will eventually end
|
||||
//! up with no `i64` values anywhere, except for EBB arguments. We can work around this by
|
||||
//! iteratively splitting EBB arguments too. That should leave us with no illegal value types
|
||||
//! anywhere.
|
||||
//!
|
||||
//! It is possible to have circular dependencies of EBB arguments that are never used by any real
|
||||
//! instructions. These loops will remain in the program.
|
||||
|
||||
use cursor::{Cursor, CursorPosition, FuncCursor};
|
||||
use flowgraph::ControlFlowGraph;
|
||||
use ir::{self, Ebb, Inst, InstBuilder, InstructionData, Opcode, Type, Value, ValueDef};
|
||||
use std::iter;
|
||||
use std::vec::Vec;
|
||||
|
||||
/// Split `value` into two values using the `isplit` semantics. Do this by reusing existing values
/// if possible.
///
/// Returns the `(low, high)` halves of the integer value. See `split_any` for details.
pub fn isplit(
    func: &mut ir::Function,
    cfg: &ControlFlowGraph,
    pos: CursorPosition,
    srcloc: ir::SourceLoc,
    value: Value,
) -> (Value, Value) {
    split_any(func, cfg, pos, srcloc, value, Opcode::Iconcat)
}
|
||||
|
||||
/// Split `value` into halves using the `vsplit` semantics. Do this by reusing existing values if
/// possible.
///
/// Returns the `(low, high)` half-vectors of the vector value. See `split_any` for details.
pub fn vsplit(
    func: &mut ir::Function,
    cfg: &ControlFlowGraph,
    pos: CursorPosition,
    srcloc: ir::SourceLoc,
    value: Value,
) -> (Value, Value) {
    split_any(func, cfg, pos, srcloc, value, Opcode::Vconcat)
}
|
||||
|
||||
/// After splitting an EBB argument, we need to go back and fix up all of the predecessor
/// instructions. This is potentially a recursive operation, but we don't implement it recursively
/// since that could use up too much stack.
///
/// Instead, the repairs are deferred and placed on a work list in stack form.
struct Repair {
    /// The concatenation opcode (`iconcat` or `vconcat`) giving the split semantics to use.
    concat: Opcode,
    /// The argument type after splitting.
    split_type: Type,
    /// The destination EBB whose arguments have been split.
    ebb: Ebb,
    /// Number of the original EBB argument which has been replaced by the low part.
    num: usize,
    /// Number of the new EBB argument which represents the high part after the split.
    hi_num: usize,
}
|
||||
|
||||
/// Generic version of `isplit` and `vsplit` controlled by the `concat` opcode.
///
/// Splits `value` at `pos`, then drains the repair work list: every EBB whose parameter was
/// split needs each predecessor branch updated to pass the low part in place of the original
/// argument and the high part appended at the end of the argument list.
fn split_any(
    func: &mut ir::Function,
    cfg: &ControlFlowGraph,
    pos: CursorPosition,
    srcloc: ir::SourceLoc,
    value: Value,
    concat: Opcode,
) -> (Value, Value) {
    let mut repairs = Vec::new();
    let pos = &mut FuncCursor::new(func).at_position(pos).with_srcloc(srcloc);
    let result = split_value(pos, value, concat, &mut repairs);

    // We have split the value requested, and now we may need to fix some EBB predecessors.
    while let Some(repair) = repairs.pop() {
        for (_, inst) in cfg.pred_iter(repair.ebb) {
            let branch_opc = pos.func.dfg[inst].opcode();
            debug_assert!(
                branch_opc.is_branch(),
                "Predecessor not a branch: {}",
                pos.func.dfg.display_inst(inst, None)
            );
            // Skip past the fixed operands (condition, etc.) to reach the EBB arguments.
            let fixed_args = branch_opc.constraints().fixed_value_arguments();
            let mut args = pos.func.dfg[inst].take_value_list().expect(
                "Branches must have value lists.",
            );
            let num_args = args.len(&pos.func.dfg.value_lists);
            // Get the old value passed to the EBB argument we're repairing.
            let old_arg = args.get(fixed_args + repair.num, &pos.func.dfg.value_lists)
                .expect("Too few branch arguments");

            // It's possible that the CFG's predecessor list has duplicates. Detect them here.
            if pos.func.dfg.value_type(old_arg) == repair.split_type {
                pos.func.dfg[inst].put_value_list(args);
                continue;
            }

            // Split the old argument, possibly causing more repairs to be scheduled.
            pos.goto_inst(inst);
            let (lo, hi) = split_value(pos, old_arg, repair.concat, &mut repairs);

            // The `lo` part replaces the original argument.
            *args.get_mut(fixed_args + repair.num, &mut pos.func.dfg.value_lists)
                .unwrap() = lo;

            // The `hi` part goes at the end. Since multiple repairs may have been scheduled to the
            // same EBB, there could be multiple arguments missing.
            if num_args > fixed_args + repair.hi_num {
                *args.get_mut(fixed_args + repair.hi_num, &mut pos.func.dfg.value_lists)
                    .unwrap() = hi;
            } else {
                // We need to append one or more arguments. If we're adding more than one argument,
                // there must be pending repairs on the stack that will fill in the correct values
                // instead of `hi`.
                args.extend(
                    iter::repeat(hi).take(1 + fixed_args + repair.hi_num - num_args),
                    &mut pos.func.dfg.value_lists,
                );
            }

            // Put the value list back after manipulating it.
            pos.func.dfg[inst].put_value_list(args);
        }
    }

    result
}
|
||||
|
||||
/// Split a single value using the integer or vector semantics given by the `concat` opcode.
///
/// If the value is defined by a `concat` instruction, just reuse the operand values of that
/// instruction.
///
/// Return the two new values representing the parts of `value`.
fn split_value(
    pos: &mut FuncCursor,
    value: Value,
    concat: Opcode,
    repairs: &mut Vec<Repair>,
) -> (Value, Value) {
    let value = pos.func.dfg.resolve_aliases(value);
    // When set, `reuse` holds a (lo, hi) pair found without inserting a split instruction.
    let mut reuse = None;

    match pos.func.dfg.value_def(value) {
        ValueDef::Result(inst, num) => {
            // This is an instruction result. See if the value was created by a `concat`
            // instruction.
            if let InstructionData::Binary { opcode, args, .. } = pos.func.dfg[inst] {
                debug_assert_eq!(num, 0);
                if opcode == concat {
                    reuse = Some((args[0], args[1]));
                }
            }
        }
        ValueDef::Param(ebb, num) => {
            // This is an EBB parameter. We can split the parameter value unless this is the entry
            // block.
            if pos.func.layout.entry_block() != Some(ebb) {
                // We are going to replace the parameter at `num` with two new arguments.
                // Determine the new value types.
                let ty = pos.func.dfg.value_type(value);
                let split_type = match concat {
                    Opcode::Iconcat => ty.half_width().expect("Invalid type for isplit"),
                    Opcode::Vconcat => ty.half_vector().expect("Invalid type for vsplit"),
                    _ => panic!("Unhandled concat opcode: {}", concat),
                };

                // Since the `repairs` stack potentially contains other parameter numbers for
                // `ebb`, avoid shifting and renumbering EBB parameters. It could invalidate other
                // `repairs` entries.
                //
                // Replace the original `value` with the low part, and append the high part at the
                // end of the argument list.
                let lo = pos.func.dfg.replace_ebb_param(value, split_type);
                let hi_num = pos.func.dfg.num_ebb_params(ebb);
                let hi = pos.func.dfg.append_ebb_param(ebb, split_type);
                reuse = Some((lo, hi));

                // Now the original value is dangling. Insert a concatenation instruction that can
                // compute it from the two new parameters. This also serves as a record of what we
                // did so a future call to this function doesn't have to redo the work.
                //
                // Note that it is safe to move `pos` here since `reuse` was set above, so we don't
                // need to insert a split instruction before returning.
                pos.goto_first_inst(ebb);
                pos.ins().with_result(value).Binary(
                    concat,
                    split_type,
                    lo,
                    hi,
                );

                // Finally, splitting the EBB parameter is not enough. We also have to repair all
                // of the predecessor instructions that branch here.
                add_repair(concat, split_type, ebb, num, hi_num, repairs);
            }
        }
    }

    // Did the code above succeed in finding values we can reuse?
    if let Some(pair) = reuse {
        pair
    } else {
        // No, we'll just have to insert the requested split instruction at `pos`. Note that `pos`
        // has not been moved by the EBB argument code above when `reuse` is `None`.
        match concat {
            Opcode::Iconcat => pos.ins().isplit(value),
            Opcode::Vconcat => pos.ins().vsplit(value),
            _ => panic!("Unhandled concat opcode: {}", concat),
        }
    }
}
|
||||
|
||||
// Add a repair entry to the work list.
|
||||
fn add_repair(
|
||||
concat: Opcode,
|
||||
split_type: Type,
|
||||
ebb: Ebb,
|
||||
num: usize,
|
||||
hi_num: usize,
|
||||
repairs: &mut Vec<Repair>,
|
||||
) {
|
||||
repairs.push(Repair {
|
||||
concat,
|
||||
split_type,
|
||||
ebb,
|
||||
num,
|
||||
hi_num,
|
||||
});
|
||||
}
|
||||
|
||||
/// Strip concat-split chains. Return a simpler way of computing the same value.
|
||||
///
|
||||
/// Given this input:
|
||||
///
|
||||
/// ```cton
|
||||
/// v10 = iconcat v1, v2
|
||||
/// v11, v12 = isplit v10
|
||||
/// ```
|
||||
///
|
||||
/// This function resolves `v11` to `v1` and `v12` to `v2`.
|
||||
fn resolve_splits(dfg: &ir::DataFlowGraph, value: Value) -> Value {
|
||||
let value = dfg.resolve_aliases(value);
|
||||
|
||||
// Deconstruct a split instruction.
|
||||
let split_res;
|
||||
let concat_opc;
|
||||
let split_arg;
|
||||
if let ValueDef::Result(inst, num) = dfg.value_def(value) {
|
||||
split_res = num;
|
||||
concat_opc = match dfg[inst].opcode() {
|
||||
Opcode::Isplit => Opcode::Iconcat,
|
||||
Opcode::Vsplit => Opcode::Vconcat,
|
||||
_ => return value,
|
||||
};
|
||||
split_arg = dfg.inst_args(inst)[0];
|
||||
} else {
|
||||
return value;
|
||||
}
|
||||
|
||||
// See if split_arg is defined by a concatenation instruction.
|
||||
if let ValueDef::Result(inst, _) = dfg.value_def(split_arg) {
|
||||
if dfg[inst].opcode() == concat_opc {
|
||||
return dfg.inst_args(inst)[split_res];
|
||||
}
|
||||
}
|
||||
|
||||
value
|
||||
}
|
||||
|
||||
/// Simplify the arguments to a branch *after* the instructions leading up to the branch have been
|
||||
/// legalized.
|
||||
///
|
||||
/// The branch argument repairs performed by `split_any()` above may be performed on branches that
|
||||
/// have not yet been legalized. The repaired arguments can be defined by actual split
|
||||
/// instructions in that case.
|
||||
///
|
||||
/// After legalizing the instructions computing the value that was split, it is likely that we can
|
||||
/// avoid depending on the split instruction. Its input probably comes from a concatenation.
|
||||
pub fn simplify_branch_arguments(dfg: &mut ir::DataFlowGraph, branch: Inst) {
|
||||
let mut new_args = Vec::new();
|
||||
|
||||
for &arg in dfg.inst_args(branch) {
|
||||
let new_arg = resolve_splits(dfg, arg);
|
||||
new_args.push(new_arg);
|
||||
}
|
||||
|
||||
dfg.inst_args_mut(branch).copy_from_slice(&new_args);
|
||||
}
|
||||
111
lib/codegen/src/lib.rs
Normal file
111
lib/codegen/src/lib.rs
Normal file
@@ -0,0 +1,111 @@
|
||||
//! Cretonne code generation library.
|
||||
|
||||
#![deny(missing_docs, trivial_numeric_casts, unused_extern_crates)]
|
||||
#![cfg_attr(feature = "clippy", plugin(clippy(conf_file = "../../clippy.toml")))]
|
||||
#![cfg_attr(feature="cargo-clippy", allow(
|
||||
// Rustfmt 0.9.0 is at odds with this lint:
|
||||
block_in_if_condition_stmt,
|
||||
// Produces only a false positive:
|
||||
while_let_loop,
|
||||
// Produces many false positives, but did produce some valid lints, now fixed:
|
||||
needless_lifetimes,
|
||||
// Generated code makes some style transgressions, but readability doesn't suffer much:
|
||||
many_single_char_names,
|
||||
identity_op,
|
||||
needless_borrow,
|
||||
cast_lossless,
|
||||
unreadable_literal,
|
||||
assign_op_pattern,
|
||||
empty_line_after_outer_attr,
|
||||
// Hard to avoid in generated code:
|
||||
cyclomatic_complexity,
|
||||
too_many_arguments,
|
||||
// Code generator doesn't have a way to collapse identical arms:
|
||||
match_same_arms,
|
||||
// These are relatively minor style issues, but would be easy to fix:
|
||||
new_without_default,
|
||||
new_without_default_derive,
|
||||
should_implement_trait,
|
||||
redundant_field_names,
|
||||
useless_let_if_seq,
|
||||
len_without_is_empty))]
|
||||
|
||||
// Turns on no_std and alloc features if std is not available.
|
||||
#![cfg_attr(not(feature = "std"), no_std)]
|
||||
#![cfg_attr(not(feature = "std"), feature(alloc))]
|
||||
|
||||
// Include the `hashmap_core` crate if std is not available.
|
||||
#[allow(unused_extern_crates)]
|
||||
#[cfg(not(feature = "std"))]
|
||||
extern crate hashmap_core;
|
||||
#[cfg(not(feature = "std"))]
|
||||
#[macro_use]
|
||||
extern crate alloc;
|
||||
extern crate failure;
|
||||
#[macro_use]
|
||||
extern crate failure_derive;
|
||||
|
||||
pub use context::Context;
|
||||
pub use legalizer::legalize_function;
|
||||
pub use verifier::verify_function;
|
||||
pub use write::write_function;
|
||||
|
||||
/// Version number of the cretonne-codegen crate.
|
||||
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
|
||||
|
||||
#[macro_use]
|
||||
pub extern crate cretonne_entity as entity;
|
||||
|
||||
#[macro_use]
|
||||
pub mod dbg;
|
||||
|
||||
pub mod bforest;
|
||||
pub mod binemit;
|
||||
pub mod cfg_printer;
|
||||
pub mod cursor;
|
||||
pub mod dominator_tree;
|
||||
pub mod flowgraph;
|
||||
pub mod ir;
|
||||
pub mod isa;
|
||||
pub mod loop_analysis;
|
||||
pub mod print_errors;
|
||||
pub mod result;
|
||||
pub mod settings;
|
||||
pub mod timing;
|
||||
pub mod verifier;
|
||||
|
||||
pub use entity::packed_option;
|
||||
|
||||
mod abi;
|
||||
mod bitset;
|
||||
mod constant_hash;
|
||||
mod context;
|
||||
mod dce;
|
||||
mod divconst_magic_numbers;
|
||||
mod iterators;
|
||||
mod legalizer;
|
||||
mod licm;
|
||||
mod partition_slice;
|
||||
mod postopt;
|
||||
mod predicates;
|
||||
mod preopt;
|
||||
mod ref_slice;
|
||||
mod regalloc;
|
||||
mod scoped_hash_map;
|
||||
mod simple_gvn;
|
||||
mod stack_layout;
|
||||
mod topo_order;
|
||||
mod unreachable_code;
|
||||
mod write;
|
||||
|
||||
/// This replaces `std` in builds with `core`.
#[cfg(not(feature = "std"))]
mod std {
    // Re-export everything from `core` so existing `std::` paths keep resolving.
    pub use core::*;
    // Heap-backed containers come from the `alloc` crate in no_std builds.
    pub use alloc::{boxed, vec, string};
    // Hash containers are not in `core`/`alloc`; `hashmap_core` supplies them.
    pub mod collections {
        pub use hashmap_core::{HashMap, HashSet};
        pub use hashmap_core::map as hash_map;
        pub use alloc::BTreeSet;
    }
}
|
||||
220
lib/codegen/src/licm.rs
Normal file
220
lib/codegen/src/licm.rs
Normal file
@@ -0,0 +1,220 @@
|
||||
//! A Loop Invariant Code Motion optimization pass
|
||||
|
||||
use cursor::{Cursor, FuncCursor};
|
||||
use dominator_tree::DominatorTree;
|
||||
use entity::{EntityList, ListPool};
|
||||
use flowgraph::ControlFlowGraph;
|
||||
use ir::{DataFlowGraph, Ebb, Function, Inst, InstBuilder, Layout, Opcode, Type, Value};
|
||||
use loop_analysis::{Loop, LoopAnalysis};
|
||||
use std::collections::HashSet;
|
||||
use std::vec::Vec;
|
||||
use timing;
|
||||
|
||||
/// Performs the LICM pass by detecting loops within the CFG and moving
/// loop-invariant instructions out of them.
/// Changes the CFG and domtree in-place during the operation.
pub fn do_licm(
    func: &mut Function,
    cfg: &mut ControlFlowGraph,
    domtree: &mut DominatorTree,
    loop_analysis: &mut LoopAnalysis,
) {
    let _tt = timing::licm();
    debug_assert!(cfg.is_valid());
    debug_assert!(domtree.is_valid());
    debug_assert!(loop_analysis.is_valid());

    for lp in loop_analysis.loops() {
        // For each loop that we want to optimize we determine the set of loop-invariant
        // instructions
        let invariant_insts = remove_loop_invariant_instructions(lp, func, cfg, loop_analysis);
        // Then we create the loop's pre-header and fill it with the invariant instructions
        // Then we remove the invariant instructions from the loop body
        if !invariant_insts.is_empty() {
            // If the loop has a natural pre-header we use it, otherwise we create it.
            let mut pos;
            match has_pre_header(&func.layout, cfg, domtree, loop_analysis.loop_header(lp)) {
                None => {
                    let pre_header =
                        create_pre_header(loop_analysis.loop_header(lp), func, cfg, domtree);
                    pos = FuncCursor::new(func).at_last_inst(pre_header);
                }
                // If there is a natural pre-header we insert new instructions just before the
                // related jumping instruction (which is not necessarily at the end).
                Some((_, last_inst)) => {
                    pos = FuncCursor::new(func).at_inst(last_inst);
                }
            };
            // The last instruction of the pre-header is the termination instruction (usually
            // a jump) so we need to insert just before this.
            for inst in invariant_insts {
                pos.insert_inst(inst);
            }
        }
    }
    // We have to recompute the domtree to account for the changes
    cfg.compute(func);
    domtree.compute(func, cfg);
}
|
||||
|
||||
// Insert a pre-header before the header, modifying the function layout and CFG to reflect it.
|
||||
// A jump instruction to the header is placed at the end of the pre-header.
|
||||
fn create_pre_header(
|
||||
header: Ebb,
|
||||
func: &mut Function,
|
||||
cfg: &mut ControlFlowGraph,
|
||||
domtree: &DominatorTree,
|
||||
) -> Ebb {
|
||||
let pool = &mut ListPool::<Value>::new();
|
||||
let header_args_values: Vec<Value> = func.dfg.ebb_params(header).into_iter().cloned().collect();
|
||||
let header_args_types: Vec<Type> = header_args_values
|
||||
.clone()
|
||||
.into_iter()
|
||||
.map(|val| func.dfg.value_type(val))
|
||||
.collect();
|
||||
let pre_header = func.dfg.make_ebb();
|
||||
let mut pre_header_args_value: EntityList<Value> = EntityList::new();
|
||||
for typ in header_args_types {
|
||||
pre_header_args_value.push(func.dfg.append_ebb_param(pre_header, typ), pool);
|
||||
}
|
||||
for (_, last_inst) in cfg.pred_iter(header) {
|
||||
// We only follow normal edges (not the back edges)
|
||||
if !domtree.dominates(header, last_inst, &func.layout) {
|
||||
change_branch_jump_destination(last_inst, pre_header, func);
|
||||
}
|
||||
}
|
||||
{
|
||||
let mut pos = FuncCursor::new(func).at_top(header);
|
||||
// Inserts the pre-header at the right place in the layout.
|
||||
pos.insert_ebb(pre_header);
|
||||
pos.next_inst();
|
||||
pos.ins().jump(header, pre_header_args_value.as_slice(pool));
|
||||
}
|
||||
pre_header
|
||||
}
|
||||
|
||||
// Detects if a loop header has a natural pre-header.
|
||||
//
|
||||
// A loop header has a pre-header if there is only one predecessor that the header doesn't
|
||||
// dominate.
|
||||
// Returns the pre-header Ebb and the instruction jumping to the header.
|
||||
fn has_pre_header(
|
||||
layout: &Layout,
|
||||
cfg: &ControlFlowGraph,
|
||||
domtree: &DominatorTree,
|
||||
header: Ebb,
|
||||
) -> Option<(Ebb, Inst)> {
|
||||
let mut result = None;
|
||||
let mut found = false;
|
||||
for (pred_ebb, last_inst) in cfg.pred_iter(header) {
|
||||
// We only count normal edges (not the back edges)
|
||||
if !domtree.dominates(header, last_inst, layout) {
|
||||
if found {
|
||||
// We have already found one, there are more than one
|
||||
return None;
|
||||
} else {
|
||||
result = Some((pred_ebb, last_inst));
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
// Change the destination of a jump or branch instruction. Does nothing if called with a non-jump
|
||||
// or non-branch instruction.
|
||||
fn change_branch_jump_destination(inst: Inst, new_ebb: Ebb, func: &mut Function) {
|
||||
match func.dfg[inst].branch_destination_mut() {
|
||||
None => (),
|
||||
Some(instruction_dest) => *instruction_dest = new_ebb,
|
||||
}
|
||||
}
|
||||
|
||||
/// Test whether the given opcode is unsafe to even consider for LICM.
|
||||
fn trivially_unsafe_for_licm(opcode: Opcode) -> bool {
|
||||
opcode.can_load() || opcode.can_store() || opcode.is_call() || opcode.is_branch() ||
|
||||
opcode.is_terminator() || opcode.is_return() ||
|
||||
opcode.can_trap() || opcode.other_side_effects() || opcode.writes_cpu_flags()
|
||||
}
|
||||
|
||||
/// Test whether the given instruction is loop-invariant.
|
||||
fn is_loop_invariant(inst: Inst, dfg: &DataFlowGraph, loop_values: &HashSet<Value>) -> bool {
|
||||
if trivially_unsafe_for_licm(dfg[inst].opcode()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
let inst_args = dfg.inst_args(inst);
|
||||
for arg in inst_args {
|
||||
let arg = dfg.resolve_aliases(*arg);
|
||||
if loop_values.contains(&arg) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Traverses a loop in reverse post-order from a header EBB and identify loop-invariant
// instructions. These loop-invariant instructions are then removed from the code and returned
// (in reverse post-order) for later use.
fn remove_loop_invariant_instructions(
    lp: Loop,
    func: &mut Function,
    cfg: &ControlFlowGraph,
    loop_analysis: &LoopAnalysis,
) -> Vec<Inst> {
    // Values defined inside the loop; an instruction using any of these is not invariant.
    let mut loop_values: HashSet<Value> = HashSet::new();
    let mut invariant_insts: Vec<Inst> = Vec::new();
    let mut pos = FuncCursor::new(func);
    // We traverse the loop EBB in reverse post-order.
    for ebb in postorder_ebbs_loop(loop_analysis, cfg, lp).iter().rev() {
        // Arguments of the EBB are loop values
        for val in pos.func.dfg.ebb_params(*ebb) {
            loop_values.insert(*val);
        }
        pos.goto_top(*ebb);
        #[cfg_attr(feature = "cargo-clippy", allow(block_in_if_condition_stmt))]
        'next_inst: while let Some(inst) = pos.next_inst() {
            if is_loop_invariant(inst, &pos.func.dfg, &loop_values) {
                // If all the instruction's argument are defined outside the loop
                // then this instruction is loop-invariant
                invariant_insts.push(inst);
                // We remove it from the loop; stepping back keeps the cursor valid so the
                // `while let` continues with the instruction after the removed one.
                pos.remove_inst_and_step_back();
            } else {
                // If the instruction is not loop-invariant we push its results in the set of
                // loop values
                for out in pos.func.dfg.inst_results(inst) {
                    loop_values.insert(*out);
                }
            }
        }
    }
    invariant_insts
}
|
||||
|
||||
/// Return ebbs from a loop in post-order, starting from an entry point in the block.
|
||||
fn postorder_ebbs_loop(loop_analysis: &LoopAnalysis, cfg: &ControlFlowGraph, lp: Loop) -> Vec<Ebb> {
|
||||
let mut grey = HashSet::new();
|
||||
let mut black = HashSet::new();
|
||||
let mut stack = vec![loop_analysis.loop_header(lp)];
|
||||
let mut postorder = Vec::new();
|
||||
|
||||
while !stack.is_empty() {
|
||||
let node = stack.pop().unwrap();
|
||||
if !grey.contains(&node) {
|
||||
// This is a white node. Mark it as gray.
|
||||
grey.insert(node);
|
||||
stack.push(node);
|
||||
// Get any children we've never seen before.
|
||||
for child in cfg.succ_iter(node) {
|
||||
if loop_analysis.is_in_loop(child, lp) && !grey.contains(&child) {
|
||||
stack.push(child);
|
||||
}
|
||||
}
|
||||
} else if !black.contains(&node) {
|
||||
postorder.push(node);
|
||||
black.insert(node);
|
||||
}
|
||||
}
|
||||
postorder
|
||||
}
|
||||
342
lib/codegen/src/loop_analysis.rs
Normal file
342
lib/codegen/src/loop_analysis.rs
Normal file
@@ -0,0 +1,342 @@
|
||||
//! A loop analysis represented as mappings of loops to their header Ebb
|
||||
//! and parent in the loop tree.
|
||||
|
||||
use dominator_tree::DominatorTree;
|
||||
use entity::EntityMap;
|
||||
use entity::{Keys, PrimaryMap};
|
||||
use flowgraph::ControlFlowGraph;
|
||||
use ir::{Ebb, Function, Layout};
|
||||
use packed_option::PackedOption;
|
||||
use std::vec::Vec;
|
||||
use timing;
|
||||
|
||||
/// An opaque reference to a code loop.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct Loop(u32);
// Generates the entity-reference boilerplate for `Loop`, displayed with the "loop" prefix.
entity_impl!(Loop, "loop");
|
||||
|
||||
/// Loop tree information for a single function.
///
/// Loops are referenced by the Loop object, and for each loop you can access its header EBB,
/// its eventual parent in the loop tree and all the EBB belonging to the loop.
pub struct LoopAnalysis {
    // All loops discovered by `compute()`.
    loops: PrimaryMap<Loop, LoopData>,
    // Maps each EBB to the innermost loop containing it, if any.
    ebb_loop_map: EntityMap<Ebb, PackedOption<Loop>>,
    // Set by `compute()`, reset by `clear()`; see `is_valid()`.
    valid: bool,
}
|
||||
|
||||
// Per-loop data: the loop's header EBB and its parent in the loop tree
// (none for a top-level loop).
struct LoopData {
    header: Ebb,
    parent: PackedOption<Loop>,
}
|
||||
|
||||
impl LoopData {
|
||||
/// Creates a `LoopData` object with the loop header and its eventual parent in the loop tree.
|
||||
pub fn new(header: Ebb, parent: Option<Loop>) -> LoopData {
|
||||
LoopData {
|
||||
header: header,
|
||||
parent: parent.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Methods for querying the loop analysis.
impl LoopAnalysis {
    /// Allocate a new blank loop analysis struct. Use `compute` to compute the loop analysis for
    /// a function.
    pub fn new() -> Self {
        Self {
            valid: false,
            loops: PrimaryMap::new(),
            ebb_loop_map: EntityMap::new(),
        }
    }

    /// Returns all the loops contained in a function.
    pub fn loops(&self) -> Keys<Loop> {
        self.loops.keys()
    }

    /// Returns the header EBB of a particular loop.
    ///
    /// The characteristic property of a loop header block is that it dominates some of its
    /// predecessors.
    pub fn loop_header(&self, lp: Loop) -> Ebb {
        self.loops[lp].header
    }

    /// Return the eventual parent of a loop in the loop tree.
    ///
    /// Returns `None` for a top-level loop.
    pub fn loop_parent(&self, lp: Loop) -> Option<Loop> {
        self.loops[lp].parent.expand()
    }

    /// Determine if an Ebb belongs to a loop by running a finger along the loop tree.
    ///
    /// Returns `true` if `ebb` is in loop `lp`.
    pub fn is_in_loop(&self, ebb: Ebb, lp: Loop) -> bool {
        let ebb_loop = self.ebb_loop_map[ebb];
        match ebb_loop.expand() {
            None => false,
            // `ebb` is in `lp` if its innermost loop is `lp` itself or a child of `lp`.
            Some(ebb_loop) => self.is_child_loop(ebb_loop, lp),
        }
    }

    /// Determines if a loop is contained in another loop.
    ///
    /// `is_child_loop(child,parent)` returns `true` if and only if `child` is a child loop of
    /// `parent` (or `child == parent`).
    pub fn is_child_loop(&self, child: Loop, parent: Loop) -> bool {
        // Walk up the loop tree from `child`, looking for `parent`.
        let mut finger = Some(child);
        while let Some(finger_loop) = finger {
            if finger_loop == parent {
                return true;
            }
            finger = self.loop_parent(finger_loop);
        }
        false
    }
}
|
||||
|
||||
impl LoopAnalysis {
    /// Detects the loops in a function. Needs the control flow graph and the dominator tree.
    pub fn compute(&mut self, func: &Function, cfg: &ControlFlowGraph, domtree: &DominatorTree) {
        let _tt = timing::loop_analysis();
        self.loops.clear();
        self.ebb_loop_map.clear();
        self.ebb_loop_map.resize(func.dfg.num_ebbs());
        self.find_loop_headers(cfg, domtree, &func.layout);
        self.discover_loop_blocks(cfg, domtree, &func.layout);
        self.valid = true;
    }

    /// Check if the loop analysis is in a valid state.
    ///
    /// Note that this doesn't perform any kind of validity checks. It simply checks if the
    /// `compute()` method has been called since the last `clear()`. It does not check that the
    /// loop analysis is consistent with the CFG.
    pub fn is_valid(&self) -> bool {
        self.valid
    }

    /// Clear all the data structures contained in the loop analysis. This will leave the
    /// analysis in a similar state to a context returned by `new()` except that allocated
    /// memory will be retained.
    pub fn clear(&mut self) {
        self.loops.clear();
        self.ebb_loop_map.clear();
        self.valid = false;
    }

    // Traverses the CFG in reverse postorder and create a loop object for every EBB having a
    // back edge.
    fn find_loop_headers(
        &mut self,
        cfg: &ControlFlowGraph,
        domtree: &DominatorTree,
        layout: &Layout,
    ) {
        // We traverse the CFG in reverse postorder
        for &ebb in domtree.cfg_postorder().iter().rev() {
            for (_, pred_inst) in cfg.pred_iter(ebb) {
                // If the ebb dominates one of its predecessors it is a back edge
                if domtree.dominates(ebb, pred_inst, layout) {
                    // This ebb is a loop header, so we create its associated loop
                    let lp = self.loops.push(LoopData::new(ebb, None));
                    self.ebb_loop_map[ebb] = lp.into();
                    break;
                    // We break because we only need one back edge to identify a loop header.
                }
            }
        }
    }

    // Intended to be called after `find_loop_headers`. For each detected loop header,
    // discovers all the ebb belonging to the loop and its inner loops. After a call to this
    // function, the loop tree is fully constructed.
    fn discover_loop_blocks(
        &mut self,
        cfg: &ControlFlowGraph,
        domtree: &DominatorTree,
        layout: &Layout,
    ) {
        let mut stack: Vec<Ebb> = Vec::new();
        // We handle each loop header in reverse order, corresponding to a pseudo postorder
        // traversal of the graph.
        for lp in self.loops().rev() {
            // Seed the DFS with the sources of the back edges of this loop.
            for (pred, pred_inst) in cfg.pred_iter(self.loops[lp].header) {
                // We follow the back edges
                if domtree.dominates(self.loops[lp].header, pred_inst, layout) {
                    stack.push(pred);
                }
            }
            while let Some(node) = stack.pop() {
                let continue_dfs: Option<Ebb>;
                match self.ebb_loop_map[node].expand() {
                    None => {
                        // The node hasn't been visited yet, we tag it as part of the loop
                        self.ebb_loop_map[node] = PackedOption::from(lp);
                        continue_dfs = Some(node);
                    }
                    Some(node_loop) => {
                        // We copy the node_loop into a mutable reference passed along the while
                        let mut node_loop = node_loop;
                        // The node is part of a loop, which can be lp or an inner loop
                        let mut node_loop_parent_option = self.loops[node_loop].parent;
                        while let Some(node_loop_parent) = node_loop_parent_option.expand() {
                            if node_loop_parent == lp {
                                // We have encountered lp so we stop (already visited)
                                break;
                            } else {
                                // Climb one level towards the root of the loop tree.
                                node_loop = node_loop_parent;
                                // We lookup the parent loop
                                node_loop_parent_option = self.loops[node_loop].parent;
                            }
                        }
                        // Now node_loop_parent is either:
                        // - None and node_loop is an new inner loop of lp
                        // - Some(...) and the initial node_loop was a known inner loop of lp
                        match node_loop_parent_option.expand() {
                            Some(_) => continue_dfs = None,
                            None => {
                                if node_loop != lp {
                                    // Attach the newly found inner loop to lp and keep
                                    // exploring from its header.
                                    self.loops[node_loop].parent = lp.into();
                                    continue_dfs = Some(self.loops[node_loop].header)
                                } else {
                                    // If lp is a one-block loop then we make sure we stop
                                    continue_dfs = None
                                }
                            }
                        }
                    }
                }
                // Now we have handled the popped node and need to continue the DFS by adding the
                // predecessors of that node
                if let Some(continue_dfs) = continue_dfs {
                    for (pred, _) in cfg.pred_iter(continue_dfs) {
                        stack.push(pred)
                    }
                }
            }
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod test {

    use cursor::{Cursor, FuncCursor};
    use dominator_tree::DominatorTree;
    use flowgraph::ControlFlowGraph;
    use ir::{types, Function, InstBuilder};
    use loop_analysis::{Loop, LoopAnalysis};
    use std::vec::Vec;

    // Two nested loops: ebb1/ebb2 form the inner loop, ebb0..ebb3 the outer loop.
    #[test]
    fn nested_loops_detection() {
        let mut func = Function::new();
        let ebb0 = func.dfg.make_ebb();
        let ebb1 = func.dfg.make_ebb();
        let ebb2 = func.dfg.make_ebb();
        let ebb3 = func.dfg.make_ebb();
        let cond = func.dfg.append_ebb_param(ebb0, types::I32);

        {
            let mut cur = FuncCursor::new(&mut func);

            cur.insert_ebb(ebb0);
            cur.ins().jump(ebb1, &[]);

            cur.insert_ebb(ebb1);
            cur.ins().jump(ebb2, &[]);

            cur.insert_ebb(ebb2);
            // Back edge of the inner loop (ebb2 -> ebb1).
            cur.ins().brnz(cond, ebb1, &[]);
            cur.ins().jump(ebb3, &[]);

            cur.insert_ebb(ebb3);
            // Back edge of the outer loop (ebb3 -> ebb0).
            cur.ins().brnz(cond, ebb0, &[]);
        }

        let mut loop_analysis = LoopAnalysis::new();
        let mut cfg = ControlFlowGraph::new();
        let mut domtree = DominatorTree::new();
        cfg.compute(&func);
        domtree.compute(&func, &cfg);
        loop_analysis.compute(&func, &cfg, &domtree);

        let loops = loop_analysis.loops().collect::<Vec<Loop>>();
        assert_eq!(loops.len(), 2);
        assert_eq!(loop_analysis.loop_header(loops[0]), ebb0);
        assert_eq!(loop_analysis.loop_header(loops[1]), ebb1);
        assert_eq!(loop_analysis.loop_parent(loops[1]), Some(loops[0]));
        assert_eq!(loop_analysis.loop_parent(loops[0]), None);
        assert_eq!(loop_analysis.is_in_loop(ebb0, loops[0]), true);
        assert_eq!(loop_analysis.is_in_loop(ebb0, loops[1]), false);
        assert_eq!(loop_analysis.is_in_loop(ebb1, loops[1]), true);
        assert_eq!(loop_analysis.is_in_loop(ebb1, loops[0]), true);
        assert_eq!(loop_analysis.is_in_loop(ebb2, loops[1]), true);
        assert_eq!(loop_analysis.is_in_loop(ebb2, loops[0]), true);
        assert_eq!(loop_analysis.is_in_loop(ebb3, loops[0]), true);
        assert_eq!(loop_analysis.is_in_loop(ebb0, loops[1]), false);
    }

    // An outer loop (header ebb0) containing two sibling inner loops
    // (headers ebb1 and ebb3) that merge at ebb5.
    #[test]
    fn complex_loop_detection() {
        let mut func = Function::new();
        let ebb0 = func.dfg.make_ebb();
        let ebb1 = func.dfg.make_ebb();
        let ebb2 = func.dfg.make_ebb();
        let ebb3 = func.dfg.make_ebb();
        let ebb4 = func.dfg.make_ebb();
        let ebb5 = func.dfg.make_ebb();
        let cond = func.dfg.append_ebb_param(ebb0, types::I32);

        {
            let mut cur = FuncCursor::new(&mut func);

            cur.insert_ebb(ebb0);
            cur.ins().brnz(cond, ebb1, &[]);
            cur.ins().jump(ebb3, &[]);

            cur.insert_ebb(ebb1);
            cur.ins().jump(ebb2, &[]);

            cur.insert_ebb(ebb2);
            // Back edge of the first inner loop (ebb2 -> ebb1).
            cur.ins().brnz(cond, ebb1, &[]);
            cur.ins().jump(ebb5, &[]);

            cur.insert_ebb(ebb3);
            cur.ins().jump(ebb4, &[]);

            cur.insert_ebb(ebb4);
            // Back edge of the second inner loop (ebb4 -> ebb3).
            cur.ins().brnz(cond, ebb3, &[]);
            cur.ins().jump(ebb5, &[]);

            cur.insert_ebb(ebb5);
            // Back edge of the outer loop (ebb5 -> ebb0).
            cur.ins().brnz(cond, ebb0, &[]);
        }

        let mut loop_analysis = LoopAnalysis::new();
        let mut cfg = ControlFlowGraph::new();
        let mut domtree = DominatorTree::new();
        cfg.compute(&func);
        domtree.compute(&func, &cfg);
        loop_analysis.compute(&func, &cfg, &domtree);

        let loops = loop_analysis.loops().collect::<Vec<Loop>>();
        assert_eq!(loops.len(), 3);
        assert_eq!(loop_analysis.loop_header(loops[0]), ebb0);
        assert_eq!(loop_analysis.loop_header(loops[1]), ebb1);
        assert_eq!(loop_analysis.loop_header(loops[2]), ebb3);
        assert_eq!(loop_analysis.loop_parent(loops[1]), Some(loops[0]));
        assert_eq!(loop_analysis.loop_parent(loops[2]), Some(loops[0]));
        assert_eq!(loop_analysis.loop_parent(loops[0]), None);
        assert_eq!(loop_analysis.is_in_loop(ebb0, loops[0]), true);
        assert_eq!(loop_analysis.is_in_loop(ebb1, loops[1]), true);
        assert_eq!(loop_analysis.is_in_loop(ebb2, loops[1]), true);
        assert_eq!(loop_analysis.is_in_loop(ebb3, loops[2]), true);
        assert_eq!(loop_analysis.is_in_loop(ebb4, loops[2]), true);
        assert_eq!(loop_analysis.is_in_loop(ebb5, loops[0]), true);
    }
}
|
||||
77
lib/codegen/src/partition_slice.rs
Normal file
77
lib/codegen/src/partition_slice.rs
Normal file
@@ -0,0 +1,77 @@
|
||||
//! Rearrange the elements in a slice according to a predicate.
|
||||
|
||||
/// Rearrange the elements of the mutable slice `s` such that elements where `p(t)` is true precede
/// the elements where `p(t)` is false.
///
/// The order of elements is not preserved, unless the slice is already partitioned.
///
/// Returns the number of elements where `p(t)` is true.
pub fn partition_slice<T, F>(s: &mut [T], mut p: F) -> usize
where
    F: FnMut(&T) -> bool,
{
    // Locate the first element failing the predicate; everything before it is
    // already in place. If no element fails, the slice is fully partitioned.
    let first_false = match s.iter().position(|t| !p(t)) {
        Some(idx) => idx,
        None => return s.len(),
    };

    // Scan the remainder, swapping each `true` element down to the boundary.
    // This keeps the `true` elements in their original relative order, while
    // the `false` elements may get shuffled.
    let mut boundary = first_false;
    for idx in first_false + 1..s.len() {
        if p(&s[idx]) {
            s.swap(boundary, idx);
            boundary += 1;
        }
    }

    boundary
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::partition_slice;
    use std::vec::Vec;

    // Partitions a copy of `x` by "is a multiple of 10" and checks both the resulting
    // order and that the returned count equals the number of multiples of 10.
    fn check(x: &[u32], want: &[u32]) {
        assert_eq!(x.len(), want.len());
        let want_count = want.iter().cloned().filter(|&x| x % 10 == 0).count();
        let mut v = Vec::new();
        v.extend(x.iter().cloned());
        let count = partition_slice(&mut v[..], |&x| x % 10 == 0);
        assert_eq!(v, want);
        assert_eq!(count, want_count);
    }

    #[test]
    fn empty() {
        check(&[], &[]);
    }

    #[test]
    fn singles() {
        check(&[0], &[0]);
        check(&[1], &[1]);
        check(&[10], &[10]);
    }

    #[test]
    fn doubles() {
        check(&[0, 0], &[0, 0]);
        check(&[0, 5], &[0, 5]);
        check(&[5, 0], &[0, 5]);
        check(&[5, 4], &[5, 4]);
    }

    #[test]
    fn longer() {
        check(&[1, 2, 3], &[1, 2, 3]);
        check(&[1, 2, 10], &[10, 2, 1]); // Note: 2, 1 order not required.
        check(&[1, 10, 2], &[10, 1, 2]); // Note: 1, 2 order not required.
        check(&[1, 20, 10], &[20, 10, 1]);
        check(&[1, 20, 3, 10], &[20, 10, 3, 1]);
        check(&[20, 3, 10, 1], &[20, 10, 3, 1]);
    }
}
|
||||
203
lib/codegen/src/postopt.rs
Normal file
203
lib/codegen/src/postopt.rs
Normal file
@@ -0,0 +1,203 @@
|
||||
//! A post-legalization rewriting pass.
|
||||
|
||||
#![allow(non_snake_case)]
|
||||
|
||||
use cursor::{Cursor, EncCursor};
|
||||
use ir::condcodes::{CondCode, FloatCC, IntCC};
|
||||
use ir::dfg::ValueDef;
|
||||
use ir::immediates::Imm64;
|
||||
use ir::instructions::{Opcode, ValueList};
|
||||
use ir::{Ebb, Function, Inst, InstBuilder, InstructionData, Value};
|
||||
use isa::TargetIsa;
|
||||
use timing;
|
||||
|
||||
/// Information collected about a compare+branch sequence.
///
/// Produced by pattern-matching in `optimize_cpu_flags` and consumed to rewrite the
/// sequence into a flags-based one.
struct CmpBrInfo {
    /// The branch instruction.
    br_inst: Inst,
    /// The icmp, icmp_imm, or fcmp instruction.
    cmp_inst: Inst,
    /// The destination of the branch.
    destination: Ebb,
    /// The arguments of the branch.
    args: ValueList,
    /// The first argument to the comparison. The second is in the `kind` field.
    cmp_arg: Value,
    /// If the branch is `brz` rather than `brnz`, we need to invert the condition
    /// before the branch.
    invert_branch_cond: bool,
    /// The kind of comparison, and the second argument.
    kind: CmpBrKind,
}
|
||||
|
||||
// The comparison flavor of a compare+branch sequence, carrying the condition code
// and the second comparison operand (a value or an immediate).
enum CmpBrKind {
    Icmp { cond: IntCC, arg: Value },
    IcmpImm { cond: IntCC, imm: Imm64 },
    Fcmp { cond: FloatCC, arg: Value },
}
|
||||
|
||||
/// Optimize comparisons to use flags values, to avoid materializing conditions
/// in integer registers.
///
/// For example, optimize icmp/fcmp brz/brnz sequences into ifcmp/ffcmp brif/brff
/// sequences.
fn optimize_cpu_flags(
    pos: &mut EncCursor,
    inst: Inst,
    last_flags_clobber: Option<Inst>,
    isa: &TargetIsa,
) {
    // Look for compare and branch patterns.
    // This code could be considerably simplified with non-lexical lifetimes.
    let info = match pos.func.dfg[inst] {
        InstructionData::Branch {
            opcode,
            destination,
            ref args,
        } => {
            // The branch condition is the first branch argument.
            let first_arg = args.first(&pos.func.dfg.value_lists).unwrap();
            let invert_branch_cond = match opcode {
                Opcode::Brz => true,
                Opcode::Brnz => false,
                _ => panic!(),
            };
            // The condition must be the direct result of a comparison instruction.
            if let ValueDef::Result(cond_inst, _) = pos.func.dfg.value_def(first_arg) {
                match pos.func.dfg[cond_inst] {
                    InstructionData::IntCompare {
                        cond,
                        args: cmp_args,
                        ..
                    } => CmpBrInfo {
                        br_inst: inst,
                        cmp_inst: cond_inst,
                        destination,
                        args: args.clone(),
                        cmp_arg: cmp_args[0],
                        invert_branch_cond,
                        kind: CmpBrKind::Icmp {
                            cond,
                            arg: cmp_args[1],
                        },
                    },
                    InstructionData::IntCompareImm {
                        cond,
                        arg: cmp_arg,
                        imm: cmp_imm,
                        ..
                    } => CmpBrInfo {
                        br_inst: inst,
                        cmp_inst: cond_inst,
                        destination,
                        args: args.clone(),
                        cmp_arg,
                        invert_branch_cond,
                        kind: CmpBrKind::IcmpImm { cond, imm: cmp_imm },
                    },
                    InstructionData::FloatCompare {
                        cond,
                        args: cmp_args,
                        ..
                    } => CmpBrInfo {
                        br_inst: inst,
                        cmp_inst: cond_inst,
                        destination,
                        args: args.clone(),
                        cmp_arg: cmp_args[0],
                        invert_branch_cond,
                        kind: CmpBrKind::Fcmp {
                            cond,
                            arg: cmp_args[1],
                        },
                    },
                    _ => return,
                }
            } else {
                return;
            }
        }
        // TODO: trapif, trueif, selectif, and their ff counterparts.
        _ => return,
    };

    // If any instructions clobber the flags between the comparison and the branch,
    // don't optimize them.
    if last_flags_clobber != Some(info.cmp_inst) {
        return;
    }

    // We found a compare+branch pattern. Transform it to use flags.
    // Drop the condition value (first branch argument); the rest are passed through.
    let args = info.args.as_slice(&pos.func.dfg.value_lists)[1..].to_vec();
    pos.goto_inst(info.cmp_inst);
    match info.kind {
        CmpBrKind::Icmp { mut cond, arg } => {
            let flags = pos.ins().ifcmp(info.cmp_arg, arg);
            // Keep a `trueif` in place of the old compare for any other users of its result.
            pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
            if info.invert_branch_cond {
                cond = cond.inverse();
            }
            pos.func.dfg.replace(info.br_inst).brif(
                cond,
                flags,
                info.destination,
                &args,
            );
        }
        CmpBrKind::IcmpImm { mut cond, imm } => {
            let flags = pos.ins().ifcmp_imm(info.cmp_arg, imm);
            pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
            if info.invert_branch_cond {
                cond = cond.inverse();
            }
            pos.func.dfg.replace(info.br_inst).brif(
                cond,
                flags,
                info.destination,
                &args,
            );
        }
        CmpBrKind::Fcmp { mut cond, arg } => {
            let flags = pos.ins().ffcmp(info.cmp_arg, arg);
            pos.func.dfg.replace(info.cmp_inst).trueff(cond, flags);
            if info.invert_branch_cond {
                cond = cond.inverse();
            }
            pos.func.dfg.replace(info.br_inst).brff(
                cond,
                flags,
                info.destination,
                &args,
            );
        }
    }
    // NOTE(review): re-encoding failures are silently discarded here (the `Result` is
    // consumed with `.is_ok()` and ignored) — confirm a failed update_encoding is
    // harmless at this point.
    pos.func.update_encoding(info.cmp_inst, isa).is_ok();
    pos.func.update_encoding(info.br_inst, isa).is_ok();
}
|
||||
|
||||
//----------------------------------------------------------------------
//
// The main post-opt pass.

/// Run the post-legalization optimization pass over `func`.
pub fn do_postopt(func: &mut Function, isa: &TargetIsa) {
    let _tt = timing::postopt();
    let mut pos = EncCursor::new(func, isa);
    while let Some(_ebb) = pos.next_ebb() {
        // Most recent flags-clobbering instruction seen in this EBB, if any.
        let mut last_flags_clobber = None;
        while let Some(inst) = pos.next_inst() {
            if isa.uses_cpu_flags() {
                // Optimize instructions to make use of flags.
                optimize_cpu_flags(&mut pos, inst, last_flags_clobber, isa);

                // Track the most recent seen instruction that clobbers the flags.
                if let Some(constraints) =
                    isa.encoding_info().operand_constraints(
                        pos.func.encodings[inst],
                    )
                {
                    if constraints.clobbers_flags {
                        last_flags_clobber = Some(inst)
                    }
                }
            }
        }
    }
}
|
||||
87
lib/codegen/src/predicates.rs
Normal file
87
lib/codegen/src/predicates.rs
Normal file
@@ -0,0 +1,87 @@
|
||||
//! Predicate functions for testing instruction fields.
|
||||
//!
|
||||
//! This module defines functions that are used by the instruction predicates defined by
|
||||
//! `lib/codegen/meta/cdsl/predicates.py` classes.
|
||||
//!
|
||||
//! The predicates that operate on integer fields use `Into<i64>` as a shared trait bound. This
|
||||
//! bound is implemented by all the native integer types as well as `Imm64`.
|
||||
//!
|
||||
//! Some of these predicates may be unused in certain ISA configurations, so we suppress the
|
||||
//! dead code warning.
|
||||
|
||||
use ir;
|
||||
|
||||
/// Check that `x` is the same as `y`, after converting `y` into `x`'s type.
#[allow(dead_code)]
pub fn is_equal<T: Eq + Copy, O: Into<T> + Copy>(x: T, y: O) -> bool {
    let y_as_t: T = y.into();
    y_as_t == x
}
|
||||
|
||||
/// Check that `x` can be represented as a `wd`-bit signed integer with `sc` low zero bits.
#[allow(dead_code)]
pub fn is_signed_int<T: Into<i64>>(x: T, wd: u8, sc: u8) -> bool {
    let s = x.into();
    // Round-trip `s` through a truncated representation: `>> sc` requires the low `sc`
    // bits to be zero to survive, and the `<< (64 - wd + sc)` / `>> (64 - wd)` pair
    // (arithmetic shift) sign-extends from bit `wd - 1`. The value is unchanged by the
    // round trip iff it satisfies both constraints.
    // NOTE(review): assumes `0 < wd <= 64` and `sc < wd`; other values over-shift and
    // would panic in debug builds — confirm callers guarantee this.
    s == (s >> sc << (64 - wd + sc) >> (64 - wd))
}
|
||||
|
||||
/// Check that `x` can be represented as a `wd`-bit unsigned integer with `sc` low zero bits.
#[allow(dead_code)]
pub fn is_unsigned_int<T: Into<i64>>(x: T, wd: u8, sc: u8) -> bool {
    let u = x.into() as u64;
    // Bit-mask of the permitted bits: bits `sc..wd` set, everything else clear.
    // NOTE(review): `1 << wd` over-shifts when `wd == 64` (panics in debug builds);
    // confirm callers never pass a full 64-bit width.
    let m = (1 << wd) - (1 << sc);
    u == (u & m)
}
|
||||
|
||||
/// Check that the external function referenced by `func_ref` is marked `colocated`
/// in `func`'s external-function table.
#[allow(dead_code)]
pub fn is_colocated_func(func_ref: ir::FuncRef, func: &ir::Function) -> bool {
    func.dfg.ext_funcs[func_ref].colocated
}
|
||||
|
||||
/// Check that the global variable `global_var` is marked `colocated`.
///
/// # Panics
///
/// Panics if the global variable does not have a symbolic (`Sym`) address, since
/// `colocated` is only meaningful for those.
#[allow(dead_code)]
pub fn is_colocated_data(global_var: ir::GlobalVar, func: &ir::Function) -> bool {
    match func.global_vars[global_var] {
        ir::GlobalVarData::Sym { colocated, .. } => colocated,
        _ => panic!("is_colocated_data only makes sense for data with symbolic addresses"),
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Exercise the signed/unsigned range predicates with native `u32` inputs.
    #[test]
    fn cvt_u32() {
        let x1 = 0u32;
        let x2 = 1u32;
        let x3 = 0xffff_fff0u32;

        assert!(is_signed_int(x1, 1, 0));
        assert!(is_signed_int(x1, 2, 1));
        assert!(is_signed_int(x2, 2, 0));
        assert!(!is_signed_int(x2, 2, 1));

        // `u32` doesn't sign-extend when converted to `i64`.
        assert!(!is_signed_int(x3, 8, 0));

        assert!(is_unsigned_int(x1, 1, 0));
        assert!(is_unsigned_int(x1, 8, 4));
        assert!(is_unsigned_int(x2, 1, 0));
        assert!(!is_unsigned_int(x2, 8, 4));
        assert!(!is_unsigned_int(x3, 1, 0));
        assert!(is_unsigned_int(x3, 32, 4));
    }

    // Exercise the predicates through the `Imm64` implementation of `Into<i64>`.
    #[test]
    fn cvt_imm64() {
        use ir::immediates::Imm64;

        let x1 = Imm64::new(-8);
        let x2 = Imm64::new(8);

        assert!(is_signed_int(x1, 16, 2));
        assert!(is_signed_int(x2, 16, 2));
        assert!(!is_signed_int(x1, 16, 4));
        assert!(!is_signed_int(x2, 16, 4));
    }
}
|
||||
574
lib/codegen/src/preopt.rs
Normal file
574
lib/codegen/src/preopt.rs
Normal file
@@ -0,0 +1,574 @@
|
||||
//! A pre-legalization rewriting pass.
|
||||
|
||||
#![allow(non_snake_case)]
|
||||
|
||||
use cursor::{Cursor, FuncCursor};
|
||||
use divconst_magic_numbers::{MS32, MS64, MU32, MU64};
|
||||
use divconst_magic_numbers::{magicS32, magicS64, magicU32, magicU64};
|
||||
use ir::Inst;
|
||||
use ir::dfg::ValueDef;
|
||||
use ir::instructions::Opcode;
|
||||
use ir::types::{I32, I64};
|
||||
use ir::{DataFlowGraph, Function, InstBuilder, InstructionData, Type, Value};
|
||||
use timing;
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
//
|
||||
// Pattern-match helpers and transformation for div and rem by constants.
|
||||
|
||||
// Simple math helpers
|
||||
|
||||
/// If `x` is a power of two, or the negation thereof, return the power along
/// with a boolean that indicates whether `x` is negative. Else return None.
#[inline]
fn isPowerOf2_S32(x: i32) -> Option<(bool, u32)> {
    // We have to special-case the minimum value because abs(x) isn't representable.
    // Using `i32::min_value()` avoids the out-of-range literal `-0x8000_0000`,
    // which trips the `overflowing_literals` lint.
    if x == i32::min_value() {
        return Some((true, 31));
    }
    let abs_x = i32::wrapping_abs(x) as u32;
    if abs_x.is_power_of_two() {
        return Some((x < 0, abs_x.trailing_zeros()));
    }
    None
}
|
||||
|
||||
/// Same comments as for `isPowerOf2_S32` apply, but for 64-bit values.
#[inline]
fn isPowerOf2_S64(x: i64) -> Option<(bool, u32)> {
    // We have to special-case the minimum value because abs(x) isn't representable.
    // Using `i64::min_value()` avoids the out-of-range literal
    // `-0x8000_0000_0000_0000`, which trips the `overflowing_literals` lint.
    if x == i64::min_value() {
        return Some((true, 63));
    }
    let abs_x = i64::wrapping_abs(x) as u64;
    if abs_x.is_power_of_two() {
        return Some((x < 0, abs_x.trailing_zeros()));
    }
    None
}
|
||||
|
||||
// One variant per (operation, signedness, width) combination handled by the
// div/rem-by-constant transformation.
#[derive(Debug)]
enum DivRemByConstInfo {
    DivU32(Value, u32), // In all cases, the arguments are:
    DivU64(Value, u64), // left operand, right operand
    DivS32(Value, i32),
    DivS64(Value, i64),
    RemU32(Value, u32),
    RemU64(Value, u64),
    RemS32(Value, i32),
    RemS64(Value, i64),
}
|
||||
|
||||
/// Possibly create a DivRemByConstInfo from the given components, by
/// figuring out which, if any, of the 8 cases apply, and also taking care to
/// sanity-check the immediate.
fn package_up_divrem_info(
    argL: Value,
    argL_ty: Type,
    argRs: i64,
    isSigned: bool,
    isRem: bool,
) -> Option<DivRemByConstInfo> {
    // Reinterpret the raw immediate bits as unsigned for the range checks below.
    let argRu: u64 = argRs as u64;
    if !isSigned && argL_ty == I32 && argRu < 0x1_0000_0000 {
        // Unsigned 32-bit: the immediate must fit in 32 bits.
        let con = if isRem {
            DivRemByConstInfo::RemU32
        } else {
            DivRemByConstInfo::DivU32
        };
        return Some(con(argL, argRu as u32));
    }
    if !isSigned && argL_ty == I64 {
        // unsigned 64, no range constraint
        let con = if isRem {
            DivRemByConstInfo::RemU64
        } else {
            DivRemByConstInfo::DivU64
        };
        return Some(con(argL, argRu));
    }
    if isSigned && argL_ty == I32 && (argRu <= 0x7fff_ffff || argRu >= 0xffff_ffff_8000_0000) {
        // Signed 32-bit: the immediate must be the sign-extension of some i32.
        let con = if isRem {
            DivRemByConstInfo::RemS32
        } else {
            DivRemByConstInfo::DivS32
        };
        return Some(con(argL, argRu as i32));
    }
    if isSigned && argL_ty == I64 {
        // signed 64, no range constraint
        let con = if isRem {
            DivRemByConstInfo::RemS64
        } else {
            DivRemByConstInfo::DivS64
        };
        return Some(con(argL, argRu as i64));
    }
    None
}
|
||||
|
||||
/// Examine `idata` to see if it is a div or rem by a constant, and if so
|
||||
/// return the operands, signedness, operation size and div-vs-rem-ness in a
|
||||
/// handy bundle.
|
||||
fn get_div_info(inst: Inst, dfg: &DataFlowGraph) -> Option<DivRemByConstInfo> {
|
||||
let idata: &InstructionData = &dfg[inst];
|
||||
|
||||
if let InstructionData::BinaryImm { opcode, arg, imm } = *idata {
|
||||
let (isSigned, isRem) = match opcode {
|
||||
Opcode::UdivImm => (false, false),
|
||||
Opcode::UremImm => (false, true),
|
||||
Opcode::SdivImm => (true, false),
|
||||
Opcode::SremImm => (true, true),
|
||||
_other => return None,
|
||||
};
|
||||
// Pull the operation size (type) from the left arg
|
||||
let argL_ty = dfg.value_type(arg);
|
||||
return package_up_divrem_info(arg, argL_ty, imm.into(), isSigned, isRem);
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Actually do the transformation given a bundle containing the relevant
/// information. `divrem_info` describes a div or rem by a constant, that
/// `pos` currently points at, and `inst` is the associated instruction.
/// `inst` is replaced by a sequence of other operations that calculate the
/// same result. Note that there are various `divrem_info` cases where we
/// cannot do any transformation, in which case `inst` is left unchanged.
///
/// The non-power-of-2 cases use the magic-number multiplication technique
/// (see `magicU32`/`magicU64`/`magicS32`/`magicS64`): multiply by a
/// precomputed constant via the high half of a widening multiply, then
/// shift and fix up.
fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCursor, inst: Inst) {
    // Whether this bundle describes a rem (true) or a div (false).
    let isRem = match *divrem_info {
        DivRemByConstInfo::DivU32(_, _) |
        DivRemByConstInfo::DivU64(_, _) |
        DivRemByConstInfo::DivS32(_, _) |
        DivRemByConstInfo::DivS64(_, _) => false,
        DivRemByConstInfo::RemU32(_, _) |
        DivRemByConstInfo::RemU64(_, _) |
        DivRemByConstInfo::RemS32(_, _) |
        DivRemByConstInfo::RemS64(_, _) => true,
    };

    match *divrem_info {
        // -------------------- U32 --------------------

        // U32 div, rem by zero: ignore
        DivRemByConstInfo::DivU32(_n1, 0) |
        DivRemByConstInfo::RemU32(_n1, 0) => {}

        // U32 div by 1: identity
        // U32 rem by 1: zero
        DivRemByConstInfo::DivU32(n1, 1) |
        DivRemByConstInfo::RemU32(n1, 1) => {
            if isRem {
                pos.func.dfg.replace(inst).iconst(I32, 0);
            } else {
                pos.func.dfg.replace(inst).copy(n1);
            }
        }

        // U32 div, rem by a power-of-2
        DivRemByConstInfo::DivU32(n1, d) |
        DivRemByConstInfo::RemU32(n1, d) if d.is_power_of_two() => {
            debug_assert!(d >= 2);
            // compute k where d == 2^k
            let k = d.trailing_zeros();
            debug_assert!(k >= 1 && k <= 31);
            if isRem {
                // rem: mask off the low k bits.
                let mask = (1u64 << k) - 1;
                pos.func.dfg.replace(inst).band_imm(n1, mask as i64);
            } else {
                // div: logical shift right by k.
                pos.func.dfg.replace(inst).ushr_imm(n1, k as i64);
            }
        }

        // U32 div, rem by non-power-of-2
        DivRemByConstInfo::DivU32(n1, d) |
        DivRemByConstInfo::RemU32(n1, d) => {
            debug_assert!(d >= 3);
            let MU32 {
                mulBy,
                doAdd,
                shiftBy,
            } = magicU32(d);
            let qf; // final quotient
            let q0 = pos.ins().iconst(I32, mulBy as i64);
            let q1 = pos.ins().umulhi(n1, q0);
            if doAdd {
                debug_assert!(shiftBy >= 1 && shiftBy <= 32);
                let t1 = pos.ins().isub(n1, q1);
                let t2 = pos.ins().ushr_imm(t1, 1);
                let t3 = pos.ins().iadd(t2, q1);
                // I never found any case where shiftBy == 1 here.
                // So there's no attempt to fold out a zero shift.
                debug_assert_ne!(shiftBy, 1);
                qf = pos.ins().ushr_imm(t3, (shiftBy - 1) as i64);
            } else {
                debug_assert!(shiftBy >= 0 && shiftBy <= 31);
                // Whereas there are known cases here for shiftBy == 0.
                if shiftBy > 0 {
                    qf = pos.ins().ushr_imm(q1, shiftBy as i64);
                } else {
                    qf = q1;
                }
            }
            // Now qf holds the final quotient. If necessary calculate the
            // remainder instead.
            if isRem {
                let tt = pos.ins().imul_imm(qf, d as i64);
                pos.func.dfg.replace(inst).isub(n1, tt);
            } else {
                pos.func.dfg.replace(inst).copy(qf);
            }
        }

        // -------------------- U64 --------------------

        // U64 div, rem by zero: ignore
        DivRemByConstInfo::DivU64(_n1, 0) |
        DivRemByConstInfo::RemU64(_n1, 0) => {}

        // U64 div by 1: identity
        // U64 rem by 1: zero
        DivRemByConstInfo::DivU64(n1, 1) |
        DivRemByConstInfo::RemU64(n1, 1) => {
            if isRem {
                pos.func.dfg.replace(inst).iconst(I64, 0);
            } else {
                pos.func.dfg.replace(inst).copy(n1);
            }
        }

        // U64 div, rem by a power-of-2
        DivRemByConstInfo::DivU64(n1, d) |
        DivRemByConstInfo::RemU64(n1, d) if d.is_power_of_two() => {
            debug_assert!(d >= 2);
            // compute k where d == 2^k
            let k = d.trailing_zeros();
            debug_assert!(k >= 1 && k <= 63);
            if isRem {
                // rem: mask off the low k bits.
                let mask = (1u64 << k) - 1;
                pos.func.dfg.replace(inst).band_imm(n1, mask as i64);
            } else {
                // div: logical shift right by k.
                pos.func.dfg.replace(inst).ushr_imm(n1, k as i64);
            }
        }

        // U64 div, rem by non-power-of-2
        DivRemByConstInfo::DivU64(n1, d) |
        DivRemByConstInfo::RemU64(n1, d) => {
            debug_assert!(d >= 3);
            let MU64 {
                mulBy,
                doAdd,
                shiftBy,
            } = magicU64(d);
            let qf; // final quotient
            let q0 = pos.ins().iconst(I64, mulBy as i64);
            let q1 = pos.ins().umulhi(n1, q0);
            if doAdd {
                debug_assert!(shiftBy >= 1 && shiftBy <= 64);
                let t1 = pos.ins().isub(n1, q1);
                let t2 = pos.ins().ushr_imm(t1, 1);
                let t3 = pos.ins().iadd(t2, q1);
                // I never found any case where shiftBy == 1 here.
                // So there's no attempt to fold out a zero shift.
                debug_assert_ne!(shiftBy, 1);
                qf = pos.ins().ushr_imm(t3, (shiftBy - 1) as i64);
            } else {
                debug_assert!(shiftBy >= 0 && shiftBy <= 63);
                // Whereas there are known cases here for shiftBy == 0.
                if shiftBy > 0 {
                    qf = pos.ins().ushr_imm(q1, shiftBy as i64);
                } else {
                    qf = q1;
                }
            }
            // Now qf holds the final quotient. If necessary calculate the
            // remainder instead.
            if isRem {
                let tt = pos.ins().imul_imm(qf, d as i64);
                pos.func.dfg.replace(inst).isub(n1, tt);
            } else {
                pos.func.dfg.replace(inst).copy(qf);
            }
        }

        // -------------------- S32 --------------------

        // S32 div, rem by zero or -1: ignore
        DivRemByConstInfo::DivS32(_n1, -1) |
        DivRemByConstInfo::RemS32(_n1, -1) |
        DivRemByConstInfo::DivS32(_n1, 0) |
        DivRemByConstInfo::RemS32(_n1, 0) => {}

        // S32 div by 1: identity
        // S32 rem by 1: zero
        DivRemByConstInfo::DivS32(n1, 1) |
        DivRemByConstInfo::RemS32(n1, 1) => {
            if isRem {
                pos.func.dfg.replace(inst).iconst(I32, 0);
            } else {
                pos.func.dfg.replace(inst).copy(n1);
            }
        }

        DivRemByConstInfo::DivS32(n1, d) |
        DivRemByConstInfo::RemS32(n1, d) => {
            if let Some((isNeg, k)) = isPowerOf2_S32(d) {
                // k can be 31 only in the case that d is -2^31.
                debug_assert!(k >= 1 && k <= 31);
                // Round the dividend towards zero before shifting, by adding
                // (2^k - 1) to negative values: t3 = n1 + (n1 < 0 ? 2^k-1 : 0).
                let t1 = if k - 1 == 0 {
                    n1
                } else {
                    pos.ins().sshr_imm(n1, (k - 1) as i64)
                };
                let t2 = pos.ins().ushr_imm(t1, (32 - k) as i64);
                let t3 = pos.ins().iadd(n1, t2);
                if isRem {
                    // S32 rem by a power-of-2
                    let t4 = pos.ins().band_imm(t3, i32::wrapping_neg(1 << k) as i64);
                    // Curiously, we don't care here what the sign of d is.
                    pos.func.dfg.replace(inst).isub(n1, t4);
                } else {
                    // S32 div by a power-of-2
                    let t4 = pos.ins().sshr_imm(t3, k as i64);
                    // Negate the quotient when the divisor was negative.
                    if isNeg {
                        pos.func.dfg.replace(inst).irsub_imm(t4, 0);
                    } else {
                        pos.func.dfg.replace(inst).copy(t4);
                    }
                }
            } else {
                // S32 div, rem by a non-power-of-2
                debug_assert!(d < -2 || d > 2);
                let MS32 { mulBy, shiftBy } = magicS32(d);
                let q0 = pos.ins().iconst(I32, mulBy as i64);
                let q1 = pos.ins().smulhi(n1, q0);
                // Fix up for the cases where the magic multiplier's sign
                // disagrees with the divisor's sign.
                let q2 = if d > 0 && mulBy < 0 {
                    pos.ins().iadd(q1, n1)
                } else if d < 0 && mulBy > 0 {
                    pos.ins().isub(q1, n1)
                } else {
                    q1
                };
                debug_assert!(shiftBy >= 0 && shiftBy <= 31);
                let q3 = if shiftBy == 0 {
                    q2
                } else {
                    pos.ins().sshr_imm(q2, shiftBy as i64)
                };
                // Add the sign bit to round negative quotients towards zero.
                let t1 = pos.ins().ushr_imm(q3, 31);
                let qf = pos.ins().iadd(q3, t1);
                // Now qf holds the final quotient. If necessary calculate
                // the remainder instead.
                if isRem {
                    let tt = pos.ins().imul_imm(qf, d as i64);
                    pos.func.dfg.replace(inst).isub(n1, tt);
                } else {
                    pos.func.dfg.replace(inst).copy(qf);
                }
            }
        }

        // -------------------- S64 --------------------

        // S64 div, rem by zero or -1: ignore
        DivRemByConstInfo::DivS64(_n1, -1) |
        DivRemByConstInfo::RemS64(_n1, -1) |
        DivRemByConstInfo::DivS64(_n1, 0) |
        DivRemByConstInfo::RemS64(_n1, 0) => {}

        // S64 div by 1: identity
        // S64 rem by 1: zero
        DivRemByConstInfo::DivS64(n1, 1) |
        DivRemByConstInfo::RemS64(n1, 1) => {
            if isRem {
                pos.func.dfg.replace(inst).iconst(I64, 0);
            } else {
                pos.func.dfg.replace(inst).copy(n1);
            }
        }

        DivRemByConstInfo::DivS64(n1, d) |
        DivRemByConstInfo::RemS64(n1, d) => {
            if let Some((isNeg, k)) = isPowerOf2_S64(d) {
                // k can be 63 only in the case that d is -2^63.
                debug_assert!(k >= 1 && k <= 63);
                // Same rounding-towards-zero adjustment as the S32 case above.
                let t1 = if k - 1 == 0 {
                    n1
                } else {
                    pos.ins().sshr_imm(n1, (k - 1) as i64)
                };
                let t2 = pos.ins().ushr_imm(t1, (64 - k) as i64);
                let t3 = pos.ins().iadd(n1, t2);
                if isRem {
                    // S64 rem by a power-of-2
                    let t4 = pos.ins().band_imm(t3, i64::wrapping_neg(1 << k));
                    // Curiously, we don't care here what the sign of d is.
                    pos.func.dfg.replace(inst).isub(n1, t4);
                } else {
                    // S64 div by a power-of-2
                    let t4 = pos.ins().sshr_imm(t3, k as i64);
                    // Negate the quotient when the divisor was negative.
                    if isNeg {
                        pos.func.dfg.replace(inst).irsub_imm(t4, 0);
                    } else {
                        pos.func.dfg.replace(inst).copy(t4);
                    }
                }
            } else {
                // S64 div, rem by a non-power-of-2
                debug_assert!(d < -2 || d > 2);
                let MS64 { mulBy, shiftBy } = magicS64(d);
                let q0 = pos.ins().iconst(I64, mulBy);
                let q1 = pos.ins().smulhi(n1, q0);
                // Fix up for the cases where the magic multiplier's sign
                // disagrees with the divisor's sign.
                let q2 = if d > 0 && mulBy < 0 {
                    pos.ins().iadd(q1, n1)
                } else if d < 0 && mulBy > 0 {
                    pos.ins().isub(q1, n1)
                } else {
                    q1
                };
                debug_assert!(shiftBy >= 0 && shiftBy <= 63);
                let q3 = if shiftBy == 0 {
                    q2
                } else {
                    pos.ins().sshr_imm(q2, shiftBy as i64)
                };
                // Add the sign bit to round negative quotients towards zero.
                let t1 = pos.ins().ushr_imm(q3, 63);
                let qf = pos.ins().iadd(q3, t1);
                // Now qf holds the final quotient. If necessary calculate
                // the remainder instead.
                if isRem {
                    let tt = pos.ins().imul_imm(qf, d);
                    pos.func.dfg.replace(inst).isub(n1, tt);
                } else {
                    pos.func.dfg.replace(inst).copy(qf);
                }
            }
        }
    }
}
|
||||
|
||||
/// Apply basic simplifications.
///
/// This folds constants with arithmetic to form `_imm` instructions, and other
/// minor simplifications:
///
/// - `binop(x, iconst(k))` becomes `binop_imm(x, k)` for the opcodes listed
///   below; `isub(x, iconst(k))` becomes `iadd_imm(x, -k)`.
/// - `isub(iconst(k), x)` becomes `irsub_imm(x, k)`.
/// - `icmp(x, iconst(k))` becomes `icmp_imm(x, k)`.
/// - A `bint` feeding the first argument of a conditional trap, branch, or
///   `select` is bypassed so the boolean is used directly.
fn simplify(pos: &mut FuncCursor, inst: Inst) {
    match pos.func.dfg[inst] {
        InstructionData::Binary { opcode, args } => {
            // Is the second operand produced by an `iconst`?
            if let ValueDef::Result(iconst_inst, _) = pos.func.dfg.value_def(args[1]) {
                if let InstructionData::UnaryImm {
                    opcode: Opcode::Iconst,
                    mut imm,
                } = pos.func.dfg[iconst_inst]
                {
                    let new_opcode = match opcode {
                        Opcode::Iadd => Opcode::IaddImm,
                        Opcode::Imul => Opcode::ImulImm,
                        Opcode::Sdiv => Opcode::SdivImm,
                        Opcode::Udiv => Opcode::UdivImm,
                        Opcode::Srem => Opcode::SremImm,
                        Opcode::Urem => Opcode::UremImm,
                        Opcode::Band => Opcode::BandImm,
                        Opcode::Bor => Opcode::BorImm,
                        Opcode::Bxor => Opcode::BxorImm,
                        Opcode::Rotl => Opcode::RotlImm,
                        Opcode::Rotr => Opcode::RotrImm,
                        Opcode::Ishl => Opcode::IshlImm,
                        Opcode::Ushr => Opcode::UshrImm,
                        Opcode::Sshr => Opcode::SshrImm,
                        Opcode::Isub => {
                            // There is no `isub_imm`; use `iadd_imm` with the
                            // negated immediate instead.
                            imm = imm.wrapping_neg();
                            Opcode::IaddImm
                        }
                        _ => return,
                    };
                    let ty = pos.func.dfg.ctrl_typevar(inst);
                    pos.func.dfg.replace(inst).BinaryImm(
                        new_opcode,
                        ty,
                        imm,
                        args[0],
                    );
                }
            } else if let ValueDef::Result(iconst_inst, _) = pos.func.dfg.value_def(args[0]) {
                // Otherwise: is the *first* operand an `iconst`? Only `isub`
                // has a reversed-immediate form.
                if let InstructionData::UnaryImm {
                    opcode: Opcode::Iconst,
                    imm,
                } = pos.func.dfg[iconst_inst]
                {
                    let new_opcode = match opcode {
                        Opcode::Isub => Opcode::IrsubImm,
                        _ => return,
                    };
                    let ty = pos.func.dfg.ctrl_typevar(inst);
                    pos.func.dfg.replace(inst).BinaryImm(
                        new_opcode,
                        ty,
                        imm,
                        args[1],
                    );
                }
            }
        }
        InstructionData::IntCompare { opcode, cond, args } => {
            debug_assert_eq!(opcode, Opcode::Icmp);
            // Fold `icmp(x, iconst(k))` into `icmp_imm(x, k)`.
            if let ValueDef::Result(iconst_inst, _) = pos.func.dfg.value_def(args[1]) {
                if let InstructionData::UnaryImm {
                    opcode: Opcode::Iconst,
                    imm,
                } = pos.func.dfg[iconst_inst]
                {
                    pos.func.dfg.replace(inst).icmp_imm(cond, args[0], imm);
                }
            }
        }
        InstructionData::CondTrap { .. } |
        InstructionData::Branch { .. } |
        InstructionData::Ternary { opcode: Opcode::Select, .. } => {
            // Fold away a redundant `bint`.
            // First inspect the defining instruction of the condition operand
            // in a separate scope, since we need a fresh (mutable) borrow of
            // the DFG afterwards to rewrite the argument.
            let maybe = {
                let args = pos.func.dfg.inst_args(inst);
                if let ValueDef::Result(def_inst, _) = pos.func.dfg.value_def(args[0]) {
                    if let InstructionData::Unary {
                        opcode: Opcode::Bint,
                        arg: bool_val,
                    } = pos.func.dfg[def_inst]
                    {
                        Some(bool_val)
                    } else {
                        None
                    }
                } else {
                    None
                }
            };
            if let Some(bool_val) = maybe {
                // Use the original boolean directly, skipping the `bint`.
                let args = pos.func.dfg.inst_args_mut(inst);
                args[0] = bool_val;
            }
        }
        _ => {}
    }
}
|
||||
|
||||
/// The main pre-opt pass.
|
||||
pub fn do_preopt(func: &mut Function) {
|
||||
let _tt = timing::preopt();
|
||||
let mut pos = FuncCursor::new(func);
|
||||
while let Some(_ebb) = pos.next_ebb() {
|
||||
while let Some(inst) = pos.next_inst() {
|
||||
// Apply basic simplifications.
|
||||
simplify(&mut pos, inst);
|
||||
|
||||
//-- BEGIN -- division by constants ----------------
|
||||
|
||||
let mb_dri = get_div_info(inst, &pos.func.dfg);
|
||||
if let Some(divrem_info) = mb_dri {
|
||||
do_divrem_transformation(&divrem_info, &mut pos, inst);
|
||||
continue;
|
||||
}
|
||||
|
||||
//-- END -- division by constants ------------------
|
||||
}
|
||||
}
|
||||
}
|
||||
34
lib/codegen/src/print_errors.rs
Normal file
34
lib/codegen/src/print_errors.rs
Normal file
@@ -0,0 +1,34 @@
|
||||
//! Utility routines for pretty-printing error messages.
|
||||
|
||||
use ir;
|
||||
use isa::TargetIsa;
|
||||
use result::CtonError;
|
||||
use std::fmt::Write;
|
||||
use std::string::{String, ToString};
|
||||
use verifier;
|
||||
|
||||
/// Pretty-print a verifier error.
|
||||
pub fn pretty_verifier_error(
|
||||
func: &ir::Function,
|
||||
isa: Option<&TargetIsa>,
|
||||
err: &verifier::Error,
|
||||
) -> String {
|
||||
let mut msg = err.to_string();
|
||||
match err.location {
|
||||
ir::entities::AnyEntity::Inst(inst) => {
|
||||
write!(msg, "\n{}: {}\n\n", inst, func.dfg.display_inst(inst, isa)).unwrap()
|
||||
}
|
||||
_ => msg.push('\n'),
|
||||
}
|
||||
write!(msg, "{}", func.display(isa)).unwrap();
|
||||
msg
|
||||
}
|
||||
|
||||
/// Pretty-print a Cretonne error.
|
||||
pub fn pretty_error(func: &ir::Function, isa: Option<&TargetIsa>, err: CtonError) -> String {
|
||||
if let CtonError::Verifier(e) = err {
|
||||
pretty_verifier_error(func, isa, &e)
|
||||
} else {
|
||||
err.to_string()
|
||||
}
|
||||
}
|
||||
18
lib/codegen/src/ref_slice.rs
Normal file
18
lib/codegen/src/ref_slice.rs
Normal file
@@ -0,0 +1,18 @@
|
||||
//! Functions for converting a reference into a singleton slice.
|
||||
//!
|
||||
//! See also the [`ref_slice` crate](https://crates.io/crates/ref_slice).
|
||||
//!
|
||||
//! We define the functions here to avoid external dependencies, and to ensure that they are
|
||||
//! inlined in this crate.
|
||||
//!
|
||||
//! Despite their using an unsafe block, these functions are completely safe.
|
||||
|
||||
use std::slice;
|
||||
|
||||
/// Convert a reference into a slice containing exactly that one element.
///
/// This is the safe standard-library equivalent of the previous hand-rolled
/// `slice::from_raw_parts` call, so no `unsafe` block is needed.
pub fn ref_slice<T>(s: &T) -> &[T] {
    slice::from_ref(s)
}
|
||||
|
||||
/// Convert a mutable reference into a mutable slice containing exactly that
/// one element.
///
/// This is the safe standard-library equivalent of the previous hand-rolled
/// `slice::from_raw_parts_mut` call, so no `unsafe` block is needed.
pub fn ref_slice_mut<T>(s: &mut T) -> &mut [T] {
    slice::from_mut(s)
}
|
||||
131
lib/codegen/src/regalloc/affinity.rs
Normal file
131
lib/codegen/src/regalloc/affinity.rs
Normal file
@@ -0,0 +1,131 @@
|
||||
//! Value affinity for register allocation.
|
||||
//!
|
||||
//! An SSA value's affinity is a hint used to guide the register allocator. It specifies the class
|
||||
//! of allocation that is likely to cause the least amount of fixup moves in order to satisfy
|
||||
//! instruction operand constraints.
|
||||
//!
|
||||
//! For values that want to be in registers, the affinity hint includes a register class or
|
||||
//! subclass. This is just a hint, and the register allocator is allowed to pick a register from a
|
||||
//! larger register class instead.
|
||||
|
||||
use ir::{AbiParam, ArgumentLoc};
|
||||
use isa::{ConstraintKind, OperandConstraint, RegClassIndex, RegInfo, TargetIsa};
|
||||
use std::fmt;
|
||||
|
||||
/// Preferred register allocation for an SSA value.
///
/// This is a hint only; the register allocator may deviate from it (see the
/// module documentation).
#[derive(Clone, Copy, Debug)]
pub enum Affinity {
    /// No affinity.
    ///
    /// This indicates a value that is not defined or used by any real instructions. It is a ghost
    /// value that won't appear in the final program.
    None,

    /// This value should be placed in a spill slot on the stack.
    Stack,

    /// This value prefers a register from the given register class.
    /// The allocator may still pick from a larger class.
    Reg(RegClassIndex),
}
|
||||
|
||||
impl Default for Affinity {
    /// The default affinity is `None`: no register or stack preference at all.
    fn default() -> Self {
        Affinity::None
    }
}
|
||||
|
||||
impl Affinity {
|
||||
/// Create an affinity that satisfies a single constraint.
|
||||
///
|
||||
/// This will never create an `Affinity::None`.
|
||||
/// Use the `Default` implementation for that.
|
||||
pub fn new(constraint: &OperandConstraint) -> Affinity {
|
||||
if constraint.kind == ConstraintKind::Stack {
|
||||
Affinity::Stack
|
||||
} else {
|
||||
Affinity::Reg(constraint.regclass.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an affinity that matches an ABI argument for `isa`.
|
||||
pub fn abi(arg: &AbiParam, isa: &TargetIsa) -> Affinity {
|
||||
match arg.location {
|
||||
ArgumentLoc::Unassigned => Affinity::None,
|
||||
ArgumentLoc::Reg(_) => Affinity::Reg(isa.regclass_for_abi_type(arg.value_type).into()),
|
||||
ArgumentLoc::Stack(_) => Affinity::Stack,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is this the `None` affinity?
|
||||
pub fn is_none(self) -> bool {
|
||||
match self {
|
||||
Affinity::None => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is this the `Reg` affinity?
|
||||
pub fn is_reg(self) -> bool {
|
||||
match self {
|
||||
Affinity::Reg(_) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is this the `Stack` affinity?
|
||||
pub fn is_stack(self) -> bool {
|
||||
match self {
|
||||
Affinity::Stack => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Merge an operand constraint into this affinity.
|
||||
///
|
||||
/// Note that this does not guarantee that the register allocator will pick a register that
|
||||
/// satisfies the constraint.
|
||||
pub fn merge(&mut self, constraint: &OperandConstraint, reg_info: &RegInfo) {
|
||||
match *self {
|
||||
Affinity::None => *self = Affinity::new(constraint),
|
||||
Affinity::Reg(rc) => {
|
||||
// If the preferred register class is a subclass of the constraint, there's no need
|
||||
// to change anything.
|
||||
if constraint.kind != ConstraintKind::Stack &&
|
||||
!constraint.regclass.has_subclass(rc)
|
||||
{
|
||||
// If the register classes don't overlap, `intersect` returns `None`, and we
|
||||
// just keep our previous affinity.
|
||||
if let Some(subclass) = constraint.regclass.intersect_index(reg_info.rc(rc)) {
|
||||
// This constraint shrinks our preferred register class.
|
||||
*self = Affinity::Reg(subclass);
|
||||
}
|
||||
}
|
||||
}
|
||||
Affinity::Stack => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Return an object that can display this value affinity, using the register info from the
|
||||
/// target ISA.
|
||||
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(self, regs: R) -> DisplayAffinity<'a> {
|
||||
DisplayAffinity(self, regs.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Displaying an `Affinity` correctly requires the associated `RegInfo` from the target ISA.
|
||||
pub struct DisplayAffinity<'a>(Affinity, Option<&'a RegInfo>);
|
||||
|
||||
impl<'a> fmt::Display for DisplayAffinity<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self.0 {
|
||||
Affinity::None => write!(f, "none"),
|
||||
Affinity::Stack => write!(f, "stack"),
|
||||
Affinity::Reg(rci) => {
|
||||
match self.1 {
|
||||
Some(regs) => write!(f, "{}", regs.rc(rci)),
|
||||
None => write!(f, "{}", rci),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
1123
lib/codegen/src/regalloc/coalescing.rs
Normal file
1123
lib/codegen/src/regalloc/coalescing.rs
Normal file
File diff suppressed because it is too large
Load Diff
1143
lib/codegen/src/regalloc/coloring.rs
Normal file
1143
lib/codegen/src/regalloc/coloring.rs
Normal file
File diff suppressed because it is too large
Load Diff
159
lib/codegen/src/regalloc/context.rs
Normal file
159
lib/codegen/src/regalloc/context.rs
Normal file
@@ -0,0 +1,159 @@
|
||||
//! Register allocator context.
|
||||
//!
|
||||
//! The `Context` struct contains data structures that should be preserved across invocations of
|
||||
//! the register allocator algorithm. This doesn't preserve any data between functions, but it
|
||||
//! avoids allocating data structures independently for each function being compiled.
|
||||
|
||||
use dominator_tree::DominatorTree;
|
||||
use flowgraph::ControlFlowGraph;
|
||||
use ir::Function;
|
||||
use isa::TargetIsa;
|
||||
use regalloc::coalescing::Coalescing;
|
||||
use regalloc::coloring::Coloring;
|
||||
use regalloc::live_value_tracker::LiveValueTracker;
|
||||
use regalloc::liveness::Liveness;
|
||||
use regalloc::reload::Reload;
|
||||
use regalloc::spilling::Spilling;
|
||||
use regalloc::virtregs::VirtRegs;
|
||||
use result::CtonResult;
|
||||
use timing;
|
||||
use topo_order::TopoOrder;
|
||||
use verifier::{verify_context, verify_cssa, verify_liveness, verify_locations};
|
||||
|
||||
/// Persistent memory allocations for register allocation.
pub struct Context {
    // Liveness analysis results; recomputed per function.
    liveness: Liveness,
    // Virtual registers created during coalescing.
    virtregs: VirtRegs,
    // Pass state for conversion to Conventional SSA form.
    coalescing: Coalescing,
    // Topological ordering of EBBs, shared by spilling and reload.
    topo: TopoOrder,
    // Live value tracker; its dominator live sets are reused between the
    // spilling and coloring phases.
    tracker: LiveValueTracker,
    // Pass state for the spilling phase.
    spilling: Spilling,
    // Pass state for the reload phase.
    reload: Reload,
    // Pass state for the final coloring phase.
    coloring: Coloring,
}
|
||||
|
||||
impl Context {
    /// Create a new context for register allocation.
    ///
    /// This context should be reused for multiple functions in order to avoid repeated memory
    /// allocations.
    pub fn new() -> Self {
        Self {
            liveness: Liveness::new(),
            virtregs: VirtRegs::new(),
            coalescing: Coalescing::new(),
            topo: TopoOrder::new(),
            tracker: LiveValueTracker::new(),
            spilling: Spilling::new(),
            reload: Reload::new(),
            coloring: Coloring::new(),
        }
    }

    /// Clear all data structures in this context.
    pub fn clear(&mut self) {
        self.liveness.clear();
        self.virtregs.clear();
        self.coalescing.clear();
        self.topo.clear();
        self.tracker.clear();
        self.spilling.clear();
        self.reload.clear();
        self.coloring.clear();
    }

    /// Allocate registers in `func`.
    ///
    /// After register allocation, all values in `func` have been assigned to a register or stack
    /// location that is consistent with instruction encoding constraints.
    ///
    /// Runs the passes in order — liveness, coalescing/CSSA, spilling,
    /// reload, coloring — and, when the ISA's verifier flag is enabled,
    /// re-verifies the function after each transforming pass.
    pub fn run(
        &mut self,
        isa: &TargetIsa,
        func: &mut Function,
        cfg: &ControlFlowGraph,
        domtree: &mut DominatorTree,
    ) -> CtonResult {
        let _tt = timing::regalloc();
        debug_assert!(domtree.is_valid());

        // `Liveness` and `Coloring` are self-clearing.
        self.virtregs.clear();

        // Tracker state (dominator live sets) is actually reused between the spilling and coloring
        // phases.
        self.tracker.clear();

        // Pass: Liveness analysis.
        self.liveness.compute(isa, func, cfg);

        if isa.flags().enable_verifier() {
            verify_liveness(isa, func, cfg, &self.liveness)?;
        }

        // Pass: Coalesce and create Conventional SSA form.
        self.coalescing.conventional_ssa(
            isa,
            func,
            cfg,
            domtree,
            &mut self.liveness,
            &mut self.virtregs,
        );

        if isa.flags().enable_verifier() {
            verify_context(func, cfg, domtree, isa)?;
            verify_liveness(isa, func, cfg, &self.liveness)?;
            verify_cssa(func, cfg, domtree, &self.liveness, &self.virtregs)?;
        }

        // Pass: Spilling.
        self.spilling.run(
            isa,
            func,
            domtree,
            &mut self.liveness,
            &self.virtregs,
            &mut self.topo,
            &mut self.tracker,
        );

        if isa.flags().enable_verifier() {
            verify_context(func, cfg, domtree, isa)?;
            verify_liveness(isa, func, cfg, &self.liveness)?;
            verify_cssa(func, cfg, domtree, &self.liveness, &self.virtregs)?;
        }

        // Pass: Reload.
        self.reload.run(
            isa,
            func,
            domtree,
            &mut self.liveness,
            &mut self.topo,
            &mut self.tracker,
        );

        if isa.flags().enable_verifier() {
            verify_context(func, cfg, domtree, isa)?;
            verify_liveness(isa, func, cfg, &self.liveness)?;
            verify_cssa(func, cfg, domtree, &self.liveness, &self.virtregs)?;
        }

        // Pass: Coloring.
        self.coloring.run(
            isa,
            func,
            domtree,
            &mut self.liveness,
            &mut self.tracker,
        );

        if isa.flags().enable_verifier() {
            verify_context(func, cfg, domtree, isa)?;
            verify_liveness(isa, func, cfg, &self.liveness)?;
            // Locations are only checkable after coloring has assigned them.
            verify_locations(isa, func, Some(&self.liveness))?;
            verify_cssa(func, cfg, domtree, &self.liveness, &self.virtregs)?;
        }
        Ok(())
    }
}
|
||||
215
lib/codegen/src/regalloc/diversion.rs
Normal file
215
lib/codegen/src/regalloc/diversion.rs
Normal file
@@ -0,0 +1,215 @@
|
||||
//! Register diversions.
|
||||
//!
|
||||
//! Normally, a value is assigned to a single register or stack location by the register allocator.
|
||||
//! Sometimes, it is necessary to move register values to a different register in order to satisfy
|
||||
//! instruction constraints.
|
||||
//!
|
||||
//! These register diversions are local to an EBB. No values can be diverted when entering a new
|
||||
//! EBB.
|
||||
|
||||
use ir::{InstructionData, Opcode};
|
||||
use ir::{StackSlot, Value, ValueLoc, ValueLocations};
|
||||
use isa::{RegInfo, RegUnit};
|
||||
use std::fmt;
|
||||
use std::vec::Vec;
|
||||
|
||||
/// A diversion of a value from its original location to a new register or stack location.
///
/// In IR, a diversion is represented by a `regmove` instruction, possibly a chain of them for the
/// same value.
///
/// When tracking diversions, the `from` field is the original assigned value location, and `to` is
/// the current one. Diversions are local to a single EBB.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Diversion {
    /// The value that is diverted.
    pub value: Value,
    /// The original value location.
    pub from: ValueLoc,
    /// The current value location.
    pub to: ValueLoc,
}
|
||||
|
||||
impl Diversion {
    /// Make a new diversion.
    ///
    /// Both locations must be assigned (`ValueLoc::is_assigned`); this is
    /// checked in debug builds only.
    pub fn new(value: Value, from: ValueLoc, to: ValueLoc) -> Diversion {
        debug_assert!(from.is_assigned() && to.is_assigned());
        Diversion { value, from, to }
    }
}
|
||||
|
||||
/// Keep track of diversions in an EBB.
pub struct RegDiversions {
    // Active diversions, at most one entry per diverted value.
    // Lookups are linear scans of this vector.
    current: Vec<Diversion>,
}
|
||||
|
||||
impl RegDiversions {
|
||||
/// Create a new empty diversion tracker.
|
||||
pub fn new() -> Self {
|
||||
Self { current: Vec::new() }
|
||||
}
|
||||
|
||||
/// Clear the tracker, preparing for a new EBB.
|
||||
pub fn clear(&mut self) {
|
||||
self.current.clear()
|
||||
}
|
||||
|
||||
/// Are there any diversions?
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.current.is_empty()
|
||||
}
|
||||
|
||||
/// Get the current diversion of `value`, if any.
|
||||
pub fn diversion(&self, value: Value) -> Option<&Diversion> {
|
||||
self.current.iter().find(|d| d.value == value)
|
||||
}
|
||||
|
||||
/// Get all current diversions.
|
||||
pub fn all(&self) -> &[Diversion] {
|
||||
self.current.as_slice()
|
||||
}
|
||||
|
||||
/// Get the current location for `value`. Fall back to the assignment map for non-diverted
|
||||
/// values
|
||||
pub fn get(&self, value: Value, locations: &ValueLocations) -> ValueLoc {
|
||||
match self.diversion(value) {
|
||||
Some(d) => d.to,
|
||||
None => locations[value],
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the current register location for `value`, or panic if `value` isn't in a register.
|
||||
pub fn reg(&self, value: Value, locations: &ValueLocations) -> RegUnit {
|
||||
self.get(value, locations).unwrap_reg()
|
||||
}
|
||||
|
||||
/// Get the current stack location for `value`, or panic if `value` isn't in a stack slot.
|
||||
pub fn stack(&self, value: Value, locations: &ValueLocations) -> StackSlot {
|
||||
self.get(value, locations).unwrap_stack()
|
||||
}
|
||||
|
||||
/// Record any kind of move.
|
||||
///
|
||||
/// The `from` location must match an existing `to` location, if any.
|
||||
pub fn divert(&mut self, value: Value, from: ValueLoc, to: ValueLoc) {
|
||||
debug_assert!(from.is_assigned() && to.is_assigned());
|
||||
if let Some(i) = self.current.iter().position(|d| d.value == value) {
|
||||
debug_assert_eq!(self.current[i].to, from, "Bad regmove chain for {}", value);
|
||||
if self.current[i].from != to {
|
||||
self.current[i].to = to;
|
||||
} else {
|
||||
self.current.swap_remove(i);
|
||||
}
|
||||
} else {
|
||||
self.current.push(Diversion::new(value, from, to));
|
||||
}
|
||||
}
|
||||
|
||||
/// Record a register -> register move.
|
||||
pub fn regmove(&mut self, value: Value, from: RegUnit, to: RegUnit) {
|
||||
self.divert(value, ValueLoc::Reg(from), ValueLoc::Reg(to));
|
||||
}
|
||||
|
||||
/// Record a register -> stack move.
|
||||
pub fn regspill(&mut self, value: Value, from: RegUnit, to: StackSlot) {
|
||||
self.divert(value, ValueLoc::Reg(from), ValueLoc::Stack(to));
|
||||
}
|
||||
|
||||
/// Record a stack -> register move.
|
||||
pub fn regfill(&mut self, value: Value, from: StackSlot, to: RegUnit) {
|
||||
self.divert(value, ValueLoc::Stack(from), ValueLoc::Reg(to));
|
||||
}
|
||||
|
||||
/// Apply the effect of `inst`.
|
||||
///
|
||||
/// If `inst` is a `regmove`, `regfill`, or `regspill` instruction, update the diversions to
|
||||
/// match.
|
||||
pub fn apply(&mut self, inst: &InstructionData) {
|
||||
match *inst {
|
||||
InstructionData::RegMove {
|
||||
opcode: Opcode::Regmove,
|
||||
arg,
|
||||
src,
|
||||
dst,
|
||||
} => self.regmove(arg, src, dst),
|
||||
InstructionData::RegSpill {
|
||||
opcode: Opcode::Regspill,
|
||||
arg,
|
||||
src,
|
||||
dst,
|
||||
} => self.regspill(arg, src, dst),
|
||||
InstructionData::RegFill {
|
||||
opcode: Opcode::Regfill,
|
||||
arg,
|
||||
src,
|
||||
dst,
|
||||
} => self.regfill(arg, src, dst),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Drop any recorded move for `value`.
|
||||
///
|
||||
/// Returns the `to` location of the removed diversion.
|
||||
pub fn remove(&mut self, value: Value) -> Option<ValueLoc> {
|
||||
self.current.iter().position(|d| d.value == value).map(
|
||||
|i| {
|
||||
self.current.swap_remove(i).to
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
/// Return an object that can display the diversions.
|
||||
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&'a self, regs: R) -> DisplayDiversions<'a> {
|
||||
DisplayDiversions(self, regs.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Object that displays register diversions.
///
/// Created by `RegDiversions::display()`. The optional `RegInfo` is passed through to the
/// `display()` calls on each diversion's locations when formatting.
pub struct DisplayDiversions<'a>(&'a RegDiversions, Option<&'a RegInfo>);
|
||||
|
||||
impl<'a> fmt::Display for DisplayDiversions<'a> {
    /// Format the diversions as `{ v1: loc -> loc v2: loc -> loc }`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str("{")?;
        for d in self.0.all() {
            let from = d.from.display(self.1);
            let to = d.to.display(self.1);
            write!(f, " {}: {} -> {}", d.value, from, to)?;
        }
        f.write_str(" }")
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use entity::EntityRef;
    use ir::Value;

    #[test]
    fn inserts() {
        let mut divs = RegDiversions::new();
        let v1 = Value::new(1);
        let v2 = Value::new(2);

        // A fresh move is recorded as a new diversion.
        divs.regmove(v1, 10, 12);
        let expected = Diversion {
            value: v1,
            from: ValueLoc::Reg(10),
            to: ValueLoc::Reg(12),
        };
        assert_eq!(divs.diversion(v1), Some(&expected));
        assert_eq!(divs.diversion(v2), None);

        // A chained move updates the existing diversion in place.
        divs.regmove(v1, 12, 11);
        assert_eq!(divs.diversion(v1).unwrap().to, ValueLoc::Reg(11));

        // Moving back to the original register cancels the diversion.
        divs.regmove(v1, 11, 10);
        assert_eq!(divs.diversion(v1), None);
    }
}
|
||||
348
lib/codegen/src/regalloc/live_value_tracker.rs
Normal file
348
lib/codegen/src/regalloc/live_value_tracker.rs
Normal file
@@ -0,0 +1,348 @@
|
||||
//! Track which values are live in an EBB with instruction granularity.
|
||||
//!
|
||||
//! The `LiveValueTracker` keeps track of the set of live SSA values at each instruction in an EBB.
|
||||
//! The sets of live values are computed on the fly as the tracker is moved from instruction to
|
||||
//! instruction, starting at the EBB header.
|
||||
|
||||
use dominator_tree::DominatorTree;
|
||||
use entity::{EntityList, ListPool};
|
||||
use ir::{DataFlowGraph, Ebb, ExpandedProgramPoint, Inst, Layout, Value};
|
||||
use partition_slice::partition_slice;
|
||||
use regalloc::affinity::Affinity;
|
||||
use regalloc::liveness::Liveness;
|
||||
use regalloc::liverange::LiveRange;
|
||||
use std::collections::HashMap;
|
||||
use std::vec::Vec;
|
||||
|
||||
/// Compact list of values whose storage lives in a shared `ListPool`.
type ValueList = EntityList<Value>;
|
||||
|
||||
/// Compute and track live values throughout an EBB.
///
/// The tracker is advanced one instruction at a time via `process_inst()` / `drop_dead()`,
/// starting from `ebb_top()`.
pub struct LiveValueTracker {
    /// The set of values that are live at the current program point.
    live: LiveValueVec,

    /// Saved set of live values for every jump and branch that can potentially be an immediate
    /// dominator of an EBB.
    ///
    /// This is the set of values that are live *before* the branch.
    idom_sets: HashMap<Inst, ValueList>,

    /// Memory pool backing the `ValueList`s stored in `idom_sets`.
    idom_pool: ListPool<Value>,
}
|
||||
|
||||
/// Information about a value that is live at the current program point.
#[derive(Debug)]
pub struct LiveValue {
    /// The live value.
    pub value: Value,

    /// The local ending point of the live range in the current EBB, as returned by
    /// `LiveRange::def_local_end()` or `LiveRange::livein_local_end()`.
    pub endpoint: Inst,

    /// The affinity of the value as represented in its `LiveRange`.
    ///
    /// This value is simply a copy of the affinity stored in the live range. We copy it because
    /// almost all users of `LiveValue` need to look at it.
    pub affinity: Affinity,

    /// The live range for this value never leaves its EBB.
    pub is_local: bool,

    /// This value is dead - the live range ends immediately.
    pub is_dead: bool,
}
|
||||
|
||||
/// Internal vector of live values with a memoized partition point.
struct LiveValueVec {
    /// The set of values that are live at the current program point.
    values: Vec<LiveValue>,

    /// How many values at the front of `values` are known to be live after `inst`?
    ///
    /// This is used to pass a much smaller slice to `partition_slice` when its called a second
    /// time for the same instruction.
    live_prefix: Option<(Inst, usize)>,
}
|
||||
|
||||
impl LiveValueVec {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
values: Vec::new(),
|
||||
live_prefix: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a new live value to `values`. Copy some properties from `lr`.
|
||||
fn push(&mut self, value: Value, endpoint: Inst, lr: &LiveRange) {
|
||||
self.values.push(LiveValue {
|
||||
value,
|
||||
endpoint,
|
||||
affinity: lr.affinity,
|
||||
is_local: lr.is_local(),
|
||||
is_dead: lr.is_dead(),
|
||||
});
|
||||
}
|
||||
|
||||
/// Remove all elements.
|
||||
fn clear(&mut self) {
|
||||
self.values.clear();
|
||||
self.live_prefix = None;
|
||||
}
|
||||
|
||||
/// Make sure that the values killed by `next_inst` are moved to the end of the `values`
|
||||
/// vector.
|
||||
///
|
||||
/// Returns the number of values that will be live after `next_inst`.
|
||||
fn live_after(&mut self, next_inst: Inst) -> usize {
|
||||
// How many values at the front of the vector are already known to survive `next_inst`?
|
||||
// We don't need to pass this prefix to `partition_slice()`
|
||||
let keep = match self.live_prefix {
|
||||
Some((i, prefix)) if i == next_inst => prefix,
|
||||
_ => 0,
|
||||
};
|
||||
|
||||
// Move the remaining surviving values to the front partition of the vector.
|
||||
let prefix = keep + partition_slice(&mut self.values[keep..], |v| v.endpoint != next_inst);
|
||||
|
||||
// Remember the new prefix length in case we get called again for the same `next_inst`.
|
||||
self.live_prefix = Some((next_inst, prefix));
|
||||
prefix
|
||||
}
|
||||
|
||||
/// Remove the values killed by `next_inst`.
|
||||
fn remove_kill_values(&mut self, next_inst: Inst) {
|
||||
let keep = self.live_after(next_inst);
|
||||
self.values.truncate(keep);
|
||||
}
|
||||
|
||||
/// Remove any dead values.
|
||||
fn remove_dead_values(&mut self) {
|
||||
self.values.retain(|v| !v.is_dead);
|
||||
self.live_prefix = None;
|
||||
}
|
||||
}
|
||||
|
||||
impl LiveValueTracker {
    /// Create a new blank tracker.
    pub fn new() -> Self {
        Self {
            live: LiveValueVec::new(),
            idom_sets: HashMap::new(),
            idom_pool: ListPool::new(),
        }
    }

    /// Clear all cached information.
    pub fn clear(&mut self) {
        self.live.clear();
        self.idom_sets.clear();
        self.idom_pool.clear();
    }

    /// Get the set of currently live values.
    ///
    /// Between calls to `process_inst()` and `drop_dead()`, this includes both values killed and
    /// defined by the current instruction.
    pub fn live(&self) -> &[LiveValue] {
        &self.live.values
    }

    /// Get a mutable set of currently live values.
    ///
    /// Use with care and don't move entries around.
    pub fn live_mut(&mut self) -> &mut [LiveValue] {
        &mut self.live.values
    }

    /// Move the current position to the top of `ebb`.
    ///
    /// This depends on the stored live value set at `ebb`'s immediate dominator, so that must have
    /// been visited first.
    ///
    /// Returns `(liveins, args)` as a pair of slices. The first slice is the set of live-in values
    /// from the immediate dominator. The second slice is the set of `ebb` parameters.
    ///
    /// Dead parameters with no uses are included in `args`. Call `drop_dead_args()` to remove them.
    pub fn ebb_top(
        &mut self,
        ebb: Ebb,
        dfg: &DataFlowGraph,
        liveness: &Liveness,
        layout: &Layout,
        domtree: &DominatorTree,
    ) -> (&[LiveValue], &[LiveValue]) {
        // Start over, compute the set of live values at the top of the EBB from two sources:
        //
        // 1. Values that were live before `ebb`'s immediate dominator, filtered for those that are
        //    actually live-in.
        // 2. Arguments to `ebb` that are not dead.
        //
        self.live.clear();

        // Compute the live-in values. Start by filtering the set of values that were live before
        // the immediate dominator. Just use the empty set if there's no immediate dominator (i.e.,
        // the entry block or an unreachable block).
        if let Some(idom) = domtree.idom(ebb) {
            // If the immediate dominator exits, we must have a stored list for it. This is a
            // requirement to the order EBBs are visited: All dominators must have been processed
            // before the current EBB.
            let idom_live_list = self.idom_sets.get(&idom).expect(
                "No stored live set for dominator",
            );
            let ctx = liveness.context(layout);
            // Get just the values that are live-in to `ebb`.
            for &value in idom_live_list.as_slice(&self.idom_pool) {
                let lr = liveness.get(value).expect(
                    "Immediate dominator value has no live range",
                );

                // Check if this value is live-in here.
                if let Some(endpoint) = lr.livein_local_end(ebb, ctx) {
                    self.live.push(value, endpoint, lr);
                }
            }
        }

        // Now add all the live parameters to `ebb`.
        // `first_arg` marks the boundary between live-ins and parameters in `self.live.values`.
        let first_arg = self.live.values.len();
        for &value in dfg.ebb_params(ebb) {
            let lr = &liveness[value];
            debug_assert_eq!(lr.def(), ebb.into());
            match lr.def_local_end().into() {
                ExpandedProgramPoint::Inst(endpoint) => {
                    self.live.push(value, endpoint, lr);
                }
                ExpandedProgramPoint::Ebb(local_ebb) => {
                    // This is a dead EBB parameter which is not even live into the first
                    // instruction in the EBB.
                    debug_assert_eq!(
                        local_ebb,
                        ebb,
                        "EBB parameter live range ends at wrong EBB header"
                    );
                    // Give this value a fake endpoint that is the first instruction in the EBB.
                    // We expect it to be removed by calling `drop_dead_args()`.
                    self.live.push(
                        value,
                        layout.first_inst(ebb).expect("Empty EBB"),
                        lr,
                    );
                }
            }
        }

        self.live.values.split_at(first_arg)
    }

    /// Prepare to move past `inst`.
    ///
    /// Determine the set of already live values that are killed by `inst`, and add the new defined
    /// values to the tracked set.
    ///
    /// Returns `(throughs, kills, defs)` as a tuple of slices:
    ///
    /// 1. The `throughs` slice is the set of live-through values that are neither defined nor
    ///    killed by the instruction.
    /// 2. The `kills` slice is the set of values that were live before the instruction and are
    ///    killed at the instruction. This does not include dead defs.
    /// 3. The `defs` slice is guaranteed to be in the same order as `inst`'s results, and includes
    ///    dead defines.
    ///
    /// The order of `throughs` and `kills` is arbitrary.
    ///
    /// The `drop_dead()` method must be called next to actually remove the dead values from the
    /// tracked set after the two returned slices are no longer needed.
    pub fn process_inst(
        &mut self,
        inst: Inst,
        dfg: &DataFlowGraph,
        liveness: &Liveness,
    ) -> (&[LiveValue], &[LiveValue], &[LiveValue]) {
        // Save a copy of the live values before any branches or jumps that could be somebody's
        // immediate dominator.
        if dfg[inst].opcode().is_branch() {
            self.save_idom_live_set(inst);
        }

        // Move killed values to the end of the vector.
        // Don't remove them yet, `drop_dead()` will do that.
        let first_kill = self.live.live_after(inst);

        // Add the values defined by `inst`.
        let first_def = self.live.values.len();
        for &value in dfg.inst_results(inst) {
            let lr = &liveness[value];
            debug_assert_eq!(lr.def(), inst.into());
            match lr.def_local_end().into() {
                ExpandedProgramPoint::Inst(endpoint) => {
                    self.live.push(value, endpoint, lr);
                }
                ExpandedProgramPoint::Ebb(ebb) => {
                    panic!("Instruction result live range can't end at {}", ebb);
                }
            }
        }

        // Partition the vector into (throughs, kills, defs) by the two recorded indices.
        (
            &self.live.values[0..first_kill],
            &self.live.values[first_kill..first_def],
            &self.live.values[first_def..],
        )
    }

    /// Prepare to move past a ghost instruction.
    ///
    /// This is like `process_inst`, except any defs are ignored.
    ///
    /// Returns `(throughs, kills)`.
    pub fn process_ghost(&mut self, inst: Inst) -> (&[LiveValue], &[LiveValue]) {
        let first_kill = self.live.live_after(inst);
        self.live.values.as_slice().split_at(first_kill)
    }

    /// Drop the values that are now dead after moving past `inst`.
    ///
    /// This removes both live values that were killed by `inst` and dead defines on `inst` itself.
    ///
    /// This must be called after `process_inst(inst)` and before proceeding to the next
    /// instruction.
    pub fn drop_dead(&mut self, inst: Inst) {
        // Remove both live values that were killed by `inst` and dead defines from `inst`.
        self.live.remove_kill_values(inst);
    }

    /// Drop any values that are marked as `is_dead`.
    ///
    /// Use this after calling `ebb_top` to clean out dead EBB parameters.
    pub fn drop_dead_params(&mut self) {
        self.live.remove_dead_values();
    }

    /// Process new spills.
    ///
    /// Any values where `f` returns true are spilled and will be treated as if their affinity was
    /// `Stack`.
    pub fn process_spills<F>(&mut self, mut f: F)
    where
        F: FnMut(Value) -> bool,
    {
        for lv in &mut self.live.values {
            if f(lv.value) {
                lv.affinity = Affinity::Stack;
            }
        }
    }

    /// Save the current set of live values so it is associated with `idom`.
    fn save_idom_live_set(&mut self, idom: Inst) {
        let values = self.live.values.iter().map(|lv| lv.value);
        let pool = &mut self.idom_pool;
        // If there already is a set saved for `idom`, just keep it.
        self.idom_sets.entry(idom).or_insert_with(|| {
            let mut list = ValueList::default();
            list.extend(values, pool);
            list
        });
    }
}
|
||||
458
lib/codegen/src/regalloc/liveness.rs
Normal file
458
lib/codegen/src/regalloc/liveness.rs
Normal file
@@ -0,0 +1,458 @@
|
||||
//! Liveness analysis for SSA values.
|
||||
//!
|
||||
//! This module computes the live range of all the SSA values in a function and produces a
|
||||
//! `LiveRange` instance for each.
|
||||
//!
|
||||
//!
|
||||
//! # Liveness consumers
|
||||
//!
|
||||
//! The primary consumer of the liveness analysis is the SSA coloring pass which goes through each
|
||||
//! EBB and assigns a register to the defined values. This algorithm needs to maintain a set of the
|
||||
//! currently live values as it is iterating down the instructions in the EBB. It asks the
|
||||
//! following questions:
|
||||
//!
|
||||
//! - What is the set of live values at the entry to the EBB?
|
||||
//! - When moving past a use of a value, is that value still alive in the EBB, or was that the last
|
||||
//! use?
|
||||
//! - When moving past a branch, which of the live values are still live below the branch?
|
||||
//!
|
||||
//! The set of `LiveRange` instances can answer these questions through their `def_local_end` and
|
||||
//! `livein_local_end` queries. The coloring algorithm visits EBBs in a topological order of the
|
||||
//! dominator tree, so it can compute the set of live values at the beginning of an EBB by starting
|
||||
//! from the set of live values at the dominating branch instruction and filtering it with
|
||||
//! `livein_local_end`. These sets do not need to be stored in the liveness analysis.
|
||||
//!
|
||||
//! The secondary consumer of the liveness analysis is the spilling pass which needs to count the
|
||||
//! number of live values at every program point and insert spill code until the number of
|
||||
//! registers needed is small enough.
|
||||
//!
|
||||
//!
|
||||
//! # Alternative algorithms
|
||||
//!
|
||||
//! A number of different liveness analysis algorithms exist, so it is worthwhile to look at a few
|
||||
//! alternatives.
|
||||
//!
|
||||
//! ## Data-flow equations
|
||||
//!
|
||||
//! The classic *live variables analysis* that you will find in all compiler books from the
|
||||
//! previous century does not depend on SSA form. It is typically implemented by iteratively
|
||||
//! solving data-flow equations on bit-vectors of variables. The result is a live-out bit-vector of
|
||||
//! variables for every basic block in the program.
|
||||
//!
|
||||
//! This algorithm has some disadvantages that makes us look elsewhere:
|
||||
//!
|
||||
//! - Quadratic memory use. We need a bit per variable per basic block in the function.
|
||||
//! - Sparse representation. In practice, the majority of SSA values never leave their basic block,
|
||||
//! and those that do span basic blocks rarely span a large number of basic blocks. This makes
|
||||
//! the bit-vectors quite sparse.
|
||||
//! - Traditionally, the data-flow equations were solved for real program *variables* which does
|
||||
//! not include temporaries used in evaluating expressions. We have an SSA form program which
|
||||
//! blurs the distinction between temporaries and variables. This makes the quadratic memory
|
||||
//! problem worse because there are many more SSA values than there was variables in the original
|
||||
//! program, and we don't know a priori which SSA values leave their basic block.
|
||||
//! - Missing last-use information. For values that are not live-out of a basic block, we would
|
||||
//! need to store information about the last use in the block somewhere. LLVM stores this
|
||||
//! information as a 'kill bit' on the last use in the IR. Maintaining these kill bits has been a
|
||||
//! source of problems for LLVM's register allocator.
|
||||
//!
|
||||
//! Data-flow equations can detect when a variable is used uninitialized, and they can handle
|
||||
//! multiple definitions of the same variable. We don't need this generality since we already have
|
||||
//! a program in SSA form.
|
||||
//!
|
||||
//! ## LLVM's liveness analysis
|
||||
//!
|
||||
//! LLVM's register allocator computes liveness per *virtual register*, where a virtual register is
|
||||
//! a disjoint union of related SSA values that should be assigned to the same physical register.
|
||||
//! It uses a compact data structure very similar to our `LiveRange`. The important difference is
|
||||
//! that Cretonne's `LiveRange` only describes a single SSA value, while LLVM's `LiveInterval`
|
||||
//! describes the live range of a virtual register *and* which one of the related SSA values is
|
||||
//! live at any given program point.
|
||||
//!
|
||||
//! LLVM computes the live range of each virtual register independently by using the use-def chains
|
||||
//! that are baked into its IR. The algorithm for a single virtual register is:
|
||||
//!
|
||||
//! 1. Initialize the live range with a single-instruction snippet of liveness at each def, using
|
||||
//! the def-chain. This does not include any phi-values.
|
||||
//! 2. Go through the virtual register's use chain and perform the following steps at each use:
|
||||
//! 3. Perform an exhaustive depth-first traversal up the CFG from the use. Look for basic blocks
|
||||
//! that already contain some liveness and extend the last live SSA value in the block to be
|
||||
//! live-out. Also build a list of new basic blocks where the register needs to be live-in.
|
||||
//! 4. Iteratively propagate live-out SSA values to the new live-in blocks. This may require new
|
||||
//! PHI values to be created when different SSA values can reach the same block.
|
||||
//!
|
||||
//! The iterative SSA form reconstruction can be skipped if the depth-first search only encountered
|
||||
//! one SSA value.
|
||||
//!
|
||||
//! This algorithm has some advantages compared to the data-flow equations:
|
||||
//!
|
||||
//! - The live ranges of local virtual registers are computed very quickly without ever traversing
|
||||
//! the CFG. The memory needed to store these live ranges is independent of the number of basic
|
||||
//! blocks in the program.
|
||||
//! - The time to compute the live range of a global virtual register is proportional to the number
|
||||
//! of basic blocks covered. Many virtual registers only cover a few blocks, even in very large
|
||||
//! functions.
|
||||
//! - A single live range can be recomputed after making modifications to the IR. No global
|
||||
//! algorithm is necessary. This feature depends on having use-def chains for virtual registers
|
||||
//! which Cretonne doesn't.
|
||||
//!
|
||||
//! Cretonne uses a very similar data structures and algorithms to LLVM, with the important
|
||||
//! difference that live ranges are computed per SSA value instead of per virtual register, and the
|
||||
//! uses in Cretonne IR refers to SSA values instead of virtual registers. This means that Cretonne
|
||||
//! can skip the last step of reconstructing SSA form for the virtual register uses.
|
||||
//!
|
||||
//! ## Fast Liveness Checking for SSA-Form Programs
|
||||
//!
|
||||
//! A liveness analysis that is often brought up in the context of SSA-based register allocation
|
||||
//! was presented at CGO 2008:
|
||||
//!
|
||||
//! > Boissinot, B., Hack, S., Grund, D., de Dinechin, B. D., & Rastello, F. (2008). *Fast Liveness
|
||||
//! Checking for SSA-Form Programs.* CGO.
|
||||
//!
|
||||
//! This analysis uses a global pre-computation that only depends on the CFG of the function. It
|
||||
//! then allows liveness queries for any (value, program point) pair. Each query traverses the use
|
||||
//! chain of the value and performs lookups in the precomputed bit-vectors.
|
||||
//!
|
||||
//! I did not seriously consider this analysis for Cretonne because:
|
||||
//!
|
||||
//! - It depends critically on use chains which Cretonne doesn't have.
|
||||
//! - Popular variables like the `this` pointer in a C++ method can have very large use chains.
|
||||
//! Traversing such a long use chain on every liveness lookup has the potential for some nasty
|
||||
//! quadratic behavior in unfortunate cases.
|
||||
//! - It says "fast" in the title, but the paper only claims to be 16% faster than a data-flow
|
||||
//! based approach, which isn't that impressive.
|
||||
//!
|
||||
//! Nevertheless, the property of only depending in the CFG structure is very useful. If Cretonne
|
||||
//! gains use chains, this approach would be worth a proper evaluation.
|
||||
//!
|
||||
//!
|
||||
//! # Cretonne's liveness analysis
|
||||
//!
|
||||
//! The algorithm implemented in this module is similar to LLVM's with these differences:
|
||||
//!
|
||||
//! - The `LiveRange` data structure describes the liveness of a single SSA value, not a virtual
|
||||
//! register.
|
||||
//! - Instructions in Cretonne IR contains references to SSA values, not virtual registers.
|
||||
//! - All live ranges are computed in one traversal of the program. Cretonne doesn't have use
|
||||
//! chains, so it is not possible to compute the live range for a single SSA value independently.
|
||||
//!
|
||||
//! The liveness computation visits all instructions in the program. The order is not important for
|
||||
//! the algorithm to be correct. At each instruction, the used values are examined.
|
||||
//!
|
||||
//! - The first time a value is encountered, its live range is constructed as a dead live range
|
||||
//! containing only the defining program point.
|
||||
//! - The local interval of the value's live range is extended so it reaches the use. This may
|
||||
//! require creating a new live-in local interval for the EBB.
|
||||
//! - If the live range became live-in to the EBB, add the EBB to a work-list.
|
||||
//! - While the work-list is non-empty pop a live-in EBB and repeat the two steps above, using each
|
||||
//! of the live-in EBB's CFG predecessor instructions as a 'use'.
|
||||
//!
|
||||
//! The effect of this algorithm is to extend the live range of each to reach uses as they are
|
||||
//! visited. No data about each value beyond the live range is needed between visiting uses, so
|
||||
//! nothing is lost by computing the live range of all values simultaneously.
|
||||
//!
|
||||
//! ## Cache efficiency of Cretonne vs LLVM
|
||||
//!
|
||||
//! Since LLVM computes the complete live range of a virtual register in one go, it can keep the
|
||||
//! whole `LiveInterval` for the register in L1 cache. Since it is visiting the instructions in use
|
||||
//! chain order, some cache thrashing can occur as a result of pulling instructions into cache
|
||||
//! somewhat chaotically.
|
||||
//!
|
||||
//! Cretonne uses a transposed algorithm, visiting instructions in order. This means that each
|
||||
//! instruction is brought into cache only once, and it is likely that the other instructions on
|
||||
//! the same cache line will be visited before the line is evicted.
|
||||
//!
|
||||
//! Cretonne's problem is that the `LiveRange` structs are visited many times and not always
|
||||
//! regularly. We should strive to make the `LiveRange` struct as small as possible such that
|
||||
//! multiple related values can live on the same cache line.
|
||||
//!
|
||||
//! - Local values should fit in a 16-byte `LiveRange` struct or smaller. The current
|
||||
//! implementation contains a 24-byte `Vec` object and a redundant `value` member pushing the
|
||||
//! size to 32 bytes.
|
||||
//! - Related values should be stored on the same cache line. The current sparse set implementation
|
||||
//! does a decent job of that.
|
||||
//! - For global values, the list of live-in intervals is very likely to fit on a single cache
|
||||
//! line. These lists are very likely to be found in L2 cache at least.
|
||||
//!
|
||||
//! There is some room for improvement.
|
||||
|
||||
use entity::SparseMap;
|
||||
use flowgraph::ControlFlowGraph;
|
||||
use ir::dfg::ValueDef;
|
||||
use ir::{Ebb, Function, Inst, Layout, ProgramPoint, Value};
|
||||
use isa::{EncInfo, TargetIsa};
|
||||
use regalloc::affinity::Affinity;
|
||||
use regalloc::liverange::{LiveRange, LiveRangeContext, LiveRangeForest};
|
||||
use std::mem;
|
||||
use std::ops::Index;
|
||||
use std::vec::Vec;
|
||||
use timing;
|
||||
|
||||
/// A set of live ranges, indexed by value number.
type LiveRangeSet = SparseMap<Value, LiveRange>;
|
||||
|
||||
/// Get a mutable reference to the live range for `value`.
/// Create it if necessary.
///
/// A newly created range is "dead": it covers only the defining program point. The initial
/// affinity is derived from the defining instruction's encoding constraints, the function
/// signature (for entry-block parameters), or the value's type (for other EBB parameters).
fn get_or_create<'a>(
    lrset: &'a mut LiveRangeSet,
    value: Value,
    isa: &TargetIsa,
    func: &Function,
    enc_info: &EncInfo,
) -> &'a mut LiveRange {
    // It would be better to use `get_mut()` here, but that leads to borrow checker fighting
    // which can probably only be resolved by non-lexical lifetimes.
    // https://github.com/rust-lang/rfcs/issues/811
    if lrset.get(value).is_none() {
        // Create a live range for value. We need the program point that defines it.
        let def;
        let affinity;
        match func.dfg.value_def(value) {
            ValueDef::Result(inst, rnum) => {
                def = inst.into();
                // Initialize the affinity from the defining instruction's result constraints.
                // Don't do this for call return values which are always tied to a single register.
                affinity = enc_info
                    .operand_constraints(func.encodings[inst])
                    .and_then(|rc| rc.outs.get(rnum))
                    .map(Affinity::new)
                    .or_else(|| {
                        // If this is a call, get the return value affinity.
                        func.dfg.call_signature(inst).map(|sig| {
                            Affinity::abi(&func.dfg.signatures[sig].returns[rnum], isa)
                        })
                    })
                    .unwrap_or_default();
            }
            ValueDef::Param(ebb, num) => {
                def = ebb.into();
                if func.layout.entry_block() == Some(ebb) {
                    // The affinity for entry block parameters can be inferred from the function
                    // signature.
                    affinity = Affinity::abi(&func.signature.params[num], isa);
                } else {
                    // Give normal EBB parameters a register affinity matching their type.
                    let rc = isa.regclass_for_abi_type(func.dfg.value_type(value));
                    affinity = Affinity::Reg(rc.into());
                }
            }
        };
        lrset.insert(LiveRange::new(value, def, affinity));
    }
    // The entry is guaranteed to exist now; the unwrap cannot fail.
    lrset.get_mut(value).unwrap()
}
|
||||
|
||||
/// Extend the live range for `value` so it reaches `to` which must live in `ebb`.
///
/// This performs a depth-first traversal up the CFG, marking the value live-in to every
/// predecessor block on a path from the existing live range to the new use.
fn extend_to_use(
    lr: &mut LiveRange,
    ebb: Ebb,
    to: Inst,
    worklist: &mut Vec<Ebb>,
    func: &Function,
    cfg: &ControlFlowGraph,
    forest: &mut LiveRangeForest,
) {
    // This is our scratch working space, and we'll leave it empty when we return.
    debug_assert!(worklist.is_empty());

    // Extend the range locally in `ebb`.
    // If there already was a live interval in that block, we're done.
    // (`extend_in_ebb` returning true means the value just became live-in to the block.)
    if lr.extend_in_ebb(ebb, to, &func.layout, forest) {
        worklist.push(ebb);
    }

    // The work list contains those EBBs where we have learned that the value needs to be
    // live-in.
    //
    // This algorithm becomes a depth-first traversal up the CFG, enumerating all paths through the
    // CFG from the existing live range to `ebb`.
    //
    // Extend the live range as we go. The live range itself also serves as a visited set since
    // `extend_in_ebb` will never return true twice for the same EBB.
    //
    while let Some(livein) = worklist.pop() {
        // We've learned that the value needs to be live-in to the `livein` EBB.
        // Make sure it is also live at all predecessor branches to `livein`.
        for (pred, branch) in cfg.pred_iter(livein) {
            if lr.extend_in_ebb(pred, branch, &func.layout, forest) {
                // This predecessor EBB also became live-in. We need to process it later.
                worklist.push(pred);
            }
        }
    }
}
|
||||
|
||||
/// Liveness analysis for a function.
///
/// Compute a live range for every SSA value used in the function.
pub struct Liveness {
    /// The live ranges that have been computed so far, keyed by SSA value.
    ranges: LiveRangeSet,

    /// Memory pool for the live ranges.
    /// All live-in interval B-trees in `ranges` allocate their nodes from this forest.
    forest: LiveRangeForest,

    /// Working space for the `extend_to_use` algorithm.
    /// This vector is always empty, except for inside that function.
    /// It lives here to avoid repeated allocation of scratch memory.
    worklist: Vec<Ebb>,
}
|
||||
|
||||
impl Liveness {
|
||||
/// Create a new empty liveness analysis.
|
||||
///
|
||||
/// The memory allocated for this analysis can be reused for multiple functions. Use the
|
||||
/// `compute` method to actually runs the analysis for a function.
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
ranges: LiveRangeSet::new(),
|
||||
forest: LiveRangeForest::new(),
|
||||
worklist: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a context needed for working with a `LiveRange`.
|
||||
pub fn context<'a>(&'a self, layout: &'a Layout) -> LiveRangeContext<'a, Layout> {
|
||||
LiveRangeContext::new(layout, &self.forest)
|
||||
}
|
||||
|
||||
/// Clear all data structures in this liveness analysis.
|
||||
pub fn clear(&mut self) {
|
||||
self.ranges.clear();
|
||||
self.forest.clear();
|
||||
self.worklist.clear();
|
||||
}
|
||||
|
||||
/// Get the live range for `value`, if it exists.
|
||||
pub fn get(&self, value: Value) -> Option<&LiveRange> {
|
||||
self.ranges.get(value)
|
||||
}
|
||||
|
||||
/// Create a new live range for `value`.
|
||||
///
|
||||
/// The new live range will be defined at `def` with no extent, like a dead value.
|
||||
///
|
||||
/// This asserts that `value` does not have an existing live range.
|
||||
pub fn create_dead<PP>(&mut self, value: Value, def: PP, affinity: Affinity)
|
||||
where
|
||||
PP: Into<ProgramPoint>,
|
||||
{
|
||||
let old = self.ranges.insert(
|
||||
LiveRange::new(value, def.into(), affinity),
|
||||
);
|
||||
debug_assert!(old.is_none(), "{} already has a live range", value);
|
||||
}
|
||||
|
||||
/// Move the definition of `value` to `def`.
|
||||
///
|
||||
/// The old and new def points must be in the same EBB, and before the end of the live range.
|
||||
pub fn move_def_locally<PP>(&mut self, value: Value, def: PP)
|
||||
where
|
||||
PP: Into<ProgramPoint>,
|
||||
{
|
||||
let lr = self.ranges.get_mut(value).expect("Value has no live range");
|
||||
lr.move_def_locally(def.into());
|
||||
}
|
||||
|
||||
/// Locally extend the live range for `value` to reach `user`.
|
||||
///
|
||||
/// It is assumed the `value` is already live before `user` in `ebb`.
|
||||
///
|
||||
/// Returns a mutable reference to the value's affinity in case that also needs to be updated.
|
||||
pub fn extend_locally(
|
||||
&mut self,
|
||||
value: Value,
|
||||
ebb: Ebb,
|
||||
user: Inst,
|
||||
layout: &Layout,
|
||||
) -> &mut Affinity {
|
||||
debug_assert_eq!(Some(ebb), layout.inst_ebb(user));
|
||||
let lr = self.ranges.get_mut(value).expect("Value has no live range");
|
||||
let livein = lr.extend_in_ebb(ebb, user, layout, &mut self.forest);
|
||||
debug_assert!(!livein, "{} should already be live in {}", value, ebb);
|
||||
&mut lr.affinity
|
||||
}
|
||||
|
||||
/// Change the affinity of `value` to `Stack` and return the previous affinity.
|
||||
pub fn spill(&mut self, value: Value) -> Affinity {
|
||||
let lr = self.ranges.get_mut(value).expect("Value has no live range");
|
||||
mem::replace(&mut lr.affinity, Affinity::Stack)
|
||||
}
|
||||
|
||||
/// Compute the live ranges of all SSA values used in `func`.
|
||||
/// This clears out any existing analysis stored in this data structure.
|
||||
pub fn compute(&mut self, isa: &TargetIsa, func: &mut Function, cfg: &ControlFlowGraph) {
|
||||
let _tt = timing::ra_liveness();
|
||||
self.ranges.clear();
|
||||
|
||||
// Get ISA data structures used for computing live range affinities.
|
||||
let enc_info = isa.encoding_info();
|
||||
let reg_info = isa.register_info();
|
||||
|
||||
// The liveness computation needs to visit all uses, but the order doesn't matter.
|
||||
// TODO: Perhaps this traversal of the function could be combined with a dead code
|
||||
// elimination pass if we visit a post-order of the dominator tree?
|
||||
// TODO: Resolve value aliases while we're visiting instructions?
|
||||
for ebb in func.layout.ebbs() {
|
||||
// Make sure we have created live ranges for dead EBB parameters.
|
||||
// TODO: If these parameters are really dead, we could remove them, except for the
|
||||
// entry block which must match the function signature.
|
||||
for &arg in func.dfg.ebb_params(ebb) {
|
||||
get_or_create(&mut self.ranges, arg, isa, func, &enc_info);
|
||||
}
|
||||
|
||||
for inst in func.layout.ebb_insts(ebb) {
|
||||
// Eliminate all value aliases, they would confuse the register allocator.
|
||||
func.dfg.resolve_aliases_in_arguments(inst);
|
||||
|
||||
// Make sure we have created live ranges for dead defs.
|
||||
// TODO: When we implement DCE, we can use the absence of a live range to indicate
|
||||
// an unused value.
|
||||
for &def in func.dfg.inst_results(inst) {
|
||||
get_or_create(&mut self.ranges, def, isa, func, &enc_info);
|
||||
}
|
||||
|
||||
// Iterator of constraints, one per value operand.
|
||||
let encoding = func.encodings[inst];
|
||||
let mut operand_constraints = enc_info
|
||||
.operand_constraints(encoding)
|
||||
.map(|c| c.ins)
|
||||
.unwrap_or(&[])
|
||||
.iter();
|
||||
|
||||
for &arg in func.dfg.inst_args(inst) {
|
||||
// Get the live range, create it as a dead range if necessary.
|
||||
let lr = get_or_create(&mut self.ranges, arg, isa, func, &enc_info);
|
||||
|
||||
// Extend the live range to reach this use.
|
||||
extend_to_use(
|
||||
lr,
|
||||
ebb,
|
||||
inst,
|
||||
&mut self.worklist,
|
||||
func,
|
||||
cfg,
|
||||
&mut self.forest,
|
||||
);
|
||||
|
||||
// Apply operand constraint, ignoring any variable arguments after the fixed
|
||||
// operands described by `operand_constraints`. Variable arguments are either
|
||||
// EBB arguments or call/return ABI arguments.
|
||||
if let Some(constraint) = operand_constraints.next() {
|
||||
lr.affinity.merge(constraint, ®_info);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Index<Value> for Liveness {
|
||||
type Output = LiveRange;
|
||||
|
||||
fn index(&self, index: Value) -> &LiveRange {
|
||||
match self.ranges.get(index) {
|
||||
Some(lr) => lr,
|
||||
None => panic!("{} has no live range", index),
|
||||
}
|
||||
}
|
||||
}
|
||||
748
lib/codegen/src/regalloc/liverange.rs
Normal file
748
lib/codegen/src/regalloc/liverange.rs
Normal file
@@ -0,0 +1,748 @@
|
||||
//! Data structure representing the live range of an SSA value.
|
||||
//!
|
||||
//! Live ranges are tracked per SSA value, not per variable or virtual register. The live range of
|
||||
//! an SSA value begins where it is defined and extends to all program points where the value is
|
||||
//! still needed.
|
||||
//!
|
||||
//! # Local Live Ranges
|
||||
//!
|
||||
//! Inside a single extended basic block, the live range of a value is always an interval between
|
||||
//! two program points (if the value is live in the EBB at all). The starting point is either:
|
||||
//!
|
||||
//! 1. The instruction that defines the value, or
|
||||
//! 2. The EBB header, because the value is an argument to the EBB, or
|
||||
//! 3. The EBB header, because the value is defined in another EBB and live-in to this one.
|
||||
//!
|
||||
//! The ending point of the local live range is the last of the following program points in the
|
||||
//! EBB:
|
||||
//!
|
||||
//! 1. The last use in the EBB, where a *use* is an instruction that has the value as an argument.
|
||||
//! 2. The last branch or jump instruction in the EBB that can reach a use.
|
||||
//! 3. If the value has no uses anywhere (a *dead value*), the program point that defines it.
|
||||
//!
|
||||
//! Note that 2. includes loop back-edges to the same EBB. In general, if a value is defined
|
||||
//! outside a loop and used inside the loop, it will be live in the entire loop.
|
||||
//!
|
||||
//! # Global Live Ranges
|
||||
//!
|
||||
//! Values that appear in more than one EBB have a *global live range* which can be seen as the
|
||||
//! disjoint union of the per-EBB local intervals for all of the EBBs where the value is live.
|
||||
//! Together with a `ProgramOrder` which provides a linear ordering of the EBBs, the global live
|
||||
//! range becomes a linear sequence of disjoint intervals, at most one per EBB.
|
||||
//!
|
||||
//! In the special case of a dead value, the global live range is a single interval where the start
|
||||
//! and end points are the same. The global live range of a value is never completely empty.
|
||||
//!
|
||||
//! # Register interference
|
||||
//!
|
||||
//! The register allocator uses live ranges to determine if values *interfere*, which means that
|
||||
//! they can't be stored in the same register. Two live ranges interfere if and only if any of
|
||||
//! their intervals overlap.
|
||||
//!
|
||||
//! If one live range ends at an instruction that defines another live range, those two live ranges
|
||||
//! are not considered to interfere. This is because most ISAs allow instructions to reuse an input
|
||||
//! register for an output value. If Cretonne gets support for inline assembly, we will need to
|
||||
//! handle *early clobbers* which are output registers that are not allowed to alias any input
|
||||
//! registers.
|
||||
//!
|
||||
//! If `i1 < i2 < i3` are program points, we have:
|
||||
//!
|
||||
//! - `i1-i2` and `i1-i3` interfere because the intervals overlap.
|
||||
//! - `i1-i2` and `i2-i3` don't interfere.
|
||||
//! - `i1-i3` and `i2-i2` do interfere because the dead def would clobber the register.
|
||||
//! - `i1-i2` and `i2-i2` don't interfere.
|
||||
//! - `i2-i3` and `i2-i2` do interfere.
|
||||
//!
|
||||
//! Because of this behavior around interval end points, live range interference is not completely
|
||||
//! equivalent to mathematical intersection of open or half-open intervals.
|
||||
//!
|
||||
//! # Implementation notes
|
||||
//!
|
||||
//! A few notes about the implementation of this data structure. This should not concern someone
|
||||
//! only looking to use the public interface.
|
||||
//!
|
||||
//! ## EBB ordering
|
||||
//!
|
||||
//! The relative order of EBBs is used to maintain a sorted list of live-in intervals and to
|
||||
//! coalesce adjacent live-in intervals when the prior interval covers the whole EBB. This doesn't
|
||||
//! depend on any property of the program order, so alternative orderings are possible:
|
||||
//!
|
||||
//! 1. The EBB layout order. This is what we currently use.
|
||||
//! 2. A topological order of the dominator tree. All the live-in intervals would come after the
|
||||
//! def interval.
|
||||
//! 3. A numerical order by EBB number. Performant because it doesn't need to indirect through the
|
||||
//! `ProgramOrder` for comparisons.
|
||||
//!
|
||||
//! These orderings will cause small differences in coalescing opportunities, but all of them would
|
||||
//! do a decent job of compressing a long live range. The numerical order might be preferable
|
||||
//! because:
|
||||
//!
|
||||
//! - It has better performance because EBB numbers can be compared directly without any table
|
||||
//! lookups.
|
||||
//! - If EBB numbers are not reused, it is safe to allocate new EBBs without getting spurious
|
||||
//! live-in intervals from any coalesced representations that happen to cross a new EBB.
|
||||
//!
|
||||
//! For comparing instructions, the layout order is always what we want.
|
||||
//!
|
||||
//! ## Alternative representation
|
||||
//!
|
||||
//! Since a local live-in interval always begins at its EBB header, it is uniquely described by its
|
||||
//! end point instruction alone. We can use the layout to look up the EBB containing the end point.
|
||||
//! This means that a sorted `Vec<Inst>` would be enough to represent the set of live-in intervals.
|
||||
//!
|
||||
//! Coalescing is an important compression technique because some live ranges can span thousands of
|
||||
//! EBBs. We can represent that by switching to a sorted `Vec<ProgramPoint>` representation where
|
||||
//! an `[Ebb, Inst]` pair represents a coalesced range, while an `Inst` entry without a preceding
|
||||
//! `Ebb` entry represents a single live-in interval.
|
||||
//!
|
||||
//! This representation is more compact for a live range with many uncoalesced live-in intervals.
|
||||
//! It is more complicated to work with, though, so it is probably not worth it. The performance
|
||||
//! benefits of switching to a numerical EBB order only appears if the binary search is doing
|
||||
//! EBB-EBB comparisons.
|
||||
//!
|
||||
//! ## B-tree representation
|
||||
//!
|
||||
//! A `BTreeMap<Ebb, Inst>` could also be used for the live-in intervals. It looks like the
|
||||
//! standard library B-tree doesn't provide the necessary interface for an efficient implementation
|
||||
//! of coalescing, so we would need to roll our own.
|
||||
//!
|
||||
|
||||
use bforest;
|
||||
use entity::SparseMapValue;
|
||||
use ir::{Ebb, ExpandedProgramPoint, Inst, Layout, ProgramOrder, ProgramPoint, Value};
|
||||
use regalloc::affinity::Affinity;
|
||||
use std::cmp::Ordering;
|
||||
|
||||
/// Global live range of a single SSA value.
|
||||
///
|
||||
/// As [explained in the module documentation](index.html#local-live-ranges), the live range of an
|
||||
/// SSA value is the disjoint union of a set of intervals, each local to a single EBB, and with at
|
||||
/// most one interval per EBB. We further distinguish between:
|
||||
///
|
||||
/// 1. The *def interval* is the local interval in the EBB where the value is defined, and
|
||||
/// 2. The *live-in intervals* are the local intervals in the remaining EBBs.
|
||||
///
|
||||
/// A live-in interval always begins at the EBB header, while the def interval can begin at the
|
||||
/// defining instruction, or at the EBB header for an EBB argument value.
|
||||
///
|
||||
/// All values have a def interval, but a large proportion of values don't have any live-in
|
||||
/// intervals. These are called *local live ranges*.
|
||||
///
|
||||
/// # Program order requirements
|
||||
///
|
||||
/// The internal representation of a `LiveRange` depends on a consistent `ProgramOrder` both for
|
||||
/// ordering instructions inside an EBB *and* for ordering EBBs. The methods that depend on the
|
||||
/// ordering take an explicit `ProgramOrder` object, and it is the caller's responsibility to
|
||||
/// ensure that the provided ordering is consistent between calls.
|
||||
///
|
||||
/// In particular, changing the order of EBBs or inserting new EBBs will invalidate live ranges.
|
||||
///
|
||||
/// Inserting new instructions in the layout is safe, but removing instructions is not. Besides the
|
||||
/// instructions using or defining their value, `LiveRange` structs can contain references to
|
||||
/// branch and jump instructions.
|
||||
pub type LiveRange = GenLiveRange<Layout>; // Concrete live range using the function `Layout` as the program order.
|
||||
|
||||
/// Generic live range implementation.
///
/// The intended generic parameter is `PO=Layout`, but tests are simpler with a mock order.
/// Use `LiveRange` instead of using this generic directly.
pub struct GenLiveRange<PO: ProgramOrder> {
    /// The value described by this live range.
    /// This member can't be modified in case the live range is stored in a `SparseMap`
    /// (it serves as the map key — see the `SparseMapValue` impl below).
    value: Value,

    /// The preferred register allocation for this value.
    pub affinity: Affinity,

    /// The instruction or EBB header where this value is defined.
    def_begin: ProgramPoint,

    /// The end point of the def interval. This must always belong to the same EBB as `def_begin`.
    ///
    /// We always have `def_begin <= def_end` with equality implying a dead def live range with no
    /// uses.
    def_end: ProgramPoint,

    /// Additional live-in intervals sorted in program order.
    ///
    /// This map is empty for most values which are only used in one EBB.
    ///
    /// A map entry `ebb -> inst` means that the live range is live-in to `ebb`, continuing up to
    /// `inst` which may belong to a later EBB in the program order.
    ///
    /// The entries are non-overlapping, and none of them overlap the EBB where the value is
    /// defined.
    liveins: bforest::Map<Ebb, Inst, PO>,
}
|
||||
|
||||
/// Context information needed to query a `LiveRange`.
///
/// Bundles the program order with the shared B-tree memory pool so query methods on
/// `GenLiveRange` only need a single extra argument.
pub struct LiveRangeContext<'a, PO: 'a + ProgramOrder> {
    /// Ordering of EBBs.
    pub order: &'a PO,
    /// Memory pool backing the live-in interval maps.
    pub forest: &'a bforest::MapForest<Ebb, Inst, PO>,
}
|
||||
|
||||
impl<'a, PO: ProgramOrder> LiveRangeContext<'a, PO> {
|
||||
/// Make a new context.
|
||||
pub fn new(
|
||||
order: &'a PO,
|
||||
forest: &'a bforest::MapForest<Ebb, Inst, PO>,
|
||||
) -> LiveRangeContext<'a, PO> {
|
||||
LiveRangeContext { order, forest }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, PO: ProgramOrder> Clone for LiveRangeContext<'a, PO> {
|
||||
fn clone(&self) -> Self {
|
||||
LiveRangeContext {
|
||||
order: self.order,
|
||||
forest: self.forest,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, PO: ProgramOrder> Copy for LiveRangeContext<'a, PO> {}
|
||||
|
||||
/// Forest of B-trees used for storing live ranges.
///
/// All `LiveRange`s in a function share this single memory pool for their live-in maps.
pub type LiveRangeForest = bforest::MapForest<Ebb, Inst, Layout>;
|
||||
|
||||
/// Let any `ProgramOrder` act as a B-tree key comparator, so the live-in maps stay sorted
/// in program order.
impl<PO: ProgramOrder> bforest::Comparator<Ebb> for PO {
    fn cmp(&self, a: Ebb, b: Ebb) -> Ordering {
        // Delegates to `ProgramOrder::cmp` (the generic program-point comparison).
        self.cmp(a, b)
    }
}
|
||||
|
||||
impl<PO: ProgramOrder> GenLiveRange<PO> {
    /// Create a new live range for `value` defined at `def`.
    ///
    /// The live range will be created as dead, but it can be extended with `extend_in_ebb()`.
    pub fn new(value: Value, def: ProgramPoint, affinity: Affinity) -> GenLiveRange<PO> {
        GenLiveRange {
            value,
            affinity,
            // Dead def: the interval begins and ends at the defining point.
            def_begin: def,
            def_end: def,
            liveins: bforest::Map::new(),
        }
    }

    /// Extend the local interval for `ebb` so it reaches `to` which must belong to `ebb`.
    /// Create a live-in interval if necessary.
    ///
    /// If the live range already has a local interval in `ebb`, extend its end point so it
    /// includes `to`, and return false.
    ///
    /// If the live range did not previously have a local interval in `ebb`, add one so the value
    /// is live-in to `ebb`, extending to `to`. Return true.
    ///
    /// The return value can be used to detect if we just learned that the value is live-in to
    /// `ebb`. This can trigger recursive extensions in `ebb`'s CFG predecessor blocks.
    pub fn extend_in_ebb(
        &mut self,
        ebb: Ebb,
        to: Inst,
        order: &PO,
        forest: &mut bforest::MapForest<Ebb, Inst, PO>,
    ) -> bool {
        // First check if we're extending the def interval.
        //
        // We're assuming here that `to` never precedes `def_begin` in the same EBB, but we can't
        // check it without a method for getting `to`'s EBB.
        if order.cmp(ebb, self.def_end) != Ordering::Greater &&
            order.cmp(to, self.def_begin) != Ordering::Less
        {
            let to_pp = to.into();
            debug_assert_ne!(
                to_pp,
                self.def_begin,
                "Can't use value in the defining instruction."
            );
            if order.cmp(to, self.def_end) == Ordering::Greater {
                self.def_end = to_pp;
            }
            // Extending the def interval never makes the value live-in anywhere new.
            return false;
        }

        // Now check if we're extending any of the existing live-in intervals.
        let mut c = self.liveins.cursor(forest, order);
        let first_time_livein;

        if let Some(end) = c.goto(ebb) {
            // There's an interval beginning at `ebb`. See if it extends.
            first_time_livein = false;
            if order.cmp(end, to) == Ordering::Less {
                *c.value_mut().unwrap() = to;
            } else {
                // Already covered; nothing to do (and nothing to coalesce below).
                return first_time_livein;
            }
        } else if let Some((_, end)) = c.prev() {
            // There's no interval beginning at `ebb`, but we could still be live-in at `ebb` with
            // a coalesced interval that begins before and ends after.
            if order.cmp(end, ebb) == Ordering::Greater {
                // Yep, the previous interval overlaps `ebb`.
                first_time_livein = false;
                if order.cmp(end, to) == Ordering::Less {
                    *c.value_mut().unwrap() = to;
                } else {
                    return first_time_livein;
                }
            } else {
                first_time_livein = true;
                // The current interval does not overlap `ebb`, but it may still be possible to
                // coalesce with it when it ends exactly at the instruction gap before `ebb`.
                if order.is_ebb_gap(end, ebb) {
                    *c.value_mut().unwrap() = to;
                } else {
                    c.insert(ebb, to);
                }
            }
        } else {
            // There is no existing interval before `ebb`.
            first_time_livein = true;
            c.insert(ebb, to);
        }

        // Now `c` is left pointing at an interval that ends in `to`.
        debug_assert_eq!(c.value(), Some(to));

        // See if it can be coalesced with the following interval.
        if let Some((next_ebb, next_end)) = c.next() {
            if order.is_ebb_gap(to, next_ebb) {
                // Remove this interval and extend the previous end point to `next_end`.
                c.remove();
                c.prev();
                *c.value_mut().unwrap() = next_end;
            }
        }

        first_time_livein
    }

    /// Is this the live range of a dead value?
    ///
    /// A dead value has no uses, and its live range ends at the same program point where it is
    /// defined.
    pub fn is_dead(&self) -> bool {
        self.def_begin == self.def_end
    }

    /// Is this a local live range?
    ///
    /// A local live range is only used in the same EBB where it was defined. It is allowed to span
    /// multiple basic blocks within that EBB.
    pub fn is_local(&self) -> bool {
        self.liveins.is_empty()
    }

    /// Get the program point where this live range is defined.
    ///
    /// This will be an EBB header when the value is an EBB argument, otherwise it is the defining
    /// instruction.
    pub fn def(&self) -> ProgramPoint {
        self.def_begin
    }

    /// Move the definition of this value to a new program point.
    ///
    /// It is only valid to move the definition within the same EBB, and it can't be moved beyond
    /// `def_local_end()`.
    pub fn move_def_locally(&mut self, def: ProgramPoint) {
        self.def_begin = def;
    }

    /// Get the local end-point of this live range in the EBB where it is defined.
    ///
    /// This can be the EBB header itself in the case of a dead EBB argument.
    /// Otherwise, it will be the last local use or branch/jump that can reach a use.
    pub fn def_local_end(&self) -> ProgramPoint {
        self.def_end
    }

    /// Get the local end-point of this live range in an EBB where it is live-in.
    ///
    /// If this live range is not live-in to `ebb`, return `None`. Otherwise, return the end-point
    /// of this live range's local interval in `ebb`.
    ///
    /// If the live range is live through all of `ebb`, the terminator of `ebb` is a correct
    /// answer, but it is also possible that an even later program point is returned. So don't
    /// depend on the returned `Inst` to belong to `ebb`.
    pub fn livein_local_end(&self, ebb: Ebb, ctx: LiveRangeContext<PO>) -> Option<Inst> {
        self.liveins
            .get_or_less(ebb, ctx.forest, ctx.order)
            .and_then(|(_, inst)| {
                // We have an entry that ends at `inst`.
                // Only an end point past `ebb`'s header means the value is live-in here.
                if ctx.order.cmp(inst, ebb) == Ordering::Greater {
                    Some(inst)
                } else {
                    None
                }
            })
    }

    /// Is this value live-in to `ebb`?
    ///
    /// An EBB argument is not considered to be live in.
    pub fn is_livein(&self, ebb: Ebb, ctx: LiveRangeContext<PO>) -> bool {
        self.livein_local_end(ebb, ctx).is_some()
    }

    /// Get all the live-in intervals.
    ///
    /// Note that the intervals are stored in a compressed form so each entry may span multiple
    /// EBBs where the value is live in.
    pub fn liveins<'a>(
        &'a self,
        ctx: LiveRangeContext<'a, PO>,
    ) -> bforest::MapIter<'a, Ebb, Inst, PO> {
        self.liveins.iter(ctx.forest)
    }

    /// Check if this live range overlaps a definition in `ebb`.
    pub fn overlaps_def(
        &self,
        def: ExpandedProgramPoint,
        ebb: Ebb,
        ctx: LiveRangeContext<PO>,
    ) -> bool {
        // Two defs at the same program point always overlap, even if one is dead.
        if def == self.def_begin.into() {
            return true;
        }

        // Check for an overlap with the local (def) interval: def_begin <= def < def_end.
        if ctx.order.cmp(def, self.def_begin) != Ordering::Less &&
            ctx.order.cmp(def, self.def_end) == Ordering::Less
        {
            return true;
        }

        // Check for an overlap with a live-in range.
        match self.livein_local_end(ebb, ctx) {
            Some(inst) => ctx.order.cmp(def, inst) == Ordering::Less,
            None => false,
        }
    }

    /// Check if this live range reaches a use at `user` in `ebb`.
    pub fn reaches_use(&self, user: Inst, ebb: Ebb, ctx: LiveRangeContext<PO>) -> bool {
        // Check for an overlap with the local (def) interval: def_begin < user <= def_end.
        if ctx.order.cmp(user, self.def_begin) == Ordering::Greater &&
            ctx.order.cmp(user, self.def_end) != Ordering::Greater
        {
            return true;
        }

        // Check for an overlap with a live-in range (end point is inclusive for uses).
        match self.livein_local_end(ebb, ctx) {
            Some(inst) => ctx.order.cmp(user, inst) != Ordering::Greater,
            None => false,
        }
    }

    /// Check if this live range is killed at `user` in `ebb`.
    pub fn killed_at(&self, user: Inst, ebb: Ebb, ctx: LiveRangeContext<PO>) -> bool {
        // A kill is a use exactly at the end of either the def interval or the local
        // live-in interval.
        self.def_local_end() == user.into() || self.livein_local_end(ebb, ctx) == Some(user)
    }
}
|
||||
|
||||
/// Allow a `LiveRange` to be stored in a `SparseMap` indexed by values.
impl<PO: ProgramOrder> SparseMapValue<Value> for GenLiveRange<PO> {
    fn key(&self) -> Value {
        // The defined SSA value uniquely identifies this live range.
        self.value
    }
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{GenLiveRange, LiveRangeContext};
|
||||
use bforest;
|
||||
use entity::EntityRef;
|
||||
use ir::{Ebb, Inst, Value};
|
||||
use ir::{ExpandedProgramPoint, ProgramOrder};
|
||||
use std::cmp::Ordering;
|
||||
use std::vec::Vec;
|
||||
|
||||
    // Dummy program order which simply compares indexes.
    // It is assumed that EBBs have indexes that are multiples of 10, and instructions have indexes
    // in between. `is_ebb_gap` assumes that terminator instructions have indexes of the form
    // ebb * 10 + 1. This is used in the coalesce test.
    // The type is stateless; all ordering is derived from entity indexes.
    struct ProgOrder {}
|
||||
|
||||
    impl ProgramOrder for ProgOrder {
        fn cmp<A, B>(&self, a: A, b: B) -> Ordering
        where
            A: Into<ExpandedProgramPoint>,
            B: Into<ExpandedProgramPoint>,
        {
            // Map any program point to its raw entity index; the mock order is plain
            // index order (EBB headers at multiples of 10, instructions in between).
            fn idx(pp: ExpandedProgramPoint) -> usize {
                match pp {
                    ExpandedProgramPoint::Inst(i) => i.index(),
                    ExpandedProgramPoint::Ebb(e) => e.index(),
                }
            }

            let ia = idx(a.into());
            let ib = idx(b.into());
            ia.cmp(&ib)
        }

        fn is_ebb_gap(&self, inst: Inst, ebb: Ebb) -> bool {
            // True when `inst` is a terminator (index ends in 1) and `ebb` is the
            // immediately following block (the next multiple of 10).
            inst.index() % 10 == 1 && ebb.index() / 10 == inst.index() / 10 + 1
        }
    }
|
||||
|
||||
    impl ProgOrder {
        // Get the EBB corresponding to `inst` (round the index down to a multiple of 10).
        fn inst_ebb(&self, inst: Inst) -> Ebb {
            let i = inst.index();
            Ebb::new(i - i % 10)
        }

        // Get the EBB of a program point.
        fn pp_ebb<PP: Into<ExpandedProgramPoint>>(&self, pp: PP) -> Ebb {
            match pp.into() {
                ExpandedProgramPoint::Inst(i) => self.inst_ebb(i),
                ExpandedProgramPoint::Ebb(e) => e,
            }
        }

        // Validate the live range invariants.
        fn validate(
            &self,
            lr: &GenLiveRange<ProgOrder>,
            forest: &bforest::MapForest<Ebb, Inst, ProgOrder>,
        ) {
            // The def interval must cover a single EBB.
            let def_ebb = self.pp_ebb(lr.def_begin);
            assert_eq!(def_ebb, self.pp_ebb(lr.def_end));

            // Check that the def interval isn't backwards.
            match self.cmp(lr.def_begin, lr.def_end) {
                // A dead def (empty interval) must have no live-in intervals.
                Ordering::Equal => assert!(lr.liveins.is_empty()),
                Ordering::Greater => {
                    panic!("Backwards def interval: {}-{}", lr.def_begin, lr.def_end)
                }
                Ordering::Less => {}
            }

            // Check the live-in intervals: each must be non-empty, sorted after the
            // previous one, and disjoint from the def EBB.
            let mut prev_end = None;
            for (begin, end) in lr.liveins.iter(forest) {
                assert_eq!(self.cmp(begin, end), Ordering::Less);
                if let Some(e) = prev_end {
                    assert_eq!(self.cmp(e, begin), Ordering::Less);
                }

                assert!(
                    self.cmp(lr.def_end, begin) == Ordering::Less ||
                        self.cmp(lr.def_begin, end) == Ordering::Greater,
                    "Interval can't overlap the def EBB"
                );

                // Save for next round.
                prev_end = Some(end);
            }
        }
    }
|
||||
|
||||
    // Singleton `ProgramOrder` for tests below.
    // `ProgOrder` is zero-sized, so one shared static reference suffices.
    const PO: &'static ProgOrder = &ProgOrder {};
|
||||
|
||||
    // A value defined by an instruction and never used: a dead, local live range.
    #[test]
    fn dead_def_range() {
        let v0 = Value::new(0);
        let e0 = Ebb::new(0);
        let i1 = Inst::new(1);
        let i2 = Inst::new(2);
        let e2 = Ebb::new(2);
        let lr = GenLiveRange::new(v0, i1.into(), Default::default());
        let forest = &bforest::MapForest::new();
        let ctx = LiveRangeContext::new(PO, forest);
        assert!(lr.is_dead());
        assert!(lr.is_local());
        assert_eq!(lr.def(), i1.into());
        assert_eq!(lr.def_local_end(), i1.into());
        assert_eq!(lr.livein_local_end(e2, ctx), None);
        PO.validate(&lr, ctx.forest);

        // A dead live range overlaps its own def program point.
        assert!(lr.overlaps_def(i1.into(), e0, ctx));
        assert!(!lr.overlaps_def(i2.into(), e0, ctx));
        assert!(!lr.overlaps_def(e0.into(), e0, ctx));
    }
|
||||
|
||||
    // A dead EBB argument: defined at the EBB header, never used.
    #[test]
    fn dead_arg_range() {
        let v0 = Value::new(0);
        let e2 = Ebb::new(2);
        let lr = GenLiveRange::new(v0, e2.into(), Default::default());
        let forest = &bforest::MapForest::new();
        let ctx = LiveRangeContext::new(PO, forest);
        assert!(lr.is_dead());
        assert!(lr.is_local());
        assert_eq!(lr.def(), e2.into());
        assert_eq!(lr.def_local_end(), e2.into());
        // The def interval of an EBB argument does not count as live-in.
        assert_eq!(lr.livein_local_end(e2, ctx), None);
        PO.validate(&lr, ctx.forest);
    }
|
||||
|
||||
    // An instruction-defined value used only within its own EBB.
    #[test]
    fn local_def() {
        let v0 = Value::new(0);
        let e10 = Ebb::new(10);
        let i11 = Inst::new(11);
        let i12 = Inst::new(12);
        let i13 = Inst::new(13);
        let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());
        let forest = &mut bforest::MapForest::new();

        // Extending inside the def EBB must not report a new live-in block.
        assert_eq!(lr.extend_in_ebb(e10, i13, PO, forest), false);
        PO.validate(&lr, forest);
        assert!(!lr.is_dead());
        assert!(lr.is_local());
        assert_eq!(lr.def(), i11.into());
        assert_eq!(lr.def_local_end(), i13.into());

        // Extending to an already covered inst should not change anything.
        assert_eq!(lr.extend_in_ebb(e10, i12, PO, forest), false);
        PO.validate(&lr, forest);
        assert_eq!(lr.def(), i11.into());
        assert_eq!(lr.def_local_end(), i13.into());
    }
|
||||
|
||||
#[test]
|
||||
fn local_arg() {
|
||||
let v0 = Value::new(0);
|
||||
let e10 = Ebb::new(10);
|
||||
let i11 = Inst::new(11);
|
||||
let i12 = Inst::new(12);
|
||||
let i13 = Inst::new(13);
|
||||
let mut lr = GenLiveRange::new(v0, e10.into(), Default::default());
|
||||
let forest = &mut bforest::MapForest::new();
|
||||
|
||||
// Extending a dead EBB argument in its own block should not indicate that a live-in
|
||||
// interval was created.
|
||||
assert_eq!(lr.extend_in_ebb(e10, i12, PO, forest), false);
|
||||
PO.validate(&lr, forest);
|
||||
assert!(!lr.is_dead());
|
||||
assert!(lr.is_local());
|
||||
assert_eq!(lr.def(), e10.into());
|
||||
assert_eq!(lr.def_local_end(), i12.into());
|
||||
|
||||
// Extending to an already covered inst should not change anything.
|
||||
assert_eq!(lr.extend_in_ebb(e10, i11, PO, forest), false);
|
||||
PO.validate(&lr, forest);
|
||||
assert_eq!(lr.def(), e10.into());
|
||||
assert_eq!(lr.def_local_end(), i12.into());
|
||||
|
||||
// Extending further.
|
||||
assert_eq!(lr.extend_in_ebb(e10, i13, PO, forest), false);
|
||||
PO.validate(&lr, forest);
|
||||
assert_eq!(lr.def(), e10.into());
|
||||
assert_eq!(lr.def_local_end(), i13.into());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn global_def() {
|
||||
let v0 = Value::new(0);
|
||||
let e10 = Ebb::new(10);
|
||||
let i11 = Inst::new(11);
|
||||
let i12 = Inst::new(12);
|
||||
let e20 = Ebb::new(20);
|
||||
let i21 = Inst::new(21);
|
||||
let i22 = Inst::new(22);
|
||||
let i23 = Inst::new(23);
|
||||
let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());
|
||||
let forest = &mut bforest::MapForest::new();
|
||||
|
||||
assert_eq!(lr.extend_in_ebb(e10, i12, PO, forest), false);
|
||||
|
||||
// Adding a live-in interval.
|
||||
assert_eq!(lr.extend_in_ebb(e20, i22, PO, forest), true);
|
||||
PO.validate(&lr, forest);
|
||||
assert_eq!(
|
||||
lr.livein_local_end(e20, LiveRangeContext::new(PO, forest)),
|
||||
Some(i22)
|
||||
);
|
||||
|
||||
// Non-extending the live-in.
|
||||
assert_eq!(lr.extend_in_ebb(e20, i21, PO, forest), false);
|
||||
assert_eq!(
|
||||
lr.livein_local_end(e20, LiveRangeContext::new(PO, forest)),
|
||||
Some(i22)
|
||||
);
|
||||
|
||||
// Extending the existing live-in.
|
||||
assert_eq!(lr.extend_in_ebb(e20, i23, PO, forest), false);
|
||||
PO.validate(&lr, forest);
|
||||
assert_eq!(
|
||||
lr.livein_local_end(e20, LiveRangeContext::new(PO, forest)),
|
||||
Some(i23)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn coalesce() {
|
||||
let v0 = Value::new(0);
|
||||
let i11 = Inst::new(11);
|
||||
let e20 = Ebb::new(20);
|
||||
let i21 = Inst::new(21);
|
||||
let e30 = Ebb::new(30);
|
||||
let i31 = Inst::new(31);
|
||||
let e40 = Ebb::new(40);
|
||||
let i41 = Inst::new(41);
|
||||
let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());
|
||||
let forest = &mut bforest::MapForest::new();
|
||||
|
||||
assert_eq!(lr.extend_in_ebb(e30, i31, PO, forest), true);
|
||||
assert_eq!(
|
||||
lr.liveins(LiveRangeContext::new(PO, forest))
|
||||
.collect::<Vec<_>>(),
|
||||
[(e30, i31)]
|
||||
);
|
||||
|
||||
// Coalesce to previous
|
||||
assert_eq!(lr.extend_in_ebb(e40, i41, PO, forest), true);
|
||||
assert_eq!(
|
||||
lr.liveins(LiveRangeContext::new(PO, forest))
|
||||
.collect::<Vec<_>>(),
|
||||
[(e30, i41)]
|
||||
);
|
||||
|
||||
// Coalesce to next
|
||||
assert_eq!(lr.extend_in_ebb(e20, i21, PO, forest), true);
|
||||
assert_eq!(
|
||||
lr.liveins(LiveRangeContext::new(PO, forest))
|
||||
.collect::<Vec<_>>(),
|
||||
[(e20, i41)]
|
||||
);
|
||||
|
||||
let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());
|
||||
|
||||
assert_eq!(lr.extend_in_ebb(e40, i41, PO, forest), true);
|
||||
assert_eq!(
|
||||
lr.liveins(LiveRangeContext::new(PO, forest))
|
||||
.collect::<Vec<_>>(),
|
||||
[(e40, i41)]
|
||||
);
|
||||
|
||||
assert_eq!(lr.extend_in_ebb(e20, i21, PO, forest), true);
|
||||
assert_eq!(
|
||||
lr.liveins(LiveRangeContext::new(PO, forest))
|
||||
.collect::<Vec<_>>(),
|
||||
[(e20, i21), (e40, i41)]
|
||||
);
|
||||
|
||||
// Coalesce to previous and next
|
||||
assert_eq!(lr.extend_in_ebb(e30, i31, PO, forest), true);
|
||||
assert_eq!(
|
||||
lr.liveins(LiveRangeContext::new(PO, forest))
|
||||
.collect::<Vec<_>>(),
|
||||
[(e20, i41)]
|
||||
);
|
||||
}
|
||||
|
||||
// TODO: Add more tests that exercise the binary search algorithm.
|
||||
}
|
||||
23
lib/codegen/src/regalloc/mod.rs
Normal file
23
lib/codegen/src/regalloc/mod.rs
Normal file
@@ -0,0 +1,23 @@
|
||||
//! Register allocation.
|
||||
//!
|
||||
//! This module contains data structures and algorithms used for register allocation.
|
||||
|
||||
pub mod register_set;
|
||||
pub mod coloring;
|
||||
pub mod live_value_tracker;
|
||||
pub mod liveness;
|
||||
pub mod liverange;
|
||||
pub mod virtregs;
|
||||
|
||||
mod affinity;
|
||||
mod coalescing;
|
||||
mod context;
|
||||
mod diversion;
|
||||
mod pressure;
|
||||
mod reload;
|
||||
mod solver;
|
||||
mod spilling;
|
||||
|
||||
pub use self::register_set::RegisterSet;
|
||||
pub use self::context::Context;
|
||||
pub use self::diversion::RegDiversions;
|
||||
377
lib/codegen/src/regalloc/pressure.rs
Normal file
377
lib/codegen/src/regalloc/pressure.rs
Normal file
@@ -0,0 +1,377 @@
|
||||
//! Register pressure tracking.
|
||||
//!
|
||||
//! SSA-based register allocation depends on a spilling phase that "lowers register pressure
|
||||
//! sufficiently". This module defines the data structures needed to measure register pressure
|
||||
//! accurately enough to guarantee that the coloring phase will not run out of registers.
|
||||
//!
|
||||
//! Ideally, measuring register pressure amounts to simply counting the number of live registers at
|
||||
//! any given program point. This simplistic method has two problems:
|
||||
//!
|
||||
//! 1. Registers are not interchangeable. Most ISAs have separate integer and floating-point
|
||||
//! register banks, so we need to at least count the number of live registers in each register
|
||||
//! bank separately.
|
||||
//!
|
||||
//! 2. Some ISAs have complicated register aliasing properties. In particular, the 32-bit ARM
|
||||
//! ISA has a floating-point register bank where two 32-bit registers alias one 64-bit register.
|
||||
//! This makes it difficult to accurately measure register pressure.
|
||||
//!
|
||||
//! This module deals with the problems via *register banks* and *top-level register classes*.
|
||||
//! Register classes in different register banks are completely independent, so we can count
|
||||
//! registers in one bank without worrying about the other bank at all.
|
||||
//!
|
||||
//! All register classes have a unique top-level register class, and we will count registers for
|
||||
//! each top-level register class individually. However, a register bank can have multiple
|
||||
//! top-level register classes that interfere with each other, so all top-level counts need to
|
||||
//! be considered when determining how many more registers can be allocated.
|
||||
//!
|
||||
//! Currently, the only register bank with multiple top-level registers is the `arm32`
|
||||
//! floating-point register bank which has `S`, `D`, and `Q` top-level classes.
|
||||
//!
|
||||
//! # Base and transient counts
|
||||
//!
|
||||
//! We maintain two separate register counts per top-level register class: base counts and
|
||||
//! transient counts. The base counts are adjusted with the `take` and `free` functions. The
|
||||
//! transient counts are adjusted with `take_transient` and `free_transient`.
|
||||
|
||||
// Remove once we're using the pressure tracker.
|
||||
#![allow(dead_code)]
|
||||
|
||||
use isa::registers::{RegClass, RegClassMask, RegInfo, MAX_TRACKED_TOPRCS};
|
||||
use regalloc::RegisterSet;
|
||||
use std::cmp::min;
|
||||
use std::fmt;
|
||||
use std::iter::ExactSizeIterator;
|
||||
|
||||
/// Information per top-level register class.
|
||||
///
|
||||
/// Everything but the counts is static information computed from the constructor arguments.
|
||||
#[derive(Default)]
|
||||
struct TopRC {
|
||||
// Number of registers currently used from this register class.
|
||||
base_count: u32,
|
||||
transient_count: u32,
|
||||
|
||||
// Max number of registers that can be allocated.
|
||||
limit: u32,
|
||||
|
||||
// Register units per register.
|
||||
width: u8,
|
||||
|
||||
// The first aliasing top-level RC.
|
||||
first_toprc: u8,
|
||||
|
||||
// The number of aliasing top-level RCs.
|
||||
num_toprcs: u8,
|
||||
}
|
||||
|
||||
impl TopRC {
|
||||
fn total_count(&self) -> u32 {
|
||||
self.base_count + self.transient_count
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Pressure {
|
||||
// Bit mask of top-level register classes that are aliased by other top-level register classes.
|
||||
// Unaliased register classes can use a simpler interference algorithm.
|
||||
aliased: RegClassMask,
|
||||
|
||||
// Current register counts per top-level register class.
|
||||
toprc: [TopRC; MAX_TRACKED_TOPRCS],
|
||||
}
|
||||
|
||||
impl Pressure {
|
||||
/// Create a new register pressure tracker.
|
||||
pub fn new(reginfo: &RegInfo, usable: &RegisterSet) -> Pressure {
|
||||
let mut p = Pressure {
|
||||
aliased: 0,
|
||||
toprc: Default::default(),
|
||||
};
|
||||
|
||||
// Get the layout of aliasing top-level register classes from the register banks.
|
||||
for bank in reginfo.banks.iter() {
|
||||
let first = bank.first_toprc;
|
||||
let num = bank.num_toprcs;
|
||||
|
||||
if bank.pressure_tracking {
|
||||
for rc in &mut p.toprc[first..first + num] {
|
||||
rc.first_toprc = first as u8;
|
||||
rc.num_toprcs = num as u8;
|
||||
}
|
||||
|
||||
// Flag the top-level register classes with aliases.
|
||||
if num > 1 {
|
||||
p.aliased |= ((1 << num) - 1) << first;
|
||||
}
|
||||
} else {
|
||||
// This bank has no pressure tracking, so its top-level register classes may exceed
|
||||
// `MAX_TRACKED_TOPRCS`. Fill in dummy entries.
|
||||
for rc in &mut p.toprc[first..min(first + num, MAX_TRACKED_TOPRCS)] {
|
||||
// These aren't used if we don't set the `aliased` bit.
|
||||
rc.first_toprc = !0;
|
||||
rc.limit = !0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compute per-class limits from `usable`.
|
||||
for (toprc, rc) in p.toprc.iter_mut().take_while(|t| t.num_toprcs > 0).zip(
|
||||
reginfo.classes,
|
||||
)
|
||||
{
|
||||
toprc.limit = usable.iter(rc).len() as u32;
|
||||
toprc.width = rc.width;
|
||||
}
|
||||
|
||||
p
|
||||
}
|
||||
|
||||
/// Check for an available register in the register class `rc`.
|
||||
///
|
||||
/// If it is possible to allocate one more register from `rc`'s top-level register class,
|
||||
/// returns 0.
|
||||
///
|
||||
/// If not, returns a bit-mask of top-level register classes that are interfering. Register
|
||||
/// pressure should be eased in one of the returned top-level register classes before calling
|
||||
/// `can_take()` to check again.
|
||||
fn check_avail(&self, rc: RegClass) -> RegClassMask {
|
||||
let entry = match self.toprc.get(rc.toprc as usize) {
|
||||
None => return 0, // Not a pressure tracked bank.
|
||||
Some(e) => e,
|
||||
};
|
||||
let mask = 1 << rc.toprc;
|
||||
if (self.aliased & mask) == 0 {
|
||||
// This is a simple unaliased top-level register class.
|
||||
if entry.total_count() < entry.limit {
|
||||
0
|
||||
} else {
|
||||
mask
|
||||
}
|
||||
} else {
|
||||
// This is the more complicated case. The top-level register class has aliases.
|
||||
self.check_avail_aliased(entry)
|
||||
}
|
||||
}
|
||||
|
||||
/// Check for an available register in a top-level register class that may have aliases.
|
||||
///
|
||||
/// This is the out-of-line slow path for `check_avail()`.
|
||||
fn check_avail_aliased(&self, entry: &TopRC) -> RegClassMask {
|
||||
let first = usize::from(entry.first_toprc);
|
||||
let num = usize::from(entry.num_toprcs);
|
||||
let width = u32::from(entry.width);
|
||||
let ulimit = entry.limit * width;
|
||||
|
||||
// Count up the number of available register units.
|
||||
let mut units = 0;
|
||||
for (rc, rci) in self.toprc[first..first + num].iter().zip(first..) {
|
||||
let rcw = u32::from(rc.width);
|
||||
// If `rc.width` is smaller than `width`, each register in `rc` could potentially block
|
||||
// one of ours. This is assuming that none of the smaller registers are straddling the
|
||||
// bigger ones.
|
||||
//
|
||||
// If `rc.width` is larger than `width`, we are also assuming that the registers are
|
||||
// aligned and `rc.width` is a multiple of `width`.
|
||||
let u = if rcw < width {
|
||||
// We can't take more than the total number of register units in the class.
|
||||
// This matters for arm32 S-registers which can only ever lock out 16 D-registers.
|
||||
min(rc.total_count() * width, rc.limit * rcw)
|
||||
} else {
|
||||
rc.total_count() * rcw
|
||||
};
|
||||
|
||||
// If this top-level RC on its own is responsible for exceeding our limit, return it
|
||||
// early to guarantee that registers here are spilled before spilling other registers
|
||||
// unnecessarily.
|
||||
if u >= ulimit {
|
||||
return 1 << rci;
|
||||
}
|
||||
|
||||
units += u;
|
||||
}
|
||||
|
||||
// We've counted up the worst-case number of register units claimed by all aliasing
|
||||
// classes. Compare to the unit limit in this class.
|
||||
if units < ulimit {
|
||||
0
|
||||
} else {
|
||||
// Registers need to be spilled from any one of the aliasing classes.
|
||||
((1 << num) - 1) << first
|
||||
}
|
||||
}
|
||||
|
||||
/// Take a register from `rc`.
|
||||
///
|
||||
/// This does not check if there are enough registers available.
|
||||
pub fn take(&mut self, rc: RegClass) {
|
||||
self.toprc.get_mut(rc.toprc as usize).map(
|
||||
|t| t.base_count += 1,
|
||||
);
|
||||
}
|
||||
|
||||
/// Free a register in `rc`.
|
||||
pub fn free(&mut self, rc: RegClass) {
|
||||
self.toprc.get_mut(rc.toprc as usize).map(
|
||||
|t| t.base_count -= 1,
|
||||
);
|
||||
}
|
||||
|
||||
/// Reset all counts to 0, both base and transient.
|
||||
pub fn reset(&mut self) {
|
||||
for e in &mut self.toprc {
|
||||
e.base_count = 0;
|
||||
e.transient_count = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/// Try to increment a transient counter.
|
||||
///
|
||||
/// This will fail if there are not enough registers available.
|
||||
pub fn take_transient(&mut self, rc: RegClass) -> Result<(), RegClassMask> {
|
||||
let mask = self.check_avail(rc);
|
||||
if mask == 0 {
|
||||
self.toprc.get_mut(rc.toprc as usize).map(|t| {
|
||||
t.transient_count += 1
|
||||
});
|
||||
Ok(())
|
||||
} else {
|
||||
Err(mask)
|
||||
}
|
||||
}
|
||||
|
||||
/// Reset all transient counts to 0.
|
||||
pub fn reset_transient(&mut self) {
|
||||
for e in &mut self.toprc {
|
||||
e.transient_count = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/// Preserve the transient counts by transferring them to the base counts.
|
||||
pub fn preserve_transient(&mut self) {
|
||||
for e in &mut self.toprc {
|
||||
e.base_count += e.transient_count;
|
||||
e.transient_count = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Pressure {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "Pressure[")?;
|
||||
for rc in &self.toprc {
|
||||
if rc.limit > 0 && rc.limit < !0 {
|
||||
write!(f, " {}+{}/{}", rc.base_count, rc.transient_count, rc.limit)?;
|
||||
}
|
||||
}
|
||||
write!(f, " ]")
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[cfg(build_arm32)]
|
||||
mod tests {
|
||||
use super::Pressure;
|
||||
use isa::{RegClass, TargetIsa};
|
||||
use regalloc::RegisterSet;
|
||||
use std::borrow::Borrow;
|
||||
use std::boxed::Box;
|
||||
|
||||
// Make an arm32 `TargetIsa`, if possible.
|
||||
fn arm32() -> Option<Box<TargetIsa>> {
|
||||
use isa;
|
||||
use settings;
|
||||
|
||||
let shared_builder = settings::builder();
|
||||
let shared_flags = settings::Flags::new(&shared_builder);
|
||||
|
||||
isa::lookup("arm32").ok().map(|b| b.finish(shared_flags))
|
||||
}
|
||||
|
||||
// Get a register class by name.
|
||||
fn rc_by_name(isa: &TargetIsa, name: &str) -> RegClass {
|
||||
isa.register_info()
|
||||
.classes
|
||||
.iter()
|
||||
.find(|rc| rc.name == name)
|
||||
.expect("Can't find named register class.")
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn basic_counting() {
|
||||
let isa = arm32().expect("This test requires arm32 support");
|
||||
let isa = isa.borrow();
|
||||
let gpr = rc_by_name(isa, "GPR");
|
||||
let s = rc_by_name(isa, "S");
|
||||
let reginfo = isa.register_info();
|
||||
let regs = RegisterSet::new();
|
||||
|
||||
let mut pressure = Pressure::new(®info, ®s);
|
||||
let mut count = 0;
|
||||
while pressure.check_avail(gpr) == 0 {
|
||||
pressure.take(gpr);
|
||||
count += 1;
|
||||
}
|
||||
assert_eq!(count, 16);
|
||||
assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc);
|
||||
assert_eq!(pressure.check_avail(s), 0);
|
||||
pressure.free(gpr);
|
||||
assert_eq!(pressure.check_avail(gpr), 0);
|
||||
pressure.take(gpr);
|
||||
assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc);
|
||||
assert_eq!(pressure.check_avail(s), 0);
|
||||
pressure.reset();
|
||||
assert_eq!(pressure.check_avail(gpr), 0);
|
||||
assert_eq!(pressure.check_avail(s), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn arm_float_bank() {
|
||||
let isa = arm32().expect("This test requires arm32 support");
|
||||
let isa = isa.borrow();
|
||||
let s = rc_by_name(isa, "S");
|
||||
let d = rc_by_name(isa, "D");
|
||||
let q = rc_by_name(isa, "Q");
|
||||
let reginfo = isa.register_info();
|
||||
let regs = RegisterSet::new();
|
||||
|
||||
let mut pressure = Pressure::new(®info, ®s);
|
||||
assert_eq!(pressure.check_avail(s), 0);
|
||||
assert_eq!(pressure.check_avail(d), 0);
|
||||
assert_eq!(pressure.check_avail(q), 0);
|
||||
|
||||
// Allocating a single S-register should not affect availability.
|
||||
pressure.take(s);
|
||||
assert_eq!(pressure.check_avail(s), 0);
|
||||
assert_eq!(pressure.check_avail(d), 0);
|
||||
assert_eq!(pressure.check_avail(q), 0);
|
||||
|
||||
pressure.take(d);
|
||||
assert_eq!(pressure.check_avail(s), 0);
|
||||
assert_eq!(pressure.check_avail(d), 0);
|
||||
assert_eq!(pressure.check_avail(q), 0);
|
||||
|
||||
pressure.take(q);
|
||||
assert_eq!(pressure.check_avail(s), 0);
|
||||
assert_eq!(pressure.check_avail(d), 0);
|
||||
assert_eq!(pressure.check_avail(q), 0);
|
||||
|
||||
// Take a total of 16 S-regs.
|
||||
for _ in 1..16 {
|
||||
pressure.take(s);
|
||||
}
|
||||
assert_eq!(pressure.check_avail(s), 0);
|
||||
assert_eq!(pressure.check_avail(d), 0);
|
||||
assert_eq!(pressure.check_avail(q), 0);
|
||||
|
||||
// We've taken 16 S, 1 D, and 1 Q. There should be 6 more Qs.
|
||||
for _ in 0..6 {
|
||||
assert_eq!(pressure.check_avail(d), 0);
|
||||
assert_eq!(pressure.check_avail(q), 0);
|
||||
pressure.take(q);
|
||||
}
|
||||
|
||||
// We've taken 16 S, 1 D, and 7 Qs.
|
||||
assert!(pressure.check_avail(s) != 0);
|
||||
assert_eq!(pressure.check_avail(d), 0);
|
||||
assert!(pressure.check_avail(q) != 0);
|
||||
}
|
||||
}
|
||||
321
lib/codegen/src/regalloc/register_set.rs
Normal file
321
lib/codegen/src/regalloc/register_set.rs
Normal file
@@ -0,0 +1,321 @@
|
||||
//! Set of allocatable registers as a bit vector of register units.
|
||||
//!
|
||||
//! While allocating registers, we need to keep track of which registers are available and which
|
||||
//! registers are in use. Since registers can alias in different ways, we track this via the
|
||||
//! "register unit" abstraction. Every register contains one or more register units. Registers that
|
||||
//! share a register unit can't be in use at the same time.
|
||||
|
||||
use isa::registers::{RegClass, RegInfo, RegUnit, RegUnitMask};
|
||||
use std::char;
|
||||
use std::fmt;
|
||||
use std::iter::ExactSizeIterator;
|
||||
use std::mem::size_of_val;
|
||||
|
||||
/// Set of registers available for allocation.
|
||||
#[derive(Clone)]
|
||||
pub struct RegisterSet {
|
||||
avail: RegUnitMask,
|
||||
}
|
||||
|
||||
// Given a register class and a register unit in the class, compute a word index and a bit mask of
|
||||
// register units representing that register.
|
||||
//
|
||||
// Note that a register is not allowed to straddle words.
|
||||
fn bitmask(rc: RegClass, reg: RegUnit) -> (usize, u32) {
|
||||
// Bit mask representing the register. It is `rc.width` consecutive units.
|
||||
let width_bits = (1 << rc.width) - 1;
|
||||
// Index into avail[] of the word containing `reg`.
|
||||
let word_index = (reg / 32) as usize;
|
||||
// The actual bits in the word that cover `reg`.
|
||||
let reg_bits = width_bits << (reg % 32);
|
||||
|
||||
(word_index, reg_bits)
|
||||
}
|
||||
|
||||
impl RegisterSet {
|
||||
/// Create a new register set with all registers available.
|
||||
///
|
||||
/// Note that this includes *all* registers. Query the `TargetIsa` object to get a set of
|
||||
/// allocatable registers where reserved registers have been filtered out.
|
||||
pub fn new() -> Self {
|
||||
Self { avail: [!0; 3] }
|
||||
}
|
||||
|
||||
/// Create a new register set with no registers available.
|
||||
pub fn empty() -> Self {
|
||||
Self { avail: [0; 3] }
|
||||
}
|
||||
|
||||
/// Returns `true` if the specified register is available.
|
||||
pub fn is_avail(&self, rc: RegClass, reg: RegUnit) -> bool {
|
||||
let (idx, bits) = bitmask(rc, reg);
|
||||
(self.avail[idx] & bits) == bits
|
||||
}
|
||||
|
||||
/// Allocate `reg` from `rc` so it is no longer available.
|
||||
///
|
||||
/// It is an error to take a register that doesn't have all of its register units available.
|
||||
pub fn take(&mut self, rc: RegClass, reg: RegUnit) {
|
||||
let (idx, bits) = bitmask(rc, reg);
|
||||
debug_assert!(
|
||||
(self.avail[idx] & bits) == bits,
|
||||
"{}:{} not available in {}",
|
||||
rc,
|
||||
rc.info.display_regunit(reg),
|
||||
self.display(rc.info)
|
||||
);
|
||||
self.avail[idx] &= !bits;
|
||||
}
|
||||
|
||||
/// Return `reg` and all of its register units to the set of available registers.
|
||||
pub fn free(&mut self, rc: RegClass, reg: RegUnit) {
|
||||
let (idx, bits) = bitmask(rc, reg);
|
||||
debug_assert!(
|
||||
(self.avail[idx] & bits) == 0,
|
||||
"{}:{} not allocated in {}",
|
||||
rc,
|
||||
rc.info.display_regunit(reg),
|
||||
self.display(rc.info)
|
||||
);
|
||||
self.avail[idx] |= bits;
|
||||
}
|
||||
|
||||
/// Return an iterator over all available registers belonging to the register class `rc`.
|
||||
///
|
||||
/// This doesn't allocate anything from the set; use `take()` for that.
|
||||
pub fn iter(&self, rc: RegClass) -> RegSetIter {
|
||||
// Start by copying the RC mask. It is a single set bit for each register in the class.
|
||||
let mut rsi = RegSetIter { regs: rc.mask };
|
||||
|
||||
// Mask out the unavailable units.
|
||||
for idx in 0..self.avail.len() {
|
||||
// If a single unit in a register is unavailable, the whole register can't be used.
|
||||
// If a register straddles a word boundary, it will be marked as unavailable.
|
||||
// There's an assertion in `cdsl/registers.py` to check for that.
|
||||
for i in 0..rc.width {
|
||||
rsi.regs[idx] &= self.avail[idx] >> i;
|
||||
}
|
||||
}
|
||||
rsi
|
||||
}
|
||||
|
||||
/// Check if any register units allocated out of this set interferes with units allocated out
|
||||
/// of `other`.
|
||||
///
|
||||
/// This assumes that unused bits are 1.
|
||||
pub fn interferes_with(&self, other: &RegisterSet) -> bool {
|
||||
self.avail.iter().zip(&other.avail).any(
|
||||
|(&x, &y)| (x | y) != !0,
|
||||
)
|
||||
}
|
||||
|
||||
/// Intersect this set of registers with `other`. This has the effect of removing any register
|
||||
/// units from this set that are not in `other`.
|
||||
pub fn intersect(&mut self, other: &RegisterSet) {
|
||||
for (x, &y) in self.avail.iter_mut().zip(&other.avail) {
|
||||
*x &= y;
|
||||
}
|
||||
}
|
||||
|
||||
/// Return an object that can display this register set, using the register info from the
|
||||
/// target ISA.
|
||||
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&self, regs: R) -> DisplayRegisterSet<'a> {
|
||||
DisplayRegisterSet(self.clone(), regs.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterator over available registers in a register class.
|
||||
pub struct RegSetIter {
|
||||
regs: RegUnitMask,
|
||||
}
|
||||
|
||||
impl Iterator for RegSetIter {
|
||||
type Item = RegUnit;
|
||||
|
||||
fn next(&mut self) -> Option<RegUnit> {
|
||||
let mut unit_offset = 0;
|
||||
|
||||
// Find the first set bit in `self.regs`.
|
||||
for word in &mut self.regs {
|
||||
if *word != 0 {
|
||||
// Compute the register unit number from the lowest set bit in the word.
|
||||
let unit = unit_offset + word.trailing_zeros() as RegUnit;
|
||||
|
||||
// Clear that lowest bit so we won't find it again.
|
||||
*word &= *word - 1;
|
||||
|
||||
return Some(unit);
|
||||
}
|
||||
// How many register units was there in the word? This is a constant 32 for `u32` etc.
|
||||
unit_offset += 8 * size_of_val(word) as RegUnit;
|
||||
}
|
||||
|
||||
// All of `self.regs` is 0.
|
||||
None
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
let bits = self.regs.iter().map(|&w| w.count_ones() as usize).sum();
|
||||
(bits, Some(bits))
|
||||
}
|
||||
}
|
||||
|
||||
impl ExactSizeIterator for RegSetIter {}
|
||||
|
||||
/// Displaying an `RegisterSet` correctly requires the associated `RegInfo` from the target ISA.
|
||||
pub struct DisplayRegisterSet<'a>(RegisterSet, Option<&'a RegInfo>);
|
||||
|
||||
impl<'a> fmt::Display for DisplayRegisterSet<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "[")?;
|
||||
match self.1 {
|
||||
None => {
|
||||
for w in &self.0.avail {
|
||||
write!(f, " #{:08x}", w)?;
|
||||
}
|
||||
}
|
||||
Some(reginfo) => {
|
||||
let toprcs = reginfo
|
||||
.banks
|
||||
.iter()
|
||||
.map(|b| b.first_toprc + b.num_toprcs)
|
||||
.max()
|
||||
.expect("No register banks");
|
||||
for rc in ®info.classes[0..toprcs] {
|
||||
if rc.width == 1 {
|
||||
let bank = ®info.banks[rc.bank as usize];
|
||||
write!(f, " {}: ", rc)?;
|
||||
for offset in 0..bank.units {
|
||||
let reg = bank.first_unit + offset;
|
||||
if !rc.contains(reg) {
|
||||
continue;
|
||||
}
|
||||
if !self.0.is_avail(rc, reg) {
|
||||
write!(f, "-")?;
|
||||
continue;
|
||||
}
|
||||
// Display individual registers as either the second letter of their
|
||||
// name or the last digit of their number.
|
||||
// This works for x86 (rax, rbx, ...) and for numbered regs.
|
||||
write!(
|
||||
f,
|
||||
"{}",
|
||||
bank.names
|
||||
.get(offset as usize)
|
||||
.and_then(|name| name.chars().nth(1))
|
||||
.unwrap_or_else(
|
||||
|| char::from_digit(u32::from(offset % 10), 10).unwrap(),
|
||||
)
|
||||
)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
write!(f, " ]")
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for RegisterSet {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
self.display(None).fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use isa::registers::{RegClass, RegClassData};
|
||||
use std::vec::Vec;
|
||||
|
||||
// Register classes for testing.
|
||||
const GPR: RegClass = &RegClassData {
|
||||
name: "GPR",
|
||||
index: 0,
|
||||
width: 1,
|
||||
bank: 0,
|
||||
toprc: 0,
|
||||
first: 28,
|
||||
subclasses: 0,
|
||||
mask: [0xf0000000, 0x0000000f, 0],
|
||||
info: &INFO,
|
||||
};
|
||||
|
||||
const DPR: RegClass = &RegClassData {
|
||||
name: "DPR",
|
||||
index: 0,
|
||||
width: 2,
|
||||
bank: 0,
|
||||
toprc: 0,
|
||||
first: 28,
|
||||
subclasses: 0,
|
||||
mask: [0x50000000, 0x0000000a, 0],
|
||||
info: &INFO,
|
||||
};
|
||||
|
||||
const INFO: RegInfo = RegInfo {
|
||||
banks: &[],
|
||||
classes: &[],
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn put_and_take() {
|
||||
let mut regs = RegisterSet::new();
|
||||
|
||||
// `GPR` has units 28-36.
|
||||
assert_eq!(regs.iter(GPR).len(), 8);
|
||||
assert_eq!(regs.iter(GPR).count(), 8);
|
||||
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [28, 30, 33, 35]);
|
||||
|
||||
assert!(regs.is_avail(GPR, 29));
|
||||
regs.take(&GPR, 29);
|
||||
assert!(!regs.is_avail(GPR, 29));
|
||||
|
||||
assert_eq!(regs.iter(GPR).count(), 7);
|
||||
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
|
||||
|
||||
assert!(regs.is_avail(GPR, 30));
|
||||
regs.take(&GPR, 30);
|
||||
assert!(!regs.is_avail(GPR, 30));
|
||||
|
||||
assert_eq!(regs.iter(GPR).count(), 6);
|
||||
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [33, 35]);
|
||||
|
||||
assert!(regs.is_avail(GPR, 32));
|
||||
regs.take(&GPR, 32);
|
||||
assert!(!regs.is_avail(GPR, 32));
|
||||
|
||||
assert_eq!(regs.iter(GPR).count(), 5);
|
||||
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [33, 35]);
|
||||
|
||||
regs.free(&GPR, 30);
|
||||
assert!(regs.is_avail(GPR, 30));
|
||||
assert!(!regs.is_avail(GPR, 29));
|
||||
assert!(!regs.is_avail(GPR, 32));
|
||||
|
||||
assert_eq!(regs.iter(GPR).count(), 6);
|
||||
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
|
||||
|
||||
regs.free(&GPR, 32);
|
||||
assert!(regs.is_avail(GPR, 31));
|
||||
assert!(!regs.is_avail(GPR, 29));
|
||||
assert!(regs.is_avail(GPR, 32));
|
||||
|
||||
assert_eq!(regs.iter(GPR).count(), 7);
|
||||
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn interference() {
|
||||
let mut regs1 = RegisterSet::new();
|
||||
let mut regs2 = RegisterSet::new();
|
||||
|
||||
assert!(!regs1.interferes_with(®s2));
|
||||
regs1.take(&GPR, 32);
|
||||
assert!(!regs1.interferes_with(®s2));
|
||||
regs2.take(&GPR, 31);
|
||||
assert!(!regs1.interferes_with(®s2));
|
||||
regs1.intersect(®s2);
|
||||
assert!(regs1.interferes_with(®s2));
|
||||
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user