Merge pull request #2257 from fitzgen/peepmatic-no-paths-in-linear-ir

Peepmatic: Do not use paths in linear IR
This commit is contained in:
Nick Fitzgerald
2020-10-13 12:18:26 -07:00
committed by GitHub
17 changed files with 846 additions and 1064 deletions

View File

@@ -1,7 +1,6 @@
//! Interfacing with actual instructions.
use crate::part::{Constant, Part};
use crate::paths::Path;
use crate::r#type::Type;
use std::fmt::Debug;
use std::hash::Hash;
@@ -54,26 +53,22 @@ pub unsafe trait InstructionSet<'a> {
new: Part<Self::Instruction>,
) -> Self::Instruction;
/// Get the instruction, constant, or condition code at the given path.
///
/// If there is no such entity at the given path (e.g. we run into a
/// function parameter and can't traverse the path any further) then `None`
/// should be returned.
fn get_part_at_path(
&self,
context: &mut Self::Context,
root: Self::Instruction,
path: Path,
) -> Option<Part<Self::Instruction>>;
/// Get the given instruction's operator.
///
/// If the instruction isn't supported, then `None` should be returned.
fn operator(
///
/// Additionally, if `Some` is returned, then the instruction's operands
/// must be pushed in order into `operands`. E.g. calling this method on
/// `(iadd $x $y)` would return `Some(iadd)` and extend `operands` with
/// `[$x, $y]`.
fn operator<E>(
&self,
context: &mut Self::Context,
instr: Self::Instruction,
) -> Option<Self::Operator>;
operands: &mut E,
) -> Option<Self::Operator>
where
E: Extend<Part<Self::Instruction>>;
/// Make a unary instruction.
///

View File

@@ -25,7 +25,6 @@ pub mod linear;
pub mod optimizations;
pub mod optimizer;
pub mod part;
pub mod paths;
pub mod r#type;
pub mod unquote;

View File

@@ -7,7 +7,6 @@
use crate::cc::ConditionCode;
use crate::integer_interner::{IntegerId, IntegerInterner};
use crate::paths::{PathId, PathInterner};
use crate::r#type::{BitWidth, Type};
use crate::unquote::UnquoteOperator;
use serde::{Deserialize, Serialize};
@@ -24,9 +23,6 @@ where
/// The linear optimizations.
pub optimizations: Vec<Optimization<TOperator>>,
/// The de-duplicated paths referenced by these optimizations.
pub paths: PathInterner,
/// The integer literals referenced by these optimizations.
pub integers: IntegerInterner,
}
@@ -37,8 +33,13 @@ pub struct Optimization<TOperator>
where
TOperator: 'static + Copy + Debug + Eq + Hash,
{
/// The chain of increments for this optimization.
pub increments: Vec<Increment<TOperator>>,
/// The chain of match operations and expected results for this
/// optimization.
pub matches: Vec<Match>,
/// Actions to perform, given that the operation resulted in the expected
/// value.
pub actions: Vec<Action<TOperator>>,
}
/// Match any value.
@@ -61,31 +62,20 @@ pub fn bool_to_match_result(b: bool) -> MatchResult {
unsafe { Ok(NonZeroU32::new_unchecked(b + 1)) }
}
/// A partial match of an optimization's LHS and partial construction of its
/// RHS.
/// A partial match of an optimization's LHS.
///
/// An increment is a matching operation, the expected result from that
/// operation to continue to the next increment, and the actions to take to
/// build up the LHS scope and RHS instructions given that we got the expected
/// result from this increment's matching operation. Each increment will
/// basically become a state and a transition edge out of that state in the
/// final automata.
/// A match is composed of a matching operation and the expected result of that
/// operation. Each match will basically become a state and a transition edge
/// out of that state in the final automaton.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Increment<TOperator>
where
TOperator: 'static + Copy + Debug + Eq + Hash,
{
pub struct Match {
/// The matching operation to perform.
pub operation: MatchOp,
/// The expected result of our matching operation, that enables us to
/// continue to the next increment, or `Else` for "don't care"
/// wildcard-style matching.
/// continue to the next match, or `Else` for "don't care" wildcard-style
/// matching.
pub expected: MatchResult,
/// Actions to perform, given that the operation resulted in the expected
/// value.
pub actions: Vec<Action<TOperator>>,
}
/// A matching operation to be performed on some Cranelift instruction as part
@@ -93,79 +83,54 @@ where
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]
pub enum MatchOp {
/// Switch on the opcode of an instruction.
Opcode {
/// The path to the instruction whose opcode we're switching on.
path: PathId,
},
///
/// Upon successfully matching an instruction's opcode, bind each of its
/// operands to a LHS temporary.
Opcode(LhsId),
/// Does an instruction have a constant value?
IsConst {
/// The path to the instruction (or immediate) that we're checking
/// whether it is constant or not.
path: PathId,
},
IsConst(LhsId),
/// Is the constant value a power of two?
IsPowerOfTwo {
/// The path to the instruction (or immediate) that we are checking
/// whether it is a constant power of two or not.
path: PathId,
},
IsPowerOfTwo(LhsId),
/// Switch on the bit width of a value.
BitWidth {
/// The path to the instruction (or immediate) whose result's bit width
/// we are checking.
path: PathId,
},
BitWidth(LhsId),
/// Does the value fit in our target architecture's native word size?
FitsInNativeWord {
/// The path to the instruction (or immediate) whose result we are
/// checking whether it fits in a native word or not.
path: PathId,
},
FitsInNativeWord(LhsId),
/// Are the instructions (or immediates) at the given paths the same?
Eq {
/// The path to the first instruction (or immediate).
path_a: PathId,
/// The path to the second instruction (or immediate).
path_b: PathId,
},
/// Are the instructions (or immediates) the same?
Eq(LhsId, LhsId),
/// Switch on the constant integer value of an instruction.
IntegerValue {
/// The path to the instruction.
path: PathId,
},
IntegerValue(LhsId),
/// Switch on the constant boolean value of an instruction.
BooleanValue {
/// The path to the instruction.
path: PathId,
},
BooleanValue(LhsId),
/// Switch on a condition code.
ConditionCode {
/// The path to the condition code.
path: PathId,
},
ConditionCode(LhsId),
/// No operation. Always evaluates to `None`.
/// No operation. Always evaluates to `Else`.
///
/// Exceedingly rare in real optimizations; nonetheless required to support
/// Never appears in real optimizations; nonetheless required to support
/// corner cases of the DSL, such as a LHS pattern that is nothing but a
/// variable pattern.
/// variable.
Nop,
}
/// A canonicalized identifier for a left-hand side value that was bound in a
/// pattern.
///
/// These are defined in a pre-order traversal of the LHS pattern by successful
/// `MatchOp::Opcode` matches.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub struct LhsId(pub u16);
/// A canonicalized identifier for a right-hand side value.
///
/// These are defined by RHS actions.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct RhsId(pub u16);
@@ -177,8 +142,8 @@ pub struct RhsId(pub u16);
pub enum Action<TOperator> {
/// Reuse something from the left-hand side.
GetLhs {
/// The path to the instruction or value.
path: PathId,
/// The left-hand side instruction or value.
lhs: LhsId,
},
/// Perform compile-time evaluation.

View File

@@ -5,7 +5,6 @@ use crate::instruction_set::InstructionSet;
use crate::integer_interner::IntegerInterner;
use crate::linear::{Action, MatchOp, MatchResult};
use crate::optimizer::PeepholeOptimizer;
use crate::paths::PathInterner;
use peepmatic_automata::Automaton;
use serde::{Deserialize, Serialize};
use std::fmt::Debug;
@@ -25,9 +24,6 @@ pub struct PeepholeOptimizations<TOperator>
where
TOperator: 'static + Copy + Debug + Eq + Hash,
{
/// The instruction paths referenced by the peephole optimizations.
pub paths: PathInterner,
/// Not all integers we're matching on fit in the `u32` that we use as the
/// result of match operations. So we intern them and refer to them by id.
pub integers: IntegerInterner,
@@ -88,6 +84,7 @@ where
PeepholeOptimizer {
peep_opt: self,
instr_set,
left_hand_sides: vec![],
right_hand_sides: vec![],
actions: vec![],
backtracking_states: vec![],

View File

@@ -27,9 +27,10 @@ where
{
pub(crate) peep_opt: &'peep PeepholeOptimizations<TInstructionSet::Operator>,
pub(crate) instr_set: TInstructionSet,
pub(crate) left_hand_sides: Vec<Part<TInstructionSet::Instruction>>,
pub(crate) right_hand_sides: Vec<Part<TInstructionSet::Instruction>>,
pub(crate) actions: Vec<Action<TInstructionSet::Operator>>,
pub(crate) backtracking_states: Vec<(State, usize)>,
pub(crate) backtracking_states: Vec<(State, usize, usize)>,
}
impl<'peep, 'ctx, TInstructionSet> Debug for PeepholeOptimizer<'peep, 'ctx, TInstructionSet>
@@ -40,6 +41,7 @@ where
let PeepholeOptimizer {
peep_opt,
instr_set: _,
left_hand_sides,
right_hand_sides,
actions,
backtracking_states,
@@ -47,6 +49,7 @@ where
f.debug_struct("PeepholeOptimizer")
.field("peep_opt", peep_opt)
.field("instr_set", &"_")
.field("left_hand_sides", left_hand_sides)
.field("right_hand_sides", right_hand_sides)
.field("actions", actions)
.field("backtracking_states", backtracking_states)
@@ -117,12 +120,8 @@ where
for action in actions.drain(..) {
log::trace!("Evaluating action: {:?}", action);
match action {
Action::GetLhs { path } => {
let path = self.peep_opt.paths.lookup(path);
let lhs = self
.instr_set
.get_part_at_path(context, root, path)
.expect("should always get part at path OK by the time it is bound");
Action::GetLhs { lhs } => {
let lhs = self.left_hand_sides[lhs.0 as usize];
self.right_hand_sides.push(lhs);
}
Action::UnaryUnquote { operator, operand } => {
@@ -284,22 +283,17 @@ where
log::trace!("Evaluating match operation: {:?}", match_op);
let result: MatchResult = (|| match match_op {
Opcode { path } => {
let path = self.peep_opt.paths.lookup(path);
let part = self
.instr_set
.get_part_at_path(context, root, path)
.ok_or(Else)?;
Opcode(id) => {
let part = self.left_hand_sides[id.0 as usize];
let inst = part.as_instruction().ok_or(Else)?;
let op = self.instr_set.operator(context, inst).ok_or(Else)?;
let op = self
.instr_set
.operator(context, inst, &mut self.left_hand_sides)
.ok_or(Else)?;
Ok(op.into())
}
IsConst { path } => {
let path = self.peep_opt.paths.lookup(path);
let part = self
.instr_set
.get_part_at_path(context, root, path)
.ok_or(Else)?;
IsConst(id) => {
let part = self.left_hand_sides[id.0 as usize];
let is_const = match part {
Part::Instruction(i) => {
self.instr_set.instruction_to_constant(context, i).is_some()
@@ -308,12 +302,8 @@ where
};
bool_to_match_result(is_const)
}
IsPowerOfTwo { path } => {
let path = self.peep_opt.paths.lookup(path);
let part = self
.instr_set
.get_part_at_path(context, root, path)
.ok_or(Else)?;
IsPowerOfTwo(id) => {
let part = self.left_hand_sides[id.0 as usize];
match part {
Part::Constant(c) => {
let is_pow2 = c.as_int().unwrap().is_power_of_two();
@@ -327,18 +317,11 @@ where
let is_pow2 = c.as_int().unwrap().is_power_of_two();
bool_to_match_result(is_pow2)
}
Part::ConditionCode(_) => unreachable!(
"IsPowerOfTwo on a condition
code"
),
Part::ConditionCode(_) => unreachable!("IsPowerOfTwo on a condition code"),
}
}
BitWidth { path } => {
let path = self.peep_opt.paths.lookup(path);
let part = self
.instr_set
.get_part_at_path(context, root, path)
.ok_or(Else)?;
BitWidth(id) => {
let part = self.left_hand_sides[id.0 as usize];
let bit_width = match part {
Part::Instruction(i) => self.instr_set.instruction_result_bit_width(context, i),
Part::Constant(Constant::Int(_, w)) | Part::Constant(Constant::Bool(_, w)) => {
@@ -355,15 +338,11 @@ where
);
Ok(unsafe { NonZeroU32::new_unchecked(bit_width as u32) })
}
FitsInNativeWord { path } => {
FitsInNativeWord(id) => {
let native_word_size = self.instr_set.native_word_size_in_bits(context);
debug_assert!(native_word_size.is_power_of_two());
let path = self.peep_opt.paths.lookup(path);
let part = self
.instr_set
.get_part_at_path(context, root, path)
.ok_or(Else)?;
let part = self.left_hand_sides[id.0 as usize];
let fits = match part {
Part::Instruction(i) => {
let size = self.instr_set.instruction_result_bit_width(context, i);
@@ -378,17 +357,9 @@ where
};
bool_to_match_result(fits)
}
Eq { path_a, path_b } => {
let path_a = self.peep_opt.paths.lookup(path_a);
let part_a = self
.instr_set
.get_part_at_path(context, root, path_a)
.ok_or(Else)?;
let path_b = self.peep_opt.paths.lookup(path_b);
let part_b = self
.instr_set
.get_part_at_path(context, root, path_b)
.ok_or(Else)?;
Eq(a, b) => {
let part_a = self.left_hand_sides[a.0 as usize];
let part_b = self.left_hand_sides[b.0 as usize];
let eq = match (part_a, part_b) {
(Part::Instruction(inst), Part::Constant(c1))
| (Part::Constant(c1), Part::Instruction(inst)) => {
@@ -401,12 +372,8 @@ where
};
bool_to_match_result(eq)
}
IntegerValue { path } => {
let path = self.peep_opt.paths.lookup(path);
let part = self
.instr_set
.get_part_at_path(context, root, path)
.ok_or(Else)?;
IntegerValue(id) => {
let part = self.left_hand_sides[id.0 as usize];
match part {
Part::Constant(c) => {
let x = c.as_int().ok_or(Else)?;
@@ -425,12 +392,8 @@ where
Part::ConditionCode(_) => unreachable!("IntegerValue on condition code"),
}
}
BooleanValue { path } => {
let path = self.peep_opt.paths.lookup(path);
let part = self
.instr_set
.get_part_at_path(context, root, path)
.ok_or(Else)?;
BooleanValue(id) => {
let part = self.left_hand_sides[id.0 as usize];
match part {
Part::Constant(c) => {
let b = c.as_bool().ok_or(Else)?;
@@ -447,12 +410,8 @@ where
Part::ConditionCode(_) => unreachable!("IntegerValue on condition code"),
}
}
ConditionCode { path } => {
let path = self.peep_opt.paths.lookup(path);
let part = self
.instr_set
.get_part_at_path(context, root, path)
.ok_or(Else)?;
ConditionCode(id) => {
let part = self.left_hand_sides[id.0 as usize];
let cc = part.as_condition_code().ok_or(Else)?;
let cc = cc as u32;
debug_assert!(cc != 0);
@@ -483,12 +442,20 @@ where
self.backtracking_states.clear();
self.actions.clear();
self.right_hand_sides.clear();
self.left_hand_sides.clear();
// `LhsId(0)` is always the root.
self.left_hand_sides.push(Part::Instruction(root));
let mut r#final = None;
let mut query = self.peep_opt.automata.query();
loop {
log::trace!("Current state: {:?}", query.current_state());
log::trace!(
"self.left_hand_sides = {:#?}",
self.left_hand_sides.iter().enumerate().collect::<Vec<_>>()
);
if query.is_in_final_state() {
// If we're in a final state (which means an optimization is
@@ -507,8 +474,11 @@ where
// optimization, we want to be able to backtrack to this state and
// then try taking the `Else` transition.
if query.has_transition_on(&Err(Else)) {
self.backtracking_states
.push((query.current_state(), self.actions.len()));
self.backtracking_states.push((
query.current_state(),
self.actions.len(),
self.left_hand_sides.len(),
));
}
let match_op = match query.current_state_data() {
@@ -522,9 +492,10 @@ where
actions
} else if r#final.is_some() {
break;
} else if let Some((state, actions_len)) = self.backtracking_states.pop() {
} else if let Some((state, actions_len, lhs_len)) = self.backtracking_states.pop() {
query.go_to_state(state);
self.actions.truncate(actions_len);
self.left_hand_sides.truncate(lhs_len);
query
.next(&Err(Else))
.expect("backtracking states always have `Else` transitions")

View File

@@ -1,242 +0,0 @@
//! Representing paths through the dataflow graph.
//!
//! Paths are relative to a *root* instruction, which is the instruction for
//! which we are determining which optimizations, if any, apply.
//!
//! Paths are series of indices through each instruction's children as we
//! traverse down the graph from the root. Children are immediates followed by
//! arguments: `[imm0, imm1, ..., immN, arg0, arg1, ..., argN]`.
//!
//! ## Examples
//!
//! * `[0]` is the path to the root.
//! * `[0, 0]` is the path to the root's first child.
//! * `[0, 1]` is the path to the root's second child.
//! * `[0, 1, 0]` is the path to the root's second child's first child.
//!
//! ## Interning
//!
//! To avoid extra allocations, de-duplicate paths, and reference them via a
//! fixed-length value, we intern paths inside a `PathInterner` and then
//! reference them via `PathId`.
// TODO: Make `[]` the path to the root, and get rid of this redundant leading
// zero that is currently in every single path.
use serde::de::{Deserializer, SeqAccess, Visitor};
use serde::ser::{SerializeSeq, Serializer};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::convert::TryInto;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::marker::PhantomData;
/// A path through the data-flow graph from the root instruction.
///
/// The wrapped slice is the series of child indices to follow, starting at the
/// root: `[0]` is the root itself and `[0, 1]` is the root's second child.
/// The bytes are borrowed, not owned, so a `Path` is cheap to copy.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct Path<'a>(pub &'a [u8]);
impl Path<'_> {
    /// Construct a new path through the data-flow graph from the root
    /// instruction.
    ///
    /// The returned `Path` borrows the bytes that `path` refers to; nothing is
    /// copied. The `'_` in the return type makes that borrow visible in the
    /// signature instead of hiding it behind a bare `Path`.
    pub fn new(path: &impl AsRef<[u8]>) -> Path<'_> {
        Path(path.as_ref())
    }
}
/// An identifier for an interned path.
///
/// Ids are handed out by `PathInterner::intern` and are only meaningful for the
/// `PathInterner` that created them; looking an id up in a different interner
/// can panic.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct PathId(u16);
/// An interner and de-duplicator for `Path`s.
///
/// Can be serialized and deserialized while maintaining the same id to interned
/// path mapping.
///
/// Each distinct path's bytes are copied into `arena` once; both `map` and
/// `paths` refer to that single copy.
#[derive(Debug, Default)]
pub struct PathInterner {
/// A map from a path (whose owned data is inside `arena`) to the canonical
/// `PathId` we assigned it when interning it.
map: HashMap<UnsafePath, PathId>,
/// A map from a `PathId` index to an unsafe, self-borrowed path pointing
/// into `arena`. It is safe to give these out as safe `Path`s, as long as
/// the lifetime is not longer than this `PathInterner`'s lifetime.
paths: Vec<UnsafePath>,
/// Bump allocation arena for path data. The bump arena ensures that these
/// allocations never move, and are therefore safe for self-references.
arena: bumpalo::Bump,
}
impl PathInterner {
    /// Create a `PathInterner` with no paths interned yet.
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Return the canonical `PathId` for `path`, interning it if necessary.
    ///
    /// A path that was interned earlier yields the id it was given back then.
    /// A path seen for the first time is copied into this `PathInterner` and
    /// receives a fresh id.
    ///
    /// # Panics
    ///
    /// Panics if the index of a newly interned path does not fit in a `u16`.
    #[inline]
    pub fn intern<'a>(&mut self, path: Path<'a>) -> PathId {
        // SAFETY: `probe` aliases the caller's bytes, which `path` keeps
        // borrowed for the whole call. It is only used as a lookup key here
        // and is never stored.
        let probe = unsafe { UnsafePath::from_path(&path) };
        match self.map.get(&probe) {
            Some(&existing) => existing,
            None => self.intern_new(path),
        }
    }

    /// Slow path of `intern`: copy a never-before-seen path into the arena and
    /// register it under the next free id.
    #[inline(never)]
    fn intern_new<'a>(&mut self, path: Path<'a>) -> PathId {
        // The new id is the index the path is about to occupy in `paths`.
        let next_index = self.paths.len();
        let id = PathId(next_index.try_into().expect("too many paths interned"));

        // Own a copy of the bytes so the stored key outlives the caller's
        // borrow.
        let stored = self.arena.alloc_slice_copy(path.0);
        // SAFETY: `stored` lives in `self.arena`, and the resulting key is
        // only kept inside `self`, alongside that arena.
        let key = unsafe { UnsafePath::from_slice(&*stored) };

        self.paths.push(key.clone());
        let previous = self.map.insert(key, id);

        // Sanity checks: the path was really new, and it round-trips.
        debug_assert!(previous.is_none());
        debug_assert_eq!(self.lookup(id), path);
        debug_assert_eq!(self.intern(path), id);

        id
    }

    /// Resolve an id previously returned by `intern` back to its path.
    ///
    /// # Panics
    ///
    /// Panics if this interner has no path for `id`, which can only happen
    /// when a `PathId` from a different `PathInterner` is passed in.
    #[inline]
    pub fn lookup<'a>(&'a self, id: PathId) -> Path<'a> {
        match self.paths.get(id.0 as usize) {
            // SAFETY: entries of `paths` point into `self.arena`, and the
            // returned `Path` cannot outlive the `&'a self` borrow.
            Some(stored) => unsafe { stored.as_path() },
            None => Self::lookup_failure(),
        }
    }

    /// Out-of-line panic for `lookup`, kept separate so the failure path is
    /// not inlined into callers.
    #[inline(never)]
    fn lookup_failure() -> ! {
        panic!(
            "no path for the given id; this can only happen when mixing `PathId`s with different \
             `PathInterner`s"
        )
    }
}
impl Serialize for PathInterner {
    /// Serialize the interned paths as a sequence, in id order.
    ///
    /// Only the path data is written; the lookup map and the arena are
    /// rebuilt on deserialization.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let mut seq = serializer.serialize_seq(Some(self.paths.len()))?;
        self.paths.iter().try_for_each(|stored| {
            // SAFETY: every entry of `paths` points into `self.arena`, which
            // is alive for as long as `self` is borrowed here.
            let path = unsafe { stored.as_path() };
            seq.serialize_element(&path)
        })?;
        seq.end()
    }
}
impl<'de> Deserialize<'de> for PathInterner {
    /// Deserialize a `PathInterner` from a sequence of paths by handing the
    /// work to `PathInternerVisitor`.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let visitor = PathInternerVisitor {
            marker: PhantomData,
        };
        deserializer.deserialize_seq(visitor)
    }
}
/// Serde visitor that rebuilds a `PathInterner` from a serialized sequence of
/// paths.
struct PathInternerVisitor {
// Marker naming the type this visitor produces; it holds no data.
marker: PhantomData<fn() -> PathInterner>,
}
impl<'de> Visitor<'de> for PathInternerVisitor {
    type Value = PathInterner;

    /// Describe the expected input, for use in serde's error messages.
    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        write!(formatter, "a `peepmatic_runtime::paths::PathInterner`")
    }

    /// Rebuild the interner by interning each path in the order it appears.
    ///
    /// `PathInterner`'s `Serialize` impl writes paths in id order and ids are
    /// assigned sequentially, so re-interning its output in order reproduces
    /// the original id-to-path mapping.
    fn visit_seq<M>(self, mut access: M) -> Result<Self::Value, M::Error>
    where
        M: SeqAccess<'de>,
    {
        const DEFAULT_CAPACITY: usize = 16;
        // `PathId` wraps a `u16`, so no interner can ever hold more paths than
        // this. The size hint comes from the serialized input, so clamp it
        // rather than letting a bogus hint drive a huge up-front allocation.
        const MAX_PATHS: usize = u16::MAX as usize + 1;

        let capacity = access
            .size_hint()
            .unwrap_or(DEFAULT_CAPACITY)
            .min(MAX_PATHS);

        let mut interner = PathInterner {
            map: HashMap::with_capacity(capacity),
            paths: Vec::with_capacity(capacity),
            arena: bumpalo::Bump::new(),
        };

        while let Some(path) = access.next_element::<Path>()? {
            interner.intern(path);
        }

        Ok(interner)
    }
}
/// An unsafe, unchecked borrow of a path. Not for use outside of
/// `PathInterner`!
///
/// This is a raw pointer plus a length with no lifetime attached, so nothing
/// stops the referenced bytes from being freed while an `UnsafePath` still
/// points at them; every user must uphold that manually.
#[derive(Clone, Debug)]
struct UnsafePath {
// Start of the path's bytes.
ptr: *const u8,
// Number of bytes in the path.
len: usize,
}
impl PartialEq for UnsafePath {
    /// Two `UnsafePath`s are equal when the bytes they point to are equal,
    /// regardless of where those bytes live.
    fn eq(&self, rhs: &UnsafePath) -> bool {
        // SAFETY: relies on the `UnsafePath` contract that both pointers still
        // refer to live data whenever a comparison is made.
        let (ours, theirs) = unsafe { (self.as_slice(), rhs.as_slice()) };
        ours == theirs
    }
}

impl Eq for UnsafePath {}
impl Hash for UnsafePath {
    /// Hash the pointed-to bytes (not the pointer), keeping `Hash` consistent
    /// with the content-based `PartialEq`.
    fn hash<H: Hasher>(&self, hasher: &mut H) {
        // SAFETY: relies on the `UnsafePath` contract that the pointer still
        // refers to live data whenever the value is hashed.
        let bytes = unsafe { self.as_slice() };
        bytes.hash(hasher)
    }
}
/// Constructors.
///
/// Safety: the caller must guarantee that `PartialEq`, `Eq`, and `Hash` are
/// never used unsafely on the constructed value, since those impls read
/// through the stored pointer.
impl UnsafePath {
    /// Capture the bytes borrowed by `p` as an unchecked pointer and length.
    unsafe fn from_path(p: &Path) -> Self {
        Self::from_slice(p.0)
    }

    /// Capture `s` as an unchecked pointer and length, dropping its lifetime.
    unsafe fn from_slice(s: &[u8]) -> Self {
        let ptr = s.as_ptr();
        let len = s.len();
        UnsafePath { ptr, len }
    }
}
/// Safety: callers must ensure that `'a` does not outlive the lifetime of the
/// underlying data.
impl UnsafePath {
unsafe fn as_slice<'a>(&self) -> &'a [u8] {
std::slice::from_raw_parts(self.ptr, self.len)
}
unsafe fn as_path<'a>(&self) -> Path<'a> {
Path(self.as_slice())
}
}