Merge pull request #2257 from fitzgen/peepmatic-no-paths-in-linear-ir
Peepmatic: Do not use paths in linear IR
This commit is contained in:
@@ -1,7 +1,6 @@
|
||||
//! Interfacing with actual instructions.
|
||||
|
||||
use crate::part::{Constant, Part};
|
||||
use crate::paths::Path;
|
||||
use crate::r#type::Type;
|
||||
use std::fmt::Debug;
|
||||
use std::hash::Hash;
|
||||
@@ -54,26 +53,22 @@ pub unsafe trait InstructionSet<'a> {
|
||||
new: Part<Self::Instruction>,
|
||||
) -> Self::Instruction;
|
||||
|
||||
/// Get the instruction, constant, or condition code at the given path.
|
||||
///
|
||||
/// If there is no such entity at the given path (e.g. we run into a
|
||||
/// function parameter and can't traverse the path any further) then `None`
|
||||
/// should be returned.
|
||||
fn get_part_at_path(
|
||||
&self,
|
||||
context: &mut Self::Context,
|
||||
root: Self::Instruction,
|
||||
path: Path,
|
||||
) -> Option<Part<Self::Instruction>>;
|
||||
|
||||
/// Get the given instruction's operator.
|
||||
///
|
||||
/// If the instruction isn't supported, then `None` should be returned.
|
||||
fn operator(
|
||||
///
|
||||
/// Additionally, if `Some` is returned, then the instruction's operands
|
||||
/// must be pushed in order into `operands`. E.g. calling this method on
|
||||
/// `(iadd $x $y)` would return `Some(iadd)` and extend `operands` with
|
||||
/// `[$x, $y]`.
|
||||
fn operator<E>(
|
||||
&self,
|
||||
context: &mut Self::Context,
|
||||
instr: Self::Instruction,
|
||||
) -> Option<Self::Operator>;
|
||||
operands: &mut E,
|
||||
) -> Option<Self::Operator>
|
||||
where
|
||||
E: Extend<Part<Self::Instruction>>;
|
||||
|
||||
/// Make a unary instruction.
|
||||
///
|
||||
|
||||
@@ -25,7 +25,6 @@ pub mod linear;
|
||||
pub mod optimizations;
|
||||
pub mod optimizer;
|
||||
pub mod part;
|
||||
pub mod paths;
|
||||
pub mod r#type;
|
||||
pub mod unquote;
|
||||
|
||||
|
||||
@@ -7,7 +7,6 @@
|
||||
|
||||
use crate::cc::ConditionCode;
|
||||
use crate::integer_interner::{IntegerId, IntegerInterner};
|
||||
use crate::paths::{PathId, PathInterner};
|
||||
use crate::r#type::{BitWidth, Type};
|
||||
use crate::unquote::UnquoteOperator;
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -24,9 +23,6 @@ where
|
||||
/// The linear optimizations.
|
||||
pub optimizations: Vec<Optimization<TOperator>>,
|
||||
|
||||
/// The de-duplicated paths referenced by these optimizations.
|
||||
pub paths: PathInterner,
|
||||
|
||||
/// The integer literals referenced by these optimizations.
|
||||
pub integers: IntegerInterner,
|
||||
}
|
||||
@@ -37,8 +33,13 @@ pub struct Optimization<TOperator>
|
||||
where
|
||||
TOperator: 'static + Copy + Debug + Eq + Hash,
|
||||
{
|
||||
/// The chain of increments for this optimization.
|
||||
pub increments: Vec<Increment<TOperator>>,
|
||||
/// The chain of match operations and expected results for this
|
||||
/// optimization.
|
||||
pub matches: Vec<Match>,
|
||||
|
||||
/// Actions to perform, given that the operation resulted in the expected
|
||||
/// value.
|
||||
pub actions: Vec<Action<TOperator>>,
|
||||
}
|
||||
|
||||
/// Match any value.
|
||||
@@ -61,31 +62,20 @@ pub fn bool_to_match_result(b: bool) -> MatchResult {
|
||||
unsafe { Ok(NonZeroU32::new_unchecked(b + 1)) }
|
||||
}
|
||||
|
||||
/// A partial match of an optimization's LHS and partial construction of its
|
||||
/// RHS.
|
||||
/// A partial match of an optimization's LHS.
|
||||
///
|
||||
/// An increment is a matching operation, the expected result from that
|
||||
/// operation to continue to the next increment, and the actions to take to
|
||||
/// build up the LHS scope and RHS instructions given that we got the expected
|
||||
/// result from this increment's matching operation. Each increment will
|
||||
/// basically become a state and a transition edge out of that state in the
|
||||
/// final automata.
|
||||
/// A match is composed of a matching operation and the expected result of that
|
||||
/// operation. Each match will basically become a state and a transition edge
|
||||
/// out of that state in the final automata.
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct Increment<TOperator>
|
||||
where
|
||||
TOperator: 'static + Copy + Debug + Eq + Hash,
|
||||
{
|
||||
pub struct Match {
|
||||
/// The matching operation to perform.
|
||||
pub operation: MatchOp,
|
||||
|
||||
/// The expected result of our matching operation, that enables us to
|
||||
/// continue to the next increment, or `Else` for "don't care"
|
||||
/// wildcard-style matching.
|
||||
/// continue to the next match, or `Else` for "don't care" wildcard-style
|
||||
/// matching.
|
||||
pub expected: MatchResult,
|
||||
|
||||
/// Actions to perform, given that the operation resulted in the expected
|
||||
/// value.
|
||||
pub actions: Vec<Action<TOperator>>,
|
||||
}
|
||||
|
||||
/// A matching operation to be performed on some Cranelift instruction as part
|
||||
@@ -93,79 +83,54 @@ where
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]
|
||||
pub enum MatchOp {
|
||||
/// Switch on the opcode of an instruction.
|
||||
Opcode {
|
||||
/// The path to the instruction whose opcode we're switching on.
|
||||
path: PathId,
|
||||
},
|
||||
///
|
||||
/// Upon successfully matching an instruction's opcode, bind each of its
|
||||
/// operands to a LHS temporary.
|
||||
Opcode(LhsId),
|
||||
|
||||
/// Does an instruction have a constant value?
|
||||
IsConst {
|
||||
/// The path to the instruction (or immediate) that we're checking
|
||||
/// whether it is constant or not.
|
||||
path: PathId,
|
||||
},
|
||||
IsConst(LhsId),
|
||||
|
||||
/// Is the constant value a power of two?
|
||||
IsPowerOfTwo {
|
||||
/// The path to the instruction (or immediate) that we are checking
|
||||
/// whether it is a constant power of two or not.
|
||||
path: PathId,
|
||||
},
|
||||
IsPowerOfTwo(LhsId),
|
||||
|
||||
/// Switch on the bit width of a value.
|
||||
BitWidth {
|
||||
/// The path to the instruction (or immediate) whose result's bit width
|
||||
/// we are checking.
|
||||
path: PathId,
|
||||
},
|
||||
BitWidth(LhsId),
|
||||
|
||||
/// Does the value fit in our target architecture's native word size?
|
||||
FitsInNativeWord {
|
||||
/// The path to the instruction (or immediate) whose result we are
|
||||
/// checking whether it fits in a native word or not.
|
||||
path: PathId,
|
||||
},
|
||||
FitsInNativeWord(LhsId),
|
||||
|
||||
/// Are the instructions (or immediates) at the given paths the same?
|
||||
Eq {
|
||||
/// The path to the first instruction (or immediate).
|
||||
path_a: PathId,
|
||||
/// The path to the second instruction (or immediate).
|
||||
path_b: PathId,
|
||||
},
|
||||
/// Are the instructions (or immediates) the same?
|
||||
Eq(LhsId, LhsId),
|
||||
|
||||
/// Switch on the constant integer value of an instruction.
|
||||
IntegerValue {
|
||||
/// The path to the instruction.
|
||||
path: PathId,
|
||||
},
|
||||
IntegerValue(LhsId),
|
||||
|
||||
/// Switch on the constant boolean value of an instruction.
|
||||
BooleanValue {
|
||||
/// The path to the instruction.
|
||||
path: PathId,
|
||||
},
|
||||
BooleanValue(LhsId),
|
||||
|
||||
/// Switch on a condition code.
|
||||
ConditionCode {
|
||||
/// The path to the condition code.
|
||||
path: PathId,
|
||||
},
|
||||
ConditionCode(LhsId),
|
||||
|
||||
/// No operation. Always evaluates to `None`.
|
||||
/// No operation. Always evaluates to `Else`.
|
||||
///
|
||||
/// Exceedingly rare in real optimizations; nonetheless required to support
|
||||
/// Never appears in real optimizations; nonetheless required to support
|
||||
/// corner cases of the DSL, such as a LHS pattern that is nothing but a
|
||||
/// variable pattern.
|
||||
/// variable.
|
||||
Nop,
|
||||
}
|
||||
|
||||
/// A canonicalized identifier for a left-hand side value that was bound in a
|
||||
/// pattern.
|
||||
///
|
||||
/// These are defined in a pre-order traversal of the LHS pattern by successful
|
||||
/// `MatchOp::Opcode` matches.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
|
||||
pub struct LhsId(pub u16);
|
||||
|
||||
/// A canonicalized identifier for a right-hand side value.
|
||||
///
|
||||
/// These are defined by RHS actions.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
pub struct RhsId(pub u16);
|
||||
|
||||
@@ -177,8 +142,8 @@ pub struct RhsId(pub u16);
|
||||
pub enum Action<TOperator> {
|
||||
/// Reuse something from the left-hand side.
|
||||
GetLhs {
|
||||
/// The path to the instruction or value.
|
||||
path: PathId,
|
||||
/// The left-hand side instruction or value.
|
||||
lhs: LhsId,
|
||||
},
|
||||
|
||||
/// Perform compile-time evaluation.
|
||||
|
||||
@@ -5,7 +5,6 @@ use crate::instruction_set::InstructionSet;
|
||||
use crate::integer_interner::IntegerInterner;
|
||||
use crate::linear::{Action, MatchOp, MatchResult};
|
||||
use crate::optimizer::PeepholeOptimizer;
|
||||
use crate::paths::PathInterner;
|
||||
use peepmatic_automata::Automaton;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fmt::Debug;
|
||||
@@ -25,9 +24,6 @@ pub struct PeepholeOptimizations<TOperator>
|
||||
where
|
||||
TOperator: 'static + Copy + Debug + Eq + Hash,
|
||||
{
|
||||
/// The instruction paths referenced by the peephole optimizations.
|
||||
pub paths: PathInterner,
|
||||
|
||||
/// Not all integers we're matching on fit in the `u32` that we use as the
|
||||
/// result of match operations. So we intern them and refer to them by id.
|
||||
pub integers: IntegerInterner,
|
||||
@@ -88,6 +84,7 @@ where
|
||||
PeepholeOptimizer {
|
||||
peep_opt: self,
|
||||
instr_set,
|
||||
left_hand_sides: vec![],
|
||||
right_hand_sides: vec![],
|
||||
actions: vec![],
|
||||
backtracking_states: vec![],
|
||||
|
||||
@@ -27,9 +27,10 @@ where
|
||||
{
|
||||
pub(crate) peep_opt: &'peep PeepholeOptimizations<TInstructionSet::Operator>,
|
||||
pub(crate) instr_set: TInstructionSet,
|
||||
pub(crate) left_hand_sides: Vec<Part<TInstructionSet::Instruction>>,
|
||||
pub(crate) right_hand_sides: Vec<Part<TInstructionSet::Instruction>>,
|
||||
pub(crate) actions: Vec<Action<TInstructionSet::Operator>>,
|
||||
pub(crate) backtracking_states: Vec<(State, usize)>,
|
||||
pub(crate) backtracking_states: Vec<(State, usize, usize)>,
|
||||
}
|
||||
|
||||
impl<'peep, 'ctx, TInstructionSet> Debug for PeepholeOptimizer<'peep, 'ctx, TInstructionSet>
|
||||
@@ -40,6 +41,7 @@ where
|
||||
let PeepholeOptimizer {
|
||||
peep_opt,
|
||||
instr_set: _,
|
||||
left_hand_sides,
|
||||
right_hand_sides,
|
||||
actions,
|
||||
backtracking_states,
|
||||
@@ -47,6 +49,7 @@ where
|
||||
f.debug_struct("PeepholeOptimizer")
|
||||
.field("peep_opt", peep_opt)
|
||||
.field("instr_set", &"_")
|
||||
.field("left_hand_sides", left_hand_sides)
|
||||
.field("right_hand_sides", right_hand_sides)
|
||||
.field("actions", actions)
|
||||
.field("backtracking_states", backtracking_states)
|
||||
@@ -117,12 +120,8 @@ where
|
||||
for action in actions.drain(..) {
|
||||
log::trace!("Evaluating action: {:?}", action);
|
||||
match action {
|
||||
Action::GetLhs { path } => {
|
||||
let path = self.peep_opt.paths.lookup(path);
|
||||
let lhs = self
|
||||
.instr_set
|
||||
.get_part_at_path(context, root, path)
|
||||
.expect("should always get part at path OK by the time it is bound");
|
||||
Action::GetLhs { lhs } => {
|
||||
let lhs = self.left_hand_sides[lhs.0 as usize];
|
||||
self.right_hand_sides.push(lhs);
|
||||
}
|
||||
Action::UnaryUnquote { operator, operand } => {
|
||||
@@ -284,22 +283,17 @@ where
|
||||
|
||||
log::trace!("Evaluating match operation: {:?}", match_op);
|
||||
let result: MatchResult = (|| match match_op {
|
||||
Opcode { path } => {
|
||||
let path = self.peep_opt.paths.lookup(path);
|
||||
let part = self
|
||||
.instr_set
|
||||
.get_part_at_path(context, root, path)
|
||||
.ok_or(Else)?;
|
||||
Opcode(id) => {
|
||||
let part = self.left_hand_sides[id.0 as usize];
|
||||
let inst = part.as_instruction().ok_or(Else)?;
|
||||
let op = self.instr_set.operator(context, inst).ok_or(Else)?;
|
||||
let op = self
|
||||
.instr_set
|
||||
.operator(context, inst, &mut self.left_hand_sides)
|
||||
.ok_or(Else)?;
|
||||
Ok(op.into())
|
||||
}
|
||||
IsConst { path } => {
|
||||
let path = self.peep_opt.paths.lookup(path);
|
||||
let part = self
|
||||
.instr_set
|
||||
.get_part_at_path(context, root, path)
|
||||
.ok_or(Else)?;
|
||||
IsConst(id) => {
|
||||
let part = self.left_hand_sides[id.0 as usize];
|
||||
let is_const = match part {
|
||||
Part::Instruction(i) => {
|
||||
self.instr_set.instruction_to_constant(context, i).is_some()
|
||||
@@ -308,12 +302,8 @@ where
|
||||
};
|
||||
bool_to_match_result(is_const)
|
||||
}
|
||||
IsPowerOfTwo { path } => {
|
||||
let path = self.peep_opt.paths.lookup(path);
|
||||
let part = self
|
||||
.instr_set
|
||||
.get_part_at_path(context, root, path)
|
||||
.ok_or(Else)?;
|
||||
IsPowerOfTwo(id) => {
|
||||
let part = self.left_hand_sides[id.0 as usize];
|
||||
match part {
|
||||
Part::Constant(c) => {
|
||||
let is_pow2 = c.as_int().unwrap().is_power_of_two();
|
||||
@@ -327,18 +317,11 @@ where
|
||||
let is_pow2 = c.as_int().unwrap().is_power_of_two();
|
||||
bool_to_match_result(is_pow2)
|
||||
}
|
||||
Part::ConditionCode(_) => unreachable!(
|
||||
"IsPowerOfTwo on a condition
|
||||
code"
|
||||
),
|
||||
Part::ConditionCode(_) => unreachable!("IsPowerOfTwo on a condition code"),
|
||||
}
|
||||
}
|
||||
BitWidth { path } => {
|
||||
let path = self.peep_opt.paths.lookup(path);
|
||||
let part = self
|
||||
.instr_set
|
||||
.get_part_at_path(context, root, path)
|
||||
.ok_or(Else)?;
|
||||
BitWidth(id) => {
|
||||
let part = self.left_hand_sides[id.0 as usize];
|
||||
let bit_width = match part {
|
||||
Part::Instruction(i) => self.instr_set.instruction_result_bit_width(context, i),
|
||||
Part::Constant(Constant::Int(_, w)) | Part::Constant(Constant::Bool(_, w)) => {
|
||||
@@ -355,15 +338,11 @@ where
|
||||
);
|
||||
Ok(unsafe { NonZeroU32::new_unchecked(bit_width as u32) })
|
||||
}
|
||||
FitsInNativeWord { path } => {
|
||||
FitsInNativeWord(id) => {
|
||||
let native_word_size = self.instr_set.native_word_size_in_bits(context);
|
||||
debug_assert!(native_word_size.is_power_of_two());
|
||||
|
||||
let path = self.peep_opt.paths.lookup(path);
|
||||
let part = self
|
||||
.instr_set
|
||||
.get_part_at_path(context, root, path)
|
||||
.ok_or(Else)?;
|
||||
let part = self.left_hand_sides[id.0 as usize];
|
||||
let fits = match part {
|
||||
Part::Instruction(i) => {
|
||||
let size = self.instr_set.instruction_result_bit_width(context, i);
|
||||
@@ -378,17 +357,9 @@ where
|
||||
};
|
||||
bool_to_match_result(fits)
|
||||
}
|
||||
Eq { path_a, path_b } => {
|
||||
let path_a = self.peep_opt.paths.lookup(path_a);
|
||||
let part_a = self
|
||||
.instr_set
|
||||
.get_part_at_path(context, root, path_a)
|
||||
.ok_or(Else)?;
|
||||
let path_b = self.peep_opt.paths.lookup(path_b);
|
||||
let part_b = self
|
||||
.instr_set
|
||||
.get_part_at_path(context, root, path_b)
|
||||
.ok_or(Else)?;
|
||||
Eq(a, b) => {
|
||||
let part_a = self.left_hand_sides[a.0 as usize];
|
||||
let part_b = self.left_hand_sides[b.0 as usize];
|
||||
let eq = match (part_a, part_b) {
|
||||
(Part::Instruction(inst), Part::Constant(c1))
|
||||
| (Part::Constant(c1), Part::Instruction(inst)) => {
|
||||
@@ -401,12 +372,8 @@ where
|
||||
};
|
||||
bool_to_match_result(eq)
|
||||
}
|
||||
IntegerValue { path } => {
|
||||
let path = self.peep_opt.paths.lookup(path);
|
||||
let part = self
|
||||
.instr_set
|
||||
.get_part_at_path(context, root, path)
|
||||
.ok_or(Else)?;
|
||||
IntegerValue(id) => {
|
||||
let part = self.left_hand_sides[id.0 as usize];
|
||||
match part {
|
||||
Part::Constant(c) => {
|
||||
let x = c.as_int().ok_or(Else)?;
|
||||
@@ -425,12 +392,8 @@ where
|
||||
Part::ConditionCode(_) => unreachable!("IntegerValue on condition code"),
|
||||
}
|
||||
}
|
||||
BooleanValue { path } => {
|
||||
let path = self.peep_opt.paths.lookup(path);
|
||||
let part = self
|
||||
.instr_set
|
||||
.get_part_at_path(context, root, path)
|
||||
.ok_or(Else)?;
|
||||
BooleanValue(id) => {
|
||||
let part = self.left_hand_sides[id.0 as usize];
|
||||
match part {
|
||||
Part::Constant(c) => {
|
||||
let b = c.as_bool().ok_or(Else)?;
|
||||
@@ -447,12 +410,8 @@ where
|
||||
Part::ConditionCode(_) => unreachable!("IntegerValue on condition code"),
|
||||
}
|
||||
}
|
||||
ConditionCode { path } => {
|
||||
let path = self.peep_opt.paths.lookup(path);
|
||||
let part = self
|
||||
.instr_set
|
||||
.get_part_at_path(context, root, path)
|
||||
.ok_or(Else)?;
|
||||
ConditionCode(id) => {
|
||||
let part = self.left_hand_sides[id.0 as usize];
|
||||
let cc = part.as_condition_code().ok_or(Else)?;
|
||||
let cc = cc as u32;
|
||||
debug_assert!(cc != 0);
|
||||
@@ -483,12 +442,20 @@ where
|
||||
self.backtracking_states.clear();
|
||||
self.actions.clear();
|
||||
self.right_hand_sides.clear();
|
||||
self.left_hand_sides.clear();
|
||||
|
||||
// `LhsId(0)` is always the root.
|
||||
self.left_hand_sides.push(Part::Instruction(root));
|
||||
|
||||
let mut r#final = None;
|
||||
|
||||
let mut query = self.peep_opt.automata.query();
|
||||
loop {
|
||||
log::trace!("Current state: {:?}", query.current_state());
|
||||
log::trace!(
|
||||
"self.left_hand_sides = {:#?}",
|
||||
self.left_hand_sides.iter().enumerate().collect::<Vec<_>>()
|
||||
);
|
||||
|
||||
if query.is_in_final_state() {
|
||||
// If we're in a final state (which means an optimization is
|
||||
@@ -507,8 +474,11 @@ where
|
||||
// optimization, we want to be able to backtrack to this state and
|
||||
// then try taking the `Else` transition.
|
||||
if query.has_transition_on(&Err(Else)) {
|
||||
self.backtracking_states
|
||||
.push((query.current_state(), self.actions.len()));
|
||||
self.backtracking_states.push((
|
||||
query.current_state(),
|
||||
self.actions.len(),
|
||||
self.left_hand_sides.len(),
|
||||
));
|
||||
}
|
||||
|
||||
let match_op = match query.current_state_data() {
|
||||
@@ -522,9 +492,10 @@ where
|
||||
actions
|
||||
} else if r#final.is_some() {
|
||||
break;
|
||||
} else if let Some((state, actions_len)) = self.backtracking_states.pop() {
|
||||
} else if let Some((state, actions_len, lhs_len)) = self.backtracking_states.pop() {
|
||||
query.go_to_state(state);
|
||||
self.actions.truncate(actions_len);
|
||||
self.left_hand_sides.truncate(lhs_len);
|
||||
query
|
||||
.next(&Err(Else))
|
||||
.expect("backtracking states always have `Else` transitions")
|
||||
|
||||
@@ -1,242 +0,0 @@
|
||||
//! Representing paths through the dataflow graph.
|
||||
//!
|
||||
//! Paths are relative from a *root* instruction, which is the instruction we
|
||||
//! are determining which, if any, optimizations apply to.
|
||||
//!
|
||||
//! Paths are series of indices through each instruction's children as we
|
||||
//! traverse down the graph from the root. Children are immediates followed by
|
||||
//! arguments: `[imm0, imm1, ..., immN, arg0, arg1, ..., argN]`.
|
||||
//!
|
||||
//! ## Examples
|
||||
//!
|
||||
//! * `[0]` is the path to the root.
|
||||
//! * `[0, 0]` is the path to the root's first child.
|
||||
//! * `[0, 1]` is the path to the root's second child.
|
||||
//! * `[0, 1, 0]` is the path to the root's second child's first child.
|
||||
//!
|
||||
//! ## Interning
|
||||
//!
|
||||
//! To avoid extra allocations, de-duplicate paths, and reference them via a
|
||||
//! fixed-length value, we intern paths inside a `PathInterner` and then
|
||||
//! reference them via `PathId`.
|
||||
|
||||
// TODO: Make `[]` the path to the root, and get rid of this redundant leading
|
||||
// zero that is currently in every single path.
|
||||
|
||||
use serde::de::{Deserializer, SeqAccess, Visitor};
|
||||
use serde::ser::{SerializeSeq, Serializer};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::convert::TryInto;
|
||||
use std::fmt;
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::marker::PhantomData;
|
||||
|
||||
/// A path through the data-flow graph from the root instruction.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct Path<'a>(pub &'a [u8]);
|
||||
|
||||
impl Path<'_> {
|
||||
/// Construct a new path through the data-flow graph from the root
|
||||
/// instruction.
|
||||
pub fn new(path: &impl AsRef<[u8]>) -> Path {
|
||||
Path(path.as_ref())
|
||||
}
|
||||
}
|
||||
|
||||
/// An identifier for an interned path.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
pub struct PathId(u16);
|
||||
|
||||
/// An interner and de-duplicator for `Path`s.
|
||||
///
|
||||
/// Can be serialized and deserialized while maintaining the same id to interned
|
||||
/// path mapping.
|
||||
#[derive(Debug, Default)]
|
||||
pub struct PathInterner {
|
||||
/// A map from a path (whose owned data is inside `arena`) to the canonical
|
||||
/// `PathId` we assigned it when interning it.
|
||||
map: HashMap<UnsafePath, PathId>,
|
||||
|
||||
/// A map from a `PathId` index to an unsafe, self-borrowed path pointing
|
||||
/// into `arena`. It is safe to give these out as safe `Path`s, as long as
|
||||
/// the lifetime is not longer than this `PathInterner`'s lifetime.
|
||||
paths: Vec<UnsafePath>,
|
||||
|
||||
/// Bump allocation arena for path data. The bump arena ensures that these
|
||||
/// allocations never move, and are therefore safe for self-references.
|
||||
arena: bumpalo::Bump,
|
||||
}
|
||||
|
||||
impl PathInterner {
|
||||
/// Construct a new, empty `PathInterner`.
|
||||
#[inline]
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Intern a path into this `PathInterner`, returning its canonical
|
||||
/// `PathId`.
|
||||
///
|
||||
/// If we've already interned this path before, then the existing id we
|
||||
/// already assigned to it is returned. If we've never seen this path
|
||||
/// before, then it is copied into this `PathInterner` and a new id is
|
||||
/// assigned to it.
|
||||
#[inline]
|
||||
pub fn intern<'a>(&mut self, path: Path<'a>) -> PathId {
|
||||
let unsafe_path = unsafe { UnsafePath::from_path(&path) };
|
||||
if let Some(id) = self.map.get(&unsafe_path) {
|
||||
return *id;
|
||||
}
|
||||
self.intern_new(path)
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn intern_new<'a>(&mut self, path: Path<'a>) -> PathId {
|
||||
let id: u16 = self
|
||||
.paths
|
||||
.len()
|
||||
.try_into()
|
||||
.expect("too many paths interned");
|
||||
let id = PathId(id);
|
||||
|
||||
let our_path = self.arena.alloc_slice_copy(&path.0);
|
||||
let unsafe_path = unsafe { UnsafePath::from_slice(&our_path) };
|
||||
|
||||
self.paths.push(unsafe_path.clone());
|
||||
let old = self.map.insert(unsafe_path, id);
|
||||
|
||||
debug_assert!(old.is_none());
|
||||
debug_assert_eq!(self.lookup(id), path);
|
||||
debug_assert_eq!(self.intern(path), id);
|
||||
|
||||
id
|
||||
}
|
||||
|
||||
/// Lookup a previously interned path by id.
|
||||
#[inline]
|
||||
pub fn lookup<'a>(&'a self, id: PathId) -> Path<'a> {
|
||||
let unsafe_path = self
|
||||
.paths
|
||||
.get(id.0 as usize)
|
||||
.unwrap_or_else(|| Self::lookup_failure());
|
||||
unsafe { unsafe_path.as_path() }
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn lookup_failure() -> ! {
|
||||
panic!(
|
||||
"no path for the given id; this can only happen when mixing `PathId`s with different \
|
||||
`PathInterner`s"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for PathInterner {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
let mut seq = serializer.serialize_seq(Some(self.paths.len()))?;
|
||||
for p in &self.paths {
|
||||
let p = unsafe { p.as_path() };
|
||||
seq.serialize_element(&p)?;
|
||||
}
|
||||
seq.end()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for PathInterner {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
deserializer.deserialize_seq(PathInternerVisitor {
|
||||
marker: PhantomData,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
struct PathInternerVisitor {
|
||||
marker: PhantomData<fn() -> PathInterner>,
|
||||
}
|
||||
|
||||
impl<'de> Visitor<'de> for PathInternerVisitor {
|
||||
type Value = PathInterner;
|
||||
|
||||
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(formatter, "a `peepmatic_runtime::paths::PathInterner`")
|
||||
}
|
||||
|
||||
fn visit_seq<M>(self, mut access: M) -> Result<Self::Value, M::Error>
|
||||
where
|
||||
M: SeqAccess<'de>,
|
||||
{
|
||||
const DEFAULT_CAPACITY: usize = 16;
|
||||
let capacity = access.size_hint().unwrap_or(DEFAULT_CAPACITY);
|
||||
|
||||
let mut interner = PathInterner {
|
||||
map: HashMap::with_capacity(capacity),
|
||||
paths: Vec::with_capacity(capacity),
|
||||
arena: bumpalo::Bump::new(),
|
||||
};
|
||||
|
||||
while let Some(path) = access.next_element::<Path>()? {
|
||||
interner.intern(path);
|
||||
}
|
||||
|
||||
Ok(interner)
|
||||
}
|
||||
}
|
||||
|
||||
/// An unsafe, unchecked borrow of a path. Not for use outside of
|
||||
/// `PathInterner`!
|
||||
#[derive(Clone, Debug)]
|
||||
struct UnsafePath {
|
||||
ptr: *const u8,
|
||||
len: usize,
|
||||
}
|
||||
|
||||
impl PartialEq for UnsafePath {
|
||||
fn eq(&self, rhs: &UnsafePath) -> bool {
|
||||
unsafe { self.as_slice() == rhs.as_slice() }
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for UnsafePath {}
|
||||
|
||||
impl Hash for UnsafePath {
|
||||
fn hash<H>(&self, hasher: &mut H)
|
||||
where
|
||||
H: Hasher,
|
||||
{
|
||||
unsafe { self.as_slice().hash(hasher) }
|
||||
}
|
||||
}
|
||||
|
||||
/// Safety: callers must ensure that the constructed values won't have unsafe
|
||||
/// usages of `PartialEq`, `Eq`, or `Hash`.
|
||||
impl UnsafePath {
|
||||
unsafe fn from_path(p: &Path) -> Self {
|
||||
Self::from_slice(&p.0)
|
||||
}
|
||||
|
||||
unsafe fn from_slice(s: &[u8]) -> Self {
|
||||
UnsafePath {
|
||||
ptr: s.as_ptr(),
|
||||
len: s.len(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Safety: callers must ensure that `'a` does not outlive the lifetime of the
|
||||
/// underlying data.
|
||||
impl UnsafePath {
|
||||
unsafe fn as_slice<'a>(&self) -> &'a [u8] {
|
||||
std::slice::from_raw_parts(self.ptr, self.len)
|
||||
}
|
||||
|
||||
unsafe fn as_path<'a>(&self) -> Path<'a> {
|
||||
Path(self.as_slice())
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user