peepmatic: Do not use paths in linear IR
Rather than locating each instruction by its path from the root instruction (whether to match on its opcode, to check whether it is constant, and so on), use temporary variables. When we successfully match an instruction's opcode, we simultaneously define these temporaries for the instruction's operands. This is similar to how open-coding these matches in Rust would use `match` expressions with pattern matching to bind the operands to variables at the same time. This saves about 1.8% of instructions retired when Peepmatic is enabled.
This commit is contained in:
@@ -1,7 +1,6 @@
|
||||
//! Interfacing with actual instructions.
|
||||
|
||||
use crate::part::{Constant, Part};
|
||||
use crate::paths::Path;
|
||||
use crate::r#type::Type;
|
||||
use std::fmt::Debug;
|
||||
use std::hash::Hash;
|
||||
@@ -54,26 +53,22 @@ pub unsafe trait InstructionSet<'a> {
|
||||
new: Part<Self::Instruction>,
|
||||
) -> Self::Instruction;
|
||||
|
||||
/// Get the instruction, constant, or condition code at the given path.
|
||||
///
|
||||
/// If there is no such entity at the given path (e.g. we run into a
|
||||
/// function parameter and can't traverse the path any further) then `None`
|
||||
/// should be returned.
|
||||
fn get_part_at_path(
|
||||
&self,
|
||||
context: &mut Self::Context,
|
||||
root: Self::Instruction,
|
||||
path: Path,
|
||||
) -> Option<Part<Self::Instruction>>;
|
||||
|
||||
/// Get the given instruction's operator.
|
||||
///
|
||||
/// If the instruction isn't supported, then `None` should be returned.
|
||||
fn operator(
|
||||
///
|
||||
/// Additionally, if `Some` is returned, then the instruction's operands
|
||||
/// must be pushed in order into `operands`. E.g. calling this method on
|
||||
/// `(iadd $x $y)` would return `Some(iadd)` and extend `operands` with
|
||||
/// `[$x, $y]`.
|
||||
fn operator<E>(
|
||||
&self,
|
||||
context: &mut Self::Context,
|
||||
instr: Self::Instruction,
|
||||
) -> Option<Self::Operator>;
|
||||
operands: &mut E,
|
||||
) -> Option<Self::Operator>
|
||||
where
|
||||
E: Extend<Part<Self::Instruction>>;
|
||||
|
||||
/// Make a unary instruction.
|
||||
///
|
||||
|
||||
@@ -25,7 +25,6 @@ pub mod linear;
|
||||
pub mod optimizations;
|
||||
pub mod optimizer;
|
||||
pub mod part;
|
||||
pub mod paths;
|
||||
pub mod r#type;
|
||||
pub mod unquote;
|
||||
|
||||
|
||||
@@ -7,7 +7,6 @@
|
||||
|
||||
use crate::cc::ConditionCode;
|
||||
use crate::integer_interner::{IntegerId, IntegerInterner};
|
||||
use crate::paths::{PathId, PathInterner};
|
||||
use crate::r#type::{BitWidth, Type};
|
||||
use crate::unquote::UnquoteOperator;
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -24,9 +23,6 @@ where
|
||||
/// The linear optimizations.
|
||||
pub optimizations: Vec<Optimization<TOperator>>,
|
||||
|
||||
/// The de-duplicated paths referenced by these optimizations.
|
||||
pub paths: PathInterner,
|
||||
|
||||
/// The integer literals referenced by these optimizations.
|
||||
pub integers: IntegerInterner,
|
||||
}
|
||||
@@ -87,79 +83,54 @@ pub struct Match {
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]
|
||||
pub enum MatchOp {
|
||||
/// Switch on the opcode of an instruction.
|
||||
Opcode {
|
||||
/// The path to the instruction whose opcode we're switching on.
|
||||
path: PathId,
|
||||
},
|
||||
///
|
||||
/// Upon successfully matching an instruction's opcode, bind each of its
|
||||
/// operands to a LHS temporary.
|
||||
Opcode(LhsId),
|
||||
|
||||
/// Does an instruction have a constant value?
|
||||
IsConst {
|
||||
/// The path to the instruction (or immediate) that we're checking
|
||||
/// whether it is constant or not.
|
||||
path: PathId,
|
||||
},
|
||||
IsConst(LhsId),
|
||||
|
||||
/// Is the constant value a power of two?
|
||||
IsPowerOfTwo {
|
||||
/// The path to the instruction (or immediate) that we are checking
|
||||
/// whether it is a constant power of two or not.
|
||||
path: PathId,
|
||||
},
|
||||
IsPowerOfTwo(LhsId),
|
||||
|
||||
/// Switch on the bit width of a value.
|
||||
BitWidth {
|
||||
/// The path to the instruction (or immediate) whose result's bit width
|
||||
/// we are checking.
|
||||
path: PathId,
|
||||
},
|
||||
BitWidth(LhsId),
|
||||
|
||||
/// Does the value fit in our target architecture's native word size?
|
||||
FitsInNativeWord {
|
||||
/// The path to the instruction (or immediate) whose result we are
|
||||
/// checking whether it fits in a native word or not.
|
||||
path: PathId,
|
||||
},
|
||||
FitsInNativeWord(LhsId),
|
||||
|
||||
/// Are the instructions (or immediates) at the given paths the same?
|
||||
Eq {
|
||||
/// The path to the first instruction (or immediate).
|
||||
path_a: PathId,
|
||||
/// The path to the second instruction (or immediate).
|
||||
path_b: PathId,
|
||||
},
|
||||
/// Are the instructions (or immediates) the same?
|
||||
Eq(LhsId, LhsId),
|
||||
|
||||
/// Switch on the constant integer value of an instruction.
|
||||
IntegerValue {
|
||||
/// The path to the instruction.
|
||||
path: PathId,
|
||||
},
|
||||
IntegerValue(LhsId),
|
||||
|
||||
/// Switch on the constant boolean value of an instruction.
|
||||
BooleanValue {
|
||||
/// The path to the instruction.
|
||||
path: PathId,
|
||||
},
|
||||
BooleanValue(LhsId),
|
||||
|
||||
/// Switch on a condition code.
|
||||
ConditionCode {
|
||||
/// The path to the condition code.
|
||||
path: PathId,
|
||||
},
|
||||
ConditionCode(LhsId),
|
||||
|
||||
/// No operation. Always evaluates to `None`.
|
||||
/// No operation. Always evaluates to `Else`.
|
||||
///
|
||||
/// Exceedingly rare in real optimizations; nonetheless required to support
|
||||
/// Never appears in real optimizations; nonetheless required to support
|
||||
/// corner cases of the DSL, such as a LHS pattern that is nothing but a
|
||||
/// variable pattern.
|
||||
/// variable.
|
||||
Nop,
|
||||
}
|
||||
|
||||
/// A canonicalized identifier for a left-hand side value that was bound in a
|
||||
/// pattern.
|
||||
///
|
||||
/// These are defined in a pre-order traversal of the LHS pattern by successful
|
||||
/// `MatchOp::Opcode` matches.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
|
||||
pub struct LhsId(pub u16);
|
||||
|
||||
/// A canonicalized identifier for a right-hand side value.
|
||||
///
|
||||
/// These are defined by RHS actions.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
pub struct RhsId(pub u16);
|
||||
|
||||
@@ -171,8 +142,8 @@ pub struct RhsId(pub u16);
|
||||
pub enum Action<TOperator> {
|
||||
/// Reuse something from the left-hand side.
|
||||
GetLhs {
|
||||
/// The path to the instruction or value.
|
||||
path: PathId,
|
||||
/// The left-hand side instruction or value.
|
||||
lhs: LhsId,
|
||||
},
|
||||
|
||||
/// Perform compile-time evaluation.
|
||||
|
||||
@@ -5,7 +5,6 @@ use crate::instruction_set::InstructionSet;
|
||||
use crate::integer_interner::IntegerInterner;
|
||||
use crate::linear::{Action, MatchOp, MatchResult};
|
||||
use crate::optimizer::PeepholeOptimizer;
|
||||
use crate::paths::PathInterner;
|
||||
use peepmatic_automata::Automaton;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fmt::Debug;
|
||||
@@ -25,9 +24,6 @@ pub struct PeepholeOptimizations<TOperator>
|
||||
where
|
||||
TOperator: 'static + Copy + Debug + Eq + Hash,
|
||||
{
|
||||
/// The instruction paths referenced by the peephole optimizations.
|
||||
pub paths: PathInterner,
|
||||
|
||||
/// Not all integers we're matching on fit in the `u32` that we use as the
|
||||
/// result of match operations. So we intern them and refer to them by id.
|
||||
pub integers: IntegerInterner,
|
||||
@@ -88,6 +84,7 @@ where
|
||||
PeepholeOptimizer {
|
||||
peep_opt: self,
|
||||
instr_set,
|
||||
left_hand_sides: vec![],
|
||||
right_hand_sides: vec![],
|
||||
actions: vec![],
|
||||
backtracking_states: vec![],
|
||||
|
||||
@@ -27,9 +27,10 @@ where
|
||||
{
|
||||
pub(crate) peep_opt: &'peep PeepholeOptimizations<TInstructionSet::Operator>,
|
||||
pub(crate) instr_set: TInstructionSet,
|
||||
pub(crate) left_hand_sides: Vec<Part<TInstructionSet::Instruction>>,
|
||||
pub(crate) right_hand_sides: Vec<Part<TInstructionSet::Instruction>>,
|
||||
pub(crate) actions: Vec<Action<TInstructionSet::Operator>>,
|
||||
pub(crate) backtracking_states: Vec<(State, usize)>,
|
||||
pub(crate) backtracking_states: Vec<(State, usize, usize)>,
|
||||
}
|
||||
|
||||
impl<'peep, 'ctx, TInstructionSet> Debug for PeepholeOptimizer<'peep, 'ctx, TInstructionSet>
|
||||
@@ -40,6 +41,7 @@ where
|
||||
let PeepholeOptimizer {
|
||||
peep_opt,
|
||||
instr_set: _,
|
||||
left_hand_sides,
|
||||
right_hand_sides,
|
||||
actions,
|
||||
backtracking_states,
|
||||
@@ -47,6 +49,7 @@ where
|
||||
f.debug_struct("PeepholeOptimizer")
|
||||
.field("peep_opt", peep_opt)
|
||||
.field("instr_set", &"_")
|
||||
.field("left_hand_sides", left_hand_sides)
|
||||
.field("right_hand_sides", right_hand_sides)
|
||||
.field("actions", actions)
|
||||
.field("backtracking_states", backtracking_states)
|
||||
@@ -117,12 +120,8 @@ where
|
||||
for action in actions.drain(..) {
|
||||
log::trace!("Evaluating action: {:?}", action);
|
||||
match action {
|
||||
Action::GetLhs { path } => {
|
||||
let path = self.peep_opt.paths.lookup(path);
|
||||
let lhs = self
|
||||
.instr_set
|
||||
.get_part_at_path(context, root, path)
|
||||
.expect("should always get part at path OK by the time it is bound");
|
||||
Action::GetLhs { lhs } => {
|
||||
let lhs = self.left_hand_sides[lhs.0 as usize];
|
||||
self.right_hand_sides.push(lhs);
|
||||
}
|
||||
Action::UnaryUnquote { operator, operand } => {
|
||||
@@ -284,22 +283,17 @@ where
|
||||
|
||||
log::trace!("Evaluating match operation: {:?}", match_op);
|
||||
let result: MatchResult = (|| match match_op {
|
||||
Opcode { path } => {
|
||||
let path = self.peep_opt.paths.lookup(path);
|
||||
let part = self
|
||||
.instr_set
|
||||
.get_part_at_path(context, root, path)
|
||||
.ok_or(Else)?;
|
||||
Opcode(id) => {
|
||||
let part = self.left_hand_sides[id.0 as usize];
|
||||
let inst = part.as_instruction().ok_or(Else)?;
|
||||
let op = self.instr_set.operator(context, inst).ok_or(Else)?;
|
||||
let op = self
|
||||
.instr_set
|
||||
.operator(context, inst, &mut self.left_hand_sides)
|
||||
.ok_or(Else)?;
|
||||
Ok(op.into())
|
||||
}
|
||||
IsConst { path } => {
|
||||
let path = self.peep_opt.paths.lookup(path);
|
||||
let part = self
|
||||
.instr_set
|
||||
.get_part_at_path(context, root, path)
|
||||
.ok_or(Else)?;
|
||||
IsConst(id) => {
|
||||
let part = self.left_hand_sides[id.0 as usize];
|
||||
let is_const = match part {
|
||||
Part::Instruction(i) => {
|
||||
self.instr_set.instruction_to_constant(context, i).is_some()
|
||||
@@ -308,12 +302,8 @@ where
|
||||
};
|
||||
bool_to_match_result(is_const)
|
||||
}
|
||||
IsPowerOfTwo { path } => {
|
||||
let path = self.peep_opt.paths.lookup(path);
|
||||
let part = self
|
||||
.instr_set
|
||||
.get_part_at_path(context, root, path)
|
||||
.ok_or(Else)?;
|
||||
IsPowerOfTwo(id) => {
|
||||
let part = self.left_hand_sides[id.0 as usize];
|
||||
match part {
|
||||
Part::Constant(c) => {
|
||||
let is_pow2 = c.as_int().unwrap().is_power_of_two();
|
||||
@@ -327,18 +317,11 @@ where
|
||||
let is_pow2 = c.as_int().unwrap().is_power_of_two();
|
||||
bool_to_match_result(is_pow2)
|
||||
}
|
||||
Part::ConditionCode(_) => unreachable!(
|
||||
"IsPowerOfTwo on a condition
|
||||
code"
|
||||
),
|
||||
Part::ConditionCode(_) => unreachable!("IsPowerOfTwo on a condition code"),
|
||||
}
|
||||
}
|
||||
BitWidth { path } => {
|
||||
let path = self.peep_opt.paths.lookup(path);
|
||||
let part = self
|
||||
.instr_set
|
||||
.get_part_at_path(context, root, path)
|
||||
.ok_or(Else)?;
|
||||
BitWidth(id) => {
|
||||
let part = self.left_hand_sides[id.0 as usize];
|
||||
let bit_width = match part {
|
||||
Part::Instruction(i) => self.instr_set.instruction_result_bit_width(context, i),
|
||||
Part::Constant(Constant::Int(_, w)) | Part::Constant(Constant::Bool(_, w)) => {
|
||||
@@ -355,15 +338,11 @@ where
|
||||
);
|
||||
Ok(unsafe { NonZeroU32::new_unchecked(bit_width as u32) })
|
||||
}
|
||||
FitsInNativeWord { path } => {
|
||||
FitsInNativeWord(id) => {
|
||||
let native_word_size = self.instr_set.native_word_size_in_bits(context);
|
||||
debug_assert!(native_word_size.is_power_of_two());
|
||||
|
||||
let path = self.peep_opt.paths.lookup(path);
|
||||
let part = self
|
||||
.instr_set
|
||||
.get_part_at_path(context, root, path)
|
||||
.ok_or(Else)?;
|
||||
let part = self.left_hand_sides[id.0 as usize];
|
||||
let fits = match part {
|
||||
Part::Instruction(i) => {
|
||||
let size = self.instr_set.instruction_result_bit_width(context, i);
|
||||
@@ -378,17 +357,9 @@ where
|
||||
};
|
||||
bool_to_match_result(fits)
|
||||
}
|
||||
Eq { path_a, path_b } => {
|
||||
let path_a = self.peep_opt.paths.lookup(path_a);
|
||||
let part_a = self
|
||||
.instr_set
|
||||
.get_part_at_path(context, root, path_a)
|
||||
.ok_or(Else)?;
|
||||
let path_b = self.peep_opt.paths.lookup(path_b);
|
||||
let part_b = self
|
||||
.instr_set
|
||||
.get_part_at_path(context, root, path_b)
|
||||
.ok_or(Else)?;
|
||||
Eq(a, b) => {
|
||||
let part_a = self.left_hand_sides[a.0 as usize];
|
||||
let part_b = self.left_hand_sides[b.0 as usize];
|
||||
let eq = match (part_a, part_b) {
|
||||
(Part::Instruction(inst), Part::Constant(c1))
|
||||
| (Part::Constant(c1), Part::Instruction(inst)) => {
|
||||
@@ -401,12 +372,8 @@ where
|
||||
};
|
||||
bool_to_match_result(eq)
|
||||
}
|
||||
IntegerValue { path } => {
|
||||
let path = self.peep_opt.paths.lookup(path);
|
||||
let part = self
|
||||
.instr_set
|
||||
.get_part_at_path(context, root, path)
|
||||
.ok_or(Else)?;
|
||||
IntegerValue(id) => {
|
||||
let part = self.left_hand_sides[id.0 as usize];
|
||||
match part {
|
||||
Part::Constant(c) => {
|
||||
let x = c.as_int().ok_or(Else)?;
|
||||
@@ -425,12 +392,8 @@ where
|
||||
Part::ConditionCode(_) => unreachable!("IntegerValue on condition code"),
|
||||
}
|
||||
}
|
||||
BooleanValue { path } => {
|
||||
let path = self.peep_opt.paths.lookup(path);
|
||||
let part = self
|
||||
.instr_set
|
||||
.get_part_at_path(context, root, path)
|
||||
.ok_or(Else)?;
|
||||
BooleanValue(id) => {
|
||||
let part = self.left_hand_sides[id.0 as usize];
|
||||
match part {
|
||||
Part::Constant(c) => {
|
||||
let b = c.as_bool().ok_or(Else)?;
|
||||
@@ -447,12 +410,8 @@ where
|
||||
Part::ConditionCode(_) => unreachable!("IntegerValue on condition code"),
|
||||
}
|
||||
}
|
||||
ConditionCode { path } => {
|
||||
let path = self.peep_opt.paths.lookup(path);
|
||||
let part = self
|
||||
.instr_set
|
||||
.get_part_at_path(context, root, path)
|
||||
.ok_or(Else)?;
|
||||
ConditionCode(id) => {
|
||||
let part = self.left_hand_sides[id.0 as usize];
|
||||
let cc = part.as_condition_code().ok_or(Else)?;
|
||||
let cc = cc as u32;
|
||||
debug_assert!(cc != 0);
|
||||
@@ -483,12 +442,20 @@ where
|
||||
self.backtracking_states.clear();
|
||||
self.actions.clear();
|
||||
self.right_hand_sides.clear();
|
||||
self.left_hand_sides.clear();
|
||||
|
||||
// `LhsId(0)` is always the root.
|
||||
self.left_hand_sides.push(Part::Instruction(root));
|
||||
|
||||
let mut r#final = None;
|
||||
|
||||
let mut query = self.peep_opt.automata.query();
|
||||
loop {
|
||||
log::trace!("Current state: {:?}", query.current_state());
|
||||
log::trace!(
|
||||
"self.left_hand_sides = {:#?}",
|
||||
self.left_hand_sides.iter().enumerate().collect::<Vec<_>>()
|
||||
);
|
||||
|
||||
if query.is_in_final_state() {
|
||||
// If we're in a final state (which means an optimization is
|
||||
@@ -507,8 +474,11 @@ where
|
||||
// optimization, we want to be able to backtrack to this state and
|
||||
// then try taking the `Else` transition.
|
||||
if query.has_transition_on(&Err(Else)) {
|
||||
self.backtracking_states
|
||||
.push((query.current_state(), self.actions.len()));
|
||||
self.backtracking_states.push((
|
||||
query.current_state(),
|
||||
self.actions.len(),
|
||||
self.left_hand_sides.len(),
|
||||
));
|
||||
}
|
||||
|
||||
let match_op = match query.current_state_data() {
|
||||
@@ -522,9 +492,10 @@ where
|
||||
actions
|
||||
} else if r#final.is_some() {
|
||||
break;
|
||||
} else if let Some((state, actions_len)) = self.backtracking_states.pop() {
|
||||
} else if let Some((state, actions_len, lhs_len)) = self.backtracking_states.pop() {
|
||||
query.go_to_state(state);
|
||||
self.actions.truncate(actions_len);
|
||||
self.left_hand_sides.truncate(lhs_len);
|
||||
query
|
||||
.next(&Err(Else))
|
||||
.expect("backtracking states always have `Else` transitions")
|
||||
|
||||
@@ -1,242 +0,0 @@
|
||||
//! Representing paths through the dataflow graph.
|
||||
//!
|
||||
//! Paths are relative to a *root* instruction, which is the instruction for
|
||||
//! which we are determining which optimizations, if any, apply.
|
||||
//!
|
||||
//! Paths are series of indices through each instruction's children as we
|
||||
//! traverse down the graph from the root. Children are immediates followed by
|
||||
//! arguments: `[imm0, imm1, ..., immN, arg0, arg1, ..., argN]`.
|
||||
//!
|
||||
//! ## Examples
|
||||
//!
|
||||
//! * `[0]` is the path to the root.
|
||||
//! * `[0, 0]` is the path to the root's first child.
|
||||
//! * `[0, 1]` is the path to the root's second child.
|
||||
//! * `[0, 1, 0]` is the path to the root's second child's first child.
|
||||
//!
|
||||
//! ## Interning
|
||||
//!
|
||||
//! To avoid extra allocations, de-duplicate paths, and reference them via a
|
||||
//! fixed-length value, we intern paths inside a `PathInterner` and then
|
||||
//! reference them via `PathId`.
|
||||
|
||||
// TODO: Make `[]` the path to the root, and get rid of this redundant leading
|
||||
// zero that is currently in every single path.
|
||||
|
||||
use serde::de::{Deserializer, SeqAccess, Visitor};
|
||||
use serde::ser::{SerializeSeq, Serializer};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::convert::TryInto;
|
||||
use std::fmt;
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::marker::PhantomData;
|
||||
|
||||
/// A path through the data-flow graph from the root instruction.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct Path<'a>(pub &'a [u8]);
|
||||
|
||||
impl Path<'_> {
|
||||
/// Construct a new path through the data-flow graph from the root
|
||||
/// instruction.
|
||||
pub fn new(path: &impl AsRef<[u8]>) -> Path {
|
||||
Path(path.as_ref())
|
||||
}
|
||||
}
|
||||
|
||||
/// An identifier for an interned path.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
pub struct PathId(u16);
|
||||
|
||||
/// An interner and de-duplicator for `Path`s.
|
||||
///
|
||||
/// Can be serialized and deserialized while maintaining the same id to interned
|
||||
/// path mapping.
|
||||
#[derive(Debug, Default)]
|
||||
pub struct PathInterner {
|
||||
/// A map from a path (whose owned data is inside `arena`) to the canonical
|
||||
/// `PathId` we assigned it when interning it.
|
||||
map: HashMap<UnsafePath, PathId>,
|
||||
|
||||
/// A map from a `PathId` index to an unsafe, self-borrowed path pointing
|
||||
/// into `arena`. It is safe to given these out as safe `Path`s, as long as
|
||||
/// the lifetime is not longer than this `PathInterner`'s lifetime.
|
||||
paths: Vec<UnsafePath>,
|
||||
|
||||
/// Bump allocation arena for path data. The bump arena ensures that these
|
||||
/// allocations never move, and are therefore safe for self-references.
|
||||
arena: bumpalo::Bump,
|
||||
}
|
||||
|
||||
impl PathInterner {
|
||||
/// Construct a new, empty `PathInterner`.
|
||||
#[inline]
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Intern a path into this `PathInterner`, returning its canonical
|
||||
/// `PathId`.
|
||||
///
|
||||
/// If we've already interned this path before, then the existing id we
|
||||
/// already assigned to it is returned. If we've never seen this path
|
||||
/// before, then it is copied into this `PathInterner` and a new id is
|
||||
/// assigned to it.
|
||||
#[inline]
|
||||
pub fn intern<'a>(&mut self, path: Path<'a>) -> PathId {
|
||||
let unsafe_path = unsafe { UnsafePath::from_path(&path) };
|
||||
if let Some(id) = self.map.get(&unsafe_path) {
|
||||
return *id;
|
||||
}
|
||||
self.intern_new(path)
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn intern_new<'a>(&mut self, path: Path<'a>) -> PathId {
|
||||
let id: u16 = self
|
||||
.paths
|
||||
.len()
|
||||
.try_into()
|
||||
.expect("too many paths interned");
|
||||
let id = PathId(id);
|
||||
|
||||
let our_path = self.arena.alloc_slice_copy(&path.0);
|
||||
let unsafe_path = unsafe { UnsafePath::from_slice(&our_path) };
|
||||
|
||||
self.paths.push(unsafe_path.clone());
|
||||
let old = self.map.insert(unsafe_path, id);
|
||||
|
||||
debug_assert!(old.is_none());
|
||||
debug_assert_eq!(self.lookup(id), path);
|
||||
debug_assert_eq!(self.intern(path), id);
|
||||
|
||||
id
|
||||
}
|
||||
|
||||
/// Lookup a previously interned path by id.
|
||||
#[inline]
|
||||
pub fn lookup<'a>(&'a self, id: PathId) -> Path<'a> {
|
||||
let unsafe_path = self
|
||||
.paths
|
||||
.get(id.0 as usize)
|
||||
.unwrap_or_else(|| Self::lookup_failure());
|
||||
unsafe { unsafe_path.as_path() }
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn lookup_failure() -> ! {
|
||||
panic!(
|
||||
"no path for the given id; this can only happen when mixing `PathId`s with different \
|
||||
`PathInterner`s"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for PathInterner {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
let mut seq = serializer.serialize_seq(Some(self.paths.len()))?;
|
||||
for p in &self.paths {
|
||||
let p = unsafe { p.as_path() };
|
||||
seq.serialize_element(&p)?;
|
||||
}
|
||||
seq.end()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for PathInterner {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
deserializer.deserialize_seq(PathInternerVisitor {
|
||||
marker: PhantomData,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
struct PathInternerVisitor {
|
||||
marker: PhantomData<fn() -> PathInterner>,
|
||||
}
|
||||
|
||||
impl<'de> Visitor<'de> for PathInternerVisitor {
|
||||
type Value = PathInterner;
|
||||
|
||||
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(formatter, "a `peepmatic_runtime::paths::PathInterner`")
|
||||
}
|
||||
|
||||
fn visit_seq<M>(self, mut access: M) -> Result<Self::Value, M::Error>
|
||||
where
|
||||
M: SeqAccess<'de>,
|
||||
{
|
||||
const DEFAULT_CAPACITY: usize = 16;
|
||||
let capacity = access.size_hint().unwrap_or(DEFAULT_CAPACITY);
|
||||
|
||||
let mut interner = PathInterner {
|
||||
map: HashMap::with_capacity(capacity),
|
||||
paths: Vec::with_capacity(capacity),
|
||||
arena: bumpalo::Bump::new(),
|
||||
};
|
||||
|
||||
while let Some(path) = access.next_element::<Path>()? {
|
||||
interner.intern(path);
|
||||
}
|
||||
|
||||
Ok(interner)
|
||||
}
|
||||
}
|
||||
|
||||
/// An unsafe, unchecked borrow of a path, stored as a raw pointer plus a
/// length instead of a lifetime-checked slice. Not for use outside of
/// `PathInterner`!
#[derive(Clone, Debug)]
struct UnsafePath {
    /// Pointer to the first byte of the path.
    ptr: *const u8,
    /// Number of bytes in the path.
    len: usize,
}
|
||||
|
||||
impl PartialEq for UnsafePath {
    /// Two paths are equal when the bytes they point at are equal; the
    /// pointer values themselves are not compared.
    fn eq(&self, rhs: &UnsafePath) -> bool {
        let (ours, theirs) = unsafe { (self.as_slice(), rhs.as_slice()) };
        ours == theirs
    }
}
|
||||
|
||||
// Marker impl on top of the byte-wise `PartialEq` implementation for `UnsafePath`.
impl Eq for UnsafePath {}
|
||||
|
||||
impl Hash for UnsafePath {
|
||||
fn hash<H>(&self, hasher: &mut H)
|
||||
where
|
||||
H: Hasher,
|
||||
{
|
||||
unsafe { self.as_slice().hash(hasher) }
|
||||
}
|
||||
}
|
||||
|
||||
/// Safety: callers must ensure that the constructed values won't have unsafe
|
||||
/// usages of `PartialEq`, `Eq`, or `Hash`.
|
||||
impl UnsafePath {
|
||||
unsafe fn from_path(p: &Path) -> Self {
|
||||
Self::from_slice(&p.0)
|
||||
}
|
||||
|
||||
unsafe fn from_slice(s: &[u8]) -> Self {
|
||||
UnsafePath {
|
||||
ptr: s.as_ptr(),
|
||||
len: s.len(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Safety: callers must ensure that `'a` does not outlive the lifetime of the
|
||||
/// underlying data.
|
||||
impl UnsafePath {
|
||||
unsafe fn as_slice<'a>(&self) -> &'a [u8] {
|
||||
std::slice::from_raw_parts(self.ptr, self.len)
|
||||
}
|
||||
|
||||
unsafe fn as_path<'a>(&self) -> Path<'a> {
|
||||
Path(self.as_slice())
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user