Add a libfilecheck crate.

This library implements functionality similar to LLVM's FileCheck utility, but
in library form.
This commit is contained in:
Jakob Stoklund Olesen
2016-09-01 14:38:01 -07:00
parent d8712b2ce6
commit 71b742ec34
7 changed files with 1613 additions and 0 deletions

View File

@@ -0,0 +1,12 @@
[package]
authors = ["The Cretonne Project Developers"]
name = "filecheck"
version = "0.0.0"
publish = false
[lib]
name = "filecheck"
path = "lib.rs"
[dependencies]
regex = "0.1.71"

View File

@@ -0,0 +1,392 @@
use error::{Error, Result};
use variable::{VariableMap, Value, varname_prefix};
use pattern::Pattern;
use regex::{Regex, Captures};
use std::collections::HashMap;
use std::cmp::max;
use std::fmt::{self, Display, Formatter};
// The different kinds of directives we support.
//
// Each variant corresponds to one keyword accepted by `DIRECTIVE_RX`. All variants except
// `Regex` carry the parsed `Pattern` that followed the keyword.
enum Directive {
// `check: <pattern>`.
Check(Pattern),
// `sameln: <pattern>`.
SameLn(Pattern),
// `nextln: <pattern>`.
NextLn(Pattern),
// `unordered: <pattern>`.
Unordered(Pattern),
// `not: <pattern>`.
Not(Pattern),
// `regex: <var>=<regex>`: the variable name, then the text following the `=`.
Regex(String, String),
}
// Regular expression matching a directive.
//
// The keyword must begin at a word boundary and be followed by a colon and at least one
// whitespace character.
//
// The match groups are:
//
// 1. Keyword.
// 2. Rest of line / pattern.
//
// NOTE(review): `\s` also matches line breaks, so when this expression is applied to multi-line
// text, a keyword at the very end of a line can take the following line as its pattern.
const DIRECTIVE_RX: &'static str = r"\b(check|sameln|nextln|unordered|not|regex):\s+(.*)";
impl Directive {
/// Create a new directive from a `DIRECTIVE_RX` match.
fn new(caps: Captures) -> Result<Directive> {
let cmd = caps.at(1).expect("group 1 must match");
let rest = caps.at(2).expect("group 2 must match");
if cmd == "regex" {
return Directive::regex(rest);
}
// All other commands are followed by a pattern.
let pat = try!(rest.parse());
match cmd {
"check" => Ok(Directive::Check(pat)),
"sameln" => Ok(Directive::SameLn(pat)),
"nextln" => Ok(Directive::NextLn(pat)),
"unordered" => Ok(Directive::Unordered(pat)),
"not" => {
if !pat.defs().is_empty() {
let msg = format!("can't define variables '$({}=...' in not: {}",
pat.defs()[0],
rest);
Err(Error::DuplicateDef(msg))
} else {
Ok(Directive::Not(pat))
}
}
_ => panic!("unexpected command {} in regex match", cmd),
}
}
/// Create a `regex:` directive from a `VAR=...` string.
fn regex(rest: &str) -> Result<Directive> {
let varlen = varname_prefix(rest);
if varlen == 0 {
return Err(Error::Syntax(format!("invalid variable name in regex: {}", rest)));
}
let var = rest[0..varlen].to_string();
if !rest[varlen..].starts_with("=") {
return Err(Error::Syntax(format!("expected '=' after variable '{}' in regex: {}",
var,
rest)));
}
Ok(Directive::Regex(var, rest[varlen + 1..].to_string()))
}
}
/// Builder for constructing a `Checker` instance.
///
/// Directives are accumulated one line at a time with `directive`, or in bulk with `text`, and
/// are handed over to a new `Checker` by `finish`.
pub struct CheckerBuilder {
// Directives collected so far, in the order they were added.
directives: Vec<Directive>,
// Compiled `DIRECTIVE_RX`, used to recognize directives in the text passed in.
linerx: Regex,
}
impl CheckerBuilder {
    /// Create a new, blank `CheckerBuilder`.
    pub fn new() -> CheckerBuilder {
        CheckerBuilder {
            directives: Vec::new(),
            // `DIRECTIVE_RX` is a constant, so a failure to compile it is a programming error.
            linerx: Regex::new(DIRECTIVE_RX).unwrap(),
        }
    }

    /// Add a potential directive line.
    ///
    /// Returns true if this is a directive with one of the known prefixes.
    /// Returns false if no known directive was found.
    /// Returns an error if there is a problem with the directive.
    pub fn directive(&mut self, l: &str) -> Result<bool> {
        match self.linerx.captures(l) {
            Some(caps) => {
                self.directives.push(try!(Directive::new(caps)));
                Ok(true)
            }
            None => Ok(false),
        }
    }

    /// Add multiple directives.
    ///
    /// The text is split into lines that are added individually as potential directives.
    /// This method can be used to parse a whole test file containing multiple directives.
    ///
    /// Returns an error as soon as one of the lines contains a malformed directive.
    pub fn text(&mut self, t: &str) -> Result<&mut Self> {
        // Feed the lines through `directive` one at a time. Matching the directive regex
        // against the whole text would let the `\s+` after the keyword run across a line break
        // and pick up the following line as the pattern.
        for line in t.lines() {
            try!(self.directive(line));
        }
        Ok(self)
    }

    /// Get the finished `Checker`.
    pub fn finish(&mut self) -> Checker {
        // Move directives into the new checker, leaving `self.directives` empty and ready for
        // building a new checker.
        Checker::new(self.directives.split_off(0))
    }
}
/// Verify a list of directives against a test input.
///
/// Use a `CheckerBuilder` to construct a `Checker`. Then use the `check` method to verify the list
/// of directives against a test input.
pub struct Checker {
// The directives to verify, in the order they are processed by `check`.
directives: Vec<Directive>,
}
impl Checker {
// Wrap a list of parsed directives. Used by `CheckerBuilder::finish`.
fn new(directives: Vec<Directive>) -> Checker {
Checker { directives: directives }
}
/// An empty checker contains no directives, and will match any input string.
pub fn is_empty(&self) -> bool {
self.directives.is_empty()
}
/// Verify directives against the input text.
///
/// The directives are processed in order. Variables that are not defined by the directives
/// themselves are looked up in `vars`.
///
/// This returns `true` if the text matches all the directives, `false` if it doesn't.
/// An error is only returned if there is a problem with the directives.
pub fn check(&self, text: &str, vars: &VariableMap) -> Result<bool> {
let mut state = State::new(text, vars);
// For each pending `not:` check, store (begin-offset, regex).
let mut nots = Vec::new();
for dct in &self.directives {
// Positive directives produce a pattern and the text range it must match in. `not:` and
// `regex:` directives are handled inside the match and skip the rest of the loop body.
let (pat, range) = match *dct {
Directive::Check(ref pat) => (pat, state.check()),
Directive::SameLn(ref pat) => (pat, state.sameln()),
Directive::NextLn(ref pat) => (pat, state.nextln()),
Directive::Unordered(ref pat) => (pat, state.unordered(pat)),
Directive::Not(ref pat) => {
// Resolve `not:` directives immediately to get the right variable values, but
// don't match it until we know the end of the range.
//
// The `not:` directives test the same range as `unordered:` directives. In
// particular, if they refer to defined variables, their range is restricted to
// the text following the match that defined the variable.
nots.push((state.unordered_begin(pat), try!(pat.resolve(&state))));
continue;
}
Directive::Regex(ref var, ref rx) => {
// A regex variable is usable from the beginning of the text, hence offset 0.
state.vars.insert(var.clone(),
VarDef {
value: Value::Regex(rx.clone()),
offset: 0,
});
continue;
}
};
// Check if `pat` matches in `range`.
if let Some((match_begin, match_end)) = try!(state.match_positive(pat, range)) {
if let &Directive::Unordered(_) = dct {
// This was an unordered match.
// Keep track of the largest matched position, but leave `last_ordered` alone.
state.max_match = max(state.max_match, match_end);
} else {
// Ordered match.
state.last_ordered = match_end;
state.max_match = match_end;
// Verify any pending `not:` directives now that we know their range.
for (not_begin, rx) in nots.drain(..) {
if let Some(_) = rx.find(&text[not_begin..match_begin]) {
// Matched `not:` pattern.
// TODO: Use matched range for an error message.
return Ok(false);
}
}
}
} else {
// No match!
return Ok(false);
}
}
// Verify any pending `not:` directives after the last ordered directive.
// Their range extends to the end of the text.
for (not_begin, rx) in nots.drain(..) {
if let Some(_) = rx.find(&text[not_begin..]) {
// Matched `not:` pattern.
// TODO: Use matched range for an error message.
return Ok(false);
}
}
Ok(true)
}
}
/// A local definition of a variable.
///
/// Local definitions are created while checking, either by a `regex:` directive or by a pattern
/// that defines a variable when it matches.
pub struct VarDef {
/// The value given to the variable.
value: Value,
/// Offset in input text from where the variable is available.
offset: usize,
}
// The state of a single `Checker::check` run over one input text.
struct State<'a> {
// Variables supplied by the caller. Consulted when a name has no local definition.
env_vars: &'a VariableMap,
// The input text being checked.
text: &'a str,
// Variables defined locally while processing the directives.
vars: HashMap<String, VarDef>,
// Offset after the last ordered match. This does not include recent unordered matches.
last_ordered: usize,
// Largest offset following a positive match, including unordered matches.
max_match: usize,
}
impl<'a> State<'a> {
    // Create the initial state for checking `text`: no local variables, and both match offsets
    // at the beginning of the text.
    fn new(text: &'a str, env_vars: &'a VariableMap) -> State<'a> {
        State {
            env_vars: env_vars,
            text: text,
            vars: HashMap::new(),
            last_ordered: 0,
            max_match: 0,
        }
    }

    // Offset from which the local variable `var` is available. Names without a local
    // definition (environment variables, unknown names) yield 0.
    fn def_offset(&self, var: &str) -> usize {
        match self.vars.get(var) {
            Some(def) => def.offset,
            None => 0,
        }
    }

    // Offset of the first character following the next newline at or after `pos`, or the length
    // of the text when there are no more newlines.
    fn bol(&self, pos: usize) -> usize {
        match self.text[pos..].find('\n') {
            Some(newline) => pos + newline + 1,
            None => self.text.len(),
        }
    }

    // Range searched by a `check:` directive: everything after the largest match so far.
    fn check(&self) -> (usize, usize) {
        let begin = self.max_match;
        let end = self.text.len();
        (begin, end)
    }

    // Range searched by a `sameln:` directive: from the largest match so far up to the beginning
    // of the following line.
    fn sameln(&self) -> (usize, usize) {
        (self.max_match, self.bol(self.max_match))
    }

    // Range searched by a `nextln:` directive: the line following the largest match so far.
    fn nextln(&self) -> (usize, usize) {
        let line_begin = self.bol(self.max_match);
        (line_begin, self.bol(line_begin))
    }

    // Beginning of the range searched by an `unordered:` or `not:` directive.
    //
    // The range starts after the last ordered match, and also after the definition of every
    // variable referenced by `pat`.
    fn unordered_begin(&self, pat: &Pattern) -> usize {
        pat.parts()
            .iter()
            .filter_map(|part| part.ref_var())
            .map(|var| self.def_offset(var))
            .fold(self.last_ordered, max)
    }

    // Range searched by an `unordered:` directive.
    fn unordered(&self, pat: &Pattern) -> (usize, usize) {
        let begin = self.unordered_begin(pat);
        (begin, self.text.len())
    }

    // Search for `pat` inside `range` and return the matched range as offsets into the whole
    // text. When the pattern matches, the variables it defines are recorded in `self.vars`.
    fn match_positive(&mut self,
                      pat: &Pattern,
                      range: (usize, usize))
                      -> Result<Option<(usize, usize)>> {
        let rx = try!(pat.resolve(self));
        let (begin, end) = range;
        let haystack = &self.text[begin..end];
        let defs = pat.defs();

        let found = if defs.is_empty() {
            // Nothing to capture, so a plain `find` is enough.
            rx.find(haystack)
        } else {
            // The capture groups provide the values of the defined variables.
            match rx.captures(haystack) {
                None => None,
                Some(caps) => {
                    let whole = caps.pos(0).expect("whole expression must match");
                    for var in defs {
                        let text = caps.name(var).unwrap_or("").to_string();
                        // The variable becomes available at the end of the whole match, not at
                        // the end of the text that defined it.
                        self.vars.insert(var.clone(),
                                         VarDef {
                                             value: Value::Text(text),
                                             offset: begin + whole.1,
                                         });
                    }
                    Some(whole)
                }
            }
        };

        // Translate offsets relative to `haystack` into offsets in the whole text.
        Ok(found.map(|(b, e)| (begin + b, begin + e)))
    }
}
impl<'a> VariableMap for State<'a> {
    /// Look up `varname`, letting local definitions take precedence over the environment
    /// variables supplied by the caller.
    fn lookup(&self, varname: &str) -> Option<Value> {
        match self.vars.get(varname) {
            // A local definition wins.
            Some(def) => Some(def.value.clone()),
            // Otherwise it may be an environment variable.
            None => self.env_vars.lookup(varname),
        }
    }
}
impl Display for Directive {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
use self::Directive::*;
match *self {
Check(ref pat) => writeln!(f, "check: {}", pat),
SameLn(ref pat) => writeln!(f, "sameln: {}", pat),
NextLn(ref pat) => writeln!(f, "nextln: {}", pat),
Unordered(ref pat) => writeln!(f, "unordered: {}", pat),
Not(ref pat) => writeln!(f, "not: {}", pat),
Regex(ref var, ref rx) => writeln!(f, "regex: {}={}", var, rx),
}
}
}
impl Display for Checker {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
for (idx, dir) in self.directives.iter().enumerate() {
try!(write!(f, "#{} {}", idx, dir));
}
Ok(())
}
}
#[cfg(test)]
mod tests {
    use super::CheckerBuilder;
    use error::Error;

    // Turn an error into its message so that results can be compared with `assert_eq!`.
    fn e2s(e: Error) -> String {
        format!("{}", e)
    }

    #[test]
    fn directive() {
        let mut builder = CheckerBuilder::new();

        // A keyword must be followed directly by a colon.
        assert_eq!(builder.directive("not here: more text").map_err(e2s),
                   Ok(false));
        // The directive doesn't have to be at the beginning of the line.
        assert_eq!(builder.directive("not here: regex: X=more text").map_err(e2s),
                   Ok(true));
        // No whitespace is allowed between the variable name and the `=`.
        assert_eq!(builder.directive("regex: X = tommy").map_err(e2s),
                   Err("expected '=' after variable 'X' in regex: X = tommy".to_string()));
        assert_eq!(builder.directive("[arm]not: patt $x $(y) here").map_err(e2s),
                   Ok(true));
        assert_eq!(builder.directive("[x86]sameln: $x $(y=[^]]*) there").map_err(e2s),
                   Ok(true));

        // Only the three accepted directives end up in the checker.
        let checker = builder.finish();
        let expected = concat!("#0 regex: X=more text\n",
                               "#1 not: patt $(x) $(y) here\n",
                               "#2 sameln: $(x) $(y=[^]]*) there\n");
        assert_eq!(checker.to_string(), expected);
    }
}

View File

@@ -0,0 +1,69 @@
use std::result;
use std::convert::From;
use std::error::Error as StdError;
use std::fmt;
use regex;
/// A result from the filecheck library.
///
/// This is the standard `Result` type with the error type fixed to the filecheck `Error`.
pub type Result<T> = result::Result<T, Error>;
/// A filecheck error.
///
/// All variants except `Regex` carry a message describing the problem.
#[derive(Debug)]
pub enum Error {
/// A syntax error in a check line.
Syntax(String),
/// A check refers to an undefined variable.
///
/// The pattern contains `$foo` where the `foo` variable has not yet been defined.
/// Use `$$` to match a literal dollar sign.
UndefVariable(String),
/// A pattern contains a back-reference to a variable that was defined in the same pattern.
///
/// For example, `check: Hello $(world=.*) $world`. Backreferences are not supported. Often the
/// desired effect can be achieved with the `sameln` check:
///
/// ```text
/// check: Hello $(world=[^ ]*)
/// sameln: $world
/// ```
Backref(String),
/// A pattern contains multiple definitions of the same variable.
DuplicateDef(String),
/// An error in a regular expression.
///
/// Use `cause()` to get the underlying `Regex` library error.
Regex(regex::Error),
}
impl StdError for Error {
fn description(&self) -> &str {
use Error::*;
match *self {
Syntax(ref s) => s,
UndefVariable(ref s) => s,
Backref(ref s) => s,
DuplicateDef(ref s) => s,
Regex(ref err) => err.description(),
}
}
fn cause(&self) -> Option<&StdError> {
use Error::*;
match *self {
Regex(ref err) => Some(err),
_ => None,
}
}
}
impl fmt::Display for Error {
    /// Display the error as its description text.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        let text = self.description();
        fmt.write_str(text)
    }
}
impl From<regex::Error> for Error {
fn from(e: regex::Error) -> Error {
Error::Regex(e)
}
}

View File

@@ -0,0 +1,246 @@
//! This crate provides a text pattern matching library with functionality similar to the LLVM
//! project's [FileCheck command](http://llvm.org/docs/CommandGuide/FileCheck.html).
//!
//! A list of directives is typically extracted from a file containing a test case. The test case
//! is then run through the program under test, and its output matched against the directives.
//!
//! See the [CheckerBuilder](struct.CheckerBuilder.html) and [Checker](struct.Checker.html) types
//! for the main library API.
//!
//! # Directives
//!
//! These are the directives recognized by *filecheck*:
//! <pre class="rust">
//! <a href="#the-check-directive">check: <i>&lt;pattern&gt;</i></a>
//! <a href="#the-sameln-directive">sameln: <i>&lt;pattern&gt;</i></a>
//! <a href="#the-nextln-directive">nextln: <i>&lt;pattern&gt;</i></a>
//! <a href="#the-unordered-directive">unordered: <i>&lt;pattern&gt;</i></a>
//! <a href="#the-not-directive">not: <i>&lt;pattern&gt;</i></a>
//! <a href="#the-regex-directive">regex: <i>&lt;variable&gt;</i>=<i>&lt;regex&gt;</i></a>
//! </pre>
//! Each directive is described in more detail below.
//!
//! ## Example
//!
//! The Rust program below prints the primes less than 100. It has *filecheck* directives embedded
//! in comments:
//!
//! ```rust
//! fn is_prime(x: u32) -> bool {
//! (2..x).all(|d| x % d != 0)
//! }
//!
//! // Check that we get the primes and nothing else:
//! // regex: NUM=\d+
//! // not: $NUM
//! // check: 2
//! // nextln: 3
//! // check: 89
//! // nextln: 97
//! // not: $NUM
//! fn main() {
//! for p in (2..100).filter(|&x| is_prime(x)) {
//! println!("{}", p);
//! }
//! }
//! ```
//!
//! A test driver compiles and runs the program, then pipes the output through *filecheck*:
//!
//! ```sh
//! $ rustc primes.rs
//! $ ./primes | cton-util filecheck -v
//! #0 regex: NUM=\d+
//! #1 not: $NUM
//! #2 check: 2
//! #3 nextln: 3
//! #4 check: 89
//! #5 nextln: 97
//! #6 not: $NUM
//! no match #1: \d+
//! > 2
//! ~
//! match #2: \b2\b
//! > 3
//! ~
//! match #3: \b3\b
//! > 5
//! > 7
//! ...
//! > 79
//! > 83
//! > 89
//! ~~
//! match #4: \b89\b
//! > 97
//! ~~
//! match #5: \b97\b
//! no match #6: \d+
//! OK
//! ```
//!
//! ## The `check:` directive
//!
//! Match patterns non-overlapping and in order:
//!
//! ```sh
//! #0 check: one
//! #1 check: two
//! ```
//!
//! These directives will match the string `"one two"`, but not `"two one"`. The second directive
//! must match after the first one, and it can't overlap.
//!
//! ## The `sameln:` directive
//!
//! Match a pattern in the same line as the previous match.
//!
//! ```sh
//! #0 check: one
//! #1 sameln: two
//! ```
//!
//! These directives will match the string `"one two"`, but not `"one\ntwo"`. The second match must
//! be in the same line as the first. Like the `check:` directive, the match must also follow the
//! first match, so `"two one"` would not be matched.
//!
//! If there is no previous match, `sameln:` matches on the first line of the input.
//!
//! ## The `nextln:` directive
//!
//! Match a pattern in the next line after the previous match.
//!
//! ```sh
//! #0 check: one
//! #1 nextln: two
//! ```
//!
//! These directives will match the string `"one\ntwo"`, but not `"one two"` or `"one\n\ntwo"`.
//!
//! If there is no previous match, `nextln:` matches on the second line of the input as if there
//! were a previous match on the first line.
//!
//! ## The `unordered:` directive
//!
//! Match patterns in any order, and possibly overlapping each other.
//!
//! ```sh
//! #0 unordered: one
//! #1 unordered: two
//! ```
//!
//! These directives will match the string `"one two"` *and* the string `"two one"`.
//!
//! When a normal ordered match is inserted into a sequence of `unordered:` directives, it acts as
//! a barrier:
//!
//! ```sh
//! #0 unordered: one
//! #1 unordered: two
//! #2 check: three
//! #3 unordered: four
//! #4 unordered: five
//! ```
//!
//! These directives will match `"two one three four five"`, but not `"two three one four five"`.
//! The `unordered:` matches are not allowed to cross the ordered `check:` directive.
//!
//! When `unordered:` matches define and use variables, a topological order is enforced. This means
//! that a match referencing a variable must follow the match where the variable was defined:
//!
//! ```sh
//! #0 regex: V=\bv\d+\b
//! #1 unordered: $(va=$V) = load
//! #2 unordered: $(vb=$V) = iadd $va
//! #3 unordered: $(vc=$V) = load
//! #4 unordered: iadd $va, $vc
//! ```
//!
//! In the above directives, #2 must match after #1, and #4 must match after both #1 and #3, but
//! otherwise they can match in any order.
//!
//! ## The `not:` directive
//!
//! Check that a pattern *does not* appear between matches.
//!
//! ```sh
//! #0 check: one
//! #1 not: two
//! #2 check: three
//! ```
//!
//! The directives above will match `"one five three"`, but not `"one two three"`.
//!
//! The pattern in a `not:` directive can't define any variables. Since it never matches anything,
//! the variables would not get a value.
//!
//! ## The `regex:` directive
//!
//! Define a shorthand name for a regular expression.
//!
//! ```sh
//! #0 regex: ID=\b[_a-zA-Z][_0-9a-zA-Z]*\b
//! #1 check: $ID + $ID
//! ```
//!
//! The `regex:` directive gives a name to a regular expression which can then be used as part of a
//! pattern to match. Patterns are otherwise just plain text strings to match, so this is not
//! simple macro expansion.
//!
//! See [the Rust regex crate](../regex/index.html#syntax) for the regular expression syntax.
//!
//! # Patterns and variables
//!
//! Patterns are plain text strings to be matched in the input file. The dollar sign is used as an
//! escape character to expand variables. The following escape sequences are recognized:
//!
//! <pre>
//! $$ Match single dollar sign.
//! $() Match the empty string.
//! $(=<i>&lt;regex&gt;</i>) Match regular expression <i>&lt;regex&gt;</i>.
//! $<i>&lt;var&gt;</i> Match contents of variable <i>&lt;var&gt;</i>.
//! $(<i>&lt;var&gt;</i>) Match contents of variable <i>&lt;var&gt;</i>.
//! $(<i>&lt;var&gt;</i>=<i>&lt;regex&gt;</i>) Match <i>&lt;regex&gt;</i>, then
//! define <i>&lt;var&gt;</i> as the matched text.
//! $(<i>&lt;var&gt;</i>=$<i>&lt;rxvar&gt;</i>) Match regex in <i>&lt;rxvar&gt;</i>, then
//! define <i>&lt;var&gt;</i> as the matched text.
//! </pre>
//!
//! Variables can contain either plain text or regular expressions. Plain text variables are
//! defined with the `$(var=...)` syntax in a previous directive. They match the same text again.
//! Backreferences within the same pattern are not allowed. When a variable is defined in a
//! pattern, it can't be referenced again in the same pattern.
//!
//! Regular expression variables are defined with the `regex:` directive. They match the regular
//! expression each time they are used, so the matches don't need to be identical.
//!
//! ## Word boundaries
//!
//! If a pattern begins or ends with a (plain text) letter or number, it will only match on a word
//! boundary. Use the `$()` empty string match to prevent this:
//!
//! ```sh
//! check: one$()
//! ```
//!
//! This will match `"one"` and `"onetwo"`, but not `"zeroone"`.
//!
//! The empty match syntax can also be used to require leading or trailing whitespace:
//!
//! ```sh
//! check: one, $()
//! ```
//!
//! This will match `"one, two"` , but not `"one,two"`. Without the `$()`, trailing whitespace
//! would be trimmed from the pattern.
pub use error::{Error, Result};
pub use variable::{VariableMap, Value, NO_VARIABLES};
pub use checker::{Checker, CheckerBuilder};
extern crate regex;
mod error;
mod variable;
mod pattern;
mod checker;

View File

@@ -0,0 +1,523 @@
//! Pattern matching for a single directive.
use error::{Error, Result};
use variable::{varname_prefix, VariableMap, Value};
use std::str::FromStr;
use std::fmt::{self, Display, Formatter, Write};
use regex::{Regex, RegexBuilder, quote};
/// A pattern to match as specified in a directive.
///
/// Each pattern is broken into a sequence of parts that must match in order. The kinds of parts
/// are:
///
/// 1. Plain text match.
/// 2. Variable match, `$FOO` or `$(FOO)`. The variable `FOO` may expand to plain text or a regex.
/// 3. Variable definition from literal regex, `$(foo=.*)`. Match the regex and assign matching text
/// to variable `foo`.
/// 4. Variable definition from regex variable, `$(foo=$RX)`. Lookup variable `RX` which should
/// expand to a regex, match the regex, and assign matching text to variable `foo`.
///
/// Patterns are created by parsing a string through the `FromStr` trait.
pub struct Pattern {
// The parts of the pattern, in the order they must match.
parts: Vec<Part>,
// Variables defined by this pattern.
// The `def` fields of `Part::DefLit` and `Part::DefVar` are indexes into this list.
defs: Vec<String>,
}
/// One atomic part of a pattern.
#[derive(Debug, PartialEq, Eq)]
pub enum Part {
/// Match a plain string.
Text(String),
/// Match a regular expression. The regex has already been wrapped in a non-capturing group if
/// necessary, so it is safe to concatenate.
Regex(String),
/// Match the contents of a variable, which can be plain text or regex.
Var(String),
/// Match literal regex, then assign match to variable `defs[def]`.
/// The regex has already been wrapped in a named capture group.
DefLit { def: usize, regex: String },
/// Lookup variable `var`, match resulting regex, assign matching text to variable `defs[def]`.
DefVar { def: usize, var: String },
}
impl Part {
    /// Get the name of the variable this part refers to, if any.
    ///
    /// Only `Var` and `DefVar` parts look up a variable. For a `DefVar` this is the variable
    /// being looked up, not the one being defined.
    pub fn ref_var(&self) -> Option<&str> {
        match *self {
            Part::Var(ref name) |
            Part::DefVar { var: ref name, .. } => Some(name),
            Part::Text(_) | Part::Regex(_) | Part::DefLit { .. } => None,
        }
    }
}
impl Pattern {
/// Create a new blank pattern. Use the `FromStr` trait to generate Patterns with content.
fn new() -> Pattern {
Pattern {
parts: Vec::new(),
defs: Vec::new(),
}
}
/// Check if the variable `v` is defined by this pattern.
pub fn defines_var(&self, v: &str) -> bool {
self.defs.iter().any(|d| d == v)
}
/// Add a definition of a new variable.
/// Return the allocated def number.
///
/// Returns a `DuplicateDef` error if `v` is already defined by this pattern.
fn add_def(&mut self, v: &str) -> Result<usize> {
if self.defines_var(v) {
Err(Error::DuplicateDef(format!("duplicate definition of ${} in same pattern", v)))
} else {
let idx = self.defs.len();
self.defs.push(v.to_string());
Ok(idx)
}
}
/// Parse a `Part` from a prefix of `s`.
/// Return the part and the number of bytes consumed from `s`.
/// Adds defined variables to `self.defs`.
///
/// Returns a `Syntax` error for a malformed `$` sequence, and a `DuplicateDef` error if the part
/// defines a variable that is already defined by this pattern.
fn parse_part(&mut self, s: &str) -> Result<(Part, usize)> {
let dollar = s.find('$');
if dollar != Some(0) {
// String doesn't begin with a dollar sign, so match plain text up to the dollar sign.
let end = dollar.unwrap_or(s.len());
return Ok((Part::Text(s[0..end].to_string()), end));
}
// String starts with a dollar sign. Look for these possibilities:
//
// 1. `$$`.
// 2. `$var`.
// 3. `$(var)`.
// 4. `$(var=regex)`. Where `regex` is a regular expression possibly containing matching
// parentheses.
// 5. `$(var=$VAR)`.
// A doubled dollar sign matches a single dollar sign.
if s.starts_with("$$") {
return Ok((Part::Text("$".to_string()), 2));
}
// Look for `$var`.
// `varname_end` is the offset just past the variable name; it stays at 1 when no name follows
// the dollar sign.
let varname_end = 1 + varname_prefix(&s[1..]);
if varname_end != 1 {
return Ok((Part::Var(s[1..varname_end].to_string()), varname_end));
}
// All remaining possibilities start with `$(`.
if s.len() < 2 || !s.starts_with("$(") {
return Err(Error::Syntax("pattern syntax error, use $$ to match a single $"
.to_string()));
}
// Match the variable name, allowing for an empty varname in `$()`, or `$(=...)`.
let varname_end = 2 + varname_prefix(&s[2..]);
let varname = s[2..varname_end].to_string();
// The character following the name decides which form this is.
match s[varname_end..].chars().next() {
None => {
return Err(Error::Syntax(format!("unterminated $({}...", varname)));
}
Some(')') => {
let part = if varname.is_empty() {
// Match `$()`, turn it into an empty text match.
Part::Text(varname)
} else {
// Match `$(var)`.
Part::Var(varname)
};
return Ok((part, varname_end + 1));
}
Some('=') => {
// Variable definition. Fall through.
}
Some(ch) => {
return Err(Error::Syntax(format!("syntax error in $({}... '{}'", varname, ch)));
}
}
// This is a variable definition of the form `$(var=...`.
// Allocate a definition index.
// An empty name, as in `$(=...)`, defines nothing.
let def = if varname.is_empty() {
None
} else {
Some(try!(self.add_def(&varname)))
};
// Match `$(var=$PAT)`.
if s[varname_end + 1..].starts_with('$') {
// Skip the `=` and the `$` to get to the referenced variable name.
let refname_begin = varname_end + 2;
let refname_end = refname_begin + varname_prefix(&s[refname_begin..]);
if refname_begin == refname_end {
return Err(Error::Syntax(format!("expected variable name in $({}=$...", varname)));
}
if !s[refname_end..].starts_with(')') {
return Err(Error::Syntax(format!("expected ')' after $({}=${}...",
varname,
&s[refname_begin..refname_end])));
}
let refname = s[refname_begin..refname_end].to_string();
return if let Some(defidx) = def {
Ok((Part::DefVar {
def: defidx,
var: refname,
},
refname_end + 1))
} else {
// `$(=$VAR)` has no name to assign the matched text to.
Err(Error::Syntax(format!("expected variable name in $(=${})", refname)))
};
}
// Last case: `$(var=...)` where `...` is a regular expression, possibly containing matched
// parentheses.
let rx_begin = varname_end + 1;
let rx_end = rx_begin + regex_prefix(&s[rx_begin..]);
if s[rx_end..].starts_with(')') {
let part = if let Some(defidx) = def {
// Wrap the regex in a named capture group.
Part::DefLit {
def: defidx,
regex: format!("(?P<{}>{})", varname, &s[rx_begin..rx_end]),
}
} else {
// When the varname is empty just match the regex, don't capture any variables.
// This is `$(=[a-z])`.
// Wrap the regex in a non-capturing group to make it concatenation-safe.
Part::Regex(format!("(?:{})", &s[rx_begin..rx_end]))
};
// Consume the closing `)` too.
Ok((part, rx_end + 1))
} else {
Err(Error::Syntax(format!("missing ')' after regex in $({}={}",
varname,
&s[rx_begin..rx_end])))
}
}
}
/// Compute the length of a regular expression terminated by `)` or `}`.
///
/// Handles nested and escaped parentheses, `{...}` groups and `[...]` character classes in the
/// regex, but doesn't actually parse it. A backslash escapes the character following it,
/// both inside and outside character classes.
///
/// Returns the byte position of the terminating `)` or `}`, or the length of the string if no
/// terminator is found.
fn regex_prefix(s: &str) -> usize {
    // The previous char was a backslash.
    let mut escape = false;
    // State around parsing charsets.
    enum State {
        Normal, // Outside any charset.
        Curly, // Inside curly braces.
        CSFirst, // Immediately after opening `[`.
        CSNeg, // Immediately after `[^`.
        CSBody, // Inside `[...`.
    }
    let mut state = State::Normal;

    // Current nesting level of parens.
    let mut nest = 0usize;

    for (idx, ch) in s.char_indices() {
        if escape {
            escape = false;
            // An escaped character at the beginning of a charset is an ordinary member of the
            // set, so we are now in the body. Without this, the `]` closing `[\]]` would be
            // mistaken for a literal first member and the set would never be closed.
            match state {
                State::CSFirst | State::CSNeg => state = State::CSBody,
                _ => {}
            }
            continue;
        } else if ch == '\\' {
            escape = true;
            continue;
        }

        match state {
            State::Normal => {
                match ch {
                    '[' => state = State::CSFirst,
                    '{' => state = State::Curly,
                    '(' => nest += 1,
                    ')' if nest > 0 => nest -= 1,
                    // An unbalanced closing paren or brace terminates the regex.
                    ')' | '}' => return idx,
                    _ => {}
                }
            }
            State::Curly => {
                if ch == '}' {
                    state = State::Normal;
                }
            }
            State::CSFirst => {
                // The first char in a charset is a member of the set, even if it is `]`.
                state = match ch {
                    '^' => State::CSNeg,
                    _ => State::CSBody,
                }
            }
            // The first char after `[^` is a member of the set too.
            State::CSNeg => state = State::CSBody,
            State::CSBody => {
                if ch == ']' {
                    state = State::Normal;
                }
            }
        }
    }

    s.len()
}
impl FromStr for Pattern {
type Err = Error;
fn from_str(s: &str) -> Result<Pattern> {
// Always remove leading and trailing whitespace.
// Use `$()` to actually include that in a match.
let s = s.trim();
let mut pat = Pattern::new();
let mut pos = 0;
while pos < s.len() {
let (part, len) = try!(pat.parse_part(&s[pos..]));
if let Some(v) = part.ref_var() {
if pat.defines_var(v) {
return Err(Error::Backref(format!("unsupported back-reference to '${}' \
defined in same pattern",
v)));
}
}
pat.parts.push(part);
pos += len;
}
Ok(pat)
}
}
impl Pattern {
/// Get a list of parts in this pattern.
pub fn parts(&self) -> &[Part] {
&self.parts
}
/// Get a list of variable names defined when this pattern matches.
pub fn defs(&self) -> &[String] {
&self.defs
}
/// Resolve all variable references in this pattern, turning it into a regular expression.
pub fn resolve(&self, vmap: &VariableMap) -> Result<Regex> {
let mut out = String::new();
// Add a word boundary check `\b` to the beginning of the regex, but only if the first part
// is a plain text match that starts with a word character.
//
// This behavior can be disabled by starting the pattern with `$()`.
if let Some(&Part::Text(ref s)) = self.parts.first() {
if s.starts_with(char::is_alphanumeric) {
out.push_str(r"\b");
}
}
for part in &self.parts {
match *part {
Part::Text(ref s) => {
out.push_str(&quote(s));
}
Part::Regex(ref rx) => out.push_str(rx),
Part::Var(ref var) => {
// Resolve the variable. We can handle a plain text expansion.
match vmap.lookup(var) {
None => {
return Err(Error::UndefVariable(format!("undefined variable ${}", var)))
}
Some(Value::Text(s)) => out.push_str(&quote(&s)),
// Wrap regex in non-capturing group for safe concatenation.
Some(Value::Regex(rx)) => write!(out, "(?:{})", rx).unwrap(),
}
}
Part::DefLit { ref regex, .. } => out.push_str(regex),
Part::DefVar { def, ref var } => {
// Wrap regex in a named capture group.
write!(out,
"(?P<{}>{})",
self.defs[def],
match vmap.lookup(var) {
None => {
return Err(Error::UndefVariable(format!("undefined variable \
${}",
var)))
}
Some(Value::Text(s)) => quote(&s),
Some(Value::Regex(rx)) => rx,
})
.unwrap()
}
}
}
// Add a word boundary check `\b` to the end of the regex, but only if the final part
// is a plain text match that ends with a word character.
//
// This behavior can be disabled by ending the pattern with `$()`.
if let Some(&Part::Text(ref s)) = self.parts.last() {
if s.ends_with(char::is_alphanumeric) {
out.push_str(r"\b");
}
}
Ok(try!(RegexBuilder::new(&out).multi_line(true).compile()))
}
}
impl Display for Pattern {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
for part in &self.parts {
use self::Part::*;
try!(match *part {
Text(ref txt) if txt == "" => write!(f, "$()"),
Text(ref txt) if txt == "$" => write!(f, "$$"),
Text(ref txt) => write!(f, "{}", txt),
Regex(ref rx) => write!(f, "$(={})", rx),
Var(ref var) => write!(f, "$({})", var),
DefLit { def, ref regex } => {
let defvar = &self.defs[def];
// (?P<defvar>...).
let litrx = &regex[5 + defvar.len()..regex.len() - 1];
write!(f, "$({}={})", defvar, litrx)
}
DefVar { def, ref var } => write!(f, "$({}=${})", self.defs[def], var),
});
}
Ok(())
}
}
// Unit tests for the pattern parser: `regex_prefix`, `Pattern::parse_part` and parsing a
// whole `Pattern` from a string.
#[cfg(test)]
mod tests {
// `regex_prefix` returns how many bytes at the start of the string belong to a regex.
#[test]
fn regex() {
use super::regex_prefix;
// An unbalanced `)` ends the regex and is not included in the prefix.
assert_eq!(regex_prefix(""), 0);
assert_eq!(regex_prefix(")"), 0);
assert_eq!(regex_prefix(")c"), 0);
assert_eq!(regex_prefix("x"), 1);
assert_eq!(regex_prefix("x)x"), 1);
// Balanced parentheses are part of the regex.
assert_eq!(regex_prefix("x(c))x"), 4);
assert_eq!(regex_prefix("()x(c))x"), 6);
assert_eq!(regex_prefix("()x(c)"), 6);
// A `)` inside a `[...]` character class does not end the regex.
assert_eq!(regex_prefix("x([)]))x"), 6);
assert_eq!(regex_prefix("x[)])x"), 4);
assert_eq!(regex_prefix("x[^)])x"), 5);
// Here the whole string is consumed; apparently a `]` directly after `[^` counts as a
// class member, so the class is never closed.
assert_eq!(regex_prefix("x[^])x"), 6);
}
// `parse_part` returns the first part of the input together with the number of bytes it
// consumed, or an error for malformed `$` syntax.
#[test]
fn part() {
use super::{Pattern, Part};
let mut pat = Pattern::new();
// This is dubious, should we panic instead?
assert_eq!(pat.parse_part("").unwrap(), (Part::Text("".to_string()), 0));
// Plain text runs up to, but not including, the next `$`.
assert_eq!(pat.parse_part("x").unwrap(),
(Part::Text("x".to_string()), 1));
assert_eq!(pat.parse_part("x2").unwrap(),
(Part::Text("x2".to_string()), 2));
assert_eq!(pat.parse_part("x$").unwrap(),
(Part::Text("x".to_string()), 1));
assert_eq!(pat.parse_part("x$$").unwrap(),
(Part::Text("x".to_string()), 1));
// A lone `$` is an error; `$$` is a literal dollar sign.
assert_eq!(pat.parse_part("$").unwrap_err().to_string(),
"pattern syntax error, use $$ to match a single $");
assert_eq!(pat.parse_part("$$").unwrap(),
(Part::Text("$".to_string()), 2));
assert_eq!(pat.parse_part("$$ ").unwrap(),
(Part::Text("$".to_string()), 2));
// `$name` and `$(name)` are both variable references.
assert_eq!(pat.parse_part("$0").unwrap(),
(Part::Var("0".to_string()), 2));
assert_eq!(pat.parse_part("$xx=").unwrap(),
(Part::Var("xx".to_string()), 3));
assert_eq!(pat.parse_part("$xx$").unwrap(),
(Part::Var("xx".to_string()), 3));
assert_eq!(pat.parse_part("$(0)").unwrap(),
(Part::Var("0".to_string()), 4));
// `$()` parses as an empty text part.
assert_eq!(pat.parse_part("$()").unwrap(),
(Part::Text("".to_string()), 3));
// Malformed `$(...)` forms. Each failing definition below uses a distinct name
// (eo0..eo3).
assert_eq!(pat.parse_part("$(0").unwrap_err().to_string(),
("unterminated $(0..."));
assert_eq!(pat.parse_part("$(foo:").unwrap_err().to_string(),
("syntax error in $(foo... ':'"));
assert_eq!(pat.parse_part("$(foo =").unwrap_err().to_string(),
("syntax error in $(foo... ' '"));
assert_eq!(pat.parse_part("$(eo0=$bar").unwrap_err().to_string(),
("expected ')' after $(eo0=$bar..."));
assert_eq!(pat.parse_part("$(eo1=$bar}").unwrap_err().to_string(),
("expected ')' after $(eo1=$bar..."));
assert_eq!(pat.parse_part("$(eo2=$)").unwrap_err().to_string(),
("expected variable name in $(eo2=$..."));
assert_eq!(pat.parse_part("$(eo3=$-)").unwrap_err().to_string(),
("expected variable name in $(eo3=$..."));
}
// Variable definitions. All calls share one `Pattern`, so the `def` index in each
// expected part depends on the order of the calls in this test.
#[test]
fn partdefs() {
use super::{Pattern, Part};
let mut pat = Pattern::new();
// `$(name=$var)` defines `name` from another variable.
assert_eq!(pat.parse_part("$(foo=$bar)").unwrap(),
(Part::DefVar {
def: 0,
var: "bar".to_string(),
},
11));
// Defining the same name twice in one pattern is an error.
assert_eq!(pat.parse_part("$(foo=$bar)").unwrap_err().to_string(),
"duplicate definition of $foo in same pattern");
// Parsing stops after the closing `)`; the trailing `x` is not consumed.
assert_eq!(pat.parse_part("$(fxo=$bar)x").unwrap(),
(Part::DefVar {
def: 1,
var: "bar".to_string(),
},
11));
// `$(name=regex)` stores the regex already wrapped in a named capture group.
assert_eq!(pat.parse_part("$(fo2=[a-z])").unwrap(),
(Part::DefLit {
def: 2,
regex: "(?P<fo2>[a-z])".to_string(),
},
12));
// A `)` inside a character class does not terminate the definition.
assert_eq!(pat.parse_part("$(fo3=[a-)])").unwrap(),
(Part::DefLit {
def: 3,
regex: "(?P<fo3>[a-)])".to_string(),
},
12));
// An empty regex is accepted.
assert_eq!(pat.parse_part("$(fo4=)").unwrap(),
(Part::DefLit {
def: 4,
regex: "(?P<fo4>)".to_string(),
},
7));
// `$(=regex)` is an anonymous regex, stored in a non-capturing group.
assert_eq!(pat.parse_part("$(=.*)").unwrap(),
(Part::Regex("(?:.*)".to_string()), 6));
assert_eq!(pat.parse_part("$(=)").unwrap(),
(Part::Regex("(?:)".to_string()), 4));
assert_eq!(pat.parse_part("$()").unwrap(),
(Part::Text("".to_string()), 3));
}
// Parsing a whole pattern from a string.
#[test]
fn pattern() {
use super::Pattern;
// The surrounding whitespace of the input does not show up in the parsed text part.
let p: Pattern = " Hello world! ".parse().unwrap();
assert_eq!(format!("{:?}", p.parts), "[Text(\"Hello world!\")]");
// `$foo` ends at the `=`, which becomes a text part of its own.
let p: Pattern = " $foo=$(bar) ".parse().unwrap();
assert_eq!(format!("{:?}", p.parts),
"[Var(\"foo\"), Text(\"=\"), Var(\"bar\")]");
}
}

View File

@@ -0,0 +1,313 @@
extern crate filecheck;
use filecheck::{CheckerBuilder, NO_VARIABLES, Error as FcError};
// Render a filecheck error as its display string, so that test results can be compared
// with `assert_eq!` against `Ok(true)` / `Ok(false)`.
fn e2s(e: FcError) -> String {
    format!("{}", e)
}
#[test]
fn empty() {
    // A builder that was given no input at all produces an empty checker.
    let checker = CheckerBuilder::new().finish();
    assert!(checker.is_empty());

    // An empty checker matches anything, including the empty string.
    for &input in ["", "hello"].iter() {
        assert_eq!(checker.check(input, NO_VARIABLES).map_err(e2s), Ok(true));
    }
}
#[test]
fn no_directives() {
    // Text that contains no directives leaves the checker empty.
    let checker = CheckerBuilder::new().text("nothing here").unwrap().finish();
    assert!(checker.is_empty());

    // An empty checker matches anything.
    for &input in ["", "hello"].iter() {
        assert_eq!(checker.check(input, NO_VARIABLES).map_err(e2s), Ok(true));
    }
}
#[test]
fn no_matches() {
    // A lone `regex:` directive makes the checker non-empty...
    let checker = CheckerBuilder::new().text("regex: FOO=bar").unwrap().finish();
    assert!(!checker.is_empty());

    // ...but the check still succeeds on any input.
    for &input in ["", "hello"].iter() {
        assert_eq!(checker.check(input, NO_VARIABLES).map_err(e2s), Ok(true));
    }
}
#[test]
fn simple() {
    // Two plain `check:` directives.
    let directives = "
check: one
check: two
";
    let checker = CheckerBuilder::new().text(directives).unwrap().finish();

    // "one" appears before "two".
    let in_order = "
zero
one
and a half
two
three
";
    // "two" appears before "one".
    let swapped = "
zero
and a half
two
one
three
";

    for &(text, expected) in [(in_order, true), (swapped, false)].iter() {
        assert_eq!(checker.check(text, NO_VARIABLES).map_err(e2s), Ok(expected));
    }
}
#[test]
fn sameln() {
    // `sameln:` following a `check:` directive.
    let directives = "
check: one
sameln: two
";
    let checker = CheckerBuilder::new().text(directives).unwrap().finish();

    // "two" is two lines below "one".
    let line_between = "
zero
one
and a half
two
three
";
    // "two" is on the line right after "one".
    let following_line = "
zero
one
two
three
";
    // "one" and "two" share a line.
    let shared_line = "
zero
one two
three
";

    let cases = [(line_between, false), (following_line, false), (shared_line, true)];
    for &(text, expected) in cases.iter() {
        assert_eq!(checker.check(text, NO_VARIABLES).map_err(e2s), Ok(expected));
    }
}
#[test]
fn nextln() {
    // `nextln:` following a `check:` directive.
    let directives = "
check: one
nextln: two
";
    let checker = CheckerBuilder::new().text(directives).unwrap().finish();

    // "two" is two lines below "one".
    let line_between = "
zero
one
and a half
two
three
";
    // "two" is on the line right after "one".
    let following_line = "
zero
one
two
three
";
    // "one" and "two" share a line.
    let shared_line = "
zero
one two
three
";
    // "two" is on the following line, which is the last one and has no trailing newline.
    let unterminated_last_line = "
zero
one
two";

    let cases = [(line_between, false),
                 (following_line, true),
                 (shared_line, false),
                 (unterminated_last_line, true)];
    for &(text, expected) in cases.iter() {
        assert_eq!(checker.check(text, NO_VARIABLES).map_err(e2s), Ok(expected));
    }
}
#[test]
fn leading_nextln() {
    // A leading nextln directive should match from line 2.
    // This is somewhat arbitrary, but consistent with a preceding 'check: $()' directive.
    let directives = "
nextln: one
nextln: two
";
    let checker = CheckerBuilder::new().text(directives).unwrap().finish();

    // "one" is on line 2.
    let one_on_second_line = "zero
one
two
three
";
    // "one" is on line 1.
    let one_on_first_line = "one
two
three
";

    for &(text, expected) in [(one_on_second_line, true), (one_on_first_line, false)].iter() {
        assert_eq!(checker.check(text, NO_VARIABLES).map_err(e2s), Ok(expected));
    }
}
#[test]
fn leading_sameln() {
    // A leading sameln directive should match from line 1.
    let directives = "
sameln: one
sameln: two
";
    let checker = CheckerBuilder::new().text(directives).unwrap().finish();

    // Both words are on line 2.
    let both_on_second_line = "zero
one two three
";
    // Both words are on line 1.
    let both_on_first_line = "zero one two three";
    // "one" is on line 1, "two" on line 2.
    let split_over_lines = "zero one
two three";

    let cases = [(both_on_second_line, false),
                 (both_on_first_line, true),
                 (split_over_lines, false)];
    for &(text, expected) in cases.iter() {
        assert_eq!(checker.check(text, NO_VARIABLES).map_err(e2s), Ok(expected));
    }
}
#[test]
fn not() {
    // A `not:` directive bracketed by two `check:` directives.
    let directives = "
check: one$()
not: $()eat$()
check: $()two
";
    let checker = CheckerBuilder::new().text(directives).unwrap().finish();

    // Input text paired with the expected check result.
    let cases = [("onetwo", true),
                 ("one eat two", false),
                 ("oneeattwo", false),
                 ("oneatwo", true)];
    for &(text, expected) in cases.iter() {
        assert_eq!(checker.check(text, NO_VARIABLES).map_err(e2s), Ok(expected));
    }
}
#[test]
fn notnot() {
    // Two consecutive `not:` directives bracketed by two `check:` directives.
    let directives = "
check: one$()
not: $()eat$()
not: half
check: $()two
";
    let checker = CheckerBuilder::new().text(directives).unwrap().finish();

    // Input text paired with the expected check result.
    let cases = [("onetwo", true),
                 ("one eat two", false),
                 ("one half two", false),
                 ("oneeattwo", false),
                 // The `not: half` pattern only matches whole words, but the bracketing
                 // matches are considered word boundaries, so it does match in this case.
                 ("onehalftwo", false),
                 ("oneatwo", true)];
    for &(text, expected) in cases.iter() {
        assert_eq!(checker.check(text, NO_VARIABLES).map_err(e2s), Ok(expected));
    }
}
#[test]
fn unordered() {
    // Two `unordered:` directives between a leading and a trailing `check:`.
    let directives = "
check: one
unordered: two
unordered: three
check: four
";
    let checker = CheckerBuilder::new().text(directives).unwrap().finish();

    // Input text paired with the expected check result.
    let cases = [("one two three four", true),
                 ("one three two four", true),
                 ("one two four three four", true),
                 ("one three four two four", true),
                 ("one two four three", false),
                 ("one three four two", false)];
    for &(text, expected) in cases.iter() {
        assert_eq!(checker.check(text, NO_VARIABLES).map_err(e2s), Ok(expected));
    }
}
#[test]
fn leading_unordered() {
    // The `unordered:` group comes first; only a trailing `check:` follows it.
    let directives = "
unordered: two
unordered: three
check: four
";
    let checker = CheckerBuilder::new().text(directives).unwrap().finish();

    // Input text paired with the expected check result.
    let cases = [("one two three four", true),
                 ("one three two four", true),
                 ("one two four three four", true),
                 ("one three four two four", true),
                 ("one two four three", false),
                 ("one three four two", false)];
    for &(text, expected) in cases.iter() {
        assert_eq!(checker.check(text, NO_VARIABLES).map_err(e2s), Ok(expected));
    }
}
#[test]
fn trailing_unordered() {
    // The `unordered:` group comes last; no directive follows it.
    let directives = "
check: one
unordered: two
unordered: three
";
    let checker = CheckerBuilder::new().text(directives).unwrap().finish();

    // Every input below is expected to pass.
    let inputs = ["one two three four",
                  "one three two four",
                  "one two four three four",
                  "one three four two four",
                  "one two four three",
                  "one three four two"];
    for &text in inputs.iter() {
        assert_eq!(checker.check(text, NO_VARIABLES).map_err(e2s), Ok(true));
    }
}

View File

@@ -0,0 +1,58 @@
/// Find the length of the variable name at the beginning of `s`.
///
/// A variable name consists of one or more ASCII letters, ASCII digits and underscores.
/// Purely numerical names like `$45` are allowed too.
///
/// Returns the byte index of the first character that is not part of the name, which is
/// `s.len()` when the whole string is a name, and 0 when `s` does not start with a
/// variable name at all.
pub fn varname_prefix(s: &str) -> usize {
    let is_name_char = |ch: char| {
        ch == '_' || ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') ||
        ('0' <= ch && ch <= '9')
    };
    // `find` reports the byte offset of the first character rejected by the predicate.
    s.find(|ch: char| !is_name_char(ch)).unwrap_or(s.len())
}
/// A variable can contain either a regular expression or plain text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Value {
/// Plain text. Pattern resolution passes it through `quote` before splicing it into the
/// generated regex.
Text(String),
/// Regular expression source. Pattern resolution splices it into the generated regex
/// as-is, wrapped in a group.
Regex(String),
}
/// Resolve variables by name.
///
/// The unit type `()` implements this trait as a map that contains no variables.
pub trait VariableMap {
/// Get the value of the variable `varname`, or return `None` for an unknown variable name.
fn lookup(&self, varname: &str) -> Option<Value>;
}
/// The unit type acts as a variable map without any variables.
impl VariableMap for () {
/// Every name is unknown, so the lookup always returns `None`.
fn lookup(&self, _: &str) -> Option<Value> {
None
}
}
/// An empty variable map.
///
/// This is the unit type's `VariableMap` implementation, so every lookup returns `None`.
pub const NO_VARIABLES: &'static VariableMap = &();
#[cfg(test)]
mod tests {
    // `varname_prefix` returns the length of the leading variable name, or 0 if there is
    // none.
    #[test]
    fn varname() {
        use super::varname_prefix;

        // Input string paired with the expected prefix length.
        let cases = [("", 0),
                     ("\0", 0),
                     ("_", 1),
                     ("0", 1),
                     ("01", 2),
                     ("b", 1),
                     ("C", 1),
                     (".", 0),
                     (".s", 0),
                     ("0.", 1),
                     ("01=", 2),
                     ("0a)", 2)];
        for &(input, expected) in cases.iter() {
            assert_eq!(varname_prefix(input), expected);
        }
    }
}