From 71b742ec347258a16e40b59b9ccd8ecadcccde7c Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Thu, 1 Sep 2016 14:38:01 -0700 Subject: [PATCH] Add a libfilecheck crate. This library implements functionality similar to LLVM's FileCheck utility, but in library form. --- cranelift/src/libfilecheck/Cargo.toml | 12 + cranelift/src/libfilecheck/checker.rs | 392 ++++++++++++++++ cranelift/src/libfilecheck/error.rs | 69 +++ cranelift/src/libfilecheck/lib.rs | 246 ++++++++++ cranelift/src/libfilecheck/pattern.rs | 523 ++++++++++++++++++++++ cranelift/src/libfilecheck/tests/basic.rs | 313 +++++++++++++ cranelift/src/libfilecheck/variable.rs | 58 +++ 7 files changed, 1613 insertions(+) create mode 100644 cranelift/src/libfilecheck/Cargo.toml create mode 100644 cranelift/src/libfilecheck/checker.rs create mode 100644 cranelift/src/libfilecheck/error.rs create mode 100644 cranelift/src/libfilecheck/lib.rs create mode 100644 cranelift/src/libfilecheck/pattern.rs create mode 100644 cranelift/src/libfilecheck/tests/basic.rs create mode 100644 cranelift/src/libfilecheck/variable.rs diff --git a/cranelift/src/libfilecheck/Cargo.toml b/cranelift/src/libfilecheck/Cargo.toml new file mode 100644 index 0000000000..4f85778d92 --- /dev/null +++ b/cranelift/src/libfilecheck/Cargo.toml @@ -0,0 +1,12 @@ +[package] +authors = ["The Cretonne Project Developers"] +name = "filecheck" +version = "0.0.0" +publish = false + +[lib] +name = "filecheck" +path = "lib.rs" + +[dependencies] +regex = "0.1.71" diff --git a/cranelift/src/libfilecheck/checker.rs b/cranelift/src/libfilecheck/checker.rs new file mode 100644 index 0000000000..8abcbfdd3d --- /dev/null +++ b/cranelift/src/libfilecheck/checker.rs @@ -0,0 +1,392 @@ +use error::{Error, Result}; +use variable::{VariableMap, Value, varname_prefix}; +use pattern::Pattern; +use regex::{Regex, Captures}; +use std::collections::HashMap; +use std::cmp::max; +use std::fmt::{self, Display, Formatter}; + +// The different kinds of directives we 
support. +enum Directive { + Check(Pattern), + SameLn(Pattern), + NextLn(Pattern), + Unordered(Pattern), + Not(Pattern), + Regex(String, String), +} + +// Regular expression matching a directive. +// The match groups are: +// +// 1. Keyword. +// 2. Rest of line / pattern. +// +const DIRECTIVE_RX: &'static str = r"\b(check|sameln|nextln|unordered|not|regex):\s+(.*)"; + +impl Directive { + /// Create a new directive from a `DIRECTIVE_RX` match. + fn new(caps: Captures) -> Result { + let cmd = caps.at(1).expect("group 1 must match"); + let rest = caps.at(2).expect("group 2 must match"); + + if cmd == "regex" { + return Directive::regex(rest); + } + + // All other commands are followed by a pattern. + let pat = try!(rest.parse()); + + match cmd { + "check" => Ok(Directive::Check(pat)), + "sameln" => Ok(Directive::SameLn(pat)), + "nextln" => Ok(Directive::NextLn(pat)), + "unordered" => Ok(Directive::Unordered(pat)), + "not" => { + if !pat.defs().is_empty() { + let msg = format!("can't define variables '$({}=...' in not: {}", + pat.defs()[0], + rest); + Err(Error::DuplicateDef(msg)) + } else { + Ok(Directive::Not(pat)) + } + } + _ => panic!("unexpected command {} in regex match", cmd), + } + } + + /// Create a `regex:` directive from a `VAR=...` string. + fn regex(rest: &str) -> Result { + let varlen = varname_prefix(rest); + if varlen == 0 { + return Err(Error::Syntax(format!("invalid variable name in regex: {}", rest))); + } + let var = rest[0..varlen].to_string(); + if !rest[varlen..].starts_with("=") { + return Err(Error::Syntax(format!("expected '=' after variable '{}' in regex: {}", + var, + rest))); + } + Ok(Directive::Regex(var, rest[varlen + 1..].to_string())) + } +} + + +/// Builder for constructing a `Checker` instance. +pub struct CheckerBuilder { + directives: Vec, + linerx: Regex, +} + +impl CheckerBuilder { + /// Create a new, blank `CheckerBuilder`. 
+ pub fn new() -> CheckerBuilder { + CheckerBuilder { + directives: Vec::new(), + linerx: Regex::new(DIRECTIVE_RX).unwrap(), + } + } + + /// Add a potential directive line. + /// + /// Returns true if this is a directive with one of the known prefixes. + /// Returns false if no known directive was found. + /// Returns an error if there is a problem with the directive. + pub fn directive(&mut self, l: &str) -> Result { + match self.linerx.captures(l) { + Some(caps) => { + self.directives.push(try!(Directive::new(caps))); + Ok(true) + } + None => Ok(false), + } + } + + /// Add multiple directives. + /// + /// The text is split into lines that are added individually as potential directives. + /// This method can be used to parse a whole test file containing multiple directives. + pub fn text(&mut self, t: &str) -> Result<&mut Self> { + for caps in self.linerx.captures_iter(t) { + self.directives.push(try!(Directive::new(caps))); + } + Ok(self) + } + + /// Get the finished `Checker`. + pub fn finish(&mut self) -> Checker { + // Move directives into the new checker, leaving `self.directives` empty and ready for + // building a new checker. + Checker::new(self.directives.split_off(0)) + } +} + +/// Verify a list of directives against a test input. +/// +/// Use a `CheckerBuilder` to construct a `Checker`. Then use the `check` method to verify the list +/// of directives against a test input. +pub struct Checker { + directives: Vec, +} + +impl Checker { + fn new(directives: Vec) -> Checker { + Checker { directives: directives } + } + + /// An empty checker contains no directives, and will match any input string. + pub fn is_empty(&self) -> bool { + self.directives.is_empty() + } + + /// Verify directives against the input text. + /// + /// This returns `true` if the text matches all the directives, `false` if it doesn't. + /// An error is only returned if there is a problem with the directives. 
+ pub fn check(&self, text: &str, vars: &VariableMap) -> Result { + let mut state = State::new(text, vars); + + // For each pending `not:` check, store (begin-offset, regex). + let mut nots = Vec::new(); + + for dct in &self.directives { + let (pat, range) = match *dct { + Directive::Check(ref pat) => (pat, state.check()), + Directive::SameLn(ref pat) => (pat, state.sameln()), + Directive::NextLn(ref pat) => (pat, state.nextln()), + Directive::Unordered(ref pat) => (pat, state.unordered(pat)), + Directive::Not(ref pat) => { + // Resolve `not:` directives immediately to get the right variable values, but + // don't match it until we know the end of the range. + // + // The `not:` directives test the same range as `unordered:` directives. In + // particular, if they refer to defined variables, their range is restricted to + // the text following the match that defined the variable. + nots.push((state.unordered_begin(pat), try!(pat.resolve(&state)))); + continue; + } + Directive::Regex(ref var, ref rx) => { + state.vars.insert(var.clone(), + VarDef { + value: Value::Regex(rx.clone()), + offset: 0, + }); + continue; + } + }; + // Check if `pat` matches in `range`. + if let Some((match_begin, match_end)) = try!(state.match_positive(pat, range)) { + if let &Directive::Unordered(_) = dct { + // This was an unordered unordered match. + // Keep track of the largest matched position, but leave `last_ordered` alone. + state.max_match = max(state.max_match, match_end); + } else { + // Ordered match. + state.last_ordered = match_end; + state.max_match = match_end; + + // Verify any pending `not:` directives now that we know their range. + for (not_begin, rx) in nots.drain(..) { + if let Some(_) = rx.find(&text[not_begin..match_begin]) { + // Matched `not:` pattern. + // TODO: Use matched range for an error message. + return Ok(false); + } + } + } + } else { + // No match! + return Ok(false); + } + } + + // Verify any pending `not:` directives after the last ordered directive. 
+ for (not_begin, rx) in nots.drain(..) { + if let Some(_) = rx.find(&text[not_begin..]) { + // Matched `not:` pattern. + // TODO: Use matched range for an error message. + return Ok(false); + } + } + + Ok(true) + } +} + +/// A local definition of a variable. +pub struct VarDef { + /// The value given to the variable. + value: Value, + /// Offset in input text from where the variable is available. + offset: usize, +} + +struct State<'a> { + env_vars: &'a VariableMap, + text: &'a str, + vars: HashMap, + // Offset after the last ordered match. This does not include recent unordered matches. + last_ordered: usize, + // Largest offset following a positive match, including unordered matches. + max_match: usize, +} + +impl<'a> State<'a> { + fn new(text: &'a str, env_vars: &'a VariableMap) -> State<'a> { + State { + text: text, + env_vars: env_vars, + vars: HashMap::new(), + last_ordered: 0, + max_match: 0, + } + } + + // Get the offset following the match that defined `var`, or 0 if var is an environment + // variable or unknown. + fn def_offset(&self, var: &str) -> usize { + self.vars.get(var).map(|&VarDef { offset, .. }| offset).unwrap_or(0) + } + + // Get the offset of the beginning of the next line after `pos`. + fn bol(&self, pos: usize) -> usize { + if let Some(offset) = self.text[pos..].find('\n') { + pos + offset + 1 + } else { + self.text.len() + } + } + + // Get the range in text to be matched by a `check:`. + fn check(&self) -> (usize, usize) { + (self.max_match, self.text.len()) + } + + // Get the range in text to be matched by a `sameln:`. + fn sameln(&self) -> (usize, usize) { + let b = self.max_match; + let e = self.bol(b); + (b, e) + } + + // Get the range in text to be matched by a `nextln:`. + fn nextln(&self) -> (usize, usize) { + let b = self.bol(self.max_match); + let e = self.bol(b); + (b, e) + } + + // Get the beginning of the range in text to be matched by a `unordered:` or `not:` directive. 
+ // The unordered directive must match after the directives that define the variables used. + fn unordered_begin(&self, pat: &Pattern) -> usize { + let mut from = self.last_ordered; + for part in pat.parts() { + if let Some(var) = part.ref_var() { + from = max(from, self.def_offset(var)); + } + } + from + } + + // Get the range in text to be matched by a `unordered:` directive. + fn unordered(&self, pat: &Pattern) -> (usize, usize) { + (self.unordered_begin(pat), self.text.len()) + } + + // Search for `pat` in `range`, return the range matched. + // After a positive match, update variable definitions, if any. + fn match_positive(&mut self, + pat: &Pattern, + range: (usize, usize)) + -> Result> { + let rx = try!(pat.resolve(self)); + let txt = &self.text[range.0..range.1]; + let defs = pat.defs(); + let matched_range = if defs.is_empty() { + // Pattern defines no variables. Fastest search is `find`. + rx.find(txt) + } else { + // We need the captures to define variables. + rx.captures(txt).map(|caps| { + let matched_range = caps.pos(0).expect("whole expression must match"); + for var in defs { + let vardef = VarDef { + value: Value::Text(caps.name(var).unwrap_or("").to_string()), + // This offset is the end of the whole matched pattern, not just the text + // defining the variable. + offset: range.0 + matched_range.1, + }; + self.vars.insert(var.clone(), vardef); + } + matched_range + }) + }; + Ok(matched_range.map(|(b, e)| (range.0 + b, range.0 + e))) + } +} + +impl<'a> VariableMap for State<'a> { + fn lookup(&self, varname: &str) -> Option { + // First look for a local define. + if let Some(&VarDef { ref value, .. }) = self.vars.get(varname) { + Some(value.clone()) + } else { + // No local, maybe an environment variable? 
+ self.env_vars.lookup(varname) + } + } +} + +impl Display for Directive { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + use self::Directive::*; + match *self { + Check(ref pat) => writeln!(f, "check: {}", pat), + SameLn(ref pat) => writeln!(f, "sameln: {}", pat), + NextLn(ref pat) => writeln!(f, "nextln: {}", pat), + Unordered(ref pat) => writeln!(f, "unordered: {}", pat), + Not(ref pat) => writeln!(f, "not: {}", pat), + Regex(ref var, ref rx) => writeln!(f, "regex: {}={}", var, rx), + } + } +} + +impl Display for Checker { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + for (idx, dir) in self.directives.iter().enumerate() { + try!(write!(f, "#{} {}", idx, dir)); + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::CheckerBuilder; + use error::Error; + + fn e2s(e: Error) -> String { + e.to_string() + } + + #[test] + fn directive() { + let mut b = CheckerBuilder::new(); + + assert_eq!(b.directive("not here: more text").map_err(e2s), Ok(false)); + assert_eq!(b.directive("not here: regex: X=more text").map_err(e2s), + Ok(true)); + assert_eq!(b.directive("regex: X = tommy").map_err(e2s), + Err("expected '=' after variable 'X' in regex: X = tommy".to_string())); + assert_eq!(b.directive("[arm]not: patt $x $(y) here").map_err(e2s), + Ok(true)); + assert_eq!(b.directive("[x86]sameln: $x $(y=[^]]*) there").map_err(e2s), + Ok(true)); + + let c = b.finish(); + assert_eq!(c.to_string(), + "#0 regex: X=more text\n#1 not: patt $(x) $(y) here\n#2 sameln: $(x) \ + $(y=[^]]*) there\n"); + } +} diff --git a/cranelift/src/libfilecheck/error.rs b/cranelift/src/libfilecheck/error.rs new file mode 100644 index 0000000000..1cc17001fc --- /dev/null +++ b/cranelift/src/libfilecheck/error.rs @@ -0,0 +1,69 @@ +use std::result; +use std::convert::From; +use std::error::Error as StdError; +use std::fmt; +use regex; + +/// A result from the filecheck library. +pub type Result = result::Result; + +/// A filecheck error. 
+#[derive(Debug)] +pub enum Error { + /// A syntax error in a check line. + Syntax(String), + /// A check refers to an undefined variable. + /// + /// The pattern contains `$foo` where the `foo` variable has not yet been defined. + /// Use `$$` to match a literal dollar sign. + UndefVariable(String), + /// A pattern contains a back-reference to a variable that was defined in the same pattern. + /// + /// For example, `check: Hello $(world=.*) $world`. Backreferences are not supported. Often the + /// desired effect can be achieved with the `sameln` check: + /// + /// ```text + /// check: Hello $(world=[^ ]*) + /// sameln: $world + /// ``` + Backref(String), + /// A pattern contains multiple definitions of the same variable. + DuplicateDef(String), + /// An error in a regular expression. + /// + /// Use `cause()` to get the underlying `Regex` library error. + Regex(regex::Error), +} + +impl StdError for Error { + fn description(&self) -> &str { + use Error::*; + match *self { + Syntax(ref s) => s, + UndefVariable(ref s) => s, + Backref(ref s) => s, + DuplicateDef(ref s) => s, + Regex(ref err) => err.description(), + } + } + + fn cause(&self) -> Option<&StdError> { + use Error::*; + match *self { + Regex(ref err) => Some(err), + _ => None, + } + } +} + +impl fmt::Display for Error { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "{}", self.description()) + } +} + +impl From for Error { + fn from(e: regex::Error) -> Error { + Error::Regex(e) + } +} diff --git a/cranelift/src/libfilecheck/lib.rs b/cranelift/src/libfilecheck/lib.rs new file mode 100644 index 0000000000..16d6d5028d --- /dev/null +++ b/cranelift/src/libfilecheck/lib.rs @@ -0,0 +1,246 @@ +//! This crate provides a text pattern matching library with functionality similar to the LLVM +//! project's [FileCheck command](http://llvm.org/docs/CommandGuide/FileCheck.html). +//! +//! A list of directives is typically extracted from a file containing a test case. The test case +//! 
is then run through the program under test, and its output matched against the directives. +//! +//! See the [CheckerBuilder](struct.CheckerBuilder.html) and [Checker](struct.Checker.html) types +//! for the main library API. +//! +//! # Directives +//! +//! These are the directives recognized by *filecheck*: +//!
+//! check: <pattern>
+//! sameln: <pattern>
+//! nextln: <pattern>
+//! unordered: <pattern>
+//! not: <pattern>
+//! regex: <variable>=<regex>
+//! 
+//! Each directive is described in more detail below. +//! +//! ## Example +//! +//! The Rust program below prints the primes less than 100. It has *filecheck* directives embedded +//! in comments: +//! +//! ```rust +//! fn is_prime(x: u32) -> bool { +//! (2..x).all(|d| x % d != 0) +//! } +//! +//! // Check that we get the primes and nothing else: +//! // regex: NUM=\d+ +//! // not: $NUM +//! // check: 2 +//! // nextln: 3 +//! // check: 89 +//! // nextln: 97 +//! // not: $NUM +//! fn main() { +//! for p in (2..100).filter(|&x| is_prime(x)) { +//! println!("{}", p); +//! } +//! } +//! ``` +//! +//! A test driver compiles and runs the program, then pipes the output through *filecheck*: +//! +//! ```sh +//! $ rustc primes.rs +//! $ ./primes | cton-util filecheck -v +//! #0 regex: NUM=\d+ +//! #1 not: $NUM +//! #2 check: 2 +//! #3 nextln: 3 +//! #4 check: 89 +//! #5 nextln: 97 +//! #6 not: $NUM +//! no match #1: \d+ +//! > 2 +//! ~ +//! match #2: \b2\b +//! > 3 +//! ~ +//! match #3: \b3\b +//! > 5 +//! > 7 +//! ... +//! > 79 +//! > 83 +//! > 89 +//! ~~ +//! match #4: \b89\b +//! > 97 +//! ~~ +//! match #5: \b97\b +//! no match #6: \d+ +//! OK +//! ``` +//! +//! ## The `check:` directive +//! +//! Match patterns non-overlapping and in order: +//! +//! ```sh +//! #0 check: one +//! #1 check: two +//! ``` +//! +//! These directives will match the string `"one two"`, but not `"two one"`. The second directive +//! must match after the first one, and it can't overlap. +//! +//! ## The `sameln:` directive +//! +//! Match a pattern in the same line as the previous match. +//! +//! ```sh +//! #0 check: one +//! #1 sameln: two +//! ``` +//! +//! These directives will match the string `"one two"`, but not `"one\ntwo"`. The second match must +//! be in the same line as the first. Like the `check:` directive, the match must also follow the +//! first match, so `"two one"` would not be matched. +//! +//! If there is no previous match, `sameln:` matches on the first line of the input. 
+//! +//! ## The `nextln:` directive +//! +//! Match a pattern in the next line after the previous match. +//! +//! ```sh +//! #0 check: one +//! #1 nextln: two +//! ``` +//! +//! These directives will match the string `"one\ntwo"`, but not `"one two"` or `"one\n\ntwo"`. +//! +//! If there is no previous match, `nextln:` matches on the second line of the input as if there +//! were a previous match on the first line. +//! +//! ## The `unordered:` directive +//! +//! Match patterns in any order, and possibly overlapping each other. +//! +//! ```sh +//! #0 unordered: one +//! #1 unordered: two +//! ``` +//! +//! These directives will match the string `"one two"` *and* the string `"two one"`. +//! +//! When a normal ordered match is inserted into a sequence of `unordered:` directives, it acts as +//! a barrier: +//! +//! ```sh +//! #0 unordered: one +//! #1 unordered: two +//! #2 check: three +//! #3 unordered: four +//! #4 unordered: five +//! ``` +//! +//! These directives will match `"two one three four five"`, but not `"two three one four five"`. +//! The `unordered:` matches are not allowed to cross the ordered `check:` directive. +//! +//! When `unordered:` matches define and use variables, a topological order is enforced. This means +//! that a match referencing a variable must follow the match where the variable was defined: +//! +//! ```sh +//! #0 regex: V=\bv\d+\b +//! #1 unordered: $(va=$V) = load +//! #2 unordered: $(vb=$V) = iadd $va +//! #3 unordered: $(vc=$V) = load +//! #4 unordered: iadd $va, $vc +//! ``` +//! +//! In the above directives, #2 must match after #1, and #4 must match after both #1 and #3, but +//! otherwise they can match in any order. +//! +//! ## The `not:` directive +//! +//! Check that a pattern *does not* appear between matches. +//! +//! ```sh +//! #0 check: one +//! #1 not: two +//! #2 check: three +//! ``` +//! +//! The directives above will match `"one five three"`, but not `"one two three"`. +//! +//! 
The pattern in a `not:` directive can't define any variables. Since it never matches anything, +//! the variables would not get a value. +//! +//! ## The `regex:` directive +//! +//! Define a shorthand name for a regular expression. +//! +//! ```sh +//! #0 regex: ID=\b[_a-zA-Z][_0-9a-zA-Z]*\b +//! #1 check: $ID + $ID +//! ``` +//! +//! The `regex:` directive gives a name to a regular expression which can then be used as part of a +//! pattern to match. Patterns are otherwise just plain text strings to match, so this is not +//! simple macro expansion. +//! +//! See [the Rust regex crate](../regex/index.html#syntax) for the regular expression syntax. +//! +//! # Patterns and variables +//! +//! Patterns are plain text strings to be matched in the input file. The dollar sign is used as an +//! escape character to expand variables. The following escape sequences are recognized: +//! +//!
+//! $$                Match single dollar sign.
+//! $()               Match the empty string.
+//! $(=<regex>)       Match regular expression <regex>.
+//! $<var>            Match contents of variable <var>.
+//! $(<var>)          Match contents of variable <var>.
+//! $(<var>=<regex>)  Match <regex>, then
+//!                   define <var> as the matched text.
+//! $(<var>=$<rxvar>) Match regex in <rxvar>, then
+//!                   define <var> as the matched text.
+//! 
+//! +//! Variables can contain either plain text or regular expressions. Plain text variables are +//! defined with the `$(var=...)` syntax in a previous directive. They match the same text again. +//! Backreferences within the same pattern are not allowed. When a variable is defined in a +//! pattern, it can't be referenced again in the same pattern. +//! +//! Regular expression variables are defined with the `regex:` directive. They match the regular +//! expression each time they are used, so the matches don't need to be identical. +//! +//! ## Word boundaries +//! +//! If a pattern begins or ends with a (plain text) letter or number, it will only match on a word +//! boundary. Use the `$()` empty string match to prevent this: +//! +//! ```sh +//! check: one$() +//! ``` +//! +//! This will match `"one"` and `"onetwo"`, but not `"zeroone"`. +//! +//! The empty match syntax can also be used to require leading or trailing whitespace: +//! +//! ```sh +//! check: one, $() +//! ``` +//! +//! This will match `"one, two"` , but not `"one,two"`. Without the `$()`, trailing whitespace +//! would be trimmed from the pattern. + +pub use error::{Error, Result}; +pub use variable::{VariableMap, Value, NO_VARIABLES}; +pub use checker::{Checker, CheckerBuilder}; + +extern crate regex; + +mod error; +mod variable; +mod pattern; +mod checker; diff --git a/cranelift/src/libfilecheck/pattern.rs b/cranelift/src/libfilecheck/pattern.rs new file mode 100644 index 0000000000..69464bd7c8 --- /dev/null +++ b/cranelift/src/libfilecheck/pattern.rs @@ -0,0 +1,523 @@ +//! Pattern matching for a single directive. + +use error::{Error, Result}; +use variable::{varname_prefix, VariableMap, Value}; +use std::str::FromStr; +use std::fmt::{self, Display, Formatter, Write}; +use regex::{Regex, RegexBuilder, quote}; + +/// A pattern to match as specified in a directive. +/// +/// Each pattern is broken into a sequence of parts that must match in order. The kinds of parts +/// are: +/// +/// 1. 
Plain text match. +/// 2. Variable match, `$FOO` or `$(FOO)`. The variable `FOO` may expand to plain text or a regex. +/// 3. Variable definition from literal regex, `$(foo=.*)`. Match the regex and assign matching text +/// to variable `foo`. +/// 4. Variable definition from regex variable, `$(foo=$RX)`. Lookup variable `RX` which should +/// expand to a regex, match the regex, and assign matching text to variable `foo`. +/// +pub struct Pattern { + parts: Vec, + // Variables defined by this pattern. + defs: Vec, +} + +/// One atomic part of a pattern. +#[derive(Debug, PartialEq, Eq)] +pub enum Part { + /// Match a plain string. + Text(String), + /// Match a regular expression. The regex has already been wrapped in a non-capturing group if + /// necessary, so it is safe to concatenate. + Regex(String), + /// Match the contents of a variable, which can be plain text or regex. + Var(String), + /// Match literal regex, then assign match to variable. + /// The regex has already been wrapped in a named capture group. + DefLit { def: usize, regex: String }, + /// Lookup variable `var`, match resulting regex, assign matching text to variable `defs[def]`. + DefVar { def: usize, var: String }, +} + +impl Part { + /// Get the variable referenced by this part, if any. + pub fn ref_var(&self) -> Option<&str> { + match *self { + Part::Var(ref var) => Some(var), + Part::DefVar { ref var, .. } => Some(var), + _ => None, + } + } +} + +impl Pattern { + /// Create a new blank pattern. Use the `FromStr` trait to generate Patterns with content. + fn new() -> Pattern { + Pattern { + parts: Vec::new(), + defs: Vec::new(), + } + } + + /// Check if the variable `v` is defined by this pattern. + pub fn defines_var(&self, v: &str) -> bool { + self.defs.iter().any(|d| d == v) + } + + /// Add a definition of a new variable. + /// Return the allocated def number. 
+ fn add_def(&mut self, v: &str) -> Result { + if self.defines_var(v) { + Err(Error::DuplicateDef(format!("duplicate definition of ${} in same pattern", v))) + } else { + let idx = self.defs.len(); + self.defs.push(v.to_string()); + Ok(idx) + } + } + + /// Parse a `Part` from a prefix of `s`. + /// Return the part and the number of bytes consumed from `s`. + /// Adds defined variables to `self.defs`. + fn parse_part(&mut self, s: &str) -> Result<(Part, usize)> { + let dollar = s.find('$'); + if dollar != Some(0) { + // String doesn't begin with a dollar sign, so match plain text up to the dollar sign. + let end = dollar.unwrap_or(s.len()); + return Ok((Part::Text(s[0..end].to_string()), end)); + } + + // String starts with a dollar sign. Look for these possibilities: + // + // 1. `$$`. + // 2. `$var`. + // 3. `$(var)`. + // 4. `$(var=regex)`. Where `regex` is a regular expression possibly containing matching + // braces. + // 5. `$(var=$VAR)`. + + // A doubled dollar sign matches a single dollar sign. + if s.starts_with("$$") { + return Ok((Part::Text("$".to_string()), 2)); + } + + // Look for `$var`. + let varname_end = 1 + varname_prefix(&s[1..]); + if varname_end != 1 { + return Ok((Part::Var(s[1..varname_end].to_string()), varname_end)); + } + + // All remaining possibilities start with `$(`. + if s.len() < 2 || !s.starts_with("$(") { + return Err(Error::Syntax("pattern syntax error, use $$ to match a single $" + .to_string())); + } + + // Match the variable name, allowing for an empty varname in `$()`, or `$(=...)`. + let varname_end = 2 + varname_prefix(&s[2..]); + let varname = s[2..varname_end].to_string(); + + match s[varname_end..].chars().next() { + None => { + return Err(Error::Syntax(format!("unterminated $({}...", varname))); + } + Some(')') => { + let part = if varname.is_empty() { + // Match `$()`, turn it into an empty text match. + Part::Text(varname) + } else { + // Match `$(var)`. 
+ Part::Var(varname) + }; + return Ok((part, varname_end + 1)); + } + Some('=') => { + // Variable definition. Fall through. + } + Some(ch) => { + return Err(Error::Syntax(format!("syntax error in $({}... '{}'", varname, ch))); + } + } + + // This is a variable definition of the form `$(var=...`. + + // Allocate a definition index. + let def = if varname.is_empty() { + None + } else { + Some(try!(self.add_def(&varname))) + }; + + // Match `$(var=$PAT)`. + if s[varname_end + 1..].starts_with('$') { + let refname_begin = varname_end + 2; + let refname_end = refname_begin + varname_prefix(&s[refname_begin..]); + if refname_begin == refname_end { + return Err(Error::Syntax(format!("expected variable name in $({}=$...", varname))); + } + if !s[refname_end..].starts_with(')') { + return Err(Error::Syntax(format!("expected ')' after $({}=${}...", + varname, + &s[refname_begin..refname_end]))); + } + let refname = s[refname_begin..refname_end].to_string(); + return if let Some(defidx) = def { + Ok((Part::DefVar { + def: defidx, + var: refname, + }, + refname_end + 1)) + } else { + Err(Error::Syntax(format!("expected variable name in $(=${})", refname))) + }; + } + + // Last case: `$(var=...)` where `...` is a regular expression, possibly containing matched + // parentheses. + let rx_begin = varname_end + 1; + let rx_end = rx_begin + regex_prefix(&s[rx_begin..]); + if s[rx_end..].starts_with(')') { + let part = if let Some(defidx) = def { + // Wrap the regex in a named capture group. + Part::DefLit { + def: defidx, + regex: format!("(?P<{}>{})", varname, &s[rx_begin..rx_end]), + } + } else { + // When the varname is empty just match the regex, don't capture any variables. + // This is `$(=[a-z])`. + // Wrap the regex in a non-capturing group to make it concatenation-safe. 
+ Part::Regex(format!("(?:{})", &s[rx_begin..rx_end])) + }; + Ok((part, rx_end + 1)) + } else { + Err(Error::Syntax(format!("missing ')' after regex in $({}={}", + varname, + &s[rx_begin..rx_end]))) + } + } +} + +/// Compute the length of a regular expression terminated by `)` or `}`. +/// Handle nested and escaped parentheses in the rx, but don't actually parse it. +/// Return the position of the terminating brace or the length of the string. +fn regex_prefix(s: &str) -> usize { + // The previous char was a backslash. + let mut escape = false; + // State around parsing charsets. + enum State { + Normal, // Outside any charset. + Curly, // Inside curly braces. + CSFirst, // Immediately after opening `[`. + CSNeg, // Immediately after `[^`. + CSBody, // Inside `[...`. + } + let mut state = State::Normal; + + // Current nesting level of parens. + let mut nest = 0usize; + + for (idx, ch) in s.char_indices() { + if escape { + escape = false; + continue; + } else if ch == '\\' { + escape = true; + continue; + } + match state { + State::Normal => { + match ch { + '[' => state = State::CSFirst, + '{' => state = State::Curly, + '(' => nest += 1, + ')' if nest > 0 => nest -= 1, + ')' | '}' => return idx, + _ => {} + } + } + State::Curly => { + if ch == '}' { + state = State::Normal; + } + } + State::CSFirst => { + state = match ch { + '^' => State::CSNeg, + _ => State::CSBody, + } + } + State::CSNeg => state = State::CSBody, + State::CSBody => { + if ch == ']' { + state = State::Normal; + } + } + } + } + s.len() +} + +impl FromStr for Pattern { + type Err = Error; + + fn from_str(s: &str) -> Result { + // Always remove leading and trailing whitespace. + // Use `$()` to actually include that in a match. 
+ let s = s.trim(); + let mut pat = Pattern::new(); + let mut pos = 0; + while pos < s.len() { + let (part, len) = try!(pat.parse_part(&s[pos..])); + if let Some(v) = part.ref_var() { + if pat.defines_var(v) { + return Err(Error::Backref(format!("unsupported back-reference to '${}' \ + defined in same pattern", + v))); + } + } + pat.parts.push(part); + pos += len; + } + Ok(pat) + } +} + +impl Pattern { + /// Get a list of parts in this pattern. + pub fn parts(&self) -> &[Part] { + &self.parts + } + + /// Get a list of variable names defined when this pattern matches. + pub fn defs(&self) -> &[String] { + &self.defs + } + + /// Resolve all variable references in this pattern, turning it into a regular expression. + pub fn resolve(&self, vmap: &VariableMap) -> Result { + let mut out = String::new(); + + // Add a word boundary check `\b` to the beginning of the regex, but only if the first part + // is a plain text match that starts with a word character. + // + // This behavior can be disabled by starting the pattern with `$()`. + if let Some(&Part::Text(ref s)) = self.parts.first() { + if s.starts_with(char::is_alphanumeric) { + out.push_str(r"\b"); + } + } + + for part in &self.parts { + match *part { + Part::Text(ref s) => { + out.push_str("e(s)); + } + Part::Regex(ref rx) => out.push_str(rx), + Part::Var(ref var) => { + // Resolve the variable. We can handle a plain text expansion. + match vmap.lookup(var) { + None => { + return Err(Error::UndefVariable(format!("undefined variable ${}", var))) + } + Some(Value::Text(s)) => out.push_str("e(&s)), + // Wrap regex in non-capturing group for safe concatenation. + Some(Value::Regex(rx)) => write!(out, "(?:{})", rx).unwrap(), + } + } + Part::DefLit { ref regex, .. } => out.push_str(regex), + Part::DefVar { def, ref var } => { + // Wrap regex in a named capture group. 
+ write!(out, + "(?P<{}>{})", + self.defs[def], + match vmap.lookup(var) { + None => { + return Err(Error::UndefVariable(format!("undefined variable \ + ${}", + var))) + } + Some(Value::Text(s)) => quote(&s), + Some(Value::Regex(rx)) => rx, + }) + .unwrap() + } + } + + } + + // Add a word boundary check `\b` to the end of the regex, but only if the final part + // is a plain text match that ends with a word character. + // + // This behavior can be disabled by ending the pattern with `$()`. + if let Some(&Part::Text(ref s)) = self.parts.last() { + if s.ends_with(char::is_alphanumeric) { + out.push_str(r"\b"); + } + } + + Ok(try!(RegexBuilder::new(&out).multi_line(true).compile())) + } +} + +impl Display for Pattern { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + for part in &self.parts { + use self::Part::*; + try!(match *part { + Text(ref txt) if txt == "" => write!(f, "$()"), + Text(ref txt) if txt == "$" => write!(f, "$$"), + Text(ref txt) => write!(f, "{}", txt), + Regex(ref rx) => write!(f, "$(={})", rx), + Var(ref var) => write!(f, "$({})", var), + DefLit { def, ref regex } => { + let defvar = &self.defs[def]; + // `regex` has the form `(?P<defvar>...)`; slice out the inner literal regex. 
+ let litrx = ®ex[5 + defvar.len()..regex.len() - 1]; + write!(f, "$({}={})", defvar, litrx) + } + DefVar { def, ref var } => write!(f, "$({}=${})", self.defs[def], var), + }); + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + #[test] + fn regex() { + use super::regex_prefix; + + assert_eq!(regex_prefix(""), 0); + assert_eq!(regex_prefix(")"), 0); + assert_eq!(regex_prefix(")c"), 0); + assert_eq!(regex_prefix("x"), 1); + assert_eq!(regex_prefix("x)x"), 1); + + assert_eq!(regex_prefix("x(c))x"), 4); + assert_eq!(regex_prefix("()x(c))x"), 6); + assert_eq!(regex_prefix("()x(c)"), 6); + + assert_eq!(regex_prefix("x([)]))x"), 6); + assert_eq!(regex_prefix("x[)])x"), 4); + assert_eq!(regex_prefix("x[^)])x"), 5); + assert_eq!(regex_prefix("x[^])x"), 6); + } + + #[test] + fn part() { + use super::{Pattern, Part}; + let mut pat = Pattern::new(); + + // This is dubious, should we panic instead? + assert_eq!(pat.parse_part("").unwrap(), (Part::Text("".to_string()), 0)); + + assert_eq!(pat.parse_part("x").unwrap(), + (Part::Text("x".to_string()), 1)); + assert_eq!(pat.parse_part("x2").unwrap(), + (Part::Text("x2".to_string()), 2)); + assert_eq!(pat.parse_part("x$").unwrap(), + (Part::Text("x".to_string()), 1)); + assert_eq!(pat.parse_part("x$$").unwrap(), + (Part::Text("x".to_string()), 1)); + + assert_eq!(pat.parse_part("$").unwrap_err().to_string(), + "pattern syntax error, use $$ to match a single $"); + + assert_eq!(pat.parse_part("$$").unwrap(), + (Part::Text("$".to_string()), 2)); + assert_eq!(pat.parse_part("$$ ").unwrap(), + (Part::Text("$".to_string()), 2)); + + assert_eq!(pat.parse_part("$0").unwrap(), + (Part::Var("0".to_string()), 2)); + assert_eq!(pat.parse_part("$xx=").unwrap(), + (Part::Var("xx".to_string()), 3)); + assert_eq!(pat.parse_part("$xx$").unwrap(), + (Part::Var("xx".to_string()), 3)); + + assert_eq!(pat.parse_part("$(0)").unwrap(), + (Part::Var("0".to_string()), 4)); + assert_eq!(pat.parse_part("$()").unwrap(), + (Part::Text("".to_string()), 3)); + + 
assert_eq!(pat.parse_part("$(0").unwrap_err().to_string(), + ("unterminated $(0...")); + assert_eq!(pat.parse_part("$(foo:").unwrap_err().to_string(), + ("syntax error in $(foo... ':'")); + assert_eq!(pat.parse_part("$(foo =").unwrap_err().to_string(), + ("syntax error in $(foo... ' '")); + assert_eq!(pat.parse_part("$(eo0=$bar").unwrap_err().to_string(), + ("expected ')' after $(eo0=$bar...")); + assert_eq!(pat.parse_part("$(eo1=$bar}").unwrap_err().to_string(), + ("expected ')' after $(eo1=$bar...")); + assert_eq!(pat.parse_part("$(eo2=$)").unwrap_err().to_string(), + ("expected variable name in $(eo2=$...")); + assert_eq!(pat.parse_part("$(eo3=$-)").unwrap_err().to_string(), + ("expected variable name in $(eo3=$...")); + } + + #[test] + fn partdefs() { + use super::{Pattern, Part}; + let mut pat = Pattern::new(); + + assert_eq!(pat.parse_part("$(foo=$bar)").unwrap(), + (Part::DefVar { + def: 0, + var: "bar".to_string(), + }, + 11)); + assert_eq!(pat.parse_part("$(foo=$bar)").unwrap_err().to_string(), + "duplicate definition of $foo in same pattern"); + + assert_eq!(pat.parse_part("$(fxo=$bar)x").unwrap(), + (Part::DefVar { + def: 1, + var: "bar".to_string(), + }, + 11)); + + assert_eq!(pat.parse_part("$(fo2=[a-z])").unwrap(), + (Part::DefLit { + def: 2, + regex: "(?P[a-z])".to_string(), + }, + 12)); + assert_eq!(pat.parse_part("$(fo3=[a-)])").unwrap(), + (Part::DefLit { + def: 3, + regex: "(?P[a-)])".to_string(), + }, + 12)); + assert_eq!(pat.parse_part("$(fo4=)").unwrap(), + (Part::DefLit { + def: 4, + regex: "(?P)".to_string(), + }, + 7)); + + assert_eq!(pat.parse_part("$(=.*)").unwrap(), + (Part::Regex("(?:.*)".to_string()), 6)); + + assert_eq!(pat.parse_part("$(=)").unwrap(), + (Part::Regex("(?:)".to_string()), 4)); + assert_eq!(pat.parse_part("$()").unwrap(), + (Part::Text("".to_string()), 3)); + } + + #[test] + fn pattern() { + use super::Pattern; + + let p: Pattern = " Hello world! 
".parse().unwrap(); + assert_eq!(format!("{:?}", p.parts), "[Text(\"Hello world!\")]"); + + let p: Pattern = " $foo=$(bar) ".parse().unwrap(); + assert_eq!(format!("{:?}", p.parts), + "[Var(\"foo\"), Text(\"=\"), Var(\"bar\")]"); + } +} diff --git a/cranelift/src/libfilecheck/tests/basic.rs b/cranelift/src/libfilecheck/tests/basic.rs new file mode 100644 index 0000000000..595158192c --- /dev/null +++ b/cranelift/src/libfilecheck/tests/basic.rs @@ -0,0 +1,313 @@ +extern crate filecheck; + +use filecheck::{CheckerBuilder, NO_VARIABLES, Error as FcError}; + +fn e2s(e: FcError) -> String { + e.to_string() +} + +#[test] +fn empty() { + let c = CheckerBuilder::new().finish(); + assert!(c.is_empty()); + + // An empty checker matches anything. + assert_eq!(c.check("", NO_VARIABLES).map_err(e2s), Ok(true)); + assert_eq!(c.check("hello", NO_VARIABLES).map_err(e2s), Ok(true)); +} + +#[test] +fn no_directives() { + let c = CheckerBuilder::new().text("nothing here").unwrap().finish(); + assert!(c.is_empty()); + + // An empty checker matches anything. + assert_eq!(c.check("", NO_VARIABLES).map_err(e2s), Ok(true)); + assert_eq!(c.check("hello", NO_VARIABLES).map_err(e2s), Ok(true)); +} + +#[test] +fn no_matches() { + let c = CheckerBuilder::new().text("regex: FOO=bar").unwrap().finish(); + assert!(!c.is_empty()); + + // A checker with only a `regex:` variable definition is not empty, but it still matches anything. 
+ assert_eq!(c.check("", NO_VARIABLES).map_err(e2s), Ok(true)); + assert_eq!(c.check("hello", NO_VARIABLES).map_err(e2s), Ok(true)); +} + +#[test] +fn simple() { + let c = CheckerBuilder::new() + .text(" + check: one + check: two + ") + .unwrap() + .finish(); + + let t = " + zero + one + and a half + two + three + "; + assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(true)); + + let t = " + zero + and a half + two + one + three + "; + assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); +} + +#[test] +fn sameln() { + let c = CheckerBuilder::new() + .text(" + check: one + sameln: two + ") + .unwrap() + .finish(); + + let t = " + zero + one + and a half + two + three + "; + assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); + + let t = " + zero + one + two + three + "; + assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); + + let t = " + zero + one two + three + "; + assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(true)); +} + +#[test] +fn nextln() { + let c = CheckerBuilder::new() + .text(" + check: one + nextln: two + ") + .unwrap() + .finish(); + + let t = " + zero + one + and a half + two + three + "; + assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); + + let t = " + zero + one + two + three + "; + assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(true)); + + let t = " + zero + one two + three + "; + assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); + + let t = " + zero + one + two"; + assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(true)); +} + +#[test] +fn leading_nextln() { + // A leading nextln directive should match from line 2. + // This is somewhat arbitrary, but consistent with a preceding 'check: $()' directive. 
+ let c = CheckerBuilder::new() + .text(" + nextln: one + nextln: two + ") + .unwrap() + .finish(); + + let t = "zero + one + two + three + "; + assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(true)); + + let t = "one + two + three + "; + assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); +} + +#[test] +fn leading_sameln() { + // A leading sameln directive should match from line 1. + let c = CheckerBuilder::new() + .text(" + sameln: one + sameln: two + ") + .unwrap() + .finish(); + + let t = "zero + one two three + "; + assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); + + let t = "zero one two three"; + assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(true)); + + let t = "zero one + two three"; + assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); +} + +#[test] +fn not() { + let c = CheckerBuilder::new() + .text(" + check: one$() + not: $()eat$() + check: $()two + ") + .unwrap() + .finish(); + + let t = "onetwo"; + assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(true)); + + let t = "one eat two"; + assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); + + let t = "oneeattwo"; + assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); + + let t = "oneatwo"; + assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(true)); +} + +#[test] +fn notnot() { + let c = CheckerBuilder::new() + .text(" + check: one$() + not: $()eat$() + not: half + check: $()two + ") + .unwrap() + .finish(); + + let t = "onetwo"; + assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(true)); + + let t = "one eat two"; + assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); + + let t = "one half two"; + assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); + + let t = "oneeattwo"; + assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); + + // The `not: half` pattern only matches whole words, but the bracketing matches are considered + // word boundaries, so it does match in this case. 
+ let t = "onehalftwo"; + assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(false)); + + let t = "oneatwo"; + assert_eq!(c.check(t, NO_VARIABLES).map_err(e2s), Ok(true)); +} + +#[test] +fn unordered() { + let c = CheckerBuilder::new() + .text(" + check: one + unordered: two + unordered: three + check: four + ") + .unwrap() + .finish(); + + assert_eq!(c.check("one two three four", NO_VARIABLES).map_err(e2s), Ok(true)); + assert_eq!(c.check("one three two four", NO_VARIABLES).map_err(e2s), Ok(true)); + + assert_eq!(c.check("one two four three four", NO_VARIABLES).map_err(e2s), Ok(true)); + assert_eq!(c.check("one three four two four", NO_VARIABLES).map_err(e2s), Ok(true)); + + assert_eq!(c.check("one two four three", NO_VARIABLES).map_err(e2s), Ok(false)); + assert_eq!(c.check("one three four two", NO_VARIABLES).map_err(e2s), Ok(false)); +} + +#[test] +fn leading_unordered() { + let c = CheckerBuilder::new() + .text(" + unordered: two + unordered: three + check: four + ") + .unwrap() + .finish(); + + assert_eq!(c.check("one two three four", NO_VARIABLES).map_err(e2s), Ok(true)); + assert_eq!(c.check("one three two four", NO_VARIABLES).map_err(e2s), Ok(true)); + + assert_eq!(c.check("one two four three four", NO_VARIABLES).map_err(e2s), Ok(true)); + assert_eq!(c.check("one three four two four", NO_VARIABLES).map_err(e2s), Ok(true)); + + assert_eq!(c.check("one two four three", NO_VARIABLES).map_err(e2s), Ok(false)); + assert_eq!(c.check("one three four two", NO_VARIABLES).map_err(e2s), Ok(false)); +} + +#[test] +fn trailing_unordered() { + let c = CheckerBuilder::new() + .text(" + check: one + unordered: two + unordered: three + ") + .unwrap() + .finish(); + + assert_eq!(c.check("one two three four", NO_VARIABLES).map_err(e2s), Ok(true)); + assert_eq!(c.check("one three two four", NO_VARIABLES).map_err(e2s), Ok(true)); + + assert_eq!(c.check("one two four three four", NO_VARIABLES).map_err(e2s), Ok(true)); + assert_eq!(c.check("one three four two four", 
+ NO_VARIABLES).map_err(e2s), Ok(true)); + + assert_eq!(c.check("one two four three", NO_VARIABLES).map_err(e2s), Ok(true)); + assert_eq!(c.check("one three four two", NO_VARIABLES).map_err(e2s), Ok(true)); +} diff --git a/cranelift/src/libfilecheck/variable.rs b/cranelift/src/libfilecheck/variable.rs new file mode 100644 index 0000000000..7238249146 --- /dev/null +++ b/cranelift/src/libfilecheck/variable.rs @@ -0,0 +1,58 @@ +/// A variable name is one or more ASCII alphanumerical characters, including underscore. +/// Note that numerical variable names like `$45` are allowed too. +/// +/// Try to parse a variable name from the beginning of `s`. +/// Return the byte index of the character following the varname. +/// This returns 0 if `s` doesn't have a prefix that is a variable name. +pub fn varname_prefix(s: &str) -> usize { + for (idx, ch) in s.char_indices() { + match ch { + 'a'...'z' | 'A'...'Z' | '0'...'9' | '_' => {} + _ => return idx, + } + } + s.len() +} + +/// A variable can contain either a regular expression or plain text. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Value { + Text(String), + Regex(String), +} + +/// Resolve variables by name. +pub trait VariableMap { + /// Get the value of the variable `varname`, or return `None` for an unknown variable name. + fn lookup(&self, varname: &str) -> Option; +} + +impl VariableMap for () { + fn lookup(&self, _: &str) -> Option { + None + } +} + +/// An empty variable map. 
+pub const NO_VARIABLES: &'static VariableMap = &(); + +#[cfg(test)] +mod tests { + #[test] + fn varname() { + use super::varname_prefix; + + assert_eq!(varname_prefix(""), 0); + assert_eq!(varname_prefix("\0"), 0); + assert_eq!(varname_prefix("_"), 1); + assert_eq!(varname_prefix("0"), 1); + assert_eq!(varname_prefix("01"), 2); + assert_eq!(varname_prefix("b"), 1); + assert_eq!(varname_prefix("C"), 1); + assert_eq!(varname_prefix("."), 0); + assert_eq!(varname_prefix(".s"), 0); + assert_eq!(varname_prefix("0."), 1); + assert_eq!(varname_prefix("01="), 2); + assert_eq!(varname_prefix("0a)"), 2); + } +}