Share split_entity_name between lexer and sourcemap.
There's only one way of parsing entity names correctly.
This commit is contained in:
@@ -6,6 +6,7 @@
|
|||||||
// ====--------------------------------------------------------------------------------------====//
|
// ====--------------------------------------------------------------------------------------====//
|
||||||
|
|
||||||
use std::str::CharIndices;
|
use std::str::CharIndices;
|
||||||
|
use std::u16;
|
||||||
use cretonne::ir::types;
|
use cretonne::ir::types;
|
||||||
use cretonne::ir::{Value, Ebb};
|
use cretonne::ir::{Value, Ebb};
|
||||||
use error::Location;
|
use error::Location;
|
||||||
@@ -72,6 +73,23 @@ fn error<'a>(error: Error, loc: Location) -> Result<LocatedToken<'a>, LocatedErr
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get the number of decimal digits at the end of `s`.
|
||||||
|
fn trailing_digits(s: &str) -> usize {
|
||||||
|
// It's faster to iterate backwards over bytes, and we're only counting ASCII digits.
|
||||||
|
s.as_bytes().iter().rev().cloned().take_while(|&b| b'0' <= b && b <= b'9').count()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Pre-parse a supposed entity name by splitting it into two parts: A head of lowercase ASCII
|
||||||
|
/// letters and numeric tail.
|
||||||
|
pub fn split_entity_name(name: &str) -> Option<(&str, u32)> {
|
||||||
|
let (head, tail) = name.split_at(name.len() - trailing_digits(name));
|
||||||
|
if tail.len() > 1 && tail.starts_with('0') {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
tail.parse().ok().map(|n| (head, n))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Lexical analysis.
|
/// Lexical analysis.
|
||||||
///
|
///
|
||||||
/// A `Lexer` reads text from a `&str` and provides a sequence of tokens.
|
/// A `Lexer` reads text from a `&str` and provides a sequence of tokens.
|
||||||
@@ -237,52 +255,42 @@ impl<'a> Lexer<'a> {
|
|||||||
fn scan_word(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
|
fn scan_word(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
|
||||||
let begin = self.pos;
|
let begin = self.pos;
|
||||||
let loc = self.loc();
|
let loc = self.loc();
|
||||||
let mut trailing_digits = 0usize;
|
|
||||||
|
|
||||||
assert!(self.lookahead == Some('_') || self.lookahead.unwrap().is_alphabetic());
|
assert!(self.lookahead == Some('_') || self.lookahead.unwrap().is_alphabetic());
|
||||||
loop {
|
loop {
|
||||||
match self.next_ch() {
|
match self.next_ch() {
|
||||||
Some(ch) if ch.is_digit(10) => trailing_digits += 1,
|
Some('_') => {}
|
||||||
Some('_') => trailing_digits = 0,
|
Some(ch) if ch.is_alphanumeric() => {}
|
||||||
Some(ch) if ch.is_alphabetic() => trailing_digits = 0,
|
|
||||||
_ => break,
|
_ => break,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let text = &self.source[begin..self.pos];
|
let text = &self.source[begin..self.pos];
|
||||||
let (prefix, suffix) = text.split_at(text.len() - trailing_digits);
|
|
||||||
|
|
||||||
// Look for numbered well-known entities like ebb15, v45, ...
|
// Look for numbered well-known entities like ebb15, v45, ...
|
||||||
token(Self::numbered_entity(prefix, suffix)
|
token(split_entity_name(text)
|
||||||
.or_else(|| Self::value_type(text, prefix, suffix))
|
.and_then(|(prefix, number)| {
|
||||||
|
Self::numbered_entity(prefix, number)
|
||||||
|
.or_else(|| Self::value_type(text, prefix, number))
|
||||||
|
})
|
||||||
.unwrap_or(Token::Identifier(text)),
|
.unwrap_or(Token::Identifier(text)),
|
||||||
loc)
|
loc)
|
||||||
}
|
}
|
||||||
|
|
||||||
// If prefix is a well-known entity prefix and suffix is a valid entity number, return the
|
// If prefix is a well-known entity prefix and suffix is a valid entity number, return the
|
||||||
// decoded token.
|
// decoded token.
|
||||||
fn numbered_entity(prefix: &str, suffix: &str) -> Option<Token<'a>> {
|
fn numbered_entity(prefix: &str, number: u32) -> Option<Token<'a>> {
|
||||||
// Reject non-canonical numbers like v0001.
|
|
||||||
if suffix.len() > 1 && suffix.starts_with('0') {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
let value: u32 = match suffix.parse() {
|
|
||||||
Ok(v) => v,
|
|
||||||
_ => return None,
|
|
||||||
};
|
|
||||||
|
|
||||||
match prefix {
|
match prefix {
|
||||||
"v" => Value::direct_with_number(value).map(|v| Token::Value(v)),
|
"v" => Value::direct_with_number(number).map(|v| Token::Value(v)),
|
||||||
"vx" => Value::table_with_number(value).map(|v| Token::Value(v)),
|
"vx" => Value::table_with_number(number).map(|v| Token::Value(v)),
|
||||||
"ebb" => Ebb::with_number(value).map(|ebb| Token::Ebb(ebb)),
|
"ebb" => Ebb::with_number(number).map(|ebb| Token::Ebb(ebb)),
|
||||||
"ss" => Some(Token::StackSlot(value)),
|
"ss" => Some(Token::StackSlot(number)),
|
||||||
"jt" => Some(Token::JumpTable(value)),
|
"jt" => Some(Token::JumpTable(number)),
|
||||||
_ => None,
|
_ => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Recognize a scalar or vector type.
|
// Recognize a scalar or vector type.
|
||||||
fn value_type(text: &str, prefix: &str, suffix: &str) -> Option<Token<'a>> {
|
fn value_type(text: &str, prefix: &str, number: u32) -> Option<Token<'a>> {
|
||||||
let is_vector = prefix.ends_with('x');
|
let is_vector = prefix.ends_with('x');
|
||||||
let scalar = if is_vector {
|
let scalar = if is_vector {
|
||||||
&prefix[0..prefix.len() - 1]
|
&prefix[0..prefix.len() - 1]
|
||||||
@@ -304,11 +312,11 @@ impl<'a> Lexer<'a> {
|
|||||||
_ => return None,
|
_ => return None,
|
||||||
};
|
};
|
||||||
if is_vector {
|
if is_vector {
|
||||||
let lanes: u16 = match suffix.parse() {
|
if number <= u16::MAX as u32 {
|
||||||
Ok(v) => v,
|
base_type.by(number as u16).map(|t| Token::Type(t))
|
||||||
_ => return None,
|
} else {
|
||||||
};
|
None
|
||||||
base_type.by(lanes).map(|t| Token::Type(t))
|
}
|
||||||
} else {
|
} else {
|
||||||
Some(Token::Type(base_type))
|
Some(Token::Type(base_type))
|
||||||
}
|
}
|
||||||
@@ -356,11 +364,36 @@ impl<'a> Lexer<'a> {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
use super::trailing_digits;
|
||||||
use super::*;
|
use super::*;
|
||||||
use cretonne::ir::types;
|
use cretonne::ir::types;
|
||||||
use cretonne::ir::{Value, Ebb};
|
use cretonne::ir::{Value, Ebb};
|
||||||
use error::Location;
|
use error::Location;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn digits() {
|
||||||
|
assert_eq!(trailing_digits(""), 0);
|
||||||
|
assert_eq!(trailing_digits("x"), 0);
|
||||||
|
assert_eq!(trailing_digits("0x"), 0);
|
||||||
|
assert_eq!(trailing_digits("x1"), 1);
|
||||||
|
assert_eq!(trailing_digits("1x1"), 1);
|
||||||
|
assert_eq!(trailing_digits("1x01"), 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn entity_name() {
|
||||||
|
assert_eq!(split_entity_name(""), None);
|
||||||
|
assert_eq!(split_entity_name("x"), None);
|
||||||
|
assert_eq!(split_entity_name("x+"), None);
|
||||||
|
assert_eq!(split_entity_name("x+1"), Some(("x+", 1)));
|
||||||
|
assert_eq!(split_entity_name("x-1"), Some(("x-", 1)));
|
||||||
|
assert_eq!(split_entity_name("1"), Some(("", 1)));
|
||||||
|
assert_eq!(split_entity_name("x1"), Some(("x", 1)));
|
||||||
|
assert_eq!(split_entity_name("xy0"), Some(("xy", 0)));
|
||||||
|
// Reject this non-canonical form.
|
||||||
|
assert_eq!(split_entity_name("inst01"), None);
|
||||||
|
}
|
||||||
|
|
||||||
fn token<'a>(token: Token<'a>, line: usize) -> Option<Result<LocatedToken<'a>, LocatedError>> {
|
fn token<'a>(token: Token<'a>, line: usize) -> Option<Result<LocatedToken<'a>, LocatedError>> {
|
||||||
Some(super::token(token, Location { line_number: line }))
|
Some(super::token(token, Location { line_number: line }))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ use std::collections::HashMap;
|
|||||||
use cretonne::ir::{StackSlot, JumpTable, Ebb, Value, Inst};
|
use cretonne::ir::{StackSlot, JumpTable, Ebb, Value, Inst};
|
||||||
use cretonne::ir::entities::AnyEntity;
|
use cretonne::ir::entities::AnyEntity;
|
||||||
use error::{Result, Location};
|
use error::{Result, Location};
|
||||||
|
use lexer::split_entity_name;
|
||||||
|
|
||||||
/// Mapping from source entity names to entity references that are valid in the parsed function.
|
/// Mapping from source entity names to entity references that are valid in the parsed function.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
@@ -114,23 +115,6 @@ impl SourceMap {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get the number of decimal digits at the end of `s`.
|
|
||||||
fn trailing_digits(s: &str) -> usize {
|
|
||||||
// It's faster to iterate backwards over bytes, and we're only counting ASCII digits.
|
|
||||||
s.as_bytes().iter().rev().cloned().take_while(|&b| b'0' <= b && b <= b'9').count()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Pre-parse a supposed entity name by splitting it into two parts: A head of lowercase ASCII
|
|
||||||
/// letters and numeric tail.
|
|
||||||
fn split_entity_name(name: &str) -> Option<(&str, u32)> {
|
|
||||||
let (head, tail) = name.split_at(name.len() - trailing_digits(name));
|
|
||||||
if tail.len() > 1 && tail.starts_with('0') {
|
|
||||||
None
|
|
||||||
} else {
|
|
||||||
tail.parse().ok().map(|n| (head, n))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Interface for mutating a source map.
|
/// Interface for mutating a source map.
|
||||||
///
|
///
|
||||||
@@ -211,33 +195,8 @@ impl MutableSourceMap for SourceMap {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::{trailing_digits, split_entity_name};
|
|
||||||
use parse_test;
|
use parse_test;
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn digits() {
|
|
||||||
assert_eq!(trailing_digits(""), 0);
|
|
||||||
assert_eq!(trailing_digits("x"), 0);
|
|
||||||
assert_eq!(trailing_digits("0x"), 0);
|
|
||||||
assert_eq!(trailing_digits("x1"), 1);
|
|
||||||
assert_eq!(trailing_digits("1x1"), 1);
|
|
||||||
assert_eq!(trailing_digits("1x01"), 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn entity_name() {
|
|
||||||
assert_eq!(split_entity_name(""), None);
|
|
||||||
assert_eq!(split_entity_name("x"), None);
|
|
||||||
assert_eq!(split_entity_name("x+"), None);
|
|
||||||
assert_eq!(split_entity_name("x+1"), Some(("x+", 1)));
|
|
||||||
assert_eq!(split_entity_name("x-1"), Some(("x-", 1)));
|
|
||||||
assert_eq!(split_entity_name("1"), Some(("", 1)));
|
|
||||||
assert_eq!(split_entity_name("x1"), Some(("x", 1)));
|
|
||||||
assert_eq!(split_entity_name("xy0"), Some(("xy", 0)));
|
|
||||||
// Reject this non-canonical form.
|
|
||||||
assert_eq!(split_entity_name("inst01"), None);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn details() {
|
fn details() {
|
||||||
let tf = parse_test("function detail() {
|
let tf = parse_test("function detail() {
|
||||||
|
|||||||
Reference in New Issue
Block a user