Support for file input and output, including multiple input files with proper position tracking.

Chris Fallin
2021-09-04 14:04:19 -07:00
parent e9a57d854d
commit 638c9edd01
8 changed files with 255 additions and 90 deletions

View File

@@ -11,6 +11,15 @@ dependencies = [
"memchr",
]
[[package]]
name = "ansi_term"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
dependencies = [
"winapi",
]
[[package]]
name = "atty"
version = "0.2.14"
@@ -22,12 +31,33 @@ dependencies = [
"winapi",
]
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "clap"
version = "2.33.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002"
dependencies = [
"ansi_term",
"atty",
"bitflags",
"strsim",
"textwrap",
"unicode-width",
"vec_map",
]
[[package]]
name = "env_logger"
version = "0.8.4"
@@ -60,6 +90,7 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
name = "isle"
version = "0.1.0"
dependencies = [
"clap",
"env_logger",
"log",
"thiserror",
@@ -121,6 +152,12 @@ version = "0.6.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
[[package]]
name = "strsim"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
[[package]]
name = "syn"
version = "1.0.75"
@@ -141,6 +178,15 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "textwrap"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
dependencies = [
"unicode-width",
]
[[package]]
name = "thiserror"
version = "1.0.26"
@@ -161,12 +207,24 @@ dependencies = [
"syn",
]
[[package]]
name = "unicode-width"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3"
[[package]]
name = "unicode-xid"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
[[package]]
name = "vec_map"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
[[package]]
name = "winapi"
version = "0.3.9"

View File

@@ -9,3 +9,4 @@ license = "Apache-2.0 WITH LLVM-exception"
log = "0.4"
env_logger = "0.8"
thiserror = "1.0"
clap = "2.33"

View File

@@ -4,7 +4,7 @@ use crate::lexer::Pos;
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Defs {
pub defs: Vec<Def>,
pub filename: String,
pub filenames: Vec<String>,
}
/// One toplevel form in an ISLE file.

View File

@@ -512,9 +512,17 @@ impl<'a> Codegen<'a> {
pub fn generate_rust(&self) -> Result<String, Error> {
let mut code = String::new();
writeln!(&mut code, "// GENERATED BY ISLE. DO NOT EDIT!")?;
writeln!(&mut code, "//")?;
writeln!(
&mut code,
"use super::*; // Pulls in all external types and ctors/etors"
"// Generated automatically from the instruction-selection DSL code in:",
)?;
for file in &self.typeenv.filenames {
writeln!(&mut code, "// - {}", file)?;
}
writeln!(
&mut code,
"\nuse super::*; // Pulls in all external types and ctors/etors"
)?;
self.generate_internal_types(&mut code)?;
Ok(code)
@@ -533,10 +541,11 @@ impl<'a> Codegen<'a> {
let name = &self.typeenv.syms[name.index()];
writeln!(
code,
"\n// Internal type {}: defined at {}.",
"\n/// Internal type {}: defined at {}.",
name,
pos.pretty_print_line(&self.typeenv.filename)
pos.pretty_print_line(&self.typeenv.filenames[..])
)?;
writeln!(code, "#[derive(Clone, Debug)]")?;
writeln!(code, "enum {} {{", name)?;
for variant in variants {
let name = &self.typeenv.syms[variant.name.index()];
@@ -546,7 +555,7 @@ impl<'a> Codegen<'a> {
let ty_name = self.typeenv.types[field.ty.index()].name(&self.typeenv);
writeln!(code, " {}: {},", name, ty_name)?;
}
writeln!(code, " }}")?;
writeln!(code, " }},")?;
}
writeln!(code, "}}")?;
}
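For reference, with the preamble loop above the generated file now begins by naming every ISLE source file it was produced from, and each internal type gets a `///` doc comment plus a `#[derive(Clone, Debug)]`. A sketch of the expected header, assuming two hypothetical inputs named prelude.isle and lower.isle:

    // GENERATED BY ISLE. DO NOT EDIT!
    //
    // Generated automatically from the instruction-selection DSL code in:
    // - prelude.isle
    // - lower.isle

    use super::*; // Pulls in all external types and ctors/etors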

View File

@@ -1,41 +1,56 @@
//! Lexer for the ISLE language.
use crate::error::Error;
use std::borrow::Cow;
#[derive(Clone, Debug)]
pub struct Lexer<'a> {
buf: &'a [u8],
pub filenames: Vec<String>,
file_starts: Vec<usize>,
buf: Cow<'a, [u8]>,
pos: Pos,
lookahead: Option<(Pos, Token<'a>)>,
lookahead: Option<(Pos, Token)>,
}
#[derive(Clone, Debug)]
enum LexerInput<'a> {
String { s: &'a str, filename: &'a str },
File { content: String, filename: String },
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Pos {
pub file: usize,
pub offset: usize,
pub line: usize,
pub col: usize,
}
impl Pos {
pub fn pretty_print(&self, filename: &str) -> String {
format!("{}:{}:{}", filename, self.line, self.col)
pub fn pretty_print(&self, filenames: &[String]) -> String {
format!("{}:{}:{}", filenames[self.file], self.line, self.col)
}
pub fn pretty_print_line(&self, filename: &str) -> String {
format!("{} line {}", filename, self.line)
pub fn pretty_print_line(&self, filenames: &[String]) -> String {
format!("{} line {}", filenames[self.file], self.line)
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Token<'a> {
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Token {
LParen,
RParen,
Symbol(&'a str),
Symbol(String),
Int(i64),
}
impl<'a> Lexer<'a> {
pub fn new(s: &'a str) -> Lexer<'a> {
pub fn from_str(s: &'a str, filename: &'a str) -> Lexer<'a> {
let mut l = Lexer {
buf: s.as_bytes(),
filenames: vec![filename.to_string()],
file_starts: vec![0],
buf: Cow::Borrowed(s.as_bytes()),
pos: Pos {
file: 0,
offset: 0,
line: 1,
col: 0,
@@ -46,6 +61,43 @@ impl<'a> Lexer<'a> {
l
}
pub fn from_files(filenames: Vec<String>) -> Result<Lexer<'a>, Error> {
assert!(!filenames.is_empty());
let file_contents: Vec<String> = filenames
.iter()
.map(|f| {
use std::io::Read;
let mut f = std::fs::File::open(f)?;
let mut s = String::new();
f.read_to_string(&mut s)?;
Ok(s)
})
.collect::<Result<Vec<String>, Error>>()?;
let mut file_starts = vec![];
let mut buf = String::new();
for file in file_contents {
file_starts.push(buf.len());
buf += &file;
buf += "\n";
}
let mut l = Lexer {
filenames,
buf: Cow::Owned(buf.into_bytes()),
file_starts,
pos: Pos {
file: 0,
offset: 0,
line: 1,
col: 0,
},
lookahead: None,
};
l.reload();
Ok(l)
}
pub fn offset(&self) -> usize {
self.pos.offset
}
@@ -54,7 +106,24 @@ impl<'a> Lexer<'a> {
self.pos
}
fn next_token(&mut self) -> Option<(Pos, Token<'a>)> {
fn advance_pos(&mut self) {
self.pos.col += 1;
if self.buf[self.pos.offset] == b'\n' {
self.pos.line += 1;
self.pos.col = 0;
}
self.pos.offset += 1;
if self.pos.file + 1 < self.file_starts.len() {
let next_start = self.file_starts[self.pos.file + 1];
if self.pos.offset >= next_start {
assert!(self.pos.offset == next_start);
self.pos.file += 1;
self.pos.line = 1;
}
}
}
fn next_token(&mut self) -> Option<(Pos, Token)> {
fn is_sym_first_char(c: u8) -> bool {
match c {
b'-' | b'0'..=b'9' | b'(' | b')' | b';' => false,
@@ -73,20 +142,13 @@ impl<'a> Lexer<'a> {
// Skip any whitespace and any comments.
while self.pos.offset < self.buf.len() {
if self.buf[self.pos.offset].is_ascii_whitespace() {
self.pos.col += 1;
if self.buf[self.pos.offset] == b'\n' {
self.pos.line += 1;
self.pos.col = 0;
}
self.pos.offset += 1;
self.advance_pos();
continue;
}
if self.buf[self.pos.offset] == b';' {
while self.pos.offset < self.buf.len() && self.buf[self.pos.offset] != b'\n' {
self.pos.offset += 1;
self.advance_pos();
}
self.pos.line += 1;
self.pos.col = 0;
continue;
}
break;
@@ -99,13 +161,11 @@ impl<'a> Lexer<'a> {
let char_pos = self.pos;
match self.buf[self.pos.offset] {
b'(' => {
self.pos.offset += 1;
self.pos.col += 1;
self.advance_pos();
Some((char_pos, Token::LParen))
}
b')' => {
self.pos.offset += 1;
self.pos.col += 1;
self.advance_pos();
Some((char_pos, Token::RParen))
}
c if is_sym_first_char(c) => {
@@ -114,19 +174,17 @@ impl<'a> Lexer<'a> {
while self.pos.offset < self.buf.len()
&& is_sym_other_char(self.buf[self.pos.offset])
{
self.pos.col += 1;
self.pos.offset += 1;
self.advance_pos();
}
let end = self.pos.offset;
let s = std::str::from_utf8(&self.buf[start..end])
.expect("Only ASCII characters, should be UTF-8");
Some((start_pos, Token::Symbol(s)))
Some((start_pos, Token::Symbol(s.to_string())))
}
c if (c >= b'0' && c <= b'9') || c == b'-' => {
let start_pos = self.pos;
let neg = if c == b'-' {
self.pos.offset += 1;
self.pos.col += 1;
self.advance_pos();
true
} else {
false
@@ -136,8 +194,7 @@ impl<'a> Lexer<'a> {
&& (self.buf[self.pos.offset] >= b'0' && self.buf[self.pos.offset] <= b'9')
{
num = (num * 10) + (self.buf[self.pos.offset] - b'0') as i64;
self.pos.offset += 1;
self.pos.col += 1;
self.advance_pos();
}
let tok = if neg {
@@ -157,8 +214,8 @@ impl<'a> Lexer<'a> {
}
}
pub fn peek(&self) -> Option<(Pos, Token<'a>)> {
self.lookahead
pub fn peek(&self) -> Option<&(Pos, Token)> {
self.lookahead.as_ref()
}
pub fn eof(&self) -> bool {
@@ -167,16 +224,16 @@ impl<'a> Lexer<'a> {
}
impl<'a> std::iter::Iterator for Lexer<'a> {
type Item = (Pos, Token<'a>);
type Item = (Pos, Token);
fn next(&mut self) -> Option<(Pos, Token<'a>)> {
fn next(&mut self) -> Option<(Pos, Token)> {
let tok = self.lookahead.take();
self.reload();
tok
}
}
impl<'a> Token<'a> {
impl Token {
pub fn is_int(&self) -> bool {
match self {
Token::Int(_) => true,
@@ -199,14 +256,17 @@ mod test {
#[test]
fn lexer_basic() {
assert_eq!(
Lexer::new(";; comment\n; another\r\n \t(one two three 23 -568 )\n")
.map(|(_, tok)| tok)
.collect::<Vec<_>>(),
Lexer::from_str(
";; comment\n; another\r\n \t(one two three 23 -568 )\n",
"test"
)
.map(|(_, tok)| tok)
.collect::<Vec<_>>(),
vec![
Token::LParen,
Token::Symbol("one"),
Token::Symbol("two"),
Token::Symbol("three"),
Token::Symbol("one".to_string()),
Token::Symbol("two".to_string()),
Token::Symbol("three".to_string()),
Token::Int(23),
Token::Int(-568),
Token::RParen
@@ -217,15 +277,19 @@ mod test {
#[test]
fn ends_with_sym() {
assert_eq!(
Lexer::new("asdf").map(|(_, tok)| tok).collect::<Vec<_>>(),
vec![Token::Symbol("asdf"),]
Lexer::from_str("asdf", "test")
.map(|(_, tok)| tok)
.collect::<Vec<_>>(),
vec![Token::Symbol("asdf".to_string()),]
);
}
#[test]
fn ends_with_num() {
assert_eq!(
Lexer::new("23").map(|(_, tok)| tok).collect::<Vec<_>>(),
Lexer::from_str("23", "test")
.map(|(_, tok)| tok)
.collect::<Vec<_>>(),
vec![Token::Int(23)],
);
}
@@ -233,16 +297,16 @@ mod test {
#[test]
fn weird_syms() {
assert_eq!(
Lexer::new("(+ [] => !! _test!;comment\n)")
Lexer::from_str("(+ [] => !! _test!;comment\n)", "test")
.map(|(_, tok)| tok)
.collect::<Vec<_>>(),
vec![
Token::LParen,
Token::Symbol("+"),
Token::Symbol("[]"),
Token::Symbol("=>"),
Token::Symbol("!!"),
Token::Symbol("_test!"),
Token::Symbol("+".to_string()),
Token::Symbol("[]".to_string()),
Token::Symbol("=>".to_string()),
Token::Symbol("!!".to_string()),
Token::Symbol("_test!".to_string()),
Token::RParen,
]
);
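The heart of the multi-file support is that `from_files` concatenates every input into one buffer, appending a newline after each file and recording each file's starting offset in `file_starts`; `advance_pos` then bumps `pos.file` and resets `pos.line` exactly when the offset reaches the next recorded start, and `Pos::pretty_print` simply indexes the `filenames` list with `pos.file`. A minimal standalone sketch of that bookkeeping (the helper function and file sizes below are hypothetical, purely to illustrate the invariant):

    /// Map a byte offset in the concatenated buffer to the index of the file
    /// it falls in, given the start offset of each file. This is a standalone
    /// illustration of the invariant that `advance_pos` maintains
    /// incrementally; it is not part of the lexer itself.
    fn file_for_offset(file_starts: &[usize], offset: usize) -> usize {
        // The last recorded start that is <= offset identifies the file.
        file_starts
            .iter()
            .rposition(|&start| start <= offset)
            .expect("file_starts is non-empty and begins at 0")
    }

    fn main() {
        // Two hypothetical inputs: the first is 10 bytes long. After
        // concatenation (a '\n' is appended to each file), file 0 occupies
        // offsets 0..11 and file 1 begins at offset 11.
        let file_starts = vec![0, 11];
        assert_eq!(file_for_offset(&file_starts, 0), 0);
        assert_eq!(file_for_offset(&file_starts, 10), 0); // the appended '\n'
        assert_eq!(file_for_offset(&file_starts, 11), 1); // first byte of file 1
    }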

View File

@@ -1,7 +1,6 @@
#![allow(dead_code)]
use std::io::stdin;
use std::io::Read;
use clap::{App, Arg};
mod ast;
mod codegen;
@@ -14,11 +13,47 @@ mod sema;
fn main() -> Result<(), error::Error> {
let _ = env_logger::try_init();
let mut input = String::new();
stdin().read_to_string(&mut input)?;
let mut parser = parser::Parser::new("<stdin>", &input[..]);
let matches = App::new("isle")
.version(env!("CARGO_PKG_VERSION"))
.author("Chris Fallin <chris@cfallin.org>")
.about("Instruction selection logic engine (ISLE) code generator")
.arg(
Arg::with_name("input")
.short("i")
.long("input")
.value_name("FILE.isle")
.takes_value(true)
.multiple(true)
.required(true),
)
.arg(
Arg::with_name("output")
.short("o")
.long("output")
.value_name("FILE.rs")
.takes_value(true)
.required(true),
)
.get_matches();
let input_files = matches
.values_of("input")
.unwrap()
.map(|s| s.to_string())
.collect::<Vec<_>>();
let output_file = matches.value_of("output").unwrap();
let lexer = lexer::Lexer::from_files(input_files)?;
let mut parser = parser::Parser::new(lexer);
let defs = parser.parse_defs()?;
let code = compile::compile(&defs)?;
println!("{}", code);
{
use std::io::Write;
let mut f = std::fs::File::create(output_file)?;
writeln!(&mut f, "{}", code)?;
}
Ok(())
}
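With these flags, a typical invocation would look like `isle -i prelude.isle -i lower.isle -o generated.rs` (the file names here are hypothetical): `--input`/`-i` may be repeated because of `.multiple(true)`, both options are required, and the generated code is written to the `-o` file in addition to being echoed to stdout by the remaining `println!`.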

View File

@@ -6,42 +6,37 @@ use crate::lexer::{Lexer, Pos, Token};
#[derive(Clone, Debug)]
pub struct Parser<'a> {
filename: &'a str,
lexer: Lexer<'a>,
}
pub type ParseResult<T> = std::result::Result<T, ParseError>;
impl<'a> Parser<'a> {
pub fn new(filename: &'a str, s: &'a str) -> Parser<'a> {
Parser {
filename,
lexer: Lexer::new(s),
}
pub fn new(lexer: Lexer<'a>) -> Parser<'a> {
Parser { lexer }
}
pub fn error(&self, pos: Pos, msg: String) -> ParseError {
ParseError {
filename: self.filename.to_string(),
filename: self.lexer.filenames[pos.file].clone(),
pos,
msg,
}
}
fn take<F: Fn(Token) -> bool>(&mut self, f: F) -> ParseResult<Token<'a>> {
if let Some((pos, peek)) = self.lexer.peek() {
fn take<F: Fn(&Token) -> bool>(&mut self, f: F) -> ParseResult<Token> {
if let Some(&(pos, ref peek)) = self.lexer.peek() {
if !f(peek) {
return Err(self.error(pos, format!("Unexpected token {:?}", peek)));
}
self.lexer.next();
Ok(peek)
Ok(self.lexer.next().unwrap().1)
} else {
Err(self.error(self.lexer.pos(), "Unexpected EOF".to_string()))
}
}
fn is<F: Fn(Token) -> bool>(&self, f: F) -> bool {
if let Some((_, peek)) = self.lexer.peek() {
fn is<F: Fn(&Token) -> bool>(&self, f: F) -> bool {
if let Some(&(_, ref peek)) = self.lexer.peek() {
f(peek)
} else {
false
@@ -49,14 +44,14 @@ impl<'a> Parser<'a> {
}
fn pos(&self) -> Option<Pos> {
self.lexer.peek().map(|(pos, _)| pos)
self.lexer.peek().map(|(pos, _)| *pos)
}
fn is_lparen(&self) -> bool {
self.is(|tok| tok == Token::LParen)
self.is(|tok| *tok == Token::LParen)
}
fn is_rparen(&self) -> bool {
self.is(|tok| tok == Token::RParen)
self.is(|tok| *tok == Token::RParen)
}
fn is_sym(&self) -> bool {
self.is(|tok| tok.is_sym())
@@ -65,17 +60,20 @@ impl<'a> Parser<'a> {
self.is(|tok| tok.is_int())
}
fn is_sym_str(&self, s: &str) -> bool {
self.is(|tok| tok == Token::Symbol(s))
self.is(|tok| match tok {
&Token::Symbol(ref tok_s) if tok_s == s => true,
_ => false,
})
}
fn lparen(&mut self) -> ParseResult<()> {
self.take(|tok| tok == Token::LParen).map(|_| ())
self.take(|tok| *tok == Token::LParen).map(|_| ())
}
fn rparen(&mut self) -> ParseResult<()> {
self.take(|tok| tok == Token::RParen).map(|_| ())
self.take(|tok| *tok == Token::RParen).map(|_| ())
}
fn symbol(&mut self) -> ParseResult<&'a str> {
fn symbol(&mut self) -> ParseResult<String> {
match self.take(|tok| tok.is_sym())? {
Token::Symbol(s) => Ok(s),
_ => unreachable!(),
@@ -96,14 +94,14 @@ impl<'a> Parser<'a> {
}
Ok(Defs {
defs,
filename: self.filename.to_string(),
filenames: self.lexer.filenames.clone(),
})
}
fn parse_def(&mut self) -> ParseResult<Def> {
self.lparen()?;
let pos = self.pos();
let def = match self.symbol()? {
let def = match &self.symbol()?[..] {
"type" => Def::Type(self.parse_type()?),
"rule" => Def::Rule(self.parse_rule()?),
"decl" => Def::Decl(self.parse_decl()?),
@@ -143,7 +141,7 @@ impl<'a> Parser<'a> {
fn parse_ident(&mut self) -> ParseResult<Ident> {
let pos = self.pos();
let s = self.symbol()?;
self.str_to_ident(pos.unwrap(), s)
self.str_to_ident(pos.unwrap(), &s)
}
fn parse_type(&mut self) -> ParseResult<Type> {
@@ -285,7 +283,7 @@ impl<'a> Parser<'a> {
let var = self.str_to_ident(pos.unwrap(), s)?;
Ok(Pattern::Var { var })
} else {
let var = self.str_to_ident(pos.unwrap(), s)?;
let var = self.str_to_ident(pos.unwrap(), &s)?;
if self.is_sym_str("@") {
self.symbol()?;
let subpat = Box::new(self.parse_pattern()?);
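Because the parser no longer stores a filename of its own, it is always constructed from a `Lexer`, which owns the filename list, and errors look the reported file up via `pos.file`. A minimal sketch of the new construction path (the label below is hypothetical, and the empty input is assumed to parse to an empty `Defs`):

    // Hypothetical illustration, not part of this commit.
    let lexer = Lexer::from_str("", "<empty>");
    let mut parser = Parser::new(lexer);
    let defs = parser.parse_defs().expect("empty input yields no defs");
    assert_eq!(defs.filenames, vec!["<empty>".to_string()]);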

View File

@@ -30,7 +30,7 @@ declare_id!(VarId);
#[derive(Clone, Debug)]
pub struct TypeEnv {
pub filename: String,
pub filenames: Vec<String>,
pub syms: Vec<String>,
pub sym_map: HashMap<String, Sym>,
pub types: Vec<Type>,
@@ -158,7 +158,7 @@ impl Expr {
impl TypeEnv {
pub fn from_ast(defs: &ast::Defs) -> SemaResult<TypeEnv> {
let mut tyenv = TypeEnv {
filename: defs.filename.clone(),
filenames: defs.filenames.clone(),
syms: vec![],
sym_map: HashMap::new(),
types: vec![],
@@ -270,7 +270,7 @@ impl TypeEnv {
fn error(&self, pos: Pos, msg: String) -> SemaError {
SemaError {
filename: self.filename.clone(),
filename: self.filenames[pos.file].clone(),
pos,
msg,
}
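The same pattern applies in semantic analysis: `TypeEnv` now carries the full `filenames` list and `error()` indexes it with `pos.file`, so a type error is reported against the specific input file it came from rather than a single global filename.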