From 638c9edd01c8c437509c7c56cd989c07e9a52718 Mon Sep 17 00:00:00 2001
From: Chris Fallin
Date: Sat, 4 Sep 2021 14:04:19 -0700
Subject: [PATCH] Support for file input and output, including multiple input files with proper position tracking.

---
 cranelift/isle/Cargo.lock     |  58 ++++++++++++
 cranelift/isle/Cargo.toml     |   1 +
 cranelift/isle/src/ast.rs     |   2 +-
 cranelift/isle/src/codegen.rs |  17 +++-
 cranelift/isle/src/lexer.rs   | 168 +++++++++++++++++++++++-----------
 cranelift/isle/src/main.rs    |  47 ++++++++--
 cranelift/isle/src/parser.rs  |  46 +++++-----
 cranelift/isle/src/sema.rs    |   6 +-
 8 files changed, 255 insertions(+), 90 deletions(-)

diff --git a/cranelift/isle/Cargo.lock b/cranelift/isle/Cargo.lock
index b890d9e546..f37ad00454 100644
--- a/cranelift/isle/Cargo.lock
+++ b/cranelift/isle/Cargo.lock
@@ -11,6 +11,15 @@ dependencies = [
  "memchr",
 ]
 
+[[package]]
+name = "ansi_term"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
+dependencies = [
+ "winapi",
+]
+
 [[package]]
 name = "atty"
 version = "0.2.14"
@@ -22,12 +31,33 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "bitflags"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
 [[package]]
 name = "cfg-if"
 version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 
+[[package]]
+name = "clap"
+version = "2.33.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002"
+dependencies = [
+ "ansi_term",
+ "atty",
+ "bitflags",
+ "strsim",
+ "textwrap",
+ "unicode-width",
+ "vec_map",
+]
+
 [[package]]
 name = "env_logger"
 version = "0.8.4"
@@ -60,6 +90,7 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
 name = "isle"
 version = "0.1.0"
 dependencies = [
+ "clap",
  "env_logger",
  "log",
  "thiserror",
@@ -121,6 +152,12 @@ version = "0.6.25"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
 
+[[package]]
+name = "strsim"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
+
 [[package]]
 name = "syn"
 version = "1.0.75"
@@ -141,6 +178,15 @@ dependencies = [
  "winapi-util",
 ]
 
+[[package]]
+name = "textwrap"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
+dependencies = [
+ "unicode-width",
+]
+
 [[package]]
 name = "thiserror"
 version = "1.0.26"
@@ -161,12 +207,24 @@ dependencies = [
  "syn",
 ]
 
+[[package]]
+name = "unicode-width"
+version = "0.1.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3"
+
 [[package]]
 name = "unicode-xid"
 version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
+
+[[package]]
+name = "vec_map"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
 
 [[package]]
 name = "winapi"
version = "0.3.9" diff --git a/cranelift/isle/Cargo.toml b/cranelift/isle/Cargo.toml index 8774800064..d3f43cb1c5 100644 --- a/cranelift/isle/Cargo.toml +++ b/cranelift/isle/Cargo.toml @@ -9,3 +9,4 @@ license = "Apache-2.0 WITH LLVM-exception" log = "0.4" env_logger = "0.8" thiserror = "1.0" +clap = "2.33" diff --git a/cranelift/isle/src/ast.rs b/cranelift/isle/src/ast.rs index 53a0698d95..b0c06b68ae 100644 --- a/cranelift/isle/src/ast.rs +++ b/cranelift/isle/src/ast.rs @@ -4,7 +4,7 @@ use crate::lexer::Pos; #[derive(Clone, PartialEq, Eq, Debug)] pub struct Defs { pub defs: Vec, - pub filename: String, + pub filenames: Vec, } /// One toplevel form in an ISLE file. diff --git a/cranelift/isle/src/codegen.rs b/cranelift/isle/src/codegen.rs index 4a12c5d7c9..d503c81515 100644 --- a/cranelift/isle/src/codegen.rs +++ b/cranelift/isle/src/codegen.rs @@ -512,9 +512,17 @@ impl<'a> Codegen<'a> { pub fn generate_rust(&self) -> Result { let mut code = String::new(); writeln!(&mut code, "// GENERATED BY ISLE. DO NOT EDIT!")?; + writeln!(&mut code, "//")?; writeln!( &mut code, - "use super::*; // Pulls in all external types and ctors/etors" + "// Generated automatically from the instruction-selection DSL code in:", + )?; + for file in &self.typeenv.filenames { + writeln!(&mut code, "// - {}", file)?; + } + writeln!( + &mut code, + "\nuse super::*; // Pulls in all external types and ctors/etors" )?; self.generate_internal_types(&mut code)?; Ok(code) @@ -533,10 +541,11 @@ impl<'a> Codegen<'a> { let name = &self.typeenv.syms[name.index()]; writeln!( code, - "\n// Internal type {}: defined at {}.", + "\n/// Internal type {}: defined at {}.", name, - pos.pretty_print_line(&self.typeenv.filename) + pos.pretty_print_line(&self.typeenv.filenames[..]) )?; + writeln!(code, "#[derive(Clone, Debug)]")?; writeln!(code, "enum {} {{", name)?; for variant in variants { let name = &self.typeenv.syms[variant.name.index()]; @@ -546,7 +555,7 @@ impl<'a> Codegen<'a> { let ty_name = self.typeenv.types[field.ty.index()].name(&self.typeenv); writeln!(code, " {}: {},", name, ty_name)?; } - writeln!(code, " }}")?; + writeln!(code, " }},")?; } writeln!(code, "}}")?; } diff --git a/cranelift/isle/src/lexer.rs b/cranelift/isle/src/lexer.rs index f9672db298..af53d7313e 100644 --- a/cranelift/isle/src/lexer.rs +++ b/cranelift/isle/src/lexer.rs @@ -1,41 +1,56 @@ //! Lexer for the ISLE language. 
+use crate::error::Error;
+use std::borrow::Cow;
+
 #[derive(Clone, Debug)]
 pub struct Lexer<'a> {
-    buf: &'a [u8],
+    pub filenames: Vec<String>,
+    file_starts: Vec<usize>,
+    buf: Cow<'a, [u8]>,
     pos: Pos,
-    lookahead: Option<(Pos, Token<'a>)>,
+    lookahead: Option<(Pos, Token)>,
+}
+
+#[derive(Clone, Debug)]
+enum LexerInput<'a> {
+    String { s: &'a str, filename: &'a str },
+    File { content: String, filename: String },
 }
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub struct Pos {
+    pub file: usize,
     pub offset: usize,
     pub line: usize,
     pub col: usize,
 }
 
 impl Pos {
-    pub fn pretty_print(&self, filename: &str) -> String {
-        format!("{}:{}:{}", filename, self.line, self.col)
+    pub fn pretty_print(&self, filenames: &[String]) -> String {
+        format!("{}:{}:{}", filenames[self.file], self.line, self.col)
     }
-    pub fn pretty_print_line(&self, filename: &str) -> String {
-        format!("{} line {}", filename, self.line)
+    pub fn pretty_print_line(&self, filenames: &[String]) -> String {
+        format!("{} line {}", filenames[self.file], self.line)
     }
 }
 
-#[derive(Clone, Copy, Debug, PartialEq, Eq)]
-pub enum Token<'a> {
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum Token {
     LParen,
     RParen,
-    Symbol(&'a str),
+    Symbol(String),
     Int(i64),
 }
 
 impl<'a> Lexer<'a> {
-    pub fn new(s: &'a str) -> Lexer<'a> {
+    pub fn from_str(s: &'a str, filename: &'a str) -> Lexer<'a> {
         let mut l = Lexer {
-            buf: s.as_bytes(),
+            filenames: vec![filename.to_string()],
+            file_starts: vec![0],
+            buf: Cow::Borrowed(s.as_bytes()),
             pos: Pos {
+                file: 0,
                 offset: 0,
                 line: 1,
                 col: 0,
@@ -46,6 +61,43 @@ impl<'a> Lexer<'a> {
         l
     }
 
+    pub fn from_files(filenames: Vec<String>) -> Result<Lexer<'a>, Error> {
+        assert!(!filenames.is_empty());
+        let file_contents: Vec<String> = filenames
+            .iter()
+            .map(|f| {
+                use std::io::Read;
+                let mut f = std::fs::File::open(f)?;
+                let mut s = String::new();
+                f.read_to_string(&mut s)?;
+                Ok(s)
+            })
+            .collect::<Result<Vec<_>, Error>>()?;
+
+        let mut file_starts = vec![];
+        let mut buf = String::new();
+        for file in file_contents {
+            file_starts.push(buf.len());
+            buf += &file;
+            buf += "\n";
+        }
+
+        let mut l = Lexer {
+            filenames,
+            buf: Cow::Owned(buf.into_bytes()),
+            file_starts,
+            pos: Pos {
+                file: 0,
+                offset: 0,
+                line: 1,
+                col: 0,
+            },
+            lookahead: None,
+        };
+        l.reload();
+        Ok(l)
+    }
+
     pub fn offset(&self) -> usize {
         self.pos.offset
     }
@@ -54,7 +106,24 @@ impl<'a> Lexer<'a> {
         self.pos
     }
 
-    fn next_token(&mut self) -> Option<(Pos, Token<'a>)> {
+    fn advance_pos(&mut self) {
+        self.pos.col += 1;
+        if self.buf[self.pos.offset] == b'\n' {
+            self.pos.line += 1;
+            self.pos.col = 0;
+        }
+        self.pos.offset += 1;
+        if self.pos.file + 1 < self.file_starts.len() {
+            let next_start = self.file_starts[self.pos.file + 1];
+            if self.pos.offset >= next_start {
+                assert!(self.pos.offset == next_start);
+                self.pos.file += 1;
+                self.pos.line = 1;
+            }
+        }
+    }
+
+    fn next_token(&mut self) -> Option<(Pos, Token)> {
         fn is_sym_first_char(c: u8) -> bool {
             match c {
                 b'-' | b'0'..=b'9' | b'(' | b')' | b';' => false,
@@ -73,20 +142,13 @@
         // Skip any whitespace and any comments.
         while self.pos.offset < self.buf.len() {
             if self.buf[self.pos.offset].is_ascii_whitespace() {
-                self.pos.col += 1;
-                if self.buf[self.pos.offset] == b'\n' {
-                    self.pos.line += 1;
-                    self.pos.col = 0;
-                }
-                self.pos.offset += 1;
+                self.advance_pos();
                 continue;
             }
             if self.buf[self.pos.offset] == b';' {
                 while self.pos.offset < self.buf.len() && self.buf[self.pos.offset] != b'\n' {
-                    self.pos.offset += 1;
+                    self.advance_pos();
                 }
-                self.pos.line += 1;
-                self.pos.col = 0;
                 continue;
             }
             break;
         }
@@ -99,13 +161,11 @@
         let char_pos = self.pos;
         match self.buf[self.pos.offset] {
             b'(' => {
-                self.pos.offset += 1;
-                self.pos.col += 1;
+                self.advance_pos();
                 Some((char_pos, Token::LParen))
             }
             b')' => {
-                self.pos.offset += 1;
-                self.pos.col += 1;
+                self.advance_pos();
                 Some((char_pos, Token::RParen))
             }
             c if is_sym_first_char(c) => {
@@ -114,19 +174,17 @@
                 while self.pos.offset < self.buf.len()
                     && is_sym_other_char(self.buf[self.pos.offset])
                 {
-                    self.pos.col += 1;
-                    self.pos.offset += 1;
+                    self.advance_pos();
                 }
                 let end = self.pos.offset;
                 let s = std::str::from_utf8(&self.buf[start..end])
                     .expect("Only ASCII characters, should be UTF-8");
-                Some((start_pos, Token::Symbol(s)))
+                Some((start_pos, Token::Symbol(s.to_string())))
             }
             c if (c >= b'0' && c <= b'9') || c == b'-' => {
                 let start_pos = self.pos;
                 let neg = if c == b'-' {
-                    self.pos.offset += 1;
-                    self.pos.col += 1;
+                    self.advance_pos();
                     true
                 } else {
                     false
@@ -136,8 +194,7 @@
                     && (self.buf[self.pos.offset] >= b'0' && self.buf[self.pos.offset] <= b'9')
                 {
                     num = (num * 10) + (self.buf[self.pos.offset] - b'0') as i64;
-                    self.pos.offset += 1;
-                    self.pos.col += 1;
+                    self.advance_pos();
                 }
 
                 let tok = if neg {
@@ -157,8 +214,8 @@
         }
     }
 
-    pub fn peek(&self) -> Option<(Pos, Token<'a>)> {
-        self.lookahead
+    pub fn peek(&self) -> Option<&(Pos, Token)> {
+        self.lookahead.as_ref()
    }
 
     pub fn eof(&self) -> bool {
@@ -167,16 +224,16 @@
 }
 
 impl<'a> std::iter::Iterator for Lexer<'a> {
-    type Item = (Pos, Token<'a>);
+    type Item = (Pos, Token);
 
-    fn next(&mut self) -> Option<(Pos, Token<'a>)> {
+    fn next(&mut self) -> Option<(Pos, Token)> {
         let tok = self.lookahead.take();
         self.reload();
         tok
     }
 }
 
-impl<'a> Token<'a> {
+impl Token {
     pub fn is_int(&self) -> bool {
         match self {
             Token::Int(_) => true,
@@ -199,14 +256,17 @@
     #[test]
     fn lexer_basic() {
         assert_eq!(
-            Lexer::new(";; comment\n; another\r\n \t(one two three 23 -568 )\n")
-                .map(|(_, tok)| tok)
-                .collect::<Vec<_>>(),
+            Lexer::from_str(
+                ";; comment\n; another\r\n \t(one two three 23 -568 )\n",
+                "test"
+            )
+            .map(|(_, tok)| tok)
+            .collect::<Vec<_>>(),
             vec![
                 Token::LParen,
-                Token::Symbol("one"),
-                Token::Symbol("two"),
-                Token::Symbol("three"),
+                Token::Symbol("one".to_string()),
+                Token::Symbol("two".to_string()),
+                Token::Symbol("three".to_string()),
                 Token::Int(23),
                 Token::Int(-568),
                 Token::RParen
@@ -217,15 +277,19 @@
     #[test]
     fn ends_with_sym() {
         assert_eq!(
-            Lexer::new("asdf").map(|(_, tok)| tok).collect::<Vec<_>>(),
-            vec![Token::Symbol("asdf"),]
+            Lexer::from_str("asdf", "test")
+                .map(|(_, tok)| tok)
+                .collect::<Vec<_>>(),
+            vec![Token::Symbol("asdf".to_string()),]
         );
     }
 
     #[test]
     fn ends_with_num() {
         assert_eq!(
-            Lexer::new("23").map(|(_, tok)| tok).collect::<Vec<_>>(),
+            Lexer::from_str("23", "test")
+                .map(|(_, tok)| tok)
+                .collect::<Vec<_>>(),
             vec![Token::Int(23)],
         );
     }
@@ -233,16 +297,16 @@
     #[test]
     fn weird_syms() {
         assert_eq!(
-            Lexer::new("(+ [] => !! _test!;comment\n)")
_test!;comment\n)", "test") .map(|(_, tok)| tok) .collect::>(), vec![ Token::LParen, - Token::Symbol("+"), - Token::Symbol("[]"), - Token::Symbol("=>"), - Token::Symbol("!!"), - Token::Symbol("_test!"), + Token::Symbol("+".to_string()), + Token::Symbol("[]".to_string()), + Token::Symbol("=>".to_string()), + Token::Symbol("!!".to_string()), + Token::Symbol("_test!".to_string()), Token::RParen, ] ); diff --git a/cranelift/isle/src/main.rs b/cranelift/isle/src/main.rs index 038a8a7a85..a09e21096a 100644 --- a/cranelift/isle/src/main.rs +++ b/cranelift/isle/src/main.rs @@ -1,7 +1,6 @@ #![allow(dead_code)] -use std::io::stdin; -use std::io::Read; +use clap::{App, Arg}; mod ast; mod codegen; @@ -14,11 +13,47 @@ mod sema; fn main() -> Result<(), error::Error> { let _ = env_logger::try_init(); - let mut input = String::new(); - stdin().read_to_string(&mut input)?; - let mut parser = parser::Parser::new("", &input[..]); + + let matches = App::new("isle") + .version(env!("CARGO_PKG_VERSION")) + .author("Chris Fallin ") + .about("Instruction selection logic engine (ISLE) code generator") + .arg( + Arg::with_name("input") + .short("i") + .long("input") + .value_name("FILE.isle") + .takes_value(true) + .multiple(true) + .required(true), + ) + .arg( + Arg::with_name("output") + .short("o") + .long("output") + .value_name("FILE.rs") + .takes_value(true) + .required(true), + ) + .get_matches(); + + let input_files = matches + .values_of("input") + .unwrap() + .map(|s| s.to_string()) + .collect::>(); + let output_file = matches.value_of("output").unwrap(); + + let lexer = lexer::Lexer::from_files(input_files)?; + let mut parser = parser::Parser::new(lexer); let defs = parser.parse_defs()?; let code = compile::compile(&defs)?; - println!("{}", code); + + { + use std::io::Write; + let mut f = std::fs::File::create(output_file)?; + writeln!(&mut f, "{}", code)?; + } + Ok(()) } diff --git a/cranelift/isle/src/parser.rs b/cranelift/isle/src/parser.rs index 7bd7c8d058..8d9591b175 100644 --- a/cranelift/isle/src/parser.rs +++ b/cranelift/isle/src/parser.rs @@ -6,42 +6,37 @@ use crate::lexer::{Lexer, Pos, Token}; #[derive(Clone, Debug)] pub struct Parser<'a> { - filename: &'a str, lexer: Lexer<'a>, } pub type ParseResult = std::result::Result; impl<'a> Parser<'a> { - pub fn new(filename: &'a str, s: &'a str) -> Parser<'a> { - Parser { - filename, - lexer: Lexer::new(s), - } + pub fn new(lexer: Lexer<'a>) -> Parser<'a> { + Parser { lexer } } pub fn error(&self, pos: Pos, msg: String) -> ParseError { ParseError { - filename: self.filename.to_string(), + filename: self.lexer.filenames[pos.file].clone(), pos, msg, } } - fn take bool>(&mut self, f: F) -> ParseResult> { - if let Some((pos, peek)) = self.lexer.peek() { + fn take bool>(&mut self, f: F) -> ParseResult { + if let Some(&(pos, ref peek)) = self.lexer.peek() { if !f(peek) { return Err(self.error(pos, format!("Unexpected token {:?}", peek))); } - self.lexer.next(); - Ok(peek) + Ok(self.lexer.next().unwrap().1) } else { Err(self.error(self.lexer.pos(), "Unexpected EOF".to_string())) } } - fn is bool>(&self, f: F) -> bool { - if let Some((_, peek)) = self.lexer.peek() { + fn is bool>(&self, f: F) -> bool { + if let Some(&(_, ref peek)) = self.lexer.peek() { f(peek) } else { false @@ -49,14 +44,14 @@ impl<'a> Parser<'a> { } fn pos(&self) -> Option { - self.lexer.peek().map(|(pos, _)| pos) + self.lexer.peek().map(|(pos, _)| *pos) } fn is_lparen(&self) -> bool { - self.is(|tok| tok == Token::LParen) + self.is(|tok| *tok == Token::LParen) } fn is_rparen(&self) -> 
     fn is_rparen(&self) -> bool {
-        self.is(|tok| tok == Token::RParen)
+        self.is(|tok| *tok == Token::RParen)
     }
     fn is_sym(&self) -> bool {
         self.is(|tok| tok.is_sym())
@@ -65,17 +60,20 @@
         self.is(|tok| tok.is_int())
     }
     fn is_sym_str(&self, s: &str) -> bool {
-        self.is(|tok| tok == Token::Symbol(s))
+        self.is(|tok| match tok {
+            &Token::Symbol(ref tok_s) if tok_s == s => true,
+            _ => false,
+        })
     }
     fn lparen(&mut self) -> ParseResult<()> {
-        self.take(|tok| tok == Token::LParen).map(|_| ())
+        self.take(|tok| *tok == Token::LParen).map(|_| ())
     }
     fn rparen(&mut self) -> ParseResult<()> {
-        self.take(|tok| tok == Token::RParen).map(|_| ())
+        self.take(|tok| *tok == Token::RParen).map(|_| ())
     }
 
-    fn symbol(&mut self) -> ParseResult<&'a str> {
+    fn symbol(&mut self) -> ParseResult<String> {
         match self.take(|tok| tok.is_sym())? {
             Token::Symbol(s) => Ok(s),
             _ => unreachable!(),
@@ -96,14 +94,14 @@
         }
         Ok(Defs {
             defs,
-            filename: self.filename.to_string(),
+            filenames: self.lexer.filenames.clone(),
         })
     }
 
     fn parse_def(&mut self) -> ParseResult<Def> {
         self.lparen()?;
         let pos = self.pos();
-        let def = match self.symbol()? {
+        let def = match &self.symbol()?[..] {
             "type" => Def::Type(self.parse_type()?),
             "rule" => Def::Rule(self.parse_rule()?),
             "decl" => Def::Decl(self.parse_decl()?),
@@ -143,7 +141,7 @@
     fn parse_ident(&mut self) -> ParseResult<Ident> {
         let pos = self.pos();
         let s = self.symbol()?;
-        self.str_to_ident(pos.unwrap(), s)
+        self.str_to_ident(pos.unwrap(), &s)
     }
 
     fn parse_type(&mut self) -> ParseResult<Type> {
@@ -285,7 +283,7 @@
             let var = self.str_to_ident(pos.unwrap(), s)?;
             Ok(Pattern::Var { var })
         } else {
-            let var = self.str_to_ident(pos.unwrap(), s)?;
+            let var = self.str_to_ident(pos.unwrap(), &s)?;
             if self.is_sym_str("@") {
                 self.symbol()?;
                 let subpat = Box::new(self.parse_pattern()?);
diff --git a/cranelift/isle/src/sema.rs b/cranelift/isle/src/sema.rs
index ff6a2d219e..2ff6f3cc1c 100644
--- a/cranelift/isle/src/sema.rs
+++ b/cranelift/isle/src/sema.rs
@@ -30,7 +30,7 @@ declare_id!(VarId);
 
 #[derive(Clone, Debug)]
 pub struct TypeEnv {
-    pub filename: String,
+    pub filenames: Vec<String>,
     pub syms: Vec<String>,
     pub sym_map: HashMap<String, Sym>,
     pub types: Vec<Type>,
@@ -158,7 +158,7 @@ impl Expr {
 impl TypeEnv {
     pub fn from_ast(defs: &ast::Defs) -> SemaResult<TypeEnv> {
         let mut tyenv = TypeEnv {
-            filename: defs.filename.clone(),
+            filenames: defs.filenames.clone(),
             syms: vec![],
             sym_map: HashMap::new(),
             types: vec![],
@@ -270,7 +270,7 @@ impl TypeEnv {
     fn error(&self, pos: Pos, msg: String) -> SemaError {
         SemaError {
-            filename: self.filename.clone(),
+            filename: self.filenames[pos.file].clone(),
             pos,
             msg,
         }
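
The clap definition in main.rs above means the generator is now driven entirely by command-line arguments rather than stdin/stdout: one or more .isle inputs and a single Rust output path. An illustrative invocation (the file names here are placeholders, not from the patch; -i/--input may be repeated because the argument is declared with multiple(true)):

    isle -i prelude.isle -i lower.isle -o isle_generated.rs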
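The multi-file position tracking works because from_files concatenates every input into one byte buffer, records each file's starting offset in file_starts, and advance_pos bumps Pos::file and resets Pos::line whenever the cursor crosses the next recorded start. Below is a self-contained sketch of that bookkeeping under simplified assumptions: FilePos and MultiFile are hypothetical stand-ins for the real Pos and Lexer, and file contents are taken as in-memory strings instead of being read from disk.

    // Sketch only: mirrors the file_starts/advance_pos bookkeeping added by the
    // patch, with hypothetical names; the real logic lives in Lexer::advance_pos.
    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    struct FilePos {
        file: usize,   // index into the filenames/file_starts tables
        offset: usize, // byte offset into the concatenated buffer
        line: usize,   // 1-based line within the current file
        col: usize,    // column within the current line
    }

    struct MultiFile {
        buf: Vec<u8>,            // all files concatenated, '\n' appended after each
        file_starts: Vec<usize>, // byte offset at which each file begins
    }

    impl MultiFile {
        fn new(files: &[&str]) -> MultiFile {
            let mut buf = Vec::new();
            let mut file_starts = Vec::new();
            for f in files {
                file_starts.push(buf.len());
                buf.extend_from_slice(f.as_bytes());
                buf.push(b'\n');
            }
            MultiFile { buf, file_starts }
        }

        // Same rules as advance_pos in the patch: a newline resets the column and
        // bumps the line; crossing the next file's start offset bumps `file` and
        // resets `line` to 1.
        fn advance(&self, mut pos: FilePos) -> FilePos {
            pos.col += 1;
            if self.buf[pos.offset] == b'\n' {
                pos.line += 1;
                pos.col = 0;
            }
            pos.offset += 1;
            if pos.file + 1 < self.file_starts.len() && pos.offset >= self.file_starts[pos.file + 1] {
                pos.file += 1;
                pos.line = 1;
            }
            pos
        }
    }

    fn main() {
        let m = MultiFile::new(&["(a)", "(b\nc)"]);
        let mut pos = FilePos { file: 0, offset: 0, line: 1, col: 0 };
        while pos.offset < m.buf.len() {
            pos = m.advance(pos);
        }
        // After consuming both inputs the cursor should sit in the second file.
        assert_eq!(pos.file, 1);
        println!("ended in file {}, line {}, col {}", pos.file, pos.line, pos.col);
    }

Because every Pos carries a file index, error messages and generated-code comments can resolve a position back to the right source file via the filenames table, which is exactly what the new pretty_print and pretty_print_line signatures do.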
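For reference, the header that the updated generate_rust writes at the top of the generated file would look like the following for two hypothetical inputs; the blank line before the use comes from the "\n" prefix on the final writeln! call.

    // GENERATED BY ISLE. DO NOT EDIT!
    //
    // Generated automatically from the instruction-selection DSL code in:
    // - prelude.isle
    // - lower.isle

    use super::*; // Pulls in all external types and ctors/etors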