ISLE: support more flexible integer constants. (#4559)

The ISLE language's lexer previously used a very primitive
`i64::from_str_radix` call to parse integer constants, allowing values
in the range -2^63..2^63 only. Also, underscores to separate digits (as
is allowed in Rust) were not supported. Finally, 128-bit constants were
not supported at all.

This PR addresses all issues above:
- Integer constants are internally stored as 128-bit values.
- Parsing supports either signed (-2^127..2^127) or unsigned (0..2^128)
  range. Negation works independently of that, so one can write
  `-0xffff..ffff` (128 bits wide, i.e., -(2^128-1)) to get a `1`.
- Underscores are supported to separate groups of digits, so one can
  write `0xffff_ffff`.
- A minor oversight was fixed: hex constants can start with `0X`
  (uppercase) as well as `0x`, for consistency with Rust and C.

This PR also adds a new kind of ISLE test that actually runs a driver
linked to compiled ISLE code; we previously didn't have any such tests,
but it is now quite useful to assert correct interpretation of constant
values.
This commit is contained in:
Chris Fallin
2022-07-29 14:52:14 -07:00
committed by GitHub
parent b1273548fb
commit 8e9e9c52a1
10 changed files with 95 additions and 28 deletions

View File

@@ -12,6 +12,7 @@ fn main() {
emit_tests(&mut out, "isle_examples/pass", "run_pass"); emit_tests(&mut out, "isle_examples/pass", "run_pass");
emit_tests(&mut out, "isle_examples/fail", "run_fail"); emit_tests(&mut out, "isle_examples/fail", "run_fail");
emit_tests(&mut out, "isle_examples/link", "run_link"); emit_tests(&mut out, "isle_examples/link", "run_link");
emit_tests(&mut out, "isle_examples/run", "run_run");
let output = out_dir.join("isle_tests.rs"); let output = out_dir.join("isle_tests.rs");
std::fs::write(output, out).unwrap(); std::fs::write(output, out).unwrap();

View File

@@ -0,0 +1,17 @@
;; Integer-constant test input: the rules below use negative decimal literals,
;; hex literals with `_` digit separators, and constants wider than 64 bits.
(type i64 (primitive i64))
;; `X` takes one `i64` and produces an `i64`.
(decl X (i64) i64)
;; Negative decimal constants on both the pattern and the result side.
(rule (X -1) -2)
(rule (X -2) -3)
;; Pattern is 2^63 - 1 (the largest positive i64); the result literal is 2^63,
;; which is written here as an unsigned hex value.
(rule (X 0x7fff_ffff_ffff_ffff) 0x8000_0000_0000_0000)
;; A 64-bit pattern with the top bit set, written as an unsigned hex literal.
(rule (X 0xffff_ffff_ffff_fff0) 1)
(type i128 (primitive i128))
;; `Y` takes one `i128` and produces an `i128`.
(decl Y (i128) i128)
;; A positive constant that needs more than 64 bits.
(rule (Y 0x1000_0000_0000_0000_1234_5678_9abc_def0) -1)
;; All 128 bits set: only representable in the unsigned 0..2^128 range.
(rule (Y 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff) 3)
;; Negated forms of the two wide constants above.
(rule (Y -0x1000_0000_0000_0000_1234_5678_9abc_def0) 1)
(rule (Y -0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff) -3)

View File

@@ -0,0 +1,18 @@
mod iconst;
/// Unit struct passed as the context argument to the generated `iconst`
/// constructors called from `main`.
struct Context;
// The impl body is empty: this driver supplies no methods of its own.
impl iconst::Context for Context {}
/// Calls the generated `iconst` constructors with each constant used in the
/// ISLE rules and asserts that the expected result constant comes back.
fn main() {
    let mut ctx = Context;

    // (input, expected) pairs for the 64-bit constructor `X`, in the same
    // order as the rules are declared.
    let x_cases: [(i64, i64); 4] = [
        (-1, -2),
        (-2, -3),
        (0x7fff_ffff_ffff_ffff, 0x8000_0000_0000_0000u64 as i64),
        (0xffff_ffff_ffff_fff0_u64 as i64, 1),
    ];
    for &(input, expected) in x_cases.iter() {
        assert_eq!(iconst::constructor_X(&mut ctx, input), Some(expected));
    }

    // The two wide bit patterns used by the 128-bit constructor `Y`.
    let wide: i128 = 0x1000_0000_0000_0000_1234_5678_9abc_def0;
    let all_ones: i128 = 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffffu128 as i128;

    // (input, expected) pairs for `Y`: each pattern, then its negation.
    let y_cases: [(i128, i128); 4] = [
        (wide, -1),
        (all_ones, 3),
        (-wide, 1),
        (-all_ones, -3),
    ];
    for &(input, expected) in y_cases.iter() {
        assert_eq!(iconst::constructor_Y(&mut ctx, input), Some(expected));
    }
}

View File

@@ -121,7 +121,7 @@ pub enum Pattern {
pos: Pos, pos: Pos,
}, },
/// An operator that matches a constant integer value. /// An operator that matches a constant integer value.
ConstInt { val: i64, pos: Pos }, ConstInt { val: i128, pos: Pos },
/// An operator that matches an external constant value. /// An operator that matches an external constant value.
ConstPrim { val: Ident, pos: Pos }, ConstPrim { val: Ident, pos: Pos },
/// An application of a type variant or term. /// An application of a type variant or term.
@@ -306,7 +306,7 @@ pub enum Expr {
/// A variable use. /// A variable use.
Var { name: Ident, pos: Pos }, Var { name: Ident, pos: Pos },
/// A constant integer. /// A constant integer.
ConstInt { val: i64, pos: Pos }, ConstInt { val: i128, pos: Pos },
/// A constant of some other primitive type. /// A constant of some other primitive type.
ConstPrim { val: Ident, pos: Pos }, ConstPrim { val: Ident, pos: Pos },
/// The `(let ((var ty val)*) body)` form. /// The `(let ((var ty val)*) body)` form.

View File

@@ -261,7 +261,7 @@ impl<'a> Codegen<'a> {
ctx.values.insert(value.clone(), (is_ref, ty)); ctx.values.insert(value.clone(), (is_ref, ty));
} }
fn const_int(&self, val: i64, ty: TypeId) -> String { fn const_int(&self, val: i128, ty: TypeId) -> String {
let is_bool = match &self.typeenv.types[ty.index()] { let is_bool = match &self.typeenv.types[ty.index()] {
&Type::Primitive(_, name, _) => &self.typeenv.syms[name.index()] == "bool", &Type::Primitive(_, name, _) => &self.typeenv.syms[name.index()] == "bool",
_ => unreachable!(), _ => unreachable!(),
@@ -269,7 +269,12 @@ impl<'a> Codegen<'a> {
if is_bool { if is_bool {
format!("{}", val != 0) format!("{}", val != 0)
} else { } else {
format!("{}", val) let ty_name = self.type_name(ty, /* by_ref = */ false);
if ty_name == "i128" {
format!("{}i128", val)
} else {
format!("{}i128 as {}", val, ty_name)
}
} }
} }
@@ -523,10 +528,14 @@ impl<'a> Codegen<'a> {
false false
} }
&PatternInst::MatchInt { &PatternInst::MatchInt {
ref input, int_val, .. ref input,
int_val,
ty,
..
} => { } => {
let int_val = self.const_int(int_val, ty);
let input = self.value_by_val(input, ctx); let input = self.value_by_val(input, ctx);
writeln!(code, "{}if {} == {} {{", indent, input, int_val).unwrap(); writeln!(code, "{}if {} == {} {{", indent, input, int_val).unwrap();
false false
} }
&PatternInst::MatchPrim { ref input, val, .. } => { &PatternInst::MatchPrim { ref input, val, .. } => {

View File

@@ -49,7 +49,7 @@ pub enum PatternInst {
/// The value's type. /// The value's type.
ty: TypeId, ty: TypeId,
/// The integer to match against the value. /// The integer to match against the value.
int_val: i64, int_val: i128,
}, },
/// Try matching the given value as the given constant. Produces no values. /// Try matching the given value as the given constant. Produces no values.
@@ -128,7 +128,7 @@ pub enum ExprInst {
/// This integer type. /// This integer type.
ty: TypeId, ty: TypeId,
/// The integer value. Must fit within the type. /// The integer value. Must fit within the type.
val: i64, val: i128,
}, },
/// Produce a constant extern value. /// Produce a constant extern value.
@@ -222,7 +222,7 @@ impl ExprSequence {
/// Is this expression sequence producing a constant integer? /// Is this expression sequence producing a constant integer?
/// ///
/// If so, return the integer type and the constant. /// If so, return the integer type and the constant.
pub fn is_const_int(&self) -> Option<(TypeId, i64)> { pub fn is_const_int(&self) -> Option<(TypeId, i128)> {
if self.insts.len() == 2 && matches!(&self.insts[1], &ExprInst::Return { .. }) { if self.insts.len() == 2 && matches!(&self.insts[1], &ExprInst::Return { .. }) {
match &self.insts[0] { match &self.insts[0] {
&ExprInst::ConstInt { ty, val } => Some((ty, val)), &ExprInst::ConstInt { ty, val } => Some((ty, val)),
@@ -266,7 +266,7 @@ impl PatternSequence {
self.add_inst(PatternInst::MatchEqual { a, b, ty }); self.add_inst(PatternInst::MatchEqual { a, b, ty });
} }
fn add_match_int(&mut self, input: Value, ty: TypeId, int_val: i64) { fn add_match_int(&mut self, input: Value, ty: TypeId, int_val: i128) {
self.add_inst(PatternInst::MatchInt { input, ty, int_val }); self.add_inst(PatternInst::MatchInt { input, ty, int_val });
} }
@@ -486,7 +486,7 @@ impl ExprSequence {
id id
} }
fn add_const_int(&mut self, ty: TypeId, val: i64) -> Value { fn add_const_int(&mut self, ty: TypeId, val: i128) -> Value {
let inst = InstId(self.insts.len()); let inst = InstId(self.insts.len());
self.add_inst(ExprInst::ConstInt { ty, val }); self.add_inst(ExprInst::ConstInt { ty, val });
Value::Expr { inst, output: 0 } Value::Expr { inst, output: 0 }

View File

@@ -68,7 +68,7 @@ pub enum Token {
/// A symbol, e.g. `Foo`. /// A symbol, e.g. `Foo`.
Symbol(String), Symbol(String),
/// An integer. /// An integer.
Int(i64), Int(i128),
/// `@` /// `@`
At, At,
} }
@@ -252,7 +252,8 @@ impl<'a> Lexer<'a> {
// Check for hex literals. // Check for hex literals.
if self.buf.get(self.pos.offset).copied() == Some(b'0') if self.buf.get(self.pos.offset).copied() == Some(b'0')
&& self.buf.get(self.pos.offset + 1).copied() == Some(b'x') && (self.buf.get(self.pos.offset + 1).copied() == Some(b'x')
|| self.buf.get(self.pos.offset + 1).copied() == Some(b'X'))
{ {
self.advance_pos(); self.advance_pos();
self.advance_pos(); self.advance_pos();
@@ -262,7 +263,7 @@ impl<'a> Lexer<'a> {
// Find the range in the buffer for this integer literal. We'll // Find the range in the buffer for this integer literal. We'll
// pass this range to `i64::from_str_radix` to do the actual // pass this range to `i64::from_str_radix` to do the actual
// string-to-integer conversion. // string-to-integer conversion.
let start_offset = self.pos.offset; let mut s = vec![];
while self.pos.offset < self.buf.len() while self.pos.offset < self.buf.len()
&& ((radix == 10 && ((radix == 10
&& self.buf[self.pos.offset] >= b'0' && self.buf[self.pos.offset] >= b'0'
@@ -273,21 +274,25 @@ impl<'a> Lexer<'a> {
|| (self.buf[self.pos.offset] >= b'a' || (self.buf[self.pos.offset] >= b'a'
&& self.buf[self.pos.offset] <= b'f') && self.buf[self.pos.offset] <= b'f')
|| (self.buf[self.pos.offset] >= b'A' || (self.buf[self.pos.offset] >= b'A'
&& self.buf[self.pos.offset] <= b'F')))) && self.buf[self.pos.offset] <= b'F')))
|| self.buf[self.pos.offset] == b'_')
{ {
if self.buf[self.pos.offset] != b'_' {
s.push(self.buf[self.pos.offset]);
}
self.advance_pos(); self.advance_pos();
} }
let end_offset = self.pos.offset; let s_utf8 = std::str::from_utf8(&s[..]).unwrap();
let num = i64::from_str_radix( // Support either signed range (-2^127..2^127) or
std::str::from_utf8(&self.buf[start_offset..end_offset]).unwrap(), // unsigned range (0..2^128).
radix, let num = i128::from_str_radix(s_utf8, radix)
) .or_else(|_| u128::from_str_radix(s_utf8, radix).map(|val| val as i128))
.map_err(|e| self.error(start_pos, e.to_string()))?; .map_err(|e| self.error(start_pos, e.to_string()))?;
let tok = if neg { let tok = if neg {
Token::Int(num.checked_neg().ok_or_else(|| { Token::Int(num.checked_neg().ok_or_else(|| {
self.error(start_pos, "integer literal cannot fit in i64") self.error(start_pos, "integer literal cannot fit in i128")
})?) })?)
} else { } else {
Token::Int(num) Token::Int(num)

View File

@@ -114,7 +114,7 @@ impl<'a> Parser<'a> {
} }
} }
fn int(&mut self) -> Result<i64> { fn int(&mut self) -> Result<i128> {
match self.take(|tok| tok.is_int())? { match self.take(|tok| tok.is_int())? {
Token::Int(i) => Ok(i), Token::Int(i) => Ok(i),
_ => unreachable!(), _ => unreachable!(),
@@ -391,7 +391,7 @@ impl<'a> Parser<'a> {
iflets, iflets,
expr, expr,
pos, pos,
prio, prio: prio.map(|prio| i64::try_from(prio).unwrap()),
}); });
} }
} }

View File

@@ -449,7 +449,7 @@ pub enum Pattern {
/// Match the current value against a constant integer of the given integer /// Match the current value against a constant integer of the given integer
/// type. /// type.
ConstInt(TypeId, i64), ConstInt(TypeId, i128),
/// Match the current value against a constant primitive value of the given /// Match the current value against a constant primitive value of the given
/// primitive type. /// primitive type.
@@ -474,7 +474,7 @@ pub enum Expr {
/// Get the value of a variable that was bound in the left-hand side. /// Get the value of a variable that was bound in the left-hand side.
Var(TypeId, VarId), Var(TypeId, VarId),
/// Get a constant integer. /// Get a constant integer.
ConstInt(TypeId, i64), ConstInt(TypeId, i128),
/// Get a constant primitive. /// Get a constant primitive.
ConstPrim(TypeId, Sym), ConstPrim(TypeId, Sym),
/// Evaluate the nested expressions and bind their results to the given /// Evaluate the nested expressions and bind their results to the given

View File

@@ -18,7 +18,7 @@ pub fn run_fail(filename: &str) {
assert!(build(filename).is_err()); assert!(build(filename).is_err());
} }
pub fn run_link(isle_filename: &str) { fn build_and_link_isle(isle_filename: &str) -> (tempfile::TempDir, std::path::PathBuf) {
let tempdir = tempfile::tempdir().unwrap(); let tempdir = tempfile::tempdir().unwrap();
let code = build(isle_filename).unwrap(); let code = build(isle_filename).unwrap();
@@ -45,10 +45,27 @@ pub fn run_link(isle_filename: &str) {
let mut rustc = std::process::Command::new("rustc") let mut rustc = std::process::Command::new("rustc")
.arg(&rust_driver) .arg(&rust_driver)
.arg("-o") .arg("-o")
.arg(output) .arg(output.clone())
.spawn() .spawn()
.unwrap(); .unwrap();
assert!(rustc.wait().unwrap().success()); assert!(rustc.wait().unwrap().success());
(tempdir, output)
}
/// Builds the given ISLE file and links it with its Rust driver, without
/// running the result. The returned temp dir and executable path are
/// discarded immediately.
pub fn run_link(isle_filename: &str) {
    let _ = build_and_link_isle(isle_filename);
}
pub fn run_run(isle_filename: &str) {
let (_tempdir, exe) = build_and_link_isle(isle_filename);
assert!(std::process::Command::new(exe)
.spawn()
.unwrap()
.wait()
.unwrap()
.success());
} }
// Generated by build.rs. // Generated by build.rs.