ISLE: support more flexible integer constants. (#4559)

The ISLE language's lexer previously used a very primitive
`i64::from_str_radix` call to parse integer constants, allowing values
in the range -2^63..2^63 only. Also, underscores to separate digits (as
is allowed in Rust) were not supported. Finally, 128-bit constants were
not supported at all.

This PR addresses all issues above:
- Integer constants are internally stored as 128-bit values.
- Parsing supports either signed (-2^127..2^127) or unsigned (0..2^128)
  range. Negation works independently of that, so one can write
  `-0xffff..ffff` (128 bits wide, i.e., -(2^128-1)) to get a `1`.
- Underscores are supported to separate groups of digits, so one can
  write `0xffff_ffff`.
- A minor oversight was fixed: hex constants can start with `0X`
  (uppercase) as well as `0x`, for consistency with Rust and C.

This PR also adds a new kind of ISLE test that actually runs a driver
linked to compiled ISLE code; we previously didn't have any such tests,
but it is now quite useful to assert correct interpretation of constant
values.
This commit is contained in:
Chris Fallin
2022-07-29 14:52:14 -07:00
committed by GitHub
parent b1273548fb
commit 8e9e9c52a1
10 changed files with 95 additions and 28 deletions

View File

@@ -12,6 +12,7 @@ fn main() {
emit_tests(&mut out, "isle_examples/pass", "run_pass");
emit_tests(&mut out, "isle_examples/fail", "run_fail");
emit_tests(&mut out, "isle_examples/link", "run_link");
emit_tests(&mut out, "isle_examples/run", "run_run");
let output = out_dir.join("isle_tests.rs");
std::fs::write(output, out).unwrap();

View File

@@ -0,0 +1,17 @@
(type i64 (primitive i64))
(decl X (i64) i64)
(rule (X -1) -2)
(rule (X -2) -3)
(rule (X 0x7fff_ffff_ffff_ffff) 0x8000_0000_0000_0000)
(rule (X 0xffff_ffff_ffff_fff0) 1)
(type i128 (primitive i128))
(decl Y (i128) i128)
(rule (Y 0x1000_0000_0000_0000_1234_5678_9abc_def0) -1)
(rule (Y 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff) 3)
(rule (Y -0x1000_0000_0000_0000_1234_5678_9abc_def0) 1)
(rule (Y -0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff) -3)

View File

@@ -0,0 +1,18 @@
mod iconst;
struct Context;
impl iconst::Context for Context {}
/// Driver for the `iconst` run-test: calls the generated constructors with
/// specific constant arguments and checks that each one returns the value
/// given by the corresponding ISLE rule.
fn main() {
    let mut ctx = Context;

    // (argument, expected result) pairs for the 64-bit constructor `X`.
    // Values with the top bit set are written as `u64` and reinterpreted
    // as `i64`.
    let x_cases: [(i64, i64); 4] = [
        (-1, -2),
        (-2, -3),
        (0x7fff_ffff_ffff_ffff, 0x8000_0000_0000_0000u64 as i64),
        (0xffff_ffff_ffff_fff0_u64 as i64, 1),
    ];
    for &(arg, expected) in x_cases.iter() {
        assert_eq!(iconst::constructor_X(&mut ctx, arg), Some(expected));
    }

    // (argument, expected result) pairs for the 128-bit constructor `Y`,
    // covering a full-width unsigned literal and negated literals.
    let y_cases: [(i128, i128); 4] = [
        (0x1000_0000_0000_0000_1234_5678_9abc_def0, -1),
        (0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffffu128 as i128, 3),
        (-0x1000_0000_0000_0000_1234_5678_9abc_def0, 1),
        (-(0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffffu128 as i128), -3),
    ];
    for &(arg, expected) in y_cases.iter() {
        assert_eq!(iconst::constructor_Y(&mut ctx, arg), Some(expected));
    }
}

View File

@@ -121,7 +121,7 @@ pub enum Pattern {
pos: Pos,
},
/// An operator that matches a constant integer value.
ConstInt { val: i64, pos: Pos },
ConstInt { val: i128, pos: Pos },
/// An operator that matches an external constant value.
ConstPrim { val: Ident, pos: Pos },
/// An application of a type variant or term.
@@ -306,7 +306,7 @@ pub enum Expr {
/// A variable use.
Var { name: Ident, pos: Pos },
/// A constant integer.
ConstInt { val: i64, pos: Pos },
ConstInt { val: i128, pos: Pos },
/// A constant of some other primitive type.
ConstPrim { val: Ident, pos: Pos },
/// The `(let ((var ty val)*) body)` form.

View File

@@ -261,7 +261,7 @@ impl<'a> Codegen<'a> {
ctx.values.insert(value.clone(), (is_ref, ty));
}
fn const_int(&self, val: i64, ty: TypeId) -> String {
fn const_int(&self, val: i128, ty: TypeId) -> String {
let is_bool = match &self.typeenv.types[ty.index()] {
&Type::Primitive(_, name, _) => &self.typeenv.syms[name.index()] == "bool",
_ => unreachable!(),
@@ -269,7 +269,12 @@ impl<'a> Codegen<'a> {
if is_bool {
format!("{}", val != 0)
} else {
format!("{}", val)
let ty_name = self.type_name(ty, /* by_ref = */ false);
if ty_name == "i128" {
format!("{}i128", val)
} else {
format!("{}i128 as {}", val, ty_name)
}
}
}
@@ -523,10 +528,14 @@ impl<'a> Codegen<'a> {
false
}
&PatternInst::MatchInt {
ref input, int_val, ..
ref input,
int_val,
ty,
..
} => {
let int_val = self.const_int(int_val, ty);
let input = self.value_by_val(input, ctx);
writeln!(code, "{}if {} == {} {{", indent, input, int_val).unwrap();
writeln!(code, "{}if {} == {} {{", indent, input, int_val).unwrap();
false
}
&PatternInst::MatchPrim { ref input, val, .. } => {

View File

@@ -49,7 +49,7 @@ pub enum PatternInst {
/// The value's type.
ty: TypeId,
/// The integer to match against the value.
int_val: i64,
int_val: i128,
},
/// Try matching the given value as the given constant. Produces no values.
@@ -128,7 +128,7 @@ pub enum ExprInst {
/// This integer type.
ty: TypeId,
/// The integer value. Must fit within the type.
val: i64,
val: i128,
},
/// Produce a constant extern value.
@@ -222,7 +222,7 @@ impl ExprSequence {
/// Is this expression sequence producing a constant integer?
///
/// If so, return the integer type and the constant.
pub fn is_const_int(&self) -> Option<(TypeId, i64)> {
pub fn is_const_int(&self) -> Option<(TypeId, i128)> {
if self.insts.len() == 2 && matches!(&self.insts[1], &ExprInst::Return { .. }) {
match &self.insts[0] {
&ExprInst::ConstInt { ty, val } => Some((ty, val)),
@@ -266,7 +266,7 @@ impl PatternSequence {
self.add_inst(PatternInst::MatchEqual { a, b, ty });
}
fn add_match_int(&mut self, input: Value, ty: TypeId, int_val: i64) {
fn add_match_int(&mut self, input: Value, ty: TypeId, int_val: i128) {
self.add_inst(PatternInst::MatchInt { input, ty, int_val });
}
@@ -486,7 +486,7 @@ impl ExprSequence {
id
}
fn add_const_int(&mut self, ty: TypeId, val: i64) -> Value {
fn add_const_int(&mut self, ty: TypeId, val: i128) -> Value {
let inst = InstId(self.insts.len());
self.add_inst(ExprInst::ConstInt { ty, val });
Value::Expr { inst, output: 0 }

View File

@@ -68,7 +68,7 @@ pub enum Token {
/// A symbol, e.g. `Foo`.
Symbol(String),
/// An integer.
Int(i64),
Int(i128),
/// `@`
At,
}
@@ -252,7 +252,8 @@ impl<'a> Lexer<'a> {
// Check for hex literals.
if self.buf.get(self.pos.offset).copied() == Some(b'0')
&& self.buf.get(self.pos.offset + 1).copied() == Some(b'x')
&& (self.buf.get(self.pos.offset + 1).copied() == Some(b'x')
|| self.buf.get(self.pos.offset + 1).copied() == Some(b'X'))
{
self.advance_pos();
self.advance_pos();
@@ -262,7 +263,7 @@ impl<'a> Lexer<'a> {
// Collect the digits of this integer literal into a buffer, skipping
// any `_` separators. We'll pass that buffer to `i128::from_str_radix`
// (falling back to `u128::from_str_radix`) to do the actual
// string-to-integer conversion.
let start_offset = self.pos.offset;
let mut s = vec![];
while self.pos.offset < self.buf.len()
&& ((radix == 10
&& self.buf[self.pos.offset] >= b'0'
@@ -273,21 +274,25 @@ impl<'a> Lexer<'a> {
|| (self.buf[self.pos.offset] >= b'a'
&& self.buf[self.pos.offset] <= b'f')
|| (self.buf[self.pos.offset] >= b'A'
&& self.buf[self.pos.offset] <= b'F'))))
&& self.buf[self.pos.offset] <= b'F')))
|| self.buf[self.pos.offset] == b'_')
{
if self.buf[self.pos.offset] != b'_' {
s.push(self.buf[self.pos.offset]);
}
self.advance_pos();
}
let end_offset = self.pos.offset;
let s_utf8 = std::str::from_utf8(&s[..]).unwrap();
let num = i64::from_str_radix(
std::str::from_utf8(&self.buf[start_offset..end_offset]).unwrap(),
radix,
)
.map_err(|e| self.error(start_pos, e.to_string()))?;
// Support either signed range (-2^127..2^127) or
// unsigned range (0..2^128).
let num = i128::from_str_radix(s_utf8, radix)
.or_else(|_| u128::from_str_radix(s_utf8, radix).map(|val| val as i128))
.map_err(|e| self.error(start_pos, e.to_string()))?;
let tok = if neg {
Token::Int(num.checked_neg().ok_or_else(|| {
self.error(start_pos, "integer literal cannot fit in i64")
self.error(start_pos, "integer literal cannot fit in i128")
})?)
} else {
Token::Int(num)

View File

@@ -114,7 +114,7 @@ impl<'a> Parser<'a> {
}
}
fn int(&mut self) -> Result<i64> {
fn int(&mut self) -> Result<i128> {
match self.take(|tok| tok.is_int())? {
Token::Int(i) => Ok(i),
_ => unreachable!(),
@@ -391,7 +391,7 @@ impl<'a> Parser<'a> {
iflets,
expr,
pos,
prio,
prio: prio.map(|prio| i64::try_from(prio).unwrap()),
});
}
}

View File

@@ -449,7 +449,7 @@ pub enum Pattern {
/// Match the current value against a constant integer of the given integer
/// type.
ConstInt(TypeId, i64),
ConstInt(TypeId, i128),
/// Match the current value against a constant primitive value of the given
/// primitive type.
@@ -474,7 +474,7 @@ pub enum Expr {
/// Get the value of a variable that was bound in the left-hand side.
Var(TypeId, VarId),
/// Get a constant integer.
ConstInt(TypeId, i64),
ConstInt(TypeId, i128),
/// Get a constant primitive.
ConstPrim(TypeId, Sym),
/// Evaluate the nested expressions and bind their results to the given

View File

@@ -18,7 +18,7 @@ pub fn run_fail(filename: &str) {
assert!(build(filename).is_err());
}
pub fn run_link(isle_filename: &str) {
fn build_and_link_isle(isle_filename: &str) -> (tempfile::TempDir, std::path::PathBuf) {
let tempdir = tempfile::tempdir().unwrap();
let code = build(isle_filename).unwrap();
@@ -45,10 +45,27 @@ pub fn run_link(isle_filename: &str) {
let mut rustc = std::process::Command::new("rustc")
.arg(&rust_driver)
.arg("-o")
.arg(output)
.arg(output.clone())
.spawn()
.unwrap();
assert!(rustc.wait().unwrap().success());
(tempdir, output)
}
/// Link test: passes when `build_and_link_isle` completes for the given
/// ISLE file, i.e. none of its internal assertions or unwraps fire.
pub fn run_link(isle_filename: &str) {
    // Only successful completion matters here; the returned temp directory
    // and executable path are not used.
    let _artifacts = build_and_link_isle(isle_filename);
}
/// Run test: builds and links the given ISLE file via `build_and_link_isle`,
/// then executes the resulting binary and asserts that it exits successfully.
pub fn run_run(isle_filename: &str) {
    // The directory handle stays bound until the end of the function;
    // presumably it owns the directory holding the executable, so it must
    // outlive the child process (confirm against `tempfile::TempDir`).
    let (_workdir, exe_path) = build_and_link_isle(isle_filename);

    let mut child = std::process::Command::new(exe_path).spawn().unwrap();
    let exit_status = child.wait().unwrap();
    assert!(exit_status.success());
}
// Generated by build.rs.