Add support for binary/octal literals to ISLE (#6234)

* Add support for binary/octal literals to ISLE

In a number of recent x64 changes, some u8 immediates are interpreted
as four bit-packed 2-bit numbers, and I have a tough time converting
between hex and these bit-packed numbers. I've been writing
`0xAA == 0b...` in comments to indicate the intent, but I figured it'd
be a bit clearer if the binary literal were accepted directly!

This is a minor update to the ISLE lexer to allow for binary `0b00...`
and octal `0o00...` literals in the same manner as hex literals. Some
comments in the x64 backend are then removed to use the binary literal
syntax directly.

* Update ISLE reference for octal/binary

* Update ISLE tests for octal/binary
This commit is contained in:
Alex Crichton
2023-04-18 18:04:04 -05:00
committed by GitHub
parent c17a3d89f7
commit b6bb6a196a
5 changed files with 144 additions and 85 deletions

View File

@@ -1346,21 +1346,21 @@
;; result = [ vec[3] vec[2] tmp[0] tmp[2] ]
(rule (vec_insert_lane $F32X4 vec (RegMem.Reg val) 1)
(let ((tmp Xmm (x64_movlhps val vec)))
(x64_shufps tmp vec 0xe2))) ;; 0xe2 == 0b11_10_00_10
(x64_shufps tmp vec 0b11_10_00_10)))
;; f32x4.replace_lane 2 - without insertps
;; tmp = [ vec[0] vec[3] val[0] val[0] ]
;; result = [ tmp[2] tmp[0] vec[1] vec[0] ]
(rule (vec_insert_lane $F32X4 vec (RegMem.Reg val) 2)
(let ((tmp Xmm (x64_shufps val vec 0x30))) ;; 0x30 == 0b00_11_00_00
(x64_shufps vec tmp 0x84))) ;; 0x84 == 0b10_00_01_00
(let ((tmp Xmm (x64_shufps val vec 0b00_11_00_00)))
(x64_shufps vec tmp 0b10_00_01_00)))
;; f32x4.replace_lane 3 - without insertps
;; tmp = [ vec[3] vec[2] val[1] val[0] ]
;; result = [ tmp[0] tmp[2] vec[1] vec[0] ]
(rule (vec_insert_lane $F32X4 vec (RegMem.Reg val) 3)
(let ((tmp Xmm (x64_shufps val vec 0xe4))) ;; 0xe4 == 0b11_10_01_00
(x64_shufps vec tmp 0x24))) ;; 0x24 == 0b00_10_01_00
(let ((tmp Xmm (x64_shufps val vec 0b11_10_01_00)))
(x64_shufps vec tmp 0b00_10_01_00)))
;; Recursively delegate to the above rules by loading from memory first.
(rule (vec_insert_lane $F32X4 vec (RegMem.Mem addr) idx)
@@ -3422,7 +3422,7 @@
(a Xmm a)
(libcall LibCall (round_libcall $F64 imm))
(result Xmm (libcall_1 libcall a))
(a1 Xmm (libcall_1 libcall (x64_pshufd a 0x0e))) ;; 0x0e == 0b00_00_11_10
(a1 Xmm (libcall_1 libcall (x64_pshufd a 0b00_00_11_10)))
(result Xmm (vec_insert_lane $F64X2 result a1 1))
)
result))
@@ -3818,8 +3818,7 @@
;; This is the only remaining case for F64X2
(rule 1 (lower (has_type $F64 (extractlane val @ (value_type (ty_vec128 ty))
(u8_from_uimm8 1))))
;; 0xee == 0b11_10_11_10
(x64_pshufd val 0xee))
(x64_pshufd val 0b11_10_11_10))
;; Note that the `pextrb` lowering here is relied upon by the `extend_to_gpr`
;; helper because it will elide a `uextend` operation when `extractlane` is the

View File

@@ -494,8 +494,11 @@ The pattern (left-hand side) is made up of the following match
operators:
* Wildcards (`_`).
* Integer constants (decimal/hex, positive/negative: `1`, `-1`,
`0x80`, `-0x80`) and boolean constants (`#t`, `#f`).
* Integer constants (decimal/hex/binary/octal, positive/negative: `1`, `-1`,
`0x80`, `-0x80`, `0b1010`, `0o17`) and boolean constants (`#t`, `#f`). Hex
constants can start with either `0x` or `0X`. Binary constants start with
`0b`. Octal constants start with `0o`. Integers can also be interspersed with
`_` as a separator, for example `1_000` or `0x1234_5678`, for readability.
* constants imported from the embedding, of arbitrary type
(`$MyConst`).
* Variable captures and matches (bare identifiers like `x`; an
@@ -1449,6 +1452,8 @@ newline). The grammar accepted by the parser is as follows:
<int> ::= [ "-" ] ( "0".."9" )+
| [ "-" ] ( "0x" | "0X" ) ( "0".."9" "A".."F" "a".."f" )+
| [ "-" ] "0o" ( "0".."7" )+
| [ "-" ] "0b" ( "0".."1" )+
<typevalue> ::= "(" "primitive" <ident> ")"
| "(" "enum" <enumvariant>* ")"

View File

@@ -15,3 +15,12 @@
(rule (Y -0x1000_0000_0000_0000_1234_5678_9abc_def0) 1)
(rule (Y -0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff) -3)
;; Test some various syntaxes for numbers
(type i32 (primitive i32))
(decl partial Z (i32) i32)
(rule (Z 0) 0x01)
(rule (Z 0x01) 0x0_2)
(rule (Z 0b10) 3)
(rule (Z 0b1_1) 0o4)
(rule (Z 0o7654321) 0b11_00_11_00)

View File

@@ -8,11 +8,44 @@ fn main() {
assert_eq!(iconst::constructor_X(&mut ctx, -1), Some(-2));
assert_eq!(iconst::constructor_X(&mut ctx, -2), Some(-3));
assert_eq!(iconst::constructor_X(&mut ctx, 0x7fff_ffff_ffff_ffff), Some(0x8000_0000_0000_0000u64 as i64));
assert_eq!(iconst::constructor_X(&mut ctx, 0xffff_ffff_ffff_fff0_u64 as i64), Some(1));
assert_eq!(
iconst::constructor_X(&mut ctx, 0x7fff_ffff_ffff_ffff),
Some(0x8000_0000_0000_0000u64 as i64)
);
assert_eq!(
iconst::constructor_X(&mut ctx, 0xffff_ffff_ffff_fff0_u64 as i64),
Some(1)
);
assert_eq!(iconst::constructor_Y(&mut ctx, 0x1000_0000_0000_0000_1234_5678_9abc_def0), Some(-1));
assert_eq!(iconst::constructor_Y(&mut ctx, 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffffu128 as i128), Some(3));
assert_eq!(iconst::constructor_Y(&mut ctx, -0x1000_0000_0000_0000_1234_5678_9abc_def0), Some(1));
assert_eq!(iconst::constructor_Y(&mut ctx, -(0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffffu128 as i128)), Some(-3));
assert_eq!(
iconst::constructor_Y(&mut ctx, 0x1000_0000_0000_0000_1234_5678_9abc_def0),
Some(-1)
);
assert_eq!(
iconst::constructor_Y(
&mut ctx,
0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffffu128 as i128
),
Some(3)
);
assert_eq!(
iconst::constructor_Y(&mut ctx, -0x1000_0000_0000_0000_1234_5678_9abc_def0),
Some(1)
);
assert_eq!(
iconst::constructor_Y(
&mut ctx,
-(0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffffu128 as i128)
),
Some(-3)
);
assert_eq!(iconst::constructor_Z(&mut ctx, 0), Some(1));
assert_eq!(iconst::constructor_Z(&mut ctx, 1), Some(2));
assert_eq!(iconst::constructor_Z(&mut ctx, 2), Some(3));
assert_eq!(iconst::constructor_Z(&mut ctx, 3), Some(4));
assert_eq!(
iconst::constructor_Z(&mut ctx, 0o7654321),
Some(0b11_00_11_00)
);
}

View File

@@ -243,22 +243,35 @@ impl<'a> Lexer<'a> {
let mut radix = 10;
// Check for hex literals.
if self.buf.get(self.pos.offset).copied() == Some(b'0')
&& (self.buf.get(self.pos.offset + 1).copied() == Some(b'x')
|| self.buf.get(self.pos.offset + 1).copied() == Some(b'X'))
{
// Check for prefixed literals.
match (
self.buf.get(self.pos.offset),
self.buf.get(self.pos.offset + 1),
) {
(Some(b'0'), Some(b'x')) | (Some(b'0'), Some(b'X')) => {
self.advance_pos();
self.advance_pos();
radix = 16;
}
(Some(b'0'), Some(b'o')) => {
self.advance_pos();
self.advance_pos();
radix = 8;
}
(Some(b'0'), Some(b'b')) => {
self.advance_pos();
self.advance_pos();
radix = 2;
}
_ => {}
}
// Find the range in the buffer for this integer literal. We'll
// pass this range to `i64::from_str_radix` to do the actual
// string-to-integer conversion.
let mut s = vec![];
while self.pos.offset < self.buf.len()
&& ((radix == 10 && self.buf[self.pos.offset].is_ascii_digit())
&& ((radix <= 10 && self.buf[self.pos.offset].is_ascii_digit())
|| (radix == 16 && self.buf[self.pos.offset].is_ascii_hexdigit())
|| self.buf[self.pos.offset] == b'_')
{