Improve uimm128 parsing

This commit changes 128-bit constant parsing in two ways:
 - it adds the ability to use underscores to separate digits when writing a 128-bit constant in hexadecimal; e.g. `0x00010203...` can now be written as `0x0001_0203_...`
 - it adds a new mechanism for parsing 128-bit constants using integer/float/boolean literals; e.g. `vconst.i32x4 [1 2 3 4]`. Note that currently the controlling type of the instruction dictates how many literals to parse inside the brackets.
This commit is contained in:
Andrew Brown
2019-08-28 10:01:19 -07:00
committed by Dan Gohman
parent 98056aa05d
commit d64e454004
6 changed files with 328 additions and 36 deletions

View File

@@ -5,9 +5,29 @@
//! `cranelift-codegen/meta/src/shared/immediates` crate in the meta language. //! `cranelift-codegen/meta/src/shared/immediates` crate in the meta language.
use core::fmt::{self, Display, Formatter}; use core::fmt::{self, Display, Formatter};
use core::iter::FromIterator;
use core::mem; use core::mem;
use core::str::FromStr; use core::str::{from_utf8, FromStr};
use core::{i32, u32}; use core::{i32, u32};
use std::vec::Vec;
/// Serialize a value into a freshly allocated vector of its bytes.
///
/// Every implementor in this file must emit the bytes in little-endian order so that the result
/// matches WebAssembly's little-endianness.
trait IntoBytes {
    /// Consume `self` and return its bytes, least-significant byte first.
    fn into_bytes(self) -> Vec<u8>;
}

impl IntoBytes for u8 {
    /// A single byte has no byte order; it becomes a one-element vector.
    fn into_bytes(self) -> Vec<u8> {
        [self].to_vec()
    }
}

impl IntoBytes for i32 {
    /// Yield the four bytes of the integer in little-endian order.
    fn into_bytes(self) -> Vec<u8> {
        let little_endian = self.to_le_bytes();
        Vec::from(&little_endian[..])
    }
}
/// 64-bit immediate signed integer operand. /// 64-bit immediate signed integer operand.
/// ///
@@ -34,6 +54,12 @@ impl Into<i64> for Imm64 {
} }
} }
impl IntoBytes for Imm64 {
    /// Yield the eight bytes of the wrapped 64-bit integer in little-endian order.
    fn into_bytes(self) -> Vec<u8> {
        let mut bytes = Vec::with_capacity(8);
        bytes.extend_from_slice(&self.0.to_le_bytes());
        bytes
    }
}
impl From<i64> for Imm64 { impl From<i64> for Imm64 {
fn from(x: i64) -> Self { fn from(x: i64) -> Self {
Imm64(x) Imm64(x)
@@ -270,6 +296,23 @@ impl FromStr for Uimm32 {
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)] #[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
pub struct Uimm128(pub [u8; 16]); pub struct Uimm128(pub [u8; 16]);
impl Uimm128 {
    /// Borrowing iterator over the sixteen bytes of the constant, in storage order.
    pub fn bytes(&self) -> impl Iterator<Item = &u8> {
        self.as_slice().iter()
    }

    /// Copy the bytes of the immediate into a newly allocated vector.
    pub fn to_vec(self) -> Vec<u8> {
        Vec::from(&self.0[..])
    }

    /// View the bytes of the immediate as a slice.
    pub fn as_slice(&self) -> &[u8] {
        &self.0
    }
}
impl Display for Uimm128 { impl Display for Uimm128 {
// print a 128-bit vector in hexadecimal, e.g. 0x000102030405060708090a0b0c0d0e0f // print a 128-bit vector in hexadecimal, e.g. 0x000102030405060708090a0b0c0d0e0f
fn fmt(&self, f: &mut Formatter) -> fmt::Result { fn fmt(&self, f: &mut Formatter) -> fmt::Result {
@@ -314,24 +357,84 @@ impl FromStr for Uimm128 {
fn from_str(s: &str) -> Result<Self, &'static str> { fn from_str(s: &str) -> Result<Self, &'static str> {
if s.len() <= 2 || &s[0..2] != "0x" { if s.len() <= 2 || &s[0..2] != "0x" {
Err("Expected a hexadecimal string, e.g. 0x1234") Err("Expected a hexadecimal string, e.g. 0x1234")
} else if s.len() % 2 != 0 { } else {
// clean and check the string
let cleaned: Vec<u8> = s[2..]
.as_bytes()
.iter()
.filter(|&&b| b as char != '_')
.cloned()
.collect(); // remove 0x prefix and any intervening _ characters
if cleaned.len() == 0 {
Err("Hexadecimal string must have some digits")
} else if cleaned.len() % 2 != 0 {
Err("Hexadecimal string must have an even number of digits") Err("Hexadecimal string must have an even number of digits")
} else if s.len() > 34 { } else if cleaned.len() > 32 {
Err("Hexadecimal string has too many digits to fit in a 128-bit vector") Err("Hexadecimal string has too many digits to fit in a 128-bit vector")
} else { } else {
let mut buffer = [0; 16]; // zero-fill let mut buffer = [0; 16]; // zero-fill the buffer
let start_at = s.len() / 2 - 1; let mut position = cleaned.len() / 2 - 1; // since Uimm128 is little-endian but the string is not, we write from back to front but must start at the highest position required by the string
for i in (2..s.len()).step_by(2) { for i in (0..cleaned.len()).step_by(2) {
let byte = u8::from_str_radix(&s[i..i + 2], 16) let pair = from_utf8(&cleaned[i..i + 2])
.or_else(|_| Err("Unable to parse hexadecimal pair as UTF-8"))?;
let byte = u8::from_str_radix(pair, 16)
.or_else(|_| Err("Unable to parse as hexadecimal"))?; .or_else(|_| Err("Unable to parse as hexadecimal"))?;
let position = start_at - (i / 2);
buffer[position] = byte; buffer[position] = byte;
position = position.wrapping_sub(1); // should only wrap on the last iteration
} }
Ok(Uimm128(buffer)) Ok(Uimm128(buffer))
} }
} }
}
} }
/// Implement a way to convert an iterator of immediates to a `Uimm128`:
/// - items are written in iteration order: the bytes of the first item yielded occupy the
///   lowest-indexed bytes of the constant, followed by the bytes of the second item, and so on
/// - at most `$lanes` items are taken, so the iterator may not be fully consumed; if it yields
///   fewer items, the remaining bytes of the constant stay zero
/// - this requires the input type (i.e. `$ty`) to implement `IntoBytes`
/// - `$lanes` times the number of bytes produced per item must not exceed 16, otherwise indexing
///   the 16-byte buffer panics
macro_rules! construct_uimm128_from_iterator_of {
    ( $ty:ident, $lanes:expr ) => {
        impl FromIterator<$ty> for Uimm128 {
            fn from_iter<T: IntoIterator<Item = $ty>>(iter: T) -> Self {
                let mut buffer: [u8; 16] = [0; 16]; // zero-fill so missing items read as zero
                iter.into_iter()
                    .take($lanes)
                    .flat_map(|item| item.into_bytes())
                    .enumerate()
                    .for_each(|(i, b)| buffer[i] = b);
                Uimm128(buffer)
            }
        }
    };
}
/// Booleans are handled separately from the other lane types because the width of each lane is
/// not fixed by the item type: it is derived from how many booleans the iterator yields
/// (16 bytes divided evenly between them). A `true` lane stores 1 in its first byte and a `false`
/// lane stores 0; every other byte of the lane is left as zero.
///
/// Panics if the iterator yields no items, more than 16 items, or a number of items that is not
/// a power of two.
impl FromIterator<bool> for Uimm128 {
    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
        let lanes: Vec<bool> = iter.into_iter().collect();
        let count = lanes.len();
        assert!(count > 0 && count <= 16); // ensure we don't have too many booleans
        assert_eq!(count & (count - 1), 0); // ensure count is a power of two, see https://stackoverflow.com/questions/600293

        let lane_width = 16 / count;
        let mut bytes = [0u8; 16];
        for (lane, &is_set) in lanes.iter().enumerate() {
            // only the first byte of each lane carries the boolean
            bytes[lane * lane_width] = if is_set { 1 } else { 0 };
        }
        Uimm128(bytes)
    }
}
// Generate `FromIterator<T> for Uimm128` for each item type that can populate a 128-bit constant.
// The second argument is the maximum number of items taken from the iterator: 16 one-byte items,
// 4 four-byte items or 2 eight-byte items (presumably chosen so that a full set of items fills
// the 16-byte buffer exactly).
construct_uimm128_from_iterator_of!(u8, 16);
construct_uimm128_from_iterator_of!(i32, 4);
construct_uimm128_from_iterator_of!(Ieee32, 4);
construct_uimm128_from_iterator_of!(Imm64, 2);
construct_uimm128_from_iterator_of!(Ieee64, 2);
/// 32-bit signed immediate offset. /// 32-bit signed immediate offset.
/// ///
/// This is used to encode an immediate offset for load/store instructions. All supported ISAs have /// This is used to encode an immediate offset for load/store instructions. All supported ISAs have
@@ -739,6 +842,12 @@ impl From<f32> for Ieee32 {
} }
} }
impl IntoBytes for Ieee32 {
    /// Yield the bytes of the wrapped value (`self.0`) in little-endian order.
    fn into_bytes(self) -> Vec<u8> {
        let little_endian = self.0.to_le_bytes();
        little_endian.iter().cloned().collect()
    }
}
impl Ieee64 { impl Ieee64 {
/// Create a new `Ieee64` containing the bits of `x`. /// Create a new `Ieee64` containing the bits of `x`.
pub fn with_bits(x: u64) -> Self { pub fn with_bits(x: u64) -> Self {
@@ -812,6 +921,12 @@ impl From<u64> for Ieee64 {
} }
} }
impl IntoBytes for Ieee64 {
    /// Yield the bytes of the wrapped value (`self.0`) in little-endian order.
    fn into_bytes(self) -> Vec<u8> {
        let little_endian = self.0.to_le_bytes();
        Vec::from(&little_endian[..])
    }
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@@ -968,9 +1083,10 @@ mod tests {
parse_ok::<Uimm128>("0x00", "0x00"); parse_ok::<Uimm128>("0x00", "0x00");
parse_ok::<Uimm128>("0x00000042", "0x42"); parse_ok::<Uimm128>("0x00000042", "0x42");
parse_ok::<Uimm128>( parse_ok::<Uimm128>(
"0x0102030405060708090a0b0c0d0e0f", "0x0102030405060708090a0b0c0d0e0f00",
"0x0102030405060708090a0b0c0d0e0f", "0x0102030405060708090a0b0c0d0e0f00",
); );
parse_ok::<Uimm128>("0x_0000_0043_21", "0x4321");
parse_err::<Uimm128>("", "Expected a hexadecimal string, e.g. 0x1234"); parse_err::<Uimm128>("", "Expected a hexadecimal string, e.g. 0x1234");
parse_err::<Uimm128>("0x", "Expected a hexadecimal string, e.g. 0x1234"); parse_err::<Uimm128>("0x", "Expected a hexadecimal string, e.g. 0x1234");
@@ -982,6 +1098,24 @@ mod tests {
"0x00000000000000000000000000000000000000000000000000", "0x00000000000000000000000000000000000000000000000000",
"Hexadecimal string has too many digits to fit in a 128-bit vector", "Hexadecimal string has too many digits to fit in a 128-bit vector",
); );
parse_err::<Uimm128>("0xrstu", "Unable to parse as hexadecimal");
parse_err::<Uimm128>("0x__", "Hexadecimal string must have some digits");
}
#[test]
fn uimm128_equivalence() {
assert_eq!(
"0x01".parse::<Uimm128>().unwrap().0,
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
);
assert_eq!(
Uimm128::from_iter(vec![1, 0, 0, 0]).0,
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
);
assert_eq!(
Uimm128::from(1).0,
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
);
} }
#[test] #[test]
@@ -997,6 +1131,50 @@ mod tests {
assert_eq!( assert_eq!(
"0x12345678".parse::<Uimm128>().unwrap().0, "0x12345678".parse::<Uimm128>().unwrap().0,
[0x78, 0x56, 0x34, 0x12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [0x78, 0x56, 0x34, 0x12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
);
assert_eq!(
"0x1234_5678".parse::<Uimm128>().unwrap().0,
[0x78, 0x56, 0x34, 0x12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
);
}
#[test]
fn uimm128_from_iter() {
assert_eq!(
Uimm128::from_iter(vec![4, 3, 2, 1]).0,
[4, 0, 0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0]
);
assert_eq!(
Uimm128::from_iter(vec![false, true]).0,
[/* false */ 0, 0, 0, 0, 0, 0, 0, 0, /* true */ 1, 0, 0, 0, 0, 0, 0, 0]
);
assert_eq!(
Uimm128::from_iter(vec![false, true, false, true, false, true, false, true]).0,
[0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0]
);
#[allow(trivial_numeric_casts)]
let u8s = vec![
1 as u8, 2, 3, 4, 5, 6, 7, 8, 9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0,
];
assert_eq!(
Uimm128::from_iter(u8s).0,
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0]
);
#[allow(trivial_numeric_casts)]
let ieee32s: Vec<Ieee32> = vec![32.4 as f32, 0.0, 1.0, 6.6666]
.iter()
.map(|&f| Ieee32::from(f))
.collect();
assert_eq!(
Uimm128::from_iter(ieee32s).0,
[
/* 32.4 == */ 0x9a, 0x99, 0x01, 0x42, /* 0 == */ 0, 0, 0, 0,
/* 1 == */ 0, 0, 0x80, 0x3f, /* 6.6666 == */ 0xca, 0x54, 0xd5, 0x40,
]
) )
} }

View File

@@ -1,13 +0,0 @@
test rodata
set enable_simd=true
set probestack_enabled=false
target x86_64 haswell
; use baldrdash calling convention here for simplicity (avoids prologue, epilogue)
function %test_vconst_i32() -> i32x4 baldrdash_system_v {
ebb0:
v0 = vconst.i32x4 0x1234
return v0
}
; sameln: [34, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

View File

@@ -0,0 +1,20 @@
test rodata
set enable_simd=true
set probestack_enabled=false
target x86_64 haswell
function %test_vconst_i32() -> i32x4 {
ebb0:
v0 = vconst.i32x4 0x1234
return v0
}
; sameln: [34, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
function %test_vconst_b16() -> b16x8 {
ebb0:
v0 = vconst.b16x8 [true false true false true false true true]
return v0
}
; sameln: [1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0]

View File

@@ -0,0 +1,21 @@
test run
set enable_simd
function %test_vconst_syntax() -> b1 {
ebb0:
v0 = vconst.i32x4 0x00000004_00000003_00000002_00000001 ; build constant using hexadecimal syntax
v1 = vconst.i32x4 [1 2 3 4] ; build constant using literal list syntax
; verify lane 1 matches
v2 = extractlane v0, 1
v3 = extractlane v1, 1
v4 = icmp eq v3, v2
; verify lane 1 has the correct value
v5 = icmp_imm eq v3, 2
v6 = band v4, v5
return v6
}
; run

View File

@@ -12,6 +12,7 @@ use cranelift_codegen::ir::entities::AnyEntity;
use cranelift_codegen::ir::immediates::{Ieee32, Ieee64, Imm64, Offset32, Uimm128, Uimm32, Uimm64}; use cranelift_codegen::ir::immediates::{Ieee32, Ieee64, Imm64, Offset32, Uimm128, Uimm32, Uimm64};
use cranelift_codegen::ir::instructions::{InstructionData, InstructionFormat, VariableArgs}; use cranelift_codegen::ir::instructions::{InstructionData, InstructionFormat, VariableArgs};
use cranelift_codegen::ir::types::INVALID; use cranelift_codegen::ir::types::INVALID;
use cranelift_codegen::ir::types::*;
use cranelift_codegen::ir::{ use cranelift_codegen::ir::{
AbiParam, ArgumentExtension, ArgumentLoc, Ebb, ExtFuncData, ExternalName, FuncRef, Function, AbiParam, ArgumentExtension, ArgumentLoc, Ebb, ExtFuncData, ExternalName, FuncRef, Function,
GlobalValue, GlobalValueData, Heap, HeapData, HeapStyle, JumpTable, JumpTableData, MemFlags, GlobalValue, GlobalValueData, Heap, HeapData, HeapStyle, JumpTable, JumpTableData, MemFlags,
@@ -21,6 +22,7 @@ use cranelift_codegen::ir::{
use cranelift_codegen::isa::{self, CallConv, Encoding, RegUnit, TargetIsa}; use cranelift_codegen::isa::{self, CallConv, Encoding, RegUnit, TargetIsa};
use cranelift_codegen::packed_option::ReservedValue; use cranelift_codegen::packed_option::ReservedValue;
use cranelift_codegen::{settings, timing}; use cranelift_codegen::{settings, timing};
use std::iter::FromIterator;
use std::mem; use std::mem;
use std::str::FromStr; use std::str::FromStr;
use std::{u16, u32}; use std::{u16, u32};
@@ -610,6 +612,19 @@ impl<'a> Parser<'a> {
} }
} }
// Match and consume a 128-bit immediate written in either of its two forms: a hexadecimal
// literal (e.g. 0x000102...) or a bracketed list of literals (e.g. [0 1 2...]). For the list
// form, `controlling_type` decides which literals are parsed and how many.
fn match_uimm128_or_literals(&mut self, controlling_type: Type) -> ParseResult<Uimm128> {
    // no opening bracket: this must be the hexadecimal form
    if !self.optional(Token::LBracket) {
        return self.match_uimm128("expected an immediate hexadecimal operand");
    }

    // the opening bracket has been consumed: parse the list of values, e.g. vconst.i32x4 [0 1 2 3]
    let constant = self.parse_literals_to_uimm128(controlling_type)?;
    self.match_token(Token::RBracket, "expected a terminating right bracket")?;
    Ok(constant)
}
// Match and consume a Uimm64 immediate. // Match and consume a Uimm64 immediate.
fn match_uimm64(&mut self, err_msg: &str) -> ParseResult<Uimm64> { fn match_uimm64(&mut self, err_msg: &str) -> ParseResult<Uimm64> {
if let Some(Token::Integer(text)) = self.token() { if let Some(Token::Integer(text)) = self.token() {
@@ -821,6 +836,36 @@ impl<'a> Parser<'a> {
} }
} }
/// Parse a list of literals (i.e. integers, floats, booleans) into a `Uimm128`; e.g. `1 2 3 4`
/// for an `i32x4`.
///
/// The lane type of `ty` selects which kind of literal is matched and the lane count of `ty`
/// dictates exactly how many literals are consumed; any tokens that follow (such as extra
/// literals or a closing bracket) are left unconsumed for the caller.
///
/// Returns an error if `ty` is not a vector type, if its lane type is not supported, or if any
/// of the expected literals fails to match.
fn parse_literals_to_uimm128(&mut self, ty: Type) -> ParseResult<Uimm128> {
    // Match one literal per lane with `$match_fn`, propagating the first failure, and pack the
    // matched values into a `Uimm128`.
    macro_rules! consume {
        ( $ty:ident, $match_fn:expr ) => {{
            assert!($ty.is_vector());
            let mut v = Vec::with_capacity($ty.lane_count() as usize);
            for _ in 0..$ty.lane_count() {
                v.push($match_fn?);
            }
            Uimm128::from_iter(v)
        }};
    }

    if !ty.is_vector() {
        err!(self.loc, "Expected a controlling vector type, not {}", ty)
    } else {
        let uimm128 = match ty.lane_type() {
            I8 => consume!(ty, self.match_uimm8("Expected an 8-bit unsigned integer")),
            // TODO no 16-bit match yet; report a parse error rather than panicking, since the
            // controlling type comes straight from the text being parsed
            I16 => {
                return err!(
                    self.loc,
                    "16-bit integer lanes are not yet supported in a literal list"
                )
            }
            I32 => consume!(ty, self.match_imm32("Expected a 32-bit integer")),
            I64 => consume!(ty, self.match_imm64("Expected a 64-bit integer")),
            F32 => consume!(ty, self.match_ieee32("Expected a 32-bit float...")),
            F64 => consume!(ty, self.match_ieee64("Expected a 64-bit float")),
            b if b.is_bool() => consume!(ty, self.match_bool("Expected a boolean")),
            _ => return err!(self.loc, "Expected a type of: float, int, bool"),
        };
        Ok(uimm128)
    }
}
/// Parse a list of test command passes specified in command line. /// Parse a list of test command passes specified in command line.
pub fn parse_cmdline_passes(&mut self, passes: &'a [String]) -> Vec<TestCommand<'a>> { pub fn parse_cmdline_passes(&mut self, passes: &'a [String]) -> Vec<TestCommand<'a>> {
let mut list = Vec::new(); let mut list = Vec::new();
@@ -1977,7 +2022,7 @@ impl<'a> Parser<'a> {
}; };
// instruction ::= [inst-results "="] Opcode(opc) ["." Type] * ... // instruction ::= [inst-results "="] Opcode(opc) ["." Type] * ...
let inst_data = self.parse_inst_operands(ctx, opcode)?; let inst_data = self.parse_inst_operands(ctx, opcode, explicit_ctrl_type)?;
// We're done parsing the instruction now. // We're done parsing the instruction now.
// //
@@ -2186,6 +2231,7 @@ impl<'a> Parser<'a> {
&mut self, &mut self,
ctx: &mut Context, ctx: &mut Context,
opcode: Opcode, opcode: Opcode,
explicit_control_type: Option<Type>,
) -> ParseResult<InstructionData> { ) -> ParseResult<InstructionData> {
let idata = match opcode.format() { let idata = match opcode.format() {
InstructionFormat::Unary => InstructionData::Unary { InstructionFormat::Unary => InstructionData::Unary {
@@ -2196,14 +2242,23 @@ impl<'a> Parser<'a> {
opcode, opcode,
imm: self.match_imm64("expected immediate integer operand")?, imm: self.match_imm64("expected immediate integer operand")?,
}, },
InstructionFormat::UnaryImm128 => { InstructionFormat::UnaryImm128 => match explicit_control_type {
let uimm128 = self.match_uimm128("expected immediate hexadecimal operand")?; None => {
return err!(
self.loc,
"Expected {:?} to have a controlling type variable, e.g. inst.i32x4",
opcode
)
}
Some(ty) => {
let uimm128 = self.match_uimm128_or_literals(ty)?;
let constant_handle = ctx.function.dfg.constants.insert(uimm128.0.to_vec()); let constant_handle = ctx.function.dfg.constants.insert(uimm128.0.to_vec());
InstructionData::UnaryImm128 { InstructionData::UnaryImm128 {
opcode, opcode,
imm: constant_handle, imm: constant_handle,
} }
} }
},
InstructionFormat::UnaryIeee32 => InstructionData::UnaryIeee32 { InstructionFormat::UnaryIeee32 => InstructionData::UnaryIeee32 {
opcode, opcode,
imm: self.match_ieee32("expected immediate 32-bit float operand")?, imm: self.match_ieee32("expected immediate 32-bit float operand")?,
@@ -3150,4 +3205,35 @@ mod tests {
CallConv::Cold CallConv::Cold
); );
} }
#[test]
fn uimm128() {
    // Parse `$text` as a literal list with `$type` as the controlling type and require success.
    macro_rules! accepts {
        ($text:expr, $type:expr) => {{
            let outcome = Parser::new($text).parse_literals_to_uimm128($type);
            assert!(outcome.is_ok())
        }};
    }

    // Parse `$text` as a literal list with `$type` as the controlling type and require failure.
    macro_rules! rejects {
        ($text:expr, $type:expr) => {{
            let outcome = Parser::new($text).parse_literals_to_uimm128($type);
            assert!(outcome.is_err())
        }};
    }

    // inputs that must parse
    accepts!("1 2 3 4", I32X4);
    accepts!("1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16", I8X16);
    accepts!("0x1.1 0x2.2 0x3.3 0x4.4", F32X4);
    accepts!("true false true false true false true false", B16X8);
    accepts!("0 -1", I64X2);
    accepts!("true false", B64X2);
    accepts!("true true true true true", B32X4); // note that parse_literals_to_uimm128 will leave extra tokens unconsumed

    // inputs that must be rejected
    rejects!("0x0 0x1 0x2 0x3", I32X4);
    rejects!("1 2 3", I32X4);
    rejects!(" ", F32X4);
}
} }