Improve uimm128 parsing

This commit changes 128-bit constant parsing in two ways:
 - it adds the ability to use underscores to separate digits when writing a 128-bit constant in hexadecimal; e.g. `0x00010203...` can now be written as `0x0001_0203_...`
 - it adds a new mechanism for parsing 128-bit constants using integer/float/boolean literals; e.g. `vconst.i32x4 [1 2 3 4]`. Note that currently the controlling type of the instruction dictates how many literals to parse inside the brackets.
2019-08-28 10:01:19 -07:00
parent 98056aa05d
commit d64e454004
6 changed files with 328 additions and 36 deletions
--- a/cranelift/codegen/src/ir/immediates.rs
+++ b/cranelift/codegen/src/ir/immediates.rs
@@ -5,9 +5,29 @@
 //! `cranelift-codegen/meta/src/shared/immediates` crate in the meta language.
 use core::fmt::{self, Display, Formatter};
 use core::iter::FromIterator;
 use core::mem;
-use core::str::FromStr;
+use core::str::{from_utf8, FromStr};
 use core::{i32, u32};
 use std::vec::Vec;
 /// Serialize a value into its constituent bytes. Every implementor in this file emits the bytes
 /// in little-endian order so that the result matches WebAssembly's little-endianness.
 trait IntoBytes {
    fn into_bytes(self) -> Vec<u8>;
 }
 impl IntoBytes for u8 {
    fn into_bytes(self) -> Vec<u8> {
        // a single byte has no byte order; wrap it as-is
        [self].to_vec()
    }
 }
 impl IntoBytes for i32 {
    fn into_bytes(self) -> Vec<u8> {
        let little_endian = self.to_le_bytes();
        little_endian.to_vec()
    }
 }
 /// 64-bit immediate signed integer operand.
 ///
@@ -34,6 +54,12 @@ impl Into<i64> for Imm64 {
    }
 }
 impl IntoBytes for Imm64 {
    fn into_bytes(self) -> Vec<u8> {
        self.0.to_le_bytes().to_vec()
    }
 }
 impl From<i64> for Imm64 {
    fn from(x: i64) -> Self {
        Imm64(x)
@@ -270,6 +296,23 @@ impl FromStr for Uimm32 {
 #[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
 pub struct Uimm128(pub [u8; 16]);
 impl Uimm128 {
    /// Borrowing iterator over the 16 bytes of the constant, in storage order
    pub fn bytes(&self) -> impl Iterator<Item = &u8> {
        self.as_slice().iter()
    }
    /// Copy the bytes of the immediate into a freshly allocated vector
    pub fn to_vec(self) -> Vec<u8> {
        Vec::from(self.as_slice())
    }
    /// View the bytes of the immediate as a slice
    pub fn as_slice(&self) -> &[u8] {
        let Uimm128(storage) = self;
        storage
    }
 }
 impl Display for Uimm128 {
    // print a 128-bit vector in hexadecimal, e.g. 0x000102030405060708090a0b0c0d0e0f
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
@@ -314,24 +357,84 @@ impl FromStr for Uimm128 {
    // Parse a `0x`-prefixed hexadecimal string into a Uimm128. In the updated version of this hunk,
    // `_` separators are dropped first; the remaining digits must be non-empty, even in number and
    // at most 32 (i.e. 16 bytes). Each pair of digits becomes one byte, and the last pair of the
    // string is stored at index 0 of the buffer; unused high bytes stay zero.
    fn from_str(s: &str) -> Result<Self, &'static str> {
        // NOTE(review): `&s[0..2]` slices by byte offset and panics when offset 2 falls inside a
        // multi-byte character; `s.starts_with("0x")` would avoid that -- confirm whether non-ASCII
        // input can reach this function
        if s.len() <= 2 || &s[0..2] != "0x" {
            Err("Expected a hexadecimal string, e.g. 0x1234")
-        } else if s.len() % 2 != 0 {
+        } else {
            // clean and check the string
            let cleaned: Vec<u8> = s[2..]
                .as_bytes()
                .iter()
                .filter(|&&b| b as char != '_')
                .cloned()
                .collect(); // remove 0x prefix and any intervening _ characters
            if cleaned.len() == 0 {
                Err("Hexadecimal string must have some digits")
            } else if cleaned.len() % 2 != 0 {
                Err("Hexadecimal string must have an even number of digits")
-        } else if s.len() > 34 {
+            } else if cleaned.len() > 32 {
                Err("Hexadecimal string has too many digits to fit in a 128-bit vector")
            } else {
-            let mut buffer = [0; 16]; // zero-fill
+                let mut buffer = [0; 16]; // zero-fill the buffer
-            let start_at = s.len() / 2 - 1;
+                let mut position = cleaned.len() / 2 - 1; // since Uimm128 is little-endian but the string is not, we write from back to front but must start at the highest position required by the string
-            for i in (2..s.len()).step_by(2) {
+                for i in (0..cleaned.len()).step_by(2) {
-                let byte = u8::from_str_radix(&s[i..i + 2], 16)
+                    let pair = from_utf8(&cleaned[i..i + 2])
                        .or_else(|_| Err("Unable to parse hexadecimal pair as UTF-8"))?;
                    let byte = u8::from_str_radix(pair, 16)
                        .or_else(|_| Err("Unable to parse as hexadecimal"))?;
                // NOTE(review): the next line still refers to `start_at`, whose definition is shown as
                // removed above; it looks like a removed line that lost its `-` marker -- confirm
                // against the commit
                let position = start_at - (i / 2);
                    buffer[position] = byte;
                    position = position.wrapping_sub(1); // should only wrap on the last iteration
                }
                Ok(Uimm128(buffer))
            }
        }
    }
 }
 /// Implement a way to convert an iterator of immediates to a Uimm128:
 ///  - items are written in iteration order: the bytes of the first item fill the lowest-indexed
 ///    bytes of the buffer, the bytes of the second item follow, and so on
 ///  - at most `$lanes` items are taken, so the iterator may not be fully consumed; if it yields
 ///    fewer items, the remaining bytes of the buffer stay zero
 ///  - writing panics if the taken items produce more than 16 bytes in total
 ///  - this requires the input type (i.e. $ty) to implement IntoBytes
 macro_rules! construct_uimm128_from_iterator_of {
    ( $ty:ident, $lanes:expr ) => {
        impl FromIterator<$ty> for Uimm128 {
            fn from_iter<T: IntoIterator<Item = $ty>>(iter: T) -> Self {
                let mut buffer: [u8; 16] = [0; 16];
                iter.into_iter()
                    .take($lanes)
                    // flatten each item's bytes directly into one byte stream
                    .flat_map(|lane| lane.into_bytes())
                    .enumerate()
                    .for_each(|(i, b)| buffer[i] = b);
                Uimm128(buffer)
            }
        }
    };
 }
 /// Booleans are handled separately from the other item types because the byte width of each
 /// lane is not fixed: it is derived from the number of items the iterator yields
 impl FromIterator<bool> for Uimm128 {
    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
        let lanes: Vec<bool> = iter.into_iter().collect();
        let count = lanes.len();
        assert!(count > 0 && count <= 16); // the lanes must fit in the 16-byte buffer
        assert_eq!(count & (count - 1), 0); // a power of two has exactly one bit set, see https://stackoverflow.com/questions/600293
        let bytes_per_lane = 16 / count;
        let mut buffer = [0u8; 16];
        for (lane, &is_set) in lanes.iter().enumerate() {
            // only the first byte of each lane is written; the rest of the lane stays zero
            buffer[lane * bytes_per_lane] = if is_set { 1 } else { 0 };
        }
        Uimm128(buffer)
    }
 }
 // Generate the `FromIterator` conversions into `Uimm128`: each invocation names an item type and
 // the maximum number of items of that type that will be taken from the iterator.
 construct_uimm128_from_iterator_of!(u8, 16);
 construct_uimm128_from_iterator_of!(i32, 4);
 construct_uimm128_from_iterator_of!(Ieee32, 4);
 construct_uimm128_from_iterator_of!(Imm64, 2);
 construct_uimm128_from_iterator_of!(Ieee64, 2);
 /// 32-bit signed immediate offset.
 ///
 /// This is used to encode an immediate offset for load/store instructions. All supported ISAs have
@@ -739,6 +842,12 @@ impl From<f32> for Ieee32 {
    }
 }
 impl IntoBytes for Ieee32 {
    fn into_bytes(self) -> Vec<u8> {
        self.0.to_le_bytes().to_vec()
    }
 }
 impl Ieee64 {
    /// Create a new `Ieee64` containing the bits of `x`.
    pub fn with_bits(x: u64) -> Self {
@@ -812,6 +921,12 @@ impl From<u64> for Ieee64 {
    }
 }
 impl IntoBytes for Ieee64 {
    fn into_bytes(self) -> Vec<u8> {
        self.0.to_le_bytes().to_vec()
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -968,9 +1083,10 @@ mod tests {
        parse_ok::<Uimm128>("0x00", "0x00");
        parse_ok::<Uimm128>("0x00000042", "0x42");
        parse_ok::<Uimm128>(
-            "0x0102030405060708090a0b0c0d0e0f",
+            "0x0102030405060708090a0b0c0d0e0f00",
-            "0x0102030405060708090a0b0c0d0e0f",
+            "0x0102030405060708090a0b0c0d0e0f00",
        );
        parse_ok::<Uimm128>("0x_0000_0043_21", "0x4321");
        parse_err::<Uimm128>("", "Expected a hexadecimal string, e.g. 0x1234");
        parse_err::<Uimm128>("0x", "Expected a hexadecimal string, e.g. 0x1234");
@@ -982,6 +1098,24 @@ mod tests {
            "0x00000000000000000000000000000000000000000000000000",
            "Hexadecimal string has too many digits to fit in a 128-bit vector",
        );
        parse_err::<Uimm128>("0xrstu", "Unable to parse as hexadecimal");
        parse_err::<Uimm128>("0x__", "Hexadecimal string must have some digits");
    }
    // Three ways of building the constant 1 -- parsing a hexadecimal string, collecting an iterator
    // of integers, and converting a single integer with `From` -- must all produce the same 16
    // bytes, with the 1 stored in the first byte.
    #[test]
    fn uimm128_equivalence() {
        assert_eq!(
            "0x01".parse::<Uimm128>().unwrap().0,
            [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        );
        assert_eq!(
            Uimm128::from_iter(vec![1, 0, 0, 0]).0,
            [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        );
        assert_eq!(
            Uimm128::from(1).0,
            [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        );
    }
    #[test]
@@ -997,6 +1131,50 @@ mod tests {
        assert_eq!(
            "0x12345678".parse::<Uimm128>().unwrap().0,
            [0x78, 0x56, 0x34, 0x12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        );
        assert_eq!(
            "0x1234_5678".parse::<Uimm128>().unwrap().0,
            [0x78, 0x56, 0x34, 0x12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        );
    }
    // Exercise the `FromIterator` conversions into `Uimm128` for integers, booleans, bytes and
    // 32-bit floats, checking the exact byte layout of each result.
    #[test]
    fn uimm128_from_iter() {
        // the expected bytes show each integer occupying four bytes, least-significant byte first,
        // with the first item at the start of the buffer
        assert_eq!(
            Uimm128::from_iter(vec![4, 3, 2, 1]).0,
            [4, 0, 0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0]
        );
        // two booleans: each lane spans eight bytes and only its first byte carries the flag
        assert_eq!(
            Uimm128::from_iter(vec![false, true]).0,
            [/* false */ 0, 0, 0, 0, 0, 0, 0, 0, /* true */ 1, 0, 0, 0, 0, 0, 0, 0]
        );
        // eight booleans: each lane spans two bytes
        assert_eq!(
            Uimm128::from_iter(vec![false, true, false, true, false, true, false, true]).0,
            [0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0]
        );
        // the `as u8` cast pins the element type of the vector to bytes
        #[allow(trivial_numeric_casts)]
        let u8s = vec![
            1 as u8, 2, 3, 4, 5, 6, 7, 8, 9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0,
        ];
        assert_eq!(
            Uimm128::from_iter(u8s).0,
            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0]
        );
        // floats are wrapped in `Ieee32` before collection; each contributes four bytes
        #[allow(trivial_numeric_casts)]
        let ieee32s: Vec<Ieee32> = vec![32.4 as f32, 0.0, 1.0, 6.6666]
            .iter()
            .map(|&f| Ieee32::from(f))
            .collect();
        assert_eq!(
            Uimm128::from_iter(ieee32s).0,
            [
                /* 32.4 == */ 0x9a, 0x99, 0x01, 0x42, /* 0 == */ 0, 0, 0, 0,
                /* 1 == */ 0, 0, 0x80, 0x3f, /* 6.6666 == */ 0xca, 0x54, 0xd5, 0x40,
            ]
        )
    }
--- a/cranelift/filetests/filetests/isa/x86/rodata-vconst.clif
+++ b/cranelift/filetests/filetests/isa/x86/rodata-vconst.clif
@@ -1,13 +0,0 @@
 test rodata
 set enable_simd=true
 set probestack_enabled=false
 target x86_64 haswell
 ; use baldrdash calling convention here for simplicity (avoids prologue, epilogue)
 function %test_vconst_i32() -> i32x4 baldrdash_system_v {
 ebb0:
    v0 = vconst.i32x4 0x1234
    return v0
 }
 ; sameln: [34, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
--- a/cranelift/filetests/filetests/isa/x86/vconst-binemit.clif
+++ b/cranelift/filetests/filetests/isa/x86/vconst-binemit.clif
--- a/cranelift/filetests/filetests/isa/x86/vconst-rodata.clif
+++ b/cranelift/filetests/filetests/isa/x86/vconst-rodata.clif
@@ -0,0 +1,20 @@
 test rodata
 set enable_simd=true
 set probestack_enabled=false
 target x86_64 haswell
 ; A vconst written in hexadecimal syntax; the expectation following this function lists the
 ; constant with its lowest byte first (34, then 12) and the remaining bytes zero.
 function %test_vconst_i32() -> i32x4 {
 ebb0:
    v0 = vconst.i32x4 0x1234
    return v0
 }
 ; sameln: [34, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
 ; A vconst written as a list of eight boolean literals; in the expectation following this
 ; function each lane takes two bytes and a true lane shows up as 1 in the first of them.
 function %test_vconst_b16() -> b16x8 {
 ebb0:
    v0 = vconst.b16x8 [true false true false true false true true]
    return v0
 }
 ; sameln: [1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0]
--- a/cranelift/filetests/filetests/isa/x86/vconst-run.clif
+++ b/cranelift/filetests/filetests/isa/x86/vconst-run.clif
@@ -0,0 +1,21 @@
 test run
 set enable_simd
 ; Builds the same i32x4 constant with both vconst syntaxes and returns true only if lane 1 of
 ; the two constants is equal and that lane holds the value 2.
 function %test_vconst_syntax() -> b1 {
 ebb0:
    v0 = vconst.i32x4 0x00000004_00000003_00000002_00000001     ; build constant using hexadecimal syntax
    v1 = vconst.i32x4 [1 2 3 4]                                 ; build constant using literal list syntax
    ; verify lane 1 matches
    v2 = extractlane v0, 1
    v3 = extractlane v1, 1
    v4 = icmp eq v3, v2
    ; verify lane 1 has the correct value
    v5 = icmp_imm eq v3, 2
    ; both checks must hold for the function to return true
    v6 = band v4, v5
    return v6
 }
 ; run
--- a/cranelift/reader/src/parser.rs
+++ b/cranelift/reader/src/parser.rs
@@ -12,6 +12,7 @@ use cranelift_codegen::ir::entities::AnyEntity;
 use cranelift_codegen::ir::immediates::{Ieee32, Ieee64, Imm64, Offset32, Uimm128, Uimm32, Uimm64};
 use cranelift_codegen::ir::instructions::{InstructionData, InstructionFormat, VariableArgs};
 use cranelift_codegen::ir::types::INVALID;
 use cranelift_codegen::ir::types::*;
 use cranelift_codegen::ir::{
    AbiParam, ArgumentExtension, ArgumentLoc, Ebb, ExtFuncData, ExternalName, FuncRef, Function,
    GlobalValue, GlobalValueData, Heap, HeapData, HeapStyle, JumpTable, JumpTableData, MemFlags,
@@ -21,6 +22,7 @@ use cranelift_codegen::ir::{
 use cranelift_codegen::isa::{self, CallConv, Encoding, RegUnit, TargetIsa};
 use cranelift_codegen::packed_option::ReservedValue;
 use cranelift_codegen::{settings, timing};
 use std::iter::FromIterator;
 use std::mem;
 use std::str::FromStr;
 use std::{u16, u32};
@@ -610,6 +612,19 @@ impl<'a> Parser<'a> {
        }
    }
    // Match and consume a Uimm128 immediate written either in hexadecimal form (e.g. 0x000102...) or
    // as a bracketed list of literals (e.g. [0 1 2...]); the controlling type is only used for the
    // list form
    fn match_uimm128_or_literals(&mut self, controlling_type: Type) -> ParseResult<Uimm128> {
        if !self.optional(Token::LBracket) {
            // no opening bracket: parse using a hexadecimal value
            return self.match_uimm128("expected an immediate hexadecimal operand");
        }
        // opening bracket consumed: parse a list of values, e.g. vconst.i32x4 [0 1 2 3]
        let literals = self.parse_literals_to_uimm128(controlling_type)?;
        self.match_token(Token::RBracket, "expected a terminating right bracket")?;
        Ok(literals)
    }
    // Match and consume a Uimm64 immediate.
    fn match_uimm64(&mut self, err_msg: &str) -> ParseResult<Uimm64> {
        if let Some(Token::Integer(text)) = self.token() {
@@ -821,6 +836,36 @@ impl<'a> Parser<'a> {
        }
    }
    /// Parse a list of literals (i.e. integers, floats, booleans) into a `Uimm128`; e.g. `1 2 3 4`
    /// for a controlling type of `i32x4`.
    ///
    /// The lane type of `ty` selects which kind of literal is matched and its lane count dictates
    /// how many literals are consumed. Surrounding brackets are not matched here, and any tokens
    /// following the last expected literal are left unconsumed. Returns an error if `ty` is not a
    /// vector type, if its lane type is unsupported, or if any literal fails to match.
    fn parse_literals_to_uimm128(&mut self, ty: Type) -> ParseResult<Uimm128> {
        macro_rules! consume {
            ( $ty:ident, $match_fn:expr ) => {{
                assert!($ty.is_vector());
                let mut v = Vec::with_capacity($ty.lane_count() as usize);
                // $match_fn is re-evaluated on every iteration, consuming one literal each time
                for _ in 0..$ty.lane_count() {
                    v.push($match_fn?);
                }
                Uimm128::from_iter(v)
            }};
        }

        if !ty.is_vector() {
            err!(self.loc, "Expected a controlling vector type, not {}", ty)
        } else {
            let uimm128 = match ty.lane_type() {
                I8 => consume!(ty, self.match_uimm8("Expected an 8-bit unsigned integer")),
                // TODO no 16-bit match yet; report a parse error instead of panicking on user input
                I16 => {
                    return err!(
                        self.loc,
                        "16-bit integer literals are not yet supported in a literal list"
                    )
                }
                I32 => consume!(ty, self.match_imm32("Expected a 32-bit integer")),
                I64 => consume!(ty, self.match_imm64("Expected a 64-bit integer")),
                F32 => consume!(ty, self.match_ieee32("Expected a 32-bit float...")),
                F64 => consume!(ty, self.match_ieee64("Expected a 64-bit float")),
                b if b.is_bool() => consume!(ty, self.match_bool("Expected a boolean")),
                _ => return err!(self.loc, "Expected a type of: float, int, bool"),
            };
            Ok(uimm128)
        }
    }
    /// Parse a list of test command passes specified in command line.
    pub fn parse_cmdline_passes(&mut self, passes: &'a [String]) -> Vec<TestCommand<'a>> {
        let mut list = Vec::new();
@@ -1977,7 +2022,7 @@ impl<'a> Parser<'a> {
        };
        // instruction ::=  [inst-results "="] Opcode(opc) ["." Type] * ...
-        let inst_data = self.parse_inst_operands(ctx, opcode)?;
+        let inst_data = self.parse_inst_operands(ctx, opcode, explicit_ctrl_type)?;
        // We're done parsing the instruction now.
        //
@@ -2186,6 +2231,7 @@ impl<'a> Parser<'a> {
        &mut self,
        ctx: &mut Context,
        opcode: Opcode,
        explicit_control_type: Option<Type>,
    ) -> ParseResult<InstructionData> {
        let idata = match opcode.format() {
            InstructionFormat::Unary => InstructionData::Unary {
@@ -2196,14 +2242,23 @@ impl<'a> Parser<'a> {
                opcode,
                imm: self.match_imm64("expected immediate integer operand")?,
            },
-            InstructionFormat::UnaryImm128 => {
+            InstructionFormat::UnaryImm128 => match explicit_control_type {
-                let uimm128 = self.match_uimm128("expected immediate hexadecimal operand")?;
+                None => {
                    return err!(
                        self.loc,
                        "Expected {:?} to have a controlling type variable, e.g. inst.i32x4",
                        opcode
                    )
                }
                Some(ty) => {
                    let uimm128 = self.match_uimm128_or_literals(ty)?;
                    let constant_handle = ctx.function.dfg.constants.insert(uimm128.0.to_vec());
                    InstructionData::UnaryImm128 {
                        opcode,
                        imm: constant_handle,
                    }
                }
            },
            InstructionFormat::UnaryIeee32 => InstructionData::UnaryIeee32 {
                opcode,
                imm: self.match_ieee32("expected immediate 32-bit float operand")?,
@@ -3150,4 +3205,35 @@ mod tests {
            CallConv::Cold
        );
    }
    // Check which literal lists `parse_literals_to_uimm128` accepts or rejects for a given
    // controlling type. The texts contain no surrounding brackets because that function is called
    // directly on a fresh parser.
    #[test]
    fn uimm128() {
        // run the literal-list parser over `$text` with `$type` as the controlling type
        macro_rules! parse_as_uimm128 {
            ($text:expr, $type:expr) => {{
                Parser::new($text).parse_literals_to_uimm128($type)
            }};
        }
        // assert that parsing succeeds
        macro_rules! can_parse_as_uimm128 {
            ($text:expr, $type:expr) => {{
                assert!(parse_as_uimm128!($text, $type).is_ok())
            }};
        }
        // assert that parsing fails
        macro_rules! cannot_parse_as_uimm128 {
            ($text:expr, $type:expr) => {{
                assert!(parse_as_uimm128!($text, $type).is_err())
            }};
        }
        can_parse_as_uimm128!("1 2 3 4", I32X4);
        can_parse_as_uimm128!("1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16", I8X16);
        can_parse_as_uimm128!("0x1.1 0x2.2 0x3.3 0x4.4", F32X4);
        can_parse_as_uimm128!("true false true false true false true false", B16X8);
        can_parse_as_uimm128!("0 -1", I64X2);
        can_parse_as_uimm128!("true false", B64X2);
        can_parse_as_uimm128!("true true true true true", B32X4); // note that parse_literals_to_uimm128 will leave extra tokens unconsumed
        cannot_parse_as_uimm128!("0x0 0x1 0x2 0x3", I32X4);
        // presumably rejected because only three literals are supplied for a four-lane type
        cannot_parse_as_uimm128!("1 2 3", I32X4);
        cannot_parse_as_uimm128!(" ", F32X4);
    }
 }