Merge pull request #3362 from dheaton-arm/implement-unarrow

Implement `Unarrow`, `Uunarrow`, and `Snarrow` for the interpreter
2021-09-21 10:06:46 -07:00
parent e0bd4bd007 3fc29f5f6c
commit 38728c5746
9 changed files with 166 additions and 10 deletions
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -4047,7 +4047,7 @@ pub(crate) fn define(
        Combine `x` and `y` into a vector with twice the lanes but half the integer width while
        saturating overflowing values to the unsigned maximum and minimum.

-        Note that all input lanes are considered unsigned.
+        Note that all input lanes are considered unsigned: any negative values will be interpreted as unsigned, overflowing and being replaced with the unsigned maximum.

        The lanes will be concatenated after narrowing. For example, when `x` and `y` are `i32x4`
        and `x = [x3, x2, x1, x0]` and `y = [y3, y2, y1, y0]`, then after narrowing the value
--- a/cranelift/codegen/src/ir/types.rs
+++ b/cranelift/codegen/src/ir/types.rs
@@ -79,6 +79,30 @@ impl Type {
        }
    }

+    /// Get the (minimum, maximum) values representable by each lane of this type, read as signed integers when `signed` is true and as unsigned integers otherwise.
+    /// Both are returned as `u128` bit patterns: a negative signed minimum comes back sign-extended by the `as u128` cast, so cast it back (e.g. `as i128`) to recover the value.
+    pub fn bounds(self, signed: bool) -> (u128, u128) {
+        if signed { // signed range of the lane type
+            match self.lane_type() {
+                I8 => (i8::MIN as u128, i8::MAX as u128),
+                I16 => (i16::MIN as u128, i16::MAX as u128),
+                I32 => (i32::MIN as u128, i32::MAX as u128),
+                I64 => (i64::MIN as u128, i64::MAX as u128),
+                I128 => (i128::MIN as u128, i128::MAX as u128),
+                _ => unimplemented!(), // panics for any lane type other than I8, I16, I32, I64, I128
+            }
+        } else { // unsigned range of the lane type; the minimum is always zero
+            match self.lane_type() {
+                I8 => (u8::MIN as u128, u8::MAX as u128),
+                I16 => (u16::MIN as u128, u16::MAX as u128),
+                I32 => (u32::MIN as u128, u32::MAX as u128),
+                I64 => (u64::MIN as u128, u64::MAX as u128),
+                I128 => (u128::MIN, u128::MAX),
+                _ => unimplemented!(), // panics for any lane type other than I8, I16, I32, I64, I128
+            }
+        }
+    }
+
    /// Get an integer type with the requested number of bits.
    pub fn int(bits: u16) -> Option<Self> {
        match bits {
--- a/cranelift/filetests/filetests/runtests/simd-snarrow-aarch64.clif
+++ b/cranelift/filetests/filetests/runtests/simd-snarrow-aarch64.clif
@@ -0,0 +1,11 @@
+test interpret
+test run
+target aarch64
+; x86_64 considers the case `i64x2` -> `i32x4` to be 'unreachable'
+
+function %snarrow_i64x2(i64x2, i64x2) -> i32x4 {
+block0(v0: i64x2, v1: i64x2):
+    v2 = snarrow v0, v1
+    return v2
+}
+; run: %snarrow_i64x2([65535 -100000], [5000000000 73]) == [65535 -100000 2147483647 73]
--- a/cranelift/filetests/filetests/runtests/simd-snarrow.clif
+++ b/cranelift/filetests/filetests/runtests/simd-snarrow.clif
@@ -0,0 +1,19 @@
+test interpret
+test run
+target aarch64
+set enable_simd
+target x86_64
+
+function %snarrow_i16x8(i16x8, i16x8) -> i8x16 {
+block0(v0: i16x8, v1: i16x8):
+    v2 = snarrow v0, v1
+    return v2
+}
+; run: %snarrow_i16x8([1 127 128 15 32767 -32 48 0], [8 255 -100 100 -32768 73 80 42]) == [1 127 127 15 127 -32 48 0 8 127 -100 100 -128 73 80 42]
+
+function %snarrow_i32x4(i32x4, i32x4) -> i16x8 {
+block0(v0: i32x4, v1: i32x4):
+    v2 = snarrow v0, v1
+    return v2
+}
+; run: %snarrow_i32x4([32767 1048575 -70000 -5], [268435455 73 268435455 42]) == [32767 32767 -32768 -5 32767 73 32767 42]
--- a/cranelift/filetests/filetests/runtests/simd-unarrow-aarch64.clif
+++ b/cranelift/filetests/filetests/runtests/simd-unarrow-aarch64.clif
@@ -0,0 +1,11 @@
+test interpret
+test run
+target aarch64
+; x86_64 considers the case `i64x2 -> i32x4` to be 'unreachable'
+
+function %unarrow_i64x2(i64x2, i64x2) -> i32x4 {
+block0(v0: i64x2, v1: i64x2):
+    v2 = unarrow v0, v1
+    return v2
+}
+; run: %unarrow_i64x2([65535 -100000], [5000000000 73]) == [65535 0 4294967295 73]
--- a/cranelift/filetests/filetests/runtests/simd-unarrow.clif
+++ b/cranelift/filetests/filetests/runtests/simd-unarrow.clif
@@ -0,0 +1,19 @@
+test interpret
+test run
+target aarch64
+set enable_simd
+target x86_64
+
+function %unarrow_i16x8(i16x8, i16x8) -> i8x16 {
+block0(v0: i16x8, v1: i16x8):
+    v2 = unarrow v0, v1
+    return v2
+}
+; run: %unarrow_i16x8([1 127 128 15 65535 -32 48 0], [8 255 -100 100 65534 73 80 42]) == [1 127 128 15 0 0 48 0 8 255 0 100 0 73 80 42]
+
+function %unarrow_i32x4(i32x4, i32x4) -> i16x8 {
+block0(v0: i32x4, v1: i32x4):
+    v2 = unarrow v0, v1
+    return v2
+}
+; run: %unarrow_i32x4([65535 1048575 -70000 -5], [268435455 73 268435455 42]) == [65535 65535 0 0 65535 73 65535 42]
--- a/cranelift/filetests/filetests/runtests/simd-uunarrow.clif
+++ b/cranelift/filetests/filetests/runtests/simd-uunarrow.clif
@@ -0,0 +1,26 @@
+test interpret
+test run
+target aarch64
+; x86_64 panics: `Did not match fcvt input!
+; thread 'worker #0' panicked at 'register allocation: Analysis(EntryLiveinValues([v2V]))', cranelift/codegen/src/machinst/compile.rs:96:10`
+
+function %uunarrow_i16x8(i16x8, i16x8) -> i8x16 {
+block0(v0: i16x8, v1: i16x8):
+    v2 = uunarrow v0, v1
+    return v2
+}
+; run: %uunarrow_i16x8([1 127 128 15 65535 -32 48 0], [8 255 -100 100 65534 73 80 42]) == [1 127 128 15 255 255 48 0 8 255 255 100 255 73 80 42]
+
+function %uunarrow_i32x4(i32x4, i32x4) -> i16x8 {
+block0(v0: i32x4, v1: i32x4):
+    v2 = uunarrow v0, v1
+    return v2
+}
+; run: %uunarrow_i32x4([65535 1048575 -70000 -5], [268435455 73 268435455 42]) == [65535 65535 65535 65535 65535 73 65535 42]
+
+function %uunarrow_i64x2(i64x2, i64x2) -> i32x4 {
+block0(v0: i64x2, v1: i64x2):
+    v2 = uunarrow v0, v1
+    return v2
+}
+; run: %uunarrow_i64x2([65535 -100000], [5000000000 73]) == [65535 4294967295 4294967295 73]
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -772,19 +772,40 @@ where
            arg(0)?,
            ValueConversionKind::Exact(ctrl_ty),
        )?),
-        Opcode::Snarrow => assign(Value::convert(
-            arg(0)?,
-            ValueConversionKind::Truncate(ctrl_ty),
-        )?),
+        Opcode::Snarrow | Opcode::Unarrow | Opcode::Uunarrow => { // saturating narrow: clamp every lane of both operands, truncate it, then emit arg0's lanes followed by arg1's
+            let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; // presumably the per-lane values of operand 0, typed as the wide input lane — confirm against `extractlanes`
+            let arg1 = extractlanes(&arg(1)?, ctrl_ty.lane_type())?;
+            let new_type = ctrl_ty.split_lanes().unwrap(); // result vector type; panics if `split_lanes` does not yield a type
+            let (min, max) = new_type.bounds(inst.opcode() == Opcode::Snarrow); // signed bounds for Snarrow, unsigned bounds for Unarrow and Uunarrow
+            let mut min: V = Value::int(min as i128, ctrl_ty.lane_type())?; // `as i128` undoes the sign-extended bit pattern from `bounds`; the bound is built in the wide input lane type so it can be compared with input lanes
+            let mut max: V = Value::int(max as i128, ctrl_ty.lane_type())?;
+            if inst.opcode() == Opcode::Uunarrow { // Uunarrow compares unsigned lanes, so the bounds must be unsigned values too
+                min = min.convert(ValueConversionKind::ToUnsigned)?;
+                max = max.convert(ValueConversionKind::ToUnsigned)?;
+            }
+            let narrow = |mut lane: V| -> ValueResult<V> { // saturate one wide lane into the narrow lane type
+                if inst.opcode() == Opcode::Uunarrow { // reinterpret the input as unsigned before clamping (Unarrow keeps its input as extracted)
+                    lane = lane.convert(ValueConversionKind::ToUnsigned)?;
+                }
+                lane = Value::max(lane, min.clone())?; // clamp from below
+                lane = Value::min(lane, max.clone())?; // clamp from above
+                lane = lane.convert(ValueConversionKind::Truncate(new_type.lane_type()))?; // the value is already within the narrow type's bounds at this point
+                if inst.opcode() == Opcode::Unarrow || inst.opcode() == Opcode::Uunarrow { // both unsigned-saturating opcodes produce unsigned result lanes
+                    lane = lane.convert(ValueConversionKind::ToUnsigned)?;
+                }
+                Ok(lane)
+            };
+            let new_vec = arg0
+                .into_iter()
+                .chain(arg1) // arg0's lanes first, then arg1's
+                .map(|lane| narrow(lane))
+                .collect::<ValueResult<Vec<_>>>()?; // a failed lane conversion is propagated to the caller by `?`
+            assign(vectorizelanes(&new_vec, new_type)?)
+        }
        Opcode::Sextend => assign(Value::convert(
            arg(0)?,
            ValueConversionKind::SignExtend(ctrl_ty),
        )?),
-        Opcode::Unarrow => assign(Value::convert(
-            arg(0)?,
-            ValueConversionKind::Truncate(ctrl_ty),
-        )?),
-        Opcode::Uunarrow => unimplemented!("Uunarrow"),
        Opcode::Uextend => assign(Value::convert(
            arg(0)?,
            ValueConversionKind::ZeroExtend(ctrl_ty),
--- a/cranelift/interpreter/src/value.rs
+++ b/cranelift/interpreter/src/value.rs
@@ -26,6 +26,9 @@ pub trait Value: Clone + From<DataValue> {
    fn convert(self, kind: ValueConversionKind) -> ValueResult<Self>;
    fn concat(self, other: Self) -> ValueResult<Self>;

+    fn max(self, other: Self) -> ValueResult<Self>;
+    fn min(self, other: Self) -> ValueResult<Self>;
+
    // Comparison.
    fn eq(&self, other: &Self) -> ValueResult<bool>;
    fn gt(&self, other: &Self) -> ValueResult<bool>;
@@ -313,11 +316,17 @@ impl Value for DataValue {
                Self::from_integer(extracted, ty)?
            }
            ValueConversionKind::SignExtend(ty) => match (self, ty) {
+                (DataValue::U8(n), types::I16) => DataValue::U16(n as u16),
+                (DataValue::U8(n), types::I32) => DataValue::U32(n as u32),
+                (DataValue::U8(n), types::I64) => DataValue::U64(n as u64),
                (DataValue::I8(n), types::I16) => DataValue::I16(n as i16),
                (DataValue::I8(n), types::I32) => DataValue::I32(n as i32),
                (DataValue::I8(n), types::I64) => DataValue::I64(n as i64),
+                (DataValue::U16(n), types::I32) => DataValue::U32(n as u32),
+                (DataValue::U16(n), types::I64) => DataValue::U64(n as u64),
                (DataValue::I16(n), types::I32) => DataValue::I32(n as i32),
                (DataValue::I16(n), types::I64) => DataValue::I64(n as i64),
+                (DataValue::U32(n), types::I64) => DataValue::U64(n as u64),
                (DataValue::I32(n), types::I64) => DataValue::I64(n as i64),
                (DataValue::I64(n), types::I128) => DataValue::I128(n as i128),
                (dv, _) => unimplemented!("conversion: {} -> {:?}", dv.ty(), kind),
@@ -376,6 +385,22 @@ impl Value for DataValue {
        }
    }

+    fn max(self, other: Self) -> ValueResult<Self> { // returns whichever operand `Value::gt` ranks greater; `other` when neither is greater
+        if Value::gt(&self, &other)? { // a comparison error is propagated to the caller
+            Ok(self)
+        } else {
+            Ok(other)
+        }
+    }
+
+    fn min(self, other: Self) -> ValueResult<Self> { // returns whichever operand `Value::lt` ranks smaller; `other` when neither is smaller
+        if Value::lt(&self, &other)? { // a comparison error is propagated to the caller
+            Ok(self)
+        } else {
+            Ok(other)
+        }
+    }
+
    fn eq(&self, other: &Self) -> ValueResult<bool> {
        comparison_match!(PartialEq::eq[&self, &other]; [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128, F32, F64])
    }