Implement Extractlane, UaddSat, and UsubSat for Cranelift interpreter (#3188)

* Implement `Extractlane`, `UaddSat`, and `UsubSat` for Cranelift interpreter

  Implemented the `Extractlane`, `UaddSat`, and `UsubSat` opcodes for the interpreter, and added helper functions for working with SIMD vectors (`extractlanes`, `vectorizelanes`, and `binary_arith`).

  Copyright (c) 2021, Arm Limited

* Re-use tests + constrict Vector assert

  - Re-use interpreter tests as runtests where supported.
  - Constrict Vector assertion.
  - Code style adjustments following feedback.

  Copyright (c) 2021, Arm Limited

* Runtest `i32x4` vectors on AArch64; add `i64x2` tests

  Copyright (c) 2021, Arm Limited

* Add `simd-` prefix to test filenames

  Copyright (c) 2021, Arm Limited

* Return aliased `SmallVec` from `extractlanes`

  Using a `SmallVec<[i128; 4]>` allows larger-width 128-bit vectors (`i32x4`, `i64x2`, ...) to not cause heap allocations.

  Copyright (c) 2021, Arm Limited

* Accept slice to `vectorizelanes` rather than `Vec`

  Copyright (c) 2021, Arm Limited
2021-08-25 17:03:19 +01:00
parent 7d05ebe7ff
commit 02ef6a02b8
7 changed files with 234 additions and 5 deletions
--- a/cranelift/filetests/filetests/runtests/simd-extractlane.clif
+++ b/cranelift/filetests/filetests/runtests/simd-extractlane.clif
@@ -0,0 +1,34 @@
+test interpret
+test run
+target aarch64
+set enable_simd
+target x86_64
+
+function %extractlane_4(i8x16) -> i8 {
+block0(v0: i8x16):
+    v1 = extractlane v0, 4
+    return v1
+}
+; run: %extractlane_4([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]) == 5
+
+function %extractlane_7(i16x8) -> i16 {
+block0(v0: i16x8):
+    v1 = extractlane v0, 7
+    return v1
+}
+; run: %extractlane_7([65528 65529 65530 65531 65532 65533 65534 65535]) == 65535
+
+function %extractlane_0(i32x4) -> i32 {
+block0(v0: i32x4):
+    v1 = extractlane v0, 0
+    return v1
+}
+; run: %extractlane_0([0 1 2 3]) == 0
+
+
+function %extractlane_1(i64x2) -> i64 {
+block0(v0: i64x2):
+    v1 = extractlane v0, 1
+    return v1
+}
+; run: %extractlane_1([0 4294967297]) == 4294967297
--- a/cranelift/filetests/filetests/runtests/simd-uaddsat-aarch64.clif
+++ b/cranelift/filetests/filetests/runtests/simd-uaddsat-aarch64.clif
@@ -0,0 +1,20 @@
+test interpret
+test run
+target aarch64
+
+; i32x4 vectors aren't supported in `uadd_sat` outside AArch64 at the moment
+function %uaddsat_i32x4(i32x4, i32x4) -> i32x4 {
+block0(v0: i32x4, v1: i32x4):
+    v2 = uadd_sat v0, v1
+    return v2
+}
+; run: %uaddsat_i32x4([40 40 40 40], [2 2 2 2]) == [42 42 42 42]
+; run: %uaddsat_i32x4([4294967290 2147483640 4294967290 4294967290], [100 100 100 100]) == [4294967295 2147483740 4294967295 4294967295]
+
+function %uaddsat_i64x2(i64x2, i64x2) -> i64x2 {
+block0(v0: i64x2, v1: i64x2):
+    v2 = uadd_sat v0, v1
+    return v2
+}
+; run: %uaddsat_i64x2([40 40], [2 2]) == [42 42]
+; run: %uaddsat_i64x2([4294967290 18446744073709551610], [100 100]) == [4294967390 18446744073709551615]
--- a/cranelift/filetests/filetests/runtests/simd-uaddsat.clif
+++ b/cranelift/filetests/filetests/runtests/simd-uaddsat.clif
@@ -0,0 +1,19 @@
+test interpret
+test run
+target aarch64
+set enable_simd
+target x86_64
+
+function %uaddsat_i8x16(i8x16, i8x16) -> i8x16 {
+block0(v0: i8x16, v1: i8x16):
+    v2 = uadd_sat v0, v1
+    return v2
+}
+; run: %uaddsat_i8x16([150 150 150 150 150 150 150 150 150 150 150 150 150 150 150 150], [150 150 150 150 150 150 150 150 150 150 150 150 150 150 150 150]) == [255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255]
+
+function %uaddsat_i16x8(i16x8, i16x8) -> i16x8 {
+block0(v0: i16x8, v1: i16x8):
+    v2 = uadd_sat v0, v1
+    return v2
+}
+; run: %uaddsat_i16x8([65000 65000 65000 65000 65000 65000 65000 65000], [1000 1000 1000 1000 1000 1000 1000 1000]) == [65535 65535 65535 65535 65535 65535 65535 65535]
--- a/cranelift/filetests/filetests/runtests/simd-usubsat-aarch64.clif
+++ b/cranelift/filetests/filetests/runtests/simd-usubsat-aarch64.clif
@@ -0,0 +1,20 @@
+test interpret
+test run
+target aarch64
+
+; i32x4 vectors aren't supported in `usub_sat` outside AArch64 at the moment
+function %usubsat_i32x4(i32x4, i32x4) -> i32x4 {
+block0(v0: i32x4, v1: i32x4):
+    v2 = usub_sat v0, v1
+    return v2
+}
+; run: %usubsat_i32x4([40 40 40 40], [2 2 2 2]) == [38 38 38 38]
+; run: %usubsat_i32x4([4294967290 2147483640 4294967290 4294967290], [4294967295 4294967295 4294967295 4294967295]) == [0 0 0 0]
+
+function %usubsat_i64x2(i64x2, i64x2) -> i64x2 {
+block0(v0: i64x2, v1: i64x2):
+    v2 = usub_sat v0, v1
+    return v2
+}
+; run: %usubsat_i64x2([40 40], [2 2]) == [38 38]
+; run: %usubsat_i64x2([4294967290 2147483640], [4294967295 4294967295]) == [0 0]
--- a/cranelift/filetests/filetests/runtests/simd-usubsat.clif
+++ b/cranelift/filetests/filetests/runtests/simd-usubsat.clif
@@ -0,0 +1,20 @@
+test interpret
+test run
+target aarch64
+set enable_simd
+target x86_64
+
+function %usubsat_i8x16(i8x16, i8x16) -> i8x16 {
+block0(v0: i8x16, v1: i8x16):
+    v2 = usub_sat v0, v1
+    return v2
+}
+; run: %usubsat_i8x16([150 150 150 150 150 150 150 150 150 150 150 150 150 150 150 150], [100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100]) == [50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50]
+; run: %usubsat_i8x16([150 150 150 150 150 150 150 150 150 150 150 150 150 150 150 150], [200 200 200 200 200 200 200 200 200 200 200 200 200 200 200 200]) == [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
+
+function %usubsat_i16x8(i16x8, i16x8) -> i16x8 {
+block0(v0: i16x8, v1: i16x8):
+    v2 = usub_sat v0, v1
+    return v2
+}
+; run: %usubsat_i16x8([65534 65534 65534 65534 65534 65534 65534 65534], [65535 65535 65535 65535 65535 65535 65535 65535]) == [0 0 0 0 0 0 0 0]
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -496,10 +496,22 @@ where
            binary(Value::div, inc, two)?
        }
        Opcode::Iadd => binary(Value::add, arg(0)?, arg(1)?)?,
-        Opcode::UaddSat => unimplemented!("UaddSat"),
+        Opcode::UaddSat => assign(binary_arith(
+            arg(0)?,
+            arg(1)?,
+            ctrl_ty,
+            Value::add_sat,
+            true,
+        )?),
        Opcode::SaddSat => unimplemented!("SaddSat"),
        Opcode::Isub => binary(Value::sub, arg(0)?, arg(1)?)?,
-        Opcode::UsubSat => unimplemented!("UsubSat"),
+        Opcode::UsubSat => assign(binary_arith(
+            arg(0)?,
+            arg(1)?,
+            ctrl_ty,
+            Value::sub_sat,
+            true,
+        )?),
        Opcode::SsubSat => unimplemented!("SsubSat"),
        Opcode::Ineg => binary(Value::sub, Value::int(0, ctrl_ty)?, arg(0)?)?,
        Opcode::Iabs => unimplemented!("Iabs"),
@@ -661,7 +673,11 @@ where
        Opcode::Swizzle => unimplemented!("Swizzle"),
        Opcode::Splat => unimplemented!("Splat"),
        Opcode::Insertlane => unimplemented!("Insertlane"),
-        Opcode::Extractlane => unimplemented!("Extractlane"),
+        Opcode::Extractlane => {
+            let value =
+                extractlanes(&arg(0)?, ctrl_ty.lane_type())?[Value::into_int(imm())? as usize];
+            assign(Value::int(value, ctrl_ty.lane_type())?)
+        }
        Opcode::VhighBits => unimplemented!("VhighBits"),
        Opcode::Vsplit => unimplemented!("Vsplit"),
        Opcode::Vconcat => unimplemented!("Vconcat"),
@@ -869,3 +885,80 @@ where
        }
    })
 }
+
+type SimdVec = SmallVec<[i128; 4]>;
+
+/// Converts a SIMD vector value into a Rust vector of i128 for processing.
+///
+/// The 16 bytes from `into_array` are split into lanes of `lane_type`'s width. Each lane is
+/// assembled little-endian (byte 0 is least significant) and zero-extended to `i128`.
+/// Lane types other than `I8`, `I16`, `I32` and `I64` hit `unimplemented!`.
+fn extractlanes<V>(x: &V, lane_type: types::Type) -> ValueResult<SimdVec>
+where
+    V: Value,
+{
+    // Width of a single lane in bytes.
+    let lane_bytes = match lane_type {
+        types::I8 => 1,
+        types::I16 => 2,
+        types::I32 => 4,
+        types::I64 => 8,
+        _ => unimplemented!("Only 128-bit vectors are currently supported."),
+    };
+
+    let bytes = x.into_array()?;
+    let lanes: SimdVec = bytes
+        .chunks_exact(lane_bytes)
+        // Fold from the most significant byte down so that byte 0 of each lane ends up in
+        // the low bits of the result.
+        .map(|lane| lane.iter().rev().fold(0i128, |acc, &byte| (acc << 8) | i128::from(byte)))
+        .collect();
+    Ok(lanes)
+}
+
+/// Convert a Rust array of i128s back into a `Value::vector`.
+///
+/// Each lane is truncated to the lane width and written little-endian; missing lanes stay zero.
+fn vectorizelanes<V>(x: &[i128], vector_type: types::Type) -> ValueResult<V>
+where
+    V: Value,
+{
+    let lane_bytes = match vector_type.lane_type() {
+        types::I8 => 1,
+        types::I16 => 2,
+        types::I32 => 4,
+        types::I64 => 8,
+        _ => unimplemented!("Only 128-bit vectors are currently supported."),
+    };
+    let mut bytes = [0u8; 16];
+    for (lane, val) in x.iter().enumerate() {
+        let start = lane * lane_bytes;
+        bytes[start..start + lane_bytes].copy_from_slice(&val.to_le_bytes()[..lane_bytes]);
+    }
+    Value::vector(bytes, vector_type)
+}
+
+/// Performs the supplied binary arithmetic `op` on two SIMD vectors.
+///
+/// `op` runs once per lane pair; if `unsigned`, both lanes are first converted `ToUnsigned`.
+fn binary_arith<V, F>(x: V, y: V, vector_type: types::Type, op: F, unsigned: bool) -> ValueResult<V>
+where
+    V: Value,
+    F: Fn(V, V) -> ValueResult<V>,
+{
+    let lane_type = vector_type.lane_type();
+    let arg0 = extractlanes(&x, lane_type)?;
+    let arg1 = extractlanes(&y, lane_type)?;
+    let mut result = SimdVec::new();
+    for (lhs, rhs) in arg0.into_iter().zip(arg1) {
+        // Annotate `V` on the bindings so the compiler can resolve `Value::int`'s type.
+        let mut lhs: V = Value::int(lhs, lane_type)?;
+        let mut rhs: V = Value::int(rhs, lane_type)?;
+        if unsigned {
+            lhs = lhs.convert(ValueConversionKind::ToUnsigned)?;
+            rhs = rhs.convert(ValueConversionKind::ToUnsigned)?;
+        }
+        result.push(op(lhs, rhs)?.into_int()?);
+    }
+    vectorizelanes(&result, vector_type)
+}
--- a/cranelift/interpreter/src/value.rs
+++ b/cranelift/interpreter/src/value.rs
@@ -22,6 +22,7 @@ pub trait Value: Clone + From<DataValue> {
    fn bool(b: bool, ty: Type) -> ValueResult<Self>;
    fn into_bool(self) -> ValueResult<bool>;
    fn vector(v: [u8; 16], ty: Type) -> ValueResult<Self>;
+    fn into_array(&self) -> ValueResult<[u8; 16]>;
    fn convert(self, kind: ValueConversionKind) -> ValueResult<Self>;
    fn concat(self, other: Self) -> ValueResult<Self>;

@@ -47,6 +48,10 @@ pub trait Value: Clone + From<DataValue> {
    fn div(self, other: Self) -> ValueResult<Self>;
    fn rem(self, other: Self) -> ValueResult<Self>;

+    // Saturating arithmetic.
+    fn add_sat(self, other: Self) -> ValueResult<Self>;
+    fn sub_sat(self, other: Self) -> ValueResult<Self>;
+
    // Bitwise.
    fn shl(self, other: Self) -> ValueResult<Self>;
    fn ushr(self, other: Self) -> ValueResult<Self>;
@@ -80,6 +85,7 @@ pub enum ValueTypeClass {
    Integer,
    Boolean,
    Float,
+    Vector,
 }

 impl Display for ValueTypeClass {
@@ -88,6 +94,7 @@ impl Display for ValueTypeClass {
            ValueTypeClass::Integer => write!(f, "integer"),
            ValueTypeClass::Boolean => write!(f, "boolean"),
            ValueTypeClass::Float => write!(f, "float"),
+            ValueTypeClass::Vector => write!(f, "vector"),
        }
    }
 }
@@ -225,8 +232,16 @@ impl Value for DataValue {
        }
    }

-    fn vector(_v: [u8; 16], _ty: Type) -> ValueResult<Self> {
-        unimplemented!()
+    fn vector(v: [u8; 16], ty: Type) -> ValueResult<Self> {
+        assert!(ty.is_vector() && ty.bytes() == 16);
+        Ok(DataValue::V128(v))
+    }
+
+    fn into_array(&self) -> ValueResult<[u8; 16]> {
+        match *self {
+            DataValue::V128(v) => Ok(v),
+            _ => Err(ValueError::InvalidType(ValueTypeClass::Vector, self.ty())),
+        }
    }

    fn convert(self, kind: ValueConversionKind) -> ValueResult<Self> {
@@ -380,6 +395,14 @@ impl Value for DataValue {
        binary_match!(%(&self, &other); [I8, I16, I32, I64])
    }

+    fn add_sat(self, other: Self) -> ValueResult<Self> {
+        binary_match!(saturating_add(self, &other); [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128])
+    }
+
+    fn sub_sat(self, other: Self) -> ValueResult<Self> {
+        binary_match!(saturating_sub(self, &other); [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128])
+    }
+
    fn shl(self, other: Self) -> ValueResult<Self> {
        binary_match!(<<(&self, &other); [I8, I16, I32, I64])
    }