Allow 64-bit vectors and implement for interpreter (#4509)

* Allow 64-bit vectors and implement for interpreter. The AArch64 backend already supports 64-bit vectors; this simply allows instructions to make use of that. Implemented support for 64-bit vectors within the interpreter to allow interpret runtests to use them. Copyright (c) 2022 Arm Limited.
* Disable 64-bit SIMD `iaddpairwise` tests on s390x. Copyright (c) 2022 Arm Limited.
2022-07-25 21:00:43 +01:00
parent c5ddb4b803
commit 3ef89b7787
7 changed files with 119 additions and 48 deletions
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -547,7 +547,7 @@ where
        Opcode::Iabs => {
            let (min_val, _) = ctrl_ty.lane_type().bounds(true);
            let min_val: V = Value::int(min_val as i128, ctrl_ty.lane_type())?;
-            let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
+            let arg0 = extractlanes(&arg(0)?, ctrl_ty)?;
            let new_vec = arg0
                .into_iter()
                .map(|lane| {
@@ -574,8 +574,8 @@ where
            } else {
                ValueConversionKind::SignExtend(double_length)
            };
-            let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
-            let arg1 = extractlanes(&arg(1)?, ctrl_ty.lane_type())?;
+            let arg0 = extractlanes(&arg(0)?, ctrl_ty)?;
+            let arg1 = extractlanes(&arg(1)?, ctrl_ty)?;

            let res = arg0
                .into_iter()
@@ -681,7 +681,7 @@ where
            let count = if arg(0)?.ty().is_int() {
                arg(0)?.count_ones()?
            } else {
-                let lanes = extractlanes(&arg(0)?, ctrl_ty.lane_type())?
+                let lanes = extractlanes(&arg(0)?, ctrl_ty)?
                    .into_iter()
                    .map(|lane| lane.count_ones())
                    .collect::<ValueResult<SimdVec<V>>>()?;
@@ -786,8 +786,8 @@ where
            assign(Value::int(int, ctrl_ty)?)
        }
        Opcode::Snarrow | Opcode::Unarrow | Opcode::Uunarrow => {
-            let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
-            let arg1 = extractlanes(&arg(1)?, ctrl_ty.lane_type())?;
+            let arg0 = extractlanes(&arg(0)?, ctrl_ty)?;
+            let arg1 = extractlanes(&arg(1)?, ctrl_ty)?;
            let new_type = ctrl_ty.split_lanes().unwrap();
            let (min, max) = new_type.bounds(inst.opcode() == Opcode::Snarrow);
            let mut min: V = Value::int(min as i128, ctrl_ty.lane_type())?;
@@ -818,7 +818,7 @@ where
        Opcode::Bmask => assign({
            let bool = arg(0)?;
            let bool_ty = ctrl_ty.as_bool_pedantic();
-            let lanes = extractlanes(&bool, bool_ty.lane_type())?
+            let lanes = extractlanes(&bool, bool_ty)?
                .into_iter()
                .map(|lane| lane.convert(ValueConversionKind::Exact(ctrl_ty.lane_type())))
                .collect::<ValueResult<SimdVec<V>>>()?;
@@ -874,23 +874,20 @@ where
        }
        Opcode::Insertlane => {
            let idx = imm().into_int()? as usize;
-            let mut vector = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
+            let mut vector = extractlanes(&arg(0)?, ctrl_ty)?;
            vector[idx] = arg(1)?;
            assign(vectorizelanes(&vector, ctrl_ty)?)
        }
        Opcode::Extractlane => {
            let idx = imm().into_int()? as usize;
-            let lanes = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
+            let lanes = extractlanes(&arg(0)?, ctrl_ty)?;
            assign(lanes[idx].clone())
        }
        Opcode::VhighBits => {
            // `ctrl_ty` controls the return type for this, so the input type
            // must be retrieved via `inst_context`.
-            let lane_type = inst_context
-                .type_of(inst_context.args()[0])
-                .unwrap()
-                .lane_type();
-            let a = extractlanes(&arg(0)?, lane_type)?;
+            let vector_type = inst_context.type_of(inst_context.args()[0]).unwrap();
+            let a = extractlanes(&arg(0)?, vector_type)?;
            let mut result: i128 = 0;
            for (i, val) in a.into_iter().enumerate() {
                let val = val.reverse_bits()?.into_int()?; // MSB -> LSB
@@ -901,9 +898,9 @@ where
        Opcode::Vsplit => unimplemented!("Vsplit"),
        Opcode::Vconcat => unimplemented!("Vconcat"),
        Opcode::Vselect => {
-            let c = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
-            let x = extractlanes(&arg(1)?, ctrl_ty.lane_type())?;
-            let y = extractlanes(&arg(2)?, ctrl_ty.lane_type())?;
+            let c = extractlanes(&arg(0)?, ctrl_ty)?;
+            let x = extractlanes(&arg(1)?, ctrl_ty)?;
+            let y = extractlanes(&arg(2)?, ctrl_ty)?;
            let mut new_vec = SimdVec::new();
            for (c, (x, y)) in c.into_iter().zip(x.into_iter().zip(y.into_iter())) {
                if Value::eq(&c, &Value::int(0, ctrl_ty.lane_type())?)? {
@@ -937,7 +934,7 @@ where
                }
                _ => unreachable!(),
            };
-            let vec_iter = extractlanes(&arg(0)?, ctrl_ty.lane_type())?.into_iter();
+            let vec_iter = extractlanes(&arg(0)?, ctrl_ty)?.into_iter();
            let new_vec = match inst.opcode() {
                Opcode::SwidenLow | Opcode::UwidenLow => vec_iter
                    .take(new_type.lane_count() as usize)
@@ -973,8 +970,8 @@ where
        Opcode::WideningPairwiseDotProductS => {
            let ctrl_ty = types::I16X8;
            let new_type = ctrl_ty.merge_lanes().unwrap();
-            let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
-            let arg1 = extractlanes(&arg(1)?, ctrl_ty.lane_type())?;
+            let arg0 = extractlanes(&arg(0)?, ctrl_ty)?;
+            let arg1 = extractlanes(&arg(1)?, ctrl_ty)?;
            let new_vec = arg0
                .chunks(2)
                .into_iter()
@@ -993,8 +990,8 @@ where
        Opcode::SqmulRoundSat => {
            let lane_type = ctrl_ty.lane_type();
            let double_width = ctrl_ty.double_width().unwrap().lane_type();
-            let arg0 = extractlanes(&arg(0)?, lane_type)?;
-            let arg1 = extractlanes(&arg(1)?, lane_type)?;
+            let arg0 = extractlanes(&arg(0)?, ctrl_ty)?;
+            let arg1 = extractlanes(&arg(1)?, ctrl_ty)?;
            let (min, max) = lane_type.bounds(true);
            let min: V = Value::int(min as i128, double_width)?;
            let max: V = Value::int(max as i128, double_width)?;
@@ -1130,9 +1127,8 @@ where
    };

    let dst_ty = ctrl_ty.as_bool();
-    let lane_type = ctrl_ty.lane_type();
-    let left = extractlanes(left, lane_type)?;
-    let right = extractlanes(right, lane_type)?;
+    let left = extractlanes(left, ctrl_ty)?;
+    let right = extractlanes(right, ctrl_ty)?;

    let res = left
        .into_iter()
@@ -1178,10 +1174,11 @@ type SimdVec<V> = SmallVec<[V; 4]>;

 /// Converts a SIMD vector value into a Rust array of [Value] for processing.
 /// If `x` is a scalar, it will be returned as a single-element array.
-fn extractlanes<V>(x: &V, lane_type: types::Type) -> ValueResult<SimdVec<V>>
+fn extractlanes<V>(x: &V, vector_type: types::Type) -> ValueResult<SimdVec<V>>
 where
    V: Value,
 {
+    let lane_type = vector_type.lane_type();
    let mut lanes = SimdVec::new();
    // Wrap scalar values as a single-element vector and return.
    if !x.ty().is_vector() {
@@ -1194,17 +1191,14 @@ where
        types::I16 | types::B16 => 2,
        types::I32 | types::B32 => 4,
        types::I64 | types::B64 => 8,
-        _ => unimplemented!("Only 128-bit vectors are currently supported."),
+        _ => unimplemented!("vectors with lanes wider than 64-bits are currently unsupported."),
    };

    let x = x.into_array()?;
-    for (i, _) in x.iter().enumerate() {
+    for i in 0..vector_type.lane_count() {
        let mut lane: i128 = 0;
-        if i % iterations != 0 {
-            continue;
-        }
        for j in 0..iterations {
-            lane += (x[i + j] as i128) << (8 * j);
+            lane += (x[((i * iterations) + j) as usize] as i128) << (8 * j);
        }

        let lane_val: V = if lane_type.is_bool() {
@@ -1234,7 +1228,7 @@ where
        types::I16 | types::B16 => 2,
        types::I32 | types::B32 => 4,
        types::I64 | types::B64 => 8,
-        _ => unimplemented!("Only 128-bit vectors are currently supported."),
+        _ => unimplemented!("vectors with lanes wider than 64-bits are currently unsupported."),
    };
    let mut result: [u8; 16] = [0; 16];
    for (i, val) in x.iter().enumerate() {
@@ -1256,9 +1250,7 @@ where
    V: Value,
    F: FnMut(V, V) -> ValueResult<V>,
 {
-    extractlanes(&v, ty.lane_type())?
-        .into_iter()
-        .try_fold(init, op)
+    extractlanes(&v, ty)?.into_iter().try_fold(init, op)
 }

 /// Performs the supplied binary arithmetic `op` on two SIMD vectors.
@@ -1267,8 +1259,8 @@ where
    V: Value,
    F: Fn(V, V) -> ValueResult<V>,
 {
-    let arg0 = extractlanes(&x, vector_type.lane_type())?;
-    let arg1 = extractlanes(&y, vector_type.lane_type())?;
+    let arg0 = extractlanes(&x, vector_type)?;
+    let arg1 = extractlanes(&y, vector_type)?;

    let result = arg0
        .into_iter()
@@ -1293,8 +1285,8 @@ where
    V: Value,
    F: Fn(V, V) -> ValueResult<V>,
 {
-    let arg0 = extractlanes(&x, vector_type.lane_type())?;
-    let arg1 = extractlanes(&y, vector_type.lane_type())?;
+    let arg0 = extractlanes(&x, vector_type)?;
+    let arg1 = extractlanes(&y, vector_type)?;

    let result = arg0
        .chunks(2)
--- a/cranelift/interpreter/src/value.rs
+++ b/cranelift/interpreter/src/value.rs
@@ -279,13 +279,25 @@ impl Value for DataValue {
    }

    fn vector(v: [u8; 16], ty: Type) -> ValueResult<Self> {
-        assert!(ty.is_vector() && ty.bytes() == 16);
-        Ok(DataValue::V128(v))
+        assert!(ty.is_vector() && [8, 16].contains(&ty.bytes()));
+        if ty.bytes() == 16 {
+            Ok(DataValue::V128(v))
+        } else if ty.bytes() == 8 {
+            let v64: [u8; 8] = v[..8].try_into().unwrap();
+            Ok(DataValue::V64(v64))
+        } else {
+            unimplemented!()
+        }
    }

    fn into_array(&self) -> ValueResult<[u8; 16]> {
        match *self {
            DataValue::V128(v) => Ok(v),
+            DataValue::V64(v) => {
+                let mut v128 = [0; 16];
+                v128[..8].clone_from_slice(&v);
+                Ok(v128)
+            }
            _ => Err(ValueError::InvalidType(ValueTypeClass::Vector, self.ty())),
        }
    }