Implement vany_true and vall_true instructions in interpreter (#3304)

* cranelift: Implement ZeroExtend for a bunch of types in interpreter
* cranelift: Implement VConst on interpreter
* cranelift: Implement VallTrue on interpreter
* cranelift: Implement VanyTrue on interpreter
* cranelift: Mark `v{all,any}_true` tests as machinst only
* cranelift: Disable `vany_true` tests on aarch64
  The `b64x2` case produces an illegal instruction. See #3305
2021-09-07 17:50:39 +01:00
parent c73673559b
commit 63e9a81deb
4 changed files with 242 additions and 60 deletions
--- a/cranelift/filetests/filetests/runtests/simd-valltrue.clif
+++ b/cranelift/filetests/filetests/runtests/simd-valltrue.clif
@@ -0,0 +1,69 @@
+test interpret
+test run
+target aarch64
+target x86_64 machinst
+
+; TODO: Refactor this once we support simd bools in the trampoline
+
+function %vall_true_b8x16() -> b1, b1, b1 {
+block0:
+    v0 = vconst.b8x16 [false false false false false false false false false false false false false false false false]
+    v1 = vall_true v0
+
+    v2 = vconst.b8x16 [true false false false false false false false false false false false false false false false]
+    v3 = vall_true v2
+
+    v4 = vconst.b8x16 [true true true true true true true true true true true true true true true true]
+    v5 = vall_true v4
+
+    return v1, v3, v5
+}
+; run: %vall_true_b8x16() == [false, false, true]
+
+
+function %vall_true_b16x8() -> b1, b1, b1 {
+block0:
+    v0 = vconst.b16x8 [false false false false false false false false]
+    v1 = vall_true v0
+
+    v2 = vconst.b16x8 [true false false false false false false false]
+    v3 = vall_true v2
+
+    v4 = vconst.b16x8 [true true true true true true true true]
+    v5 = vall_true v4
+
+    return v1, v3, v5
+}
+; run: %vall_true_b16x8() == [false, false, true]
+
+
+function %vall_true_b32x4() -> b1, b1, b1 {
+block0:
+    v0 = vconst.b32x4 [false false false false]
+    v1 = vall_true v0
+
+    v2 = vconst.b32x4 [true false false false]
+    v3 = vall_true v2
+
+    v4 = vconst.b32x4 [true true true true]
+    v5 = vall_true v4
+
+    return v1, v3, v5
+}
+; run: %vall_true_b32x4() == [false, false, true]
+
+
+function %vall_true_b64x2() -> b1, b1, b1 {
+block0:
+    v0 = vconst.b64x2 [false false]
+    v1 = vall_true v0
+
+    v2 = vconst.b64x2 [true false]
+    v3 = vall_true v2
+
+    v4 = vconst.b64x2 [true true]
+    v5 = vall_true v4
+
+    return v1, v3, v5
+}
+; run: %vall_true_b64x2() == [false, false, true]
--- a/cranelift/filetests/filetests/runtests/simd-vanytrue.clif
+++ b/cranelift/filetests/filetests/runtests/simd-vanytrue.clif
@@ -0,0 +1,69 @@
+test interpret
+test run
+target x86_64 machinst
+; TODO: The AArch64 backend is producing an illegal instruction for b64x2. See: #3305
+
+; TODO: Refactor this once we support simd bools in the trampoline
+
+function %vany_true_b8x16() -> b1, b1, b1 {
+block0:
+    v0 = vconst.b8x16 [false false false false false false false false false false false false false false false false]
+    v1 = vany_true v0
+
+    v2 = vconst.b8x16 [true false false false false false false false false false false false false false false false]
+    v3 = vany_true v2
+
+    v4 = vconst.b8x16 [true true true true true true true true true true true true true true true true]
+    v5 = vany_true v4
+
+    return v1, v3, v5
+}
+; run: %vany_true_b8x16() == [false, true, true]
+
+
+function %vany_true_b16x8() -> b1, b1, b1 {
+block0:
+    v0 = vconst.b16x8 [false false false false false false false false]
+    v1 = vany_true v0
+
+    v2 = vconst.b16x8 [true false false false false false false false]
+    v3 = vany_true v2
+
+    v4 = vconst.b16x8 [true true true true true true true true]
+    v5 = vany_true v4
+
+    return v1, v3, v5
+}
+; run: %vany_true_b16x8() == [false, true, true]
+
+
+function %vany_true_b32x4() -> b1, b1, b1 {
+block0:
+    v0 = vconst.b32x4 [false false false false]
+    v1 = vany_true v0
+
+    v2 = vconst.b32x4 [true false false false]
+    v3 = vany_true v2
+
+    v4 = vconst.b32x4 [true true true true]
+    v5 = vany_true v4
+
+    return v1, v3, v5
+}
+; run: %vany_true_b32x4() == [false, true, true]
+
+
+function %vany_true_b64x2() -> b1, b1, b1 {
+block0:
+    v0 = vconst.b64x2 [false false]
+    v1 = vany_true v0
+
+    v2 = vconst.b64x2 [true false]
+    v3 = vany_true v2
+
+    v4 = vconst.b64x2 [true true]
+    v5 = vany_true v4
+
+    return v1, v3, v5
+}
+; run: %vany_true_b64x2() == [false, true, true]
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -11,7 +11,7 @@ use cranelift_codegen::ir::{
 };
 use log::trace;
 use smallvec::{smallvec, SmallVec};
-use std::convert::TryFrom;
+use std::convert::{TryFrom, TryInto};
 use std::ops::RangeFrom;
 use thiserror::Error;

@@ -63,7 +63,22 @@ where
    };

    // Retrieve the immediate value for an instruction, expecting it to exist.
-    let imm = || -> V { V::from(inst.imm_value().unwrap()) };
+    let imm = || -> V {
+        V::from(match inst {
+            InstructionData::UnaryConst {
+                constant_handle, ..
+            } => {
+                let buffer = state
+                    .get_current_function()
+                    .dfg
+                    .constants
+                    .get(constant_handle.clone())
+                    .as_slice();
+                DataValue::V128(buffer.try_into().expect("a 16-byte data buffer"))
+            }
+            _ => inst.imm_value().unwrap(),
+        })
+    };

    // Retrieve the immediate value for an instruction and convert it to the controlling type of the
    // instruction. For example, since `InstructionData` stores all integer immediates in a 64-bit
@@ -408,7 +423,7 @@ where
        Opcode::F32const => assign(imm()),
        Opcode::F64const => assign(imm()),
        Opcode::Bconst => assign(imm()),
-        Opcode::Vconst => unimplemented!("Vconst"),
+        Opcode::Vconst => assign(imm()),
        Opcode::ConstAddr => unimplemented!("ConstAddr"),
        Opcode::Null => unimplemented!("Null"),
        Opcode::Nop => ControlFlow::Continue,
@@ -537,19 +552,22 @@ where
                    64 => types::I128,
                    _ => unimplemented!("Unsupported integer length {}", ctrl_ty.bits()),
                };
-                let mut new_vec = SimdVec::new();
                let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
                let arg1 = extractlanes(&arg(1)?, ctrl_ty.lane_type())?;
-                for (x, y) in arg0.into_iter().zip(arg1) {
-                    let x: V = Value::int(x, double_length)?;
-                    let y: V = Value::int(y, double_length)?;
-                    new_vec.push(
-                        Value::mul(x, y)?
-                            .convert(ValueConversionKind::ExtractUpper(ctrl_ty.lane_type()))?
-                            .into_int()?,
-                    )
-                }
-                assign(vectorizelanes(&new_vec, ctrl_ty)?)
+
+                let res = arg0
+                    .into_iter()
+                    .zip(arg1)
+                    .map(|(x, y)| {
+                        let x = x.convert(ValueConversionKind::ZeroExtend(double_length))?;
+                        let y = y.convert(ValueConversionKind::ZeroExtend(double_length))?;
+
+                        Ok(Value::mul(x, y)?
+                            .convert(ValueConversionKind::ExtractUpper(ctrl_ty.lane_type()))?)
+                    })
+                    .collect::<ValueResult<SimdVec<V>>>()?;
+
+                assign(vectorizelanes(&res, ctrl_ty)?)
            } else {
                let double_length = match ctrl_ty.bits() {
                    8 => types::I16,
@@ -762,21 +780,32 @@ where
        Opcode::Swizzle => unimplemented!("Swizzle"),
        Opcode::Splat => unimplemented!("Splat"),
        Opcode::Insertlane => {
+            let idx = imm().into_int()? as usize;
            let mut vector = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
-            vector[Value::into_int(imm())? as usize] = arg(1)?.into_int()?;
+            vector[idx] = arg(1)?;
            assign(vectorizelanes(&vector, ctrl_ty)?)
        }
        Opcode::Extractlane => {
-            let value =
-                extractlanes(&arg(0)?, ctrl_ty.lane_type())?[Value::into_int(imm())? as usize];
-            assign(Value::int(value, ctrl_ty.lane_type())?)
+            let idx = imm().into_int()? as usize;
+            let lanes = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
+            assign(lanes[idx].clone())
        }
        Opcode::VhighBits => unimplemented!("VhighBits"),
        Opcode::Vsplit => unimplemented!("Vsplit"),
        Opcode::Vconcat => unimplemented!("Vconcat"),
        Opcode::Vselect => unimplemented!("Vselect"),
-        Opcode::VanyTrue => unimplemented!("VanyTrue"),
-        Opcode::VallTrue => unimplemented!("VallTrue"),
+        Opcode::VanyTrue => assign(fold_vector(
+            arg(0)?,
+            ctrl_ty,
+            V::bool(false, types::B1)?,
+            |acc, lane| acc.or(lane),
+        )?),
+        Opcode::VallTrue => assign(fold_vector(
+            arg(0)?,
+            ctrl_ty,
+            V::bool(true, types::B1)?,
+            |acc, lane| acc.and(lane),
+        )?),
        Opcode::SwidenLow => unimplemented!("SwidenLow"),
        Opcode::SwidenHigh => unimplemented!("SwidenHigh"),
        Opcode::UwidenLow => unimplemented!("UwidenLow"),
@@ -979,18 +1008,18 @@ where
    })
 }

-type SimdVec = SmallVec<[i128; 4]>;
+type SimdVec<V> = SmallVec<[V; 4]>;

-/// Converts a SIMD vector value into a Rust vector of i128 for processing.
-fn extractlanes<V>(x: &V, lane_type: types::Type) -> ValueResult<SimdVec>
+/// Converts a SIMD vector value into a Rust array of [Value] for processing.
+fn extractlanes<V>(x: &V, lane_type: types::Type) -> ValueResult<SimdVec<V>>
 where
    V: Value,
 {
    let iterations = match lane_type {
-        types::I8 => 1,
-        types::I16 => 2,
-        types::I32 => 4,
-        types::I64 => 8,
+        types::I8 | types::B1 | types::B8 => 1,
+        types::I16 | types::B16 => 2,
+        types::I32 | types::B32 => 4,
+        types::I64 | types::B64 => 8,
        _ => unimplemented!("Only 128-bit vectors are currently supported."),
    };

@@ -1004,13 +1033,19 @@ where
        for j in 0..iterations {
            lane += (x[i + j] as i128) << (8 * j);
        }
-        lanes.push(lane);
+
+        let lane_val: V = if lane_type.is_bool() {
+            Value::bool(lane != 0, lane_type)?
+        } else {
+            Value::int(lane, lane_type)?
+        };
+        lanes.push(lane_val);
    }
    return Ok(lanes);
 }

 /// Convert a Rust array of i128s back into a `Value::vector`.
-fn vectorizelanes<V>(x: &[i128], vector_type: types::Type) -> ValueResult<V>
+fn vectorizelanes<V>(x: &[V], vector_type: types::Type) -> ValueResult<V>
 where
    V: Value,
 {
@@ -1023,7 +1058,7 @@ where
    };
    let mut result: [u8; 16] = [0; 16];
    for (i, val) in x.iter().enumerate() {
-        let val = *val;
+        let val = val.clone().into_int()?;
        for j in 0..iterations {
            result[(i * iterations) + j] = (val >> (8 * j)) as u8;
        }
@@ -1031,6 +1066,17 @@ where
    Value::vector(result, vector_type)
 }

+/// Performs a lanewise fold on a vector type
+fn fold_vector<V, F>(v: V, ty: types::Type, init: V, op: F) -> ValueResult<V>
+where
+    V: Value,
+    F: FnMut(V, V) -> ValueResult<V>,
+{
+    extractlanes(&v, ty.lane_type())?
+        .into_iter()
+        .try_fold(init, op)
+}
+
 /// Performs the supplied binary arithmetic `op` on two SIMD vectors.
 fn binary_arith<V, F>(x: V, y: V, vector_type: types::Type, op: F, unsigned: bool) -> ValueResult<V>
 where
@@ -1039,20 +1085,19 @@ where
 {
    let arg0 = extractlanes(&x, vector_type.lane_type())?;
    let arg1 = extractlanes(&y, vector_type.lane_type())?;
-    let mut result = Vec::new();
-    for (lhs, rhs) in arg0.into_iter().zip(arg1) {
-        // The initial Value::int needs to be on a separate line so the
-        // compiler can determine concrete types.
-        let mut lhs: V = Value::int(lhs, vector_type.lane_type())?;
-        let mut rhs: V = Value::int(rhs, vector_type.lane_type())?;
+
+    let result = arg0
+        .into_iter()
+        .zip(arg1)
+        .map(|(mut lhs, mut rhs)| {
            if unsigned {
                lhs = lhs.convert(ValueConversionKind::ToUnsigned)?;
                rhs = rhs.convert(ValueConversionKind::ToUnsigned)?;
            }
-        let sum = op(lhs, rhs)?;
-        let sum = sum.into_int()?;
-        result.push(sum);
-    }
+            Ok(op(lhs, rhs)?)
+        })
+        .collect::<ValueResult<SimdVec<V>>>()?;
+
    vectorizelanes(&result, vector_type)
 }

@@ -1066,13 +1111,12 @@ where
 {
    let arg0 = extractlanes(&x, vector_type.lane_type())?;
    let arg1 = extractlanes(&y, vector_type.lane_type())?;
-    let mut result = SimdVec::new();
-    for pair in arg0.chunks(2).chain(arg1.chunks(2)) {
-        let lhs: V = Value::int(pair[0], vector_type.lane_type())?;
-        let rhs: V = Value::int(pair[1], vector_type.lane_type())?;
-        let sum = op(lhs, rhs)?;
-        let sum = sum.into_int()?;
-        result.push(sum);
-    }
+
+    let result = arg0
+        .chunks(2)
+        .chain(arg1.chunks(2))
+        .map(|pair| op(pair[0].clone(), pair[1].clone()))
+        .collect::<ValueResult<SimdVec<V>>>()?;
+
    vectorizelanes(&result, vector_type)
 }
--- a/cranelift/interpreter/src/value.rs
+++ b/cranelift/interpreter/src/value.rs
@@ -290,15 +290,15 @@ impl Value for DataValue {
                _ => unimplemented!("conversion: {} -> {:?}", self.ty(), kind),
            },
            ValueConversionKind::ZeroExtend(ty) => match (self, ty) {
-                (DataValue::I8(_), types::I16) => unimplemented!(),
-                (DataValue::I8(_), types::I32) => unimplemented!(),
-                (DataValue::I8(_), types::I64) => unimplemented!(),
-                (DataValue::I16(_), types::I32) => unimplemented!(),
-                (DataValue::I16(_), types::I64) => unimplemented!(),
+                (DataValue::I8(n), types::I16) => DataValue::I16(n as u8 as i16),
+                (DataValue::I8(n), types::I32) => DataValue::I32(n as u8 as i32),
+                (DataValue::I8(n), types::I64) => DataValue::I64(n as u8 as i64),
+                (DataValue::I16(n), types::I32) => DataValue::I32(n as u16 as i32),
+                (DataValue::I16(n), types::I64) => DataValue::I64(n as u16 as i64),
                (DataValue::U32(n), types::I64) => DataValue::U64(n as u64),
                (DataValue::I32(n), types::I64) => DataValue::I64(n as u32 as i64),
-                (DataValue::U64(n), types::I64) => DataValue::U64(n),
-                (DataValue::I64(n), types::I64) => DataValue::I64(n),
+                (DataValue::I64(n), types::I128) => DataValue::I128(n as u64 as i128),
+                (from, to) if from.ty() == to => from,
                (dv, _) => unimplemented!("conversion: {} -> {:?}", dv.ty(), kind),
            },
            ValueConversionKind::ToUnsigned => match self {
@@ -428,7 +428,7 @@ impl Value for DataValue {
    }

    fn or(self, other: Self) -> ValueResult<Self> {
-        binary_match!(|(&self, &other); [I8, I16, I32, I64])
+        binary_match!(|(&self, &other); [B, I8, I16, I32, I64])
    }

    fn xor(self, other: Self) -> ValueResult<Self> {