Implement UwidenLow and UwidenHigh for the interpreter

Implemented `UwidenLow` and `UwidenHigh` for the Cranelift interpreter. Both instructions double the lane width and halve the number of lanes, preserving the low and high halves of the input respectively. Conversions are performed using unsigned zero extension.

Copyright (c) 2021, Arm Limited
2021-09-03 12:55:31 +01:00
parent 164835ecf5
commit f7a1b3f9bd
4 changed files with 79 additions and 2 deletions
--- a/cranelift/filetests/filetests/runtests/simd-uwidenhigh.clif
+++ b/cranelift/filetests/filetests/runtests/simd-uwidenhigh.clif
@@ -0,0 +1,26 @@
+test interpret
+test run
+target aarch64
+set enable_simd
+target x86_64
+
+function %uwidenhigh_i8x16(i8x16) -> i16x8 {
+block0(v0: i8x16):
+    v1 = uwiden_high v0 ; zero-extend the upper eight i8 lanes to i16
+    return v1
+}
+; run: %uwidenhigh_i8x16([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]) == [9 10 11 12 13 14 15 16]
+
+function %uwidenhigh_i16x8(i16x8) -> i32x4 {
+block0(v0: i16x8):
+    v1 = uwiden_high v0 ; zero-extend the upper four i16 lanes to i32
+    return v1
+}
+; run: %uwidenhigh_i16x8([1 2 3 4 5 6 7 8]) == [5 6 7 8]
+
+function %uwidenhigh_i32x4(i32x4) -> i64x2 {
+block0(v0: i32x4):
+    v1 = uwiden_high v0 ; zero-extend the upper two i32 lanes to i64
+    return v1
+}
+; run: %uwidenhigh_i32x4([1 2 3 4]) == [3 4]
--- a/cranelift/filetests/filetests/runtests/simd-uwidenlow.clif
+++ b/cranelift/filetests/filetests/runtests/simd-uwidenlow.clif
@@ -0,0 +1,26 @@
+test interpret
+test run
+target aarch64
+set enable_simd
+target x86_64
+
+function %uwidenlow_i8x16(i8x16) -> i16x8 {
+block0(v0: i8x16):
+    v1 = uwiden_low v0 ; zero-extend the lower eight i8 lanes to i16
+    return v1
+}
+; run: %uwidenlow_i8x16([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]) == [1 2 3 4 5 6 7 8]
+
+function %uwidenlow_i16x8(i16x8) -> i32x4 {
+block0(v0: i16x8):
+    v1 = uwiden_low v0 ; zero-extend the lower four i16 lanes to i32
+    return v1
+}
+; run: %uwidenlow_i16x8([1 2 3 4 5 6 7 8]) == [1 2 3 4]
+
+function %uwidenlow_i32x4(i32x4) -> i64x2 {
+block0(v0: i32x4):
+    v1 = uwiden_low v0 ; zero-extend the lower two i32 lanes to i64
+    return v1
+}
+; run: %uwidenlow_i32x4([1 2 3 4]) == [1 2]
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -824,8 +824,28 @@ where
        )?),
        Opcode::SwidenLow => unimplemented!("SwidenLow"),
        Opcode::SwidenHigh => unimplemented!("SwidenHigh"),
-        Opcode::UwidenLow => unimplemented!("UwidenLow"),
-        Opcode::UwidenHigh => unimplemented!("UwidenHigh"),
+        Opcode::UwidenLow => {
+            // Zero-extend the low half of the input lanes into lanes of the merged type.
+            let wide_ty = ctrl_ty.merge_lanes().unwrap();
+            let low_half = wide_ty.lane_count() as usize;
+            let lanes = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
+            let mut out = SimdVec::new();
+            for narrow in lanes.into_iter().take(low_half) {
+                out.push(narrow.convert(ValueConversionKind::ZeroExtend(wide_ty.lane_type()))?);
+            }
+            assign(vectorizelanes(&out, wide_ty)?)
+        }
+        Opcode::UwidenHigh => {
+            // Zero-extend the high half of the input lanes into lanes of the merged type.
+            let wide_ty = ctrl_ty.merge_lanes().unwrap();
+            let low_half = wide_ty.lane_count() as usize;
+            let mut lanes = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
+            let mut out = SimdVec::new();
+            for narrow in lanes.drain(low_half..) {
+                out.push(narrow.convert(ValueConversionKind::ZeroExtend(wide_ty.lane_type()))?);
+            }
+            assign(vectorizelanes(&out, wide_ty)?)
+        }
        Opcode::FcvtToUint => unimplemented!("FcvtToUint"),
        Opcode::FcvtToUintSat => unimplemented!("FcvtToUintSat"),
        Opcode::FcvtToSint => unimplemented!("FcvtToSint"),
--- a/cranelift/interpreter/src/value.rs
+++ b/cranelift/interpreter/src/value.rs
@@ -290,9 +290,14 @@ impl Value for DataValue {
                _ => unimplemented!("conversion: {} -> {:?}", self.ty(), kind),
            },
            ValueConversionKind::ZeroExtend(ty) => match (self, ty) {
+                (DataValue::U8(n), types::I16) => DataValue::U16(n as u16),
+                (DataValue::U8(n), types::I32) => DataValue::U32(n as u32),
+                (DataValue::U8(n), types::I64) => DataValue::U64(n as u64),
                (DataValue::I8(n), types::I16) => DataValue::I16(n as u8 as i16),
                (DataValue::I8(n), types::I32) => DataValue::I32(n as u8 as i32),
                (DataValue::I8(n), types::I64) => DataValue::I64(n as u8 as i64),
+                (DataValue::U16(n), types::I32) => DataValue::U32(n as u32),
+                (DataValue::U16(n), types::I64) => DataValue::U64(n as u64),
                (DataValue::I16(n), types::I32) => DataValue::I32(n as u16 as i32),
                (DataValue::I16(n), types::I64) => DataValue::I64(n as u16 as i64),
                (DataValue::U32(n), types::I64) => DataValue::U64(n as u64),