From f7a1b3f9bd6136e7cbd42daaf7bfde60acc474c4 Mon Sep 17 00:00:00 2001 From: dheaton-arm Date: Fri, 3 Sep 2021 12:55:31 +0100 Subject: [PATCH 1/3] Implement `UwidenLow` and `UwidenHigh` for the interpreter Implemented `UwidenLow` and `UwidenHigh` for the Cranelift interpreter, doubling the width and halving the number of lanes preserving the low and high halves respectively. Conversions are performed using unsigned zero extension. Copyright (c) 2021, Arm Limited --- .../filetests/runtests/simd-uwidenhigh.clif | 26 +++++++++++++++++++ .../filetests/runtests/simd-uwidenlow.clif | 26 +++++++++++++++++++ cranelift/interpreter/src/step.rs | 24 +++++++++++++++-- cranelift/interpreter/src/value.rs | 5 ++++ 4 files changed, 79 insertions(+), 2 deletions(-) create mode 100644 cranelift/filetests/filetests/runtests/simd-uwidenhigh.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-uwidenlow.clif diff --git a/cranelift/filetests/filetests/runtests/simd-uwidenhigh.clif b/cranelift/filetests/filetests/runtests/simd-uwidenhigh.clif new file mode 100644 index 0000000000..d9e4b2b0e0 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-uwidenhigh.clif @@ -0,0 +1,26 @@ +test interpret +test run +target aarch64 +set enable_simd +target x86_64 + +function %uwidenhigh_i8x16(i8x16) -> i16x8 { +block0(v0: i8x16): + v1 = uwiden_high v0 + return v1 +} +; run: %uwidenhigh_i8x16([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]) == [9 10 11 12 13 14 15 16] + +function %uwidenhigh_i16x8(i16x8) -> i32x4 { +block0(v0: i16x8): + v1 = uwiden_high v0 + return v1 +} +; run: %uwidenhigh_i16x8([1 2 3 4 5 6 7 8]) == [5 6 7 8] + +function %uwidenhigh_i32x4(i32x4) -> i64x2 { +block0(v0: i32x4): + v1 = uwiden_high v0 + return v1 +} +; run: %uwidenhigh_i32x4([1 2 3 4]) == [3 4] diff --git a/cranelift/filetests/filetests/runtests/simd-uwidenlow.clif b/cranelift/filetests/filetests/runtests/simd-uwidenlow.clif new file mode 100644 index 0000000000..6acf4e1a2b --- /dev/null +++ 
b/cranelift/filetests/filetests/runtests/simd-uwidenlow.clif @@ -0,0 +1,26 @@ +test interpret +test run +target aarch64 +set enable_simd +target x86_64 + +function %uwidenlow_i8x16(i8x16) -> i16x8 { +block0(v0: i8x16): + v1 = uwiden_low v0 + return v1 +} +; run: %uwidenlow_i8x16([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]) == [1 2 3 4 5 6 7 8] + +function %uwidenlow_i16x8(i16x8) -> i32x4 { +block0(v0: i16x8): + v1 = uwiden_low v0 + return v1 +} +; run: %uwidenlow_i16x8([1 2 3 4 5 6 7 8]) == [1 2 3 4] + +function %uwidenlow_i32x4(i32x4) -> i64x2 { +block0(v0: i32x4): + v1 = uwiden_low v0 + return v1 +} +; run: %uwidenlow_i32x4([1 2 3 4]) == [1 2] diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index ce53528c20..c5cdd82fa3 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -824,8 +824,28 @@ where )?), Opcode::SwidenLow => unimplemented!("SwidenLow"), Opcode::SwidenHigh => unimplemented!("SwidenHigh"), - Opcode::UwidenLow => unimplemented!("UwidenLow"), - Opcode::UwidenHigh => unimplemented!("UwidenHigh"), + Opcode::UwidenLow => { + let new_type = ctrl_ty.merge_lanes().unwrap(); + let mut new_vec = SimdVec::new(); + let mut arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; + arg0.truncate(new_type.lane_count() as usize); + for lane in arg0 { + let lane = lane.convert(ValueConversionKind::ZeroExtend(new_type.lane_type()))?; + new_vec.push(lane); + } + assign(vectorizelanes(&new_vec, new_type)?) + } + Opcode::UwidenHigh => { + let new_type = ctrl_ty.merge_lanes().unwrap(); + let mut new_vec = SimdVec::new(); + let mut arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; + arg0.drain(0..new_type.lane_count() as usize); + for lane in arg0 { + let lane = lane.convert(ValueConversionKind::ZeroExtend(new_type.lane_type()))?; + new_vec.push(lane); + } + assign(vectorizelanes(&new_vec, new_type)?) 
+ } Opcode::FcvtToUint => unimplemented!("FcvtToUint"), Opcode::FcvtToUintSat => unimplemented!("FcvtToUintSat"), Opcode::FcvtToSint => unimplemented!("FcvtToSint"), diff --git a/cranelift/interpreter/src/value.rs b/cranelift/interpreter/src/value.rs index c76cacb0be..2f11673d4f 100644 --- a/cranelift/interpreter/src/value.rs +++ b/cranelift/interpreter/src/value.rs @@ -290,9 +290,14 @@ impl Value for DataValue { _ => unimplemented!("conversion: {} -> {:?}", self.ty(), kind), }, ValueConversionKind::ZeroExtend(ty) => match (self, ty) { + (DataValue::U8(n), types::I16) => DataValue::U16(n as u16), + (DataValue::U8(n), types::I32) => DataValue::U32(n as u32), + (DataValue::U8(n), types::I64) => DataValue::U64(n as u64), (DataValue::I8(n), types::I16) => DataValue::I16(n as u8 as i16), (DataValue::I8(n), types::I32) => DataValue::I32(n as u8 as i32), (DataValue::I8(n), types::I64) => DataValue::I64(n as u8 as i64), + (DataValue::U16(n), types::I32) => DataValue::U32(n as u32), + (DataValue::U16(n), types::I64) => DataValue::U64(n as u64), (DataValue::I16(n), types::I32) => DataValue::I32(n as u16 as i32), (DataValue::I16(n), types::I64) => DataValue::I64(n as u16 as i64), (DataValue::U32(n), types::I64) => DataValue::U64(n as u64), From 5824cca0f8e6639bf3fbbee5eb6c1227e274bb21 Mon Sep 17 00:00:00 2001 From: dheaton-arm Date: Wed, 8 Sep 2021 15:43:08 +0100 Subject: [PATCH 2/3] Fix test failures from old x86 backend Copyright (c) 2021, Arm Limited --- cranelift/filetests/filetests/runtests/simd-uwidenhigh.clif | 2 +- cranelift/filetests/filetests/runtests/simd-uwidenlow.clif | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cranelift/filetests/filetests/runtests/simd-uwidenhigh.clif b/cranelift/filetests/filetests/runtests/simd-uwidenhigh.clif index d9e4b2b0e0..281e63ac02 100644 --- a/cranelift/filetests/filetests/runtests/simd-uwidenhigh.clif +++ b/cranelift/filetests/filetests/runtests/simd-uwidenhigh.clif @@ -2,7 +2,7 @@ test interpret test run 
target aarch64 set enable_simd -target x86_64 +target x86_64 machinst function %uwidenhigh_i8x16(i8x16) -> i16x8 { block0(v0: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-uwidenlow.clif b/cranelift/filetests/filetests/runtests/simd-uwidenlow.clif index 6acf4e1a2b..8b865a305b 100644 --- a/cranelift/filetests/filetests/runtests/simd-uwidenlow.clif +++ b/cranelift/filetests/filetests/runtests/simd-uwidenlow.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 set enable_simd -target x86_64 +target x86_64 machinst function %uwidenlow_i8x16(i8x16) -> i16x8 { block0(v0: i8x16): From 924b0368e9b18bf9d32d52b95878704059ac99d0 Mon Sep 17 00:00:00 2001 From: dheaton-arm Date: Thu, 9 Sep 2021 10:31:23 +0100 Subject: [PATCH 3/3] Rewrite as iterator methods Copyright (c) 2021, Arm Limited --- cranelift/interpreter/src/step.rs | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index c5cdd82fa3..eb519d2e08 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -826,24 +826,20 @@ where Opcode::SwidenHigh => unimplemented!("SwidenHigh"), Opcode::UwidenLow => { let new_type = ctrl_ty.merge_lanes().unwrap(); - let mut new_vec = SimdVec::new(); - let mut arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; - arg0.truncate(new_type.lane_count() as usize); - for lane in arg0 { - let lane = lane.convert(ValueConversionKind::ZeroExtend(new_type.lane_type()))?; - new_vec.push(lane); - } + let new_vec = extractlanes(&arg(0)?, ctrl_ty.lane_type())? + .into_iter() + .take(new_type.lane_count() as usize) + .map(|lane| lane.convert(ValueConversionKind::ZeroExtend(new_type.lane_type()))) + .collect::<ValueResult<Vec<_>>>()?; assign(vectorizelanes(&new_vec, new_type)?) 
} Opcode::UwidenHigh => { let new_type = ctrl_ty.merge_lanes().unwrap(); - let mut new_vec = SimdVec::new(); - let mut arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; - arg0.drain(0..new_type.lane_count() as usize); - for lane in arg0 { - let lane = lane.convert(ValueConversionKind::ZeroExtend(new_type.lane_type()))?; - new_vec.push(lane); - } + let new_vec = extractlanes(&arg(0)?, ctrl_ty.lane_type())? + .into_iter() + .skip(new_type.lane_count() as usize) + .map(|lane| lane.convert(ValueConversionKind::ZeroExtend(new_type.lane_type()))) + .collect::<ValueResult<Vec<_>>>()?; assign(vectorizelanes(&new_vec, new_type)?) } Opcode::FcvtToUint => unimplemented!("FcvtToUint"),