diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index 5fd415f32e..d7ceb2d034 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -3533,8 +3533,8 @@ pub(crate) fn define( "A SIMD vector type containing integer lanes 8, 16, or 32 bits wide.", TypeSetBuilder::new() .ints(8..32) - .simd_lanes(4..16) - .dynamic_simd_lanes(4..16) + .simd_lanes(2..16) + .dynamic_simd_lanes(2..16) .includes_scalars(false) .build(), ); diff --git a/cranelift/codegen/src/data_value.rs b/cranelift/codegen/src/data_value.rs index e2b6d5aba3..6abc29987b 100644 --- a/cranelift/codegen/src/data_value.rs +++ b/cranelift/codegen/src/data_value.rs @@ -26,6 +26,7 @@ pub enum DataValue { F32(Ieee32), F64(Ieee64), V128([u8; 16]), + V64([u8; 8]), } impl DataValue { @@ -54,13 +55,14 @@ impl DataValue { DataValue::F32(_) => types::F32, DataValue::F64(_) => types::F64, DataValue::V128(_) => types::I8X16, // A default type. + DataValue::V64(_) => types::I8X8, // A default type. } } /// Return true if the value is a vector (i.e. `DataValue::V128`). pub fn is_vector(&self) -> bool { match self { - DataValue::V128(_) => true, + DataValue::V128(_) | DataValue::V64(_) => true, _ => false, } } @@ -90,6 +92,7 @@ impl DataValue { DataValue::F32(f) => dst[..4].copy_from_slice(&f.bits().to_ne_bytes()[..]), DataValue::F64(f) => dst[..8].copy_from_slice(&f.bits().to_ne_bytes()[..]), DataValue::V128(v) => dst[..16].copy_from_slice(&u128::from_le_bytes(*v).to_ne_bytes()), + DataValue::V64(v) => dst[..8].copy_from_slice(&u64::from_le_bytes(*v).to_ne_bytes()), _ => unimplemented!(), }; } @@ -119,8 +122,16 @@ impl DataValue { let size = ty.bytes() as usize; DataValue::B(src[..size].iter().any(|&i| i != 0)) } - _ if ty.is_vector() && ty.bytes() == 16 => { - DataValue::V128(u128::from_ne_bytes(src[..16].try_into().unwrap()).to_le_bytes()) + _ if ty.is_vector() => { + if ty.bytes() == 16 { + DataValue::V128( + u128::from_ne_bytes(src[..16].try_into().unwrap()).to_le_bytes(), + ) + } else if ty.bytes() == 8 { + DataValue::V64(u64::from_ne_bytes(src[..8].try_into().unwrap()).to_le_bytes()) + } else { + unimplemented!() + } } _ => unimplemented!(), } @@ -218,6 +229,7 @@ build_conversion_impl!(u128, U128, I128); build_conversion_impl!(Ieee32, F32, F32); build_conversion_impl!(Ieee64, F64, F64); build_conversion_impl!([u8; 16], V128, I8X16); +build_conversion_impl!([u8; 8], V64, I8X8); impl From for DataValue { fn from(o: Offset32) -> Self { DataValue::from(Into::::into(o)) @@ -243,6 +255,7 @@ impl Display for DataValue { DataValue::F64(dv) => write!(f, "{}", dv), // Again, for syntax consistency, use ConstantData, which in this case displays as hex. DataValue::V128(dv) => write!(f, "{}", ConstantData::from(&dv[..])), + DataValue::V64(dv) => write!(f, "{}", ConstantData::from(&dv[..])), } } } diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index 4a722d470e..071e4864b0 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -4035,6 +4035,18 @@ fn test_aarch64_binemit() { "fmul v2.2d, v0.2d, v5.2d", )); + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Addp, + rd: writable_vreg(16), + rn: vreg(12), + rm: vreg(1), + size: VectorSize::Size8x8, + }, + "90BD210E", + "addp v16.8b, v12.8b, v1.8b", + )); + insns.push(( Inst::VecRRR { alu_op: VecALUOp::Addp, @@ -4059,6 +4071,18 @@ fn test_aarch64_binemit() { "addp v8.4s, v12.4s, v14.4s", )); + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Addp, + rd: writable_vreg(8), + rn: vreg(12), + rm: vreg(14), + size: VectorSize::Size32x2, + }, + "88BDAE0E", + "addp v8.2s, v12.2s, v14.2s", + )); + insns.push(( Inst::VecRRR { alu_op: VecALUOp::Zip1, diff --git a/cranelift/filetests/filetests/runtests/simd-iaddpairwise-64bit.clif b/cranelift/filetests/filetests/runtests/simd-iaddpairwise-64bit.clif new file mode 100644 index 0000000000..0c29854cb0 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-iaddpairwise-64bit.clif @@ -0,0 +1,26 @@ +test interpret +test run +target aarch64 + +function %iaddp_i8x8(i8x8, i8x8) -> i8x8 { +block0(v0: i8x8, v1: i8x8): + v2 = iadd_pairwise v0, v1 + return v2 +} + +; run: %iaddp_i8x8([1 2 3 4 5 6 7 8], [9 10 11 12 13 14 15 16]) == [3 7 11 15 19 23 27 31] + +function %iaddp_i16x4(i16x4, i16x4) -> i16x4 { +block0(v0: i16x4, v1: i16x4): + v2 = iadd_pairwise v0, v1 + return v2 +} +; run: %iaddp_i16x4([1 2 3 4], [100 99 98 97]) == [3 7 199 195] + +function %iaddp_i32x2(i32x2, i32x2) -> i32x2 { +block0(v0: i32x2, v1: i32x2): + v2 = iadd_pairwise v0, v1 + return v2 +} +; run: %iaddp_i32x2([1 2], [5 6]) == [3 11] +; run: %iaddp_i32x2([4294967290 5], [100 100]) == [4294967295 200] diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index e01c37c6c7..18354389e5 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -547,7 +547,7 @@ where Opcode::Iabs => { let (min_val, _) = ctrl_ty.lane_type().bounds(true); let min_val: V = Value::int(min_val as i128, ctrl_ty.lane_type())?; - let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; + let arg0 = extractlanes(&arg(0)?, ctrl_ty)?; let new_vec = arg0 .into_iter() .map(|lane| { @@ -574,8 +574,8 @@ where } else { ValueConversionKind::SignExtend(double_length) }; - let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; - let arg1 = extractlanes(&arg(1)?, ctrl_ty.lane_type())?; + let arg0 = extractlanes(&arg(0)?, ctrl_ty)?; + let arg1 = extractlanes(&arg(1)?, ctrl_ty)?; let res = arg0 .into_iter() @@ -681,7 +681,7 @@ where let count = if arg(0)?.ty().is_int() { arg(0)?.count_ones()? } else { - let lanes = extractlanes(&arg(0)?, ctrl_ty.lane_type())? + let lanes = extractlanes(&arg(0)?, ctrl_ty)? .into_iter() .map(|lane| lane.count_ones()) .collect::>>()?; @@ -786,8 +786,8 @@ where assign(Value::int(int, ctrl_ty)?) } Opcode::Snarrow | Opcode::Unarrow | Opcode::Uunarrow => { - let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; - let arg1 = extractlanes(&arg(1)?, ctrl_ty.lane_type())?; + let arg0 = extractlanes(&arg(0)?, ctrl_ty)?; + let arg1 = extractlanes(&arg(1)?, ctrl_ty)?; let new_type = ctrl_ty.split_lanes().unwrap(); let (min, max) = new_type.bounds(inst.opcode() == Opcode::Snarrow); let mut min: V = Value::int(min as i128, ctrl_ty.lane_type())?; @@ -818,7 +818,7 @@ where Opcode::Bmask => assign({ let bool = arg(0)?; let bool_ty = ctrl_ty.as_bool_pedantic(); - let lanes = extractlanes(&bool, bool_ty.lane_type())? + let lanes = extractlanes(&bool, bool_ty)? .into_iter() .map(|lane| lane.convert(ValueConversionKind::Exact(ctrl_ty.lane_type()))) .collect::>>()?; @@ -874,23 +874,20 @@ where } Opcode::Insertlane => { let idx = imm().into_int()? as usize; - let mut vector = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; + let mut vector = extractlanes(&arg(0)?, ctrl_ty)?; vector[idx] = arg(1)?; assign(vectorizelanes(&vector, ctrl_ty)?) } Opcode::Extractlane => { let idx = imm().into_int()? as usize; - let lanes = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; + let lanes = extractlanes(&arg(0)?, ctrl_ty)?; assign(lanes[idx].clone()) } Opcode::VhighBits => { // `ctrl_ty` controls the return type for this, so the input type // must be retrieved via `inst_context`. - let lane_type = inst_context - .type_of(inst_context.args()[0]) - .unwrap() - .lane_type(); - let a = extractlanes(&arg(0)?, lane_type)?; + let vector_type = inst_context.type_of(inst_context.args()[0]).unwrap(); + let a = extractlanes(&arg(0)?, vector_type)?; let mut result: i128 = 0; for (i, val) in a.into_iter().enumerate() { let val = val.reverse_bits()?.into_int()?; // MSB -> LSB @@ -901,9 +898,9 @@ where Opcode::Vsplit => unimplemented!("Vsplit"), Opcode::Vconcat => unimplemented!("Vconcat"), Opcode::Vselect => { - let c = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; - let x = extractlanes(&arg(1)?, ctrl_ty.lane_type())?; - let y = extractlanes(&arg(2)?, ctrl_ty.lane_type())?; + let c = extractlanes(&arg(0)?, ctrl_ty)?; + let x = extractlanes(&arg(1)?, ctrl_ty)?; + let y = extractlanes(&arg(2)?, ctrl_ty)?; let mut new_vec = SimdVec::new(); for (c, (x, y)) in c.into_iter().zip(x.into_iter().zip(y.into_iter())) { if Value::eq(&c, &Value::int(0, ctrl_ty.lane_type())?)? { @@ -937,7 +934,7 @@ where } _ => unreachable!(), }; - let vec_iter = extractlanes(&arg(0)?, ctrl_ty.lane_type())?.into_iter(); + let vec_iter = extractlanes(&arg(0)?, ctrl_ty)?.into_iter(); let new_vec = match inst.opcode() { Opcode::SwidenLow | Opcode::UwidenLow => vec_iter .take(new_type.lane_count() as usize) @@ -973,8 +970,8 @@ where Opcode::WideningPairwiseDotProductS => { let ctrl_ty = types::I16X8; let new_type = ctrl_ty.merge_lanes().unwrap(); - let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; - let arg1 = extractlanes(&arg(1)?, ctrl_ty.lane_type())?; + let arg0 = extractlanes(&arg(0)?, ctrl_ty)?; + let arg1 = extractlanes(&arg(1)?, ctrl_ty)?; let new_vec = arg0 .chunks(2) .into_iter() @@ -993,8 +990,8 @@ where Opcode::SqmulRoundSat => { let lane_type = ctrl_ty.lane_type(); let double_width = ctrl_ty.double_width().unwrap().lane_type(); - let arg0 = extractlanes(&arg(0)?, lane_type)?; - let arg1 = extractlanes(&arg(1)?, lane_type)?; + let arg0 = extractlanes(&arg(0)?, ctrl_ty)?; + let arg1 = extractlanes(&arg(1)?, ctrl_ty)?; let (min, max) = lane_type.bounds(true); let min: V = Value::int(min as i128, double_width)?; let max: V = Value::int(max as i128, double_width)?; @@ -1130,9 +1127,8 @@ where }; let dst_ty = ctrl_ty.as_bool(); - let lane_type = ctrl_ty.lane_type(); - let left = extractlanes(left, lane_type)?; - let right = extractlanes(right, lane_type)?; + let left = extractlanes(left, ctrl_ty)?; + let right = extractlanes(right, ctrl_ty)?; let res = left .into_iter() @@ -1178,10 +1174,11 @@ type SimdVec = SmallVec<[V; 4]>; /// Converts a SIMD vector value into a Rust array of [Value] for processing. /// If `x` is a scalar, it will be returned as a single-element array. -fn extractlanes(x: &V, lane_type: types::Type) -> ValueResult> +fn extractlanes(x: &V, vector_type: types::Type) -> ValueResult> where V: Value, { + let lane_type = vector_type.lane_type(); let mut lanes = SimdVec::new(); // Wrap scalar values as a single-element vector and return. if !x.ty().is_vector() { @@ -1194,17 +1191,14 @@ where types::I16 | types::B16 => 2, types::I32 | types::B32 => 4, types::I64 | types::B64 => 8, - _ => unimplemented!("Only 128-bit vectors are currently supported."), + _ => unimplemented!("vectors with lanes wider than 64-bits are currently unsupported."), }; let x = x.into_array()?; - for (i, _) in x.iter().enumerate() { + for i in 0..vector_type.lane_count() { let mut lane: i128 = 0; - if i % iterations != 0 { - continue; - } for j in 0..iterations { - lane += (x[i + j] as i128) << (8 * j); + lane += (x[((i * iterations) + j) as usize] as i128) << (8 * j); } let lane_val: V = if lane_type.is_bool() { @@ -1234,7 +1228,7 @@ where types::I16 | types::B16 => 2, types::I32 | types::B32 => 4, types::I64 | types::B64 => 8, - _ => unimplemented!("Only 128-bit vectors are currently supported."), + _ => unimplemented!("vectors with lanes wider than 64-bits are currently unsupported."), }; let mut result: [u8; 16] = [0; 16]; for (i, val) in x.iter().enumerate() { @@ -1256,9 +1250,7 @@ where V: Value, F: FnMut(V, V) -> ValueResult, { - extractlanes(&v, ty.lane_type())? - .into_iter() - .try_fold(init, op) + extractlanes(&v, ty)?.into_iter().try_fold(init, op) } /// Performs the supplied binary arithmetic `op` on two SIMD vectors. @@ -1267,8 +1259,8 @@ where V: Value, F: Fn(V, V) -> ValueResult, { - let arg0 = extractlanes(&x, vector_type.lane_type())?; - let arg1 = extractlanes(&y, vector_type.lane_type())?; + let arg0 = extractlanes(&x, vector_type)?; + let arg1 = extractlanes(&y, vector_type)?; let result = arg0 .into_iter() @@ -1293,8 +1285,8 @@ where V: Value, F: Fn(V, V) -> ValueResult, { - let arg0 = extractlanes(&x, vector_type.lane_type())?; - let arg1 = extractlanes(&y, vector_type.lane_type())?; + let arg0 = extractlanes(&x, vector_type)?; + let arg1 = extractlanes(&y, vector_type)?; let result = arg0 .chunks(2) diff --git a/cranelift/interpreter/src/value.rs b/cranelift/interpreter/src/value.rs index e1c51c0531..c73b501962 100644 --- a/cranelift/interpreter/src/value.rs +++ b/cranelift/interpreter/src/value.rs @@ -279,13 +279,25 @@ impl Value for DataValue { } fn vector(v: [u8; 16], ty: Type) -> ValueResult { - assert!(ty.is_vector() && ty.bytes() == 16); - Ok(DataValue::V128(v)) + assert!(ty.is_vector() && [8, 16].contains(&ty.bytes())); + if ty.bytes() == 16 { + Ok(DataValue::V128(v)) + } else if ty.bytes() == 8 { + let v64: [u8; 8] = v[..8].try_into().unwrap(); + Ok(DataValue::V64(v64)) + } else { + unimplemented!() + } } fn into_array(&self) -> ValueResult<[u8; 16]> { match *self { DataValue::V128(v) => Ok(v), + DataValue::V64(v) => { + let mut v128 = [0; 16]; + v128[..8].clone_from_slice(&v); + Ok(v128) + } _ => Err(ValueError::InvalidType(ValueTypeClass::Vector, self.ty())), } } diff --git a/cranelift/reader/src/parser.rs b/cranelift/reader/src/parser.rs index f1f279ef15..7c854149ae 100644 --- a/cranelift/reader/src/parser.rs +++ b/cranelift/reader/src/parser.rs @@ -2622,7 +2622,11 @@ impl<'a> Parser<'a> { let as_vec = self.match_uimm128(ty)?.into_vec(); if as_vec.len() == 16 { let mut as_array = [0; 16]; - as_array.copy_from_slice(&as_vec[..16]); + as_array.copy_from_slice(&as_vec[..]); + DataValue::from(as_array) + } else if as_vec.len() == 8 { + let mut as_array = [0; 8]; + as_array.copy_from_slice(&as_vec[..]); DataValue::from(as_array) } else { return Err(self.error("only 128-bit vectors are currently supported"));