Merge pull request #3363 from dheaton-arm/implement-widening-pairwise-dotprod

Implement `WideningPairwiseDotProductS` for interpreter
This commit is contained in:
Chris Fallin
2021-09-21 10:05:07 -07:00
committed by GitHub
2 changed files with 34 additions and 1 deletions

View File

@@ -0,0 +1,14 @@
test interpret
test run
target aarch64
set enable_simd
target x86_64

; Exercises `widening_pairwise_dot_product_s`: corresponding signed 16-bit
; lanes of the two inputs are multiplied, and each adjacent pair of products
; is added together to form one 32-bit result lane.
function %wpdps(i16x8, i16x8) -> i32x4 {
block0(v0: i16x8, v1: i16x8):
    v2 = widening_pairwise_dot_product_s v0, v1
    return v2
}
; run: %wpdps([1 2 3 4 5 6 7 8], [8000 7000 6000 5000 4000 3000 2000 1000]) == [22000 38000 38000 22000]
; run: %wpdps([1 -2 3 -4 5 -6 7 -8], [32767 32767 32767 32767 -32768 -32768 -32768 -32768]) == [-32767 -32767 32768 32768]
; Lane 0 below is (-32768 * -32768) * 2 = 2^31, which does not fit in a signed
; 32-bit lane; the wrapped two's-complement value is -2147483648.
; run: %wpdps([-32768 -32768 32767 32767 -32768 -32768 32767 32767], [-32768 -32768 32767 32767 32767 32767 -32768 -32768]) == [-2147483648 2147352578 -2147418112 -2147418112]

View File

@@ -900,7 +900,26 @@ where
Opcode::AtomicLoad => unimplemented!("AtomicLoad"),
Opcode::AtomicStore => unimplemented!("AtomicStore"),
Opcode::Fence => unimplemented!("Fence"),
Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"),
Opcode::WideningPairwiseDotProductS => {
    // The operand type is pinned to i16x8 here; `merge_lanes` yields the
    // result vector type (the unwrap assumes merging always succeeds for I16X8).
    let ctrl_ty = types::I16X8;
    let new_type = ctrl_ty.merge_lanes().unwrap();
    let lhs_lanes = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
    let rhs_lanes = extractlanes(&arg(1)?, ctrl_ty.lane_type())?;

    // Walk both operands two lanes at a time; every pair of lanes produces
    // exactly one lane of the result.
    let mut dot_products = Vec::new();
    for (lhs_pair, rhs_pair) in lhs_lanes.chunks(2).zip(rhs_lanes.chunks(2)) {
        // Accumulate in i128 so the sum of products is formed without
        // overflow before being converted to the result lane type.
        let mut sum = 0i128;
        for (lhs, rhs) in lhs_pair.iter().zip(rhs_pair.iter()) {
            sum += lhs.clone().into_int()? * rhs.clone().into_int()?;
        }
        dot_products.push(Value::int(sum, new_type.lane_type())?);
    }
    assign(vectorizelanes(&dot_products, new_type)?)
}
Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"),
Opcode::IaddPairwise => assign(binary_pairwise(arg(0)?, arg(1)?, ctrl_ty, Value::add)?),