Merge pull request #3379 from dheaton-arm/implement-sqmulroundsat
Implement `SqmulRoundSat` for interpreter
This commit is contained in:
@@ -0,0 +1,12 @@
|
|||||||
|
test interpret
|
||||||
|
test run
|
||||||
|
target aarch64
|
||||||
|
;; x86_64 is not targeted here: its backend has not implemented `sqmul_round_sat` for `i32x4`.
|
||||||
|
|
||||||
|
function %sqmulrs_i32x4(i32x4, i32x4) -> i32x4 {
|
||||||
|
block0(v0: i32x4, v1: i32x4):
|
||||||
|
v2 = sqmul_round_sat v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
; run: %sqmulrs_i32x4([1000 2000 3000 4000], [10000 100000 1000000 10000000]) == [0 0 1 19]
|
||||||
|
; run: %sqmulrs_i32x4([2147483647 -2147483648 -2147483648 0], [2147483647 -2147483648 2147483647 0]) == [2147483646 2147483647 -2147483647 0]
|
||||||
@@ -0,0 +1,13 @@
|
|||||||
|
test interpret
|
||||||
|
test run
|
||||||
|
target aarch64
|
||||||
|
set enable_simd
|
||||||
|
target x86_64 machinst
|
||||||
|
|
||||||
|
function %sqmulrs_i16x8(i16x8, i16x8) -> i16x8 {
|
||||||
|
block0(v0: i16x8, v1: i16x8):
|
||||||
|
v2 = sqmul_round_sat v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
; run: %sqmulrs_i16x8([1 2 3 4 5 6 7 8], [1 10 100 1000 10000 15000 20000 25000]) == [0 0 0 0 2 3 4 6]
|
||||||
|
; run: %sqmulrs_i16x8([32767 32767 -32768 -32768 -32768 -32768 0 0], [32767 32767 -32768 -32768 32767 32767 0 0]) == [32766 32766 32767 32767 -32767 -32767 0 0]
|
||||||
@@ -980,7 +980,34 @@ where
|
|||||||
.collect::<ValueResult<Vec<_>>>()?;
|
.collect::<ValueResult<Vec<_>>>()?;
|
||||||
assign(vectorizelanes(&new_vec, new_type)?)
|
assign(vectorizelanes(&new_vec, new_type)?)
|
||||||
}
|
}
|
||||||
Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"),
|
Opcode::SqmulRoundSat => {
|
||||||
|
let lane_type = ctrl_ty.lane_type();
|
||||||
|
let double_width = ctrl_ty.double_width().unwrap().lane_type();
|
||||||
|
let arg0 = extractlanes(&arg(0)?, lane_type)?;
|
||||||
|
let arg1 = extractlanes(&arg(1)?, lane_type)?;
|
||||||
|
let (min, max) = lane_type.bounds(true);
|
||||||
|
let min: V = Value::int(min as i128, double_width)?;
|
||||||
|
let max: V = Value::int(max as i128, double_width)?;
|
||||||
|
let new_vec = arg0
|
||||||
|
.into_iter()
|
||||||
|
.zip(arg1.into_iter())
|
||||||
|
.map(|(x, y)| {
|
||||||
|
let x = x.into_int()?;
|
||||||
|
let y = y.into_int()?;
|
||||||
|
// Temporarily double the width of the value to avoid overflow.
|
||||||
|
let z: V = Value::int(
|
||||||
|
(x * y + (1 << (lane_type.bits() - 2))) >> (lane_type.bits() - 1),
|
||||||
|
double_width,
|
||||||
|
)?;
|
||||||
|
// Check bounds, saturate, and truncate back to the original lane width.
|
||||||
|
let z = Value::min(z, max.clone())?;
|
||||||
|
let z = Value::max(z, min.clone())?;
|
||||||
|
let z = z.convert(ValueConversionKind::Truncate(lane_type))?;
|
||||||
|
Ok(z)
|
||||||
|
})
|
||||||
|
.collect::<ValueResult<SimdVec<_>>>()?;
|
||||||
|
assign(vectorizelanes(&new_vec, ctrl_ty)?)
|
||||||
|
}
|
||||||
Opcode::IaddPairwise => assign(binary_pairwise(arg(0)?, arg(1)?, ctrl_ty, Value::add)?),
|
Opcode::IaddPairwise => assign(binary_pairwise(arg(0)?, arg(1)?, ctrl_ty, Value::add)?),
|
||||||
|
|
||||||
// TODO: these instructions should be removed once the new backend makes these obsolete
|
// TODO: these instructions should be removed once the new backend makes these obsolete
|
||||||
|
|||||||
Reference in New Issue
Block a user