Implement Unarrow, Uunarrow, and Snarrow for the interpreter

Implemented the following Opcodes for the Cranelift interpreter:
- `Unarrow` to combine two SIMD vectors into a new vector with twice
the lanes but half the width, treating inputs as signed and outputs as
unsigned, so that negative inputs are clamped to `0x00`.
- `Uunarrow` to perform the same operation as `Unarrow` but treating
inputs as unsigned.
- `Snarrow` to perform the same operation as `Unarrow` but treating
both inputs and outputs as signed, and saturating accordingly.

Note that all 3 instructions saturate at the type boundaries.

Copyright (c) 2021, Arm Limited
This commit is contained in:
dheaton-arm
2021-09-08 17:04:05 +01:00
parent 2412e8d784
commit 83c3bc5b9d
9 changed files with 161 additions and 10 deletions

View File

@@ -779,19 +779,36 @@ where
arg(0)?,
ValueConversionKind::Exact(ctrl_ty),
)?),
Opcode::Snarrow => assign(Value::convert(
arg(0)?,
ValueConversionKind::Truncate(ctrl_ty),
)?),
// Saturating narrow: every lane of both operands is clamped to the limits of the narrower
// output lane type, truncated to that type, and the results are packed into a single vector
// (lanes from operand 0 first, followed by lanes from operand 1).
Opcode::Snarrow | Opcode::Unarrow | Opcode::Uunarrow => {
let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
let arg1 = extractlanes(&arg(1)?, ctrl_ty.lane_type())?;
let mut new_vec = SimdVec::new();
// Output vector type; `unwrap` panics if the controlling type cannot be split.
// NOTE(review): presumably `split_lanes` halves the lane width and doubles the lane
// count, as the commit description states -- confirm against its definition.
let new_type = ctrl_ty.split_lanes().unwrap();
// The flag passed to `bounds` is true only for `Snarrow`.
// NOTE(review): presumably it selects signed rather than unsigned limits -- confirm.
let (min, max) = new_type.bounds(inst.opcode() == Opcode::Snarrow);
// The limits are materialised at the *input* lane type so they can be compared against
// the lanes before those are narrowed.
let mut min: V = Value::int(min, ctrl_ty.lane_type())?;
let mut max: V = Value::int(max, ctrl_ty.lane_type())?;
// `Uunarrow` compares unsigned values, so the limits are converted to match the lanes.
if inst.opcode() == Opcode::Uunarrow {
min = min.convert(ValueConversionKind::ToUnsigned)?;
max = max.convert(ValueConversionKind::ToUnsigned)?;
}
for mut lane in arg0.into_iter().chain(arg1) {
// For `Uunarrow` the input lane itself is reinterpreted as unsigned before clamping.
if inst.opcode() == Opcode::Uunarrow {
lane = lane.convert(ValueConversionKind::ToUnsigned)?;
}
// Clamp into [min, max]: raise to the lower limit first, then cap at the upper limit.
lane = Value::max(lane, min.clone())?;
lane = Value::min(lane, max.clone())?;
// Only after clamping is the lane truncated to the narrower output lane type.
lane = lane.convert(ValueConversionKind::Truncate(new_type.lane_type()))?;
// `Unarrow` and `Uunarrow` produce unsigned output lanes; `Snarrow` is left as is.
if inst.opcode() == Opcode::Unarrow || inst.opcode() == Opcode::Uunarrow {
lane = lane.convert(ValueConversionKind::ToUnsigned)?;
}
new_vec.push(lane);
}
assign(vectorizelanes(&new_vec, new_type)?)
}
Opcode::Sextend => assign(Value::convert(
arg(0)?,
ValueConversionKind::SignExtend(ctrl_ty),
)?),
Opcode::Unarrow => assign(Value::convert(
arg(0)?,
ValueConversionKind::Truncate(ctrl_ty),
)?),
Opcode::Uunarrow => unimplemented!("Uunarrow"),
Opcode::Uextend => assign(Value::convert(
arg(0)?,
ValueConversionKind::ZeroExtend(ctrl_ty),

View File

@@ -26,6 +26,9 @@ pub trait Value: Clone + From<DataValue> {
fn convert(self, kind: ValueConversionKind) -> ValueResult<Self>;
fn concat(self, other: Self) -> ValueResult<Self>;
fn max(self, other: Self) -> ValueResult<Self>;
fn min(self, other: Self) -> ValueResult<Self>;
// Comparison.
fn eq(&self, other: &Self) -> ValueResult<bool>;
fn gt(&self, other: &Self) -> ValueResult<bool>;
@@ -302,11 +305,17 @@ impl Value for DataValue {
Self::from_integer(extracted, ty)?
}
ValueConversionKind::SignExtend(ty) => match (self, ty) {
(DataValue::U8(n), types::I16) => DataValue::U16(n as u16),
(DataValue::U8(n), types::I32) => DataValue::U32(n as u32),
(DataValue::U8(n), types::I64) => DataValue::U64(n as u64),
(DataValue::I8(n), types::I16) => DataValue::I16(n as i16),
(DataValue::I8(n), types::I32) => DataValue::I32(n as i32),
(DataValue::I8(n), types::I64) => DataValue::I64(n as i64),
(DataValue::U16(n), types::I32) => DataValue::U32(n as u32),
(DataValue::U16(n), types::I64) => DataValue::U64(n as u64),
(DataValue::I16(n), types::I32) => DataValue::I32(n as i32),
(DataValue::I16(n), types::I64) => DataValue::I64(n as i64),
(DataValue::U32(n), types::I64) => DataValue::U64(n as u64),
(DataValue::I32(n), types::I64) => DataValue::I64(n as i64),
(dv, _) => unimplemented!("conversion: {} -> {:?}", dv.ty(), kind),
},
@@ -362,6 +371,22 @@ impl Value for DataValue {
}
}
// Yields whichever operand is larger according to `Value::gt`. When `self` is not strictly
// greater (including the equal case) `other` is returned. A failing comparison is propagated
// to the caller via `?`.
fn max(self, other: Self) -> ValueResult<Self> {
    let self_is_greater = Value::gt(&self, &other)?;
    Ok(if self_is_greater { self } else { other })
}
// Yields whichever operand is smaller according to `Value::lt`. When `self` is not strictly
// less (including the equal case) `other` is returned. A failing comparison is propagated
// to the caller via `?`.
fn min(self, other: Self) -> ValueResult<Self> {
    let self_is_less = Value::lt(&self, &other)?;
    Ok(if self_is_less { self } else { other })
}
// Equality test between two values: delegates to the `comparison_match!` macro with
// `PartialEq::eq` and the listed integer and float variants.
// NOTE(review): how operands of differing variants are handled is decided inside
// `comparison_match!`, which is not visible here -- confirm before relying on it.
fn eq(&self, other: &Self) -> ValueResult<bool> {
comparison_match!(PartialEq::eq[&self, &other]; [I8, I16, I32, I64, U8, U16, U32, U64, F32, F64])
}