Implement Unarrow, Uunarrow, and Snarrow for the interpreter

Implemented the following Opcodes for the Cranelift interpreter:
- `Unarrow` to combine two SIMD vectors into a new vector with twice
the lanes but half the width, treating inputs as signed and outputs as
unsigned, so that negative inputs are clamped to `0x00`.
- `Uunarrow` to perform the same operation as `Unarrow` but treating
inputs as unsigned.
- `Snarrow` to perform the same operation as `Unarrow` but treating
both inputs and outputs as signed, and saturating accordingly.

Note that all three instructions saturate at the boundaries of the narrowed lane type.

Copyright (c) 2021, Arm Limited
This commit is contained in:
dheaton-arm
2021-09-08 17:04:05 +01:00
parent 2412e8d784
commit 83c3bc5b9d
9 changed files with 161 additions and 10 deletions

View File

@@ -4048,7 +4048,7 @@ pub(crate) fn define(
Combine `x` and `y` into a vector with twice the lanes but half the integer width while Combine `x` and `y` into a vector with twice the lanes but half the integer width while
saturating overflowing values to the unsigned maximum and minimum. saturating overflowing values to the unsigned maximum and minimum.
Note that all input lanes are considered unsigned. Note that all input lanes are considered unsigned: any negative values will be interpreted as unsigned, overflowing and being replaced with the unsigned maximum.
The lanes will be concatenated after narrowing. For example, when `x` and `y` are `i32x4` The lanes will be concatenated after narrowing. For example, when `x` and `y` are `i32x4`
and `x = [x3, x2, x1, x0]` and `y = [y3, y2, y1, y0]`, then after narrowing the value and `x = [x3, x2, x1, x0]` and `y = [y3, y2, y1, y0]`, then after narrowing the value

View File

@@ -79,6 +79,29 @@ impl Type {
} }
} }
/// Get the (minimum, maximum) values represented by each lane in the type.
///
/// When `signed` is true the lane is interpreted as a signed integer, otherwise as an
/// unsigned integer.
///
/// # Panics
///
/// Panics (via `unimplemented!`) for any lane type other than `I8`, `I16`, `I32`, `I64`
/// and `I128`, and for unsigned `I128`, whose maximum (`u128::MAX`) cannot be represented
/// in the `i128` return type.
pub fn bounds(self, signed: bool) -> (i128, i128) {
    if signed {
        match self.lane_type() {
            I8 => (i128::from(i8::MIN), i128::from(i8::MAX)),
            I16 => (i128::from(i16::MIN), i128::from(i16::MAX)),
            I32 => (i128::from(i32::MIN), i128::from(i32::MAX)),
            I64 => (i128::from(i64::MIN), i128::from(i64::MAX)),
            I128 => (i128::MIN, i128::MAX),
            _ => unimplemented!(),
        }
    } else {
        match self.lane_type() {
            I8 => (i128::from(u8::MIN), i128::from(u8::MAX)),
            I16 => (i128::from(u16::MIN), i128::from(u16::MAX)),
            I32 => (i128::from(u32::MIN), i128::from(u32::MAX)),
            I64 => (i128::from(u64::MIN), i128::from(u64::MAX)),
            // `u128::MAX as i128` wraps to -1, which would report a maximum below the
            // minimum; fail loudly instead of returning a silently wrong bound.
            I128 => unimplemented!("unsigned I128 bounds do not fit in i128"),
            _ => unimplemented!(),
        }
    }
}
/// Get an integer type with the requested number of bits. /// Get an integer type with the requested number of bits.
pub fn int(bits: u16) -> Option<Self> { pub fn int(bits: u16) -> Option<Self> {
match bits { match bits {

View File

@@ -0,0 +1,11 @@
test interpret
test run
target aarch64
; x86_64 considers the case `i64x2` -> `i32x4` to be 'unreachable'
; Narrows two `i64x2` vectors into a single `i32x4` with `snarrow`, which treats both
; inputs and outputs as signed and saturates out-of-range lanes.
function %snarrow_i64x2(i64x2, i64x2) -> i32x4 {
block0(v0: i64x2, v1: i64x2):
v2 = snarrow v0, v1
return v2
}
; run: %snarrow_i64x2([65535 -100000], [5000000000 73]) == [65535 -100000 2147483647 73]

View File

@@ -0,0 +1,19 @@
test interpret
test run
target aarch64
set enable_simd
target x86_64
; Narrows two `i16x8` vectors into a single `i8x16` with `snarrow`, which treats both
; inputs and outputs as signed and saturates out-of-range lanes.
function %snarrow_i16x8(i16x8, i16x8) -> i8x16 {
block0(v0: i16x8, v1: i16x8):
v2 = snarrow v0, v1
return v2
}
; run: %snarrow_i16x8([1 127 128 15 32767 -32 48 0], [8 255 -100 100 -32768 73 80 42]) == [1 127 127 15 127 -32 48 0 8 127 -100 100 -128 73 80 42]
; Narrows two `i32x4` vectors into a single `i16x8` with `snarrow`, which treats both
; inputs and outputs as signed and saturates out-of-range lanes.
function %snarrow_i32x4(i32x4, i32x4) -> i16x8 {
block0(v0: i32x4, v1: i32x4):
v2 = snarrow v0, v1
return v2
}
; run: %snarrow_i32x4([32767 1048575 -70000 -5], [268435455 73 268435455 42]) == [32767 32767 -32768 -5 32767 73 32767 42]

View File

@@ -0,0 +1,11 @@
test interpret
test run
target aarch64
; x86_64 considers the case `i64x2 -> i32x4` to be 'unreachable'
; Narrows two `i64x2` vectors into a single `i32x4` with `unarrow`: inputs are read as
; signed, and the result saturates to the unsigned range of the narrower lane, so
; negative lanes become 0.
function %unarrow_i64x2(i64x2, i64x2) -> i32x4 {
block0(v0: i64x2, v1: i64x2):
v2 = unarrow v0, v1
return v2
}
; run: %unarrow_i64x2([65535 -100000], [5000000000 73]) == [65535 0 4294967295 73]

View File

@@ -0,0 +1,19 @@
test interpret
test run
target aarch64
set enable_simd
target x86_64
; Narrows two `i16x8` vectors into a single `i8x16` with `unarrow`: inputs are read as
; signed, and the result saturates to the unsigned range of the narrower lane, so
; negative lanes become 0.
function %unarrow_i16x8(i16x8, i16x8) -> i8x16 {
block0(v0: i16x8, v1: i16x8):
v2 = unarrow v0, v1
return v2
}
; run: %unarrow_i16x8([1 127 128 15 65535 -32 48 0], [8 255 -100 100 65534 73 80 42]) == [1 127 128 15 0 0 48 0 8 255 0 100 0 73 80 42]
; Narrows two `i32x4` vectors into a single `i16x8` with `unarrow`: inputs are read as
; signed, and the result saturates to the unsigned range of the narrower lane, so
; negative lanes become 0.
function %unarrow_i32x4(i32x4, i32x4) -> i16x8 {
block0(v0: i32x4, v1: i32x4):
v2 = unarrow v0, v1
return v2
}
; run: %unarrow_i32x4([65535 1048575 -70000 -5], [268435455 73 268435455 42]) == [65535 65535 0 0 65535 73 65535 42]

View File

@@ -0,0 +1,26 @@
test interpret
test run
target aarch64
; x86_64 panics: `Did not match fcvt input!
; thread 'worker #0' panicked at 'register allocation: Analysis(EntryLiveinValues([v2V]))', cranelift/codegen/src/machinst/compile.rs:96:10`
; Narrows two `i16x8` vectors into a single `i8x16` with `uunarrow`: inputs are read as
; unsigned, so lanes written as negative numbers are large values that saturate to the
; unsigned maximum of the narrower lane.
function %uunarrow_i16x8(i16x8, i16x8) -> i8x16 {
block0(v0: i16x8, v1: i16x8):
v2 = uunarrow v0, v1
return v2
}
; run: %uunarrow_i16x8([1 127 128 15 65535 -32 48 0], [8 255 -100 100 65534 73 80 42]) == [1 127 128 15 255 255 48 0 8 255 255 100 255 73 80 42]
; Narrows two `i32x4` vectors into a single `i16x8` with `uunarrow`: inputs are read as
; unsigned, so lanes written as negative numbers are large values that saturate to the
; unsigned maximum of the narrower lane.
function %uunarrow_i32x4(i32x4, i32x4) -> i16x8 {
block0(v0: i32x4, v1: i32x4):
v2 = uunarrow v0, v1
return v2
}
; run: %uunarrow_i32x4([65535 1048575 -70000 -5], [268435455 73 268435455 42]) == [65535 65535 65535 65535 65535 73 65535 42]
; Narrows two `i64x2` vectors into a single `i32x4` with `uunarrow`: inputs are read as
; unsigned, so lanes written as negative numbers are large values that saturate to the
; unsigned maximum of the narrower lane.
function %uunarrow_i64x2(i64x2, i64x2) -> i32x4 {
block0(v0: i64x2, v1: i64x2):
v2 = uunarrow v0, v1
return v2
}
; run: %uunarrow_i64x2([65535 -100000], [5000000000 73]) == [65535 4294967295 4294967295 73]

View File

@@ -779,19 +779,36 @@ where
arg(0)?, arg(0)?,
ValueConversionKind::Exact(ctrl_ty), ValueConversionKind::Exact(ctrl_ty),
)?), )?),
Opcode::Snarrow => assign(Value::convert( Opcode::Snarrow | Opcode::Unarrow | Opcode::Uunarrow => {
arg(0)?, let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
ValueConversionKind::Truncate(ctrl_ty), let arg1 = extractlanes(&arg(1)?, ctrl_ty.lane_type())?;
)?), let mut new_vec = SimdVec::new();
let new_type = ctrl_ty.split_lanes().unwrap();
let (min, max) = new_type.bounds(inst.opcode() == Opcode::Snarrow);
let mut min: V = Value::int(min, ctrl_ty.lane_type())?;
let mut max: V = Value::int(max, ctrl_ty.lane_type())?;
if inst.opcode() == Opcode::Uunarrow {
min = min.convert(ValueConversionKind::ToUnsigned)?;
max = max.convert(ValueConversionKind::ToUnsigned)?;
}
for mut lane in arg0.into_iter().chain(arg1) {
if inst.opcode() == Opcode::Uunarrow {
lane = lane.convert(ValueConversionKind::ToUnsigned)?;
}
lane = Value::max(lane, min.clone())?;
lane = Value::min(lane, max.clone())?;
lane = lane.convert(ValueConversionKind::Truncate(new_type.lane_type()))?;
if inst.opcode() == Opcode::Unarrow || inst.opcode() == Opcode::Uunarrow {
lane = lane.convert(ValueConversionKind::ToUnsigned)?;
}
new_vec.push(lane);
}
assign(vectorizelanes(&new_vec, new_type)?)
}
Opcode::Sextend => assign(Value::convert( Opcode::Sextend => assign(Value::convert(
arg(0)?, arg(0)?,
ValueConversionKind::SignExtend(ctrl_ty), ValueConversionKind::SignExtend(ctrl_ty),
)?), )?),
Opcode::Unarrow => assign(Value::convert(
arg(0)?,
ValueConversionKind::Truncate(ctrl_ty),
)?),
Opcode::Uunarrow => unimplemented!("Uunarrow"),
Opcode::Uextend => assign(Value::convert( Opcode::Uextend => assign(Value::convert(
arg(0)?, arg(0)?,
ValueConversionKind::ZeroExtend(ctrl_ty), ValueConversionKind::ZeroExtend(ctrl_ty),

View File

@@ -26,6 +26,9 @@ pub trait Value: Clone + From<DataValue> {
fn convert(self, kind: ValueConversionKind) -> ValueResult<Self>; fn convert(self, kind: ValueConversionKind) -> ValueResult<Self>;
fn concat(self, other: Self) -> ValueResult<Self>; fn concat(self, other: Self) -> ValueResult<Self>;
fn max(self, other: Self) -> ValueResult<Self>;
fn min(self, other: Self) -> ValueResult<Self>;
// Comparison. // Comparison.
fn eq(&self, other: &Self) -> ValueResult<bool>; fn eq(&self, other: &Self) -> ValueResult<bool>;
fn gt(&self, other: &Self) -> ValueResult<bool>; fn gt(&self, other: &Self) -> ValueResult<bool>;
@@ -302,11 +305,17 @@ impl Value for DataValue {
Self::from_integer(extracted, ty)? Self::from_integer(extracted, ty)?
} }
ValueConversionKind::SignExtend(ty) => match (self, ty) { ValueConversionKind::SignExtend(ty) => match (self, ty) {
(DataValue::U8(n), types::I16) => DataValue::U16(n as u16),
(DataValue::U8(n), types::I32) => DataValue::U32(n as u32),
(DataValue::U8(n), types::I64) => DataValue::U64(n as u64),
(DataValue::I8(n), types::I16) => DataValue::I16(n as i16), (DataValue::I8(n), types::I16) => DataValue::I16(n as i16),
(DataValue::I8(n), types::I32) => DataValue::I32(n as i32), (DataValue::I8(n), types::I32) => DataValue::I32(n as i32),
(DataValue::I8(n), types::I64) => DataValue::I64(n as i64), (DataValue::I8(n), types::I64) => DataValue::I64(n as i64),
(DataValue::U16(n), types::I32) => DataValue::U32(n as u32),
(DataValue::U16(n), types::I64) => DataValue::U64(n as u64),
(DataValue::I16(n), types::I32) => DataValue::I32(n as i32), (DataValue::I16(n), types::I32) => DataValue::I32(n as i32),
(DataValue::I16(n), types::I64) => DataValue::I64(n as i64), (DataValue::I16(n), types::I64) => DataValue::I64(n as i64),
(DataValue::U32(n), types::I64) => DataValue::U64(n as u64),
(DataValue::I32(n), types::I64) => DataValue::I64(n as i64), (DataValue::I32(n), types::I64) => DataValue::I64(n as i64),
(dv, _) => unimplemented!("conversion: {} -> {:?}", dv.ty(), kind), (dv, _) => unimplemented!("conversion: {} -> {:?}", dv.ty(), kind),
}, },
@@ -362,6 +371,22 @@ impl Value for DataValue {
} }
} }
/// Return whichever of `self` and `other` is greater according to `Value::gt`.
///
/// `other` is returned when `self` is not strictly greater; any error from the
/// comparison is propagated.
fn max(self, other: Self) -> ValueResult<Self> {
    let self_is_greater = Value::gt(&self, &other)?;
    Ok(if self_is_greater { self } else { other })
}
/// Return whichever of `self` and `other` is lesser according to `Value::lt`.
///
/// `other` is returned when `self` is not strictly lesser; any error from the
/// comparison is propagated.
fn min(self, other: Self) -> ValueResult<Self> {
    let self_is_lesser = Value::lt(&self, &other)?;
    Ok(if self_is_lesser { self } else { other })
}
fn eq(&self, other: &Self) -> ValueResult<bool> { fn eq(&self, other: &Self) -> ValueResult<bool> {
comparison_match!(PartialEq::eq[&self, &other]; [I8, I16, I32, I64, U8, U16, U32, U64, F32, F64]) comparison_match!(PartialEq::eq[&self, &other]; [I8, I16, I32, I64, U8, U16, U32, U64, F32, F64])
} }