diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs
index 955782039d..1437a2d86d 100644
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -4048,7 +4048,7 @@ pub(crate) fn define(
         Combine `x` and `y` into a vector with twice the lanes but half the integer width while
         saturating overflowing values to the unsigned maximum and minimum.
 
-        Note that all input lanes are considered unsigned.
+        Note that all input lanes are considered unsigned: any negative values will be interpreted as unsigned, overflowing and being replaced with the unsigned maximum.
 
         The lanes will be concatenated after narrowing. For example, when `x` and `y` are `i32x4`
         and `x = [x3, x2, x1, x0]` and `y = [y3, y2, y1, y0]`, then after narrowing the value
diff --git a/cranelift/codegen/src/ir/types.rs b/cranelift/codegen/src/ir/types.rs
index 4284021190..2d9c7e709e 100644
--- a/cranelift/codegen/src/ir/types.rs
+++ b/cranelift/codegen/src/ir/types.rs
@@ -79,6 +79,38 @@ impl Type {
         }
     }
 
+    /// Get the `(minimum, maximum)` values representable by each lane of this type.
+    ///
+    /// With `signed` set, the lane is treated as a signed integer; otherwise it is treated as
+    /// an unsigned integer. Both bounds are returned widened to `i128`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the lane type is not one of `I8`, `I16`, `I32`, `I64` or `I128`, or if
+    /// unsigned bounds are requested for `I128` lanes, because `u128::MAX` does not fit in `i128`.
+    pub fn bounds(self, signed: bool) -> (i128, i128) {
+        if signed {
+            match self.lane_type() {
+                I8 => (i8::MIN as i128, i8::MAX as i128),
+                I16 => (i16::MIN as i128, i16::MAX as i128),
+                I32 => (i32::MIN as i128, i32::MAX as i128),
+                I64 => (i64::MIN as i128, i64::MAX as i128),
+                I128 => (i128::MIN, i128::MAX),
+                _ => unimplemented!(),
+            }
+        } else {
+            match self.lane_type() {
+                I8 => (u8::MIN as i128, u8::MAX as i128),
+                I16 => (u16::MIN as i128, u16::MAX as i128),
+                I32 => (u32::MIN as i128, u32::MAX as i128),
+                I64 => (u64::MIN as i128, u64::MAX as i128),
+                // `u128::MAX as i128` wraps to -1, which would give the inverted range (0, -1).
+                I128 => unimplemented!("unsigned `I128` bounds do not fit in an `i128`"),
+                _ => unimplemented!(),
+            }
+        }
+    }
+
     /// Get an integer type with the requested number of bits.
     pub fn int(bits: u16) -> Option {
         match bits {
diff --git a/cranelift/filetests/filetests/runtests/simd-snarrow-aarch64.clif b/cranelift/filetests/filetests/runtests/simd-snarrow-aarch64.clif
new file mode 100644
index 0000000000..84c3de5d05
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-snarrow-aarch64.clif
@@ -0,0 +1,11 @@
+test interpret
+test run
+target aarch64
+; x86_64 considers the case `i64x2` -> `i32x4` to be 'unreachable'
+
+function %snarrow_i64x2(i64x2, i64x2) -> i32x4 {
+block0(v0: i64x2, v1: i64x2):
+    v2 = snarrow v0, v1
+    return v2
+}
+; run: %snarrow_i64x2([65535 -100000], [5000000000 73]) == [65535 -100000 2147483647 73]
diff --git a/cranelift/filetests/filetests/runtests/simd-snarrow.clif b/cranelift/filetests/filetests/runtests/simd-snarrow.clif
new file mode 100644
index 0000000000..18d667f743
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-snarrow.clif
@@ -0,0 +1,19 @@
+test interpret
+test run
+target aarch64
+set enable_simd
+target x86_64
+
+function %snarrow_i16x8(i16x8, i16x8) -> i8x16 {
+block0(v0: i16x8, v1: i16x8):
+    v2 = snarrow v0, v1
+    return v2
+}
+; run: %snarrow_i16x8([1 127 128 15 32767 -32 48 0], [8 255 -100 100 -32768 73 80 42]) == [1 127 127 15 127 -32 48 0 8 127 -100 100 -128 73 80 42]
+
+function %snarrow_i32x4(i32x4, i32x4) -> i16x8 {
+block0(v0: i32x4, v1: i32x4):
+    v2 = snarrow v0, v1
+    return v2
+}
+; run: %snarrow_i32x4([32767 1048575 -70000 -5], [268435455 73 268435455 42]) == [32767 32767 -32768 -5 32767 73 32767 42]
diff --git a/cranelift/filetests/filetests/runtests/simd-unarrow-aarch64.clif b/cranelift/filetests/filetests/runtests/simd-unarrow-aarch64.clif
new file mode 100644
index 0000000000..478a1860aa
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-unarrow-aarch64.clif
@@ -0,0 +1,11 @@
+test interpret
+test run
+target aarch64
+; x86_64 considers the case `i64x2 -> i32x4` to be 'unreachable'
+
+function %unarrow_i64x2(i64x2, i64x2) -> i32x4 {
+block0(v0: i64x2, v1: i64x2):
+    v2 = unarrow v0, v1
+    return v2
+}
+; run: %unarrow_i64x2([65535 -100000], [5000000000 73]) == [65535 0 4294967295 73]
diff --git a/cranelift/filetests/filetests/runtests/simd-unarrow.clif b/cranelift/filetests/filetests/runtests/simd-unarrow.clif
new file mode 100644
index 0000000000..0725afd811
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-unarrow.clif
@@ -0,0 +1,19 @@
+test interpret
+test run
+target aarch64
+set enable_simd
+target x86_64
+
+function %unarrow_i16x8(i16x8, i16x8) -> i8x16 {
+block0(v0: i16x8, v1: i16x8):
+    v2 = unarrow v0, v1
+    return v2
+}
+; run: %unarrow_i16x8([1 127 128 15 65535 -32 48 0], [8 255 -100 100 65534 73 80 42]) == [1 127 128 15 0 0 48 0 8 255 0 100 0 73 80 42]
+
+function %unarrow_i32x4(i32x4, i32x4) -> i16x8 {
+block0(v0: i32x4, v1: i32x4):
+    v2 = unarrow v0, v1
+    return v2
+}
+; run: %unarrow_i32x4([65535 1048575 -70000 -5], [268435455 73 268435455 42]) == [65535 65535 0 0 65535 73 65535 42]
diff --git a/cranelift/filetests/filetests/runtests/simd-uunarrow.clif b/cranelift/filetests/filetests/runtests/simd-uunarrow.clif
new file mode 100644
index 0000000000..b2a68c4480
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-uunarrow.clif
@@ -0,0 +1,26 @@
+test interpret
+test run
+target aarch64
+; x86_64 panics: `Did not match fcvt input!
+; thread 'worker #0' panicked at 'register allocation: Analysis(EntryLiveinValues([v2V]))', cranelift/codegen/src/machinst/compile.rs:96:10`
+
+function %uunarrow_i16x8(i16x8, i16x8) -> i8x16 {
+block0(v0: i16x8, v1: i16x8):
+    v2 = uunarrow v0, v1
+    return v2
+}
+; run: %uunarrow_i16x8([1 127 128 15 65535 -32 48 0], [8 255 -100 100 65534 73 80 42]) == [1 127 128 15 255 255 48 0 8 255 255 100 255 73 80 42]
+
+function %uunarrow_i32x4(i32x4, i32x4) -> i16x8 {
+block0(v0: i32x4, v1: i32x4):
+    v2 = uunarrow v0, v1
+    return v2
+}
+; run: %uunarrow_i32x4([65535 1048575 -70000 -5], [268435455 73 268435455 42]) == [65535 65535 65535 65535 65535 73 65535 42]
+
+function %uunarrow_i64x2(i64x2, i64x2) -> i32x4 {
+block0(v0: i64x2, v1: i64x2):
+    v2 = uunarrow v0, v1
+    return v2
+}
+; run: %uunarrow_i64x2([65535 -100000], [5000000000 73]) == [65535 4294967295 4294967295 73]
diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs
index 25ec8b2878..6a86eb1cb6 100644
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -779,19 +779,52 @@ where
             arg(0)?,
             ValueConversionKind::Exact(ctrl_ty),
         )?),
-        Opcode::Snarrow => assign(Value::convert(
-            arg(0)?,
-            ValueConversionKind::Truncate(ctrl_ty),
-        )?),
+        // Saturating narrowing: clamp each lane of both operands to the range of the half-width
+        // lane type, truncate it, and emit the lanes of `x` followed by the lanes of `y`.
+        //   - `snarrow`:  lanes read as signed, clamped to the signed range.
+        //   - `unarrow`:  lanes read as signed, clamped to the unsigned range.
+        //   - `uunarrow`: lanes read as unsigned, clamped to the unsigned range.
+        Opcode::Snarrow | Opcode::Unarrow | Opcode::Uunarrow => {
+            // The opcode cannot change between lanes, so classify it once.
+            let opcode = inst.opcode();
+            let signed_bounds = opcode == Opcode::Snarrow;
+            let unsigned_input = opcode == Opcode::Uunarrow;
+            // Only the three opcodes above reach this arm, so this is `unarrow` or `uunarrow`.
+            let unsigned_output = !signed_bounds;
+
+            let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
+            let arg1 = extractlanes(&arg(1)?, ctrl_ty.lane_type())?;
+            let new_type = ctrl_ty.split_lanes().unwrap();
+
+            // Express the bounds in the input lane type so they compare against input lanes.
+            let (min, max) = new_type.bounds(signed_bounds);
+            let mut min: V = Value::int(min, ctrl_ty.lane_type())?;
+            let mut max: V = Value::int(max, ctrl_ty.lane_type())?;
+            if unsigned_input {
+                min = min.convert(ValueConversionKind::ToUnsigned)?;
+                max = max.convert(ValueConversionKind::ToUnsigned)?;
+            }
+
+            let mut new_vec = SimdVec::new();
+            for mut lane in arg0.into_iter().chain(arg1) {
+                if unsigned_input {
+                    lane = lane.convert(ValueConversionKind::ToUnsigned)?;
+                }
+                // Clamp before truncating so out-of-range lanes saturate instead of wrapping.
+                lane = Value::max(lane, min.clone())?;
+                lane = Value::min(lane, max.clone())?;
+                lane = lane.convert(ValueConversionKind::Truncate(new_type.lane_type()))?;
+                if unsigned_output {
+                    lane = lane.convert(ValueConversionKind::ToUnsigned)?;
+                }
+                new_vec.push(lane);
+            }
+            assign(vectorizelanes(&new_vec, new_type)?)
+        }
         Opcode::Sextend => assign(Value::convert(
             arg(0)?,
             ValueConversionKind::SignExtend(ctrl_ty),
         )?),
-        Opcode::Unarrow => assign(Value::convert(
-            arg(0)?,
-            ValueConversionKind::Truncate(ctrl_ty),
-        )?),
-        Opcode::Uunarrow => unimplemented!("Uunarrow"),
         Opcode::Uextend => assign(Value::convert(
             arg(0)?,
             ValueConversionKind::ZeroExtend(ctrl_ty),
diff --git a/cranelift/interpreter/src/value.rs b/cranelift/interpreter/src/value.rs
index 768ccfe8e2..b898707199 100644
--- a/cranelift/interpreter/src/value.rs
+++ b/cranelift/interpreter/src/value.rs
@@ -26,6 +26,11 @@ pub trait Value: Clone + From {
     fn convert(self, kind: ValueConversionKind) -> ValueResult<Self>;
     fn concat(self, other: Self) -> ValueResult<Self>;
 
+    /// Returns the larger of `self` and `other`.
+    fn max(self, other: Self) -> ValueResult<Self>;
+    /// Returns the smaller of `self` and `other`.
+    fn min(self, other: Self) -> ValueResult<Self>;
+
     // Comparison.
     fn eq(&self, other: &Self) -> ValueResult<bool>;
     fn gt(&self, other: &Self) -> ValueResult<bool>;
@@ -302,11 +307,18 @@ impl Value for DataValue {
                 Self::from_integer(extracted, ty)?
             }
             ValueConversionKind::SignExtend(ty) => match (self, ty) {
+                // Unsigned inputs are widened with a plain `as` cast and stay unsigned.
+                (DataValue::U8(n), types::I16) => DataValue::U16(n as u16),
+                (DataValue::U8(n), types::I32) => DataValue::U32(n as u32),
+                (DataValue::U8(n), types::I64) => DataValue::U64(n as u64),
                 (DataValue::I8(n), types::I16) => DataValue::I16(n as i16),
                 (DataValue::I8(n), types::I32) => DataValue::I32(n as i32),
                 (DataValue::I8(n), types::I64) => DataValue::I64(n as i64),
+                (DataValue::U16(n), types::I32) => DataValue::U32(n as u32),
+                (DataValue::U16(n), types::I64) => DataValue::U64(n as u64),
                 (DataValue::I16(n), types::I32) => DataValue::I32(n as i32),
                 (DataValue::I16(n), types::I64) => DataValue::I64(n as i64),
+                (DataValue::U32(n), types::I64) => DataValue::U64(n as u64),
                 (DataValue::I32(n), types::I64) => DataValue::I64(n as i64),
                 (dv, _) => unimplemented!("conversion: {} -> {:?}", dv.ty(), kind),
             },
@@ -362,6 +374,30 @@ impl Value for DataValue {
         }
     }
 
+    /// Returns the larger of `self` and `other`.
+    ///
+    /// `self` is returned only when `Value::gt(&self, &other)` holds; otherwise (including
+    /// ties) `other` is returned. An error from the comparison is propagated.
+    fn max(self, other: Self) -> ValueResult<Self> {
+        if Value::gt(&self, &other)? {
+            Ok(self)
+        } else {
+            Ok(other)
+        }
+    }
+
+    /// Returns the smaller of `self` and `other`.
+    ///
+    /// `self` is returned only when `Value::lt(&self, &other)` holds; otherwise (including
+    /// ties) `other` is returned. An error from the comparison is propagated.
+    fn min(self, other: Self) -> ValueResult<Self> {
+        if Value::lt(&self, &other)? {
+            Ok(self)
+        } else {
+            Ok(other)
+        }
+    }
+
     fn eq(&self, other: &Self) -> ValueResult<bool> {
         comparison_match!(PartialEq::eq[&self, &other]; [I8, I16, I32, I64, U8, U16, U32, U64, F32, F64])
     }