diff --git a/cranelift/filetests/filetests/runtests/simd-extractlane.clif b/cranelift/filetests/filetests/runtests/simd-extractlane.clif
new file mode 100644
index 0000000000..215d1c696b
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-extractlane.clif
@@ -0,0 +1,34 @@
+test interpret
+test run
+target aarch64
+set enable_simd
+target x86_64
+
+function %extractlane_4(i8x16) -> i8 {
+block0(v0: i8x16):
+    v1 = extractlane v0, 4
+    return v1
+}
+; run: %extractlane_4([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]) == 5
+
+function %extractlane_7(i16x8) -> i16 {
+block0(v0: i16x8):
+    v1 = extractlane v0, 7
+    return v1
+}
+; run: %extractlane_7([65528 65529 65530 65531 65532 65533 65534 65535]) == 65535
+
+function %extractlane_0(i32x4) -> i32 {
+block0(v0: i32x4):
+    v1 = extractlane v0, 0
+    return v1
+}
+; run: %extractlane_0([0 1 2 3]) == 0
+
+
+function %extractlane_1(i64x2) -> i64 {
+block0(v0: i64x2):
+    v1 = extractlane v0, 1
+    return v1
+}
+; run: %extractlane_1([0 4294967297]) == 4294967297
diff --git a/cranelift/filetests/filetests/runtests/simd-uaddsat-aarch64.clif b/cranelift/filetests/filetests/runtests/simd-uaddsat-aarch64.clif
new file mode 100644
index 0000000000..2342aed7e7
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-uaddsat-aarch64.clif
@@ -0,0 +1,20 @@
+test interpret
+test run
+target aarch64
+
+; i32x4 vectors aren't supported in `uadd_sat` outside AArch64 at the moment
+function %uaddsat_i32x4(i32x4, i32x4) -> i32x4 {
+block0(v0: i32x4, v1: i32x4):
+    v2 = uadd_sat v0, v1
+    return v2
+}
+; run: %uaddsat_i32x4([40 40 40 40], [2 2 2 2]) == [42 42 42 42]
+; run: %uaddsat_i32x4([4294967290 2147483640 4294967290 4294967290], [100 100 100 100]) == [4294967295 2147483740 4294967295 4294967295]
+
+function %uaddsat_i64x2(i64x2, i64x2) -> i64x2 {
+block0(v0: i64x2, v1: i64x2):
+    v2 = uadd_sat v0, v1
+    return v2
+}
+; run: %uaddsat_i64x2([40 40], [2 2]) == [42 42]
+; run: %uaddsat_i64x2([4294967290 18446744073709551610], [100 100]) == [4294967390 18446744073709551615]
diff --git a/cranelift/filetests/filetests/runtests/simd-uaddsat.clif b/cranelift/filetests/filetests/runtests/simd-uaddsat.clif
new file mode 100644
index 0000000000..b904d9f877
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-uaddsat.clif
@@ -0,0 +1,19 @@
+test interpret
+test run
+target aarch64
+set enable_simd
+target x86_64
+
+function %uaddsat_i8x16(i8x16, i8x16) -> i8x16 {
+block0(v0: i8x16, v1: i8x16):
+    v2 = uadd_sat v0, v1
+    return v2
+}
+; run: %uaddsat_i8x16([150 150 150 150 150 150 150 150 150 150 150 150 150 150 150 150], [150 150 150 150 150 150 150 150 150 150 150 150 150 150 150 150]) == [255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255]
+
+function %uaddsat_i16x8(i16x8, i16x8) -> i16x8 {
+block0(v0: i16x8, v1: i16x8):
+    v2 = uadd_sat v0, v1
+    return v2
+}
+; run: %uaddsat_i16x8([65000 65000 65000 65000 65000 65000 65000 65000], [1000 1000 1000 1000 1000 1000 1000 1000]) == [65535 65535 65535 65535 65535 65535 65535 65535]
diff --git a/cranelift/filetests/filetests/runtests/simd-usubsat-aarch64.clif b/cranelift/filetests/filetests/runtests/simd-usubsat-aarch64.clif
new file mode 100644
index 0000000000..4f331a5366
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-usubsat-aarch64.clif
@@ -0,0 +1,20 @@
+test interpret
+test run
+target aarch64
+
+; i32x4 vectors aren't supported in `usub_sat` outside AArch64 at the moment
+function %usubsat_i32x4(i32x4, i32x4) -> i32x4 {
+block0(v0: i32x4, v1: i32x4):
+    v2 = usub_sat v0, v1
+    return v2
+}
+; run: %usubsat_i32x4([40 40 40 40], [2 2 2 2]) == [38 38 38 38]
+; run: %usubsat_i32x4([4294967290 2147483640 4294967290 4294967290], [4294967295 4294967295 4294967295 4294967295]) == [0 0 0 0]
+
+function %usubsat_i64x2(i64x2, i64x2) -> i64x2 {
+block0(v0: i64x2, v1: i64x2):
+    v2 = usub_sat v0, v1
+    return v2
+}
+; run: %usubsat_i64x2([40 40], [2 2]) == [38 38]
+; run: %usubsat_i64x2([4294967290 2147483640], [4294967295 4294967295]) == [0 0]
diff --git a/cranelift/filetests/filetests/runtests/simd-usubsat.clif b/cranelift/filetests/filetests/runtests/simd-usubsat.clif
new file mode 100644
index 0000000000..d78190e0b1
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-usubsat.clif
@@ -0,0 +1,20 @@
+test interpret
+test run
+target aarch64
+set enable_simd
+target x86_64
+
+function %usubsat_i8x16(i8x16, i8x16) -> i8x16 {
+block0(v0: i8x16, v1: i8x16):
+    v2 = usub_sat v0, v1
+    return v2
+}
+; run: %usubsat_i8x16([150 150 150 150 150 150 150 150 150 150 150 150 150 150 150 150], [100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100]) == [50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50]
+; run: %usubsat_i8x16([150 150 150 150 150 150 150 150 150 150 150 150 150 150 150 150], [200 200 200 200 200 200 200 200 200 200 200 200 200 200 200 200]) == [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
+
+function %usubsat_i16x8(i16x8, i16x8) -> i16x8 {
+block0(v0: i16x8, v1: i16x8):
+    v2 = usub_sat v0, v1
+    return v2
+}
+; run: %usubsat_i16x8([65534 65534 65534 65534 65534 65534 65534 65534], [65535 65535 65535 65535 65535 65535 65535 65535]) == [0 0 0 0 0 0 0 0]
diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs
index 9319b9a0fe..497ffb4173 100644
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -496,10 +496,22 @@ where
             binary(Value::div, inc, two)?
         }
         Opcode::Iadd => binary(Value::add, arg(0)?, arg(1)?)?,
-        Opcode::UaddSat => unimplemented!("UaddSat"),
+        Opcode::UaddSat => assign(binary_arith(
+            arg(0)?,
+            arg(1)?,
+            ctrl_ty,
+            Value::add_sat,
+            true,
+        )?),
         Opcode::SaddSat => unimplemented!("SaddSat"),
         Opcode::Isub => binary(Value::sub, arg(0)?, arg(1)?)?,
-        Opcode::UsubSat => unimplemented!("UsubSat"),
+        Opcode::UsubSat => assign(binary_arith(
+            arg(0)?,
+            arg(1)?,
+            ctrl_ty,
+            Value::sub_sat,
+            true,
+        )?),
         Opcode::SsubSat => unimplemented!("SsubSat"),
         Opcode::Ineg => binary(Value::sub, Value::int(0, ctrl_ty)?, arg(0)?)?,
         Opcode::Iabs => unimplemented!("Iabs"),
@@ -661,7 +673,11 @@ where
         Opcode::Swizzle => unimplemented!("Swizzle"),
         Opcode::Splat => unimplemented!("Splat"),
         Opcode::Insertlane => unimplemented!("Insertlane"),
-        Opcode::Extractlane => unimplemented!("Extractlane"),
+        Opcode::Extractlane => {
+            let value =
+                extractlanes(&arg(0)?, ctrl_ty.lane_type())?[Value::into_int(imm())? as usize];
+            assign(Value::int(value, ctrl_ty.lane_type())?)
+        }
         Opcode::VhighBits => unimplemented!("VhighBits"),
         Opcode::Vsplit => unimplemented!("Vsplit"),
         Opcode::Vconcat => unimplemented!("Vconcat"),
@@ -869,3 +885,98 @@ where
         }
     })
 }
+
+type SimdVec = SmallVec<[i128; 4]>;
+
+/// Splits a 128-bit SIMD vector value into its integer lanes.
+///
+/// The bytes returned by `Value::into_array` are grouped into chunks of the
+/// lane width and each chunk is assembled, least-significant byte first, into
+/// one zero-extended `i128`.
+///
+/// Panics (via `unimplemented!`) if `lane_type` is not `I8`, `I16`, `I32` or `I64`.
+fn extractlanes<V>(x: &V, lane_type: types::Type) -> ValueResult<SimdVec>
+where
+    V: Value,
+{
+    let lane_bytes = match lane_type {
+        types::I8 => 1,
+        types::I16 => 2,
+        types::I32 => 4,
+        types::I64 => 8,
+        _ => unimplemented!("Only i8, i16, i32 and i64 lanes are currently supported."),
+    };
+
+    let bytes = x.into_array()?;
+    let lanes = bytes
+        .chunks_exact(lane_bytes)
+        .map(|chunk| {
+            // Fold from the last byte down so that the first byte of the
+            // chunk ends up in the low bits of the lane.
+            chunk
+                .iter()
+                .rev()
+                .fold(0i128, |lane, &byte| (lane << 8) | i128::from(byte))
+        })
+        .collect();
+    Ok(lanes)
+}
+
+/// Packs integer lanes back into a 128-bit `Value::vector`.
+///
+/// Each element of `x` contributes its low lane-width bytes, least-significant
+/// byte first; bytes not covered by `x` stay zero.
+///
+/// Panics (via `unimplemented!`) if the lane type of `vector_type` is not `I8`,
+/// `I16`, `I32` or `I64`, and panics on out-of-bounds indexing if `x` holds more
+/// than 16 bytes worth of lanes.
+fn vectorizelanes<V>(x: &[i128], vector_type: types::Type) -> ValueResult<V>
+where
+    V: Value,
+{
+    let lane_bytes = match vector_type.lane_type() {
+        types::I8 => 1,
+        types::I16 => 2,
+        types::I32 => 4,
+        types::I64 => 8,
+        _ => unimplemented!("Only i8, i16, i32 and i64 lanes are currently supported."),
+    };
+    let mut result: [u8; 16] = [0; 16];
+    for (i, &lane) in x.iter().enumerate() {
+        for j in 0..lane_bytes {
+            // Truncating cast: keeps only byte `j` of the lane.
+            result[(i * lane_bytes) + j] = (lane >> (8 * j)) as u8;
+        }
+    }
+    Value::vector(result, vector_type)
+}
+
+/// Applies the binary arithmetic `op` lane by lane to two SIMD vectors.
+///
+/// Both operands are split into lanes of `vector_type.lane_type()`. When
+/// `unsigned` is set, each lane is converted with
+/// `ValueConversionKind::ToUnsigned` before `op` runs. The per-lane results
+/// are packed back into a vector of `vector_type`.
+fn binary_arith<V, F>(x: V, y: V, vector_type: types::Type, op: F, unsigned: bool) -> ValueResult<V>
+where
+    V: Value,
+    F: Fn(V, V) -> ValueResult<V>,
+{
+    let lane_type = vector_type.lane_type();
+    let arg0 = extractlanes(&x, lane_type)?;
+    let arg1 = extractlanes(&y, lane_type)?;
+    let mut result = Vec::with_capacity(arg0.len());
+    for (lhs, rhs) in arg0.into_iter().zip(arg1) {
+        // The initial Value::int needs to be on a separate line so the
+        // compiler can determine concrete types.
+        let mut lhs: V = Value::int(lhs, lane_type)?;
+        let mut rhs: V = Value::int(rhs, lane_type)?;
+        if unsigned {
+            lhs = lhs.convert(ValueConversionKind::ToUnsigned)?;
+            rhs = rhs.convert(ValueConversionKind::ToUnsigned)?;
+        }
+        let lane = op(lhs, rhs)?.into_int()?;
+        result.push(lane);
+    }
+    vectorizelanes(&result, vector_type)
+}
diff --git a/cranelift/interpreter/src/value.rs b/cranelift/interpreter/src/value.rs
index b5c265855e..0e81d804a7 100644
--- a/cranelift/interpreter/src/value.rs
+++ b/cranelift/interpreter/src/value.rs
@@ -22,6 +22,7 @@ pub trait Value: Clone + From<DataValue> {
     fn bool(b: bool, ty: Type) -> ValueResult<Self>;
     fn into_bool(self) -> ValueResult<bool>;
     fn vector(v: [u8; 16], ty: Type) -> ValueResult<Self>;
+    fn into_array(&self) -> ValueResult<[u8; 16]>;
 
     fn convert(self, kind: ValueConversionKind) -> ValueResult<Self>;
     fn concat(self, other: Self) -> ValueResult<Self>;
@@ -47,6 +48,10 @@ pub trait Value: Clone + From<DataValue> {
     fn div(self, other: Self) -> ValueResult<Self>;
     fn rem(self, other: Self) -> ValueResult<Self>;
 
+    // Saturating arithmetic.
+    fn add_sat(self, other: Self) -> ValueResult<Self>;
+    fn sub_sat(self, other: Self) -> ValueResult<Self>;
+
     // Bitwise.
     fn shl(self, other: Self) -> ValueResult<Self>;
     fn ushr(self, other: Self) -> ValueResult<Self>;
@@ -80,6 +85,7 @@ pub enum ValueTypeClass {
     Integer,
     Boolean,
     Float,
+    Vector,
 }
 
 impl Display for ValueTypeClass {
@@ -88,6 +94,7 @@ impl Display for ValueTypeClass {
             ValueTypeClass::Integer => write!(f, "integer"),
             ValueTypeClass::Boolean => write!(f, "boolean"),
             ValueTypeClass::Float => write!(f, "float"),
+            ValueTypeClass::Vector => write!(f, "vector"),
         }
     }
 }
@@ -225,8 +232,16 @@ impl Value for DataValue {
         }
     }
 
-    fn vector(_v: [u8; 16], _ty: Type) -> ValueResult<Self> {
-        unimplemented!()
+    fn vector(v: [u8; 16], ty: Type) -> ValueResult<Self> {
+        assert!(ty.is_vector() && ty.bytes() == 16);
+        Ok(DataValue::V128(v))
+    }
+
+    fn into_array(&self) -> ValueResult<[u8; 16]> {
+        match *self {
+            DataValue::V128(v) => Ok(v),
+            _ => Err(ValueError::InvalidType(ValueTypeClass::Vector, self.ty())),
+        }
     }
 
     fn convert(self, kind: ValueConversionKind) -> ValueResult<Self> {
@@ -380,6 +395,14 @@ impl Value for DataValue {
         binary_match!(%(&self, &other); [I8, I16, I32, I64])
     }
 
+    fn add_sat(self, other: Self) -> ValueResult<Self> {
+        binary_match!(saturating_add(self, &other); [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128])
+    }
+
+    fn sub_sat(self, other: Self) -> ValueResult<Self> {
+        binary_match!(saturating_sub(self, &other); [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128])
+    }
+
     fn shl(self, other: Self) -> ValueResult<Self> {
         binary_match!(<<(&self, &other); [I8, I16, I32, I64])
     }