diff --git a/cranelift/filetests/filetests/runtests/simd-valltrue.clif b/cranelift/filetests/filetests/runtests/simd-valltrue.clif new file mode 100644 index 0000000000..247b3d4bfd --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-valltrue.clif @@ -0,0 +1,69 @@ +test interpret +test run +target aarch64 +target x86_64 machinst + +; TODO: Refactor this once we support simd bools in the trampoline + +function %vall_true_b8x16() -> b1, b1, b1 { +block0: + v0 = vconst.b8x16 [false false false false false false false false false false false false false false false false] + v1 = vall_true v0 + + v2 = vconst.b8x16 [true false false false false false false false false false false false false false false false] + v3 = vall_true v2 + + v4 = vconst.b8x16 [true true true true true true true true true true true true true true true true] + v5 = vall_true v4 + + return v1, v3, v5 +} +; run: %vall_true_b8x16() == [false, false, true] + + +function %vall_true_b16x8() -> b1, b1, b1 { +block0: + v0 = vconst.b16x8 [false false false false false false false false] + v1 = vall_true v0 + + v2 = vconst.b16x8 [true false false false false false false false] + v3 = vall_true v2 + + v4 = vconst.b16x8 [true true true true true true true true] + v5 = vall_true v4 + + return v1, v3, v5 +} +; run: %vall_true_b16x8() == [false, false, true] + + +function %vall_true_b32x4() -> b1, b1, b1 { +block0: + v0 = vconst.b32x4 [false false false false] + v1 = vall_true v0 + + v2 = vconst.b32x4 [true false false false] + v3 = vall_true v2 + + v4 = vconst.b32x4 [true true true true] + v5 = vall_true v4 + + return v1, v3, v5 +} +; run: %vall_true_b32x4() == [false, false, true] + + +function %vall_true_b64x2() -> b1, b1, b1 { +block0: + v0 = vconst.b64x2 [false false] + v1 = vall_true v0 + + v2 = vconst.b64x2 [true false] + v3 = vall_true v2 + + v4 = vconst.b64x2 [true true] + v5 = vall_true v4 + + return v1, v3, v5 +} +; run: %vall_true_b64x2() == [false, false, true] diff --git a/cranelift/filetests/filetests/runtests/simd-vanytrue.clif b/cranelift/filetests/filetests/runtests/simd-vanytrue.clif new file mode 100644 index 0000000000..15d7f9a040 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-vanytrue.clif @@ -0,0 +1,69 @@ +test interpret +test run +target x86_64 machinst +; TODO: The AArch64 backend is producing an illegal instruction for b64x2. See: #3304 + +; TODO: Refactor this once we support simd bools in the trampoline + +function %vany_true_b8x16() -> b1, b1, b1 { +block0: + v0 = vconst.b8x16 [false false false false false false false false false false false false false false false false] + v1 = vany_true v0 + + v2 = vconst.b8x16 [true false false false false false false false false false false false false false false false] + v3 = vany_true v2 + + v4 = vconst.b8x16 [true true true true true true true true true true true true true true true true] + v5 = vany_true v4 + + return v1, v3, v5 +} +; run: %vany_true_b8x16() == [false, true, true] + + +function %vany_true_b16x8() -> b1, b1, b1 { +block0: + v0 = vconst.b16x8 [false false false false false false false false] + v1 = vany_true v0 + + v2 = vconst.b16x8 [true false false false false false false false] + v3 = vany_true v2 + + v4 = vconst.b16x8 [true true true true true true true true] + v5 = vany_true v4 + + return v1, v3, v5 +} +; run: %vany_true_b16x8() == [false, true, true] + + +function %vany_true_b32x4() -> b1, b1, b1 { +block0: + v0 = vconst.b32x4 [false false false false] + v1 = vany_true v0 + + v2 = vconst.b32x4 [true false false false] + v3 = vany_true v2 + + v4 = vconst.b32x4 [true true true true] + v5 = vany_true v4 + + return v1, v3, v5 +} +; run: %vany_true_b32x4() == [false, true, true] + + +function %vany_true_b64x2() -> b1, b1, b1 { +block0: + v0 = vconst.b64x2 [false false] + v1 = vany_true v0 + + v2 = vconst.b64x2 [true false] + v3 = vany_true v2 + + v4 = vconst.b64x2 [true true] + v5 = vany_true v4 + + return v1, v3, v5 +} +; run: %vany_true_b64x2() == [false, true, true] diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index c38b849010..a485401776 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -11,7 +11,7 @@ use cranelift_codegen::ir::{ }; use log::trace; use smallvec::{smallvec, SmallVec}; -use std::convert::TryFrom; +use std::convert::{TryFrom, TryInto}; use std::ops::RangeFrom; use thiserror::Error; @@ -63,7 +63,22 @@ where }; // Retrieve the immediate value for an instruction, expecting it to exist. - let imm = || -> V { V::from(inst.imm_value().unwrap()) }; + let imm = || -> V { + V::from(match inst { + InstructionData::UnaryConst { + constant_handle, .. + } => { + let buffer = state + .get_current_function() + .dfg + .constants + .get(constant_handle.clone()) + .as_slice(); + DataValue::V128(buffer.try_into().expect("a 16-byte data buffer")) + } + _ => inst.imm_value().unwrap(), + }) + }; // Retrieve the immediate value for an instruction and convert it to the controlling type of the // instruction. For example, since `InstructionData` stores all integer immediates in a 64-bit @@ -408,7 +423,7 @@ where Opcode::F32const => assign(imm()), Opcode::F64const => assign(imm()), Opcode::Bconst => assign(imm()), - Opcode::Vconst => unimplemented!("Vconst"), + Opcode::Vconst => assign(imm()), Opcode::ConstAddr => unimplemented!("ConstAddr"), Opcode::Null => unimplemented!("Null"), Opcode::Nop => ControlFlow::Continue, @@ -537,19 +552,22 @@ where 64 => types::I128, _ => unimplemented!("Unsupported integer length {}", ctrl_ty.bits()), }; - let mut new_vec = SimdVec::new(); let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; let arg1 = extractlanes(&arg(1)?, ctrl_ty.lane_type())?; - for (x, y) in arg0.into_iter().zip(arg1) { - let x: V = Value::int(x, double_length)?; - let y: V = Value::int(y, double_length)?; - new_vec.push( - Value::mul(x, y)? - .convert(ValueConversionKind::ExtractUpper(ctrl_ty.lane_type()))? - .into_int()?, - ) - } - assign(vectorizelanes(&new_vec, ctrl_ty)?) + + let res = arg0 + .into_iter() + .zip(arg1) + .map(|(x, y)| { + let x = x.convert(ValueConversionKind::ZeroExtend(double_length))?; + let y = y.convert(ValueConversionKind::ZeroExtend(double_length))?; + + Ok(Value::mul(x, y)? + .convert(ValueConversionKind::ExtractUpper(ctrl_ty.lane_type()))?) + }) + .collect::>>()?; + + assign(vectorizelanes(&res, ctrl_ty)?) } else { let double_length = match ctrl_ty.bits() { 8 => types::I16, @@ -762,21 +780,32 @@ where Opcode::Swizzle => unimplemented!("Swizzle"), Opcode::Splat => unimplemented!("Splat"), Opcode::Insertlane => { + let idx = imm().into_int()? as usize; let mut vector = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; - vector[Value::into_int(imm())? as usize] = arg(1)?.into_int()?; + vector[idx] = arg(1)?; assign(vectorizelanes(&vector, ctrl_ty)?) } Opcode::Extractlane => { - let value = - extractlanes(&arg(0)?, ctrl_ty.lane_type())?[Value::into_int(imm())? as usize]; - assign(Value::int(value, ctrl_ty.lane_type())?) + let idx = imm().into_int()? as usize; + let lanes = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; + assign(lanes[idx].clone()) } Opcode::VhighBits => unimplemented!("VhighBits"), Opcode::Vsplit => unimplemented!("Vsplit"), Opcode::Vconcat => unimplemented!("Vconcat"), Opcode::Vselect => unimplemented!("Vselect"), - Opcode::VanyTrue => unimplemented!("VanyTrue"), - Opcode::VallTrue => unimplemented!("VallTrue"), + Opcode::VanyTrue => assign(fold_vector( + arg(0)?, + ctrl_ty, + V::bool(false, types::B1)?, + |acc, lane| acc.or(lane), + )?), + Opcode::VallTrue => assign(fold_vector( + arg(0)?, + ctrl_ty, + V::bool(true, types::B1)?, + |acc, lane| acc.and(lane), + )?), Opcode::SwidenLow => unimplemented!("SwidenLow"), Opcode::SwidenHigh => unimplemented!("SwidenHigh"), Opcode::UwidenLow => unimplemented!("UwidenLow"), @@ -979,18 +1008,18 @@ where }) } -type SimdVec = SmallVec<[i128; 4]>; +type SimdVec = SmallVec<[V; 4]>; -/// Converts a SIMD vector value into a Rust vector of i128 for processing. -fn extractlanes(x: &V, lane_type: types::Type) -> ValueResult +/// Converts a SIMD vector value into a Rust array of [Value] for processing. +fn extractlanes(x: &V, lane_type: types::Type) -> ValueResult> where V: Value, { let iterations = match lane_type { - types::I8 => 1, - types::I16 => 2, - types::I32 => 4, - types::I64 => 8, + types::I8 | types::B1 | types::B8 => 1, + types::I16 | types::B16 => 2, + types::I32 | types::B32 => 4, + types::I64 | types::B64 => 8, _ => unimplemented!("Only 128-bit vectors are currently supported."), }; @@ -1004,13 +1033,19 @@ where for j in 0..iterations { lane += (x[i + j] as i128) << (8 * j); } - lanes.push(lane); + + let lane_val: V = if lane_type.is_bool() { + Value::bool(lane != 0, lane_type)? + } else { + Value::int(lane, lane_type)? + }; + lanes.push(lane_val); } return Ok(lanes); } /// Convert a Rust array of i128s back into a `Value::vector`. -fn vectorizelanes(x: &[i128], vector_type: types::Type) -> ValueResult +fn vectorizelanes(x: &[V], vector_type: types::Type) -> ValueResult where V: Value, { @@ -1023,7 +1058,7 @@ where }; let mut result: [u8; 16] = [0; 16]; for (i, val) in x.iter().enumerate() { - let val = *val; + let val = val.clone().into_int()?; for j in 0..iterations { result[(i * iterations) + j] = (val >> (8 * j)) as u8; } @@ -1031,6 +1066,17 @@ where Value::vector(result, vector_type) } +/// Performs a lanewise fold on a vector type +fn fold_vector(v: V, ty: types::Type, init: V, op: F) -> ValueResult +where + V: Value, + F: FnMut(V, V) -> ValueResult, +{ + extractlanes(&v, ty.lane_type())? + .into_iter() + .try_fold(init, op) +} + /// Performs the supplied binary arithmetic `op` on two SIMD vectors. fn binary_arith(x: V, y: V, vector_type: types::Type, op: F, unsigned: bool) -> ValueResult where @@ -1039,20 +1085,19 @@ where { let arg0 = extractlanes(&x, vector_type.lane_type())?; let arg1 = extractlanes(&y, vector_type.lane_type())?; - let mut result = Vec::new(); - for (lhs, rhs) in arg0.into_iter().zip(arg1) { - // The initial Value::int needs to be on a separate line so the - // compiler can determine concrete types. - let mut lhs: V = Value::int(lhs, vector_type.lane_type())?; - let mut rhs: V = Value::int(rhs, vector_type.lane_type())?; - if unsigned { - lhs = lhs.convert(ValueConversionKind::ToUnsigned)?; - rhs = rhs.convert(ValueConversionKind::ToUnsigned)?; - } - let sum = op(lhs, rhs)?; - let sum = sum.into_int()?; - result.push(sum); - } + + let result = arg0 + .into_iter() + .zip(arg1) + .map(|(mut lhs, mut rhs)| { + if unsigned { + lhs = lhs.convert(ValueConversionKind::ToUnsigned)?; + rhs = rhs.convert(ValueConversionKind::ToUnsigned)?; + } + Ok(op(lhs, rhs)?) + }) + .collect::>>()?; + vectorizelanes(&result, vector_type) } @@ -1066,13 +1111,12 @@ where { let arg0 = extractlanes(&x, vector_type.lane_type())?; let arg1 = extractlanes(&y, vector_type.lane_type())?; - let mut result = SimdVec::new(); - for pair in arg0.chunks(2).chain(arg1.chunks(2)) { - let lhs: V = Value::int(pair[0], vector_type.lane_type())?; - let rhs: V = Value::int(pair[1], vector_type.lane_type())?; - let sum = op(lhs, rhs)?; - let sum = sum.into_int()?; - result.push(sum); - } + + let result = arg0 + .chunks(2) + .chain(arg1.chunks(2)) + .map(|pair| op(pair[0].clone(), pair[1].clone())) + .collect::>>()?; + vectorizelanes(&result, vector_type) } diff --git a/cranelift/interpreter/src/value.rs b/cranelift/interpreter/src/value.rs index 023d009dca..c76cacb0be 100644 --- a/cranelift/interpreter/src/value.rs +++ b/cranelift/interpreter/src/value.rs @@ -290,15 +290,15 @@ impl Value for DataValue { _ => unimplemented!("conversion: {} -> {:?}", self.ty(), kind), }, ValueConversionKind::ZeroExtend(ty) => match (self, ty) { - (DataValue::I8(_), types::I16) => unimplemented!(), - (DataValue::I8(_), types::I32) => unimplemented!(), - (DataValue::I8(_), types::I64) => unimplemented!(), - (DataValue::I16(_), types::I32) => unimplemented!(), - (DataValue::I16(_), types::I64) => unimplemented!(), + (DataValue::I8(n), types::I16) => DataValue::I16(n as u8 as i16), + (DataValue::I8(n), types::I32) => DataValue::I32(n as u8 as i32), + (DataValue::I8(n), types::I64) => DataValue::I64(n as u8 as i64), + (DataValue::I16(n), types::I32) => DataValue::I32(n as u16 as i32), + (DataValue::I16(n), types::I64) => DataValue::I64(n as u16 as i64), (DataValue::U32(n), types::I64) => DataValue::U64(n as u64), (DataValue::I32(n), types::I64) => DataValue::I64(n as u32 as i64), - (DataValue::U64(n), types::I64) => DataValue::U64(n), - (DataValue::I64(n), types::I64) => DataValue::I64(n), + (DataValue::I64(n), types::I128) => DataValue::I128(n as u64 as i128), + (from, to) if from.ty() == to => from, (dv, _) => unimplemented!("conversion: {} -> {:?}", dv.ty(), kind), }, ValueConversionKind::ToUnsigned => match self { @@ -428,7 +428,7 @@ impl Value for DataValue { } fn or(self, other: Self) -> ValueResult { - binary_match!(|(&self, &other); [I8, I16, I32, I64]) + binary_match!(|(&self, &other); [B, I8, I16, I32, I64]) } fn xor(self, other: Self) -> ValueResult {