Implement vany_true and vall_true instructions in interpreter (#3304)
* cranelift: Implement ZeroExtend for a bunch of types in interpreter
* cranelift: Implement VConst on interpreter
* cranelift: Implement VallTrue on interpreter
* cranelift: Implement VanyTrue on interpreter
* cranelift: Mark `v{all,any}_true` tests as machinst only
* cranelift: Disable `vany_true` tests on aarch64
The `b64x2` case produces an illegal instruction. See #3305
This commit is contained in:
69
cranelift/filetests/filetests/runtests/simd-valltrue.clif
Normal file
69
cranelift/filetests/filetests/runtests/simd-valltrue.clif
Normal file
@@ -0,0 +1,69 @@
|
||||
test interpret
|
||||
test run
|
||||
target aarch64
|
||||
target x86_64 machinst
|
||||
|
||||
; TODO: Refactor this once we support simd bools in the trampoline
|
||||
|
||||
function %vall_true_b8x16() -> b1, b1, b1 {
|
||||
block0:
|
||||
v0 = vconst.b8x16 [false false false false false false false false false false false false false false false false]
|
||||
v1 = vall_true v0
|
||||
|
||||
v2 = vconst.b8x16 [true false false false false false false false false false false false false false false false]
|
||||
v3 = vall_true v2
|
||||
|
||||
v4 = vconst.b8x16 [true true true true true true true true true true true true true true true true]
|
||||
v5 = vall_true v4
|
||||
|
||||
return v1, v3, v5
|
||||
}
|
||||
; run: %vall_true_b8x16() == [false, false, true]
|
||||
|
||||
|
||||
function %vall_true_b16x8() -> b1, b1, b1 {
|
||||
block0:
|
||||
v0 = vconst.b16x8 [false false false false false false false false]
|
||||
v1 = vall_true v0
|
||||
|
||||
v2 = vconst.b16x8 [true false false false false false false false]
|
||||
v3 = vall_true v2
|
||||
|
||||
v4 = vconst.b16x8 [true true true true true true true true]
|
||||
v5 = vall_true v4
|
||||
|
||||
return v1, v3, v5
|
||||
}
|
||||
; run: %vall_true_b16x8() == [false, false, true]
|
||||
|
||||
|
||||
function %vall_true_b32x4() -> b1, b1, b1 {
|
||||
block0:
|
||||
v0 = vconst.b32x4 [false false false false]
|
||||
v1 = vall_true v0
|
||||
|
||||
v2 = vconst.b32x4 [true false false false]
|
||||
v3 = vall_true v2
|
||||
|
||||
v4 = vconst.b32x4 [true true true true]
|
||||
v5 = vall_true v4
|
||||
|
||||
return v1, v3, v5
|
||||
}
|
||||
; run: %vall_true_b32x4() == [false, false, true]
|
||||
|
||||
|
||||
function %vall_true_b64x2() -> b1, b1, b1 {
|
||||
block0:
|
||||
v0 = vconst.b64x2 [false false]
|
||||
v1 = vall_true v0
|
||||
|
||||
v2 = vconst.b64x2 [true false]
|
||||
v3 = vall_true v2
|
||||
|
||||
v4 = vconst.b64x2 [true true]
|
||||
v5 = vall_true v4
|
||||
|
||||
return v1, v3, v5
|
||||
}
|
||||
; run: %vall_true_b64x2() == [false, false, true]
|
||||
69
cranelift/filetests/filetests/runtests/simd-vanytrue.clif
Normal file
69
cranelift/filetests/filetests/runtests/simd-vanytrue.clif
Normal file
@@ -0,0 +1,69 @@
|
||||
test interpret
|
||||
test run
|
||||
target x86_64 machinst
|
||||
; TODO: The AArch64 backend is producing an illegal instruction for b64x2. See: #3305
|
||||
|
||||
; TODO: Refactor this once we support simd bools in the trampoline
|
||||
|
||||
function %vany_true_b8x16() -> b1, b1, b1 {
|
||||
block0:
|
||||
v0 = vconst.b8x16 [false false false false false false false false false false false false false false false false]
|
||||
v1 = vany_true v0
|
||||
|
||||
v2 = vconst.b8x16 [true false false false false false false false false false false false false false false false]
|
||||
v3 = vany_true v2
|
||||
|
||||
v4 = vconst.b8x16 [true true true true true true true true true true true true true true true true]
|
||||
v5 = vany_true v4
|
||||
|
||||
return v1, v3, v5
|
||||
}
|
||||
; run: %vany_true_b8x16() == [false, true, true]
|
||||
|
||||
|
||||
function %vany_true_b16x8() -> b1, b1, b1 {
|
||||
block0:
|
||||
v0 = vconst.b16x8 [false false false false false false false false]
|
||||
v1 = vany_true v0
|
||||
|
||||
v2 = vconst.b16x8 [true false false false false false false false]
|
||||
v3 = vany_true v2
|
||||
|
||||
v4 = vconst.b16x8 [true true true true true true true true]
|
||||
v5 = vany_true v4
|
||||
|
||||
return v1, v3, v5
|
||||
}
|
||||
; run: %vany_true_b16x8() == [false, true, true]
|
||||
|
||||
|
||||
function %vany_true_b32x4() -> b1, b1, b1 {
|
||||
block0:
|
||||
v0 = vconst.b32x4 [false false false false]
|
||||
v1 = vany_true v0
|
||||
|
||||
v2 = vconst.b32x4 [true false false false]
|
||||
v3 = vany_true v2
|
||||
|
||||
v4 = vconst.b32x4 [true true true true]
|
||||
v5 = vany_true v4
|
||||
|
||||
return v1, v3, v5
|
||||
}
|
||||
; run: %vany_true_b32x4() == [false, true, true]
|
||||
|
||||
|
||||
function %vany_true_b64x2() -> b1, b1, b1 {
|
||||
block0:
|
||||
v0 = vconst.b64x2 [false false]
|
||||
v1 = vany_true v0
|
||||
|
||||
v2 = vconst.b64x2 [true false]
|
||||
v3 = vany_true v2
|
||||
|
||||
v4 = vconst.b64x2 [true true]
|
||||
v5 = vany_true v4
|
||||
|
||||
return v1, v3, v5
|
||||
}
|
||||
; run: %vany_true_b64x2() == [false, true, true]
|
||||
@@ -11,7 +11,7 @@ use cranelift_codegen::ir::{
|
||||
};
|
||||
use log::trace;
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::convert::TryFrom;
|
||||
use std::convert::{TryFrom, TryInto};
|
||||
use std::ops::RangeFrom;
|
||||
use thiserror::Error;
|
||||
|
||||
@@ -63,7 +63,22 @@ where
|
||||
};
|
||||
|
||||
// Retrieve the immediate value for an instruction, expecting it to exist.
|
||||
let imm = || -> V { V::from(inst.imm_value().unwrap()) };
|
||||
let imm = || -> V {
|
||||
V::from(match inst {
|
||||
InstructionData::UnaryConst {
|
||||
constant_handle, ..
|
||||
} => {
|
||||
let buffer = state
|
||||
.get_current_function()
|
||||
.dfg
|
||||
.constants
|
||||
.get(constant_handle.clone())
|
||||
.as_slice();
|
||||
DataValue::V128(buffer.try_into().expect("a 16-byte data buffer"))
|
||||
}
|
||||
_ => inst.imm_value().unwrap(),
|
||||
})
|
||||
};
|
||||
|
||||
// Retrieve the immediate value for an instruction and convert it to the controlling type of the
|
||||
// instruction. For example, since `InstructionData` stores all integer immediates in a 64-bit
|
||||
@@ -408,7 +423,7 @@ where
|
||||
Opcode::F32const => assign(imm()),
|
||||
Opcode::F64const => assign(imm()),
|
||||
Opcode::Bconst => assign(imm()),
|
||||
Opcode::Vconst => unimplemented!("Vconst"),
|
||||
Opcode::Vconst => assign(imm()),
|
||||
Opcode::ConstAddr => unimplemented!("ConstAddr"),
|
||||
Opcode::Null => unimplemented!("Null"),
|
||||
Opcode::Nop => ControlFlow::Continue,
|
||||
@@ -537,19 +552,22 @@ where
|
||||
64 => types::I128,
|
||||
_ => unimplemented!("Unsupported integer length {}", ctrl_ty.bits()),
|
||||
};
|
||||
let mut new_vec = SimdVec::new();
|
||||
let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
|
||||
let arg1 = extractlanes(&arg(1)?, ctrl_ty.lane_type())?;
|
||||
for (x, y) in arg0.into_iter().zip(arg1) {
|
||||
let x: V = Value::int(x, double_length)?;
|
||||
let y: V = Value::int(y, double_length)?;
|
||||
new_vec.push(
|
||||
Value::mul(x, y)?
|
||||
.convert(ValueConversionKind::ExtractUpper(ctrl_ty.lane_type()))?
|
||||
.into_int()?,
|
||||
)
|
||||
}
|
||||
assign(vectorizelanes(&new_vec, ctrl_ty)?)
|
||||
|
||||
let res = arg0
|
||||
.into_iter()
|
||||
.zip(arg1)
|
||||
.map(|(x, y)| {
|
||||
let x = x.convert(ValueConversionKind::ZeroExtend(double_length))?;
|
||||
let y = y.convert(ValueConversionKind::ZeroExtend(double_length))?;
|
||||
|
||||
Ok(Value::mul(x, y)?
|
||||
.convert(ValueConversionKind::ExtractUpper(ctrl_ty.lane_type()))?)
|
||||
})
|
||||
.collect::<ValueResult<SimdVec<V>>>()?;
|
||||
|
||||
assign(vectorizelanes(&res, ctrl_ty)?)
|
||||
} else {
|
||||
let double_length = match ctrl_ty.bits() {
|
||||
8 => types::I16,
|
||||
@@ -762,21 +780,32 @@ where
|
||||
Opcode::Swizzle => unimplemented!("Swizzle"),
|
||||
Opcode::Splat => unimplemented!("Splat"),
|
||||
Opcode::Insertlane => {
|
||||
let idx = imm().into_int()? as usize;
|
||||
let mut vector = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
|
||||
vector[Value::into_int(imm())? as usize] = arg(1)?.into_int()?;
|
||||
vector[idx] = arg(1)?;
|
||||
assign(vectorizelanes(&vector, ctrl_ty)?)
|
||||
}
|
||||
Opcode::Extractlane => {
|
||||
let value =
|
||||
extractlanes(&arg(0)?, ctrl_ty.lane_type())?[Value::into_int(imm())? as usize];
|
||||
assign(Value::int(value, ctrl_ty.lane_type())?)
|
||||
let idx = imm().into_int()? as usize;
|
||||
let lanes = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
|
||||
assign(lanes[idx].clone())
|
||||
}
|
||||
Opcode::VhighBits => unimplemented!("VhighBits"),
|
||||
Opcode::Vsplit => unimplemented!("Vsplit"),
|
||||
Opcode::Vconcat => unimplemented!("Vconcat"),
|
||||
Opcode::Vselect => unimplemented!("Vselect"),
|
||||
Opcode::VanyTrue => unimplemented!("VanyTrue"),
|
||||
Opcode::VallTrue => unimplemented!("VallTrue"),
|
||||
Opcode::VanyTrue => assign(fold_vector(
|
||||
arg(0)?,
|
||||
ctrl_ty,
|
||||
V::bool(false, types::B1)?,
|
||||
|acc, lane| acc.or(lane),
|
||||
)?),
|
||||
Opcode::VallTrue => assign(fold_vector(
|
||||
arg(0)?,
|
||||
ctrl_ty,
|
||||
V::bool(true, types::B1)?,
|
||||
|acc, lane| acc.and(lane),
|
||||
)?),
|
||||
Opcode::SwidenLow => unimplemented!("SwidenLow"),
|
||||
Opcode::SwidenHigh => unimplemented!("SwidenHigh"),
|
||||
Opcode::UwidenLow => unimplemented!("UwidenLow"),
|
||||
@@ -979,18 +1008,18 @@ where
|
||||
})
|
||||
}
|
||||
|
||||
type SimdVec = SmallVec<[i128; 4]>;
|
||||
type SimdVec<V> = SmallVec<[V; 4]>;
|
||||
|
||||
/// Converts a SIMD vector value into a Rust vector of i128 for processing.
|
||||
fn extractlanes<V>(x: &V, lane_type: types::Type) -> ValueResult<SimdVec>
|
||||
/// Converts a SIMD vector value into a Rust array of [Value] for processing.
|
||||
fn extractlanes<V>(x: &V, lane_type: types::Type) -> ValueResult<SimdVec<V>>
|
||||
where
|
||||
V: Value,
|
||||
{
|
||||
let iterations = match lane_type {
|
||||
types::I8 => 1,
|
||||
types::I16 => 2,
|
||||
types::I32 => 4,
|
||||
types::I64 => 8,
|
||||
types::I8 | types::B1 | types::B8 => 1,
|
||||
types::I16 | types::B16 => 2,
|
||||
types::I32 | types::B32 => 4,
|
||||
types::I64 | types::B64 => 8,
|
||||
_ => unimplemented!("Only 128-bit vectors are currently supported."),
|
||||
};
|
||||
|
||||
@@ -1004,13 +1033,19 @@ where
|
||||
for j in 0..iterations {
|
||||
lane += (x[i + j] as i128) << (8 * j);
|
||||
}
|
||||
lanes.push(lane);
|
||||
|
||||
let lane_val: V = if lane_type.is_bool() {
|
||||
Value::bool(lane != 0, lane_type)?
|
||||
} else {
|
||||
Value::int(lane, lane_type)?
|
||||
};
|
||||
lanes.push(lane_val);
|
||||
}
|
||||
return Ok(lanes);
|
||||
}
|
||||
|
||||
/// Convert a Rust array of [Value] back into a `Value::vector`.
|
||||
fn vectorizelanes<V>(x: &[i128], vector_type: types::Type) -> ValueResult<V>
|
||||
fn vectorizelanes<V>(x: &[V], vector_type: types::Type) -> ValueResult<V>
|
||||
where
|
||||
V: Value,
|
||||
{
|
||||
@@ -1023,7 +1058,7 @@ where
|
||||
};
|
||||
let mut result: [u8; 16] = [0; 16];
|
||||
for (i, val) in x.iter().enumerate() {
|
||||
let val = *val;
|
||||
let val = val.clone().into_int()?;
|
||||
for j in 0..iterations {
|
||||
result[(i * iterations) + j] = (val >> (8 * j)) as u8;
|
||||
}
|
||||
@@ -1031,6 +1066,17 @@ where
|
||||
Value::vector(result, vector_type)
|
||||
}
|
||||
|
||||
/// Performs a lanewise fold on a vector type.
///
/// `v` is split into lanes of `ty.lane_type()` by `extractlanes`; the lanes are then
/// combined, in the order `extractlanes` yields them, by calling `op(accumulator, lane)`
/// for each lane, with `init` as the starting accumulator.
///
/// Returns the final accumulator, or the first failure produced by `extractlanes` or `op`.
|
||||
fn fold_vector<V, F>(v: V, ty: types::Type, init: V, op: F) -> ValueResult<V>
|
||||
where
|
||||
V: Value,
|
||||
F: FnMut(V, V) -> ValueResult<V>,
|
||||
{
|
||||
extractlanes(&v, ty.lane_type())?
|
||||
.into_iter()
|
||||
// `try_fold` short-circuits on the first failure returned by `op` and propagates it.
.try_fold(init, op)
|
||||
}
|
||||
|
||||
/// Performs the supplied binary arithmetic `op` on two SIMD vectors.
|
||||
fn binary_arith<V, F>(x: V, y: V, vector_type: types::Type, op: F, unsigned: bool) -> ValueResult<V>
|
||||
where
|
||||
@@ -1039,20 +1085,19 @@ where
|
||||
{
|
||||
let arg0 = extractlanes(&x, vector_type.lane_type())?;
|
||||
let arg1 = extractlanes(&y, vector_type.lane_type())?;
|
||||
let mut result = Vec::new();
|
||||
for (lhs, rhs) in arg0.into_iter().zip(arg1) {
|
||||
// The initial Value::int needs to be on a separate line so the
|
||||
// compiler can determine concrete types.
|
||||
let mut lhs: V = Value::int(lhs, vector_type.lane_type())?;
|
||||
let mut rhs: V = Value::int(rhs, vector_type.lane_type())?;
|
||||
|
||||
let result = arg0
|
||||
.into_iter()
|
||||
.zip(arg1)
|
||||
.map(|(mut lhs, mut rhs)| {
|
||||
if unsigned {
|
||||
lhs = lhs.convert(ValueConversionKind::ToUnsigned)?;
|
||||
rhs = rhs.convert(ValueConversionKind::ToUnsigned)?;
|
||||
}
|
||||
let sum = op(lhs, rhs)?;
|
||||
let sum = sum.into_int()?;
|
||||
result.push(sum);
|
||||
}
|
||||
Ok(op(lhs, rhs)?)
|
||||
})
|
||||
.collect::<ValueResult<SimdVec<V>>>()?;
|
||||
|
||||
vectorizelanes(&result, vector_type)
|
||||
}
|
||||
|
||||
@@ -1066,13 +1111,12 @@ where
|
||||
{
|
||||
let arg0 = extractlanes(&x, vector_type.lane_type())?;
|
||||
let arg1 = extractlanes(&y, vector_type.lane_type())?;
|
||||
let mut result = SimdVec::new();
|
||||
for pair in arg0.chunks(2).chain(arg1.chunks(2)) {
|
||||
let lhs: V = Value::int(pair[0], vector_type.lane_type())?;
|
||||
let rhs: V = Value::int(pair[1], vector_type.lane_type())?;
|
||||
let sum = op(lhs, rhs)?;
|
||||
let sum = sum.into_int()?;
|
||||
result.push(sum);
|
||||
}
|
||||
|
||||
let result = arg0
|
||||
.chunks(2)
|
||||
.chain(arg1.chunks(2))
|
||||
.map(|pair| op(pair[0].clone(), pair[1].clone()))
|
||||
.collect::<ValueResult<SimdVec<V>>>()?;
|
||||
|
||||
vectorizelanes(&result, vector_type)
|
||||
}
|
||||
|
||||
@@ -290,15 +290,15 @@ impl Value for DataValue {
|
||||
_ => unimplemented!("conversion: {} -> {:?}", self.ty(), kind),
|
||||
},
|
||||
ValueConversionKind::ZeroExtend(ty) => match (self, ty) {
|
||||
(DataValue::I8(_), types::I16) => unimplemented!(),
|
||||
(DataValue::I8(_), types::I32) => unimplemented!(),
|
||||
(DataValue::I8(_), types::I64) => unimplemented!(),
|
||||
(DataValue::I16(_), types::I32) => unimplemented!(),
|
||||
(DataValue::I16(_), types::I64) => unimplemented!(),
|
||||
(DataValue::I8(n), types::I16) => DataValue::I16(n as u8 as i16),
|
||||
(DataValue::I8(n), types::I32) => DataValue::I32(n as u8 as i32),
|
||||
(DataValue::I8(n), types::I64) => DataValue::I64(n as u8 as i64),
|
||||
(DataValue::I16(n), types::I32) => DataValue::I32(n as u16 as i32),
|
||||
(DataValue::I16(n), types::I64) => DataValue::I64(n as u16 as i64),
|
||||
(DataValue::U32(n), types::I64) => DataValue::U64(n as u64),
|
||||
(DataValue::I32(n), types::I64) => DataValue::I64(n as u32 as i64),
|
||||
(DataValue::U64(n), types::I64) => DataValue::U64(n),
|
||||
(DataValue::I64(n), types::I64) => DataValue::I64(n),
|
||||
(DataValue::I64(n), types::I128) => DataValue::I128(n as u64 as i128),
|
||||
(from, to) if from.ty() == to => from,
|
||||
(dv, _) => unimplemented!("conversion: {} -> {:?}", dv.ty(), kind),
|
||||
},
|
||||
ValueConversionKind::ToUnsigned => match self {
|
||||
@@ -428,7 +428,7 @@ impl Value for DataValue {
|
||||
}
|
||||
|
||||
fn or(self, other: Self) -> ValueResult<Self> {
|
||||
binary_match!(|(&self, &other); [I8, I16, I32, I64])
|
||||
binary_match!(|(&self, &other); [B, I8, I16, I32, I64])
|
||||
}
|
||||
|
||||
fn xor(self, other: Self) -> ValueResult<Self> {
|
||||
|
||||
Reference in New Issue
Block a user