Implement vany_true and vall_true instructions in interpreter (#3304)

* cranelift: Implement ZeroExtend for a bunch of types in interpreter

* cranelift: Implement VConst on interpreter

* cranelift: Implement VallTrue on interpreter

* cranelift: Implement VanyTrue on interpreter

* cranelift: Mark `v{all,any}_true` tests as machinst only

* cranelift: Disable `vany_true` tests on aarch64

The `b64x2` case produces an illegal instruction. See #3305
This commit is contained in:
Afonso Bordado
2021-09-07 17:50:39 +01:00
committed by GitHub
parent c73673559b
commit 63e9a81deb
4 changed files with 242 additions and 60 deletions

View File

@@ -0,0 +1,69 @@
test interpret
test run
target aarch64
target x86_64 machinst
; TODO: Refactor this once we support simd bools in the trampoline
function %vall_true_b8x16() -> b1, b1, b1 {
; Exercises `vall_true` on three constant b8x16 vectors and returns each
; scalar b1 result separately.
block0:
; Case 1: every lane is false; the directive below expects false.
v0 = vconst.b8x16 [false false false false false false false false false false false false false false false false]
v1 = vall_true v0
; Case 2: only the first listed lane is true; the directive below expects false.
v2 = vconst.b8x16 [true false false false false false false false false false false false false false false false]
v3 = vall_true v2
; Case 3: every lane is true; the directive below expects true.
v4 = vconst.b8x16 [true true true true true true true true true true true true true true true true]
v5 = vall_true v4
return v1, v3, v5
}
; run: %vall_true_b8x16() == [false, false, true]
function %vall_true_b16x8() -> b1, b1, b1 {
; Exercises `vall_true` on three constant b16x8 vectors and returns each
; scalar b1 result separately.
block0:
; Case 1: every lane is false; the directive below expects false.
v0 = vconst.b16x8 [false false false false false false false false]
v1 = vall_true v0
; Case 2: only the first listed lane is true; the directive below expects false.
v2 = vconst.b16x8 [true false false false false false false false]
v3 = vall_true v2
; Case 3: every lane is true; the directive below expects true.
v4 = vconst.b16x8 [true true true true true true true true]
v5 = vall_true v4
return v1, v3, v5
}
; run: %vall_true_b16x8() == [false, false, true]
function %vall_true_b32x4() -> b1, b1, b1 {
; Exercises `vall_true` on three constant b32x4 vectors and returns each
; scalar b1 result separately.
block0:
; Case 1: every lane is false; the directive below expects false.
v0 = vconst.b32x4 [false false false false]
v1 = vall_true v0
; Case 2: only the first listed lane is true; the directive below expects false.
v2 = vconst.b32x4 [true false false false]
v3 = vall_true v2
; Case 3: every lane is true; the directive below expects true.
v4 = vconst.b32x4 [true true true true]
v5 = vall_true v4
return v1, v3, v5
}
; run: %vall_true_b32x4() == [false, false, true]
function %vall_true_b64x2() -> b1, b1, b1 {
; Exercises `vall_true` on three constant b64x2 vectors and returns each
; scalar b1 result separately.
block0:
; Case 1: both lanes are false; the directive below expects false.
v0 = vconst.b64x2 [false false]
v1 = vall_true v0
; Case 2: only the first listed lane is true; the directive below expects false.
v2 = vconst.b64x2 [true false]
v3 = vall_true v2
; Case 3: both lanes are true; the directive below expects true.
v4 = vconst.b64x2 [true true]
v5 = vall_true v4
return v1, v3, v5
}
; run: %vall_true_b64x2() == [false, false, true]

View File

@@ -0,0 +1,69 @@
test interpret
test run
target x86_64 machinst
; TODO: The AArch64 backend is producing an illegal instruction for b64x2. See: #3305
; TODO: Refactor this once we support simd bools in the trampoline
function %vany_true_b8x16() -> b1, b1, b1 {
; Exercises `vany_true` on three constant b8x16 vectors and returns each
; scalar b1 result separately.
block0:
; Case 1: every lane is false; the directive below expects false.
v0 = vconst.b8x16 [false false false false false false false false false false false false false false false false]
v1 = vany_true v0
; Case 2: only the first listed lane is true; the directive below expects true.
v2 = vconst.b8x16 [true false false false false false false false false false false false false false false false]
v3 = vany_true v2
; Case 3: every lane is true; the directive below expects true.
v4 = vconst.b8x16 [true true true true true true true true true true true true true true true true]
v5 = vany_true v4
return v1, v3, v5
}
; run: %vany_true_b8x16() == [false, true, true]
function %vany_true_b16x8() -> b1, b1, b1 {
; Exercises `vany_true` on three constant b16x8 vectors and returns each
; scalar b1 result separately.
block0:
; Case 1: every lane is false; the directive below expects false.
v0 = vconst.b16x8 [false false false false false false false false]
v1 = vany_true v0
; Case 2: only the first listed lane is true; the directive below expects true.
v2 = vconst.b16x8 [true false false false false false false false]
v3 = vany_true v2
; Case 3: every lane is true; the directive below expects true.
v4 = vconst.b16x8 [true true true true true true true true]
v5 = vany_true v4
return v1, v3, v5
}
; run: %vany_true_b16x8() == [false, true, true]
function %vany_true_b32x4() -> b1, b1, b1 {
; Exercises `vany_true` on three constant b32x4 vectors and returns each
; scalar b1 result separately.
block0:
; Case 1: every lane is false; the directive below expects false.
v0 = vconst.b32x4 [false false false false]
v1 = vany_true v0
; Case 2: only the first listed lane is true; the directive below expects true.
v2 = vconst.b32x4 [true false false false]
v3 = vany_true v2
; Case 3: every lane is true; the directive below expects true.
v4 = vconst.b32x4 [true true true true]
v5 = vany_true v4
return v1, v3, v5
}
; run: %vany_true_b32x4() == [false, true, true]
function %vany_true_b64x2() -> b1, b1, b1 {
; Exercises `vany_true` on three constant b64x2 vectors and returns each
; scalar b1 result separately.
block0:
; Case 1: both lanes are false; the directive below expects false.
v0 = vconst.b64x2 [false false]
v1 = vany_true v0
; Case 2: only the first listed lane is true; the directive below expects true.
v2 = vconst.b64x2 [true false]
v3 = vany_true v2
; Case 3: both lanes are true; the directive below expects true.
v4 = vconst.b64x2 [true true]
v5 = vany_true v4
return v1, v3, v5
}
; run: %vany_true_b64x2() == [false, true, true]

View File

@@ -11,7 +11,7 @@ use cranelift_codegen::ir::{
}; };
use log::trace; use log::trace;
use smallvec::{smallvec, SmallVec}; use smallvec::{smallvec, SmallVec};
use std::convert::TryFrom; use std::convert::{TryFrom, TryInto};
use std::ops::RangeFrom; use std::ops::RangeFrom;
use thiserror::Error; use thiserror::Error;
@@ -63,7 +63,22 @@ where
}; };
// Retrieve the immediate value for an instruction, expecting it to exist. // Retrieve the immediate value for an instruction, expecting it to exist.
let imm = || -> V { V::from(inst.imm_value().unwrap()) }; let imm = || -> V {
V::from(match inst {
InstructionData::UnaryConst {
constant_handle, ..
} => {
let buffer = state
.get_current_function()
.dfg
.constants
.get(constant_handle.clone())
.as_slice();
DataValue::V128(buffer.try_into().expect("a 16-byte data buffer"))
}
_ => inst.imm_value().unwrap(),
})
};
// Retrieve the immediate value for an instruction and convert it to the controlling type of the // Retrieve the immediate value for an instruction and convert it to the controlling type of the
// instruction. For example, since `InstructionData` stores all integer immediates in a 64-bit // instruction. For example, since `InstructionData` stores all integer immediates in a 64-bit
@@ -408,7 +423,7 @@ where
Opcode::F32const => assign(imm()), Opcode::F32const => assign(imm()),
Opcode::F64const => assign(imm()), Opcode::F64const => assign(imm()),
Opcode::Bconst => assign(imm()), Opcode::Bconst => assign(imm()),
Opcode::Vconst => unimplemented!("Vconst"), Opcode::Vconst => assign(imm()),
Opcode::ConstAddr => unimplemented!("ConstAddr"), Opcode::ConstAddr => unimplemented!("ConstAddr"),
Opcode::Null => unimplemented!("Null"), Opcode::Null => unimplemented!("Null"),
Opcode::Nop => ControlFlow::Continue, Opcode::Nop => ControlFlow::Continue,
@@ -537,19 +552,22 @@ where
64 => types::I128, 64 => types::I128,
_ => unimplemented!("Unsupported integer length {}", ctrl_ty.bits()), _ => unimplemented!("Unsupported integer length {}", ctrl_ty.bits()),
}; };
let mut new_vec = SimdVec::new();
let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
let arg1 = extractlanes(&arg(1)?, ctrl_ty.lane_type())?; let arg1 = extractlanes(&arg(1)?, ctrl_ty.lane_type())?;
for (x, y) in arg0.into_iter().zip(arg1) {
let x: V = Value::int(x, double_length)?; let res = arg0
let y: V = Value::int(y, double_length)?; .into_iter()
new_vec.push( .zip(arg1)
Value::mul(x, y)? .map(|(x, y)| {
.convert(ValueConversionKind::ExtractUpper(ctrl_ty.lane_type()))? let x = x.convert(ValueConversionKind::ZeroExtend(double_length))?;
.into_int()?, let y = y.convert(ValueConversionKind::ZeroExtend(double_length))?;
)
} Ok(Value::mul(x, y)?
assign(vectorizelanes(&new_vec, ctrl_ty)?) .convert(ValueConversionKind::ExtractUpper(ctrl_ty.lane_type()))?)
})
.collect::<ValueResult<SimdVec<V>>>()?;
assign(vectorizelanes(&res, ctrl_ty)?)
} else { } else {
let double_length = match ctrl_ty.bits() { let double_length = match ctrl_ty.bits() {
8 => types::I16, 8 => types::I16,
@@ -762,21 +780,32 @@ where
Opcode::Swizzle => unimplemented!("Swizzle"), Opcode::Swizzle => unimplemented!("Swizzle"),
Opcode::Splat => unimplemented!("Splat"), Opcode::Splat => unimplemented!("Splat"),
Opcode::Insertlane => { Opcode::Insertlane => {
let idx = imm().into_int()? as usize;
let mut vector = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; let mut vector = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
vector[Value::into_int(imm())? as usize] = arg(1)?.into_int()?; vector[idx] = arg(1)?;
assign(vectorizelanes(&vector, ctrl_ty)?) assign(vectorizelanes(&vector, ctrl_ty)?)
} }
Opcode::Extractlane => { Opcode::Extractlane => {
let value = let idx = imm().into_int()? as usize;
extractlanes(&arg(0)?, ctrl_ty.lane_type())?[Value::into_int(imm())? as usize]; let lanes = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
assign(Value::int(value, ctrl_ty.lane_type())?) assign(lanes[idx].clone())
} }
Opcode::VhighBits => unimplemented!("VhighBits"), Opcode::VhighBits => unimplemented!("VhighBits"),
Opcode::Vsplit => unimplemented!("Vsplit"), Opcode::Vsplit => unimplemented!("Vsplit"),
Opcode::Vconcat => unimplemented!("Vconcat"), Opcode::Vconcat => unimplemented!("Vconcat"),
Opcode::Vselect => unimplemented!("Vselect"), Opcode::Vselect => unimplemented!("Vselect"),
Opcode::VanyTrue => unimplemented!("VanyTrue"), Opcode::VanyTrue => assign(fold_vector(
Opcode::VallTrue => unimplemented!("VallTrue"), arg(0)?,
ctrl_ty,
V::bool(false, types::B1)?,
|acc, lane| acc.or(lane),
)?),
Opcode::VallTrue => assign(fold_vector(
arg(0)?,
ctrl_ty,
V::bool(true, types::B1)?,
|acc, lane| acc.and(lane),
)?),
Opcode::SwidenLow => unimplemented!("SwidenLow"), Opcode::SwidenLow => unimplemented!("SwidenLow"),
Opcode::SwidenHigh => unimplemented!("SwidenHigh"), Opcode::SwidenHigh => unimplemented!("SwidenHigh"),
Opcode::UwidenLow => unimplemented!("UwidenLow"), Opcode::UwidenLow => unimplemented!("UwidenLow"),
@@ -979,18 +1008,18 @@ where
}) })
} }
type SimdVec = SmallVec<[i128; 4]>; type SimdVec<V> = SmallVec<[V; 4]>;
/// Converts a SIMD vector value into a Rust vector of i128 for processing. /// Converts a SIMD vector value into a Rust array of [Value] for processing.
fn extractlanes<V>(x: &V, lane_type: types::Type) -> ValueResult<SimdVec> fn extractlanes<V>(x: &V, lane_type: types::Type) -> ValueResult<SimdVec<V>>
where where
V: Value, V: Value,
{ {
let iterations = match lane_type { let iterations = match lane_type {
types::I8 => 1, types::I8 | types::B1 | types::B8 => 1,
types::I16 => 2, types::I16 | types::B16 => 2,
types::I32 => 4, types::I32 | types::B32 => 4,
types::I64 => 8, types::I64 | types::B64 => 8,
_ => unimplemented!("Only 128-bit vectors are currently supported."), _ => unimplemented!("Only 128-bit vectors are currently supported."),
}; };
@@ -1004,13 +1033,19 @@ where
for j in 0..iterations { for j in 0..iterations {
lane += (x[i + j] as i128) << (8 * j); lane += (x[i + j] as i128) << (8 * j);
} }
lanes.push(lane);
let lane_val: V = if lane_type.is_bool() {
Value::bool(lane != 0, lane_type)?
} else {
Value::int(lane, lane_type)?
};
lanes.push(lane_val);
} }
return Ok(lanes); return Ok(lanes);
} }
/// Convert a Rust array of i128s back into a `Value::vector`. /// Convert a Rust array of i128s back into a `Value::vector`.
fn vectorizelanes<V>(x: &[i128], vector_type: types::Type) -> ValueResult<V> fn vectorizelanes<V>(x: &[V], vector_type: types::Type) -> ValueResult<V>
where where
V: Value, V: Value,
{ {
@@ -1023,7 +1058,7 @@ where
}; };
let mut result: [u8; 16] = [0; 16]; let mut result: [u8; 16] = [0; 16];
for (i, val) in x.iter().enumerate() { for (i, val) in x.iter().enumerate() {
let val = *val; let val = val.clone().into_int()?;
for j in 0..iterations { for j in 0..iterations {
result[(i * iterations) + j] = (val >> (8 * j)) as u8; result[(i * iterations) + j] = (val >> (8 * j)) as u8;
} }
@@ -1031,6 +1066,17 @@ where
Value::vector(result, vector_type) Value::vector(result, vector_type)
} }
/// Performs a lanewise fold on a vector type
fn fold_vector<V, F>(v: V, ty: types::Type, init: V, op: F) -> ValueResult<V>
where
V: Value,
F: FnMut(V, V) -> ValueResult<V>,
{
extractlanes(&v, ty.lane_type())?
.into_iter()
.try_fold(init, op)
}
/// Performs the supplied binary arithmetic `op` on two SIMD vectors. /// Performs the supplied binary arithmetic `op` on two SIMD vectors.
fn binary_arith<V, F>(x: V, y: V, vector_type: types::Type, op: F, unsigned: bool) -> ValueResult<V> fn binary_arith<V, F>(x: V, y: V, vector_type: types::Type, op: F, unsigned: bool) -> ValueResult<V>
where where
@@ -1039,20 +1085,19 @@ where
{ {
let arg0 = extractlanes(&x, vector_type.lane_type())?; let arg0 = extractlanes(&x, vector_type.lane_type())?;
let arg1 = extractlanes(&y, vector_type.lane_type())?; let arg1 = extractlanes(&y, vector_type.lane_type())?;
let mut result = Vec::new();
for (lhs, rhs) in arg0.into_iter().zip(arg1) { let result = arg0
// The initial Value::int needs to be on a separate line so the .into_iter()
// compiler can determine concrete types. .zip(arg1)
let mut lhs: V = Value::int(lhs, vector_type.lane_type())?; .map(|(mut lhs, mut rhs)| {
let mut rhs: V = Value::int(rhs, vector_type.lane_type())?;
if unsigned { if unsigned {
lhs = lhs.convert(ValueConversionKind::ToUnsigned)?; lhs = lhs.convert(ValueConversionKind::ToUnsigned)?;
rhs = rhs.convert(ValueConversionKind::ToUnsigned)?; rhs = rhs.convert(ValueConversionKind::ToUnsigned)?;
} }
let sum = op(lhs, rhs)?; Ok(op(lhs, rhs)?)
let sum = sum.into_int()?; })
result.push(sum); .collect::<ValueResult<SimdVec<V>>>()?;
}
vectorizelanes(&result, vector_type) vectorizelanes(&result, vector_type)
} }
@@ -1066,13 +1111,12 @@ where
{ {
let arg0 = extractlanes(&x, vector_type.lane_type())?; let arg0 = extractlanes(&x, vector_type.lane_type())?;
let arg1 = extractlanes(&y, vector_type.lane_type())?; let arg1 = extractlanes(&y, vector_type.lane_type())?;
let mut result = SimdVec::new();
for pair in arg0.chunks(2).chain(arg1.chunks(2)) { let result = arg0
let lhs: V = Value::int(pair[0], vector_type.lane_type())?; .chunks(2)
let rhs: V = Value::int(pair[1], vector_type.lane_type())?; .chain(arg1.chunks(2))
let sum = op(lhs, rhs)?; .map(|pair| op(pair[0].clone(), pair[1].clone()))
let sum = sum.into_int()?; .collect::<ValueResult<SimdVec<V>>>()?;
result.push(sum);
}
vectorizelanes(&result, vector_type) vectorizelanes(&result, vector_type)
} }

View File

@@ -290,15 +290,15 @@ impl Value for DataValue {
_ => unimplemented!("conversion: {} -> {:?}", self.ty(), kind), _ => unimplemented!("conversion: {} -> {:?}", self.ty(), kind),
}, },
ValueConversionKind::ZeroExtend(ty) => match (self, ty) { ValueConversionKind::ZeroExtend(ty) => match (self, ty) {
(DataValue::I8(_), types::I16) => unimplemented!(), (DataValue::I8(n), types::I16) => DataValue::I16(n as u8 as i16),
(DataValue::I8(_), types::I32) => unimplemented!(), (DataValue::I8(n), types::I32) => DataValue::I32(n as u8 as i32),
(DataValue::I8(_), types::I64) => unimplemented!(), (DataValue::I8(n), types::I64) => DataValue::I64(n as u8 as i64),
(DataValue::I16(_), types::I32) => unimplemented!(), (DataValue::I16(n), types::I32) => DataValue::I32(n as u16 as i32),
(DataValue::I16(_), types::I64) => unimplemented!(), (DataValue::I16(n), types::I64) => DataValue::I64(n as u16 as i64),
(DataValue::U32(n), types::I64) => DataValue::U64(n as u64), (DataValue::U32(n), types::I64) => DataValue::U64(n as u64),
(DataValue::I32(n), types::I64) => DataValue::I64(n as u32 as i64), (DataValue::I32(n), types::I64) => DataValue::I64(n as u32 as i64),
(DataValue::U64(n), types::I64) => DataValue::U64(n), (DataValue::I64(n), types::I128) => DataValue::I128(n as u64 as i128),
(DataValue::I64(n), types::I64) => DataValue::I64(n), (from, to) if from.ty() == to => from,
(dv, _) => unimplemented!("conversion: {} -> {:?}", dv.ty(), kind), (dv, _) => unimplemented!("conversion: {} -> {:?}", dv.ty(), kind),
}, },
ValueConversionKind::ToUnsigned => match self { ValueConversionKind::ToUnsigned => match self {
@@ -428,7 +428,7 @@ impl Value for DataValue {
} }
fn or(self, other: Self) -> ValueResult<Self> { fn or(self, other: Self) -> ValueResult<Self> {
binary_match!(|(&self, &other); [I8, I16, I32, I64]) binary_match!(|(&self, &other); [B, I8, I16, I32, I64])
} }
fn xor(self, other: Self) -> ValueResult<Self> { fn xor(self, other: Self) -> ValueResult<Self> {