Implement vany_true and vall_true instructions in interpreter (#3304)

* cranelift: Implement ZeroExtend for a bunch of types in interpreter

* cranelift: Implement VConst on interpreter

* cranelift: Implement VallTrue on interpreter

* cranelift: Implement VanyTrue on interpreter

* cranelift: Mark `v{all,any}_true` tests as machinst only

* cranelift: Disable `vany_true` tests on aarch64

The `b64x2` case produces an illegal instruction. See #3305
This commit is contained in:
Afonso Bordado
2021-09-07 17:50:39 +01:00
committed by GitHub
parent c73673559b
commit 63e9a81deb
4 changed files with 242 additions and 60 deletions

View File

@@ -0,0 +1,69 @@
test interpret
test run
target aarch64
target x86_64 machinst
; TODO: Refactor this once we support simd bools in the trampoline
; Applies `vall_true` to three b8x16 constants and returns the three b1
; results in order: every lane false, only the first listed lane true, and
; every lane true.
function %vall_true_b8x16() -> b1, b1, b1 {
block0:
; Every lane false.
v0 = vconst.b8x16 [false false false false false false false false false false false false false false false false]
v1 = vall_true v0
; A single true lane, all others false.
v2 = vconst.b8x16 [true false false false false false false false false false false false false false false false]
v3 = vall_true v2
; Every lane true.
v4 = vconst.b8x16 [true true true true true true true true true true true true true true true true]
v5 = vall_true v4
return v1, v3, v5
}
; run: %vall_true_b8x16() == [false, false, true]
; Applies `vall_true` to three b16x8 constants and returns the three b1
; results in order: every lane false, only the first listed lane true, and
; every lane true.
function %vall_true_b16x8() -> b1, b1, b1 {
block0:
; Every lane false.
v0 = vconst.b16x8 [false false false false false false false false]
v1 = vall_true v0
; A single true lane, all others false.
v2 = vconst.b16x8 [true false false false false false false false]
v3 = vall_true v2
; Every lane true.
v4 = vconst.b16x8 [true true true true true true true true]
v5 = vall_true v4
return v1, v3, v5
}
; run: %vall_true_b16x8() == [false, false, true]
; Applies `vall_true` to three b32x4 constants and returns the three b1
; results in order: every lane false, only the first listed lane true, and
; every lane true.
function %vall_true_b32x4() -> b1, b1, b1 {
block0:
; Every lane false.
v0 = vconst.b32x4 [false false false false]
v1 = vall_true v0
; A single true lane, all others false.
v2 = vconst.b32x4 [true false false false]
v3 = vall_true v2
; Every lane true.
v4 = vconst.b32x4 [true true true true]
v5 = vall_true v4
return v1, v3, v5
}
; run: %vall_true_b32x4() == [false, false, true]
; Applies `vall_true` to three b64x2 constants and returns the three b1
; results in order: both lanes false, only the first listed lane true, and
; both lanes true.
function %vall_true_b64x2() -> b1, b1, b1 {
block0:
; Both lanes false.
v0 = vconst.b64x2 [false false]
v1 = vall_true v0
; One true lane, one false lane.
v2 = vconst.b64x2 [true false]
v3 = vall_true v2
; Both lanes true.
v4 = vconst.b64x2 [true true]
v5 = vall_true v4
return v1, v3, v5
}
; run: %vall_true_b64x2() == [false, false, true]

View File

@@ -0,0 +1,69 @@
test interpret
test run
target x86_64 machinst
; TODO: The AArch64 backend is producing an illegal instruction for b64x2. See: #3305
; TODO: Refactor this once we support simd bools in the trampoline
; Applies `vany_true` to three b8x16 constants and returns the three b1
; results in order: every lane false, only the first listed lane true, and
; every lane true.
function %vany_true_b8x16() -> b1, b1, b1 {
block0:
; Every lane false.
v0 = vconst.b8x16 [false false false false false false false false false false false false false false false false]
v1 = vany_true v0
; A single true lane, all others false.
v2 = vconst.b8x16 [true false false false false false false false false false false false false false false false]
v3 = vany_true v2
; Every lane true.
v4 = vconst.b8x16 [true true true true true true true true true true true true true true true true]
v5 = vany_true v4
return v1, v3, v5
}
; run: %vany_true_b8x16() == [false, true, true]
; Applies `vany_true` to three b16x8 constants and returns the three b1
; results in order: every lane false, only the first listed lane true, and
; every lane true.
function %vany_true_b16x8() -> b1, b1, b1 {
block0:
; Every lane false.
v0 = vconst.b16x8 [false false false false false false false false]
v1 = vany_true v0
; A single true lane, all others false.
v2 = vconst.b16x8 [true false false false false false false false]
v3 = vany_true v2
; Every lane true.
v4 = vconst.b16x8 [true true true true true true true true]
v5 = vany_true v4
return v1, v3, v5
}
; run: %vany_true_b16x8() == [false, true, true]
; Applies `vany_true` to three b32x4 constants and returns the three b1
; results in order: every lane false, only the first listed lane true, and
; every lane true.
function %vany_true_b32x4() -> b1, b1, b1 {
block0:
; Every lane false.
v0 = vconst.b32x4 [false false false false]
v1 = vany_true v0
; A single true lane, all others false.
v2 = vconst.b32x4 [true false false false]
v3 = vany_true v2
; Every lane true.
v4 = vconst.b32x4 [true true true true]
v5 = vany_true v4
return v1, v3, v5
}
; run: %vany_true_b32x4() == [false, true, true]
; Applies `vany_true` to three b64x2 constants and returns the three b1
; results in order: both lanes false, only the first listed lane true, and
; both lanes true.
function %vany_true_b64x2() -> b1, b1, b1 {
block0:
; Both lanes false.
v0 = vconst.b64x2 [false false]
v1 = vany_true v0
; One true lane, one false lane.
v2 = vconst.b64x2 [true false]
v3 = vany_true v2
; Both lanes true.
v4 = vconst.b64x2 [true true]
v5 = vany_true v4
return v1, v3, v5
}
; run: %vany_true_b64x2() == [false, true, true]

View File

@@ -11,7 +11,7 @@ use cranelift_codegen::ir::{
};
use log::trace;
use smallvec::{smallvec, SmallVec};
use std::convert::TryFrom;
use std::convert::{TryFrom, TryInto};
use std::ops::RangeFrom;
use thiserror::Error;
@@ -63,7 +63,22 @@ where
};
// Retrieve the immediate value for an instruction, expecting it to exist.
let imm = || -> V { V::from(inst.imm_value().unwrap()) };
let imm = || -> V {
V::from(match inst {
InstructionData::UnaryConst {
constant_handle, ..
} => {
let buffer = state
.get_current_function()
.dfg
.constants
.get(constant_handle.clone())
.as_slice();
DataValue::V128(buffer.try_into().expect("a 16-byte data buffer"))
}
_ => inst.imm_value().unwrap(),
})
};
// Retrieve the immediate value for an instruction and convert it to the controlling type of the
// instruction. For example, since `InstructionData` stores all integer immediates in a 64-bit
@@ -408,7 +423,7 @@ where
Opcode::F32const => assign(imm()),
Opcode::F64const => assign(imm()),
Opcode::Bconst => assign(imm()),
Opcode::Vconst => unimplemented!("Vconst"),
Opcode::Vconst => assign(imm()),
Opcode::ConstAddr => unimplemented!("ConstAddr"),
Opcode::Null => unimplemented!("Null"),
Opcode::Nop => ControlFlow::Continue,
@@ -537,19 +552,22 @@ where
64 => types::I128,
_ => unimplemented!("Unsupported integer length {}", ctrl_ty.bits()),
};
let mut new_vec = SimdVec::new();
let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
let arg1 = extractlanes(&arg(1)?, ctrl_ty.lane_type())?;
for (x, y) in arg0.into_iter().zip(arg1) {
let x: V = Value::int(x, double_length)?;
let y: V = Value::int(y, double_length)?;
new_vec.push(
Value::mul(x, y)?
.convert(ValueConversionKind::ExtractUpper(ctrl_ty.lane_type()))?
.into_int()?,
)
}
assign(vectorizelanes(&new_vec, ctrl_ty)?)
let res = arg0
.into_iter()
.zip(arg1)
.map(|(x, y)| {
let x = x.convert(ValueConversionKind::ZeroExtend(double_length))?;
let y = y.convert(ValueConversionKind::ZeroExtend(double_length))?;
Ok(Value::mul(x, y)?
.convert(ValueConversionKind::ExtractUpper(ctrl_ty.lane_type()))?)
})
.collect::<ValueResult<SimdVec<V>>>()?;
assign(vectorizelanes(&res, ctrl_ty)?)
} else {
let double_length = match ctrl_ty.bits() {
8 => types::I16,
@@ -762,21 +780,32 @@ where
Opcode::Swizzle => unimplemented!("Swizzle"),
Opcode::Splat => unimplemented!("Splat"),
Opcode::Insertlane => {
let idx = imm().into_int()? as usize;
let mut vector = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
vector[Value::into_int(imm())? as usize] = arg(1)?.into_int()?;
vector[idx] = arg(1)?;
assign(vectorizelanes(&vector, ctrl_ty)?)
}
Opcode::Extractlane => {
let value =
extractlanes(&arg(0)?, ctrl_ty.lane_type())?[Value::into_int(imm())? as usize];
assign(Value::int(value, ctrl_ty.lane_type())?)
let idx = imm().into_int()? as usize;
let lanes = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
assign(lanes[idx].clone())
}
Opcode::VhighBits => unimplemented!("VhighBits"),
Opcode::Vsplit => unimplemented!("Vsplit"),
Opcode::Vconcat => unimplemented!("Vconcat"),
Opcode::Vselect => unimplemented!("Vselect"),
Opcode::VanyTrue => unimplemented!("VanyTrue"),
Opcode::VallTrue => unimplemented!("VallTrue"),
Opcode::VanyTrue => assign(fold_vector(
arg(0)?,
ctrl_ty,
V::bool(false, types::B1)?,
|acc, lane| acc.or(lane),
)?),
Opcode::VallTrue => assign(fold_vector(
arg(0)?,
ctrl_ty,
V::bool(true, types::B1)?,
|acc, lane| acc.and(lane),
)?),
Opcode::SwidenLow => unimplemented!("SwidenLow"),
Opcode::SwidenHigh => unimplemented!("SwidenHigh"),
Opcode::UwidenLow => unimplemented!("UwidenLow"),
@@ -979,18 +1008,18 @@ where
})
}
type SimdVec = SmallVec<[i128; 4]>;
type SimdVec<V> = SmallVec<[V; 4]>;
/// Converts a SIMD vector value into a Rust vector of i128 for processing.
fn extractlanes<V>(x: &V, lane_type: types::Type) -> ValueResult<SimdVec>
/// Converts a SIMD vector value into a Rust array of [Value] for processing.
fn extractlanes<V>(x: &V, lane_type: types::Type) -> ValueResult<SimdVec<V>>
where
V: Value,
{
let iterations = match lane_type {
types::I8 => 1,
types::I16 => 2,
types::I32 => 4,
types::I64 => 8,
types::I8 | types::B1 | types::B8 => 1,
types::I16 | types::B16 => 2,
types::I32 | types::B32 => 4,
types::I64 | types::B64 => 8,
_ => unimplemented!("Only 128-bit vectors are currently supported."),
};
@@ -1004,13 +1033,19 @@ where
for j in 0..iterations {
lane += (x[i + j] as i128) << (8 * j);
}
lanes.push(lane);
let lane_val: V = if lane_type.is_bool() {
Value::bool(lane != 0, lane_type)?
} else {
Value::int(lane, lane_type)?
};
lanes.push(lane_val);
}
return Ok(lanes);
}
/// Convert a Rust array of i128s back into a `Value::vector`.
fn vectorizelanes<V>(x: &[i128], vector_type: types::Type) -> ValueResult<V>
fn vectorizelanes<V>(x: &[V], vector_type: types::Type) -> ValueResult<V>
where
V: Value,
{
@@ -1023,7 +1058,7 @@ where
};
let mut result: [u8; 16] = [0; 16];
for (i, val) in x.iter().enumerate() {
let val = *val;
let val = val.clone().into_int()?;
for j in 0..iterations {
result[(i * iterations) + j] = (val >> (8 * j)) as u8;
}
@@ -1031,6 +1066,17 @@ where
Value::vector(result, vector_type)
}
/// Reduces the lanes of the vector `v` to a single value.
///
/// The lanes are extracted using the lane type of `ty` and visited in the
/// order `extractlanes` yields them. Starting from `init`, every lane is
/// combined into the running accumulator with `op`; the first error returned
/// by `op` (or by the lane extraction itself) aborts the fold and is
/// propagated to the caller.
fn fold_vector<V, F>(v: V, ty: types::Type, init: V, mut op: F) -> ValueResult<V>
where
    V: Value,
    F: FnMut(V, V) -> ValueResult<V>,
{
    let lanes = extractlanes(&v, ty.lane_type())?;
    let mut accumulator = init;
    for lane in lanes {
        accumulator = op(accumulator, lane)?;
    }
    Ok(accumulator)
}
/// Performs the supplied binary arithmetic `op` on two SIMD vectors.
fn binary_arith<V, F>(x: V, y: V, vector_type: types::Type, op: F, unsigned: bool) -> ValueResult<V>
where
@@ -1039,20 +1085,19 @@ where
{
let arg0 = extractlanes(&x, vector_type.lane_type())?;
let arg1 = extractlanes(&y, vector_type.lane_type())?;
let mut result = Vec::new();
for (lhs, rhs) in arg0.into_iter().zip(arg1) {
// The initial Value::int needs to be on a separate line so the
// compiler can determine concrete types.
let mut lhs: V = Value::int(lhs, vector_type.lane_type())?;
let mut rhs: V = Value::int(rhs, vector_type.lane_type())?;
let result = arg0
.into_iter()
.zip(arg1)
.map(|(mut lhs, mut rhs)| {
if unsigned {
lhs = lhs.convert(ValueConversionKind::ToUnsigned)?;
rhs = rhs.convert(ValueConversionKind::ToUnsigned)?;
}
let sum = op(lhs, rhs)?;
let sum = sum.into_int()?;
result.push(sum);
}
Ok(op(lhs, rhs)?)
})
.collect::<ValueResult<SimdVec<V>>>()?;
vectorizelanes(&result, vector_type)
}
@@ -1066,13 +1111,12 @@ where
{
let arg0 = extractlanes(&x, vector_type.lane_type())?;
let arg1 = extractlanes(&y, vector_type.lane_type())?;
let mut result = SimdVec::new();
for pair in arg0.chunks(2).chain(arg1.chunks(2)) {
let lhs: V = Value::int(pair[0], vector_type.lane_type())?;
let rhs: V = Value::int(pair[1], vector_type.lane_type())?;
let sum = op(lhs, rhs)?;
let sum = sum.into_int()?;
result.push(sum);
}
let result = arg0
.chunks(2)
.chain(arg1.chunks(2))
.map(|pair| op(pair[0].clone(), pair[1].clone()))
.collect::<ValueResult<SimdVec<V>>>()?;
vectorizelanes(&result, vector_type)
}

View File

@@ -290,15 +290,15 @@ impl Value for DataValue {
_ => unimplemented!("conversion: {} -> {:?}", self.ty(), kind),
},
ValueConversionKind::ZeroExtend(ty) => match (self, ty) {
(DataValue::I8(_), types::I16) => unimplemented!(),
(DataValue::I8(_), types::I32) => unimplemented!(),
(DataValue::I8(_), types::I64) => unimplemented!(),
(DataValue::I16(_), types::I32) => unimplemented!(),
(DataValue::I16(_), types::I64) => unimplemented!(),
(DataValue::I8(n), types::I16) => DataValue::I16(n as u8 as i16),
(DataValue::I8(n), types::I32) => DataValue::I32(n as u8 as i32),
(DataValue::I8(n), types::I64) => DataValue::I64(n as u8 as i64),
(DataValue::I16(n), types::I32) => DataValue::I32(n as u16 as i32),
(DataValue::I16(n), types::I64) => DataValue::I64(n as u16 as i64),
(DataValue::U32(n), types::I64) => DataValue::U64(n as u64),
(DataValue::I32(n), types::I64) => DataValue::I64(n as u32 as i64),
(DataValue::U64(n), types::I64) => DataValue::U64(n),
(DataValue::I64(n), types::I64) => DataValue::I64(n),
(DataValue::I64(n), types::I128) => DataValue::I128(n as u64 as i128),
(from, to) if from.ty() == to => from,
(dv, _) => unimplemented!("conversion: {} -> {:?}", dv.ty(), kind),
},
ValueConversionKind::ToUnsigned => match self {
@@ -428,7 +428,7 @@ impl Value for DataValue {
}
fn or(self, other: Self) -> ValueResult<Self> {
binary_match!(|(&self, &other); [I8, I16, I32, I64])
binary_match!(|(&self, &other); [B, I8, I16, I32, I64])
}
fn xor(self, other: Self) -> ValueResult<Self> {