Fix compilation

@@ -1127,6 +1127,11 @@ fn bind_vector(
     mut value_types: Vec<ValueTypeOrAny>,
 ) -> BoundInstruction {
     let num_lanes = vector_size_in_bits / lane_type.lane_bits();
+    assert!(
+        num_lanes >= 2,
+        "Minimum lane number for bind_vector is 2, found {}.",
+        num_lanes,
+    );
     let vector_type = ValueType::Vector(VectorType::new(lane_type, num_lanes));
     value_types.push(ValueTypeOrAny::ValueType(vector_type));
     verify_polymorphic_binding(&inst, &value_types);
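Note: a minimal standalone sketch of what the new guard enforces. The helper name checked_num_lanes and the bare integer arguments are hypothetical, not the cranelift-codegen-meta API; the point is only that the lane count must come out to at least 2.

    // Hypothetical reduction of bind_vector's new assertion.
    fn checked_num_lanes(vector_size_in_bits: u64, lane_bits: u64) -> u64 {
        let num_lanes = vector_size_in_bits / lane_bits;
        assert!(
            num_lanes >= 2,
            "Minimum lane number for bind_vector is 2, found {}.",
            num_lanes,
        );
        num_lanes
    }

    fn main() {
        assert_eq!(checked_num_lanes(128, 8), 16); // an I8X16-style binding
        assert_eq!(checked_num_lanes(128, 64), 2); // an I64X2-style binding: smallest allowed
        // checked_num_lanes(128, 128) would panic: a one-lane "vector" is rejected.
    }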
@@ -9,7 +9,7 @@ use crate::cdsl::instructions::{
 };
 use crate::cdsl::recipes::{EncodingRecipe, EncodingRecipeNumber, Recipes};
 use crate::cdsl::settings::{SettingGroup, SettingPredicateNumber};
-use crate::cdsl::types::ValueType;
+use crate::cdsl::types::{LaneType, ValueType};
 use crate::shared::types::Bool::{B1, B16, B32, B64, B8};
 use crate::shared::types::Float::{F32, F64};
 use crate::shared::types::Int::{I16, I32, I64, I8};

@@ -1735,6 +1735,8 @@ pub(crate) fn define(
     // legalize.rs for how this is done; once there, x86_pshuf* (below) is used for broadcasting the
     // value across the register
 
+    let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128;
+
     // PSHUFB, 8-bit shuffle using two XMM registers
     for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) {
        let instruction = x86_pshufb.bind_vector_from_lane(ty, sse_vector_size);
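Note: the allowed_simd_type closure introduced above centralizes the lane filter that the loops below previously wrote inline as t.lane_bits() >= 8, and additionally excludes 128-bit lanes; with a 128-bit sse_vector_size, a 128-bit lane would bind to a single-lane vector and trip the new num_lanes >= 2 assertion in bind_vector. A standalone sketch of the predicate's effect (plain bit widths standing in for LaneType; the candidate list is illustrative):

    fn main() {
        // Same shape as the closure in the diff, over bare widths.
        let allowed_simd_type = |bits: &u64| *bits >= 8 && *bits < 128;
        let candidate_lane_bits = [1u64, 8, 16, 32, 64, 128];
        let kept: Vec<u64> = candidate_lane_bits
            .iter()
            .copied()
            .filter(|bits| allowed_simd_type(bits))
            .collect();
        // 1-bit (B1-style) and 128-bit lanes drop out; every kept width
        // yields at least 128 / 64 = 2 lanes in a 128-bit vector.
        assert_eq!(kept, vec![8, 16, 32, 64]);
    }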
@@ -1756,7 +1758,7 @@ pub(crate) fn define(
     // SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according
     // to the Intel manual: "When the destination operand is an XMM register, the source operand is
     // written to the low doubleword of the register and the register is zero-extended to 128 bits."
-    for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() >= 8) {
+    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
         let instruction = scalar_to_vector.bind_vector_from_lane(ty, sse_vector_size);
         let template = rec_frurm.opcodes(vec![0x66, 0x0f, 0x6e]); // MOVD/MOVQ
         if ty.lane_bits() < 64 {

@@ -1816,8 +1818,9 @@ pub(crate) fn define(
     }
 
     // SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8)
-    for from_type in ValueType::all_lane_types().filter(|t| t.lane_bits() >= 8) {
-        for to_type in ValueType::all_lane_types().filter(|t| t.lane_bits() >= 8 && *t != from_type)
+    for from_type in ValueType::all_lane_types().filter(allowed_simd_type) {
+        for to_type in
+            ValueType::all_lane_types().filter(|t| allowed_simd_type(t) && *t != from_type)
         {
             let instruction = raw_bitcast
                 .bind_vector_from_lane(to_type, sse_vector_size)
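Note: the nested loops above emit one raw_bitcast encoding for every ordered pair of distinct allowed lane types, i.e. n * (n - 1) encodings for n lane types. A standalone sketch of that pairing (strings standing in for lane types; the list is illustrative):

    fn main() {
        let lanes = ["i8x16", "i16x8", "i32x4", "i64x2", "f32x4", "f64x2"];
        let mut pairs = Vec::new();
        for from_type in lanes.iter() {
            for to_type in lanes.iter().filter(|t| *t != from_type) {
                pairs.push((*from_type, *to_type));
            }
        }
        // Every 128-bit vector type gets a bitcast to every other one.
        assert_eq!(pairs.len(), lanes.len() * (lanes.len() - 1));
    }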
@@ -1833,7 +1836,7 @@ pub(crate) fn define(
     // for that; alternately, constants could be loaded into XMM registers using a sequence like:
     // MOVQ + MOVHPD + MOVQ + MOVLPD (this allows the constants to be immediates instead of stored
     // in memory) but some performance measurements are needed.
-    for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() >= 8) {
+    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
         let instruction = vconst.bind_vector_from_lane(ty, sse_vector_size);
         let template = rec_vconst.nonrex().opcodes(vec![0x0f, 0x10]);
         e.enc_32_64_maybe_isap(instruction, template, None); // from SSE