Fix compilation

@@ -1127,6 +1127,11 @@ fn bind_vector(
     mut value_types: Vec<ValueTypeOrAny>,
 ) -> BoundInstruction {
     let num_lanes = vector_size_in_bits / lane_type.lane_bits();
+    assert!(
+        num_lanes >= 2,
+        "Minimum lane number for bind_vector is 2, found {}.",
+        num_lanes,
+    );
     let vector_type = ValueType::Vector(VectorType::new(lane_type, num_lanes));
     value_types.push(ValueTypeOrAny::ValueType(vector_type));
     verify_polymorphic_binding(&inst, &value_types);
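
The assertion catches a degenerate binding early: num_lanes is just vector_size_in_bits / lane_type.lane_bits(), so a 128-bit SSE register gives 16 lanes of i8 or 2 lanes of i64, while a lane as wide as the register itself would yield a single-lane "vector". A minimal sketch of the same check, with num_lanes_for and lane_bits as illustrative stand-ins rather than the crate's real API:

// Sketch only: `num_lanes_for` is a hypothetical helper, not part of
// cranelift-codegen-meta; the real bind_vector goes on to build a VectorType.
fn num_lanes_for(vector_size_in_bits: u64, lane_bits: u64) -> u64 {
    let num_lanes = vector_size_in_bits / lane_bits;
    assert!(
        num_lanes >= 2,
        "Minimum lane number for bind_vector is 2, found {}.",
        num_lanes,
    );
    num_lanes
}

fn main() {
    assert_eq!(num_lanes_for(128, 8), 16); // e.g. an i8x16 vector
    assert_eq!(num_lanes_for(128, 64), 2); // e.g. an i64x2 vector
    // num_lanes_for(128, 128) would panic: a single lane is not a vector.
}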

@@ -9,7 +9,7 @@ use crate::cdsl::instructions::{
 };
 use crate::cdsl::recipes::{EncodingRecipe, EncodingRecipeNumber, Recipes};
 use crate::cdsl::settings::{SettingGroup, SettingPredicateNumber};
-use crate::cdsl::types::ValueType;
+use crate::cdsl::types::{LaneType, ValueType};
 use crate::shared::types::Bool::{B1, B16, B32, B64, B8};
 use crate::shared::types::Float::{F32, F64};
 use crate::shared::types::Int::{I16, I32, I64, I8};

@@ -1735,6 +1735,8 @@ pub(crate) fn define(
     // legalize.rs for how this is done; once there, x86_pshuf* (below) is used for broadcasting the
     // value across the register
 
+    let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128;
+
     // PSHUFB, 8-bit shuffle using two XMM registers
     for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) {
         let instruction = x86_pshufb.bind_vector_from_lane(ty, sse_vector_size);
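
The new allowed_simd_type closure names the lane-width check that the loops below previously spelled out inline, and the < 128 bound keeps out lane types too wide to give at least two lanes in an SSE register. The filter predicate receives a &LaneType, which is exactly the closure's parameter type, so the closure can be passed by name; being non-capturing, it is also Copy and can be reused in several loops. A runnable sketch of the same pattern, with an illustrative Lane type standing in for the CDSL's LaneType:

// Sketch only: `Lane` is a stand-in for the CDSL's LaneType.
#[derive(Clone, Copy, PartialEq, Debug)]
struct Lane {
    bits: u64,
}

fn main() {
    let lanes = [Lane { bits: 8 }, Lane { bits: 32 }, Lane { bits: 64 }, Lane { bits: 128 }];

    // Non-capturing closure: it is Copy, so it can be passed by name to
    // any number of `filter` calls, mirroring `allowed_simd_type` in the diff.
    let allowed_simd_type = |t: &Lane| t.bits >= 8 && t.bits < 128;

    let simd: Vec<Lane> = lanes.iter().copied().filter(allowed_simd_type).collect();
    assert_eq!(simd, [Lane { bits: 8 }, Lane { bits: 32 }, Lane { bits: 64 }]);
}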

@@ -1756,7 +1758,7 @@ pub(crate) fn define(
     // SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according
     // to the Intel manual: "When the destination operand is an XMM register, the source operand is
     // written to the low doubleword of the register and the regiser is zero-extended to 128 bits."
-    for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() >= 8) {
+    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
         let instruction = scalar_to_vector.bind_vector_from_lane(ty, sse_vector_size);
         let template = rec_frurm.opcodes(vec![0x66, 0x0f, 0x6e]); // MOVD/MOVQ
         if ty.lane_bits() < 64 {

@@ -1816,8 +1818,9 @@ pub(crate) fn define(
     }
 
     // SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8)
-    for from_type in ValueType::all_lane_types().filter(|t| t.lane_bits() >= 8) {
-        for to_type in ValueType::all_lane_types().filter(|t| t.lane_bits() >= 8 && *t != from_type)
+    for from_type in ValueType::all_lane_types().filter(allowed_simd_type) {
+        for to_type in
+            ValueType::all_lane_types().filter(|t| allowed_simd_type(t) && *t != from_type)
         {
             let instruction = raw_bitcast
                 .bind_vector_from_lane(to_type, sse_vector_size)
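
In the nested to_type loop the closure cannot be passed by name, because the predicate also has to skip from_type; instead allowed_simd_type is called explicitly from inside a new closure that adds the extra condition. A small self-contained sketch of that wrapping, using plain bit widths instead of real lane types:

// Sketch only: lane widths as plain bit counts, not cranelift types.
fn main() {
    let widths = [8u64, 16, 32, 64, 128];
    let allowed_simd_type = |bits: &u64| *bits >= 8 && *bits < 128;

    let from_bits = 32u64;
    let to_bits: Vec<u64> = widths
        .iter()
        .copied()
        // Call the named predicate from a wrapping closure so the extra
        // "not the type we started from" condition can be added.
        .filter(|bits| allowed_simd_type(bits) && *bits != from_bits)
        .collect();
    assert_eq!(to_bits, [8, 16, 64]);
}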

@@ -1833,7 +1836,7 @@ pub(crate) fn define(
     // for that; alternately, constants could be loaded into XMM registers using a sequence like:
     // MOVQ + MOVHPD + MOVQ + MOVLPD (this allows the constants to be immediates instead of stored
     // in memory) but some performance measurements are needed.
-    for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() >= 8) {
+    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
         let instruction = vconst.bind_vector_from_lane(ty, sse_vector_size);
         let template = rec_vconst.nonrex().opcodes(vec![0x0f, 0x10]);
         e.enc_32_64_maybe_isap(instruction, template, None); // from SSE