Add scalar_to_vector instruction
Moves scalar values in a GPR register to an FPR register
This commit is contained in:
@@ -10,7 +10,8 @@ use crate::cdsl::instructions::{
|
|||||||
use crate::cdsl::recipes::{EncodingRecipe, EncodingRecipeNumber, Recipes};
|
use crate::cdsl::recipes::{EncodingRecipe, EncodingRecipeNumber, Recipes};
|
||||||
use crate::cdsl::settings::{SettingGroup, SettingPredicateNumber};
|
use crate::cdsl::settings::{SettingGroup, SettingPredicateNumber};
|
||||||
|
|
||||||
use crate::shared::types::Bool::B1;
|
use crate::cdsl::types::ValueType;
|
||||||
|
use crate::shared::types::Bool::{B1, B16, B32, B64, B8};
|
||||||
use crate::shared::types::Float::{F32, F64};
|
use crate::shared::types::Float::{F32, F64};
|
||||||
use crate::shared::types::Int::{I16, I32, I64, I8};
|
use crate::shared::types::Int::{I16, I32, I64, I8};
|
||||||
use crate::shared::Definitions as SharedDefinitions;
|
use crate::shared::Definitions as SharedDefinitions;
|
||||||
@@ -340,6 +341,7 @@ pub fn define(
|
|||||||
let rotl_imm = shared.by_name("rotl_imm");
|
let rotl_imm = shared.by_name("rotl_imm");
|
||||||
let rotr = shared.by_name("rotr");
|
let rotr = shared.by_name("rotr");
|
||||||
let rotr_imm = shared.by_name("rotr_imm");
|
let rotr_imm = shared.by_name("rotr_imm");
|
||||||
|
let scalar_to_vector = shared.by_name("scalar_to_vector");
|
||||||
let selectif = shared.by_name("selectif");
|
let selectif = shared.by_name("selectif");
|
||||||
let sextend = shared.by_name("sextend");
|
let sextend = shared.by_name("sextend");
|
||||||
let sload16 = shared.by_name("sload16");
|
let sload16 = shared.by_name("sload16");
|
||||||
@@ -515,6 +517,7 @@ pub fn define(
|
|||||||
let use_popcnt = settings.predicate_by_name("use_popcnt");
|
let use_popcnt = settings.predicate_by_name("use_popcnt");
|
||||||
let use_lzcnt = settings.predicate_by_name("use_lzcnt");
|
let use_lzcnt = settings.predicate_by_name("use_lzcnt");
|
||||||
let use_bmi1 = settings.predicate_by_name("use_bmi1");
|
let use_bmi1 = settings.predicate_by_name("use_bmi1");
|
||||||
|
let use_sse2 = settings.predicate_by_name("use_sse2");
|
||||||
let use_sse41 = settings.predicate_by_name("use_sse41");
|
let use_sse41 = settings.predicate_by_name("use_sse41");
|
||||||
|
|
||||||
// Definitions.
|
// Definitions.
|
||||||
@@ -603,8 +606,14 @@ pub fn define(
|
|||||||
// Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix.
|
// Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix.
|
||||||
e.enc64(iconst.bind(I64), rec_pu_iq.opcodes(vec![0xb8]).rex().w());
|
e.enc64(iconst.bind(I64), rec_pu_iq.opcodes(vec![0xb8]).rex().w());
|
||||||
|
|
||||||
// Bool constants.
|
// Bool constants (uses MOV)
|
||||||
e.enc_both(bconst.bind(B1), rec_pu_id_bool.opcodes(vec![0xb8]));
|
for &ty in &[B1, B8, B16, B32] {
|
||||||
|
e.enc_both(bconst.bind(ty), rec_pu_id_bool.opcodes(vec![0xb8]));
|
||||||
|
}
|
||||||
|
e.enc64(
|
||||||
|
bconst.bind(B64),
|
||||||
|
rec_pu_id_bool.opcodes(vec![0xb8]).rex().w(),
|
||||||
|
);
|
||||||
|
|
||||||
// Shifts and rotates.
|
// Shifts and rotates.
|
||||||
// Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit
|
// Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit
|
||||||
@@ -1565,5 +1574,19 @@ pub fn define(
|
|||||||
e.enc_both(ffcmp.bind(F32), rec_fcmp.opcodes(vec![0x0f, 0x2e]));
|
e.enc_both(ffcmp.bind(F32), rec_fcmp.opcodes(vec![0x0f, 0x2e]));
|
||||||
e.enc_both(ffcmp.bind(F64), rec_fcmp.opcodes(vec![0x66, 0x0f, 0x2e]));
|
e.enc_both(ffcmp.bind(F64), rec_fcmp.opcodes(vec![0x66, 0x0f, 0x2e]));
|
||||||
|
|
||||||
|
// SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according
|
||||||
|
// to the Intel manual: "When the destination operand is an XMM register, the source operand is
|
||||||
|
// written to the low doubleword of the register and the register is zero-extended to 128 bits."
|
||||||
|
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() >= 8) {
|
||||||
|
let number_of_lanes = 128 / ty.lane_bits();
|
||||||
|
let instruction = scalar_to_vector.bind_vector(ty, number_of_lanes).bind(ty);
|
||||||
|
let template = rec_frurm.opcodes(vec![0x66, 0x0f, 0x6e]); // MOVD/MOVQ
|
||||||
|
if ty.lane_bits() < 64 {
|
||||||
|
// no 32-bit encodings for 64-bit widths
|
||||||
|
e.enc32_isap(instruction.clone(), template.clone(), use_sse2);
|
||||||
|
}
|
||||||
|
e.enc_x86_64_isap(instruction, template, use_sse2);
|
||||||
|
}
|
||||||
|
|
||||||
e
|
e
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -106,6 +106,16 @@ pub fn define(
|
|||||||
.build(),
|
.build(),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let Scalar = &TypeVar::new(
|
||||||
|
"scalar",
|
||||||
|
"Any scalar value that can be used as a lane in a vector",
|
||||||
|
TypeSetBuilder::new()
|
||||||
|
.bools(Interval::All)
|
||||||
|
.ints(Interval::All)
|
||||||
|
.floats(Interval::All)
|
||||||
|
.build(),
|
||||||
|
);
|
||||||
|
|
||||||
let Any = &TypeVar::new(
|
let Any = &TypeVar::new(
|
||||||
"Any",
|
"Any",
|
||||||
"Any integer, float, or boolean scalar or vector type",
|
"Any integer, float, or boolean scalar or vector type",
|
||||||
@@ -2630,6 +2640,22 @@ pub fn define(
|
|||||||
.operands_out(vec![a]),
|
.operands_out(vec![a]),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let s = &operand_doc("s", Scalar, "A scalar value");
|
||||||
|
let a = &operand_doc("a", TxN, "A vector value (i.e. held in an XMM register)");
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"scalar_to_vector",
|
||||||
|
r#"
|
||||||
|
Scalar To Vector -- move a value out of a scalar register and into a vector
|
||||||
|
register; the scalar will be moved to the lowest-order bits of the vector
|
||||||
|
register and any higher bits will be zeroed.
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.operands_in(vec![s])
|
||||||
|
.operands_out(vec![a]),
|
||||||
|
);
|
||||||
|
|
||||||
let Bool = &TypeVar::new(
|
let Bool = &TypeVar::new(
|
||||||
"Bool",
|
"Bool",
|
||||||
"A scalar or vector boolean type",
|
"A scalar or vector boolean type",
|
||||||
|
|||||||
32
cranelift/filetests/filetests/isa/x86/scalar_to_vector.clif
Normal file
32
cranelift/filetests/filetests/isa/x86/scalar_to_vector.clif
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
test binemit
|
||||||
|
set opt_level=best
|
||||||
|
set enable_simd
|
||||||
|
target x86_64 has_sse2=true
|
||||||
|
|
||||||
|
function %test_scalar_to_vector_b8() {
|
||||||
|
ebb0:
|
||||||
|
[-, %rax] v0 = bconst.b8 true
|
||||||
|
[-, %xmm0] v1 = scalar_to_vector.b8x16 v0 ; bin: 66 0f 6e c0
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
function %test_scalar_to_vector_i16() {
|
||||||
|
ebb0:
|
||||||
|
[-, %rbx] v0 = iconst.i16 42
|
||||||
|
[-, %xmm2] v1 = scalar_to_vector.i16x8 v0 ; bin: 66 0f 6e d3
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
function %test_scalar_to_vector_f32() {
|
||||||
|
ebb0:
|
||||||
|
[-, %rcx] v0 = f32const 0x0.42
|
||||||
|
[-, %xmm3] v1 = scalar_to_vector.f32x4 v0 ; bin: 66 0f 6e d9
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
function %test_scalar_to_vector_i64() {
|
||||||
|
ebb0:
|
||||||
|
[-, %rdx] v0 = iconst.i64 42
|
||||||
|
[-, %xmm7] v1 = scalar_to_vector.i64x2 v0 ; bin: 66 0f 6e fa
|
||||||
|
return
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user