diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs index 99bfecafc2..506abdc53c 100644 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs @@ -10,7 +10,8 @@ use crate::cdsl::instructions::{ use crate::cdsl::recipes::{EncodingRecipe, EncodingRecipeNumber, Recipes}; use crate::cdsl::settings::{SettingGroup, SettingPredicateNumber}; -use crate::shared::types::Bool::B1; +use crate::cdsl::types::ValueType; +use crate::shared::types::Bool::{B1, B16, B32, B64, B8}; use crate::shared::types::Float::{F32, F64}; use crate::shared::types::Int::{I16, I32, I64, I8}; use crate::shared::Definitions as SharedDefinitions; @@ -340,6 +341,7 @@ pub fn define( let rotl_imm = shared.by_name("rotl_imm"); let rotr = shared.by_name("rotr"); let rotr_imm = shared.by_name("rotr_imm"); + let scalar_to_vector = shared.by_name("scalar_to_vector"); let selectif = shared.by_name("selectif"); let sextend = shared.by_name("sextend"); let sload16 = shared.by_name("sload16"); @@ -515,6 +517,7 @@ pub fn define( let use_popcnt = settings.predicate_by_name("use_popcnt"); let use_lzcnt = settings.predicate_by_name("use_lzcnt"); let use_bmi1 = settings.predicate_by_name("use_bmi1"); + let use_sse2 = settings.predicate_by_name("use_sse2"); let use_sse41 = settings.predicate_by_name("use_sse41"); // Definitions. @@ -603,8 +606,14 @@ pub fn define( // Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix. e.enc64(iconst.bind(I64), rec_pu_iq.opcodes(vec![0xb8]).rex().w()); - // Bool constants. - e.enc_both(bconst.bind(B1), rec_pu_id_bool.opcodes(vec![0xb8])); + // Bool constants (uses MOV) + for &ty in &[B1, B8, B16, B32] { + e.enc_both(bconst.bind(ty), rec_pu_id_bool.opcodes(vec![0xb8])); + } + e.enc64( + bconst.bind(B64), + rec_pu_id_bool.opcodes(vec![0xb8]).rex().w(), + ); // Shifts and rotates. 
// Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit @@ -1565,5 +1574,19 @@ pub fn define( e.enc_both(ffcmp.bind(F32), rec_fcmp.opcodes(vec![0x0f, 0x2e])); e.enc_both(ffcmp.bind(F64), rec_fcmp.opcodes(vec![0x66, 0x0f, 0x2e])); + // SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according + // to the Intel manual: "When the destination operand is an XMM register, the source operand is + // written to the low doubleword of the register and the register is zero-extended to 128 bits." + for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() >= 8) { + let number_of_lanes = 128 / ty.lane_bits(); + let instruction = scalar_to_vector.bind_vector(ty, number_of_lanes).bind(ty); + let template = rec_frurm.opcodes(vec![0x66, 0x0f, 0x6e]); // MOVD/MOVQ + if ty.lane_bits() < 64 { + // no 32-bit encodings for 64-bit widths + e.enc32_isap(instruction.clone(), template.clone(), use_sse2); + } + e.enc_x86_64_isap(instruction, template, use_sse2); + } + e } diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index 8aaa71947c..7af098f920 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -106,6 +106,16 @@ pub fn define( .build(), ); + let Scalar = &TypeVar::new( + "scalar", + "Any scalar value that can be used as a lane in a vector", + TypeSetBuilder::new() + .bools(Interval::All) + .ints(Interval::All) + .floats(Interval::All) + .build(), + ); + let Any = &TypeVar::new( "Any", "Any integer, float, or boolean scalar or vector type", @@ -2630,6 +2640,22 @@ pub fn define( .operands_out(vec![a]), ); + let s = &operand_doc("s", Scalar, "A scalar value"); + let a = &operand_doc("a", TxN, "A vector value (i.e. 
held in an XMM register)"); + + ig.push( + Inst::new( + "scalar_to_vector", + r#" + Scalar To Vector -- move a value out of a scalar register and into a vector + register; the scalar will be moved to the lowest-order bits of the vector + register and any higher bits will be zeroed. + "#, + ) + .operands_in(vec![s]) + .operands_out(vec![a]), + ); + let Bool = &TypeVar::new( "Bool", "A scalar or vector boolean type", diff --git a/cranelift/filetests/filetests/isa/x86/scalar_to_vector.clif b/cranelift/filetests/filetests/isa/x86/scalar_to_vector.clif new file mode 100644 index 0000000000..6c77dfafdb --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/scalar_to_vector.clif @@ -0,0 +1,32 @@ +test binemit +set opt_level=best +set enable_simd +target x86_64 has_sse2=true + +function %test_scalar_to_vector_b8() { +ebb0: +[-, %rax] v0 = bconst.b8 true +[-, %xmm0] v1 = scalar_to_vector.b8x16 v0 ; bin: 66 0f 6e c0 + return +} + +function %test_scalar_to_vector_i16() { +ebb0: +[-, %rbx] v0 = iconst.i16 42 +[-, %xmm2] v1 = scalar_to_vector.i16x8 v0 ; bin: 66 0f 6e d3 + return +} + +function %test_scalar_to_vector_f32() { +ebb0: +[-, %rcx] v0 = f32const 0x0.42 +[-, %xmm3] v1 = scalar_to_vector.f32x4 v0 ; bin: 66 0f 6e d9 + return +} + +function %test_scalar_to_vector_i64() { +ebb0: +[-, %rdx] v0 = iconst.i64 42 +[-, %xmm7] v1 = scalar_to_vector.i64x2 v0 ; bin: 66 0f 6e fa + return +}