diff --git a/cranelift/codegen/meta/src/cdsl/types.rs b/cranelift/codegen/meta/src/cdsl/types.rs index f431bb3ed7..92b9ab3a2f 100644 --- a/cranelift/codegen/meta/src/cdsl/types.rs +++ b/cranelift/codegen/meta/src/cdsl/types.rs @@ -264,6 +264,27 @@ impl LaneType { ValueType::Vector(VectorType::new(*self, lanes.into())) } } + + pub fn is_float(&self) -> bool { + match self { + LaneType::FloatType(_) => true, + _ => false, + } + } + + pub fn is_int(&self) -> bool { + match self { + LaneType::IntType(_) => true, + _ => false, + } + } + + pub fn is_bool(&self) -> bool { + match self { + LaneType::BoolType(_) => true, + _ => false, + } + } } impl fmt::Display for LaneType { diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs index 30246b85ad..b95705f9bc 100644 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs @@ -4,8 +4,8 @@ use std::collections::HashMap; use crate::cdsl::encodings::{Encoding, EncodingBuilder}; use crate::cdsl::instructions::{ - BoundInstruction, InstSpec, Instruction, InstructionGroup, InstructionPredicate, - InstructionPredicateNode, InstructionPredicateRegistry, + InstSpec, Instruction, InstructionGroup, InstructionPredicate, InstructionPredicateNode, + InstructionPredicateRegistry, }; use crate::cdsl::recipes::{EncodingRecipe, EncodingRecipeNumber, Recipes}; use crate::cdsl::settings::{SettingGroup, SettingPredicateNumber}; @@ -279,6 +279,17 @@ impl PerCpuModeEncodings { } } + /// Add the same encoding/recipe pairing to both X86_32 and X86_64 + fn enc_32_64_rec( + &mut self, + inst: impl Clone + Into, + recipe: &EncodingRecipe, + bits: u16, + ) { + self.enc32_rec(inst.clone(), recipe, bits); + self.enc64_rec(inst, recipe, bits); + } + /// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX, operand /// binding) has already happened. fn enc_32_64_maybe_isap( @@ -1761,12 +1772,16 @@ pub(crate) fn define( // written to the low doubleword of the register and the regiser is zero-extended to 128 bits." for ty in ValueType::all_lane_types().filter(allowed_simd_type) { let instruction = scalar_to_vector.bind_vector_from_lane(ty, sse_vector_size); - let template = rec_frurm.opcodes(vec![0x66, 0x0f, 0x6e]); // MOVD/MOVQ - if ty.lane_bits() < 64 { - // no 32-bit encodings for 64-bit widths - e.enc32(instruction.clone(), template.clone()); + if ty.is_float() { + e.enc_32_64_rec(instruction, rec_null_fpr, 0); + } else { + let template = rec_frurm.opcodes(vec![0x66, 0x0f, 0x6e]); // MOVD/MOVQ + if ty.lane_bits() < 64 { + // no 32-bit encodings for 64-bit widths + e.enc32(instruction.clone(), template.clone()); + } + e.enc_x86_64(instruction, template); } - e.enc_x86_64(instruction, template); } // SIMD insertlane @@ -1811,37 +1826,34 @@ pub(crate) fn define( } } - // helper for generating null encodings for FPRs on both 32- and 64-bit architectures - let mut null_encode_32_64 = |instruction: BoundInstruction| { - e.enc32_rec(instruction.clone(), rec_null_fpr, 0); - e.enc64_rec(instruction, rec_null_fpr, 0); - }; - // SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8) for from_type in ValueType::all_lane_types().filter(allowed_simd_type) { for to_type in ValueType::all_lane_types().filter(|t| allowed_simd_type(t) && *t != from_type) { - null_encode_32_64( - raw_bitcast - .bind_vector_from_lane(to_type, sse_vector_size) - .bind_vector_from_lane(from_type, sse_vector_size), - ); + let instruction = raw_bitcast + .bind_vector_from_lane(to_type, sse_vector_size) + .bind_vector_from_lane(from_type, sse_vector_size); + e.enc_32_64_rec(instruction, rec_null_fpr, 0); } } // SIMD raw bitcast floats to vector (and back); assumes that floats are already stored in an XMM register for float_type in &[F32, F64] { for lane_type in ValueType::all_lane_types().filter(allowed_simd_type) { - null_encode_32_64( + e.enc_32_64_rec( raw_bitcast .bind_vector_from_lane(lane_type, sse_vector_size) .bind(*float_type), + rec_null_fpr, + 0, ); - null_encode_32_64( + e.enc_32_64_rec( raw_bitcast .bind(*float_type) .bind_vector_from_lane(lane_type, sse_vector_size), + rec_null_fpr, + 0, ); } } diff --git a/cranelift/filetests/filetests/isa/x86/extractlane-run.clif b/cranelift/filetests/filetests/isa/x86/extractlane-run.clif index ce8c00a933..4590bd0673 100644 --- a/cranelift/filetests/filetests/isa/x86/extractlane-run.clif +++ b/cranelift/filetests/filetests/isa/x86/extractlane-run.clif @@ -24,8 +24,7 @@ ebb0: v0 = f32const 0x42.42 v1 = vconst.f32x4 [0x00.00 0x00.00 0x00.00 0x42.42] v2 = extractlane v1, 3 - v10 = f32const 0x42.42 ; TODO this should not be necessary, v0 should be re-usable - v3 = fcmp eq v2, v10 + v3 = fcmp eq v2, v0 return v3 } ; run diff --git a/cranelift/filetests/filetests/isa/x86/scalar_to_vector.clif b/cranelift/filetests/filetests/isa/x86/scalar_to_vector-binemit.clif similarity index 80% rename from cranelift/filetests/filetests/isa/x86/scalar_to_vector.clif rename to cranelift/filetests/filetests/isa/x86/scalar_to_vector-binemit.clif index 51ddea3e7e..b26f3d2e6b 100644 --- a/cranelift/filetests/filetests/isa/x86/scalar_to_vector.clif +++ b/cranelift/filetests/filetests/isa/x86/scalar_to_vector-binemit.clif @@ -17,10 +17,10 @@ ebb0: return } -function %test_scalar_to_vector_f32() { +function %test_scalar_to_vector_b32() { ebb0: -[-, %rcx] v0 = f32const 0x0.42 -[-, %xmm3] v1 = scalar_to_vector.f32x4 v0 ; bin: 66 0f 6e d9 +[-, %rcx] v0 = bconst.b32 false +[-, %xmm3] v1 = scalar_to_vector.b32x4 v0 ; bin: 66 0f 6e d9 return } diff --git a/cranelift/filetests/filetests/isa/x86/scalar_to_vector-compile.clif b/cranelift/filetests/filetests/isa/x86/scalar_to_vector-compile.clif new file mode 100644 index 0000000000..2d2ab331f7 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/scalar_to_vector-compile.clif @@ -0,0 +1,19 @@ +test compile +set opt_level=best +set probestack_enabled=false +set enable_simd +target x86_64 + +; ensure that scalar_to_vector emits no instructions for floats (already exist in an XMM register) +function %test_scalar_to_vector_f32() -> f32x4 baldrdash_system_v { +ebb0: + v0 = f32const 0x0.42 + v1 = scalar_to_vector.f32x4 v0 + return v1 +} + +; check: ebb0 +; nextln: v2 = iconst.i32 0x3e84_0000 +; nextln: v0 = bitcast.f32 v2 +; nextln: [null_fpr#00,%xmm0] v1 = scalar_to_vector.f32x4 v0 +; nextln: return v1