diff --git a/cranelift/codegen/meta/src/cdsl/instructions.rs b/cranelift/codegen/meta/src/cdsl/instructions.rs
index 43a879066c..2a40d5128d 100644
--- a/cranelift/codegen/meta/src/cdsl/instructions.rs
+++ b/cranelift/codegen/meta/src/cdsl/instructions.rs
@@ -181,8 +181,17 @@ impl Instruction {
         bind_ref(self.clone(), Some(reference_type.into()), Vec::new())
     }
 
-    pub fn bind_vector(&self, lane_type: impl Into<LaneType>, num_lanes: u64) -> BoundInstruction {
-        bind_vector(self.clone(), lane_type.into(), num_lanes, Vec::new())
+    pub fn bind_vector_from_lane(
+        &self,
+        lane_type: impl Into<LaneType>,
+        vector_size_in_bits: u64,
+    ) -> BoundInstruction {
+        bind_vector(
+            self.clone(),
+            lane_type.into(),
+            vector_size_in_bits,
+            Vec::new(),
+        )
     }
 
     pub fn bind_any(&self) -> BoundInstruction {
@@ -414,8 +423,17 @@ impl BoundInstruction {
         bind_ref(self.inst, Some(reference_type.into()), self.value_types)
     }
 
-    pub fn bind_vector(self, lane_type: impl Into<LaneType>, num_lanes: u64) -> BoundInstruction {
-        bind_vector(self.inst, lane_type.into(), num_lanes, self.value_types)
+    pub fn bind_vector_from_lane(
+        self,
+        lane_type: impl Into<LaneType>,
+        vector_size_in_bits: u64,
+    ) -> BoundInstruction {
+        bind_vector(
+            self.inst,
+            lane_type.into(),
+            vector_size_in_bits,
+            self.value_types,
+        )
     }
 
     pub fn bind_any(self) -> BoundInstruction {
@@ -1116,9 +1134,10 @@ fn bind_ref(
 fn bind_vector(
     inst: Instruction,
     lane_type: LaneType,
-    num_lanes: u64,
+    vector_size_in_bits: u64,
     mut value_types: Vec<ValueTypeOrAny>,
 ) -> BoundInstruction {
+    let num_lanes = vector_size_in_bits / lane_type.lane_bits();
     let vector_type = ValueType::Vector(VectorType::new(lane_type, num_lanes));
     value_types.push(ValueTypeOrAny::ValueType(vector_type));
     verify_polymorphic_binding(&inst, &value_types);
diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs
index ed4cf18d94..3b4be51a66 100644
--- a/cranelift/codegen/meta/src/isa/x86/encodings.rs
+++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs
@@ -1627,23 +1627,24 @@ pub fn define(
     e.enc_both(ffcmp.bind(F32), rec_fcmp.opcodes(vec![0x0f, 0x2e]));
     e.enc_both(ffcmp.bind(F64), rec_fcmp.opcodes(vec![0x66, 0x0f, 0x2e]));
 
+    // SIMD vector size: eventually multiple vector sizes may be supported but for now only SSE-sized vectors are available
+    let sse_vector_size: u64 = 128;
+
     // SIMD splat: before x86 can use vector data, it must be moved to XMM registers; see
     // legalize.rs for how this is done; once there, x86_pshuf* (below) is used for broadcasting the
     // value across the register
 
     // PSHUFB, 8-bit shuffle using two XMM registers
     for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) {
-        let number_of_lanes = 128 / ty.lane_bits();
-        let instruction = x86_pshufb.bind_vector(ty, number_of_lanes);
-        let template = rec_fa.nonrex().opcodes(vec![0x66, 0x0f, 0x38, 0x00]);
+        let instruction = x86_pshufb.bind_vector_from_lane(ty, sse_vector_size);
+        let template = rec_fa.nonrex().opcodes(vec![0x66, 0x0f, 0x38, 00]);
         e.enc32_isap(instruction.clone(), template.clone(), use_ssse3);
         e.enc64_isap(instruction, template, use_ssse3);
     }
 
     // PSHUFD, 32-bit shuffle using one XMM register and a u8 immediate
     for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) {
-        let number_of_lanes = 128 / ty.lane_bits();
-        let instruction = x86_pshufd.bind_vector(ty, number_of_lanes);
+        let instruction = x86_pshufd.bind_vector_from_lane(ty, sse_vector_size);
         let template = rec_r_ib_unsigned_fpr
             .nonrex()
             .opcodes(vec![0x66, 0x0f, 0x70]);
@@ -1655,8 +1656,9 @@
     // to the Intel manual: "When the destination operand is an XMM register, the source operand is
    // written to the low doubleword of the register and the regiser is zero-extended to 128 bits."
     for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() >= 8) {
-        let number_of_lanes = 128 / ty.lane_bits();
-        let instruction = scalar_to_vector.bind_vector(ty, number_of_lanes).bind(ty);
+        let instruction = scalar_to_vector
+            .bind_vector_from_lane(ty, sse_vector_size)
+            .bind(ty);
         let template = rec_frurm.opcodes(vec![0x66, 0x0f, 0x6e]); // MOVD/MOVQ
         if ty.lane_bits() < 64 {
             // no 32-bit encodings for 64-bit widths
@@ -1674,8 +1676,7 @@
 
     for ty in ValueType::all_lane_types() {
         if let Some((opcode, isap)) = insertlane_mapping.get(&ty.lane_bits()) {
-            let number_of_lanes = 128 / ty.lane_bits();
-            let instruction = insertlane.bind_vector(ty, number_of_lanes);
+            let instruction = insertlane.bind_vector_from_lane(ty, sse_vector_size);
             let template = rec_r_ib_unsigned_r.opcodes(opcode.clone());
             if ty.lane_bits() < 64 {
                 e.enc_32_64_isap(instruction, template.nonrex(), isap.clone());
@@ -1695,8 +1696,7 @@
 
     for ty in ValueType::all_lane_types() {
         if let Some((opcode, isap)) = extractlane_mapping.get(&ty.lane_bits()) {
-            let number_of_lanes = 128 / ty.lane_bits();
-            let instruction = extractlane.bind_vector(ty, number_of_lanes);
+            let instruction = extractlane.bind_vector_from_lane(ty, sse_vector_size);
             let template = rec_r_ib_unsigned_gpr.opcodes(opcode.clone());
             if ty.lane_bits() < 64 {
                 e.enc_32_64_isap(instruction, template.nonrex(), isap.clone());
@@ -1709,7 +1709,7 @@
 
     // SIMD bitcast f64 to all 8-bit-lane vectors (for legalizing splat.x8x16); assumes that f64 is stored in an XMM register
     for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) {
-        let instruction = bitcast.bind_vector(ty, 16).bind(F64);
+        let instruction = bitcast.bind_vector_from_lane(ty, sse_vector_size).bind(F64);
         e.enc32_rec(instruction.clone(), rec_null_fpr, 0);
         e.enc64_rec(instruction, rec_null_fpr, 0);
     }
@@ -1719,8 +1719,8 @@
         for to_type in ValueType::all_lane_types().filter(|t| t.lane_bits() >= 8 && *t != from_type)
         {
             let instruction = raw_bitcast
-                .bind_vector(to_type, 128 / to_type.lane_bits())
-                .bind_vector(from_type, 128 / from_type.lane_bits());
+                .bind_vector_from_lane(to_type, sse_vector_size)
+                .bind_vector_from_lane(from_type, sse_vector_size);
             e.enc32_rec(instruction.clone(), rec_null_fpr, 0);
             e.enc64_rec(instruction, rec_null_fpr, 0);
         }
diff --git a/cranelift/codegen/meta/src/isa/x86/legalize.rs b/cranelift/codegen/meta/src/isa/x86/legalize.rs
index 8423b10990..a64f631168 100644
--- a/cranelift/codegen/meta/src/isa/x86/legalize.rs
+++ b/cranelift/codegen/meta/src/isa/x86/legalize.rs
@@ -320,12 +320,15 @@ pub fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGrou
     let c = var("c");
     let d = var("d");
 
+    // SIMD vector size: eventually multiple vector sizes may be supported but for now only SSE-sized vectors are available
+    let sse_vector_size: u64 = 128;
+
     // SIMD splat: 8-bits
     for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) {
-        let splat_x8x16 = splat.bind_vector(ty, 128 / ty.lane_bits());
-        let bitcast_f64_to_any8x16 = bitcast.bind_vector(ty, 128 / ty.lane_bits()).bind(F64);
+        let splat_any8x16 = splat.bind_vector_from_lane(ty, sse_vector_size);
+        let bitcast_f64_to_any8x16 = bitcast.bind_vector_from_lane(ty, sse_vector_size).bind(F64);
         narrow.legalize(
-            def!(y = splat_x8x16(x)),
+            def!(y = splat_any8x16(x)),
             vec![
                 def!(a = scalar_to_vector(x)), // move into the lowest 8 bits of an XMM register
                 def!(b = f64const(ieee64_zero)), // zero out a different XMM register; the shuffle mask for moving the lowest byte to all other byte lanes is 0x0
@@ -337,13 +340,13 @@
 
     // SIMD splat: 16-bits
     for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 16) {
-        let splat_x16x8 = splat.bind_vector(ty, 128 / ty.lane_bits());
+        let splat_x16x8 = splat.bind_vector_from_lane(ty, sse_vector_size);
         let raw_bitcast_any16x8_to_i32x4 = raw_bitcast
-            .bind_vector(I32, 4)
-            .bind_vector(ty, 128 / ty.lane_bits());
+            .bind_vector_from_lane(I32, sse_vector_size)
+            .bind_vector_from_lane(ty, sse_vector_size);
         let raw_bitcast_i32x4_to_any16x8 = raw_bitcast
-            .bind_vector(ty, 128 / ty.lane_bits())
-            .bind_vector(I32, 4);
+            .bind_vector_from_lane(ty, sse_vector_size)
+            .bind_vector_from_lane(I32, sse_vector_size);
         narrow.legalize(
             def!(y = splat_x16x8(x)),
             vec![
@@ -358,7 +361,7 @@
 
     // SIMD splat: 32-bits
     for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) {
-        let splat_any32x4 = splat.bind_vector(ty, 128 / ty.lane_bits());
+        let splat_any32x4 = splat.bind_vector_from_lane(ty, sse_vector_size);
         narrow.legalize(
             def!(y = splat_any32x4(x)),
             vec![
@@ -370,7 +373,7 @@
 
     // SIMD splat: 64-bits
     for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 64) {
-        let splat_any64x2 = splat.bind_vector(ty, 128 / ty.lane_bits());
+        let splat_any64x2 = splat.bind_vector_from_lane(ty, sse_vector_size);
         narrow.legalize(
             def!(y = splat_any64x2(x)),
             vec![
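
Note (illustration only, not part of the patch): the rename above changes the binding argument from a lane count to a total vector width in bits, and bind_vector now derives the lane count as vector_size_in_bits / lane_type.lane_bits(). A minimal, self-contained sketch of that arithmetic, using a hypothetical standalone helper rather than the Cranelift meta crate:

// Hypothetical helper mirroring the division added to bind_vector in this patch.
fn derived_num_lanes(vector_size_in_bits: u64, lane_bits: u64) -> u64 {
    vector_size_in_bits / lane_bits
}

fn main() {
    // An SSE-sized (128-bit) vector of 32-bit lanes has 128 / 32 = 4 lanes,
    // so bind_vector_from_lane(I32, 128) binds the same vector type that the
    // old bind_vector(I32, 4) call did.
    assert_eq!(derived_num_lanes(128, 32), 4);
    // 8-bit lanes give 128 / 8 = 16 lanes, matching the old bind_vector(ty, 16) calls.
    assert_eq!(derived_num_lanes(128, 8), 16);
}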