Avoid unnecessary lane calculations in codegen code
This refactor moves the calculation of the number of lanes closer to the code where the Instruction/BoundInstruction is bound.
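For context, the new bind_vector_from_lane helper folds the `128 / ty.lane_bits()` arithmetic that each call site used to repeat into the binding itself. Below is a minimal, self-contained sketch of the idea, not the meta crate's real definitions: LaneType, BoundInstruction, and Instruction are illustrative stand-ins.

    // Illustrative stand-ins for the meta crate's DSL types; only the shape
    // of the refactor is real, the types here are hypothetical.
    #[derive(Clone, Copy)]
    struct LaneType {
        bits: u64,
    }

    impl LaneType {
        fn lane_bits(&self) -> u64 {
            self.bits
        }
    }

    #[derive(Debug)]
    struct BoundInstruction {
        lane_bits: u64,
        num_lanes: u64,
    }

    struct Instruction;

    impl Instruction {
        // Old style: every call site computed the lane count itself,
        // e.g. `splat.bind_vector(ty, 128 / ty.lane_bits())`.
        fn bind_vector(&self, ty: LaneType, num_lanes: u64) -> BoundInstruction {
            BoundInstruction {
                lane_bits: ty.lane_bits(),
                num_lanes,
            }
        }

        // New style: callers pass the total vector width in bits and the
        // lane count is derived in exactly one place.
        fn bind_vector_from_lane(&self, ty: LaneType, vector_size_in_bits: u64) -> BoundInstruction {
            self.bind_vector(ty, vector_size_in_bits / ty.lane_bits())
        }
    }

    fn main() {
        let splat = Instruction;
        let ty = LaneType { bits: 16 };
        let sse_vector_size: u64 = 128;
        // Equivalent to the old `splat.bind_vector(ty, 128 / ty.lane_bits())`.
        let bound = splat.bind_vector_from_lane(ty, sse_vector_size);
        assert_eq!(bound.num_lanes, 8); // a 128-bit SSE vector holds eight 16-bit lanes
        println!("{:?}", bound);
    }

Besides removing the repeated arithmetic, this leaves a single seam for supporting other vector widths later, matching the new comment in the diff that only SSE-sized vectors are available for now.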
@@ -320,12 +320,15 @@ pub fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGrou
     let c = var("c");
     let d = var("d");
 
+    // SIMD vector size: eventually multiple vector sizes may be supported but for now only SSE-sized vectors are available
+    let sse_vector_size: u64 = 128;
+
     // SIMD splat: 8-bits
     for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) {
-        let splat_x8x16 = splat.bind_vector(ty, 128 / ty.lane_bits());
-        let bitcast_f64_to_any8x16 = bitcast.bind_vector(ty, 128 / ty.lane_bits()).bind(F64);
+        let splat_any8x16 = splat.bind_vector_from_lane(ty, sse_vector_size);
+        let bitcast_f64_to_any8x16 = bitcast.bind_vector_from_lane(ty, sse_vector_size).bind(F64);
         narrow.legalize(
-            def!(y = splat_x8x16(x)),
+            def!(y = splat_any8x16(x)),
             vec![
                 def!(a = scalar_to_vector(x)), // move into the lowest 8 bits of an XMM register
                 def!(b = f64const(ieee64_zero)), // zero out a different XMM register; the shuffle mask for moving the lowest byte to all other byte lanes is 0x0
@@ -337,13 +340,13 @@ pub fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGrou
 
     // SIMD splat: 16-bits
     for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 16) {
-        let splat_x16x8 = splat.bind_vector(ty, 128 / ty.lane_bits());
+        let splat_x16x8 = splat.bind_vector_from_lane(ty, sse_vector_size);
         let raw_bitcast_any16x8_to_i32x4 = raw_bitcast
-            .bind_vector(I32, 4)
-            .bind_vector(ty, 128 / ty.lane_bits());
+            .bind_vector_from_lane(I32, sse_vector_size)
+            .bind_vector_from_lane(ty, sse_vector_size);
         let raw_bitcast_i32x4_to_any16x8 = raw_bitcast
-            .bind_vector(ty, 128 / ty.lane_bits())
-            .bind_vector(I32, 4);
+            .bind_vector_from_lane(ty, sse_vector_size)
+            .bind_vector_from_lane(I32, sse_vector_size);
         narrow.legalize(
             def!(y = splat_x16x8(x)),
             vec![
@@ -358,7 +361,7 @@ pub fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGrou
 
     // SIMD splat: 32-bits
     for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) {
-        let splat_any32x4 = splat.bind_vector(ty, 128 / ty.lane_bits());
+        let splat_any32x4 = splat.bind_vector_from_lane(ty, sse_vector_size);
         narrow.legalize(
             def!(y = splat_any32x4(x)),
             vec![
@@ -370,7 +373,7 @@ pub fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGrou
 
     // SIMD splat: 64-bits
    for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 64) {
-        let splat_any64x2 = splat.bind_vector(ty, 128 / ty.lane_bits());
+        let splat_any64x2 = splat.bind_vector_from_lane(ty, sse_vector_size);
         narrow.legalize(
             def!(y = splat_any64x2(x)),
             vec![