Add x86 implementation of 8x16 ishl

This involves some large mask tables that may hurt code size but reduce the number of instructions. See https://github.com/WebAssembly/simd/issues/117 for a more in-depth discussion on this.
2020-03-24 15:57:23 -07:00
parent d8920c0125
commit 5f0286696c
4 changed files with 98 additions and 12 deletions
--- a/cranelift/codegen/meta/src/isa/x86/legalize.rs
+++ b/cranelift/codegen/meta/src/isa/x86/legalize.rs
@@ -355,7 +355,6 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
    let x86_pminu = x86_instructions.by_name("x86_pminu");
    let x86_pshufb = x86_instructions.by_name("x86_pshufb");
    let x86_pshufd = x86_instructions.by_name("x86_pshufd");
-    let x86_psll = x86_instructions.by_name("x86_psll");
    let x86_psra = x86_instructions.by_name("x86_psra");
    let x86_ptest = x86_instructions.by_name("x86_ptest");

@@ -485,16 +484,6 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
        );
    }

-    // SIMD shift left (logical)
-    for ty in &[I16, I32, I64] {
-        let ishl = ishl.bind(vector(*ty, sse_vector_size));
-        let bitcast = bitcast.bind(vector(I64, sse_vector_size));
-        narrow.legalize(
-            def!(a = ishl(x, y)),
-            vec![def!(b = bitcast(y)), def!(a = x86_psll(x, b))],
-        );
-    }
-
    // SIMD shift left (arithmetic)
    for ty in &[I16, I32, I64] {
        let sshr = sshr.bind(vector(*ty, sse_vector_size));
@@ -685,6 +674,7 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
    narrow.custom_legalize(insertlane, "convert_insertlane");
    narrow.custom_legalize(ineg, "convert_ineg");
    narrow.custom_legalize(ushr, "convert_ushr");
+    narrow.custom_legalize(ishl, "convert_ishl");

    narrow.build_and_add_to(&mut shared.transform_groups);
 }