Add x86 SIMD sshr and ushr

Only the shifts with applicable SSE2 instructions are implemented here: PSRL* (for ushr) only has 16-, 32-, and 64-bit instructions and PSRA* (for sshr) only has 16- and 32-bit instructions.
2019-10-07 10:38:35 -07:00
parent 808885ce56
commit f1904bffea
7 changed files with 197 additions and 2 deletions
--- a/cranelift/codegen/meta/src/isa/x86/encodings.rs
+++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs
@@ -522,6 +522,8 @@ pub(crate) fn define<'defs>(
    let x86_pshufd = x86.by_name("x86_pshufd");
    let x86_pshufb = x86.by_name("x86_pshufb");
    let x86_psll = x86.by_name("x86_psll");
+    let x86_psra = x86.by_name("x86_psra");
+    let x86_psrl = x86.by_name("x86_psrl");
    let x86_push = x86.by_name("x86_push");
    let x86_sdivmodx = x86.by_name("x86_sdivmodx");
    let x86_smulx = x86.by_name("x86_smulx");
@@ -2009,6 +2011,18 @@ pub(crate) fn define<'defs>(
        e.enc_32_64(x86_psll, rec_fa.opcodes(*opcodes));
    }

+    // SIMD shift right (logical)
+    for (ty, opcodes) in &[(I16, &PSRLW), (I32, &PSRLD), (I64, &PSRLQ)] {
+        let x86_psrl = x86_psrl.bind(vector(*ty, sse_vector_size));
+        e.enc_32_64(x86_psrl, rec_fa.opcodes(*opcodes));
+    }
+
+    // SIMD shift right (arithmetic)
+    for (ty, opcodes) in &[(I16, &PSRAW), (I32, &PSRAD)] {
+        let x86_psra = x86_psra.bind(vector(*ty, sse_vector_size));
+        e.enc_32_64(x86_psra, rec_fa.opcodes(*opcodes));
+    }
+
    // SIMD icmp using PCMPEQ*
    for ty in ValueType::all_lane_types().filter(|t| t.is_int() && allowed_simd_type(t)) {
        let (opcodes, isa_predicate): (&[_], _) = match ty.lane_bits() {