Encode swiden_low and uwiden_low for x86

2020-07-07 16:13:50 -07:00
parent fafef7db77
commit c8ddf8a34c
5 changed files with 103 additions and 10 deletions
--- a/cranelift/codegen/meta/src/isa/x86/encodings.rs
+++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs
@@ -1669,6 +1669,7 @@ fn define_simd(
    let ssub_sat = shared.by_name("ssub_sat");
    let store = shared.by_name("store");
    let store_complex = shared.by_name("store_complex");
+    let swiden_low = shared.by_name("swiden_low");
    let uadd_sat = shared.by_name("uadd_sat");
    let uload8x8 = shared.by_name("uload8x8");
    let uload8x8_complex = shared.by_name("uload8x8_complex");
@@ -1678,6 +1679,7 @@ fn define_simd(
    let uload32x2_complex = shared.by_name("uload32x2_complex");
    let snarrow = shared.by_name("snarrow");
    let unarrow = shared.by_name("unarrow");
+    let uwiden_low = shared.by_name("uwiden_low");
    let ushr_imm = shared.by_name("ushr_imm");
    let usub_sat = shared.by_name("usub_sat");
    let vconst = shared.by_name("vconst");
@@ -1915,6 +1917,16 @@ fn define_simd(
        let unarrow = unarrow.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred_maybe_isap(unarrow, rec_fa.opcodes(*opcodes), *isap);
    }
+    for (ty, swiden_opcode, uwiden_opcode) in &[
+        (I8, &PMOVSXBW[..], &PMOVZXBW[..]),
+        (I16, &PMOVSXWD[..], &PMOVZXWD[..]),
+    ] {
+        let isap = Some(use_sse41_simd);
+        let swiden_low = swiden_low.bind(vector(*ty, sse_vector_size));
+        e.enc_both_inferred_maybe_isap(swiden_low, rec_furm.opcodes(*swiden_opcode), isap);
+        let uwiden_low = uwiden_low.bind(vector(*ty, sse_vector_size));
+        e.enc_both_inferred_maybe_isap(uwiden_low, rec_furm.opcodes(*uwiden_opcode), isap);
+    }
    for ty in &[I8, I16, I32, I64] {
        e.enc_both_inferred_maybe_isap(
            x86_palignr.bind(vector(*ty, sse_vector_size)),
--- a/cranelift/codegen/meta/src/isa/x86/opcodes.rs
+++ b/cranelift/codegen/meta/src/isa/x86/opcodes.rs
@@ -477,7 +477,7 @@ pub static PMOVSXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x20];
 pub static PMOVSXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x23];

 /// Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
-/// integers in xmm1.
+/// integers in xmm1 (SSE4.1).
 pub static PMOVSXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x25];

 /// Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit
@@ -489,7 +489,7 @@ pub static PMOVZXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x30];
 pub static PMOVZXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x33];

 /// Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
-/// integers in xmm1.
+/// integers in xmm1 (SSE4.1).
 pub static PMOVZXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x35];

 /// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of