x64: lower i64x2.imul to VPMULLQ when possible

This adds the machinery to encode the VPMULLQ instruction, whose 128-bit form requires both the AVX512VL and AVX512DQ feature sets. When both feature sets are available, we lower `i64x2.imul` to this single instruction instead of a lengthy 12-instruction sequence.
2021-05-10 16:25:03 -07:00
parent 5929a5e6ee
commit e676589b0c
5 changed files with 195 additions and 91 deletions
--- a/cranelift/codegen/src/isa/x64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit.rs
@@ -128,6 +128,7 @@ pub(crate) fn emit(
            InstructionSet::BMI2 => info.isa_flags.has_bmi2(),
            InstructionSet::AVX512F => info.isa_flags.has_avx512f(),
            InstructionSet::AVX512VL => info.isa_flags.has_avx512vl(),
+            InstructionSet::AVX512DQ => info.isa_flags.has_avx512dq(),
        }
    };

@@ -1409,6 +1410,7 @@ pub(crate) fn emit(
        Inst::XmmUnaryRmREvex { op, src, dst } => {
            let opcode = match op {
                Avx512Opcode::Vpabsq => 0x1f,
+                _ => unimplemented!("Opcode {:?} not implemented", op),
            };
            match src {
                RegMem::Reg { reg: src } => EvexInstruction::new()
@@ -1545,6 +1547,31 @@ pub(crate) fn emit(
            }
        }

+        Inst::XmmRmREvex {
+            op,
+            src1,
+            src2,
+            dst,
+        } => {
+            let opcode = match op {
+                Avx512Opcode::Vpmullq => 0x40,
+                _ => unimplemented!("Opcode {:?} not implemented", op),
+            };
+            match src1 {
+                RegMem::Reg { reg: src } => EvexInstruction::new()
+                    .length(EvexVectorLength::V128)
+                    .prefix(LegacyPrefixes::_66)
+                    .map(OpcodeMap::_0F38)
+                    .w(true)
+                    .opcode(opcode)
+                    .reg(dst.to_reg().get_hw_encoding())
+                    .rm(src.get_hw_encoding())
+                    .vvvvv(src2.get_hw_encoding())
+                    .encode(sink),
+                _ => todo!(),
+            };
+        }
+
        Inst::XmmMinMaxSeq {
            size,
            is_min,