machinst x64: use zero-latency move instructions for f32/f64;

As found by @julian-seward1, MOVSS/MOVSD aren't listed in the "Zero-Latency MOV Instructions" section of the Intel optimization manual. Use MOVAPS instead for f32/f64 register-to-register moves.
2020-10-06 18:30:14 +02:00
parent fc430eef76
commit 84ac3feef8
1 changed files with 5 additions and 3 deletions
--- a/cranelift/codegen/src/isa/x64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -2401,10 +2401,12 @@ impl MachInst for Inst {
        match rc_dst {
            RegClass::I64 => Inst::mov_r_r(true, src_reg, dst_reg),
            RegClass::V128 => {
                // The Intel optimization manual, in "3.5.1.13 Zero-Latency MOV Instructions",
                // doesn't list MOVSS/MOVSD among the zero-latency instructions. Use MOVAPS for
                // those moves instead: it may write more lanes than we need, but it is specified
                // to have zero latency.
                let opcode = match ty {
-                    types::F32 => SseOpcode::Movss,
+                    types::F32 | types::F64 | types::F32X4 => SseOpcode::Movaps,
                    types::F64 => SseOpcode::Movsd,
                    types::F32X4 => SseOpcode::Movaps,
                    types::F64X2 => SseOpcode::Movapd,
                    _ if ty.is_vector() && ty.bits() == 128 => SseOpcode::Movdqa,
                    _ => unimplemented!("unable to move type: {}", ty),