machinst x64: avoid emitting movzx when the input is an ALU 32-bits operation;

2020-10-07 16:31:16 +02:00
parent 3980a43cda
commit e8c2a1763a
3 changed files with 81 additions and 15 deletions
--- a/cranelift/codegen/src/isa/x64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -1168,7 +1168,11 @@ impl Inst {
    ) -> Inst {
        let rc = from_reg.get_class();
        match rc {
-            RegClass::I64 => Inst::mov_r_m(ty.bytes() as u8, from_reg, to_addr, srcloc),
+            RegClass::I64 => {
+                // Always store the full register, to ensure that the high bits are properly set
+                // when doing a full reload.
+                Inst::mov_r_m(8 /* bytes */, from_reg, to_addr, srcloc)
+            }
            RegClass::V128 => {
                let opcode = match ty {
                    types::F32 => SseOpcode::Movss,
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -70,6 +70,25 @@ fn matches_input<C: LowerCtx<I = Inst>>(
    })
 }

+/// Returns whether the given specified `input` is a result produced by an instruction with any of
+/// the opcodes specified in `ops`.
+fn matches_input_any<C: LowerCtx<I = Inst>>(
+    ctx: &mut C,
+    input: InsnInput,
+    ops: &[Opcode],
+) -> Option<IRInst> {
+    let inputs = ctx.get_input(input.insn, input.input);
+    inputs.inst.and_then(|(src_inst, _)| {
+        let data = ctx.data(src_inst);
+        for &op in ops {
+            if data.opcode() == op {
+                return Some(src_inst);
+            }
+        }
+        None
+    })
+}
+
 fn lowerinput_to_reg(ctx: Ctx, input: LowerInput) -> Reg {
    ctx.use_input_reg(input);
    input.reg
@@ -1339,29 +1358,55 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            let src_ty = ctx.input_ty(insn, 0);
            let dst_ty = ctx.output_ty(insn, 0);

+            // Sextend requires a sign-extended move, but all the other opcodes are simply a move
+            // from a zero-extended source. Here is why this works, in each case:
+            //
+            // - Bint: Bool-to-int. We always represent a bool as a 0 or 1, so we merely need to
+            // zero-extend here.
+            //
+            // - Breduce, Bextend: changing width of a boolean. We represent a bool as a 0 or 1, so
+            // again, this is a zero-extend / no-op.
+            //
+            // - Ireduce: changing width of an integer. Smaller ints are stored with undefined
+            // high-order bits, so we can simply do a copy.
+
+            if src_ty == types::I32 && dst_ty == types::I64 && op != Opcode::Sextend {
+                // As a particular x64 extra-pattern matching opportunity, all the ALU opcodes on
+                // 32-bits will zero-extend the upper 32-bits, so we can even not generate a
+                // zero-extended move in this case.
+                // TODO add loads and shifts here.
+                if let Some(_) = matches_input_any(
+                    ctx,
+                    inputs[0],
+                    &[
+                        Opcode::Iadd,
+                        Opcode::IaddIfcout,
+                        Opcode::Isub,
+                        Opcode::Imul,
+                        Opcode::Band,
+                        Opcode::Bor,
+                        Opcode::Bxor,
+                    ],
+                ) {
+                    let src = put_input_in_reg(ctx, inputs[0]);
+                    let dst = get_output_reg(ctx, outputs[0]);
+                    ctx.emit(Inst::gen_move(dst, src, types::I64));
+                    return Ok(());
+                }
+            }
+
            let src = input_to_reg_mem(ctx, inputs[0]);
            let dst = get_output_reg(ctx, outputs[0]);

            let ext_mode = ExtMode::new(src_ty.bits(), dst_ty.bits());
-            assert!(
-                (src_ty.bits() < dst_ty.bits() && ext_mode.is_some()) || ext_mode.is_none(),
+            assert_eq!(
+                src_ty.bits() < dst_ty.bits(),
+                ext_mode.is_some(),
                "unexpected extension: {} -> {}",
                src_ty,
                dst_ty
            );

-            // All of these other opcodes are simply a move from a zero-extended source.  Here
-            // is why this works, in each case:
-            //
-            // - Bint: Bool-to-int. We always represent a bool as a 0 or 1, so we
-            //   merely need to zero-extend here.
-            //
-            // - Breduce, Bextend: changing width of a boolean. We represent a
-            //   bool as a 0 or 1, so again, this is a zero-extend / no-op.
-            //
-            // - Ireduce: changing width of an integer. Smaller ints are stored
-            //   with undefined high-order bits, so we can simply do a copy.
-
            if let Some(ext_mode) = ext_mode {
                if op == Opcode::Sextend {
                    ctx.emit(Inst::movsx_rm_r(
--- a/cranelift/filetests/filetests/isa/x64/uextend-elision.clif
+++ b/cranelift/filetests/filetests/isa/x64/uextend-elision.clif
@@ -0,0 +1,17 @@
+test compile
+target x86_64
+feature "experimental_x64"
+
+function %elide_uextend_add(i32, i32) -> i64 {
+block0(v0: i32, v1: i32):
+    ; check: pushq   %rbp
+    ; check: movq    %rsp, %rbp
+    v2 = iadd v0, v1
+    ; check: addl    %esi, %edi
+    v3 = uextend.i64 v2
+    ; check: movq    %rdi, %rax
+    ; check: movq    %rbp, %rsp
+    ; check: popq    %rbp
+    ; check: ret
+    return v3
+}