Merge pull request #2576 from cfallin/cmp-load-bug

x64 bugfix: prevent load-op fusion of cmp because it could be emitted multiple times.
2021-01-13 10:33:37 -08:00
parent 90b80a2d3c 4638de673c
commit 2b2f369bbb
2 changed files with 62 additions and 4 deletions
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -380,11 +380,18 @@ fn emit_cmp<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
    // TODO Try to commute the operands (and invert the condition) if one is an immediate.
    let lhs = put_input_in_reg(ctx, inputs[0]);
-    let rhs = input_to_reg_mem_imm(ctx, inputs[1]);
+    // We force the RHS into a register, and disallow load-op fusion, because we
    // do not have a transitive guarantee that this cmp-site will be the sole
    // user of the value. Consider: the icmp might be the only user of a load,
    // but there may be multiple users of the icmp (e.g.  select or bint
    // instructions) that each invoke `emit_cmp()`. If we were to allow a load
    // to sink to the *latest* one, but other sites did not permit sinking, then
    // we would be missing the load for other cmp-sites.
    let rhs = put_input_in_reg(ctx, inputs[1]);
    // Cranelift's icmp semantics want to compare lhs - rhs, while Intel gives
    // us dst - src at the machine instruction level, so invert operands.
-    ctx.emit(Inst::cmp_rmi_r(ty.bytes() as u8, rhs, lhs));
+    ctx.emit(Inst::cmp_rmi_r(ty.bytes() as u8, RegMemImm::reg(rhs), lhs));
 }
 /// A specification for a fcmp emission.
@@ -465,8 +472,10 @@ fn emit_fcmp<C: LowerCtx<I = Inst>>(
        (inputs[0], inputs[1])
    };
    let lhs = put_input_in_reg(ctx, lhs_input);
-    let rhs = input_to_reg_mem(ctx, rhs_input);
+    // See above in `emit_cmp()`. We must only use the reg/reg form of the
-    ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs));
+    // comparison in order to avoid issues with merged loads.
    let rhs = put_input_in_reg(ctx, rhs_input);
    ctx.emit(Inst::xmm_cmp_rm_r(op, RegMem::reg(rhs), lhs));
    let cond_result = match cond_code {
        FloatCC::Equal => FcmpCondResult::AndConditions(CC::NP, CC::Z),
--- a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif
+++ b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif
@@ -0,0 +1,49 @@
 test compile
 target x86_64
 feature "experimental_x64"
 function %f0(i64, i64) -> i64, i64 {
 block0(v0: i64, v1: i64):
    v2 = load.i64 v1
 ; check:  movq    0(%rsi), %rax
    v3 = icmp eq v0, v2
    v4 = bint.i64 v3
 ; nextln: cmpq    %rax, %rdi
 ; nextln: setz    %cl
 ; nextln: movzbq  %cl, %rcx
    v5 = select.i64 v3, v0, v1
 ; nextln: cmpq    %rax, %rdi
 ; nextln: cmovzq  %rdi, %rsi
    return v4, v5
 ; nextln: movq    %rcx, %rax
 ; nextln: movq    %rsi, %rdx
 }
 function %f1(f64, i64) -> i64, f64 {
 block0(v0: f64, v1: i64):
    v2 = load.f64 v1
 ; check:  movsd   0(%rdi), %xmm1
    v3 = fcmp eq v0, v2
    v4 = bint.i64 v3
 ; nextln: ucomisd %xmm1, %xmm0
 ; nextln: setnp   %dil
 ; nextln: setz    %sil
 ; nextln: andl    %edi, %esi
 ; nextln: movzbq  %sil, %rsi
    v5 = select.f64 v3, v0, v0
 ; nextln: ucomisd %xmm1, %xmm0
 ; nextln: movaps  %xmm0, %xmm1
 ; nextln: jnp $$next; movsd %xmm0, %xmm1; $$next:
 ; nextln: jz $$next; movsd %xmm0, %xmm1; $$next:
    return v4, v5
 ; nextln: movq    %rsi, %rax
 ; nextln: movaps  %xmm1, %xmm0
 }