Merge pull request #2576 from cfallin/cmp-load-bug
x64 bugfix: prevent load-op fusion of cmp because it could be emitted multiple times.
This commit is contained in:
@@ -380,11 +380,18 @@ fn emit_cmp<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
|
||||
|
||||
// TODO Try to commute the operands (and invert the condition) if one is an immediate.
|
||||
let lhs = put_input_in_reg(ctx, inputs[0]);
|
||||
let rhs = input_to_reg_mem_imm(ctx, inputs[1]);
|
||||
// We force the RHS into a register, and disallow load-op fusion, because we
|
||||
// do not have a transitive guarantee that this cmp-site will be the sole
|
||||
// user of the value. Consider: the icmp might be the only user of a load,
|
||||
// but there may be multiple users of the icmp (e.g. select or bint
|
||||
// instructions) that each invoke `emit_cmp()`. If we were to allow a load
|
||||
// to sink to the *latest* one, but other sites did not permit sinking, then
|
||||
// we would be missing the load for other cmp-sites.
|
||||
let rhs = put_input_in_reg(ctx, inputs[1]);
|
||||
|
||||
// Cranelift's icmp semantics want to compare lhs - rhs, while Intel gives
|
||||
// us dst - src at the machine instruction level, so invert operands.
|
||||
ctx.emit(Inst::cmp_rmi_r(ty.bytes() as u8, rhs, lhs));
|
||||
ctx.emit(Inst::cmp_rmi_r(ty.bytes() as u8, RegMemImm::reg(rhs), lhs));
|
||||
}
|
||||
|
||||
/// A specification for a fcmp emission.
|
||||
@@ -465,8 +472,10 @@ fn emit_fcmp<C: LowerCtx<I = Inst>>(
|
||||
(inputs[0], inputs[1])
|
||||
};
|
||||
let lhs = put_input_in_reg(ctx, lhs_input);
|
||||
let rhs = input_to_reg_mem(ctx, rhs_input);
|
||||
ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs));
|
||||
// See above in `emit_cmp()`. We must only use the reg/reg form of the
|
||||
// comparison in order to avoid issues with merged loads.
|
||||
let rhs = put_input_in_reg(ctx, rhs_input);
|
||||
ctx.emit(Inst::xmm_cmp_rm_r(op, RegMem::reg(rhs), lhs));
|
||||
|
||||
let cond_result = match cond_code {
|
||||
FloatCC::Equal => FcmpCondResult::AndConditions(CC::NP, CC::Z),
|
||||
|
||||
49
cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif
Normal file
49
cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif
Normal file
@@ -0,0 +1,49 @@
|
||||
test compile
|
||||
target x86_64
|
||||
feature "experimental_x64"
|
||||
|
||||
function %f0(i64, i64) -> i64, i64 {
|
||||
block0(v0: i64, v1: i64):
|
||||
v2 = load.i64 v1
|
||||
; check: movq 0(%rsi), %rax
|
||||
|
||||
v3 = icmp eq v0, v2
|
||||
|
||||
v4 = bint.i64 v3
|
||||
; nextln: cmpq %rax, %rdi
|
||||
; nextln: setz %cl
|
||||
; nextln: movzbq %cl, %rcx
|
||||
|
||||
v5 = select.i64 v3, v0, v1
|
||||
; nextln: cmpq %rax, %rdi
|
||||
; nextln: cmovzq %rdi, %rsi
|
||||
|
||||
return v4, v5
|
||||
; nextln: movq %rcx, %rax
|
||||
; nextln: movq %rsi, %rdx
|
||||
}
|
||||
|
||||
function %f1(f64, i64) -> i64, f64 {
|
||||
block0(v0: f64, v1: i64):
|
||||
v2 = load.f64 v1
|
||||
; check: movsd 0(%rdi), %xmm1
|
||||
|
||||
v3 = fcmp eq v0, v2
|
||||
|
||||
v4 = bint.i64 v3
|
||||
; nextln: ucomisd %xmm1, %xmm0
|
||||
; nextln: setnp %dil
|
||||
; nextln: setz %sil
|
||||
; nextln: andl %edi, %esi
|
||||
; nextln: movzbq %sil, %rsi
|
||||
|
||||
v5 = select.f64 v3, v0, v0
|
||||
; nextln: ucomisd %xmm1, %xmm0
|
||||
; nextln: movaps %xmm0, %xmm1
|
||||
; nextln: jnp $$next; movsd %xmm0, %xmm1; $$next:
|
||||
; nextln: jz $$next; movsd %xmm0, %xmm1; $$next:
|
||||
|
||||
return v4, v5
|
||||
; nextln: movq %rsi, %rax
|
||||
; nextln: movaps %xmm1, %xmm0
|
||||
}
|
||||
Reference in New Issue
Block a user