From efe39302158587ac8490e8c5064e267a1c47065d Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 21 Jun 2021 11:03:44 -0700 Subject: [PATCH] Fix `bint` on x64, and make `bextend` consistent with bool representation. There has been occasional confusion with the representation that we use for bool-typed values in registers, at least when these are wider than one bit. Does a `b8` store `true` as 1, or as all-ones (`0xff`)? We've settled on the latter because of some use-cases where the wide bool becomes a mask -- see #2058 for more on this. This is fine, and transparent, to most operations within CLIF, because the bool-typed value still has only two semantically-visible states, namely `true` and `false`. However, we have to be careful with bool-to-int conversions. `bint` on aarch64 correctly masked the all-ones value down to 0 or 1, as required by the instruction specification, but on x64 it did not. This PR fixes that bug and makes x64 consistent with aarch64. While staring at this code I realized that `bextend` was also not consistent with the all-ones invariant: it should do a sign-extend, not a zero-extend as it previously did. This is also rectified and tested. (Aarch64 also already had this case implemented correctly.) Fixes #3003. --- cranelift/codegen/src/isa/x64/lower.rs | 51 ++++++++++++++----- .../filetests/filetests/isa/x64/bextend.clif | 16 ++++++ .../filetests/isa/x64/cmp-mem-bug.clif | 4 +- .../filetests/filetests/isa/x64/i128.clif | 3 +- .../filetests/filetests/runtests/bint.clif | 37 ++++++++++++++ .../filetests/runtests/i128-bint.clif | 11 ++++ 6 files changed, 105 insertions(+), 17 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/x64/bextend.clif create mode 100644 cranelift/filetests/filetests/runtests/bint.clif create mode 100644 cranelift/filetests/filetests/runtests/i128-bint.clif diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index b16147a925..c9f52e71d2 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -3217,12 +3217,7 @@ fn lower_insn_to_regs>( ctx.emit(Inst::setcc(CC::Z, dst)); } - Opcode::Uextend - | Opcode::Sextend - | Opcode::Bint - | Opcode::Breduce - | Opcode::Bextend - | Opcode::Ireduce => { + Opcode::Uextend | Opcode::Sextend | Opcode::Breduce | Opcode::Bextend | Opcode::Ireduce => { let src_ty = ctx.input_ty(insn, 0); let dst_ty = ctx.output_ty(insn, 0); @@ -3236,7 +3231,7 @@ fn lower_insn_to_regs>( assert!(src_ty.bits() <= 64); let src = put_input_in_reg(ctx, inputs[0]); let dst = get_output_reg(ctx, outputs[0]); - assert!(op == Opcode::Uextend || op == Opcode::Sextend || op == Opcode::Bint); + assert!(op == Opcode::Uextend || op == Opcode::Sextend); // Extend to 64 bits first. let ext_mode = ExtMode::new(src_ty.bits(), /* dst bits = */ 64); @@ -3278,15 +3273,17 @@ fn lower_insn_to_regs>( // Sextend requires a sign-extended move, but all the other opcodes are simply a move // from a zero-extended source. Here is why this works, in each case: // - // - Bint: Bool-to-int. We always represent a bool as a 0 or 1, so we merely need to - // zero-extend here. - // - // - Breduce, Bextend: changing width of a boolean. We represent a bool as a 0 or 1, so - // again, this is a zero-extend / no-op. + // - Breduce, Bextend: changing width of a boolean. We + // represent a bool as a 0 or -1, so Breduce can mask, while + // Bextend must sign-extend. // // - Ireduce: changing width of an integer. Smaller ints are stored with undefined // high-order bits, so we can simply do a copy. - if src_ty == types::I32 && dst_ty == types::I64 && op != Opcode::Sextend { + let is_sextend = match op { + Opcode::Sextend | Opcode::Bextend => true, + _ => false, + }; + if src_ty == types::I32 && dst_ty == types::I64 && !is_sextend { // As a particular x64 extra-pattern matching opportunity, all the ALU opcodes on // 32-bits will zero-extend the upper 32-bits, so we can even not generate a // zero-extended move in this case. @@ -3324,7 +3321,7 @@ fn lower_insn_to_regs>( ); if let Some(ext_mode) = ext_mode { - if op == Opcode::Sextend { + if is_sextend { ctx.emit(Inst::movsx_rm_r(ext_mode, src, dst)); } else { ctx.emit(Inst::movzx_rm_r(ext_mode, src, dst)); @@ -3335,6 +3332,32 @@ fn lower_insn_to_regs>( } } + Opcode::Bint => { + // Booleans are stored as all-zeroes (0) or all-ones (-1). We AND + // out the LSB to give a 0 / 1-valued integer result. + let rn = put_input_in_reg(ctx, inputs[0]); + let rd = get_output_reg(ctx, outputs[0]); + let ty = ctx.output_ty(insn, 0); + + ctx.emit(Inst::gen_move(rd.regs()[0], rn, types::I64)); + ctx.emit(Inst::alu_rmi_r( + OperandSize::Size64, + AluRmiROpcode::And, + RegMemImm::imm(1), + rd.regs()[0], + )); + + if ty == types::I128 { + let upper = rd.regs()[1]; + ctx.emit(Inst::alu_rmi_r( + OperandSize::Size64, + AluRmiROpcode::Xor, + RegMemImm::reg(upper.to_reg()), + upper, + )); + } + } + Opcode::Icmp => { let condcode = ctx.data(insn).cond_code().unwrap(); let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); diff --git a/cranelift/filetests/filetests/isa/x64/bextend.clif b/cranelift/filetests/filetests/isa/x64/bextend.clif new file mode 100644 index 0000000000..6b53f3c3bd --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/bextend.clif @@ -0,0 +1,16 @@ +test compile +target x86_64 machinst + +function %f0(b8) -> b64 { +block0(v0: b8): + v1 = bextend.b64 v0 + return v1 +} + +; check: pushq %rbp +; nextln: movq %rsp, %rbp +; nextln: movsbq %dil, %rsi +; nextln: movq %rsi, %rax +; nextln: movq %rbp, %rsp +; nextln: popq %rbp +; nextln: ret diff --git a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif index fe43a2ce0d..16c788ed84 100644 --- a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif +++ b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif @@ -11,7 +11,7 @@ block0(v0: i64, v1: i64): v4 = bint.i64 v3 ; nextln: cmpq %rax, %rdi ; nextln: setz %cl -; nextln: movzbq %cl, %rcx +; nextln: andq $$1, %rcx v5 = select.i64 v3, v0, v1 ; nextln: cmpq %rax, %rdi @@ -34,7 +34,7 @@ block0(v0: f64, v1: i64): ; nextln: setnp %dil ; nextln: setz %sil ; nextln: andl %edi, %esi -; nextln: movzbq %sil, %rsi +; nextln: andq $$1, %rsi v5 = select.f64 v3, v0, v0 ; nextln: ucomisd %xmm1, %xmm0 diff --git a/cranelift/filetests/filetests/isa/x64/i128.clif b/cranelift/filetests/filetests/isa/x64/i128.clif index c480b857bb..61783e366d 100644 --- a/cranelift/filetests/filetests/isa/x64/i128.clif +++ b/cranelift/filetests/filetests/isa/x64/i128.clif @@ -463,7 +463,8 @@ block0(v0: b1): v1 = bint.i128 v0 return v1 -; check: movzbq %dil, %rsi +; nextln: movq %rdi, %rsi +; nextln: andq $$1, %rsi ; nextln: xorq %rdi, %rdi ; nextln: movq %rsi, %rax ; nextln: movq %rdi, %rdx diff --git a/cranelift/filetests/filetests/runtests/bint.clif b/cranelift/filetests/filetests/runtests/bint.clif new file mode 100644 index 0000000000..30bb91be11 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/bint.clif @@ -0,0 +1,37 @@ +test run +target aarch64 +target x86_64 machinst + +function %bint_b8_i16_true() -> i16 { +block0: + v0 = bconst.b8 true + v1 = bint.i16 v0 + return v1 +} +; run: %bint_b8_i16_true() == 1 + + +function %bint_b16_i16_true() -> i16 { +block0: + v0 = bconst.b16 true + v1 = bint.i16 v0 + return v1 +} +; run: %bint_b16_i16_true() == 1 + +function %bint_b8_i16_false() -> i16 { +block0: + v0 = bconst.b8 false + v1 = bint.i16 v0 + return v1 +} +; run: %bint_b8_i16_false() == 0 + + +function %bint_b16_i16_false() -> i16 { +block0: + v0 = bconst.b16 false + v1 = bint.i16 v0 + return v1 +} +; run: %bint_b16_i16_false() == 0 diff --git a/cranelift/filetests/filetests/runtests/i128-bint.clif b/cranelift/filetests/filetests/runtests/i128-bint.clif new file mode 100644 index 0000000000..3f51febe64 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/i128-bint.clif @@ -0,0 +1,11 @@ +test run +target x86_64 machinst + +function %bint_b8_i128() -> i64, i64 { +block0: + v0 = bconst.b8 true + v1 = bint.i128 v0 + v2, v3 = isplit.i128 v1 + return v2, v3 +} +; run: %bint_b8_i128() == [1, 0]