Fix bint on x64, and make bextend consistent with bool representation.

There has been occasional confusion with the representation that we use
for bool-typed values in registers, at least when these are wider than
one bit. Does a `b8` store `true` as 1, or as all-ones (`0xff`)?

We've settled on the latter because of some use-cases where the wide
bool becomes a mask -- see #2058 for more on this.

This is fine for, and transparent to, most operations within CLIF, because
a bool-typed value still has only two semantically visible states,
namely `true` and `false`.

However, we have to be careful with bool-to-int conversions. `bint` on
aarch64 correctly masked the all-ones value down to 0 or 1, as required
by the instruction specification, but on x64 it did not. This PR fixes
that bug and makes x64 consistent with aarch64.

While staring at this code I realized that `bextend` was also not
consistent with the all-ones invariant: it should do a sign-extend, not
a zero-extend as it previously did. This is also rectified and tested.
(Aarch64 also already had this case implemented correctly.)

Fixes #3003.
This commit is contained in:
Chris Fallin
2021-06-21 11:03:44 -07:00
parent 443eb7a843
commit efe3930215
6 changed files with 105 additions and 17 deletions

View File

@@ -3217,12 +3217,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::setcc(CC::Z, dst));
}
Opcode::Uextend
| Opcode::Sextend
| Opcode::Bint
| Opcode::Breduce
| Opcode::Bextend
| Opcode::Ireduce => {
Opcode::Uextend | Opcode::Sextend | Opcode::Breduce | Opcode::Bextend | Opcode::Ireduce => {
let src_ty = ctx.input_ty(insn, 0);
let dst_ty = ctx.output_ty(insn, 0);
@@ -3236,7 +3231,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
assert!(src_ty.bits() <= 64);
let src = put_input_in_reg(ctx, inputs[0]);
let dst = get_output_reg(ctx, outputs[0]);
assert!(op == Opcode::Uextend || op == Opcode::Sextend || op == Opcode::Bint);
assert!(op == Opcode::Uextend || op == Opcode::Sextend);
// Extend to 64 bits first.
let ext_mode = ExtMode::new(src_ty.bits(), /* dst bits = */ 64);
@@ -3278,15 +3273,17 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// Sextend requires a sign-extended move, but all the other opcodes are simply a move
// from a zero-extended source. Here is why this works, in each case:
//
// - Bint: Bool-to-int. We always represent a bool as a 0 or 1, so we merely need to
// zero-extend here.
//
// - Breduce, Bextend: changing width of a boolean. We represent a bool as a 0 or 1, so
// again, this is a zero-extend / no-op.
// - Breduce, Bextend: changing width of a boolean. We
// represent a bool as a 0 or -1, so Breduce can mask, while
// Bextend must sign-extend.
//
// - Ireduce: changing width of an integer. Smaller ints are stored with undefined
// high-order bits, so we can simply do a copy.
if src_ty == types::I32 && dst_ty == types::I64 && op != Opcode::Sextend {
let is_sextend = match op {
Opcode::Sextend | Opcode::Bextend => true,
_ => false,
};
if src_ty == types::I32 && dst_ty == types::I64 && !is_sextend {
// As a particular x64 extra-pattern matching opportunity, all the ALU opcodes on
// 32-bits will zero-extend the upper 32-bits, so we can even not generate a
// zero-extended move in this case.
@@ -3324,7 +3321,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
);
if let Some(ext_mode) = ext_mode {
if op == Opcode::Sextend {
if is_sextend {
ctx.emit(Inst::movsx_rm_r(ext_mode, src, dst));
} else {
ctx.emit(Inst::movzx_rm_r(ext_mode, src, dst));
@@ -3335,6 +3332,32 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
}
Opcode::Bint => {
// Booleans are stored as all-zeroes (0) or all-ones (-1). We AND
// out the LSB to give a 0 / 1-valued integer result.
let rn = put_input_in_reg(ctx, inputs[0]);
let rd = get_output_reg(ctx, outputs[0]);
let ty = ctx.output_ty(insn, 0);
ctx.emit(Inst::gen_move(rd.regs()[0], rn, types::I64));
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::And,
RegMemImm::imm(1),
rd.regs()[0],
));
if ty == types::I128 {
let upper = rd.regs()[1];
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Xor,
RegMemImm::reg(upper.to_reg()),
upper,
));
}
}
Opcode::Icmp => {
let condcode = ctx.data(insn).cond_code().unwrap();
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();