Fix bint on x64, and make bextend consistent with bool representation.
There has been occasional confusion with the representation that we use for bool-typed values in registers, at least when these are wider than one bit. Does a `b8` store `true` as 1, or as all-ones (`0xff`)? We've settled on the latter because of some use-cases where the wide bool becomes a mask -- see #2058 for more on this. This is fine, and transparent, to most operations within CLIF, because the bool-typed value still has only two semantically-visible states, namely `true` and `false`. However, we have to be careful with bool-to-int conversions. `bint` on aarch64 correctly masked the all-ones value down to 0 or 1, as required by the instruction specification, but on x64 it did not. This PR fixes that bug and makes x64 consistent with aarch64. While staring at this code I realized that `bextend` was also not consistent with the all-ones invariant: it should do a sign-extend, not a zero-extend as it previously did. This is also rectified and tested. (Aarch64 also already had this case implemented correctly.) Fixes #3003.
This commit is contained in:
@@ -3217,12 +3217,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
ctx.emit(Inst::setcc(CC::Z, dst));
|
ctx.emit(Inst::setcc(CC::Z, dst));
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Uextend
|
Opcode::Uextend | Opcode::Sextend | Opcode::Breduce | Opcode::Bextend | Opcode::Ireduce => {
|
||||||
| Opcode::Sextend
|
|
||||||
| Opcode::Bint
|
|
||||||
| Opcode::Breduce
|
|
||||||
| Opcode::Bextend
|
|
||||||
| Opcode::Ireduce => {
|
|
||||||
let src_ty = ctx.input_ty(insn, 0);
|
let src_ty = ctx.input_ty(insn, 0);
|
||||||
let dst_ty = ctx.output_ty(insn, 0);
|
let dst_ty = ctx.output_ty(insn, 0);
|
||||||
|
|
||||||
@@ -3236,7 +3231,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
assert!(src_ty.bits() <= 64);
|
assert!(src_ty.bits() <= 64);
|
||||||
let src = put_input_in_reg(ctx, inputs[0]);
|
let src = put_input_in_reg(ctx, inputs[0]);
|
||||||
let dst = get_output_reg(ctx, outputs[0]);
|
let dst = get_output_reg(ctx, outputs[0]);
|
||||||
assert!(op == Opcode::Uextend || op == Opcode::Sextend || op == Opcode::Bint);
|
assert!(op == Opcode::Uextend || op == Opcode::Sextend);
|
||||||
// Extend to 64 bits first.
|
// Extend to 64 bits first.
|
||||||
|
|
||||||
let ext_mode = ExtMode::new(src_ty.bits(), /* dst bits = */ 64);
|
let ext_mode = ExtMode::new(src_ty.bits(), /* dst bits = */ 64);
|
||||||
@@ -3278,15 +3273,17 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
// Sextend requires a sign-extended move, but all the other opcodes are simply a move
|
// Sextend requires a sign-extended move, but all the other opcodes are simply a move
|
||||||
// from a zero-extended source. Here is why this works, in each case:
|
// from a zero-extended source. Here is why this works, in each case:
|
||||||
//
|
//
|
||||||
// - Bint: Bool-to-int. We always represent a bool as a 0 or 1, so we merely need to
|
// - Breduce, Bextend: changing width of a boolean. We
|
||||||
// zero-extend here.
|
// represent a bool as a 0 or -1, so Breduce can mask, while
|
||||||
//
|
// Bextend must sign-extend.
|
||||||
// - Breduce, Bextend: changing width of a boolean. We represent a bool as a 0 or 1, so
|
|
||||||
// again, this is a zero-extend / no-op.
|
|
||||||
//
|
//
|
||||||
// - Ireduce: changing width of an integer. Smaller ints are stored with undefined
|
// - Ireduce: changing width of an integer. Smaller ints are stored with undefined
|
||||||
// high-order bits, so we can simply do a copy.
|
// high-order bits, so we can simply do a copy.
|
||||||
if src_ty == types::I32 && dst_ty == types::I64 && op != Opcode::Sextend {
|
let is_sextend = match op {
|
||||||
|
Opcode::Sextend | Opcode::Bextend => true,
|
||||||
|
_ => false,
|
||||||
|
};
|
||||||
|
if src_ty == types::I32 && dst_ty == types::I64 && !is_sextend {
|
||||||
// As a particular x64 extra-pattern matching opportunity, all the ALU opcodes on
|
// As a particular x64 extra-pattern matching opportunity, all the ALU opcodes on
|
||||||
// 32-bits will zero-extend the upper 32-bits, so we can even not generate a
|
// 32-bits will zero-extend the upper 32-bits, so we can even not generate a
|
||||||
// zero-extended move in this case.
|
// zero-extended move in this case.
|
||||||
@@ -3324,7 +3321,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
);
|
);
|
||||||
|
|
||||||
if let Some(ext_mode) = ext_mode {
|
if let Some(ext_mode) = ext_mode {
|
||||||
if op == Opcode::Sextend {
|
if is_sextend {
|
||||||
ctx.emit(Inst::movsx_rm_r(ext_mode, src, dst));
|
ctx.emit(Inst::movsx_rm_r(ext_mode, src, dst));
|
||||||
} else {
|
} else {
|
||||||
ctx.emit(Inst::movzx_rm_r(ext_mode, src, dst));
|
ctx.emit(Inst::movzx_rm_r(ext_mode, src, dst));
|
||||||
@@ -3335,6 +3332,32 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Opcode::Bint => {
|
||||||
|
// Booleans are stored as all-zeroes (0) or all-ones (-1). We AND
|
||||||
|
// out the LSB to give a 0 / 1-valued integer result.
|
||||||
|
let rn = put_input_in_reg(ctx, inputs[0]);
|
||||||
|
let rd = get_output_reg(ctx, outputs[0]);
|
||||||
|
let ty = ctx.output_ty(insn, 0);
|
||||||
|
|
||||||
|
ctx.emit(Inst::gen_move(rd.regs()[0], rn, types::I64));
|
||||||
|
ctx.emit(Inst::alu_rmi_r(
|
||||||
|
OperandSize::Size64,
|
||||||
|
AluRmiROpcode::And,
|
||||||
|
RegMemImm::imm(1),
|
||||||
|
rd.regs()[0],
|
||||||
|
));
|
||||||
|
|
||||||
|
if ty == types::I128 {
|
||||||
|
let upper = rd.regs()[1];
|
||||||
|
ctx.emit(Inst::alu_rmi_r(
|
||||||
|
OperandSize::Size64,
|
||||||
|
AluRmiROpcode::Xor,
|
||||||
|
RegMemImm::reg(upper.to_reg()),
|
||||||
|
upper,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Opcode::Icmp => {
|
Opcode::Icmp => {
|
||||||
let condcode = ctx.data(insn).cond_code().unwrap();
|
let condcode = ctx.data(insn).cond_code().unwrap();
|
||||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
|
|||||||
16
cranelift/filetests/filetests/isa/x64/bextend.clif
Normal file
16
cranelift/filetests/filetests/isa/x64/bextend.clif
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
test compile
|
||||||
|
target x86_64 machinst
|
||||||
|
|
||||||
|
function %f0(b8) -> b64 {
|
||||||
|
block0(v0: b8):
|
||||||
|
v1 = bextend.b64 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: pushq %rbp
|
||||||
|
; nextln: movq %rsp, %rbp
|
||||||
|
; nextln: movsbq %dil, %rsi
|
||||||
|
; nextln: movq %rsi, %rax
|
||||||
|
; nextln: movq %rbp, %rsp
|
||||||
|
; nextln: popq %rbp
|
||||||
|
; nextln: ret
|
||||||
@@ -11,7 +11,7 @@ block0(v0: i64, v1: i64):
|
|||||||
v4 = bint.i64 v3
|
v4 = bint.i64 v3
|
||||||
; nextln: cmpq %rax, %rdi
|
; nextln: cmpq %rax, %rdi
|
||||||
; nextln: setz %cl
|
; nextln: setz %cl
|
||||||
; nextln: movzbq %cl, %rcx
|
; nextln: andq $$1, %rcx
|
||||||
|
|
||||||
v5 = select.i64 v3, v0, v1
|
v5 = select.i64 v3, v0, v1
|
||||||
; nextln: cmpq %rax, %rdi
|
; nextln: cmpq %rax, %rdi
|
||||||
@@ -34,7 +34,7 @@ block0(v0: f64, v1: i64):
|
|||||||
; nextln: setnp %dil
|
; nextln: setnp %dil
|
||||||
; nextln: setz %sil
|
; nextln: setz %sil
|
||||||
; nextln: andl %edi, %esi
|
; nextln: andl %edi, %esi
|
||||||
; nextln: movzbq %sil, %rsi
|
; nextln: andq $$1, %rsi
|
||||||
|
|
||||||
v5 = select.f64 v3, v0, v0
|
v5 = select.f64 v3, v0, v0
|
||||||
; nextln: ucomisd %xmm1, %xmm0
|
; nextln: ucomisd %xmm1, %xmm0
|
||||||
|
|||||||
@@ -463,7 +463,8 @@ block0(v0: b1):
|
|||||||
v1 = bint.i128 v0
|
v1 = bint.i128 v0
|
||||||
return v1
|
return v1
|
||||||
|
|
||||||
; check: movzbq %dil, %rsi
|
; nextln: movq %rdi, %rsi
|
||||||
|
; nextln: andq $$1, %rsi
|
||||||
; nextln: xorq %rdi, %rdi
|
; nextln: xorq %rdi, %rdi
|
||||||
; nextln: movq %rsi, %rax
|
; nextln: movq %rsi, %rax
|
||||||
; nextln: movq %rdi, %rdx
|
; nextln: movq %rdi, %rdx
|
||||||
|
|||||||
37
cranelift/filetests/filetests/runtests/bint.clif
Normal file
37
cranelift/filetests/filetests/runtests/bint.clif
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
test run
|
||||||
|
target aarch64
|
||||||
|
target x86_64 machinst
|
||||||
|
|
||||||
|
function %bint_b8_i16_true() -> i16 {
|
||||||
|
block0:
|
||||||
|
v0 = bconst.b8 true
|
||||||
|
v1 = bint.i16 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
; run: %bint_b8_i16_true() == 1
|
||||||
|
|
||||||
|
|
||||||
|
function %bint_b16_i16_true() -> i16 {
|
||||||
|
block0:
|
||||||
|
v0 = bconst.b16 true
|
||||||
|
v1 = bint.i16 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
; run: %bint_b16_i16_true() == 1
|
||||||
|
|
||||||
|
function %bint_b8_i16_false() -> i16 {
|
||||||
|
block0:
|
||||||
|
v0 = bconst.b8 false
|
||||||
|
v1 = bint.i16 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
; run: %bint_b8_i16_false() == 0
|
||||||
|
|
||||||
|
|
||||||
|
function %bint_b16_i16_false() -> i16 {
|
||||||
|
block0:
|
||||||
|
v0 = bconst.b16 false
|
||||||
|
v1 = bint.i16 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
; run: %bint_b16_i16_false() == 0
|
||||||
11
cranelift/filetests/filetests/runtests/i128-bint.clif
Normal file
11
cranelift/filetests/filetests/runtests/i128-bint.clif
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
test run
|
||||||
|
target x86_64 machinst
|
||||||
|
|
||||||
|
function %bint_b8_i128() -> i64, i64 {
|
||||||
|
block0:
|
||||||
|
v0 = bconst.b8 true
|
||||||
|
v1 = bint.i128 v0
|
||||||
|
v2, v3 = isplit.i128 v1
|
||||||
|
return v2, v3
|
||||||
|
}
|
||||||
|
; run: %bint_b8_i128() == [1, 0]
|
||||||
Reference in New Issue
Block a user