x64: Fix codegen for the select instruction with v128 (#4317)

This commit fixes a bug in the previous codegen for the `select`
instruction when the operands of the `select` were of the `v128` type.
Previously the `XmmCmove` instruction only stored an `OperandSize` of 32
or 64 for a 32- or 64-bit move, but this was also used for these 128-bit
types, which meant that the wrong move instruction was generated for
them. The fix applied here is to store the whole `Type` being moved
so the 128-bit variant can be selected as well.
This commit is contained in:
Alex Crichton
2022-06-27 13:02:40 -05:00
committed by GitHub
parent 23ae9016af
commit 8bb07523e2
5 changed files with 49 additions and 28 deletions

View File

@@ -168,7 +168,7 @@
(dst WritableGpr))
;; XMM conditional move; overwrites the destination register.
(XmmCmove (size OperandSize)
(XmmCmove (ty Type)
(cc CC)
(consequent XmmMem)
(alternative Xmm)
@@ -1896,10 +1896,9 @@
(decl cmove_xmm (Type CC XmmMem Xmm) ConsumesFlags)
(rule (cmove_xmm ty cc consequent alternative)
(let ((dst WritableXmm (temp_writable_xmm))
(size OperandSize (operand_size_of_type_32_64 ty)))
(let ((dst WritableXmm (temp_writable_xmm)))
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.XmmCmove size cc consequent alternative dst)
(MInst.XmmCmove ty cc consequent alternative dst)
dst)))
;; Helper for creating `cmove` instructions directly from values. This allows us
@@ -1952,9 +1951,8 @@
(rule (cmove_or_xmm ty cc1 cc2 consequent alternative)
(let ((dst WritableXmm (temp_writable_xmm))
(tmp WritableXmm (temp_writable_xmm))
(size OperandSize (operand_size_of_type_32_64 ty))
(cmove1 MInst (MInst.XmmCmove size cc1 consequent alternative tmp))
(cmove2 MInst (MInst.XmmCmove size cc2 consequent tmp dst)))
(cmove1 MInst (MInst.XmmCmove ty cc1 consequent alternative tmp))
(cmove2 MInst (MInst.XmmCmove ty cc2 consequent tmp dst)))
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
cmove1
cmove2

View File

@@ -1112,7 +1112,7 @@ pub(crate) fn emit(
}
Inst::XmmCmove {
size,
ty,
cc,
consequent,
alternative,
@@ -1130,10 +1130,15 @@ pub(crate) fn emit(
// Jump if cc is *not* set.
one_way_jmp(sink, cc.invert(), next);
let op = if *size == OperandSize::Size64 {
SseOpcode::Movsd
} else {
SseOpcode::Movss
// Select the move opcode from the type being moved: scalar moves for
// `F32`/`F64`, and full-register moves for the 128-bit vector types.
let op = match *ty {
    types::F64 => SseOpcode::Movsd,
    // `F32` uses the single-precision scalar move, matching the `ss`
    // suffix that the pretty-printer emits for this type.
    types::F32 => SseOpcode::Movss,
    types::F32X4 => SseOpcode::Movaps,
    types::F64X2 => SseOpcode::Movapd,
    // Every remaining type is expected to be a 128-bit vector.
    ty => {
        debug_assert!(ty.is_vector() && ty.bytes() == 16);
        SseOpcode::Movdqa
    }
};
let inst = Inst::xmm_unary_rm_r(op, consequent, Writable::from_reg(dst));
inst.emit(&[], sink, info, state);

View File

@@ -617,14 +617,14 @@ impl Inst {
}
}
pub(crate) fn xmm_cmove(size: OperandSize, cc: CC, src: RegMem, dst: Writable<Reg>) -> Inst {
debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
pub(crate) fn xmm_cmove(ty: Type, cc: CC, src: RegMem, dst: Writable<Reg>) -> Inst {
debug_assert!(ty == types::F32 || ty == types::F64 || ty.is_vector());
src.assert_regclass_is(RegClass::Float);
debug_assert!(dst.to_reg().class() == RegClass::Float);
let src = XmmMem::new(src).unwrap();
let dst = WritableXmm::from_writable_reg(dst).unwrap();
Inst::XmmCmove {
size,
ty,
cc,
consequent: src,
alternative: dst.to_reg(),
@@ -1507,23 +1507,26 @@ impl PrettyPrint for Inst {
}
Inst::XmmCmove {
size,
ty,
cc,
consequent,
alternative,
dst,
..
} => {
let alternative = pretty_print_reg(alternative.to_reg(), size.to_bytes(), allocs);
let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs);
let consequent = consequent.pretty_print(size.to_bytes(), allocs);
let size = u8::try_from(ty.bytes()).unwrap();
let alternative = pretty_print_reg(alternative.to_reg(), size, allocs);
let dst = pretty_print_reg(dst.to_reg().to_reg(), size, allocs);
let consequent = consequent.pretty_print(size, allocs);
format!(
"mov {}, {}; j{} $next; mov{} {}, {}; $next: ",
cc.invert().to_string(),
if *size == OperandSize::Size64 {
"sd"
} else {
"ss"
match *ty {
types::F64 => "sd",
types::F32 => "ss",
types::F32X4 => "aps",
types::F64X2 => "apd",
_ => "dqa",
},
consequent,
dst,

View File

@@ -2271,11 +2271,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
debug_assert!(ty == types::F32 || ty == types::F64);
emit_moves(ctx, dst, rhs, ty);
ctx.emit(Inst::xmm_cmove(
if ty == types::F64 {
OperandSize::Size64
} else {
OperandSize::Size32
},
ty,
cc,
RegMem::reg(lhs.only_reg().unwrap()),
dst.only_reg().unwrap(),