x64: Fix codegen for the select instruction with v128 (#4317)
This commit fixes a bug in the previous codegen for the `select` instruction when the operands of the `select` were of the `v128` type. Previously the `XmmCmove` instruction only stored an `OperandSize` of 32 or 64 for a 32- or 64-bit move, but this was also used for these 128-bit types, which meant that the wrong move instruction was generated for them. The fix applied here is to store the whole `Type` being moved so the 128-bit variant can be selected as well.
This commit is contained in:
@@ -168,7 +168,7 @@
|
||||
(dst WritableGpr))
|
||||
|
||||
;; XMM conditional move; overwrites the destination register.
|
||||
(XmmCmove (size OperandSize)
|
||||
(XmmCmove (ty Type)
|
||||
(cc CC)
|
||||
(consequent XmmMem)
|
||||
(alternative Xmm)
|
||||
@@ -1896,10 +1896,9 @@
|
||||
|
||||
(decl cmove_xmm (Type CC XmmMem Xmm) ConsumesFlags)
|
||||
(rule (cmove_xmm ty cc consequent alternative)
|
||||
(let ((dst WritableXmm (temp_writable_xmm))
|
||||
(size OperandSize (operand_size_of_type_32_64 ty)))
|
||||
(let ((dst WritableXmm (temp_writable_xmm)))
|
||||
(ConsumesFlags.ConsumesFlagsReturnsReg
|
||||
(MInst.XmmCmove size cc consequent alternative dst)
|
||||
(MInst.XmmCmove ty cc consequent alternative dst)
|
||||
dst)))
|
||||
|
||||
;; Helper for creating `cmove` instructions directly from values. This allows us
|
||||
@@ -1952,9 +1951,8 @@
|
||||
(rule (cmove_or_xmm ty cc1 cc2 consequent alternative)
|
||||
(let ((dst WritableXmm (temp_writable_xmm))
|
||||
(tmp WritableXmm (temp_writable_xmm))
|
||||
(size OperandSize (operand_size_of_type_32_64 ty))
|
||||
(cmove1 MInst (MInst.XmmCmove size cc1 consequent alternative tmp))
|
||||
(cmove2 MInst (MInst.XmmCmove size cc2 consequent tmp dst)))
|
||||
(cmove1 MInst (MInst.XmmCmove ty cc1 consequent alternative tmp))
|
||||
(cmove2 MInst (MInst.XmmCmove ty cc2 consequent tmp dst)))
|
||||
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
|
||||
cmove1
|
||||
cmove2
|
||||
|
||||
@@ -1112,7 +1112,7 @@ pub(crate) fn emit(
|
||||
}
|
||||
|
||||
Inst::XmmCmove {
|
||||
size,
|
||||
ty,
|
||||
cc,
|
||||
consequent,
|
||||
alternative,
|
||||
@@ -1130,10 +1130,15 @@ pub(crate) fn emit(
|
||||
// Jump if cc is *not* set.
|
||||
one_way_jmp(sink, cc.invert(), next);
|
||||
|
||||
let op = if *size == OperandSize::Size64 {
|
||||
SseOpcode::Movsd
|
||||
} else {
|
||||
SseOpcode::Movss
|
||||
let op = match *ty {
|
||||
types::F64 => SseOpcode::Movsd,
|
||||
types::F32 => SseOpcode::Movss, // scalar single-precision move, matching the "ss" mnemonic the pretty-printer emits for F32
|
||||
types::F32X4 => SseOpcode::Movaps,
|
||||
types::F64X2 => SseOpcode::Movapd,
|
||||
ty => {
|
||||
debug_assert!(ty.is_vector() && ty.bytes() == 16);
|
||||
SseOpcode::Movdqa
|
||||
}
|
||||
};
|
||||
let inst = Inst::xmm_unary_rm_r(op, consequent, Writable::from_reg(dst));
|
||||
inst.emit(&[], sink, info, state);
|
||||
|
||||
@@ -617,14 +617,14 @@ impl Inst {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn xmm_cmove(size: OperandSize, cc: CC, src: RegMem, dst: Writable<Reg>) -> Inst {
|
||||
debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
|
||||
pub(crate) fn xmm_cmove(ty: Type, cc: CC, src: RegMem, dst: Writable<Reg>) -> Inst {
|
||||
debug_assert!(ty == types::F32 || ty == types::F64 || ty.is_vector());
|
||||
src.assert_regclass_is(RegClass::Float);
|
||||
debug_assert!(dst.to_reg().class() == RegClass::Float);
|
||||
let src = XmmMem::new(src).unwrap();
|
||||
let dst = WritableXmm::from_writable_reg(dst).unwrap();
|
||||
Inst::XmmCmove {
|
||||
size,
|
||||
ty,
|
||||
cc,
|
||||
consequent: src,
|
||||
alternative: dst.to_reg(),
|
||||
@@ -1507,23 +1507,26 @@ impl PrettyPrint for Inst {
|
||||
}
|
||||
|
||||
Inst::XmmCmove {
|
||||
size,
|
||||
ty,
|
||||
cc,
|
||||
consequent,
|
||||
alternative,
|
||||
dst,
|
||||
..
|
||||
} => {
|
||||
let alternative = pretty_print_reg(alternative.to_reg(), size.to_bytes(), allocs);
|
||||
let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs);
|
||||
let consequent = consequent.pretty_print(size.to_bytes(), allocs);
|
||||
let size = u8::try_from(ty.bytes()).unwrap();
|
||||
let alternative = pretty_print_reg(alternative.to_reg(), size, allocs);
|
||||
let dst = pretty_print_reg(dst.to_reg().to_reg(), size, allocs);
|
||||
let consequent = consequent.pretty_print(size, allocs);
|
||||
format!(
|
||||
"mov {}, {}; j{} $next; mov{} {}, {}; $next: ",
|
||||
cc.invert().to_string(),
|
||||
if *size == OperandSize::Size64 {
|
||||
"sd"
|
||||
} else {
|
||||
"ss"
|
||||
match *ty {
|
||||
types::F64 => "sd",
|
||||
types::F32 => "ss",
|
||||
types::F32X4 => "aps",
|
||||
types::F64X2 => "apd",
|
||||
_ => "dqa",
|
||||
},
|
||||
consequent,
|
||||
dst,
|
||||
|
||||
@@ -2271,11 +2271,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
debug_assert!(ty == types::F32 || ty == types::F64);
|
||||
emit_moves(ctx, dst, rhs, ty);
|
||||
ctx.emit(Inst::xmm_cmove(
|
||||
if ty == types::F64 {
|
||||
OperandSize::Size64
|
||||
} else {
|
||||
OperandSize::Size32
|
||||
},
|
||||
ty,
|
||||
cc,
|
||||
RegMem::reg(lhs.only_reg().unwrap()),
|
||||
dst.only_reg().unwrap(),
|
||||
|
||||
Reference in New Issue
Block a user