x64: Fix codegen for the select instruction with v128 (#4317)
This commit fixes a bug in the previous codegen for the `select` instruction when the operations of the `select` were of the `v128` type. Previously teh `XmmCmove` instruction only stored an `OperandSize` of 32 or 64 for a 64 or 32-bit move, but this was also used for these 128-bit types which meant that when used the wrong move instruction was generated. The fix applied here is to store the whole `Type` being moved so the 128-bit variant can be selected as well.
This commit is contained in:
@@ -168,7 +168,7 @@
|
|||||||
(dst WritableGpr))
|
(dst WritableGpr))
|
||||||
|
|
||||||
;; XMM conditional move; overwrites the destination register.
|
;; XMM conditional move; overwrites the destination register.
|
||||||
(XmmCmove (size OperandSize)
|
(XmmCmove (ty Type)
|
||||||
(cc CC)
|
(cc CC)
|
||||||
(consequent XmmMem)
|
(consequent XmmMem)
|
||||||
(alternative Xmm)
|
(alternative Xmm)
|
||||||
@@ -1896,10 +1896,9 @@
|
|||||||
|
|
||||||
(decl cmove_xmm (Type CC XmmMem Xmm) ConsumesFlags)
|
(decl cmove_xmm (Type CC XmmMem Xmm) ConsumesFlags)
|
||||||
(rule (cmove_xmm ty cc consequent alternative)
|
(rule (cmove_xmm ty cc consequent alternative)
|
||||||
(let ((dst WritableXmm (temp_writable_xmm))
|
(let ((dst WritableXmm (temp_writable_xmm)))
|
||||||
(size OperandSize (operand_size_of_type_32_64 ty)))
|
|
||||||
(ConsumesFlags.ConsumesFlagsReturnsReg
|
(ConsumesFlags.ConsumesFlagsReturnsReg
|
||||||
(MInst.XmmCmove size cc consequent alternative dst)
|
(MInst.XmmCmove ty cc consequent alternative dst)
|
||||||
dst)))
|
dst)))
|
||||||
|
|
||||||
;; Helper for creating `cmove` instructions directly from values. This allows us
|
;; Helper for creating `cmove` instructions directly from values. This allows us
|
||||||
@@ -1952,9 +1951,8 @@
|
|||||||
(rule (cmove_or_xmm ty cc1 cc2 consequent alternative)
|
(rule (cmove_or_xmm ty cc1 cc2 consequent alternative)
|
||||||
(let ((dst WritableXmm (temp_writable_xmm))
|
(let ((dst WritableXmm (temp_writable_xmm))
|
||||||
(tmp WritableXmm (temp_writable_xmm))
|
(tmp WritableXmm (temp_writable_xmm))
|
||||||
(size OperandSize (operand_size_of_type_32_64 ty))
|
(cmove1 MInst (MInst.XmmCmove ty cc1 consequent alternative tmp))
|
||||||
(cmove1 MInst (MInst.XmmCmove size cc1 consequent alternative tmp))
|
(cmove2 MInst (MInst.XmmCmove ty cc2 consequent tmp dst)))
|
||||||
(cmove2 MInst (MInst.XmmCmove size cc2 consequent tmp dst)))
|
|
||||||
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
|
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
|
||||||
cmove1
|
cmove1
|
||||||
cmove2
|
cmove2
|
||||||
|
|||||||
@@ -1112,7 +1112,7 @@ pub(crate) fn emit(
|
|||||||
}
|
}
|
||||||
|
|
||||||
Inst::XmmCmove {
|
Inst::XmmCmove {
|
||||||
size,
|
ty,
|
||||||
cc,
|
cc,
|
||||||
consequent,
|
consequent,
|
||||||
alternative,
|
alternative,
|
||||||
@@ -1130,10 +1130,15 @@ pub(crate) fn emit(
|
|||||||
// Jump if cc is *not* set.
|
// Jump if cc is *not* set.
|
||||||
one_way_jmp(sink, cc.invert(), next);
|
one_way_jmp(sink, cc.invert(), next);
|
||||||
|
|
||||||
let op = if *size == OperandSize::Size64 {
|
let op = match *ty {
|
||||||
SseOpcode::Movsd
|
types::F64 => SseOpcode::Movsd,
|
||||||
} else {
|
types::F32 => SseOpcode::Movsd,
|
||||||
SseOpcode::Movss
|
types::F32X4 => SseOpcode::Movaps,
|
||||||
|
types::F64X2 => SseOpcode::Movapd,
|
||||||
|
ty => {
|
||||||
|
debug_assert!(ty.is_vector() && ty.bytes() == 16);
|
||||||
|
SseOpcode::Movdqa
|
||||||
|
}
|
||||||
};
|
};
|
||||||
let inst = Inst::xmm_unary_rm_r(op, consequent, Writable::from_reg(dst));
|
let inst = Inst::xmm_unary_rm_r(op, consequent, Writable::from_reg(dst));
|
||||||
inst.emit(&[], sink, info, state);
|
inst.emit(&[], sink, info, state);
|
||||||
|
|||||||
@@ -617,14 +617,14 @@ impl Inst {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn xmm_cmove(size: OperandSize, cc: CC, src: RegMem, dst: Writable<Reg>) -> Inst {
|
pub(crate) fn xmm_cmove(ty: Type, cc: CC, src: RegMem, dst: Writable<Reg>) -> Inst {
|
||||||
debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
|
debug_assert!(ty == types::F32 || ty == types::F64 || ty.is_vector());
|
||||||
src.assert_regclass_is(RegClass::Float);
|
src.assert_regclass_is(RegClass::Float);
|
||||||
debug_assert!(dst.to_reg().class() == RegClass::Float);
|
debug_assert!(dst.to_reg().class() == RegClass::Float);
|
||||||
let src = XmmMem::new(src).unwrap();
|
let src = XmmMem::new(src).unwrap();
|
||||||
let dst = WritableXmm::from_writable_reg(dst).unwrap();
|
let dst = WritableXmm::from_writable_reg(dst).unwrap();
|
||||||
Inst::XmmCmove {
|
Inst::XmmCmove {
|
||||||
size,
|
ty,
|
||||||
cc,
|
cc,
|
||||||
consequent: src,
|
consequent: src,
|
||||||
alternative: dst.to_reg(),
|
alternative: dst.to_reg(),
|
||||||
@@ -1507,23 +1507,26 @@ impl PrettyPrint for Inst {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Inst::XmmCmove {
|
Inst::XmmCmove {
|
||||||
size,
|
ty,
|
||||||
cc,
|
cc,
|
||||||
consequent,
|
consequent,
|
||||||
alternative,
|
alternative,
|
||||||
dst,
|
dst,
|
||||||
..
|
..
|
||||||
} => {
|
} => {
|
||||||
let alternative = pretty_print_reg(alternative.to_reg(), size.to_bytes(), allocs);
|
let size = u8::try_from(ty.bytes()).unwrap();
|
||||||
let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs);
|
let alternative = pretty_print_reg(alternative.to_reg(), size, allocs);
|
||||||
let consequent = consequent.pretty_print(size.to_bytes(), allocs);
|
let dst = pretty_print_reg(dst.to_reg().to_reg(), size, allocs);
|
||||||
|
let consequent = consequent.pretty_print(size, allocs);
|
||||||
format!(
|
format!(
|
||||||
"mov {}, {}; j{} $next; mov{} {}, {}; $next: ",
|
"mov {}, {}; j{} $next; mov{} {}, {}; $next: ",
|
||||||
cc.invert().to_string(),
|
cc.invert().to_string(),
|
||||||
if *size == OperandSize::Size64 {
|
match *ty {
|
||||||
"sd"
|
types::F64 => "sd",
|
||||||
} else {
|
types::F32 => "ss",
|
||||||
"ss"
|
types::F32X4 => "aps",
|
||||||
|
types::F64X2 => "apd",
|
||||||
|
_ => "dqa",
|
||||||
},
|
},
|
||||||
consequent,
|
consequent,
|
||||||
dst,
|
dst,
|
||||||
|
|||||||
@@ -2271,11 +2271,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
debug_assert!(ty == types::F32 || ty == types::F64);
|
debug_assert!(ty == types::F32 || ty == types::F64);
|
||||||
emit_moves(ctx, dst, rhs, ty);
|
emit_moves(ctx, dst, rhs, ty);
|
||||||
ctx.emit(Inst::xmm_cmove(
|
ctx.emit(Inst::xmm_cmove(
|
||||||
if ty == types::F64 {
|
ty,
|
||||||
OperandSize::Size64
|
|
||||||
} else {
|
|
||||||
OperandSize::Size32
|
|
||||||
},
|
|
||||||
cc,
|
cc,
|
||||||
RegMem::reg(lhs.only_reg().unwrap()),
|
RegMem::reg(lhs.only_reg().unwrap()),
|
||||||
dst.only_reg().unwrap(),
|
dst.only_reg().unwrap(),
|
||||||
|
|||||||
19
tests/misc_testsuite/simd/v128-select.wast
Normal file
19
tests/misc_testsuite/simd/v128-select.wast
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
(module
|
||||||
|
(func (export "select") (param v128 v128 i32) (result v128)
|
||||||
|
local.get 0
|
||||||
|
local.get 1
|
||||||
|
local.get 2
|
||||||
|
select)
|
||||||
|
)
|
||||||
|
|
||||||
|
(assert_return (invoke "select"
|
||||||
|
(v128.const i64x2 1 1)
|
||||||
|
(v128.const i64x2 2 2)
|
||||||
|
(i32.const 0))
|
||||||
|
(v128.const i64x2 2 2))
|
||||||
|
|
||||||
|
(assert_return (invoke "select"
|
||||||
|
(v128.const i64x2 1 1)
|
||||||
|
(v128.const i64x2 2 2)
|
||||||
|
(i32.const 1))
|
||||||
|
(v128.const i64x2 1 1))
|
||||||
Reference in New Issue
Block a user