Refactor packed moves to use xmm_mov instead of xmm_rm_r
Refactors the previous packed-move implementation to use xmm_mov instead of xmm_rm_r, which is intended to simplify register accounting during lowering.
This commit is contained in:
@@ -1873,18 +1873,6 @@ pub(crate) fn emit(
|
||||
SseOpcode::Pcmpgtw => (LegacyPrefixes::_66, 0x0F65, 2),
|
||||
SseOpcode::Pcmpgtd => (LegacyPrefixes::_66, 0x0F66, 2),
|
||||
SseOpcode::Pcmpgtq => (LegacyPrefixes::_66, 0x0F3837, 3),
|
||||
SseOpcode::Pmovsxbd => (LegacyPrefixes::_66, 0x0F3821, 3),
|
||||
SseOpcode::Pmovsxbw => (LegacyPrefixes::_66, 0x0F3820, 3),
|
||||
SseOpcode::Pmovsxbq => (LegacyPrefixes::_66, 0x0F3822, 3),
|
||||
SseOpcode::Pmovsxwd => (LegacyPrefixes::_66, 0x0F3823, 3),
|
||||
SseOpcode::Pmovsxwq => (LegacyPrefixes::_66, 0x0F3824, 3),
|
||||
SseOpcode::Pmovsxdq => (LegacyPrefixes::_66, 0x0F3825, 3),
|
||||
SseOpcode::Pmovzxbd => (LegacyPrefixes::_66, 0x0F3831, 3),
|
||||
SseOpcode::Pmovzxbw => (LegacyPrefixes::_66, 0x0F3830, 3),
|
||||
SseOpcode::Pmovzxbq => (LegacyPrefixes::_66, 0x0F3832, 3),
|
||||
SseOpcode::Pmovzxwd => (LegacyPrefixes::_66, 0x0F3833, 3),
|
||||
SseOpcode::Pmovzxwq => (LegacyPrefixes::_66, 0x0F3834, 3),
|
||||
SseOpcode::Pmovzxdq => (LegacyPrefixes::_66, 0x0F3835, 3),
|
||||
SseOpcode::Pmaxsb => (LegacyPrefixes::_66, 0x0F383C, 3),
|
||||
SseOpcode::Pmaxsw => (LegacyPrefixes::_66, 0x0FEE, 2),
|
||||
SseOpcode::Pmaxsd => (LegacyPrefixes::_66, 0x0F383D, 3),
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
//! RUST_BACKTRACE=1 cargo test --features test-programs/test_programs \
|
||||
//! --features experimental_x64 --all --exclude peepmatic --exclude lightbeam \
|
||||
//! --exclude wasmtime-lightbeam --exclude peepmatic-automata --exclude peepmatic-fuzzing \
|
||||
//! --exclude peepmatic-macro -- isa::x64::inst::emit_tests::test_x64_emit
|
||||
//! --exclude peepmatic-macro --exclude wasmtime-wasi-nn -- isa::x64::inst::emit_tests::test_x64_emit
|
||||
|
||||
use super::*;
|
||||
use crate::isa::test_utils;
|
||||
@@ -3201,81 +3201,6 @@ fn test_x64_emit() {
|
||||
"cvttps2dq %xmm9, %xmm8",
|
||||
));
|
||||
|
||||
// ========================================================
|
||||
// XMM_RM_R: Packed Move
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pmovsxbd, RegMem::reg(xmm6), w_xmm8),
|
||||
"66440F3821C6",
|
||||
"pmovsxbd %xmm6, %xmm8",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pmovsxbw, RegMem::reg(xmm9), w_xmm10),
|
||||
"66450F3820D1",
|
||||
"pmovsxbw %xmm9, %xmm10",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pmovsxbq, RegMem::reg(xmm1), w_xmm1),
|
||||
"660F3822C9",
|
||||
"pmovsxbq %xmm1, %xmm1",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pmovsxwd, RegMem::reg(xmm13), w_xmm10),
|
||||
"66450F3823D5",
|
||||
"pmovsxwd %xmm13, %xmm10",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pmovsxwq, RegMem::reg(xmm12), w_xmm12),
|
||||
"66450F3824E4",
|
||||
"pmovsxwq %xmm12, %xmm12",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pmovsxdq, RegMem::reg(xmm10), w_xmm8),
|
||||
"66450F3825C2",
|
||||
"pmovsxdq %xmm10, %xmm8",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pmovzxbd, RegMem::reg(xmm5), w_xmm6),
|
||||
"660F3831F5",
|
||||
"pmovzxbd %xmm5, %xmm6",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pmovzxbw, RegMem::reg(xmm5), w_xmm13),
|
||||
"66440F3830ED",
|
||||
"pmovzxbw %xmm5, %xmm13",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pmovzxbq, RegMem::reg(xmm10), w_xmm11),
|
||||
"66450F3832DA",
|
||||
"pmovzxbq %xmm10, %xmm11",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pmovzxwd, RegMem::reg(xmm2), w_xmm10),
|
||||
"66440F3833D2",
|
||||
"pmovzxwd %xmm2, %xmm10",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pmovzxwq, RegMem::reg(xmm7), w_xmm4),
|
||||
"660F3834E7",
|
||||
"pmovzxwq %xmm7, %xmm4",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pmovzxdq, RegMem::reg(xmm3), w_xmm4),
|
||||
"660F3835E3",
|
||||
"pmovzxdq %xmm3, %xmm4",
|
||||
));
|
||||
|
||||
// XMM_Mov_R_M: float stores
|
||||
insns.push((
|
||||
Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12)),
|
||||
@@ -3288,6 +3213,81 @@ fn test_x64_emit() {
|
||||
"movsd %xmm1, 0(%rsi)",
|
||||
));
|
||||
|
||||
// ========================================================
|
||||
// XMM_MOV: Packed Move
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_mov(SseOpcode::Pmovsxbd, RegMem::reg(xmm6), w_xmm8),
|
||||
"66440F3821C6",
|
||||
"pmovsxbd %xmm6, %xmm8",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_mov(SseOpcode::Pmovsxbw, RegMem::reg(xmm9), w_xmm10),
|
||||
"66450F3820D1",
|
||||
"pmovsxbw %xmm9, %xmm10",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_mov(SseOpcode::Pmovsxbq, RegMem::reg(xmm1), w_xmm1),
|
||||
"660F3822C9",
|
||||
"pmovsxbq %xmm1, %xmm1",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_mov(SseOpcode::Pmovsxwd, RegMem::reg(xmm13), w_xmm10),
|
||||
"66450F3823D5",
|
||||
"pmovsxwd %xmm13, %xmm10",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_mov(SseOpcode::Pmovsxwq, RegMem::reg(xmm12), w_xmm12),
|
||||
"66450F3824E4",
|
||||
"pmovsxwq %xmm12, %xmm12",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_mov(SseOpcode::Pmovsxdq, RegMem::reg(xmm10), w_xmm8),
|
||||
"66450F3825C2",
|
||||
"pmovsxdq %xmm10, %xmm8",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_mov(SseOpcode::Pmovzxbd, RegMem::reg(xmm5), w_xmm6),
|
||||
"660F3831F5",
|
||||
"pmovzxbd %xmm5, %xmm6",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_mov(SseOpcode::Pmovzxbw, RegMem::reg(xmm5), w_xmm13),
|
||||
"66440F3830ED",
|
||||
"pmovzxbw %xmm5, %xmm13",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_mov(SseOpcode::Pmovzxbq, RegMem::reg(xmm10), w_xmm11),
|
||||
"66450F3832DA",
|
||||
"pmovzxbq %xmm10, %xmm11",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_mov(SseOpcode::Pmovzxwd, RegMem::reg(xmm2), w_xmm10),
|
||||
"66440F3833D2",
|
||||
"pmovzxwd %xmm2, %xmm10",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_mov(SseOpcode::Pmovzxwq, RegMem::reg(xmm7), w_xmm4),
|
||||
"660F3834E7",
|
||||
"pmovzxwq %xmm7, %xmm4",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_mov(SseOpcode::Pmovzxdq, RegMem::reg(xmm3), w_xmm4),
|
||||
"660F3835E3",
|
||||
"pmovzxdq %xmm3, %xmm4",
|
||||
));
|
||||
|
||||
// XmmUnary: moves and unary float ops
|
||||
insns.push((
|
||||
Inst::xmm_unary_rm_r(SseOpcode::Movss, RegMem::reg(xmm13), w_xmm2),
|
||||
|
||||
@@ -2910,12 +2910,10 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
match op {
|
||||
Opcode::SwidenLow => match (input_ty, output_ty) {
|
||||
(types::I8X16, types::I16X8) => {
|
||||
ctx.emit(Inst::gen_move(dst, src, output_ty));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovsxbw, RegMem::from(dst), dst));
|
||||
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxbw, RegMem::reg(src), dst));
|
||||
}
|
||||
(types::I16X8, types::I32X4) => {
|
||||
ctx.emit(Inst::gen_move(dst, src, output_ty));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovsxwd, RegMem::from(dst), dst));
|
||||
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxwd, RegMem::reg(src), dst));
|
||||
}
|
||||
_ => unreachable!(),
|
||||
},
|
||||
@@ -2929,7 +2927,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
8,
|
||||
false,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovsxbw, RegMem::from(dst), dst));
|
||||
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxbw, RegMem::from(dst), dst));
|
||||
}
|
||||
(types::I16X8, types::I32X4) => {
|
||||
ctx.emit(Inst::gen_move(dst, src, output_ty));
|
||||
@@ -2940,18 +2938,16 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
8,
|
||||
false,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovsxwd, RegMem::from(dst), dst));
|
||||
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxwd, RegMem::from(dst), dst));
|
||||
}
|
||||
_ => unreachable!(),
|
||||
},
|
||||
Opcode::UwidenLow => match (input_ty, output_ty) {
|
||||
(types::I8X16, types::I16X8) => {
|
||||
ctx.emit(Inst::gen_move(dst, src, output_ty));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovzxbw, RegMem::from(dst), dst));
|
||||
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxbw, RegMem::reg(src), dst));
|
||||
}
|
||||
(types::I16X8, types::I32X4) => {
|
||||
ctx.emit(Inst::gen_move(dst, src, output_ty));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovzxwd, RegMem::from(dst), dst));
|
||||
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxwd, RegMem::reg(src), dst));
|
||||
}
|
||||
_ => unreachable!(),
|
||||
},
|
||||
@@ -2965,7 +2961,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
8,
|
||||
false,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovzxbw, RegMem::from(dst), dst));
|
||||
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxbw, RegMem::from(dst), dst));
|
||||
}
|
||||
(types::I16X8, types::I32X4) => {
|
||||
ctx.emit(Inst::gen_move(dst, src, output_ty));
|
||||
@@ -2976,7 +2972,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
8,
|
||||
false,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovzxwd, RegMem::from(dst), dst));
|
||||
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxwd, RegMem::from(dst), dst));
|
||||
}
|
||||
_ => unreachable!(),
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user