Refactor packed moves to use xmm_mov instead of xmm_rm_r

Refactors the previous packed move implementation to use xmm_mov
instead of xmm_rm_r, which aims to simplify register accounting
during lowering.
This commit is contained in:
Johnnie Birch
2020-12-04 13:28:25 -08:00
parent 51973aefbb
commit f705a72aeb
3 changed files with 84 additions and 100 deletions

View File

@@ -1873,18 +1873,6 @@ pub(crate) fn emit(
SseOpcode::Pcmpgtw => (LegacyPrefixes::_66, 0x0F65, 2),
SseOpcode::Pcmpgtd => (LegacyPrefixes::_66, 0x0F66, 2),
SseOpcode::Pcmpgtq => (LegacyPrefixes::_66, 0x0F3837, 3),
SseOpcode::Pmovsxbd => (LegacyPrefixes::_66, 0x0F3821, 3),
SseOpcode::Pmovsxbw => (LegacyPrefixes::_66, 0x0F3820, 3),
SseOpcode::Pmovsxbq => (LegacyPrefixes::_66, 0x0F3822, 3),
SseOpcode::Pmovsxwd => (LegacyPrefixes::_66, 0x0F3823, 3),
SseOpcode::Pmovsxwq => (LegacyPrefixes::_66, 0x0F3824, 3),
SseOpcode::Pmovsxdq => (LegacyPrefixes::_66, 0x0F3825, 3),
SseOpcode::Pmovzxbd => (LegacyPrefixes::_66, 0x0F3831, 3),
SseOpcode::Pmovzxbw => (LegacyPrefixes::_66, 0x0F3830, 3),
SseOpcode::Pmovzxbq => (LegacyPrefixes::_66, 0x0F3832, 3),
SseOpcode::Pmovzxwd => (LegacyPrefixes::_66, 0x0F3833, 3),
SseOpcode::Pmovzxwq => (LegacyPrefixes::_66, 0x0F3834, 3),
SseOpcode::Pmovzxdq => (LegacyPrefixes::_66, 0x0F3835, 3),
SseOpcode::Pmaxsb => (LegacyPrefixes::_66, 0x0F383C, 3),
SseOpcode::Pmaxsw => (LegacyPrefixes::_66, 0x0FEE, 2),
SseOpcode::Pmaxsd => (LegacyPrefixes::_66, 0x0F383D, 3),

View File

@@ -10,7 +10,7 @@
//! RUST_BACKTRACE=1 cargo test --features test-programs/test_programs \
//! --features experimental_x64 --all --exclude peepmatic --exclude lightbeam \
//! --exclude wasmtime-lightbeam --exclude peepmatic-automata --exclude peepmatic-fuzzing \
//! --exclude peepmatic-macro -- isa::x64::inst::emit_tests::test_x64_emit
//! --exclude peepmatic-macro --exclude wasmtime-wasi-nn -- isa::x64::inst::emit_tests::test_x64_emit
use super::*;
use crate::isa::test_utils;
@@ -3201,81 +3201,6 @@ fn test_x64_emit() {
"cvttps2dq %xmm9, %xmm8",
));
// ========================================================
// XMM_RM_R: Packed Move
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmovsxbd, RegMem::reg(xmm6), w_xmm8),
"66440F3821C6",
"pmovsxbd %xmm6, %xmm8",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmovsxbw, RegMem::reg(xmm9), w_xmm10),
"66450F3820D1",
"pmovsxbw %xmm9, %xmm10",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmovsxbq, RegMem::reg(xmm1), w_xmm1),
"660F3822C9",
"pmovsxbq %xmm1, %xmm1",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmovsxwd, RegMem::reg(xmm13), w_xmm10),
"66450F3823D5",
"pmovsxwd %xmm13, %xmm10",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmovsxwq, RegMem::reg(xmm12), w_xmm12),
"66450F3824E4",
"pmovsxwq %xmm12, %xmm12",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmovsxdq, RegMem::reg(xmm10), w_xmm8),
"66450F3825C2",
"pmovsxdq %xmm10, %xmm8",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmovzxbd, RegMem::reg(xmm5), w_xmm6),
"660F3831F5",
"pmovzxbd %xmm5, %xmm6",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmovzxbw, RegMem::reg(xmm5), w_xmm13),
"66440F3830ED",
"pmovzxbw %xmm5, %xmm13",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmovzxbq, RegMem::reg(xmm10), w_xmm11),
"66450F3832DA",
"pmovzxbq %xmm10, %xmm11",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmovzxwd, RegMem::reg(xmm2), w_xmm10),
"66440F3833D2",
"pmovzxwd %xmm2, %xmm10",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmovzxwq, RegMem::reg(xmm7), w_xmm4),
"660F3834E7",
"pmovzxwq %xmm7, %xmm4",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmovzxdq, RegMem::reg(xmm3), w_xmm4),
"660F3835E3",
"pmovzxdq %xmm3, %xmm4",
));
// XMM_Mov_R_M: float stores
insns.push((
Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12)),
@@ -3288,6 +3213,81 @@ fn test_x64_emit() {
"movsd %xmm1, 0(%rsi)",
));
// ========================================================
// XMM_MOV: Packed Move
insns.push((
Inst::xmm_mov(SseOpcode::Pmovsxbd, RegMem::reg(xmm6), w_xmm8),
"66440F3821C6",
"pmovsxbd %xmm6, %xmm8",
));
insns.push((
Inst::xmm_mov(SseOpcode::Pmovsxbw, RegMem::reg(xmm9), w_xmm10),
"66450F3820D1",
"pmovsxbw %xmm9, %xmm10",
));
insns.push((
Inst::xmm_mov(SseOpcode::Pmovsxbq, RegMem::reg(xmm1), w_xmm1),
"660F3822C9",
"pmovsxbq %xmm1, %xmm1",
));
insns.push((
Inst::xmm_mov(SseOpcode::Pmovsxwd, RegMem::reg(xmm13), w_xmm10),
"66450F3823D5",
"pmovsxwd %xmm13, %xmm10",
));
insns.push((
Inst::xmm_mov(SseOpcode::Pmovsxwq, RegMem::reg(xmm12), w_xmm12),
"66450F3824E4",
"pmovsxwq %xmm12, %xmm12",
));
insns.push((
Inst::xmm_mov(SseOpcode::Pmovsxdq, RegMem::reg(xmm10), w_xmm8),
"66450F3825C2",
"pmovsxdq %xmm10, %xmm8",
));
insns.push((
Inst::xmm_mov(SseOpcode::Pmovzxbd, RegMem::reg(xmm5), w_xmm6),
"660F3831F5",
"pmovzxbd %xmm5, %xmm6",
));
insns.push((
Inst::xmm_mov(SseOpcode::Pmovzxbw, RegMem::reg(xmm5), w_xmm13),
"66440F3830ED",
"pmovzxbw %xmm5, %xmm13",
));
insns.push((
Inst::xmm_mov(SseOpcode::Pmovzxbq, RegMem::reg(xmm10), w_xmm11),
"66450F3832DA",
"pmovzxbq %xmm10, %xmm11",
));
insns.push((
Inst::xmm_mov(SseOpcode::Pmovzxwd, RegMem::reg(xmm2), w_xmm10),
"66440F3833D2",
"pmovzxwd %xmm2, %xmm10",
));
insns.push((
Inst::xmm_mov(SseOpcode::Pmovzxwq, RegMem::reg(xmm7), w_xmm4),
"660F3834E7",
"pmovzxwq %xmm7, %xmm4",
));
insns.push((
Inst::xmm_mov(SseOpcode::Pmovzxdq, RegMem::reg(xmm3), w_xmm4),
"660F3835E3",
"pmovzxdq %xmm3, %xmm4",
));
// XmmUnary: moves and unary float ops
insns.push((
Inst::xmm_unary_rm_r(SseOpcode::Movss, RegMem::reg(xmm13), w_xmm2),

View File

@@ -2910,12 +2910,10 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
match op {
Opcode::SwidenLow => match (input_ty, output_ty) {
(types::I8X16, types::I16X8) => {
ctx.emit(Inst::gen_move(dst, src, output_ty));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovsxbw, RegMem::from(dst), dst));
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxbw, RegMem::reg(src), dst));
}
(types::I16X8, types::I32X4) => {
ctx.emit(Inst::gen_move(dst, src, output_ty));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovsxwd, RegMem::from(dst), dst));
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxwd, RegMem::reg(src), dst));
}
_ => unreachable!(),
},
@@ -2929,7 +2927,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
8,
false,
));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovsxbw, RegMem::from(dst), dst));
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxbw, RegMem::from(dst), dst));
}
(types::I16X8, types::I32X4) => {
ctx.emit(Inst::gen_move(dst, src, output_ty));
@@ -2940,18 +2938,16 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
8,
false,
));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovsxwd, RegMem::from(dst), dst));
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxwd, RegMem::from(dst), dst));
}
_ => unreachable!(),
},
Opcode::UwidenLow => match (input_ty, output_ty) {
(types::I8X16, types::I16X8) => {
ctx.emit(Inst::gen_move(dst, src, output_ty));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovzxbw, RegMem::from(dst), dst));
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxbw, RegMem::reg(src), dst));
}
(types::I16X8, types::I32X4) => {
ctx.emit(Inst::gen_move(dst, src, output_ty));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovzxwd, RegMem::from(dst), dst));
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxwd, RegMem::reg(src), dst));
}
_ => unreachable!(),
},
@@ -2965,7 +2961,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
8,
false,
));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovzxbw, RegMem::from(dst), dst));
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxbw, RegMem::from(dst), dst));
}
(types::I16X8, types::I32X4) => {
ctx.emit(Inst::gen_move(dst, src, output_ty));
@@ -2976,7 +2972,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
8,
false,
));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovzxwd, RegMem::from(dst), dst));
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxwd, RegMem::from(dst), dst));
}
_ => unreachable!(),
},