Refactor packed moves to use xmm_mov instead of xmm_rm_r
Refactors the previous packed move implementation to use xmm_mov instead of xmm_rm_r, which is intended to simplify register accounting during lowering.
This commit is contained in:
@@ -1873,18 +1873,6 @@ pub(crate) fn emit(
|
|||||||
SseOpcode::Pcmpgtw => (LegacyPrefixes::_66, 0x0F65, 2),
|
SseOpcode::Pcmpgtw => (LegacyPrefixes::_66, 0x0F65, 2),
|
||||||
SseOpcode::Pcmpgtd => (LegacyPrefixes::_66, 0x0F66, 2),
|
SseOpcode::Pcmpgtd => (LegacyPrefixes::_66, 0x0F66, 2),
|
||||||
SseOpcode::Pcmpgtq => (LegacyPrefixes::_66, 0x0F3837, 3),
|
SseOpcode::Pcmpgtq => (LegacyPrefixes::_66, 0x0F3837, 3),
|
||||||
SseOpcode::Pmovsxbd => (LegacyPrefixes::_66, 0x0F3821, 3),
|
|
||||||
SseOpcode::Pmovsxbw => (LegacyPrefixes::_66, 0x0F3820, 3),
|
|
||||||
SseOpcode::Pmovsxbq => (LegacyPrefixes::_66, 0x0F3822, 3),
|
|
||||||
SseOpcode::Pmovsxwd => (LegacyPrefixes::_66, 0x0F3823, 3),
|
|
||||||
SseOpcode::Pmovsxwq => (LegacyPrefixes::_66, 0x0F3824, 3),
|
|
||||||
SseOpcode::Pmovsxdq => (LegacyPrefixes::_66, 0x0F3825, 3),
|
|
||||||
SseOpcode::Pmovzxbd => (LegacyPrefixes::_66, 0x0F3831, 3),
|
|
||||||
SseOpcode::Pmovzxbw => (LegacyPrefixes::_66, 0x0F3830, 3),
|
|
||||||
SseOpcode::Pmovzxbq => (LegacyPrefixes::_66, 0x0F3832, 3),
|
|
||||||
SseOpcode::Pmovzxwd => (LegacyPrefixes::_66, 0x0F3833, 3),
|
|
||||||
SseOpcode::Pmovzxwq => (LegacyPrefixes::_66, 0x0F3834, 3),
|
|
||||||
SseOpcode::Pmovzxdq => (LegacyPrefixes::_66, 0x0F3835, 3),
|
|
||||||
SseOpcode::Pmaxsb => (LegacyPrefixes::_66, 0x0F383C, 3),
|
SseOpcode::Pmaxsb => (LegacyPrefixes::_66, 0x0F383C, 3),
|
||||||
SseOpcode::Pmaxsw => (LegacyPrefixes::_66, 0x0FEE, 2),
|
SseOpcode::Pmaxsw => (LegacyPrefixes::_66, 0x0FEE, 2),
|
||||||
SseOpcode::Pmaxsd => (LegacyPrefixes::_66, 0x0F383D, 3),
|
SseOpcode::Pmaxsd => (LegacyPrefixes::_66, 0x0F383D, 3),
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
//! RUST_BACKTRACE=1 cargo test --features test-programs/test_programs \
|
//! RUST_BACKTRACE=1 cargo test --features test-programs/test_programs \
|
||||||
//! --features experimental_x64 --all --exclude peepmatic --exclude lightbeam \
|
//! --features experimental_x64 --all --exclude peepmatic --exclude lightbeam \
|
||||||
//! --exclude wasmtime-lightbeam --exclude peepmatic-automata --exclude peepmatic-fuzzing \
|
//! --exclude wasmtime-lightbeam --exclude peepmatic-automata --exclude peepmatic-fuzzing \
|
||||||
//! --exclude peepmatic-macro -- isa::x64::inst::emit_tests::test_x64_emit
|
//! --exclude peepmatic-macro --exclude wasmtime-wasi-nn -- isa::x64::inst::emit_tests::test_x64_emit
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::isa::test_utils;
|
use crate::isa::test_utils;
|
||||||
@@ -3201,81 +3201,6 @@ fn test_x64_emit() {
|
|||||||
"cvttps2dq %xmm9, %xmm8",
|
"cvttps2dq %xmm9, %xmm8",
|
||||||
));
|
));
|
||||||
|
|
||||||
// ========================================================
|
|
||||||
// XMM_RM_R: Packed Move
|
|
||||||
|
|
||||||
insns.push((
|
|
||||||
Inst::xmm_rm_r(SseOpcode::Pmovsxbd, RegMem::reg(xmm6), w_xmm8),
|
|
||||||
"66440F3821C6",
|
|
||||||
"pmovsxbd %xmm6, %xmm8",
|
|
||||||
));
|
|
||||||
|
|
||||||
insns.push((
|
|
||||||
Inst::xmm_rm_r(SseOpcode::Pmovsxbw, RegMem::reg(xmm9), w_xmm10),
|
|
||||||
"66450F3820D1",
|
|
||||||
"pmovsxbw %xmm9, %xmm10",
|
|
||||||
));
|
|
||||||
|
|
||||||
insns.push((
|
|
||||||
Inst::xmm_rm_r(SseOpcode::Pmovsxbq, RegMem::reg(xmm1), w_xmm1),
|
|
||||||
"660F3822C9",
|
|
||||||
"pmovsxbq %xmm1, %xmm1",
|
|
||||||
));
|
|
||||||
|
|
||||||
insns.push((
|
|
||||||
Inst::xmm_rm_r(SseOpcode::Pmovsxwd, RegMem::reg(xmm13), w_xmm10),
|
|
||||||
"66450F3823D5",
|
|
||||||
"pmovsxwd %xmm13, %xmm10",
|
|
||||||
));
|
|
||||||
|
|
||||||
insns.push((
|
|
||||||
Inst::xmm_rm_r(SseOpcode::Pmovsxwq, RegMem::reg(xmm12), w_xmm12),
|
|
||||||
"66450F3824E4",
|
|
||||||
"pmovsxwq %xmm12, %xmm12",
|
|
||||||
));
|
|
||||||
|
|
||||||
insns.push((
|
|
||||||
Inst::xmm_rm_r(SseOpcode::Pmovsxdq, RegMem::reg(xmm10), w_xmm8),
|
|
||||||
"66450F3825C2",
|
|
||||||
"pmovsxdq %xmm10, %xmm8",
|
|
||||||
));
|
|
||||||
|
|
||||||
insns.push((
|
|
||||||
Inst::xmm_rm_r(SseOpcode::Pmovzxbd, RegMem::reg(xmm5), w_xmm6),
|
|
||||||
"660F3831F5",
|
|
||||||
"pmovzxbd %xmm5, %xmm6",
|
|
||||||
));
|
|
||||||
|
|
||||||
insns.push((
|
|
||||||
Inst::xmm_rm_r(SseOpcode::Pmovzxbw, RegMem::reg(xmm5), w_xmm13),
|
|
||||||
"66440F3830ED",
|
|
||||||
"pmovzxbw %xmm5, %xmm13",
|
|
||||||
));
|
|
||||||
|
|
||||||
insns.push((
|
|
||||||
Inst::xmm_rm_r(SseOpcode::Pmovzxbq, RegMem::reg(xmm10), w_xmm11),
|
|
||||||
"66450F3832DA",
|
|
||||||
"pmovzxbq %xmm10, %xmm11",
|
|
||||||
));
|
|
||||||
|
|
||||||
insns.push((
|
|
||||||
Inst::xmm_rm_r(SseOpcode::Pmovzxwd, RegMem::reg(xmm2), w_xmm10),
|
|
||||||
"66440F3833D2",
|
|
||||||
"pmovzxwd %xmm2, %xmm10",
|
|
||||||
));
|
|
||||||
|
|
||||||
insns.push((
|
|
||||||
Inst::xmm_rm_r(SseOpcode::Pmovzxwq, RegMem::reg(xmm7), w_xmm4),
|
|
||||||
"660F3834E7",
|
|
||||||
"pmovzxwq %xmm7, %xmm4",
|
|
||||||
));
|
|
||||||
|
|
||||||
insns.push((
|
|
||||||
Inst::xmm_rm_r(SseOpcode::Pmovzxdq, RegMem::reg(xmm3), w_xmm4),
|
|
||||||
"660F3835E3",
|
|
||||||
"pmovzxdq %xmm3, %xmm4",
|
|
||||||
));
|
|
||||||
|
|
||||||
// XMM_Mov_R_M: float stores
|
// XMM_Mov_R_M: float stores
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12)),
|
Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12)),
|
||||||
@@ -3288,6 +3213,81 @@ fn test_x64_emit() {
|
|||||||
"movsd %xmm1, 0(%rsi)",
|
"movsd %xmm1, 0(%rsi)",
|
||||||
));
|
));
|
||||||
|
|
||||||
|
// ========================================================
|
||||||
|
// XMM_MOV: Packed Move
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_mov(SseOpcode::Pmovsxbd, RegMem::reg(xmm6), w_xmm8),
|
||||||
|
"66440F3821C6",
|
||||||
|
"pmovsxbd %xmm6, %xmm8",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_mov(SseOpcode::Pmovsxbw, RegMem::reg(xmm9), w_xmm10),
|
||||||
|
"66450F3820D1",
|
||||||
|
"pmovsxbw %xmm9, %xmm10",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_mov(SseOpcode::Pmovsxbq, RegMem::reg(xmm1), w_xmm1),
|
||||||
|
"660F3822C9",
|
||||||
|
"pmovsxbq %xmm1, %xmm1",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_mov(SseOpcode::Pmovsxwd, RegMem::reg(xmm13), w_xmm10),
|
||||||
|
"66450F3823D5",
|
||||||
|
"pmovsxwd %xmm13, %xmm10",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_mov(SseOpcode::Pmovsxwq, RegMem::reg(xmm12), w_xmm12),
|
||||||
|
"66450F3824E4",
|
||||||
|
"pmovsxwq %xmm12, %xmm12",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_mov(SseOpcode::Pmovsxdq, RegMem::reg(xmm10), w_xmm8),
|
||||||
|
"66450F3825C2",
|
||||||
|
"pmovsxdq %xmm10, %xmm8",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_mov(SseOpcode::Pmovzxbd, RegMem::reg(xmm5), w_xmm6),
|
||||||
|
"660F3831F5",
|
||||||
|
"pmovzxbd %xmm5, %xmm6",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_mov(SseOpcode::Pmovzxbw, RegMem::reg(xmm5), w_xmm13),
|
||||||
|
"66440F3830ED",
|
||||||
|
"pmovzxbw %xmm5, %xmm13",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_mov(SseOpcode::Pmovzxbq, RegMem::reg(xmm10), w_xmm11),
|
||||||
|
"66450F3832DA",
|
||||||
|
"pmovzxbq %xmm10, %xmm11",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_mov(SseOpcode::Pmovzxwd, RegMem::reg(xmm2), w_xmm10),
|
||||||
|
"66440F3833D2",
|
||||||
|
"pmovzxwd %xmm2, %xmm10",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_mov(SseOpcode::Pmovzxwq, RegMem::reg(xmm7), w_xmm4),
|
||||||
|
"660F3834E7",
|
||||||
|
"pmovzxwq %xmm7, %xmm4",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_mov(SseOpcode::Pmovzxdq, RegMem::reg(xmm3), w_xmm4),
|
||||||
|
"660F3835E3",
|
||||||
|
"pmovzxdq %xmm3, %xmm4",
|
||||||
|
));
|
||||||
|
|
||||||
// XmmUnary: moves and unary float ops
|
// XmmUnary: moves and unary float ops
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::xmm_unary_rm_r(SseOpcode::Movss, RegMem::reg(xmm13), w_xmm2),
|
Inst::xmm_unary_rm_r(SseOpcode::Movss, RegMem::reg(xmm13), w_xmm2),
|
||||||
|
|||||||
@@ -2910,12 +2910,10 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
match op {
|
match op {
|
||||||
Opcode::SwidenLow => match (input_ty, output_ty) {
|
Opcode::SwidenLow => match (input_ty, output_ty) {
|
||||||
(types::I8X16, types::I16X8) => {
|
(types::I8X16, types::I16X8) => {
|
||||||
ctx.emit(Inst::gen_move(dst, src, output_ty));
|
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxbw, RegMem::reg(src), dst));
|
||||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovsxbw, RegMem::from(dst), dst));
|
|
||||||
}
|
}
|
||||||
(types::I16X8, types::I32X4) => {
|
(types::I16X8, types::I32X4) => {
|
||||||
ctx.emit(Inst::gen_move(dst, src, output_ty));
|
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxwd, RegMem::reg(src), dst));
|
||||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovsxwd, RegMem::from(dst), dst));
|
|
||||||
}
|
}
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
},
|
},
|
||||||
@@ -2929,7 +2927,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
8,
|
8,
|
||||||
false,
|
false,
|
||||||
));
|
));
|
||||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovsxbw, RegMem::from(dst), dst));
|
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxbw, RegMem::from(dst), dst));
|
||||||
}
|
}
|
||||||
(types::I16X8, types::I32X4) => {
|
(types::I16X8, types::I32X4) => {
|
||||||
ctx.emit(Inst::gen_move(dst, src, output_ty));
|
ctx.emit(Inst::gen_move(dst, src, output_ty));
|
||||||
@@ -2940,18 +2938,16 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
8,
|
8,
|
||||||
false,
|
false,
|
||||||
));
|
));
|
||||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovsxwd, RegMem::from(dst), dst));
|
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxwd, RegMem::from(dst), dst));
|
||||||
}
|
}
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
},
|
},
|
||||||
Opcode::UwidenLow => match (input_ty, output_ty) {
|
Opcode::UwidenLow => match (input_ty, output_ty) {
|
||||||
(types::I8X16, types::I16X8) => {
|
(types::I8X16, types::I16X8) => {
|
||||||
ctx.emit(Inst::gen_move(dst, src, output_ty));
|
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxbw, RegMem::reg(src), dst));
|
||||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovzxbw, RegMem::from(dst), dst));
|
|
||||||
}
|
}
|
||||||
(types::I16X8, types::I32X4) => {
|
(types::I16X8, types::I32X4) => {
|
||||||
ctx.emit(Inst::gen_move(dst, src, output_ty));
|
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxwd, RegMem::reg(src), dst));
|
||||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovzxwd, RegMem::from(dst), dst));
|
|
||||||
}
|
}
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
},
|
},
|
||||||
@@ -2965,7 +2961,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
8,
|
8,
|
||||||
false,
|
false,
|
||||||
));
|
));
|
||||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovzxbw, RegMem::from(dst), dst));
|
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxbw, RegMem::from(dst), dst));
|
||||||
}
|
}
|
||||||
(types::I16X8, types::I32X4) => {
|
(types::I16X8, types::I32X4) => {
|
||||||
ctx.emit(Inst::gen_move(dst, src, output_ty));
|
ctx.emit(Inst::gen_move(dst, src, output_ty));
|
||||||
@@ -2976,7 +2972,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
8,
|
8,
|
||||||
false,
|
false,
|
||||||
));
|
));
|
||||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovzxwd, RegMem::from(dst), dst));
|
ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxwd, RegMem::from(dst), dst));
|
||||||
}
|
}
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
},
|
},
|
||||||
|
|||||||
Reference in New Issue
Block a user