[SIMD][x86_64] Add encoding for PMADDWD (#2530)
* [SIMD][x86_64] Add encoding for PMADDWD
* also for "experimental_x64"
This commit is contained in:
@@ -498,6 +498,7 @@ pub enum SseOpcode {
|
||||
Pinsrb,
|
||||
Pinsrw,
|
||||
Pinsrd,
|
||||
Pmaddwd,
|
||||
Pmaxsb,
|
||||
Pmaxsw,
|
||||
Pmaxsd,
|
||||
@@ -661,6 +662,7 @@ impl SseOpcode {
|
||||
| SseOpcode::Pcmpgtd
|
||||
| SseOpcode::Pextrw
|
||||
| SseOpcode::Pinsrw
|
||||
| SseOpcode::Pmaddwd
|
||||
| SseOpcode::Pmaxsw
|
||||
| SseOpcode::Pmaxub
|
||||
| SseOpcode::Pminsw
|
||||
@@ -842,6 +844,7 @@ impl fmt::Debug for SseOpcode {
|
||||
SseOpcode::Pinsrb => "pinsrb",
|
||||
SseOpcode::Pinsrw => "pinsrw",
|
||||
SseOpcode::Pinsrd => "pinsrd",
|
||||
SseOpcode::Pmaddwd => "pmaddwd",
|
||||
SseOpcode::Pmaxsb => "pmaxsb",
|
||||
SseOpcode::Pmaxsw => "pmaxsw",
|
||||
SseOpcode::Pmaxsd => "pmaxsd",
|
||||
|
||||
@@ -1873,6 +1873,7 @@ pub(crate) fn emit(
|
||||
SseOpcode::Pcmpgtw => (LegacyPrefixes::_66, 0x0F65, 2),
|
||||
SseOpcode::Pcmpgtd => (LegacyPrefixes::_66, 0x0F66, 2),
|
||||
SseOpcode::Pcmpgtq => (LegacyPrefixes::_66, 0x0F3837, 3),
|
||||
SseOpcode::Pmaddwd => (LegacyPrefixes::_66, 0x0FF5, 2),
|
||||
SseOpcode::Pmaxsb => (LegacyPrefixes::_66, 0x0F383C, 3),
|
||||
SseOpcode::Pmaxsw => (LegacyPrefixes::_66, 0x0FEE, 2),
|
||||
SseOpcode::Pmaxsd => (LegacyPrefixes::_66, 0x0F383D, 3),
|
||||
|
||||
@@ -3067,6 +3067,12 @@ fn test_x64_emit() {
|
||||
"pmuludq %xmm8, %xmm9",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pmaddwd, RegMem::reg(xmm8), w_xmm1),
|
||||
"66410FF5C8",
|
||||
"pmaddwd %xmm8, %xmm1",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pmaxsb, RegMem::reg(xmm15), w_xmm6),
|
||||
"66410F383CF7",
|
||||
|
||||
@@ -2235,6 +2235,24 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::WideningPairwiseDotProductS => {
|
||||
let lhs = put_input_in_reg(ctx, inputs[0]);
|
||||
let rhs = input_to_reg_mem(ctx, inputs[1]);
|
||||
let dst = get_output_reg(ctx, outputs[0]);
|
||||
let ty = ty.unwrap();
|
||||
|
||||
ctx.emit(Inst::gen_move(dst, lhs, ty));
|
||||
|
||||
if ty == types::I32X4 {
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmaddwd, rhs, dst));
|
||||
} else {
|
||||
panic!(
|
||||
"Opcode::WideningPairwiseDotProductS: unsupported laneage: {:?}",
|
||||
ty
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv => {
|
||||
let lhs = put_input_in_reg(ctx, inputs[0]);
|
||||
let rhs = input_to_reg_mem(ctx, inputs[1]);
|
||||
|
||||
Reference in New Issue
Block a user