diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index bf15513665..b655178bdf 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1873,18 +1873,6 @@ pub(crate) fn emit( SseOpcode::Pcmpgtw => (LegacyPrefixes::_66, 0x0F65, 2), SseOpcode::Pcmpgtd => (LegacyPrefixes::_66, 0x0F66, 2), SseOpcode::Pcmpgtq => (LegacyPrefixes::_66, 0x0F3837, 3), - SseOpcode::Pmovsxbd => (LegacyPrefixes::_66, 0x0F3821, 3), - SseOpcode::Pmovsxbw => (LegacyPrefixes::_66, 0x0F3820, 3), - SseOpcode::Pmovsxbq => (LegacyPrefixes::_66, 0x0F3822, 3), - SseOpcode::Pmovsxwd => (LegacyPrefixes::_66, 0x0F3823, 3), - SseOpcode::Pmovsxwq => (LegacyPrefixes::_66, 0x0F3824, 3), - SseOpcode::Pmovsxdq => (LegacyPrefixes::_66, 0x0F3825, 3), - SseOpcode::Pmovzxbd => (LegacyPrefixes::_66, 0x0F3831, 3), - SseOpcode::Pmovzxbw => (LegacyPrefixes::_66, 0x0F3830, 3), - SseOpcode::Pmovzxbq => (LegacyPrefixes::_66, 0x0F3832, 3), - SseOpcode::Pmovzxwd => (LegacyPrefixes::_66, 0x0F3833, 3), - SseOpcode::Pmovzxwq => (LegacyPrefixes::_66, 0x0F3834, 3), - SseOpcode::Pmovzxdq => (LegacyPrefixes::_66, 0x0F3835, 3), SseOpcode::Pmaxsb => (LegacyPrefixes::_66, 0x0F383C, 3), SseOpcode::Pmaxsw => (LegacyPrefixes::_66, 0x0FEE, 2), SseOpcode::Pmaxsd => (LegacyPrefixes::_66, 0x0F383D, 3), diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index bda26e3f27..0786746672 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -10,7 +10,7 @@ //! RUST_BACKTRACE=1 cargo test --features test-programs/test_programs \ //! --features experimental_x64 --all --exclude peepmatic --exclude lightbeam \ //! --exclude wasmtime-lightbeam --exclude peepmatic-automata --exclude peepmatic-fuzzing \ -//! --exclude peepmatic-macro -- isa::x64::inst::emit_tests::test_x64_emit +//! --exclude peepmatic-macro --exclude wasmtime-wasi-nn -- isa::x64::inst::emit_tests::test_x64_emit use super::*; use crate::isa::test_utils; @@ -3201,81 +3201,6 @@ fn test_x64_emit() { "cvttps2dq %xmm9, %xmm8", )); - // ======================================================== - // XMM_RM_R: Packed Move - - insns.push(( - Inst::xmm_rm_r(SseOpcode::Pmovsxbd, RegMem::reg(xmm6), w_xmm8), - "66440F3821C6", - "pmovsxbd %xmm6, %xmm8", - )); - - insns.push(( - Inst::xmm_rm_r(SseOpcode::Pmovsxbw, RegMem::reg(xmm9), w_xmm10), - "66450F3820D1", - "pmovsxbw %xmm9, %xmm10", - )); - - insns.push(( - Inst::xmm_rm_r(SseOpcode::Pmovsxbq, RegMem::reg(xmm1), w_xmm1), - "660F3822C9", - "pmovsxbq %xmm1, %xmm1", - )); - - insns.push(( - Inst::xmm_rm_r(SseOpcode::Pmovsxwd, RegMem::reg(xmm13), w_xmm10), - "66450F3823D5", - "pmovsxwd %xmm13, %xmm10", - )); - - insns.push(( - Inst::xmm_rm_r(SseOpcode::Pmovsxwq, RegMem::reg(xmm12), w_xmm12), - "66450F3824E4", - "pmovsxwq %xmm12, %xmm12", - )); - - insns.push(( - Inst::xmm_rm_r(SseOpcode::Pmovsxdq, RegMem::reg(xmm10), w_xmm8), - "66450F3825C2", - "pmovsxdq %xmm10, %xmm8", - )); - - insns.push(( - Inst::xmm_rm_r(SseOpcode::Pmovzxbd, RegMem::reg(xmm5), w_xmm6), - "660F3831F5", - "pmovzxbd %xmm5, %xmm6", - )); - - insns.push(( - Inst::xmm_rm_r(SseOpcode::Pmovzxbw, RegMem::reg(xmm5), w_xmm13), - "66440F3830ED", - "pmovzxbw %xmm5, %xmm13", - )); - - insns.push(( - Inst::xmm_rm_r(SseOpcode::Pmovzxbq, RegMem::reg(xmm10), w_xmm11), - "66450F3832DA", - "pmovzxbq %xmm10, %xmm11", - )); - - insns.push(( - Inst::xmm_rm_r(SseOpcode::Pmovzxwd, RegMem::reg(xmm2), w_xmm10), - "66440F3833D2", - "pmovzxwd %xmm2, %xmm10", - )); - - insns.push(( - Inst::xmm_rm_r(SseOpcode::Pmovzxwq, RegMem::reg(xmm7), w_xmm4), - "660F3834E7", - "pmovzxwq %xmm7, %xmm4", - )); - - insns.push(( - Inst::xmm_rm_r(SseOpcode::Pmovzxdq, RegMem::reg(xmm3), w_xmm4), - "660F3835E3", - "pmovzxdq %xmm3, %xmm4", - )); - // XMM_Mov_R_M: float stores insns.push(( Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12)), @@ -3288,6 +3213,81 @@ fn test_x64_emit() { "movsd %xmm1, 0(%rsi)", )); + // ======================================================== + // XMM_MOV: Packed Move + + insns.push(( + Inst::xmm_mov(SseOpcode::Pmovsxbd, RegMem::reg(xmm6), w_xmm8), + "66440F3821C6", + "pmovsxbd %xmm6, %xmm8", + )); + + insns.push(( + Inst::xmm_mov(SseOpcode::Pmovsxbw, RegMem::reg(xmm9), w_xmm10), + "66450F3820D1", + "pmovsxbw %xmm9, %xmm10", + )); + + insns.push(( + Inst::xmm_mov(SseOpcode::Pmovsxbq, RegMem::reg(xmm1), w_xmm1), + "660F3822C9", + "pmovsxbq %xmm1, %xmm1", + )); + + insns.push(( + Inst::xmm_mov(SseOpcode::Pmovsxwd, RegMem::reg(xmm13), w_xmm10), + "66450F3823D5", + "pmovsxwd %xmm13, %xmm10", + )); + + insns.push(( + Inst::xmm_mov(SseOpcode::Pmovsxwq, RegMem::reg(xmm12), w_xmm12), + "66450F3824E4", + "pmovsxwq %xmm12, %xmm12", + )); + + insns.push(( + Inst::xmm_mov(SseOpcode::Pmovsxdq, RegMem::reg(xmm10), w_xmm8), + "66450F3825C2", + "pmovsxdq %xmm10, %xmm8", + )); + + insns.push(( + Inst::xmm_mov(SseOpcode::Pmovzxbd, RegMem::reg(xmm5), w_xmm6), + "660F3831F5", + "pmovzxbd %xmm5, %xmm6", + )); + + insns.push(( + Inst::xmm_mov(SseOpcode::Pmovzxbw, RegMem::reg(xmm5), w_xmm13), + "66440F3830ED", + "pmovzxbw %xmm5, %xmm13", + )); + + insns.push(( + Inst::xmm_mov(SseOpcode::Pmovzxbq, RegMem::reg(xmm10), w_xmm11), + "66450F3832DA", + "pmovzxbq %xmm10, %xmm11", + )); + + insns.push(( + Inst::xmm_mov(SseOpcode::Pmovzxwd, RegMem::reg(xmm2), w_xmm10), + "66440F3833D2", + "pmovzxwd %xmm2, %xmm10", + )); + + insns.push(( + Inst::xmm_mov(SseOpcode::Pmovzxwq, RegMem::reg(xmm7), w_xmm4), + "660F3834E7", + "pmovzxwq %xmm7, %xmm4", + )); + + insns.push(( + Inst::xmm_mov(SseOpcode::Pmovzxdq, RegMem::reg(xmm3), w_xmm4), + "660F3835E3", + "pmovzxdq %xmm3, %xmm4", + )); + // XmmUnary: moves and unary float ops insns.push(( Inst::xmm_unary_rm_r(SseOpcode::Movss, RegMem::reg(xmm13), w_xmm2), diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 21ed356cc1..b15cb62a73 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -2910,12 +2910,10 @@ fn lower_insn_to_regs>( match op { Opcode::SwidenLow => match (input_ty, output_ty) { (types::I8X16, types::I16X8) => { - ctx.emit(Inst::gen_move(dst, src, output_ty)); - ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovsxbw, RegMem::from(dst), dst)); + ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxbw, RegMem::reg(src), dst)); } (types::I16X8, types::I32X4) => { - ctx.emit(Inst::gen_move(dst, src, output_ty)); - ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovsxwd, RegMem::from(dst), dst)); + ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxwd, RegMem::reg(src), dst)); } _ => unreachable!(), }, @@ -2929,7 +2927,7 @@ fn lower_insn_to_regs>( 8, false, )); - ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovsxbw, RegMem::from(dst), dst)); + ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxbw, RegMem::from(dst), dst)); } (types::I16X8, types::I32X4) => { ctx.emit(Inst::gen_move(dst, src, output_ty)); @@ -2940,18 +2938,16 @@ fn lower_insn_to_regs>( 8, false, )); - ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovsxwd, RegMem::from(dst), dst)); + ctx.emit(Inst::xmm_mov(SseOpcode::Pmovsxwd, RegMem::from(dst), dst)); } _ => unreachable!(), }, Opcode::UwidenLow => match (input_ty, output_ty) { (types::I8X16, types::I16X8) => { - ctx.emit(Inst::gen_move(dst, src, output_ty)); - ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovzxbw, RegMem::from(dst), dst)); + ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxbw, RegMem::reg(src), dst)); } (types::I16X8, types::I32X4) => { - ctx.emit(Inst::gen_move(dst, src, output_ty)); - ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovzxwd, RegMem::from(dst), dst)); + ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxwd, RegMem::reg(src), dst)); } _ => unreachable!(), }, @@ -2965,7 +2961,7 @@ fn lower_insn_to_regs>( 8, false, )); - ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovzxbw, RegMem::from(dst), dst)); + ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxbw, RegMem::from(dst), dst)); } (types::I16X8, types::I32X4) => { ctx.emit(Inst::gen_move(dst, src, output_ty)); @@ -2976,7 +2972,7 @@ fn lower_insn_to_regs>( 8, false, )); - ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmovzxwd, RegMem::from(dst), dst)); + ctx.emit(Inst::xmm_mov(SseOpcode::Pmovzxwd, RegMem::from(dst), dst)); } _ => unreachable!(), },