From b202464fa022adb6b8b7cb5abd3aae3c4a351401 Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Mon, 21 Sep 2020 11:40:04 -0700 Subject: [PATCH] [machinst x64]: add iabs implementation --- cranelift/codegen/src/isa/x64/inst/args.rs | 9 ++++ cranelift/codegen/src/isa/x64/inst/emit.rs | 45 ++++++++++++------- .../codegen/src/isa/x64/inst/emit_tests.rs | 16 +++++++ cranelift/codegen/src/isa/x64/lower.rs | 17 +++++++ 4 files changed, 70 insertions(+), 17 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 4b236ee163..18e5d15d46 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -336,6 +336,7 @@ impl fmt::Display for UnaryRmROpcode { pub(crate) enum InstructionSet { SSE, SSE2, + SSSE3, SSE41, } @@ -393,6 +394,9 @@ pub enum SseOpcode { Mulsd, Orps, Orpd, + Pabsb, + Pabsw, + Pabsd, Paddb, Paddd, Paddq, @@ -521,6 +525,8 @@ impl SseOpcode { | SseOpcode::Ucomisd | SseOpcode::Xorpd => SSE2, + SseOpcode::Pabsb | SseOpcode::Pabsw | SseOpcode::Pabsd => SSSE3, + SseOpcode::Insertps | SseOpcode::Pmulld | SseOpcode::Roundss | SseOpcode::Roundsd => { SSE41 } @@ -590,6 +596,9 @@ impl fmt::Debug for SseOpcode { SseOpcode::Mulsd => "mulsd", SseOpcode::Orpd => "orpd", SseOpcode::Orps => "orps", + SseOpcode::Pabsb => "pabsb", + SseOpcode::Pabsw => "pabsw", + SseOpcode::Pabsd => "pabsd", SseOpcode::Paddb => "paddb", SseOpcode::Paddd => "paddd", SseOpcode::Paddq => "paddq", diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 53632d03a0..5aaca4e65b 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1697,27 +1697,38 @@ pub(crate) fn emit( } => { let rex = RexFlags::clear_w(); - let (prefix, opcode) = match op { - SseOpcode::Cvtss2sd => (LegacyPrefixes::_F3, 0x0F5A), - SseOpcode::Cvtsd2ss => (LegacyPrefixes::_F2, 0x0F5A), - SseOpcode::Movaps => 
(LegacyPrefixes::None, 0x0F28), - SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F28), - SseOpcode::Movdqa => (LegacyPrefixes::_66, 0x0F6F), - SseOpcode::Movdqu => (LegacyPrefixes::_F3, 0x0F6F), - SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F10), - SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F10), - SseOpcode::Movups => (LegacyPrefixes::None, 0x0F10), - SseOpcode::Movupd => (LegacyPrefixes::_66, 0x0F10), - SseOpcode::Sqrtps => (LegacyPrefixes::None, 0x0F51), - SseOpcode::Sqrtpd => (LegacyPrefixes::_66, 0x0F51), - SseOpcode::Sqrtss => (LegacyPrefixes::_F3, 0x0F51), - SseOpcode::Sqrtsd => (LegacyPrefixes::_F2, 0x0F51), + let (prefix, opcode, num_opcodes) = match op { + SseOpcode::Cvtss2sd => (LegacyPrefixes::_F3, 0x0F5A, 2), + SseOpcode::Cvtsd2ss => (LegacyPrefixes::_F2, 0x0F5A, 2), + SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F28, 2), + SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F28, 2), + SseOpcode::Movdqa => (LegacyPrefixes::_66, 0x0F6F, 2), + SseOpcode::Movdqu => (LegacyPrefixes::_F3, 0x0F6F, 2), + SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F10, 2), + SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F10, 2), + SseOpcode::Movups => (LegacyPrefixes::None, 0x0F10, 2), + SseOpcode::Movupd => (LegacyPrefixes::_66, 0x0F10, 2), + SseOpcode::Pabsb => (LegacyPrefixes::_66, 0x0F381C, 3), + SseOpcode::Pabsw => (LegacyPrefixes::_66, 0x0F381D, 3), + SseOpcode::Pabsd => (LegacyPrefixes::_66, 0x0F381E, 3), + SseOpcode::Sqrtps => (LegacyPrefixes::None, 0x0F51, 2), + SseOpcode::Sqrtpd => (LegacyPrefixes::_66, 0x0F51, 2), + SseOpcode::Sqrtss => (LegacyPrefixes::_F3, 0x0F51, 2), + SseOpcode::Sqrtsd => (LegacyPrefixes::_F2, 0x0F51, 2), _ => unimplemented!("Opcode {:?} not implemented", op), }; match src_e { RegMem::Reg { reg: reg_e } => { - emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex); + emit_std_reg_reg( + sink, + prefix, + opcode, + num_opcodes, + reg_g.to_reg(), + *reg_e, + rex, + ); } RegMem::Mem { addr } => { let addr = &addr.finalize(state); @@ 
-1725,7 +1736,7 @@ pub(crate) fn emit( // Register the offset at which the actual load instruction starts. sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); } - emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex); + emit_std_reg_mem(sink, prefix, opcode, num_opcodes, reg_g.to_reg(), addr, rex); } }; } diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index b4a3b10d8d..866d2e305c 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -3226,6 +3226,22 @@ fn test_x64_emit() { "cvtsd2ss %xmm1, %xmm0", )); + insns.push(( + Inst::xmm_unary_rm_r(SseOpcode::Pabsb, RegMem::reg(xmm2), w_xmm1), + "660F381CCA", + "pabsb %xmm2, %xmm1", + )); + insns.push(( + Inst::xmm_unary_rm_r(SseOpcode::Pabsw, RegMem::reg(xmm0), w_xmm0), + "660F381DC0", + "pabsw %xmm0, %xmm0", + )); + insns.push(( + Inst::xmm_unary_rm_r(SseOpcode::Pabsd, RegMem::reg(xmm10), w_xmm11), + "66450F381EDA", + "pabsd %xmm10, %xmm11", + )); + // Xmm to int conversions, and conversely. insns.push(( diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index a6079c6549..474a6d86c3 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -686,6 +686,23 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>( } } + Opcode::Iabs => { + let src = input_to_reg_mem(ctx, inputs[0]); + let dst = get_output_reg(ctx, outputs[0]); + let ty = ty.unwrap(); + if ty.is_vector() { + let opcode = match ty { + types::I8X16 => SseOpcode::Pabsb, + types::I16X8 => SseOpcode::Pabsw, + types::I32X4 => SseOpcode::Pabsd, + _ => panic!("Unsupported type for packed iabs instruction: {}", ty), + }; + ctx.emit(Inst::xmm_unary_rm_r(opcode, src, dst)); + } else { + unimplemented!("iabs is unimplemented for non-vector type: {}", ty); + } + } + Opcode::Bnot => { let ty = ty.unwrap(); if ty.is_vector() {