diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index c80db41f09..8037df9ba1 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -185,7 +185,7 @@ pub enum RegMemImm { impl RegMemImm { pub(crate) fn reg(reg: Reg) -> Self { - debug_assert!(reg.get_class() == RegClass::I64); + debug_assert!(reg.get_class() == RegClass::I64 || reg.get_class() == RegClass::V128); Self::Reg { reg } } pub(crate) fn mem(addr: impl Into) -> Self { @@ -383,6 +383,14 @@ pub enum SseOpcode { Mulsd, Orps, Orpd, + Psllw, + Pslld, + Psllq, + Psraw, + Psrad, + Psrlw, + Psrld, + Psrlq, Rcpss, Roundss, Roundsd, @@ -463,6 +471,14 @@ impl SseOpcode { | SseOpcode::Mulpd | SseOpcode::Mulsd | SseOpcode::Orpd + | SseOpcode::Psllw + | SseOpcode::Pslld + | SseOpcode::Psllq + | SseOpcode::Psraw + | SseOpcode::Psrad + | SseOpcode::Psrlw + | SseOpcode::Psrld + | SseOpcode::Psrlq | SseOpcode::Sqrtpd | SseOpcode::Sqrtsd | SseOpcode::Subpd @@ -535,6 +551,14 @@ impl fmt::Debug for SseOpcode { SseOpcode::Mulsd => "mulsd", SseOpcode::Orpd => "orpd", SseOpcode::Orps => "orps", + SseOpcode::Psllw => "psllw", + SseOpcode::Pslld => "pslld", + SseOpcode::Psllq => "psllq", + SseOpcode::Psraw => "psraw", + SseOpcode::Psrad => "psrad", + SseOpcode::Psrlw => "psrlw", + SseOpcode::Psrld => "psrld", + SseOpcode::Psrlq => "psrlq", SseOpcode::Rcpss => "rcpss", SseOpcode::Roundss => "roundss", SseOpcode::Roundsd => "roundsd", diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 84075a9335..d31b2e8b93 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -4,6 +4,7 @@ use crate::ir::TrapCode; use crate::isa::x64::inst::args::*; use crate::isa::x64::inst::*; use crate::machinst::{MachBuffer, MachInstEmit, MachLabel}; +use core::convert::TryInto; use log::debug; use regalloc::{Reg, RegClass, Writable}; use 
std::convert::TryFrom; @@ -1122,6 +1123,53 @@ pub(crate) fn emit( } } + Inst::XmmRmiReg { opcode, src, dst } => { + let rex = RexFlags::clear_w(); + let prefix = LegacyPrefix::_66; + if let RegMemImm::Imm { simm32 } = src { + let (opcode_bytes, reg_digit) = match opcode { + SseOpcode::Psllw => (0x0F71, 6), + SseOpcode::Pslld => (0x0F72, 6), + SseOpcode::Psllq => (0x0F73, 6), + SseOpcode::Psraw => (0x0F71, 4), + SseOpcode::Psrad => (0x0F72, 4), + SseOpcode::Psrlw => (0x0F71, 2), + SseOpcode::Psrld => (0x0F72, 2), + SseOpcode::Psrlq => (0x0F73, 2), + _ => panic!("invalid opcode: {}", opcode), + }; + let dst_enc = reg_enc(dst.to_reg()); + emit_std_enc_enc(sink, prefix, opcode_bytes, 2, reg_digit, dst_enc, rex); + let imm = (*simm32) + .try_into() + .expect("the immediate must be convertible to a u8"); + sink.put1(imm); + } else { + let opcode_bytes = match opcode { + SseOpcode::Psllw => 0x0FF1, + SseOpcode::Pslld => 0x0FF2, + SseOpcode::Psllq => 0x0FF3, + SseOpcode::Psraw => 0x0FE1, + SseOpcode::Psrad => 0x0FE2, + SseOpcode::Psrlw => 0x0FD1, + SseOpcode::Psrld => 0x0FD2, + SseOpcode::Psrlq => 0x0FD3, + _ => panic!("invalid opcode: {}", opcode), + }; + + match src { + RegMemImm::Reg { reg } => { + emit_std_reg_reg(sink, prefix, opcode_bytes, 2, dst.to_reg(), *reg, rex); + } + RegMemImm::Mem { addr } => { + let addr = &addr.finalize(state); + emit_std_reg_mem(sink, prefix, opcode_bytes, 2, dst.to_reg(), addr, rex); + } + RegMemImm::Imm { .. 
} => unreachable!(), + } + }; + } + Inst::Cmp_RMI_R { size, src: src_e, diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 036b3bb7b6..1d28e64557 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -3173,6 +3173,24 @@ fn test_x64_emit() { "cvtsi2sd %rsi, %xmm1", )); + // ======================================================== + // XmmRmi + insns.push(( + Inst::xmm_rmi_reg(SseOpcode::Psraw, RegMemImm::reg(xmm10), w_xmm1), + "66410FE1CA", + "psraw %xmm10, %xmm1", + )); + insns.push(( + Inst::xmm_rmi_reg(SseOpcode::Pslld, RegMemImm::imm(31), w_xmm1), + "660F72F11F", + "pslld $31, %xmm1", + )); + insns.push(( + Inst::xmm_rmi_reg(SseOpcode::Psrlq, RegMemImm::imm(1), w_xmm3), + "660F73D301", + "psrlq $1, %xmm3", + )); + // ======================================================== // Misc instructions. diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 818f73c887..f27b448fa3 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -162,6 +162,13 @@ pub enum Inst { dst: Writable, }, + /// Packed SIMD shifts, both logical (psll*, psrl*) and arithmetic (psra*): (reg addr imm) reg. + XmmRmiReg { + opcode: SseOpcode, + src: RegMemImm, + dst: Writable, + }, + /// Integer comparisons/tests: cmp (b w l q) (reg addr imm) reg. 
Cmp_RMI_R { size: u8, // 1, 2, 4 or 8 @@ -712,6 +719,12 @@ impl Inst { } } + pub(crate) fn xmm_rmi_reg(opcode: SseOpcode, src: RegMemImm, dst: Writable) -> Inst { + src.assert_regclass_is(RegClass::V128); + debug_assert!(dst.to_reg().get_class() == RegClass::V128); + Inst::XmmRmiReg { opcode, src, dst } + } + pub(crate) fn movsx_rm_r( ext_mode: ExtMode, src: RegMem, @@ -1276,6 +1289,13 @@ impl ShowWithRRU for Inst { ), }, + Inst::XmmRmiReg { opcode, src, dst } => format!( + "{} {}, {}", + ljustify(opcode.to_string()), + src.show_rru(mb_rru), + dst.to_reg().show_rru(mb_rru) + ), + Inst::Cmp_RMI_R { size, src, dst } => format!( "{} {}, {}", ljustify2("cmp".to_string(), suffixBWLQ(*size)), @@ -1458,6 +1478,10 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { collector.add_use(*lhs); collector.add_mod(*rhs_dst); } + Inst::XmmRmiReg { src, dst, .. } => { + src.get_regs_as_uses(collector); + collector.add_mod(*dst); + } Inst::Xmm_Mov_R_M { src, dst, .. } => { collector.add_use(*src); dst.get_regs_as_uses(collector); @@ -1733,6 +1757,14 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { src.map_uses(mapper); map_mod(mapper, dst); } + Inst::XmmRmiReg { + ref mut src, + ref mut dst, + .. + } => { + src.map_uses(mapper); + map_mod(mapper, dst); + } Inst::XmmMinMaxSeq { ref mut lhs, ref mut rhs_dst,