[machinst x64]: add iabs implementation

This commit is contained in:
Andrew Brown
2020-09-21 11:40:04 -07:00
parent 5e08eb3b83
commit b202464fa0
4 changed files with 70 additions and 17 deletions

View File

@@ -336,6 +336,7 @@ impl fmt::Display for UnaryRmROpcode {
pub(crate) enum InstructionSet { pub(crate) enum InstructionSet {
SSE, SSE,
SSE2, SSE2,
SSSE3,
SSE41, SSE41,
} }
@@ -393,6 +394,9 @@ pub enum SseOpcode {
Mulsd, Mulsd,
Orps, Orps,
Orpd, Orpd,
Pabsb,
Pabsw,
Pabsd,
Paddb, Paddb,
Paddd, Paddd,
Paddq, Paddq,
@@ -521,6 +525,8 @@ impl SseOpcode {
| SseOpcode::Ucomisd | SseOpcode::Ucomisd
| SseOpcode::Xorpd => SSE2, | SseOpcode::Xorpd => SSE2,
SseOpcode::Pabsb | SseOpcode::Pabsw | SseOpcode::Pabsd => SSSE3,
SseOpcode::Insertps | SseOpcode::Pmulld | SseOpcode::Roundss | SseOpcode::Roundsd => { SseOpcode::Insertps | SseOpcode::Pmulld | SseOpcode::Roundss | SseOpcode::Roundsd => {
SSE41 SSE41
} }
@@ -590,6 +596,9 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Mulsd => "mulsd", SseOpcode::Mulsd => "mulsd",
SseOpcode::Orpd => "orpd", SseOpcode::Orpd => "orpd",
SseOpcode::Orps => "orps", SseOpcode::Orps => "orps",
SseOpcode::Pabsb => "pabsb",
SseOpcode::Pabsw => "pabsw",
SseOpcode::Pabsd => "pabsd",
SseOpcode::Paddb => "paddb", SseOpcode::Paddb => "paddb",
SseOpcode::Paddd => "paddd", SseOpcode::Paddd => "paddd",
SseOpcode::Paddq => "paddq", SseOpcode::Paddq => "paddq",

View File

@@ -1697,27 +1697,38 @@ pub(crate) fn emit(
} => { } => {
let rex = RexFlags::clear_w(); let rex = RexFlags::clear_w();
let (prefix, opcode) = match op { let (prefix, opcode, num_opcodes) = match op {
SseOpcode::Cvtss2sd => (LegacyPrefixes::_F3, 0x0F5A), SseOpcode::Cvtss2sd => (LegacyPrefixes::_F3, 0x0F5A, 2),
SseOpcode::Cvtsd2ss => (LegacyPrefixes::_F2, 0x0F5A), SseOpcode::Cvtsd2ss => (LegacyPrefixes::_F2, 0x0F5A, 2),
SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F28), SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F28, 2),
SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F28), SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F28, 2),
SseOpcode::Movdqa => (LegacyPrefixes::_66, 0x0F6F), SseOpcode::Movdqa => (LegacyPrefixes::_66, 0x0F6F, 2),
SseOpcode::Movdqu => (LegacyPrefixes::_F3, 0x0F6F), SseOpcode::Movdqu => (LegacyPrefixes::_F3, 0x0F6F, 2),
SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F10), SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F10, 2),
SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F10), SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F10, 2),
SseOpcode::Movups => (LegacyPrefixes::None, 0x0F10), SseOpcode::Movups => (LegacyPrefixes::None, 0x0F10, 2),
SseOpcode::Movupd => (LegacyPrefixes::_66, 0x0F10), SseOpcode::Movupd => (LegacyPrefixes::_66, 0x0F10, 2),
SseOpcode::Sqrtps => (LegacyPrefixes::None, 0x0F51), SseOpcode::Pabsb => (LegacyPrefixes::_66, 0x0F381C, 3),
SseOpcode::Sqrtpd => (LegacyPrefixes::_66, 0x0F51), SseOpcode::Pabsw => (LegacyPrefixes::_66, 0x0F381D, 3),
SseOpcode::Sqrtss => (LegacyPrefixes::_F3, 0x0F51), SseOpcode::Pabsd => (LegacyPrefixes::_66, 0x0F381E, 3),
SseOpcode::Sqrtsd => (LegacyPrefixes::_F2, 0x0F51), SseOpcode::Sqrtps => (LegacyPrefixes::None, 0x0F51, 2),
SseOpcode::Sqrtpd => (LegacyPrefixes::_66, 0x0F51, 2),
SseOpcode::Sqrtss => (LegacyPrefixes::_F3, 0x0F51, 2),
SseOpcode::Sqrtsd => (LegacyPrefixes::_F2, 0x0F51, 2),
_ => unimplemented!("Opcode {:?} not implemented", op), _ => unimplemented!("Opcode {:?} not implemented", op),
}; };
match src_e { match src_e {
RegMem::Reg { reg: reg_e } => { RegMem::Reg { reg: reg_e } => {
emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex); emit_std_reg_reg(
sink,
prefix,
opcode,
num_opcodes,
reg_g.to_reg(),
*reg_e,
rex,
);
} }
RegMem::Mem { addr } => { RegMem::Mem { addr } => {
let addr = &addr.finalize(state); let addr = &addr.finalize(state);
@@ -1725,7 +1736,7 @@ pub(crate) fn emit(
// Register the offset at which the actual load instruction starts. // Register the offset at which the actual load instruction starts.
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
} }
emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex); emit_std_reg_mem(sink, prefix, opcode, num_opcodes, reg_g.to_reg(), addr, rex);
} }
}; };
} }

View File

@@ -3226,6 +3226,22 @@ fn test_x64_emit() {
"cvtsd2ss %xmm1, %xmm0", "cvtsd2ss %xmm1, %xmm0",
)); ));
// Packed absolute value (pabsb/pabsw/pabsd), register-to-register forms.
// Each entry is (instruction, expected encoding as hex, expected disassembly).
// Expected bytes: 0x66 prefix, three-byte opcode 0F 38 1C/1D/1E, then ModRM.
// ModRM 0xCA = mod 11, reg 001 (xmm1, destination), rm 010 (xmm2, source).
insns.push((
Inst::xmm_unary_rm_r(SseOpcode::Pabsb, RegMem::reg(xmm2), w_xmm1),
"660F381CCA",
"pabsb %xmm2, %xmm1",
));
// Same register as source and destination: ModRM 0xC0 = mod 11, reg 000, rm 000.
insns.push((
Inst::xmm_unary_rm_r(SseOpcode::Pabsw, RegMem::reg(xmm0), w_xmm0),
"660F381DC0",
"pabsw %xmm0, %xmm0",
));
// High registers on both sides: the 0x45 byte between the 0x66 prefix and the
// opcode is a REX prefix with R and B set (xmm11 in reg, xmm10 in rm);
// ModRM 0xDA = mod 11, reg 011, rm 010 carries the low three bits of each.
insns.push((
Inst::xmm_unary_rm_r(SseOpcode::Pabsd, RegMem::reg(xmm10), w_xmm11),
"66450F381EDA",
"pabsd %xmm10, %xmm11",
));
// Xmm to int conversions, and conversely. // Xmm to int conversions, and conversely.
insns.push(( insns.push((

View File

@@ -686,6 +686,23 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
} }
} }
// Lowering for `iabs`: only vector types are handled, by emitting a single
// packed-absolute-value instruction (pabsb/pabsw/pabsd) chosen by lane width.
Opcode::Iabs => {
// The single input may be a register or a memory operand; the result goes to
// the instruction's first output register.
let src = input_to_reg_mem(ctx, inputs[0]);
let dst = get_output_reg(ctx, outputs[0]);
let ty = ty.unwrap();
if ty.is_vector() {
// Select the opcode from the vector type. Any vector type not listed here
// (for example one with 64-bit lanes) reaches the panic below.
// NOTE(review): these opcodes are classified as SSSE3; no ISA availability
// check is visible in this arm -- confirm it is enforced elsewhere.
let opcode = match ty {
types::I8X16 => SseOpcode::Pabsb,
types::I16X8 => SseOpcode::Pabsw,
types::I32X4 => SseOpcode::Pabsd,
_ => panic!("Unsupported type for packed iabs instruction: {}", ty),
};
ctx.emit(Inst::xmm_unary_rm_r(opcode, src, dst));
} else {
// Scalar iabs has no lowering yet; this aborts compilation with a panic.
unimplemented!("iabs is unimplemented for non-vector type: {}", ty);
}
}
Opcode::Bnot => { Opcode::Bnot => {
let ty = ty.unwrap(); let ty = ty.unwrap();
if ty.is_vector() { if ty.is_vector() {