[machinst x64]: add iabs implementation

This commit is contained in:
Andrew Brown
2020-09-21 11:40:04 -07:00
parent 5e08eb3b83
commit b202464fa0
4 changed files with 70 additions and 17 deletions

View File

@@ -336,6 +336,7 @@ impl fmt::Display for UnaryRmROpcode {
/// Instruction-set extensions that this backend's SSE opcodes are classified under.
///
/// The `SseOpcode` implementation in this file matches each opcode to one of these variants
/// (for example, `Xorpd` maps to `SSE2` and `Pmulld` maps to `SSE41`).
pub(crate) enum InstructionSet {
SSE,
SSE2,
/// Selected for the packed absolute-value opcodes `Pabsb`, `Pabsw` and `Pabsd`.
SSSE3,
SSE41,
}
@@ -393,6 +394,9 @@ pub enum SseOpcode {
Mulsd,
Orps,
Orpd,
Pabsb,
Pabsw,
Pabsd,
Paddb,
Paddd,
Paddq,
@@ -521,6 +525,8 @@ impl SseOpcode {
| SseOpcode::Ucomisd
| SseOpcode::Xorpd => SSE2,
SseOpcode::Pabsb | SseOpcode::Pabsw | SseOpcode::Pabsd => SSSE3,
SseOpcode::Insertps | SseOpcode::Pmulld | SseOpcode::Roundss | SseOpcode::Roundsd => {
SSE41
}
@@ -590,6 +596,9 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Mulsd => "mulsd",
SseOpcode::Orpd => "orpd",
SseOpcode::Orps => "orps",
SseOpcode::Pabsb => "pabsb",
SseOpcode::Pabsw => "pabsw",
SseOpcode::Pabsd => "pabsd",
SseOpcode::Paddb => "paddb",
SseOpcode::Paddd => "paddd",
SseOpcode::Paddq => "paddq",

View File

@@ -1697,27 +1697,38 @@ pub(crate) fn emit(
} => {
let rex = RexFlags::clear_w();
let (prefix, opcode) = match op {
SseOpcode::Cvtss2sd => (LegacyPrefixes::_F3, 0x0F5A),
SseOpcode::Cvtsd2ss => (LegacyPrefixes::_F2, 0x0F5A),
SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F28),
SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F28),
SseOpcode::Movdqa => (LegacyPrefixes::_66, 0x0F6F),
SseOpcode::Movdqu => (LegacyPrefixes::_F3, 0x0F6F),
SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F10),
SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F10),
SseOpcode::Movups => (LegacyPrefixes::None, 0x0F10),
SseOpcode::Movupd => (LegacyPrefixes::_66, 0x0F10),
SseOpcode::Sqrtps => (LegacyPrefixes::None, 0x0F51),
SseOpcode::Sqrtpd => (LegacyPrefixes::_66, 0x0F51),
SseOpcode::Sqrtss => (LegacyPrefixes::_F3, 0x0F51),
SseOpcode::Sqrtsd => (LegacyPrefixes::_F2, 0x0F51),
let (prefix, opcode, num_opcodes) = match op {
SseOpcode::Cvtss2sd => (LegacyPrefixes::_F3, 0x0F5A, 2),
SseOpcode::Cvtsd2ss => (LegacyPrefixes::_F2, 0x0F5A, 2),
SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F28, 2),
SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F28, 2),
SseOpcode::Movdqa => (LegacyPrefixes::_66, 0x0F6F, 2),
SseOpcode::Movdqu => (LegacyPrefixes::_F3, 0x0F6F, 2),
SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F10, 2),
SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F10, 2),
SseOpcode::Movups => (LegacyPrefixes::None, 0x0F10, 2),
SseOpcode::Movupd => (LegacyPrefixes::_66, 0x0F10, 2),
SseOpcode::Pabsb => (LegacyPrefixes::_66, 0x0F381C, 3),
SseOpcode::Pabsw => (LegacyPrefixes::_66, 0x0F381D, 3),
SseOpcode::Pabsd => (LegacyPrefixes::_66, 0x0F381E, 3),
SseOpcode::Sqrtps => (LegacyPrefixes::None, 0x0F51, 2),
SseOpcode::Sqrtpd => (LegacyPrefixes::_66, 0x0F51, 2),
SseOpcode::Sqrtss => (LegacyPrefixes::_F3, 0x0F51, 2),
SseOpcode::Sqrtsd => (LegacyPrefixes::_F2, 0x0F51, 2),
_ => unimplemented!("Opcode {:?} not implemented", op),
};
match src_e {
RegMem::Reg { reg: reg_e } => {
emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex);
emit_std_reg_reg(
sink,
prefix,
opcode,
num_opcodes,
reg_g.to_reg(),
*reg_e,
rex,
);
}
RegMem::Mem { addr } => {
let addr = &addr.finalize(state);
@@ -1725,7 +1736,7 @@ pub(crate) fn emit(
// Register the offset at which the actual load instruction starts.
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
}
emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex);
emit_std_reg_mem(sink, prefix, opcode, num_opcodes, reg_g.to_reg(), addr, rex);
}
};
}

View File

@@ -3226,6 +3226,22 @@ fn test_x64_emit() {
"cvtsd2ss %xmm1, %xmm0",
));
insns.push((
Inst::xmm_unary_rm_r(SseOpcode::Pabsb, RegMem::reg(xmm2), w_xmm1),
"660F381CCA",
"pabsb %xmm2, %xmm1",
));
insns.push((
Inst::xmm_unary_rm_r(SseOpcode::Pabsw, RegMem::reg(xmm0), w_xmm0),
"660F381DC0",
"pabsw %xmm0, %xmm0",
));
insns.push((
Inst::xmm_unary_rm_r(SseOpcode::Pabsd, RegMem::reg(xmm10), w_xmm11),
"66450F381EDA",
"pabsd %xmm10, %xmm11",
));
// Xmm to int conversions, and conversely.
insns.push((

View File

@@ -686,6 +686,23 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
}
Opcode::Iabs => {
let src = input_to_reg_mem(ctx, inputs[0]);
let dst = get_output_reg(ctx, outputs[0]);
let ty = ty.unwrap();
if ty.is_vector() {
let opcode = match ty {
types::I8X16 => SseOpcode::Pabsb,
types::I16X8 => SseOpcode::Pabsw,
types::I32X4 => SseOpcode::Pabsd,
_ => panic!("Unsupported type for packed iabs instruction: {}", ty),
};
ctx.emit(Inst::xmm_unary_rm_r(opcode, src, dst));
} else {
unimplemented!("iabs is unimplemented for non-vector type: {}", ty);
}
}
Opcode::Bnot => {
let ty = ty.unwrap();
if ty.is_vector() {