[machinst x64]: add iabs implementation
This commit is contained in:
@@ -336,6 +336,7 @@ impl fmt::Display for UnaryRmROpcode {
|
||||
/// x86 SIMD instruction-set levels named by this backend.
///
/// In the `SseOpcode` match visible in this diff, `Pabsb`/`Pabsw`/`Pabsd` map to
/// `SSSE3`, while `Insertps`/`Pmulld`/`Roundss`/`Roundsd` map to `SSE41`.
/// NOTE(review): presumably used to check that the target supports an opcode
/// before it is emitted -- confirm against the callers of that match.
pub(crate) enum InstructionSet {
|
||||
SSE,
|
||||
SSE2,
|
||||
SSSE3,
|
||||
SSE41,
|
||||
}
|
||||
|
||||
@@ -393,6 +394,9 @@ pub enum SseOpcode {
|
||||
Mulsd,
|
||||
Orps,
|
||||
Orpd,
|
||||
Pabsb,
|
||||
Pabsw,
|
||||
Pabsd,
|
||||
Paddb,
|
||||
Paddd,
|
||||
Paddq,
|
||||
@@ -521,6 +525,8 @@ impl SseOpcode {
|
||||
| SseOpcode::Ucomisd
|
||||
| SseOpcode::Xorpd => SSE2,
|
||||
|
||||
SseOpcode::Pabsb | SseOpcode::Pabsw | SseOpcode::Pabsd => SSSE3,
|
||||
|
||||
SseOpcode::Insertps | SseOpcode::Pmulld | SseOpcode::Roundss | SseOpcode::Roundsd => {
|
||||
SSE41
|
||||
}
|
||||
@@ -590,6 +596,9 @@ impl fmt::Debug for SseOpcode {
|
||||
SseOpcode::Mulsd => "mulsd",
|
||||
SseOpcode::Orpd => "orpd",
|
||||
SseOpcode::Orps => "orps",
|
||||
SseOpcode::Pabsb => "pabsb",
|
||||
SseOpcode::Pabsw => "pabsw",
|
||||
SseOpcode::Pabsd => "pabsd",
|
||||
SseOpcode::Paddb => "paddb",
|
||||
SseOpcode::Paddd => "paddd",
|
||||
SseOpcode::Paddq => "paddq",
|
||||
|
||||
@@ -1697,27 +1697,38 @@ pub(crate) fn emit(
|
||||
} => {
|
||||
let rex = RexFlags::clear_w();
|
||||
|
||||
let (prefix, opcode) = match op {
|
||||
SseOpcode::Cvtss2sd => (LegacyPrefixes::_F3, 0x0F5A),
|
||||
SseOpcode::Cvtsd2ss => (LegacyPrefixes::_F2, 0x0F5A),
|
||||
SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F28),
|
||||
SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F28),
|
||||
SseOpcode::Movdqa => (LegacyPrefixes::_66, 0x0F6F),
|
||||
SseOpcode::Movdqu => (LegacyPrefixes::_F3, 0x0F6F),
|
||||
SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F10),
|
||||
SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F10),
|
||||
SseOpcode::Movups => (LegacyPrefixes::None, 0x0F10),
|
||||
SseOpcode::Movupd => (LegacyPrefixes::_66, 0x0F10),
|
||||
SseOpcode::Sqrtps => (LegacyPrefixes::None, 0x0F51),
|
||||
SseOpcode::Sqrtpd => (LegacyPrefixes::_66, 0x0F51),
|
||||
SseOpcode::Sqrtss => (LegacyPrefixes::_F3, 0x0F51),
|
||||
SseOpcode::Sqrtsd => (LegacyPrefixes::_F2, 0x0F51),
|
||||
let (prefix, opcode, num_opcodes) = match op {
|
||||
SseOpcode::Cvtss2sd => (LegacyPrefixes::_F3, 0x0F5A, 2),
|
||||
SseOpcode::Cvtsd2ss => (LegacyPrefixes::_F2, 0x0F5A, 2),
|
||||
SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F28, 2),
|
||||
SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F28, 2),
|
||||
SseOpcode::Movdqa => (LegacyPrefixes::_66, 0x0F6F, 2),
|
||||
SseOpcode::Movdqu => (LegacyPrefixes::_F3, 0x0F6F, 2),
|
||||
SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F10, 2),
|
||||
SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F10, 2),
|
||||
SseOpcode::Movups => (LegacyPrefixes::None, 0x0F10, 2),
|
||||
SseOpcode::Movupd => (LegacyPrefixes::_66, 0x0F10, 2),
|
||||
SseOpcode::Pabsb => (LegacyPrefixes::_66, 0x0F381C, 3),
|
||||
SseOpcode::Pabsw => (LegacyPrefixes::_66, 0x0F381D, 3),
|
||||
SseOpcode::Pabsd => (LegacyPrefixes::_66, 0x0F381E, 3),
|
||||
SseOpcode::Sqrtps => (LegacyPrefixes::None, 0x0F51, 2),
|
||||
SseOpcode::Sqrtpd => (LegacyPrefixes::_66, 0x0F51, 2),
|
||||
SseOpcode::Sqrtss => (LegacyPrefixes::_F3, 0x0F51, 2),
|
||||
SseOpcode::Sqrtsd => (LegacyPrefixes::_F2, 0x0F51, 2),
|
||||
_ => unimplemented!("Opcode {:?} not implemented", op),
|
||||
};
|
||||
|
||||
match src_e {
|
||||
RegMem::Reg { reg: reg_e } => {
|
||||
emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex);
|
||||
emit_std_reg_reg(
|
||||
sink,
|
||||
prefix,
|
||||
opcode,
|
||||
num_opcodes,
|
||||
reg_g.to_reg(),
|
||||
*reg_e,
|
||||
rex,
|
||||
);
|
||||
}
|
||||
RegMem::Mem { addr } => {
|
||||
let addr = &addr.finalize(state);
|
||||
@@ -1725,7 +1736,7 @@ pub(crate) fn emit(
|
||||
// Register the offset at which the actual load instruction starts.
|
||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex);
|
||||
emit_std_reg_mem(sink, prefix, opcode, num_opcodes, reg_g.to_reg(), addr, rex);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@@ -3226,6 +3226,22 @@ fn test_x64_emit() {
|
||||
"cvtsd2ss %xmm1, %xmm0",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_unary_rm_r(SseOpcode::Pabsb, RegMem::reg(xmm2), w_xmm1),
|
||||
"660F381CCA",
|
||||
"pabsb %xmm2, %xmm1",
|
||||
));
|
||||
insns.push((
|
||||
Inst::xmm_unary_rm_r(SseOpcode::Pabsw, RegMem::reg(xmm0), w_xmm0),
|
||||
"660F381DC0",
|
||||
"pabsw %xmm0, %xmm0",
|
||||
));
|
||||
insns.push((
|
||||
Inst::xmm_unary_rm_r(SseOpcode::Pabsd, RegMem::reg(xmm10), w_xmm11),
|
||||
"66450F381EDA",
|
||||
"pabsd %xmm10, %xmm11",
|
||||
));
|
||||
|
||||
// Xmm to int conversions, and conversely.
|
||||
|
||||
insns.push((
|
||||
|
||||
@@ -686,6 +686,23 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
}
|
||||
}
|
||||
|
||||
// Lowering for `iabs` (integer absolute value).
//
// Vector types are lowered to one unary XMM instruction whose opcode is
// chosen from the vector type: I8X16 -> pabsb, I16X8 -> pabsw,
// I32X4 -> pabsd. Any other vector type panics, and non-vector (scalar)
// types hit `unimplemented!`.
//
// NOTE(review): `src` comes from `input_to_reg_mem`, so it may presumably
// be a memory operand rather than a register -- confirm that the memory
// form of pabs* is acceptable here (e.g. alignment of the loaded value).
Opcode::Iabs => {
|
||||
let src = input_to_reg_mem(ctx, inputs[0]);
|
||||
let dst = get_output_reg(ctx, outputs[0]);
|
||||
// Panics if the instruction has no controlling type.
let ty = ty.unwrap();
|
||||
if ty.is_vector() {
|
||||
// Pick the packed-abs opcode that matches the vector type.
let opcode = match ty {
|
||||
types::I8X16 => SseOpcode::Pabsb,
|
||||
types::I16X8 => SseOpcode::Pabsw,
|
||||
types::I32X4 => SseOpcode::Pabsd,
|
||||
_ => panic!("Unsupported type for packed iabs instruction: {}", ty),
|
||||
};
|
||||
ctx.emit(Inst::xmm_unary_rm_r(opcode, src, dst));
|
||||
} else {
|
||||
// Scalar iabs has no lowering yet.
unimplemented!("iabs is unimplemented for non-vector type: {}", ty);
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Bnot => {
|
||||
let ty = ty.unwrap();
|
||||
if ty.is_vector() {
|
||||
|
||||
Reference in New Issue
Block a user