[machinst x64]: add iabs implementation
This commit is contained in:
@@ -336,6 +336,7 @@ impl fmt::Display for UnaryRmROpcode {
|
|||||||
pub(crate) enum InstructionSet {
|
pub(crate) enum InstructionSet {
|
||||||
SSE,
|
SSE,
|
||||||
SSE2,
|
SSE2,
|
||||||
|
SSSE3,
|
||||||
SSE41,
|
SSE41,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -393,6 +394,9 @@ pub enum SseOpcode {
|
|||||||
Mulsd,
|
Mulsd,
|
||||||
Orps,
|
Orps,
|
||||||
Orpd,
|
Orpd,
|
||||||
|
Pabsb,
|
||||||
|
Pabsw,
|
||||||
|
Pabsd,
|
||||||
Paddb,
|
Paddb,
|
||||||
Paddd,
|
Paddd,
|
||||||
Paddq,
|
Paddq,
|
||||||
@@ -521,6 +525,8 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Ucomisd
|
| SseOpcode::Ucomisd
|
||||||
| SseOpcode::Xorpd => SSE2,
|
| SseOpcode::Xorpd => SSE2,
|
||||||
|
|
||||||
|
SseOpcode::Pabsb | SseOpcode::Pabsw | SseOpcode::Pabsd => SSSE3,
|
||||||
|
|
||||||
SseOpcode::Insertps | SseOpcode::Pmulld | SseOpcode::Roundss | SseOpcode::Roundsd => {
|
SseOpcode::Insertps | SseOpcode::Pmulld | SseOpcode::Roundss | SseOpcode::Roundsd => {
|
||||||
SSE41
|
SSE41
|
||||||
}
|
}
|
||||||
@@ -590,6 +596,9 @@ impl fmt::Debug for SseOpcode {
|
|||||||
SseOpcode::Mulsd => "mulsd",
|
SseOpcode::Mulsd => "mulsd",
|
||||||
SseOpcode::Orpd => "orpd",
|
SseOpcode::Orpd => "orpd",
|
||||||
SseOpcode::Orps => "orps",
|
SseOpcode::Orps => "orps",
|
||||||
|
SseOpcode::Pabsb => "pabsb",
|
||||||
|
SseOpcode::Pabsw => "pabsw",
|
||||||
|
SseOpcode::Pabsd => "pabsd",
|
||||||
SseOpcode::Paddb => "paddb",
|
SseOpcode::Paddb => "paddb",
|
||||||
SseOpcode::Paddd => "paddd",
|
SseOpcode::Paddd => "paddd",
|
||||||
SseOpcode::Paddq => "paddq",
|
SseOpcode::Paddq => "paddq",
|
||||||
|
|||||||
@@ -1697,27 +1697,38 @@ pub(crate) fn emit(
|
|||||||
} => {
|
} => {
|
||||||
let rex = RexFlags::clear_w();
|
let rex = RexFlags::clear_w();
|
||||||
|
|
||||||
let (prefix, opcode) = match op {
|
let (prefix, opcode, num_opcodes) = match op {
|
||||||
SseOpcode::Cvtss2sd => (LegacyPrefixes::_F3, 0x0F5A),
|
SseOpcode::Cvtss2sd => (LegacyPrefixes::_F3, 0x0F5A, 2),
|
||||||
SseOpcode::Cvtsd2ss => (LegacyPrefixes::_F2, 0x0F5A),
|
SseOpcode::Cvtsd2ss => (LegacyPrefixes::_F2, 0x0F5A, 2),
|
||||||
SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F28),
|
SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F28, 2),
|
||||||
SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F28),
|
SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F28, 2),
|
||||||
SseOpcode::Movdqa => (LegacyPrefixes::_66, 0x0F6F),
|
SseOpcode::Movdqa => (LegacyPrefixes::_66, 0x0F6F, 2),
|
||||||
SseOpcode::Movdqu => (LegacyPrefixes::_F3, 0x0F6F),
|
SseOpcode::Movdqu => (LegacyPrefixes::_F3, 0x0F6F, 2),
|
||||||
SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F10),
|
SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F10, 2),
|
||||||
SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F10),
|
SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F10, 2),
|
||||||
SseOpcode::Movups => (LegacyPrefixes::None, 0x0F10),
|
SseOpcode::Movups => (LegacyPrefixes::None, 0x0F10, 2),
|
||||||
SseOpcode::Movupd => (LegacyPrefixes::_66, 0x0F10),
|
SseOpcode::Movupd => (LegacyPrefixes::_66, 0x0F10, 2),
|
||||||
SseOpcode::Sqrtps => (LegacyPrefixes::None, 0x0F51),
|
SseOpcode::Pabsb => (LegacyPrefixes::_66, 0x0F381C, 3),
|
||||||
SseOpcode::Sqrtpd => (LegacyPrefixes::_66, 0x0F51),
|
SseOpcode::Pabsw => (LegacyPrefixes::_66, 0x0F381D, 3),
|
||||||
SseOpcode::Sqrtss => (LegacyPrefixes::_F3, 0x0F51),
|
SseOpcode::Pabsd => (LegacyPrefixes::_66, 0x0F381E, 3),
|
||||||
SseOpcode::Sqrtsd => (LegacyPrefixes::_F2, 0x0F51),
|
SseOpcode::Sqrtps => (LegacyPrefixes::None, 0x0F51, 2),
|
||||||
|
SseOpcode::Sqrtpd => (LegacyPrefixes::_66, 0x0F51, 2),
|
||||||
|
SseOpcode::Sqrtss => (LegacyPrefixes::_F3, 0x0F51, 2),
|
||||||
|
SseOpcode::Sqrtsd => (LegacyPrefixes::_F2, 0x0F51, 2),
|
||||||
_ => unimplemented!("Opcode {:?} not implemented", op),
|
_ => unimplemented!("Opcode {:?} not implemented", op),
|
||||||
};
|
};
|
||||||
|
|
||||||
match src_e {
|
match src_e {
|
||||||
RegMem::Reg { reg: reg_e } => {
|
RegMem::Reg { reg: reg_e } => {
|
||||||
emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex);
|
emit_std_reg_reg(
|
||||||
|
sink,
|
||||||
|
prefix,
|
||||||
|
opcode,
|
||||||
|
num_opcodes,
|
||||||
|
reg_g.to_reg(),
|
||||||
|
*reg_e,
|
||||||
|
rex,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
RegMem::Mem { addr } => {
|
RegMem::Mem { addr } => {
|
||||||
let addr = &addr.finalize(state);
|
let addr = &addr.finalize(state);
|
||||||
@@ -1725,7 +1736,7 @@ pub(crate) fn emit(
|
|||||||
// Register the offset at which the actual load instruction starts.
|
// Register the offset at which the actual load instruction starts.
|
||||||
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||||
}
|
}
|
||||||
emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex);
|
emit_std_reg_mem(sink, prefix, opcode, num_opcodes, reg_g.to_reg(), addr, rex);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3226,6 +3226,22 @@ fn test_x64_emit() {
|
|||||||
"cvtsd2ss %xmm1, %xmm0",
|
"cvtsd2ss %xmm1, %xmm0",
|
||||||
));
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_unary_rm_r(SseOpcode::Pabsb, RegMem::reg(xmm2), w_xmm1),
|
||||||
|
"660F381CCA",
|
||||||
|
"pabsb %xmm2, %xmm1",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_unary_rm_r(SseOpcode::Pabsw, RegMem::reg(xmm0), w_xmm0),
|
||||||
|
"660F381DC0",
|
||||||
|
"pabsw %xmm0, %xmm0",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_unary_rm_r(SseOpcode::Pabsd, RegMem::reg(xmm10), w_xmm11),
|
||||||
|
"66450F381EDA",
|
||||||
|
"pabsd %xmm10, %xmm11",
|
||||||
|
));
|
||||||
|
|
||||||
// Xmm to int conversions, and conversely.
|
// Xmm to int conversions, and conversely.
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
|
|||||||
@@ -686,6 +686,23 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Opcode::Iabs => {
|
||||||
|
let src = input_to_reg_mem(ctx, inputs[0]);
|
||||||
|
let dst = get_output_reg(ctx, outputs[0]);
|
||||||
|
let ty = ty.unwrap();
|
||||||
|
if ty.is_vector() {
|
||||||
|
let opcode = match ty {
|
||||||
|
types::I8X16 => SseOpcode::Pabsb,
|
||||||
|
types::I16X8 => SseOpcode::Pabsw,
|
||||||
|
types::I32X4 => SseOpcode::Pabsd,
|
||||||
|
_ => panic!("Unsupported type for packed iabs instruction: {}", ty),
|
||||||
|
};
|
||||||
|
ctx.emit(Inst::xmm_unary_rm_r(opcode, src, dst));
|
||||||
|
} else {
|
||||||
|
unimplemented!("iabs is unimplemented for non-vector type: {}", ty);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Opcode::Bnot => {
|
Opcode::Bnot => {
|
||||||
let ty = ty.unwrap();
|
let ty = ty.unwrap();
|
||||||
if ty.is_vector() {
|
if ty.is_vector() {
|
||||||
|
|||||||
Reference in New Issue
Block a user