x64: Fill out more AVX instructions (#5849)
* x64: Fill out more AVX instructions. This commit fills out AVX counterparts for SSE instructions currently in use. Many of these instructions do not benefit from the 3-operand form that AVX provides, but they do benefit from being able to use `XmmMem` instead of `XmmMemAligned`, which may avoid some extra temporary registers in some cases. * Address review comments.
This commit is contained in:
@@ -1630,7 +1630,38 @@ impl AvxOpcode {
|
||||
| AvxOpcode::Vpslld
|
||||
| AvxOpcode::Vpsllq
|
||||
| AvxOpcode::Vpsraw
|
||||
| AvxOpcode::Vpsrad => {
|
||||
| AvxOpcode::Vpsrad
|
||||
| AvxOpcode::Vpmovsxbw
|
||||
| AvxOpcode::Vpmovzxbw
|
||||
| AvxOpcode::Vpmovsxwd
|
||||
| AvxOpcode::Vpmovzxwd
|
||||
| AvxOpcode::Vpmovsxdq
|
||||
| AvxOpcode::Vpmovzxdq
|
||||
| AvxOpcode::Vaddss
|
||||
| AvxOpcode::Vaddsd
|
||||
| AvxOpcode::Vmulss
|
||||
| AvxOpcode::Vmulsd
|
||||
| AvxOpcode::Vsubss
|
||||
| AvxOpcode::Vsubsd
|
||||
| AvxOpcode::Vdivss
|
||||
| AvxOpcode::Vdivsd
|
||||
| AvxOpcode::Vpabsb
|
||||
| AvxOpcode::Vpabsw
|
||||
| AvxOpcode::Vpabsd
|
||||
| AvxOpcode::Vminss
|
||||
| AvxOpcode::Vminsd
|
||||
| AvxOpcode::Vmaxss
|
||||
| AvxOpcode::Vmaxsd
|
||||
| AvxOpcode::Vsqrtps
|
||||
| AvxOpcode::Vsqrtpd
|
||||
| AvxOpcode::Vroundpd
|
||||
| AvxOpcode::Vroundps
|
||||
| AvxOpcode::Vcvtdq2pd
|
||||
| AvxOpcode::Vcvtdq2ps
|
||||
| AvxOpcode::Vcvtpd2ps
|
||||
| AvxOpcode::Vcvtps2pd
|
||||
| AvxOpcode::Vcvttpd2dq
|
||||
| AvxOpcode::Vcvttps2dq => {
|
||||
smallvec![InstructionSet::AVX]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2182,6 +2182,18 @@ pub(crate) fn emit(
|
||||
AvxOpcode::Vpsllq => (LP::_66, OM::_0F, 0xF3),
|
||||
AvxOpcode::Vpsraw => (LP::_66, OM::_0F, 0xE1),
|
||||
AvxOpcode::Vpsrad => (LP::_66, OM::_0F, 0xE2),
|
||||
AvxOpcode::Vaddss => (LP::_F3, OM::_0F, 0x58),
|
||||
AvxOpcode::Vaddsd => (LP::_F2, OM::_0F, 0x58),
|
||||
AvxOpcode::Vmulss => (LP::_F3, OM::_0F, 0x59),
|
||||
AvxOpcode::Vmulsd => (LP::_F2, OM::_0F, 0x59),
|
||||
AvxOpcode::Vsubss => (LP::_F3, OM::_0F, 0x5C),
|
||||
AvxOpcode::Vsubsd => (LP::_F2, OM::_0F, 0x5C),
|
||||
AvxOpcode::Vdivss => (LP::_F3, OM::_0F, 0x5E),
|
||||
AvxOpcode::Vdivsd => (LP::_F2, OM::_0F, 0x5E),
|
||||
AvxOpcode::Vminss => (LP::_F3, OM::_0F, 0x5D),
|
||||
AvxOpcode::Vminsd => (LP::_F2, OM::_0F, 0x5D),
|
||||
AvxOpcode::Vmaxss => (LP::_F3, OM::_0F, 0x5F),
|
||||
AvxOpcode::Vmaxsd => (LP::_F2, OM::_0F, 0x5F),
|
||||
_ => panic!("unexpected rmir vex opcode {op:?}"),
|
||||
};
|
||||
VexInstruction::new()
|
||||
@@ -2359,6 +2371,72 @@ pub(crate) fn emit(
|
||||
.encode(sink);
|
||||
}
|
||||
|
||||
Inst::XmmUnaryRmRVex { op, src, dst } => {
|
||||
let dst = allocs.next(dst.to_reg().to_reg());
|
||||
let src = match src.clone().to_reg_mem().with_allocs(allocs) {
|
||||
RegMem::Reg { reg } => {
|
||||
RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into())
|
||||
}
|
||||
RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)),
|
||||
};
|
||||
|
||||
let (prefix, map, opcode) = match op {
|
||||
AvxOpcode::Vpmovsxbw => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x20),
|
||||
AvxOpcode::Vpmovzxbw => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x30),
|
||||
AvxOpcode::Vpmovsxwd => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x23),
|
||||
AvxOpcode::Vpmovzxwd => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x33),
|
||||
AvxOpcode::Vpmovsxdq => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x25),
|
||||
AvxOpcode::Vpmovzxdq => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x35),
|
||||
AvxOpcode::Vpabsb => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x1C),
|
||||
AvxOpcode::Vpabsw => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x1D),
|
||||
AvxOpcode::Vpabsd => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x1E),
|
||||
AvxOpcode::Vsqrtps => (LegacyPrefixes::None, OpcodeMap::_0F, 0x51),
|
||||
AvxOpcode::Vsqrtpd => (LegacyPrefixes::_66, OpcodeMap::_0F, 0x51),
|
||||
AvxOpcode::Vcvtdq2pd => (LegacyPrefixes::_F3, OpcodeMap::_0F, 0xE6),
|
||||
AvxOpcode::Vcvtdq2ps => (LegacyPrefixes::None, OpcodeMap::_0F, 0x5B),
|
||||
AvxOpcode::Vcvtpd2ps => (LegacyPrefixes::_66, OpcodeMap::_0F, 0x5A),
|
||||
AvxOpcode::Vcvtps2pd => (LegacyPrefixes::None, OpcodeMap::_0F, 0x5A),
|
||||
AvxOpcode::Vcvttpd2dq => (LegacyPrefixes::_66, OpcodeMap::_0F, 0xE6),
|
||||
AvxOpcode::Vcvttps2dq => (LegacyPrefixes::_F3, OpcodeMap::_0F, 0x5B),
|
||||
_ => panic!("unexpected rmr_imm_vex opcode {op:?}"),
|
||||
};
|
||||
|
||||
VexInstruction::new()
|
||||
.length(VexVectorLength::V128)
|
||||
.prefix(prefix)
|
||||
.map(map)
|
||||
.opcode(opcode)
|
||||
.reg(dst.to_real_reg().unwrap().hw_enc())
|
||||
.rm(src)
|
||||
.encode(sink);
|
||||
}
|
||||
|
||||
Inst::XmmUnaryRmRImmVex { op, src, dst, imm } => {
|
||||
let dst = allocs.next(dst.to_reg().to_reg());
|
||||
let src = match src.clone().to_reg_mem().with_allocs(allocs) {
|
||||
RegMem::Reg { reg } => {
|
||||
RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into())
|
||||
}
|
||||
RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)),
|
||||
};
|
||||
|
||||
let (prefix, map, opcode) = match op {
|
||||
AvxOpcode::Vroundps => (LegacyPrefixes::_66, OpcodeMap::_0F3A, 0x08),
|
||||
AvxOpcode::Vroundpd => (LegacyPrefixes::_66, OpcodeMap::_0F3A, 0x09),
|
||||
_ => panic!("unexpected rmr_imm_vex opcode {op:?}"),
|
||||
};
|
||||
|
||||
VexInstruction::new()
|
||||
.length(VexVectorLength::V128)
|
||||
.prefix(prefix)
|
||||
.map(map)
|
||||
.opcode(opcode)
|
||||
.reg(dst.to_real_reg().unwrap().hw_enc())
|
||||
.rm(src)
|
||||
.imm(*imm)
|
||||
.encode(sink);
|
||||
}
|
||||
|
||||
Inst::XmmRmREvex {
|
||||
op,
|
||||
src1,
|
||||
|
||||
@@ -151,7 +151,9 @@ impl Inst {
|
||||
| Inst::XmmRmRVex3 { op, .. }
|
||||
| Inst::XmmRmRImmVex { op, .. }
|
||||
| Inst::XmmRmRBlendVex { op, .. }
|
||||
| Inst::XmmVexPinsr { op, .. } => op.available_from(),
|
||||
| Inst::XmmVexPinsr { op, .. }
|
||||
| Inst::XmmUnaryRmRVex { op, .. }
|
||||
| Inst::XmmUnaryRmRImmVex { op, .. } => op.available_from(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -910,6 +912,20 @@ impl PrettyPrint for Inst {
|
||||
format!("{} ${}, {}, {}", ljustify(op.to_string()), imm, src, dst)
|
||||
}
|
||||
|
||||
Inst::XmmUnaryRmRVex { op, src, dst, .. } => {
|
||||
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
|
||||
let src = src.pretty_print(8, allocs);
|
||||
format!("{} {}, {}", ljustify(op.to_string()), src, dst)
|
||||
}
|
||||
|
||||
Inst::XmmUnaryRmRImmVex {
|
||||
op, src, dst, imm, ..
|
||||
} => {
|
||||
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
|
||||
let src = src.pretty_print(8, allocs);
|
||||
format!("{} ${imm}, {}, {}", ljustify(op.to_string()), src, dst)
|
||||
}
|
||||
|
||||
Inst::XmmUnaryRmREvex { op, src, dst, .. } => {
|
||||
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
|
||||
let src = src.pretty_print(8, allocs);
|
||||
@@ -1887,7 +1903,10 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
|
||||
collector.reg_def(dst.to_writable_reg());
|
||||
src.get_operands(collector);
|
||||
}
|
||||
Inst::XmmUnaryRmREvex { src, dst, .. } | Inst::XmmUnaryRmRUnaligned { src, dst, .. } => {
|
||||
Inst::XmmUnaryRmREvex { src, dst, .. }
|
||||
| Inst::XmmUnaryRmRUnaligned { src, dst, .. }
|
||||
| Inst::XmmUnaryRmRVex { src, dst, .. }
|
||||
| Inst::XmmUnaryRmRImmVex { src, dst, .. } => {
|
||||
collector.reg_def(dst.to_writable_reg());
|
||||
src.get_operands(collector);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user