x64: Fill out more AVX instructions (#5849)

* x64: Fill out more AVX instructions

This commit adds AVX encodings for more of the SSE instructions that are
currently used. Many of these instructions do not benefit from the
3-operand form that AVX offers; instead, they benefit from being able to
take an `XmmMem` operand rather than an `XmmMemAligned` one, which may
avoid some extra temporary registers in some cases.

* Review comments
This commit is contained in:
Alex Crichton
2023-02-23 16:31:31 -06:00
committed by GitHub
parent 8abfe928d6
commit 3fc3bc9ec8
7 changed files with 1114 additions and 13 deletions

View File

@@ -1630,7 +1630,38 @@ impl AvxOpcode {
| AvxOpcode::Vpslld
| AvxOpcode::Vpsllq
| AvxOpcode::Vpsraw
| AvxOpcode::Vpsrad => {
| AvxOpcode::Vpsrad
| AvxOpcode::Vpmovsxbw
| AvxOpcode::Vpmovzxbw
| AvxOpcode::Vpmovsxwd
| AvxOpcode::Vpmovzxwd
| AvxOpcode::Vpmovsxdq
| AvxOpcode::Vpmovzxdq
| AvxOpcode::Vaddss
| AvxOpcode::Vaddsd
| AvxOpcode::Vmulss
| AvxOpcode::Vmulsd
| AvxOpcode::Vsubss
| AvxOpcode::Vsubsd
| AvxOpcode::Vdivss
| AvxOpcode::Vdivsd
| AvxOpcode::Vpabsb
| AvxOpcode::Vpabsw
| AvxOpcode::Vpabsd
| AvxOpcode::Vminss
| AvxOpcode::Vminsd
| AvxOpcode::Vmaxss
| AvxOpcode::Vmaxsd
| AvxOpcode::Vsqrtps
| AvxOpcode::Vsqrtpd
| AvxOpcode::Vroundpd
| AvxOpcode::Vroundps
| AvxOpcode::Vcvtdq2pd
| AvxOpcode::Vcvtdq2ps
| AvxOpcode::Vcvtpd2ps
| AvxOpcode::Vcvtps2pd
| AvxOpcode::Vcvttpd2dq
| AvxOpcode::Vcvttps2dq => {
smallvec![InstructionSet::AVX]
}
}

View File

@@ -2182,6 +2182,18 @@ pub(crate) fn emit(
AvxOpcode::Vpsllq => (LP::_66, OM::_0F, 0xF3),
AvxOpcode::Vpsraw => (LP::_66, OM::_0F, 0xE1),
AvxOpcode::Vpsrad => (LP::_66, OM::_0F, 0xE2),
AvxOpcode::Vaddss => (LP::_F3, OM::_0F, 0x58),
AvxOpcode::Vaddsd => (LP::_F2, OM::_0F, 0x58),
AvxOpcode::Vmulss => (LP::_F3, OM::_0F, 0x59),
AvxOpcode::Vmulsd => (LP::_F2, OM::_0F, 0x59),
AvxOpcode::Vsubss => (LP::_F3, OM::_0F, 0x5C),
AvxOpcode::Vsubsd => (LP::_F2, OM::_0F, 0x5C),
AvxOpcode::Vdivss => (LP::_F3, OM::_0F, 0x5E),
AvxOpcode::Vdivsd => (LP::_F2, OM::_0F, 0x5E),
AvxOpcode::Vminss => (LP::_F3, OM::_0F, 0x5D),
AvxOpcode::Vminsd => (LP::_F2, OM::_0F, 0x5D),
AvxOpcode::Vmaxss => (LP::_F3, OM::_0F, 0x5F),
AvxOpcode::Vmaxsd => (LP::_F2, OM::_0F, 0x5F),
_ => panic!("unexpected rmir vex opcode {op:?}"),
};
VexInstruction::new()
@@ -2359,6 +2371,72 @@ pub(crate) fn emit(
.encode(sink);
}
// Emit a VEX-encoded unary XMM instruction that takes no immediate: `dst`
// goes in the ModRM `reg` field and `src` (register or memory) in `r/m`.
Inst::XmmUnaryRmRVex { op, src, dst } => {
    // `dst` is taken from `allocs` before `src` is resolved; keep this order.
    // NOTE(review): presumably this has to mirror the order in which the
    // operands are handed to the operand collector -- confirm before reordering.
    let dst = allocs.next(dst.to_reg().to_reg());
    let src = match src.clone().to_reg_mem().with_allocs(allocs) {
        RegMem::Reg { reg } => {
            RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into())
        }
        RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)),
    };
    // Encoding triple for each supported opcode:
    // (legacy prefix, opcode map, opcode byte).
    let (prefix, map, opcode) = match op {
        AvxOpcode::Vpmovsxbw => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x20),
        AvxOpcode::Vpmovzxbw => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x30),
        AvxOpcode::Vpmovsxwd => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x23),
        AvxOpcode::Vpmovzxwd => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x33),
        AvxOpcode::Vpmovsxdq => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x25),
        AvxOpcode::Vpmovzxdq => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x35),
        AvxOpcode::Vpabsb => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x1C),
        AvxOpcode::Vpabsw => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x1D),
        AvxOpcode::Vpabsd => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x1E),
        AvxOpcode::Vsqrtps => (LegacyPrefixes::None, OpcodeMap::_0F, 0x51),
        AvxOpcode::Vsqrtpd => (LegacyPrefixes::_66, OpcodeMap::_0F, 0x51),
        AvxOpcode::Vcvtdq2pd => (LegacyPrefixes::_F3, OpcodeMap::_0F, 0xE6),
        AvxOpcode::Vcvtdq2ps => (LegacyPrefixes::None, OpcodeMap::_0F, 0x5B),
        AvxOpcode::Vcvtpd2ps => (LegacyPrefixes::_66, OpcodeMap::_0F, 0x5A),
        AvxOpcode::Vcvtps2pd => (LegacyPrefixes::None, OpcodeMap::_0F, 0x5A),
        AvxOpcode::Vcvttpd2dq => (LegacyPrefixes::_66, OpcodeMap::_0F, 0xE6),
        AvxOpcode::Vcvttps2dq => (LegacyPrefixes::_F3, OpcodeMap::_0F, 0x5B),
        // Any other opcode paired with this variant is a construction bug.
        // The message names this (immediate-less) form so the panic is not
        // mistaken for one raised by the `XmmUnaryRmRImmVex` arm.
        _ => panic!("unexpected rmr_vex opcode {op:?}"),
    };
    VexInstruction::new()
        .length(VexVectorLength::V128)
        .prefix(prefix)
        .map(map)
        .opcode(opcode)
        .reg(dst.to_real_reg().unwrap().hw_enc())
        .rm(src)
        .encode(sink);
}
// Emit a VEX-encoded unary XMM instruction that carries an immediate:
// `dst` goes in the ModRM `reg` field, `src` (register or memory) in `r/m`,
// and `imm` is attached to the encoding via `.imm(..)`.
Inst::XmmUnaryRmRImmVex { op, src, dst, imm } => {
// `dst` is taken from `allocs` before `src` is resolved.
// NOTE(review): presumably this order has to mirror the order in which the
// operands are handed to the operand collector -- confirm before reordering.
let dst = allocs.next(dst.to_reg().to_reg());
let src = match src.clone().to_reg_mem().with_allocs(allocs) {
RegMem::Reg { reg } => {
RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into())
}
RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)),
};
// Encoding triple for each supported opcode:
// (legacy prefix, opcode map, opcode byte).
let (prefix, map, opcode) = match op {
AvxOpcode::Vroundps => (LegacyPrefixes::_66, OpcodeMap::_0F3A, 0x08),
AvxOpcode::Vroundpd => (LegacyPrefixes::_66, OpcodeMap::_0F3A, 0x09),
// Any other opcode paired with this variant is a construction bug.
_ => panic!("unexpected rmr_imm_vex opcode {op:?}"),
};
VexInstruction::new()
.length(VexVectorLength::V128)
.prefix(prefix)
.map(map)
.opcode(opcode)
.reg(dst.to_real_reg().unwrap().hw_enc())
.rm(src)
.imm(*imm)
.encode(sink);
}
Inst::XmmRmREvex {
op,
src1,

View File

@@ -151,7 +151,9 @@ impl Inst {
| Inst::XmmRmRVex3 { op, .. }
| Inst::XmmRmRImmVex { op, .. }
| Inst::XmmRmRBlendVex { op, .. }
| Inst::XmmVexPinsr { op, .. } => op.available_from(),
| Inst::XmmVexPinsr { op, .. }
| Inst::XmmUnaryRmRVex { op, .. }
| Inst::XmmUnaryRmRImmVex { op, .. } => op.available_from(),
}
}
}
@@ -910,6 +912,20 @@ impl PrettyPrint for Inst {
format!("{} ${}, {}, {}", ljustify(op.to_string()), imm, src, dst)
}
// Render a unary VEX instruction as `<op> <src>, <dst>`, with the mnemonic
// passed through `ljustify`.
Inst::XmmUnaryRmRVex { op, src, dst, .. } => {
// `dst` is printed (and consumes from `allocs`) before `src`.
// NOTE(review): the literal `8` is presumably the operand size handed to the
// pretty-printers -- confirm against `pretty_print_reg`.
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
let src = src.pretty_print(8, allocs);
format!("{} {}, {}", ljustify(op.to_string()), src, dst)
}
// Render a unary VEX instruction with an immediate as
// `<op> $<imm>, <src>, <dst>`, with the mnemonic passed through `ljustify`.
Inst::XmmUnaryRmRImmVex {
op, src, dst, imm, ..
} => {
// `dst` is printed (and consumes from `allocs`) before `src`.
// NOTE(review): the literal `8` is presumably the operand size handed to the
// pretty-printers -- confirm against `pretty_print_reg`.
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
let src = src.pretty_print(8, allocs);
format!("{} ${imm}, {}, {}", ljustify(op.to_string()), src, dst)
}
Inst::XmmUnaryRmREvex { op, src, dst, .. } => {
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
let src = src.pretty_print(8, allocs);
@@ -1887,7 +1903,10 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
collector.reg_def(dst.to_writable_reg());
src.get_operands(collector);
}
Inst::XmmUnaryRmREvex { src, dst, .. } | Inst::XmmUnaryRmRUnaligned { src, dst, .. } => {
Inst::XmmUnaryRmREvex { src, dst, .. }
| Inst::XmmUnaryRmRUnaligned { src, dst, .. }
| Inst::XmmUnaryRmRVex { src, dst, .. }
| Inst::XmmUnaryRmRImmVex { src, dst, .. } => {
collector.reg_def(dst.to_writable_reg());
src.get_operands(collector);
}