x64: add support for packed promote and demote (#2783)
* Add support for x64 packed promote low
* Add support for x64 packed floating point demote
* Update vector promote low and demote by adding constraints

Also does some renaming and minor refactoring.
This commit is contained in:
@@ -3193,6 +3193,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
|
||||
Opcode::TlsValue => unimplemented!("tls_value"),
|
||||
Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),
|
||||
Opcode::FvpromoteLow => unimplemented!("FvpromoteLow"),
|
||||
Opcode::Fvdemote => unimplemented!("Fvdemote"),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -2548,7 +2548,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
| Opcode::SwidenHigh
|
||||
| Opcode::UwidenLow
|
||||
| Opcode::UwidenHigh
|
||||
| Opcode::WideningPairwiseDotProductS => {
|
||||
| Opcode::WideningPairwiseDotProductS
|
||||
| Opcode::FvpromoteLow
|
||||
| Opcode::Fvdemote => {
|
||||
// TODO
|
||||
panic!("Vector ops not implemented.");
|
||||
}
|
||||
|
||||
@@ -489,6 +489,8 @@ pub enum SseOpcode {
|
||||
Cmpsd,
|
||||
Cvtdq2ps,
|
||||
Cvtdq2pd,
|
||||
Cvtpd2ps,
|
||||
Cvtps2pd,
|
||||
Cvtsd2ss,
|
||||
Cvtsd2si,
|
||||
Cvtsi2ss,
|
||||
@@ -684,6 +686,8 @@ impl SseOpcode {
|
||||
| SseOpcode::Comisd
|
||||
| SseOpcode::Cvtdq2ps
|
||||
| SseOpcode::Cvtdq2pd
|
||||
| SseOpcode::Cvtpd2ps
|
||||
| SseOpcode::Cvtps2pd
|
||||
| SseOpcode::Cvtsd2ss
|
||||
| SseOpcode::Cvtsd2si
|
||||
| SseOpcode::Cvtsi2sd
|
||||
@@ -843,6 +847,8 @@ impl fmt::Debug for SseOpcode {
|
||||
SseOpcode::Comisd => "comisd",
|
||||
SseOpcode::Cvtdq2ps => "cvtdq2ps",
|
||||
SseOpcode::Cvtdq2pd => "cvtdq2pd",
|
||||
SseOpcode::Cvtpd2ps => "cvtpd2ps",
|
||||
SseOpcode::Cvtps2pd => "cvtps2pd",
|
||||
SseOpcode::Cvtsd2ss => "cvtsd2ss",
|
||||
SseOpcode::Cvtsd2si => "cvtsd2si",
|
||||
SseOpcode::Cvtsi2ss => "cvtsi2ss",
|
||||
|
||||
@@ -1348,6 +1348,8 @@ pub(crate) fn emit(
|
||||
|
||||
let (prefix, opcode, num_opcodes) = match op {
|
||||
SseOpcode::Cvtdq2pd => (LegacyPrefixes::_F3, 0x0FE6, 2),
|
||||
SseOpcode::Cvtpd2ps => (LegacyPrefixes::_66, 0x0F5A, 2),
|
||||
SseOpcode::Cvtps2pd => (LegacyPrefixes::None, 0x0F5A, 2),
|
||||
SseOpcode::Cvtss2sd => (LegacyPrefixes::_F3, 0x0F5A, 2),
|
||||
SseOpcode::Cvtsd2ss => (LegacyPrefixes::_F2, 0x0F5A, 2),
|
||||
SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F28, 2),
|
||||
|
||||
@@ -3913,6 +3913,18 @@ fn test_x64_emit() {
|
||||
"vpopcntb %xmm2, %xmm8",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_unary_rm_r(SseOpcode::Cvtpd2ps, RegMem::reg(xmm7), w_xmm7),
|
||||
"660F5AFF",
|
||||
"cvtpd2ps %xmm7, %xmm7",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_unary_rm_r(SseOpcode::Cvtps2pd, RegMem::reg(xmm11), w_xmm9),
|
||||
"450F5ACB",
|
||||
"cvtps2pd %xmm11, %xmm9",
|
||||
));
|
||||
|
||||
// Xmm to int conversions, and conversely.
|
||||
|
||||
insns.push((
|
||||
|
||||
@@ -4057,6 +4057,16 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
ctx.emit(Inst::xmm_unary_rm_r(SseOpcode::Cvtss2sd, src, dst));
|
||||
}
|
||||
|
||||
Opcode::FvpromoteLow => {
|
||||
let src = RegMem::reg(put_input_in_reg(ctx, inputs[0]));
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_unary_rm_r(
|
||||
SseOpcode::Cvtps2pd,
|
||||
RegMem::from(src),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
|
||||
Opcode::Fdemote => {
|
||||
// We can't guarantee the RHS (if a load) is 128-bit aligned, so we
|
||||
// must avoid merging a load here.
|
||||
@@ -4065,6 +4075,16 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
ctx.emit(Inst::xmm_unary_rm_r(SseOpcode::Cvtsd2ss, src, dst));
|
||||
}
|
||||
|
||||
Opcode::Fvdemote => {
|
||||
let src = RegMem::reg(put_input_in_reg(ctx, inputs[0]));
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_unary_rm_r(
|
||||
SseOpcode::Cvtpd2ps,
|
||||
RegMem::from(src),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
|
||||
Opcode::FcvtFromSint => {
|
||||
let output_ty = ty.unwrap();
|
||||
if !output_ty.is_vector() {
|
||||
|
||||
Reference in New Issue
Block a user