Use regalloc constraints for sse blend operations (#5251)
Instead of hard-coding xmm0 as the mask argument of instructions like blendvpd, express the requirement as a regalloc constraint that pins the mask operand to xmm0.
This commit is contained in:
@@ -1820,8 +1820,6 @@ pub(crate) fn emit(
|
||||
SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2),
|
||||
SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2),
|
||||
SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2),
|
||||
SseOpcode::Blendvps => (LegacyPrefixes::_66, 0x0F3814, 3),
|
||||
SseOpcode::Blendvpd => (LegacyPrefixes::_66, 0x0F3815, 3),
|
||||
SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2),
|
||||
SseOpcode::Divpd => (LegacyPrefixes::_66, 0x0F5E, 2),
|
||||
SseOpcode::Divss => (LegacyPrefixes::_F3, 0x0F5E, 2),
|
||||
@@ -1859,7 +1857,6 @@ pub(crate) fn emit(
|
||||
SseOpcode::Pandn => (LegacyPrefixes::_66, 0x0FDF, 2),
|
||||
SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),
|
||||
SseOpcode::Pavgw => (LegacyPrefixes::_66, 0x0FE3, 2),
|
||||
SseOpcode::Pblendvb => (LegacyPrefixes::_66, 0x0F3810, 3),
|
||||
SseOpcode::Pcmpeqb => (LegacyPrefixes::_66, 0x0F74, 2),
|
||||
SseOpcode::Pcmpeqw => (LegacyPrefixes::_66, 0x0F75, 2),
|
||||
SseOpcode::Pcmpeqd => (LegacyPrefixes::_66, 0x0F76, 2),
|
||||
@@ -1924,6 +1921,39 @@ pub(crate) fn emit(
|
||||
}
|
||||
}
|
||||
|
||||
Inst::XmmRmRBlend {
|
||||
op,
|
||||
src1,
|
||||
src2,
|
||||
dst,
|
||||
mask,
|
||||
} => {
|
||||
let src1 = allocs.next(src1.to_reg());
|
||||
let mask = allocs.next(mask.to_reg());
|
||||
debug_assert_eq!(mask, regs::xmm0());
|
||||
let reg_g = allocs.next(dst.to_reg().to_reg());
|
||||
debug_assert_eq!(src1, reg_g);
|
||||
let src_e = src2.clone().to_reg_mem().with_allocs(allocs);
|
||||
|
||||
let rex = RexFlags::clear_w();
|
||||
let (prefix, opcode, length) = match op {
|
||||
SseOpcode::Blendvps => (LegacyPrefixes::_66, 0x0F3814, 3),
|
||||
SseOpcode::Blendvpd => (LegacyPrefixes::_66, 0x0F3815, 3),
|
||||
SseOpcode::Pblendvb => (LegacyPrefixes::_66, 0x0F3810, 3),
|
||||
_ => unimplemented!("Opcode {:?} not implemented", op),
|
||||
};
|
||||
|
||||
match src_e {
|
||||
RegMem::Reg { reg: reg_e } => {
|
||||
emit_std_reg_reg(sink, prefix, opcode, length, reg_g, reg_e, rex);
|
||||
}
|
||||
RegMem::Mem { addr } => {
|
||||
let addr = &addr.finalize(state, sink);
|
||||
emit_std_reg_mem(sink, info, prefix, opcode, length, reg_g, addr, rex, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Inst::XmmRmRVex {
|
||||
op,
|
||||
src1,
|
||||
|
||||
@@ -131,6 +131,16 @@ impl Inst {
|
||||
size,
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds an `Inst::XmmRmRBlend` for the given blend opcode.
///
/// `src1` is taken from the register behind `dst`, `src2` is the supplied
/// register-or-memory operand, and the mask operand is set to
/// `regs::xmm0()`. Every operand wrapper is `unwrap`ped, so this panics if
/// any of those conversions fails.
fn xmm_rm_r_blend(op: SseOpcode, src2: RegMem, dst: Writable<Reg>) -> Inst {
|
||||
Inst::XmmRmRBlend {
|
||||
op,
|
||||
src1: Xmm::new(dst.to_reg()).unwrap(),
|
||||
src2: XmmMem::new(src2).unwrap(),
|
||||
mask: Xmm::new(regs::xmm0()).unwrap(),
|
||||
dst: WritableXmm::from_writable_reg(dst).unwrap(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -3961,19 +3971,19 @@ fn test_x64_emit() {
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Blendvpd, RegMem::reg(xmm15), w_xmm4),
|
||||
Inst::xmm_rm_r_blend(SseOpcode::Blendvpd, RegMem::reg(xmm15), w_xmm4),
|
||||
"66410F3815E7",
|
||||
"blendvpd %xmm4, %xmm15, %xmm4",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Blendvps, RegMem::reg(xmm2), w_xmm3),
|
||||
Inst::xmm_rm_r_blend(SseOpcode::Blendvps, RegMem::reg(xmm2), w_xmm3),
|
||||
"660F3814DA",
|
||||
"blendvps %xmm3, %xmm2, %xmm3",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pblendvb, RegMem::reg(xmm12), w_xmm13),
|
||||
Inst::xmm_rm_r_blend(SseOpcode::Pblendvb, RegMem::reg(xmm12), w_xmm13),
|
||||
"66450F3810EC",
|
||||
"pblendvb %xmm13, %xmm12, %xmm13",
|
||||
));
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
use crate::binemit::{Addend, CodeOffset, Reloc, StackMap};
|
||||
use crate::ir::{types, ExternalName, LibCall, Opcode, RelSourceLoc, TrapCode, Type};
|
||||
use crate::isa::x64::abi::X64ABIMachineSpec;
|
||||
use crate::isa::x64::inst::regs::pretty_print_reg;
|
||||
use crate::isa::x64::inst::regs::{pretty_print_reg, show_ireg_sized};
|
||||
use crate::isa::x64::settings as x64_settings;
|
||||
use crate::isa::CallConv;
|
||||
use crate::{machinst::*, trace};
|
||||
@@ -130,6 +130,7 @@ impl Inst {
|
||||
| Inst::XmmMovRM { op, .. }
|
||||
| Inst::XmmRmiReg { opcode: op, .. }
|
||||
| Inst::XmmRmR { op, .. }
|
||||
| Inst::XmmRmRBlend { op, .. }
|
||||
| Inst::XmmRmRImm { op, .. }
|
||||
| Inst::XmmToGpr { op, .. }
|
||||
| Inst::XmmUnaryRmRImm { op, .. }
|
||||
@@ -938,6 +939,33 @@ impl PrettyPrint for Inst {
|
||||
format!("{} {}, {}, {}", ljustify(op.to_string()), src1, src2, dst)
|
||||
}
|
||||
|
||||
Inst::XmmRmRBlend {
|
||||
op,
|
||||
src1,
|
||||
src2,
|
||||
mask,
|
||||
dst,
|
||||
} => {
|
||||
let src1 = pretty_print_reg(src1.to_reg(), 8, allocs);
|
||||
let mask = allocs.next(mask.to_reg());
|
||||
let mask = if mask.is_virtual() {
|
||||
format!(" <{}>", show_ireg_sized(mask, 8))
|
||||
} else {
|
||||
debug_assert_eq!(mask, regs::xmm0());
|
||||
String::new()
|
||||
};
|
||||
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
|
||||
let src2 = src2.pretty_print(8, allocs);
|
||||
format!(
|
||||
"{} {}, {}, {}{}",
|
||||
ljustify(op.to_string()),
|
||||
src1,
|
||||
src2,
|
||||
dst,
|
||||
mask
|
||||
)
|
||||
}
|
||||
|
||||
Inst::XmmRmRVex {
|
||||
op,
|
||||
src1,
|
||||
@@ -1765,11 +1793,7 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
|
||||
src.get_operands(collector);
|
||||
}
|
||||
Inst::XmmRmR {
|
||||
src1,
|
||||
src2,
|
||||
dst,
|
||||
op,
|
||||
..
|
||||
src1, src2, dst, ..
|
||||
} => {
|
||||
if inst.produces_const() {
|
||||
collector.reg_def(dst.to_writable_reg());
|
||||
@@ -1777,15 +1801,24 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
|
||||
collector.reg_use(src1.to_reg());
|
||||
collector.reg_reuse_def(dst.to_writable_reg(), 0);
|
||||
src2.get_operands(collector);
|
||||
|
||||
// Some instructions have an implicit use of XMM0.
|
||||
if *op == SseOpcode::Blendvpd
|
||||
}
|
||||
}
|
||||
Inst::XmmRmRBlend {
|
||||
src1,
|
||||
src2,
|
||||
mask,
|
||||
dst,
|
||||
op,
|
||||
} => {
|
||||
assert!(
|
||||
*op == SseOpcode::Blendvpd
|
||||
|| *op == SseOpcode::Blendvps
|
||||
|| *op == SseOpcode::Pblendvb
|
||||
{
|
||||
collector.reg_use(regs::xmm0());
|
||||
}
|
||||
}
|
||||
);
|
||||
collector.reg_use(src1.to_reg());
|
||||
collector.reg_fixed_use(mask.to_reg(), regs::xmm0());
|
||||
collector.reg_reuse_def(dst.to_writable_reg(), 0);
|
||||
src2.get_operands(collector);
|
||||
}
|
||||
Inst::XmmRmRVex {
|
||||
op,
|
||||
|
||||
Reference in New Issue
Block a user