[machinst x64]: add insertlane implementation
This commit is contained in:
@@ -383,6 +383,7 @@ pub enum SseOpcode {
|
|||||||
Movd,
|
Movd,
|
||||||
Movdqa,
|
Movdqa,
|
||||||
Movdqu,
|
Movdqu,
|
||||||
|
Movlhps,
|
||||||
Movq,
|
Movq,
|
||||||
Movss,
|
Movss,
|
||||||
Movsd,
|
Movsd,
|
||||||
@@ -403,6 +404,9 @@ pub enum SseOpcode {
|
|||||||
Paddw,
|
Paddw,
|
||||||
Pavgb,
|
Pavgb,
|
||||||
Pavgw,
|
Pavgw,
|
||||||
|
Pinsrb,
|
||||||
|
Pinsrw,
|
||||||
|
Pinsrd,
|
||||||
Pmaxsb,
|
Pmaxsb,
|
||||||
Pmaxsw,
|
Pmaxsw,
|
||||||
Pmaxsd,
|
Pmaxsd,
|
||||||
@@ -471,6 +475,7 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Minps
|
| SseOpcode::Minps
|
||||||
| SseOpcode::Minss
|
| SseOpcode::Minss
|
||||||
| SseOpcode::Movaps
|
| SseOpcode::Movaps
|
||||||
|
| SseOpcode::Movlhps
|
||||||
| SseOpcode::Movss
|
| SseOpcode::Movss
|
||||||
| SseOpcode::Movups
|
| SseOpcode::Movups
|
||||||
| SseOpcode::Mulps
|
| SseOpcode::Mulps
|
||||||
@@ -519,6 +524,7 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Paddw
|
| SseOpcode::Paddw
|
||||||
| SseOpcode::Pavgb
|
| SseOpcode::Pavgb
|
||||||
| SseOpcode::Pavgw
|
| SseOpcode::Pavgw
|
||||||
|
| SseOpcode::Pinsrw
|
||||||
| SseOpcode::Pmaxsw
|
| SseOpcode::Pmaxsw
|
||||||
| SseOpcode::Pmaxub
|
| SseOpcode::Pmaxub
|
||||||
| SseOpcode::Pminsw
|
| SseOpcode::Pminsw
|
||||||
@@ -548,6 +554,8 @@ impl SseOpcode {
|
|||||||
SseOpcode::Pabsb | SseOpcode::Pabsw | SseOpcode::Pabsd => SSSE3,
|
SseOpcode::Pabsb | SseOpcode::Pabsw | SseOpcode::Pabsd => SSSE3,
|
||||||
|
|
||||||
SseOpcode::Insertps
|
SseOpcode::Insertps
|
||||||
|
| SseOpcode::Pinsrb
|
||||||
|
| SseOpcode::Pinsrd
|
||||||
| SseOpcode::Pmaxsb
|
| SseOpcode::Pmaxsb
|
||||||
| SseOpcode::Pmaxsd
|
| SseOpcode::Pmaxsd
|
||||||
| SseOpcode::Pmaxuw
|
| SseOpcode::Pmaxuw
|
||||||
@@ -614,6 +622,7 @@ impl fmt::Debug for SseOpcode {
|
|||||||
SseOpcode::Movd => "movd",
|
SseOpcode::Movd => "movd",
|
||||||
SseOpcode::Movdqa => "movdqa",
|
SseOpcode::Movdqa => "movdqa",
|
||||||
SseOpcode::Movdqu => "movdqu",
|
SseOpcode::Movdqu => "movdqu",
|
||||||
|
SseOpcode::Movlhps => "movlhps",
|
||||||
SseOpcode::Movq => "movq",
|
SseOpcode::Movq => "movq",
|
||||||
SseOpcode::Movss => "movss",
|
SseOpcode::Movss => "movss",
|
||||||
SseOpcode::Movsd => "movsd",
|
SseOpcode::Movsd => "movsd",
|
||||||
@@ -634,6 +643,9 @@ impl fmt::Debug for SseOpcode {
|
|||||||
SseOpcode::Paddw => "paddw",
|
SseOpcode::Paddw => "paddw",
|
||||||
SseOpcode::Pavgb => "pavgb",
|
SseOpcode::Pavgb => "pavgb",
|
||||||
SseOpcode::Pavgw => "pavgw",
|
SseOpcode::Pavgw => "pavgw",
|
||||||
|
SseOpcode::Pinsrb => "pinsrb",
|
||||||
|
SseOpcode::Pinsrw => "pinsrw",
|
||||||
|
SseOpcode::Pinsrd => "pinsrd",
|
||||||
SseOpcode::Pmaxsb => "pmaxsb",
|
SseOpcode::Pmaxsb => "pmaxsb",
|
||||||
SseOpcode::Pmaxsw => "pmaxsw",
|
SseOpcode::Pmaxsw => "pmaxsw",
|
||||||
SseOpcode::Pmaxsd => "pmaxsd",
|
SseOpcode::Pmaxsd => "pmaxsd",
|
||||||
|
|||||||
@@ -1760,14 +1760,16 @@ pub(crate) fn emit(
|
|||||||
SseOpcode::Divpd => (LegacyPrefixes::_66, 0x0F5E, 2),
|
SseOpcode::Divpd => (LegacyPrefixes::_66, 0x0F5E, 2),
|
||||||
SseOpcode::Divss => (LegacyPrefixes::_F3, 0x0F5E, 2),
|
SseOpcode::Divss => (LegacyPrefixes::_F3, 0x0F5E, 2),
|
||||||
SseOpcode::Divsd => (LegacyPrefixes::_F2, 0x0F5E, 2),
|
SseOpcode::Divsd => (LegacyPrefixes::_F2, 0x0F5E, 2),
|
||||||
SseOpcode::Minps => (LegacyPrefixes::None, 0x0F5D, 2),
|
|
||||||
SseOpcode::Minpd => (LegacyPrefixes::_66, 0x0F5D, 2),
|
|
||||||
SseOpcode::Minss => (LegacyPrefixes::_F3, 0x0F5D, 2),
|
|
||||||
SseOpcode::Minsd => (LegacyPrefixes::_F2, 0x0F5D, 2),
|
|
||||||
SseOpcode::Maxps => (LegacyPrefixes::None, 0x0F5F, 2),
|
SseOpcode::Maxps => (LegacyPrefixes::None, 0x0F5F, 2),
|
||||||
SseOpcode::Maxpd => (LegacyPrefixes::_66, 0x0F5F, 2),
|
SseOpcode::Maxpd => (LegacyPrefixes::_66, 0x0F5F, 2),
|
||||||
SseOpcode::Maxss => (LegacyPrefixes::_F3, 0x0F5F, 2),
|
SseOpcode::Maxss => (LegacyPrefixes::_F3, 0x0F5F, 2),
|
||||||
SseOpcode::Maxsd => (LegacyPrefixes::_F2, 0x0F5F, 2),
|
SseOpcode::Maxsd => (LegacyPrefixes::_F2, 0x0F5F, 2),
|
||||||
|
SseOpcode::Minps => (LegacyPrefixes::None, 0x0F5D, 2),
|
||||||
|
SseOpcode::Minpd => (LegacyPrefixes::_66, 0x0F5D, 2),
|
||||||
|
SseOpcode::Minss => (LegacyPrefixes::_F3, 0x0F5D, 2),
|
||||||
|
SseOpcode::Minsd => (LegacyPrefixes::_F2, 0x0F5D, 2),
|
||||||
|
SseOpcode::Movlhps => (LegacyPrefixes::None, 0x0F16, 2),
|
||||||
|
SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F10, 2),
|
||||||
SseOpcode::Mulps => (LegacyPrefixes::None, 0x0F59, 2),
|
SseOpcode::Mulps => (LegacyPrefixes::None, 0x0F59, 2),
|
||||||
SseOpcode::Mulpd => (LegacyPrefixes::_66, 0x0F59, 2),
|
SseOpcode::Mulpd => (LegacyPrefixes::_66, 0x0F59, 2),
|
||||||
SseOpcode::Mulss => (LegacyPrefixes::_F3, 0x0F59, 2),
|
SseOpcode::Mulss => (LegacyPrefixes::_F3, 0x0F59, 2),
|
||||||
@@ -1906,23 +1908,36 @@ pub(crate) fn emit(
|
|||||||
sink.bind_label(done);
|
sink.bind_label(done);
|
||||||
}
|
}
|
||||||
|
|
||||||
Inst::XmmRmRImm { op, src, dst, imm } => {
|
Inst::XmmRmRImm {
|
||||||
let prefix = match op {
|
op,
|
||||||
SseOpcode::Cmpps => LegacyPrefixes::None,
|
src,
|
||||||
SseOpcode::Cmppd => LegacyPrefixes::_66,
|
dst,
|
||||||
SseOpcode::Cmpss => LegacyPrefixes::_F3,
|
imm,
|
||||||
SseOpcode::Cmpsd => LegacyPrefixes::_F2,
|
is64: w,
|
||||||
|
} => {
|
||||||
|
let (prefix, opcode, num_opcodes) = match op {
|
||||||
|
SseOpcode::Cmpps => (LegacyPrefixes::None, 0x0FC2, 2),
|
||||||
|
SseOpcode::Cmppd => (LegacyPrefixes::_66, 0x0FC2, 2),
|
||||||
|
SseOpcode::Cmpss => (LegacyPrefixes::_F3, 0x0FC2, 2),
|
||||||
|
SseOpcode::Cmpsd => (LegacyPrefixes::_F2, 0x0FC2, 2),
|
||||||
|
SseOpcode::Insertps => (LegacyPrefixes::_66, 0x0F3A21, 3),
|
||||||
|
SseOpcode::Pinsrb => (LegacyPrefixes::_66, 0x0F3A20, 3),
|
||||||
|
SseOpcode::Pinsrw => (LegacyPrefixes::_66, 0x0FC4, 2),
|
||||||
|
SseOpcode::Pinsrd => (LegacyPrefixes::_66, 0x0F3A22, 3),
|
||||||
_ => unimplemented!("Opcode {:?} not implemented", op),
|
_ => unimplemented!("Opcode {:?} not implemented", op),
|
||||||
};
|
};
|
||||||
let opcode = 0x0FC2;
|
let rex = if *w {
|
||||||
let rex = RexFlags::clear_w();
|
RexFlags::set_w()
|
||||||
|
} else {
|
||||||
|
RexFlags::clear_w()
|
||||||
|
};
|
||||||
match src {
|
match src {
|
||||||
RegMem::Reg { reg } => {
|
RegMem::Reg { reg } => {
|
||||||
emit_std_reg_reg(sink, prefix, opcode, 2, dst.to_reg(), *reg, rex);
|
emit_std_reg_reg(sink, prefix, opcode, num_opcodes, dst.to_reg(), *reg, rex);
|
||||||
}
|
}
|
||||||
RegMem::Mem { addr } => {
|
RegMem::Mem { addr } => {
|
||||||
let addr = &addr.finalize(state);
|
let addr = &addr.finalize(state);
|
||||||
emit_std_reg_mem(sink, prefix, opcode, 2, dst.to_reg(), addr, rex);
|
emit_std_reg_mem(sink, prefix, opcode, num_opcodes, dst.to_reg(), addr, rex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
sink.put1(*imm)
|
sink.put1(*imm)
|
||||||
|
|||||||
@@ -3441,12 +3441,12 @@ fn test_x64_emit() {
|
|||||||
// ========================================================
|
// ========================================================
|
||||||
// XmmRmRImm
|
// XmmRmRImm
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::xmm_rm_r_imm(SseOpcode::Cmppd, RegMem::reg(xmm5), w_xmm1, 2),
|
Inst::xmm_rm_r_imm(SseOpcode::Cmppd, RegMem::reg(xmm5), w_xmm1, 2, false),
|
||||||
"660FC2CD02",
|
"660FC2CD02",
|
||||||
"cmppd $2, %xmm5, %xmm1",
|
"cmppd $2, %xmm5, %xmm1",
|
||||||
));
|
));
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::xmm_rm_r_imm(SseOpcode::Cmpps, RegMem::reg(xmm15), w_xmm7, 0),
|
Inst::xmm_rm_r_imm(SseOpcode::Cmpps, RegMem::reg(xmm15), w_xmm7, 0, false),
|
||||||
"410FC2FF00",
|
"410FC2FF00",
|
||||||
"cmpps $0, %xmm15, %xmm7",
|
"cmpps $0, %xmm15, %xmm7",
|
||||||
));
|
));
|
||||||
|
|||||||
@@ -333,12 +333,13 @@ pub enum Inst {
|
|||||||
dst: Reg,
|
dst: Reg,
|
||||||
},
|
},
|
||||||
|
|
||||||
/// A binary XMM instruction with an 8-bit immediate: cmp (ps pd) imm (reg addr) reg
|
/// A binary XMM instruction with an 8-bit immediate: e.g. cmp (ps pd) imm (reg addr) reg
|
||||||
XmmRmRImm {
|
XmmRmRImm {
|
||||||
op: SseOpcode,
|
op: SseOpcode,
|
||||||
src: RegMem,
|
src: RegMem,
|
||||||
dst: Writable<Reg>,
|
dst: Writable<Reg>,
|
||||||
imm: u8,
|
imm: u8,
|
||||||
|
is64: bool,
|
||||||
},
|
},
|
||||||
|
|
||||||
// =====================================
|
// =====================================
|
||||||
@@ -780,11 +781,22 @@ impl Inst {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn xmm_rm_r_imm(op: SseOpcode, src: RegMem, dst: Writable<Reg>, imm: u8) -> Inst {
|
pub(crate) fn xmm_rm_r_imm(
|
||||||
src.assert_regclass_is(RegClass::V128);
|
op: SseOpcode,
|
||||||
|
src: RegMem,
|
||||||
|
dst: Writable<Reg>,
|
||||||
|
imm: u8,
|
||||||
|
w: bool,
|
||||||
|
) -> Inst {
|
||||||
debug_assert!(dst.to_reg().get_class() == RegClass::V128);
|
debug_assert!(dst.to_reg().get_class() == RegClass::V128);
|
||||||
debug_assert!(imm < 8);
|
debug_assert!(imm < 8);
|
||||||
Inst::XmmRmRImm { op, src, dst, imm }
|
Inst::XmmRmRImm {
|
||||||
|
op,
|
||||||
|
src,
|
||||||
|
dst,
|
||||||
|
imm,
|
||||||
|
is64: w,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn movzx_rm_r(
|
pub(crate) fn movzx_rm_r(
|
||||||
@@ -1118,7 +1130,9 @@ impl Inst {
|
|||||||
|| *op == SseOpcode::Pxor)
|
|| *op == SseOpcode::Pxor)
|
||||||
}
|
}
|
||||||
|
|
||||||
Self::XmmRmRImm { op, src, dst, imm } => {
|
Self::XmmRmRImm {
|
||||||
|
op, src, dst, imm, ..
|
||||||
|
} => {
|
||||||
src.to_reg() == Some(dst.to_reg())
|
src.to_reg() == Some(dst.to_reg())
|
||||||
&& (*op == SseOpcode::Cmppd || *op == SseOpcode::Cmpps)
|
&& (*op == SseOpcode::Cmppd || *op == SseOpcode::Cmpps)
|
||||||
&& *imm == FcmpImm::Equal.encode()
|
&& *imm == FcmpImm::Equal.encode()
|
||||||
@@ -1300,9 +1314,9 @@ impl ShowWithRRU for Inst {
|
|||||||
show_ireg_sized(rhs_dst.to_reg(), mb_rru, 8),
|
show_ireg_sized(rhs_dst.to_reg(), mb_rru, 8),
|
||||||
),
|
),
|
||||||
|
|
||||||
Inst::XmmRmRImm { op, src, dst, imm } => format!(
|
Inst::XmmRmRImm { op, src, dst, imm, is64 } => format!(
|
||||||
"{} ${}, {}, {}",
|
"{} ${}, {}, {}",
|
||||||
ljustify(op.to_string()),
|
ljustify(format!("{}{}", op.to_string(), if *is64 { ".w" } else { "" })),
|
||||||
imm,
|
imm,
|
||||||
src.show_rru(mb_rru),
|
src.show_rru(mb_rru),
|
||||||
dst.show_rru(mb_rru),
|
dst.show_rru(mb_rru),
|
||||||
|
|||||||
@@ -1394,7 +1394,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
ctx.emit(Inst::gen_move(dst, lhs, input_ty));
|
ctx.emit(Inst::gen_move(dst, lhs, input_ty));
|
||||||
|
|
||||||
// Emit the comparison.
|
// Emit the comparison.
|
||||||
ctx.emit(Inst::xmm_rm_r_imm(op, rhs, dst, imm.encode()));
|
ctx.emit(Inst::xmm_rm_r_imm(op, rhs, dst, imm.encode(), false));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1859,6 +1859,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
RegMem::reg(tmp.to_reg()),
|
RegMem::reg(tmp.to_reg()),
|
||||||
tmp,
|
tmp,
|
||||||
cond.encode(),
|
cond.encode(),
|
||||||
|
false,
|
||||||
);
|
);
|
||||||
ctx.emit(cmpps);
|
ctx.emit(cmpps);
|
||||||
|
|
||||||
@@ -2639,6 +2640,56 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
ctx.emit(Inst::gen_move(dst, src, ty));
|
ctx.emit(Inst::gen_move(dst, src, ty));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Opcode::Insertlane => {
|
||||||
|
// The instruction format maps to variables like: %dst = insertlane %in_vec, %src, %lane
|
||||||
|
let ty = ty.unwrap();
|
||||||
|
let dst = get_output_reg(ctx, outputs[0]);
|
||||||
|
let in_vec = put_input_in_reg(ctx, inputs[0]);
|
||||||
|
let src_ty = ctx.input_ty(insn, 1);
|
||||||
|
debug_assert!(!src_ty.is_vector());
|
||||||
|
let src = input_to_reg_mem(ctx, inputs[1]);
|
||||||
|
let lane = if let InstructionData::TernaryImm8 { imm, .. } = ctx.data(insn) {
|
||||||
|
*imm
|
||||||
|
} else {
|
||||||
|
unreachable!();
|
||||||
|
};
|
||||||
|
debug_assert!(lane < ty.lane_count() as u8);
|
||||||
|
|
||||||
|
ctx.emit(Inst::gen_move(dst, in_vec, ty));
|
||||||
|
if !src_ty.is_float() {
|
||||||
|
let (sse_op, w_bit) = match ty.lane_bits() {
|
||||||
|
8 => (SseOpcode::Pinsrb, false),
|
||||||
|
16 => (SseOpcode::Pinsrw, false),
|
||||||
|
32 => (SseOpcode::Pinsrd, false),
|
||||||
|
64 => (SseOpcode::Pinsrd, true),
|
||||||
|
_ => panic!("Unable to insertlane for lane size: {}", ty.lane_bits()),
|
||||||
|
};
|
||||||
|
ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, w_bit));
|
||||||
|
} else if src_ty == types::F32 {
|
||||||
|
let sse_op = SseOpcode::Insertps;
|
||||||
|
// Insert 32-bits from replacement (at index 00, bits 7:6) to vector (lane
|
||||||
|
// shifted into bits 5:4).
|
||||||
|
let lane = 0b00_00_00_00 | lane << 4;
|
||||||
|
ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, false));
|
||||||
|
} else if src_ty == types::F64 {
|
||||||
|
let sse_op = match lane {
|
||||||
|
// Move the lowest quadword in replacement to vector without changing
|
||||||
|
// the upper bits.
|
||||||
|
0 => SseOpcode::Movsd,
|
||||||
|
// Move the low 64 bits of replacement vector to the high 64 bits of the
|
||||||
|
// vector.
|
||||||
|
1 => SseOpcode::Movlhps,
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
// Here we use the `xmm_rm_r` encoding because it correctly tells the register
|
||||||
|
// allocator how we are using `dst`: we are using `dst` as a `mod` whereas other
|
||||||
|
// encoding formats like `xmm_unary_rm_r` treat it as a `def`.
|
||||||
|
ctx.emit(Inst::xmm_rm_r(sse_op, src, dst));
|
||||||
|
} else {
|
||||||
|
panic!("Unable to insertlane for type: {}", ty);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Opcode::IaddImm
|
Opcode::IaddImm
|
||||||
| Opcode::ImulImm
|
| Opcode::ImulImm
|
||||||
| Opcode::UdivImm
|
| Opcode::UdivImm
|
||||||
|
|||||||
Reference in New Issue
Block a user