[machinst x64]: add extractlane implementation

This commit is contained in:
Andrew Brown
2020-09-23 07:13:53 -07:00
parent 29fa894790
commit f4836f9ca9
4 changed files with 100 additions and 6 deletions

View File

@@ -404,6 +404,9 @@ pub enum SseOpcode {
Paddw, Paddw,
Pavgb, Pavgb,
Pavgw, Pavgw,
Pextrb,
Pextrw,
Pextrd,
Pinsrb, Pinsrb,
Pinsrw, Pinsrw,
Pinsrd, Pinsrd,
@@ -422,6 +425,7 @@ pub enum SseOpcode {
Pmulld, Pmulld,
Pmullw, Pmullw,
Pmuludq, Pmuludq,
Pshufd,
Psllw, Psllw,
Pslld, Pslld,
Psllq, Psllq,
@@ -524,6 +528,7 @@ impl SseOpcode {
| SseOpcode::Paddw | SseOpcode::Paddw
| SseOpcode::Pavgb | SseOpcode::Pavgb
| SseOpcode::Pavgw | SseOpcode::Pavgw
| SseOpcode::Pextrw
| SseOpcode::Pinsrw | SseOpcode::Pinsrw
| SseOpcode::Pmaxsw | SseOpcode::Pmaxsw
| SseOpcode::Pmaxub | SseOpcode::Pmaxub
@@ -531,6 +536,7 @@ impl SseOpcode {
| SseOpcode::Pminub | SseOpcode::Pminub
| SseOpcode::Pmullw | SseOpcode::Pmullw
| SseOpcode::Pmuludq | SseOpcode::Pmuludq
| SseOpcode::Pshufd
| SseOpcode::Psllw | SseOpcode::Psllw
| SseOpcode::Pslld | SseOpcode::Pslld
| SseOpcode::Psllq | SseOpcode::Psllq
@@ -554,6 +560,8 @@ impl SseOpcode {
SseOpcode::Pabsb | SseOpcode::Pabsw | SseOpcode::Pabsd => SSSE3, SseOpcode::Pabsb | SseOpcode::Pabsw | SseOpcode::Pabsd => SSSE3,
SseOpcode::Insertps SseOpcode::Insertps
| SseOpcode::Pextrb
| SseOpcode::Pextrd
| SseOpcode::Pinsrb | SseOpcode::Pinsrb
| SseOpcode::Pinsrd | SseOpcode::Pinsrd
| SseOpcode::Pmaxsb | SseOpcode::Pmaxsb
@@ -643,6 +651,9 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Paddw => "paddw", SseOpcode::Paddw => "paddw",
SseOpcode::Pavgb => "pavgb", SseOpcode::Pavgb => "pavgb",
SseOpcode::Pavgw => "pavgw", SseOpcode::Pavgw => "pavgw",
SseOpcode::Pextrb => "pextrb",
SseOpcode::Pextrw => "pextrw",
SseOpcode::Pextrd => "pextrd",
SseOpcode::Pinsrb => "pinsrb", SseOpcode::Pinsrb => "pinsrb",
SseOpcode::Pinsrw => "pinsrw", SseOpcode::Pinsrw => "pinsrw",
SseOpcode::Pinsrd => "pinsrd", SseOpcode::Pinsrd => "pinsrd",
@@ -661,6 +672,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Pmulld => "pmulld", SseOpcode::Pmulld => "pmulld",
SseOpcode::Pmullw => "pmullw", SseOpcode::Pmullw => "pmullw",
SseOpcode::Pmuludq => "pmuludq", SseOpcode::Pmuludq => "pmuludq",
SseOpcode::Pshufd => "pshufd",
SseOpcode::Psllw => "psllw", SseOpcode::Psllw => "psllw",
SseOpcode::Pslld => "pslld", SseOpcode::Pslld => "pslld",
SseOpcode::Psllq => "psllq", SseOpcode::Psllq => "psllq",

View File

@@ -1915,7 +1915,7 @@ pub(crate) fn emit(
imm, imm,
is64: w, is64: w,
} => { } => {
let (prefix, opcode, num_opcodes) = match op { let (prefix, opcode, len) = match op {
SseOpcode::Cmpps => (LegacyPrefixes::None, 0x0FC2, 2), SseOpcode::Cmpps => (LegacyPrefixes::None, 0x0FC2, 2),
SseOpcode::Cmppd => (LegacyPrefixes::_66, 0x0FC2, 2), SseOpcode::Cmppd => (LegacyPrefixes::_66, 0x0FC2, 2),
SseOpcode::Cmpss => (LegacyPrefixes::_F3, 0x0FC2, 2), SseOpcode::Cmpss => (LegacyPrefixes::_F3, 0x0FC2, 2),
@@ -1924,6 +1924,10 @@ pub(crate) fn emit(
SseOpcode::Pinsrb => (LegacyPrefixes::_66, 0x0F3A20, 3), SseOpcode::Pinsrb => (LegacyPrefixes::_66, 0x0F3A20, 3),
SseOpcode::Pinsrw => (LegacyPrefixes::_66, 0x0FC4, 2), SseOpcode::Pinsrw => (LegacyPrefixes::_66, 0x0FC4, 2),
SseOpcode::Pinsrd => (LegacyPrefixes::_66, 0x0F3A22, 3), SseOpcode::Pinsrd => (LegacyPrefixes::_66, 0x0F3A22, 3),
SseOpcode::Pextrb => (LegacyPrefixes::_66, 0x0F3A14, 3),
SseOpcode::Pextrw => (LegacyPrefixes::_66, 0x0FC5, 2),
SseOpcode::Pextrd => (LegacyPrefixes::_66, 0x0F3A16, 3),
SseOpcode::Pshufd => (LegacyPrefixes::_66, 0x0F70, 2),
_ => unimplemented!("Opcode {:?} not implemented", op), _ => unimplemented!("Opcode {:?} not implemented", op),
}; };
let rex = if *w { let rex = if *w {
@@ -1931,13 +1935,29 @@ pub(crate) fn emit(
} else { } else {
RexFlags::clear_w() RexFlags::clear_w()
}; };
let regs_swapped = match *op {
// These opcodes (and not the SSE2 version of PEXTRW) flip the operand
// encoding: `dst` in ModRM's r/m, `src` in ModRM's reg field.
SseOpcode::Pextrb | SseOpcode::Pextrd => true,
// The rest of the opcodes have the customary encoding: `dst` in ModRM's reg,
// `src` in ModRM's r/m field.
_ => false,
};
match src { match src {
RegMem::Reg { reg } => { RegMem::Reg { reg } => {
emit_std_reg_reg(sink, prefix, opcode, num_opcodes, dst.to_reg(), *reg, rex); if regs_swapped {
emit_std_reg_reg(sink, prefix, opcode, len, *reg, dst.to_reg(), rex);
} else {
emit_std_reg_reg(sink, prefix, opcode, len, dst.to_reg(), *reg, rex);
}
} }
RegMem::Mem { addr } => { RegMem::Mem { addr } => {
let addr = &addr.finalize(state); let addr = &addr.finalize(state);
emit_std_reg_mem(sink, prefix, opcode, num_opcodes, dst.to_reg(), addr, rex); assert!(
!regs_swapped,
"No existing way to encode a mem argument in the ModRM r/m field."
);
emit_std_reg_mem(sink, prefix, opcode, len, dst.to_reg(), addr, rex);
} }
} }
sink.put1(*imm) sink.put1(*imm)

View File

@@ -788,8 +788,6 @@ impl Inst {
imm: u8, imm: u8,
w: bool, w: bool,
) -> Inst { ) -> Inst {
debug_assert!(dst.to_reg().get_class() == RegClass::V128);
debug_assert!(imm < 8);
Inst::XmmRmRImm { Inst::XmmRmRImm {
op, op,
src, src,
@@ -1736,10 +1734,17 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_mod(*dst); collector.add_mod(*dst);
} }
} }
Inst::XmmRmRImm { src, dst, .. } => { Inst::XmmRmRImm { op, src, dst, .. } => {
if inst.produces_const() { if inst.produces_const() {
// No need to account for src, since src == dst. // No need to account for src, since src == dst.
collector.add_def(*dst); collector.add_def(*dst);
} else if *op == SseOpcode::Pextrb
|| *op == SseOpcode::Pextrw
|| *op == SseOpcode::Pextrd
|| *op == SseOpcode::Pshufd
{
src.get_regs_as_uses(collector);
collector.add_def(*dst);
} else { } else {
src.get_regs_as_uses(collector); src.get_regs_as_uses(collector);
collector.add_mod(*dst); collector.add_mod(*dst);
@@ -2038,6 +2043,7 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
map_def(mapper, dst); map_def(mapper, dst);
} }
Inst::XmmRmRImm { Inst::XmmRmRImm {
ref op,
ref mut src, ref mut src,
ref mut dst, ref mut dst,
.. ..
@@ -2045,6 +2051,13 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
if produces_const { if produces_const {
src.map_as_def(mapper); src.map_as_def(mapper);
map_def(mapper, dst); map_def(mapper, dst);
} else if *op == SseOpcode::Pextrb
|| *op == SseOpcode::Pextrw
|| *op == SseOpcode::Pextrd
|| *op == SseOpcode::Pshufd
{
src.map_uses(mapper);
map_def(mapper, dst);
} else { } else {
src.map_uses(mapper); src.map_uses(mapper);
map_mod(mapper, dst); map_mod(mapper, dst);

View File

@@ -2690,6 +2690,55 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
} }
} }
Opcode::Extractlane => {
    // Lowers `%dst = extractlane %src, %lane`: pull one lane out of a 128-bit vector
    // held in `src` and place it in `dst`.
    let ty = ty.unwrap();
    let dst = get_output_reg(ctx, outputs[0]);
    let src_ty = ctx.input_ty(insn, 0);
    assert_eq!(src_ty.bits(), 128);
    let src = put_input_in_reg(ctx, inputs[0]);

    // The lane index travels as the 8-bit immediate of the CLIF instruction.
    let lane = match ctx.data(insn) {
        InstructionData::BinaryImm8 { imm, .. } => *imm,
        _ => unreachable!(),
    };

    if ty.is_float() {
        let inst = if lane == 0 {
            // Lane 0 already sits in the low bits, so a plain move is enough. The upper
            // bits of `dst` keep whatever `src` had there; correctness relies on
            // Cranelift type checking to keep later code from reading those bits.
            Inst::gen_move(dst, src, ty)
        } else {
            // Any other lane is shuffled down into the lowest lane.
            let mask = match src_ty {
                // Bring the value at `lane` into lane 0; every other lane receives a
                // copy of the old lane 0. As above, Cranelift type checking is relied
                // upon so that those other lanes are never observed.
                types::F32X4 => lane,
                // `lane` can only be 1 here (0 was handled by the move above): bring
                // lane 1 into lane 0 and leave lane 1 as it was. The same type-checking
                // assumption applies.
                types::F64X2 => 0b11_10_11_10,
                _ => unreachable!(),
            };
            Inst::xmm_rm_r_imm(SseOpcode::Pshufd, RegMem::reg(src), dst, mask, false)
        };
        ctx.emit(inst);
    } else {
        // Integer lanes: choose the extract opcode by lane width. 64-bit lanes reuse
        // `Pextrd` with the `w` flag set (presumably yielding the REX.W form of the
        // instruction -- confirm against the emitter).
        let lane_bits = ty.lane_bits();
        let (sse_op, w_bit) = match lane_bits {
            8 => (SseOpcode::Pextrb, false),
            16 => (SseOpcode::Pextrw, false),
            32 => (SseOpcode::Pextrd, false),
            64 => (SseOpcode::Pextrd, true),
            _ => panic!("Unable to extractlane for lane size: {}", lane_bits),
        };
        ctx.emit(Inst::xmm_rm_r_imm(sse_op, RegMem::reg(src), dst, lane, w_bit));
    }
}
Opcode::IaddImm Opcode::IaddImm
| Opcode::ImulImm | Opcode::ImulImm
| Opcode::UdivImm | Opcode::UdivImm