[machinst x64]: add extractlane implementation
This commit is contained in:
@@ -404,6 +404,9 @@ pub enum SseOpcode {
|
|||||||
Paddw,
|
Paddw,
|
||||||
Pavgb,
|
Pavgb,
|
||||||
Pavgw,
|
Pavgw,
|
||||||
|
Pextrb,
|
||||||
|
Pextrw,
|
||||||
|
Pextrd,
|
||||||
Pinsrb,
|
Pinsrb,
|
||||||
Pinsrw,
|
Pinsrw,
|
||||||
Pinsrd,
|
Pinsrd,
|
||||||
@@ -422,6 +425,7 @@ pub enum SseOpcode {
|
|||||||
Pmulld,
|
Pmulld,
|
||||||
Pmullw,
|
Pmullw,
|
||||||
Pmuludq,
|
Pmuludq,
|
||||||
|
Pshufd,
|
||||||
Psllw,
|
Psllw,
|
||||||
Pslld,
|
Pslld,
|
||||||
Psllq,
|
Psllq,
|
||||||
@@ -524,6 +528,7 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Paddw
|
| SseOpcode::Paddw
|
||||||
| SseOpcode::Pavgb
|
| SseOpcode::Pavgb
|
||||||
| SseOpcode::Pavgw
|
| SseOpcode::Pavgw
|
||||||
|
| SseOpcode::Pextrw
|
||||||
| SseOpcode::Pinsrw
|
| SseOpcode::Pinsrw
|
||||||
| SseOpcode::Pmaxsw
|
| SseOpcode::Pmaxsw
|
||||||
| SseOpcode::Pmaxub
|
| SseOpcode::Pmaxub
|
||||||
@@ -531,6 +536,7 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Pminub
|
| SseOpcode::Pminub
|
||||||
| SseOpcode::Pmullw
|
| SseOpcode::Pmullw
|
||||||
| SseOpcode::Pmuludq
|
| SseOpcode::Pmuludq
|
||||||
|
| SseOpcode::Pshufd
|
||||||
| SseOpcode::Psllw
|
| SseOpcode::Psllw
|
||||||
| SseOpcode::Pslld
|
| SseOpcode::Pslld
|
||||||
| SseOpcode::Psllq
|
| SseOpcode::Psllq
|
||||||
@@ -554,6 +560,8 @@ impl SseOpcode {
|
|||||||
SseOpcode::Pabsb | SseOpcode::Pabsw | SseOpcode::Pabsd => SSSE3,
|
SseOpcode::Pabsb | SseOpcode::Pabsw | SseOpcode::Pabsd => SSSE3,
|
||||||
|
|
||||||
SseOpcode::Insertps
|
SseOpcode::Insertps
|
||||||
|
| SseOpcode::Pextrb
|
||||||
|
| SseOpcode::Pextrd
|
||||||
| SseOpcode::Pinsrb
|
| SseOpcode::Pinsrb
|
||||||
| SseOpcode::Pinsrd
|
| SseOpcode::Pinsrd
|
||||||
| SseOpcode::Pmaxsb
|
| SseOpcode::Pmaxsb
|
||||||
@@ -643,6 +651,9 @@ impl fmt::Debug for SseOpcode {
|
|||||||
SseOpcode::Paddw => "paddw",
|
SseOpcode::Paddw => "paddw",
|
||||||
SseOpcode::Pavgb => "pavgb",
|
SseOpcode::Pavgb => "pavgb",
|
||||||
SseOpcode::Pavgw => "pavgw",
|
SseOpcode::Pavgw => "pavgw",
|
||||||
|
SseOpcode::Pextrb => "pextrb",
|
||||||
|
SseOpcode::Pextrw => "pextrw",
|
||||||
|
SseOpcode::Pextrd => "pextrd",
|
||||||
SseOpcode::Pinsrb => "pinsrb",
|
SseOpcode::Pinsrb => "pinsrb",
|
||||||
SseOpcode::Pinsrw => "pinsrw",
|
SseOpcode::Pinsrw => "pinsrw",
|
||||||
SseOpcode::Pinsrd => "pinsrd",
|
SseOpcode::Pinsrd => "pinsrd",
|
||||||
@@ -661,6 +672,7 @@ impl fmt::Debug for SseOpcode {
|
|||||||
SseOpcode::Pmulld => "pmulld",
|
SseOpcode::Pmulld => "pmulld",
|
||||||
SseOpcode::Pmullw => "pmullw",
|
SseOpcode::Pmullw => "pmullw",
|
||||||
SseOpcode::Pmuludq => "pmuludq",
|
SseOpcode::Pmuludq => "pmuludq",
|
||||||
|
SseOpcode::Pshufd => "pshufd",
|
||||||
SseOpcode::Psllw => "psllw",
|
SseOpcode::Psllw => "psllw",
|
||||||
SseOpcode::Pslld => "pslld",
|
SseOpcode::Pslld => "pslld",
|
||||||
SseOpcode::Psllq => "psllq",
|
SseOpcode::Psllq => "psllq",
|
||||||
|
|||||||
@@ -1915,7 +1915,7 @@ pub(crate) fn emit(
|
|||||||
imm,
|
imm,
|
||||||
is64: w,
|
is64: w,
|
||||||
} => {
|
} => {
|
||||||
let (prefix, opcode, num_opcodes) = match op {
|
let (prefix, opcode, len) = match op {
|
||||||
SseOpcode::Cmpps => (LegacyPrefixes::None, 0x0FC2, 2),
|
SseOpcode::Cmpps => (LegacyPrefixes::None, 0x0FC2, 2),
|
||||||
SseOpcode::Cmppd => (LegacyPrefixes::_66, 0x0FC2, 2),
|
SseOpcode::Cmppd => (LegacyPrefixes::_66, 0x0FC2, 2),
|
||||||
SseOpcode::Cmpss => (LegacyPrefixes::_F3, 0x0FC2, 2),
|
SseOpcode::Cmpss => (LegacyPrefixes::_F3, 0x0FC2, 2),
|
||||||
@@ -1924,6 +1924,10 @@ pub(crate) fn emit(
|
|||||||
SseOpcode::Pinsrb => (LegacyPrefixes::_66, 0x0F3A20, 3),
|
SseOpcode::Pinsrb => (LegacyPrefixes::_66, 0x0F3A20, 3),
|
||||||
SseOpcode::Pinsrw => (LegacyPrefixes::_66, 0x0FC4, 2),
|
SseOpcode::Pinsrw => (LegacyPrefixes::_66, 0x0FC4, 2),
|
||||||
SseOpcode::Pinsrd => (LegacyPrefixes::_66, 0x0F3A22, 3),
|
SseOpcode::Pinsrd => (LegacyPrefixes::_66, 0x0F3A22, 3),
|
||||||
|
SseOpcode::Pextrb => (LegacyPrefixes::_66, 0x0F3A14, 3),
|
||||||
|
SseOpcode::Pextrw => (LegacyPrefixes::_66, 0x0FC5, 2),
|
||||||
|
SseOpcode::Pextrd => (LegacyPrefixes::_66, 0x0F3A16, 3),
|
||||||
|
SseOpcode::Pshufd => (LegacyPrefixes::_66, 0x0F70, 2),
|
||||||
_ => unimplemented!("Opcode {:?} not implemented", op),
|
_ => unimplemented!("Opcode {:?} not implemented", op),
|
||||||
};
|
};
|
||||||
let rex = if *w {
|
let rex = if *w {
|
||||||
@@ -1931,13 +1935,29 @@ pub(crate) fn emit(
|
|||||||
} else {
|
} else {
|
||||||
RexFlags::clear_w()
|
RexFlags::clear_w()
|
||||||
};
|
};
|
||||||
|
let regs_swapped = match *op {
|
||||||
|
// These opcodes (and not the SSE2 version of PEXTRW) flip the operand
|
||||||
|
// encoding: `dst` in ModRM's r/m, `src` in ModRM's reg field.
|
||||||
|
SseOpcode::Pextrb | SseOpcode::Pextrd => true,
|
||||||
|
// The rest of the opcodes have the customary encoding: `dst` in ModRM's reg,
|
||||||
|
// `src` in ModRM's r/m field.
|
||||||
|
_ => false,
|
||||||
|
};
|
||||||
match src {
|
match src {
|
||||||
RegMem::Reg { reg } => {
|
RegMem::Reg { reg } => {
|
||||||
emit_std_reg_reg(sink, prefix, opcode, num_opcodes, dst.to_reg(), *reg, rex);
|
if regs_swapped {
|
||||||
|
emit_std_reg_reg(sink, prefix, opcode, len, *reg, dst.to_reg(), rex);
|
||||||
|
} else {
|
||||||
|
emit_std_reg_reg(sink, prefix, opcode, len, dst.to_reg(), *reg, rex);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
RegMem::Mem { addr } => {
|
RegMem::Mem { addr } => {
|
||||||
let addr = &addr.finalize(state);
|
let addr = &addr.finalize(state);
|
||||||
emit_std_reg_mem(sink, prefix, opcode, num_opcodes, dst.to_reg(), addr, rex);
|
assert!(
|
||||||
|
!regs_swapped,
|
||||||
|
"No existing way to encode a mem argument in the ModRM r/m field."
|
||||||
|
);
|
||||||
|
emit_std_reg_mem(sink, prefix, opcode, len, dst.to_reg(), addr, rex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
sink.put1(*imm)
|
sink.put1(*imm)
|
||||||
|
|||||||
@@ -788,8 +788,6 @@ impl Inst {
|
|||||||
imm: u8,
|
imm: u8,
|
||||||
w: bool,
|
w: bool,
|
||||||
) -> Inst {
|
) -> Inst {
|
||||||
debug_assert!(dst.to_reg().get_class() == RegClass::V128);
|
|
||||||
debug_assert!(imm < 8);
|
|
||||||
Inst::XmmRmRImm {
|
Inst::XmmRmRImm {
|
||||||
op,
|
op,
|
||||||
src,
|
src,
|
||||||
@@ -1736,10 +1734,17 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
|||||||
collector.add_mod(*dst);
|
collector.add_mod(*dst);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Inst::XmmRmRImm { src, dst, .. } => {
|
Inst::XmmRmRImm { op, src, dst, .. } => {
|
||||||
if inst.produces_const() {
|
if inst.produces_const() {
|
||||||
// No need to account for src, since src == dst.
|
// No need to account for src, since src == dst.
|
||||||
collector.add_def(*dst);
|
collector.add_def(*dst);
|
||||||
|
} else if *op == SseOpcode::Pextrb
|
||||||
|
|| *op == SseOpcode::Pextrw
|
||||||
|
|| *op == SseOpcode::Pextrd
|
||||||
|
|| *op == SseOpcode::Pshufd
|
||||||
|
{
|
||||||
|
src.get_regs_as_uses(collector);
|
||||||
|
collector.add_def(*dst);
|
||||||
} else {
|
} else {
|
||||||
src.get_regs_as_uses(collector);
|
src.get_regs_as_uses(collector);
|
||||||
collector.add_mod(*dst);
|
collector.add_mod(*dst);
|
||||||
@@ -2038,6 +2043,7 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
|||||||
map_def(mapper, dst);
|
map_def(mapper, dst);
|
||||||
}
|
}
|
||||||
Inst::XmmRmRImm {
|
Inst::XmmRmRImm {
|
||||||
|
ref op,
|
||||||
ref mut src,
|
ref mut src,
|
||||||
ref mut dst,
|
ref mut dst,
|
||||||
..
|
..
|
||||||
@@ -2045,6 +2051,13 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
|||||||
if produces_const {
|
if produces_const {
|
||||||
src.map_as_def(mapper);
|
src.map_as_def(mapper);
|
||||||
map_def(mapper, dst);
|
map_def(mapper, dst);
|
||||||
|
} else if *op == SseOpcode::Pextrb
|
||||||
|
|| *op == SseOpcode::Pextrw
|
||||||
|
|| *op == SseOpcode::Pextrd
|
||||||
|
|| *op == SseOpcode::Pshufd
|
||||||
|
{
|
||||||
|
src.map_uses(mapper);
|
||||||
|
map_def(mapper, dst);
|
||||||
} else {
|
} else {
|
||||||
src.map_uses(mapper);
|
src.map_uses(mapper);
|
||||||
map_mod(mapper, dst);
|
map_mod(mapper, dst);
|
||||||
|
|||||||
@@ -2690,6 +2690,55 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Opcode::Extractlane => {
|
||||||
|
// The instruction format maps to variables like: %dst = extractlane %src, %lane
|
||||||
|
let ty = ty.unwrap();
|
||||||
|
let dst = get_output_reg(ctx, outputs[0]);
|
||||||
|
let src_ty = ctx.input_ty(insn, 0);
|
||||||
|
assert_eq!(src_ty.bits(), 128);
|
||||||
|
let src = put_input_in_reg(ctx, inputs[0]);
|
||||||
|
let lane = if let InstructionData::BinaryImm8 { imm, .. } = ctx.data(insn) {
|
||||||
|
*imm
|
||||||
|
} else {
|
||||||
|
unreachable!();
|
||||||
|
};
|
||||||
|
|
||||||
|
if !ty.is_float() {
|
||||||
|
let (sse_op, w_bit) = match ty.lane_bits() {
|
||||||
|
8 => (SseOpcode::Pextrb, false),
|
||||||
|
16 => (SseOpcode::Pextrw, false),
|
||||||
|
32 => (SseOpcode::Pextrd, false),
|
||||||
|
64 => (SseOpcode::Pextrd, true),
|
||||||
|
_ => panic!("Unable to extractlane for lane size: {}", ty.lane_bits()),
|
||||||
|
};
|
||||||
|
let src = RegMem::reg(src);
|
||||||
|
ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, w_bit));
|
||||||
|
} else {
|
||||||
|
if lane == 0 {
|
||||||
|
// Lane 0 is already in the low bits, so a plain move suffices. The upper
|
||||||
|
// bits will remain unchanged; for correctness, this relies on Cranelift type
|
||||||
|
// checking to avoid using those bits.
|
||||||
|
ctx.emit(Inst::gen_move(dst, src, ty));
|
||||||
|
} else {
|
||||||
|
// Otherwise, shuffle the bits in `lane` to the lowest lane.
|
||||||
|
let sse_op = SseOpcode::Pshufd;
|
||||||
|
let mask = match src_ty {
|
||||||
|
// Move the value at `lane` to lane 0, copying existing value at lane 0 to
|
||||||
|
// other lanes. Again, this relies on Cranelift type checking to avoid
|
||||||
|
// using those bits.
|
||||||
|
types::F32X4 => 0b00_00_00_00 | lane,
|
||||||
|
// Move the value at lane 1 (`lane` must be 1 here because of the `if`
|
||||||
|
// statement above) to lane 0 and leave lane 1 unchanged. The Cranelift type
|
||||||
|
// checking assumption also applies here.
|
||||||
|
types::F64X2 => 0b11_10_11_10,
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
let src = RegMem::reg(src);
|
||||||
|
ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, mask, false));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Opcode::IaddImm
|
Opcode::IaddImm
|
||||||
| Opcode::ImulImm
|
| Opcode::ImulImm
|
||||||
| Opcode::UdivImm
|
| Opcode::UdivImm
|
||||||
|
|||||||
Reference in New Issue
Block a user