[machinst x64]: implement packed and, and_not, xor, or
This commit is contained in:
@@ -413,6 +413,8 @@ pub enum SseOpcode {
|
||||
Paddsw,
|
||||
Paddusb,
|
||||
Paddusw,
|
||||
Pand,
|
||||
Pandn,
|
||||
Pavgb,
|
||||
Pavgw,
|
||||
Pcmpeqb,
|
||||
@@ -556,6 +558,8 @@ impl SseOpcode {
|
||||
| SseOpcode::Paddsw
|
||||
| SseOpcode::Paddusb
|
||||
| SseOpcode::Paddusw
|
||||
| SseOpcode::Pand
|
||||
| SseOpcode::Pandn
|
||||
| SseOpcode::Pavgb
|
||||
| SseOpcode::Pavgw
|
||||
| SseOpcode::Pcmpeqb
|
||||
@@ -698,6 +702,8 @@ impl fmt::Debug for SseOpcode {
|
||||
SseOpcode::Paddsw => "paddsw",
|
||||
SseOpcode::Paddusb => "paddusb",
|
||||
SseOpcode::Paddusw => "paddusw",
|
||||
SseOpcode::Pand => "pand",
|
||||
SseOpcode::Pandn => "pandn",
|
||||
SseOpcode::Pavgb => "pavgb",
|
||||
SseOpcode::Pavgw => "pavgw",
|
||||
SseOpcode::Pcmpeqb => "pcmpeqb",
|
||||
|
||||
@@ -1735,8 +1735,8 @@ pub(crate) fn emit(
|
||||
SseOpcode::Addpd => (LegacyPrefixes::_66, 0x0F58, 2),
|
||||
SseOpcode::Addss => (LegacyPrefixes::_F3, 0x0F58, 2),
|
||||
SseOpcode::Addsd => (LegacyPrefixes::_F2, 0x0F58, 2),
|
||||
SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2),
|
||||
SseOpcode::Andps => (LegacyPrefixes::None, 0x0F54, 2),
|
||||
SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2),
|
||||
SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2),
|
||||
SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2),
|
||||
SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2),
|
||||
@@ -1767,6 +1767,8 @@ pub(crate) fn emit(
|
||||
SseOpcode::Paddsw => (LegacyPrefixes::_66, 0x0FED, 2),
|
||||
SseOpcode::Paddusb => (LegacyPrefixes::_66, 0x0FDC, 2),
|
||||
SseOpcode::Paddusw => (LegacyPrefixes::_66, 0x0FDD, 2),
|
||||
SseOpcode::Pand => (LegacyPrefixes::_66, 0x0FDB, 2),
|
||||
SseOpcode::Pandn => (LegacyPrefixes::_66, 0x0FDF, 2),
|
||||
SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),
|
||||
SseOpcode::Pavgw => (LegacyPrefixes::_66, 0x0FE3, 2),
|
||||
SseOpcode::Pcmpeqb => (LegacyPrefixes::_66, 0x0F74, 2),
|
||||
|
||||
@@ -747,6 +747,21 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
types::I16X8 => SseOpcode::Pavgw,
|
||||
_ => panic!("Unsupported type for packed avg_round instruction: {}", ty),
|
||||
},
|
||||
Opcode::Band => match ty {
|
||||
types::F32X4 => SseOpcode::Andps,
|
||||
types::F64X2 => SseOpcode::Andpd,
|
||||
_ => SseOpcode::Pand,
|
||||
},
|
||||
Opcode::Bor => match ty {
|
||||
types::F32X4 => SseOpcode::Orps,
|
||||
types::F64X2 => SseOpcode::Orpd,
|
||||
_ => SseOpcode::Por,
|
||||
},
|
||||
Opcode::Bxor => match ty {
|
||||
types::F32X4 => SseOpcode::Xorps,
|
||||
types::F64X2 => SseOpcode::Xorpd,
|
||||
_ => SseOpcode::Pxor,
|
||||
},
|
||||
_ => panic!("Unsupported packed instruction: {}", op),
|
||||
};
|
||||
let lhs = put_input_in_reg(ctx, inputs[0]);
|
||||
@@ -799,6 +814,23 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::BandNot => {
|
||||
let ty = ty.unwrap();
|
||||
debug_assert!(ty.is_vector() && ty.bytes() == 16);
|
||||
let lhs = input_to_reg_mem(ctx, inputs[0]);
|
||||
let rhs = put_input_in_reg(ctx, inputs[1]);
|
||||
let dst = get_output_reg(ctx, outputs[0]);
|
||||
let sse_op = match ty {
|
||||
types::F32X4 => SseOpcode::Andnps,
|
||||
types::F64X2 => SseOpcode::Andnpd,
|
||||
_ => SseOpcode::Pandn,
|
||||
};
|
||||
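// Note the flipping of operands: the `rhs` operand is used as the destination instead
|
||||
// of the `lhs` as in the other bit operations above (e.g. `band`). This is needed because
|
||||
// the x86 and-not instructions compute `dst = !dst & src`, whereas `band_not(x, y)` is `x & !y`.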
|
||||
ctx.emit(Inst::gen_move(dst, rhs, ty));
|
||||
ctx.emit(Inst::xmm_rm_r(sse_op, lhs, dst));
|
||||
}
|
||||
|
||||
Opcode::Iabs => {
|
||||
let src = input_to_reg_mem(ctx, inputs[0]);
|
||||
let dst = get_output_reg(ctx, outputs[0]);
|
||||
|
||||
Reference in New Issue
Block a user