[machinst x64]: implement packed and, and_not, xor, or

This commit is contained in:
Andrew Brown
2020-10-07 13:34:18 -07:00
parent e8c2a1763a
commit 3c55523d40
3 changed files with 41 additions and 1 deletions

View File

@@ -413,6 +413,8 @@ pub enum SseOpcode {
Paddsw,
Paddusb,
Paddusw,
Pand,
Pandn,
Pavgb,
Pavgw,
Pcmpeqb,
@@ -556,6 +558,8 @@ impl SseOpcode {
| SseOpcode::Paddsw
| SseOpcode::Paddusb
| SseOpcode::Paddusw
| SseOpcode::Pand
| SseOpcode::Pandn
| SseOpcode::Pavgb
| SseOpcode::Pavgw
| SseOpcode::Pcmpeqb
@@ -698,6 +702,8 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Paddsw => "paddsw",
SseOpcode::Paddusb => "paddusb",
SseOpcode::Paddusw => "paddusw",
SseOpcode::Pand => "pand",
SseOpcode::Pandn => "pandn",
SseOpcode::Pavgb => "pavgb",
SseOpcode::Pavgw => "pavgw",
SseOpcode::Pcmpeqb => "pcmpeqb",

View File

@@ -1735,8 +1735,8 @@ pub(crate) fn emit(
SseOpcode::Addpd => (LegacyPrefixes::_66, 0x0F58, 2),
SseOpcode::Addss => (LegacyPrefixes::_F3, 0x0F58, 2),
SseOpcode::Addsd => (LegacyPrefixes::_F2, 0x0F58, 2),
SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2),
SseOpcode::Andps => (LegacyPrefixes::None, 0x0F54, 2),
SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2),
SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2),
SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2),
SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2),
@@ -1767,6 +1767,8 @@ pub(crate) fn emit(
SseOpcode::Paddsw => (LegacyPrefixes::_66, 0x0FED, 2),
SseOpcode::Paddusb => (LegacyPrefixes::_66, 0x0FDC, 2),
SseOpcode::Paddusw => (LegacyPrefixes::_66, 0x0FDD, 2),
SseOpcode::Pand => (LegacyPrefixes::_66, 0x0FDB, 2),
SseOpcode::Pandn => (LegacyPrefixes::_66, 0x0FDF, 2),
SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),
SseOpcode::Pavgw => (LegacyPrefixes::_66, 0x0FE3, 2),
SseOpcode::Pcmpeqb => (LegacyPrefixes::_66, 0x0F74, 2),

View File

@@ -747,6 +747,21 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
types::I16X8 => SseOpcode::Pavgw,
_ => panic!("Unsupported type for packed avg_round instruction: {}", ty),
},
Opcode::Band => match ty {
types::F32X4 => SseOpcode::Andps,
types::F64X2 => SseOpcode::Andpd,
_ => SseOpcode::Pand,
},
Opcode::Bor => match ty {
types::F32X4 => SseOpcode::Orps,
types::F64X2 => SseOpcode::Orpd,
_ => SseOpcode::Por,
},
Opcode::Bxor => match ty {
types::F32X4 => SseOpcode::Xorps,
types::F64X2 => SseOpcode::Xorpd,
_ => SseOpcode::Pxor,
},
_ => panic!("Unsupported packed instruction: {}", op),
};
let lhs = put_input_in_reg(ctx, inputs[0]);
@@ -799,6 +814,23 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
}
Opcode::BandNot => {
// Lowers `band_not` to a move followed by one SSE and-not instruction.
// `ty.unwrap()` panics if no type is available for this instruction.
let ty = ty.unwrap();
// Checked in debug builds only: the type must be a 16-byte vector.
debug_assert!(ty.is_vector() && ty.bytes() == 16);
let lhs = input_to_reg_mem(ctx, inputs[0]);
let rhs = put_input_in_reg(ctx, inputs[1]);
let dst = get_output_reg(ctx, outputs[0]);
// The two float vector types get the float and-not opcodes; every other type uses
// `pandn`.
let sse_op = match ty {
types::F32X4 => SseOpcode::Andnps,
types::F64X2 => SseOpcode::Andnpd,
_ => SseOpcode::Pandn,
};
// Note the flipping of operands: the `rhs` operand is used as the destination instead
// of the `lhs` as in the other bit operations above (e.g. `band`).
// The x86 and-not instructions complement their destination operand before AND-ing it
// with the source, so seeding `dst` with `rhs` yields `lhs & !rhs`.
ctx.emit(Inst::gen_move(dst, rhs, ty));
ctx.emit(Inst::xmm_rm_r(sse_op, lhs, dst));
}
Opcode::Iabs => {
let src = input_to_reg_mem(ctx, inputs[0]);
let dst = get_output_reg(ctx, outputs[0]);