[machinst x64]: implement packed and, and_not, xor, or

This commit is contained in:
Andrew Brown
2020-10-07 13:34:18 -07:00
parent e8c2a1763a
commit 3c55523d40
3 changed files with 41 additions and 1 deletions

View File

@@ -413,6 +413,8 @@ pub enum SseOpcode {
Paddsw, Paddsw,
Paddusb, Paddusb,
Paddusw, Paddusw,
Pand,
Pandn,
Pavgb, Pavgb,
Pavgw, Pavgw,
Pcmpeqb, Pcmpeqb,
@@ -556,6 +558,8 @@ impl SseOpcode {
| SseOpcode::Paddsw | SseOpcode::Paddsw
| SseOpcode::Paddusb | SseOpcode::Paddusb
| SseOpcode::Paddusw | SseOpcode::Paddusw
| SseOpcode::Pand
| SseOpcode::Pandn
| SseOpcode::Pavgb | SseOpcode::Pavgb
| SseOpcode::Pavgw | SseOpcode::Pavgw
| SseOpcode::Pcmpeqb | SseOpcode::Pcmpeqb
@@ -698,6 +702,8 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Paddsw => "paddsw", SseOpcode::Paddsw => "paddsw",
SseOpcode::Paddusb => "paddusb", SseOpcode::Paddusb => "paddusb",
SseOpcode::Paddusw => "paddusw", SseOpcode::Paddusw => "paddusw",
SseOpcode::Pand => "pand",
SseOpcode::Pandn => "pandn",
SseOpcode::Pavgb => "pavgb", SseOpcode::Pavgb => "pavgb",
SseOpcode::Pavgw => "pavgw", SseOpcode::Pavgw => "pavgw",
SseOpcode::Pcmpeqb => "pcmpeqb", SseOpcode::Pcmpeqb => "pcmpeqb",

View File

@@ -1735,8 +1735,8 @@ pub(crate) fn emit(
SseOpcode::Addpd => (LegacyPrefixes::_66, 0x0F58, 2), SseOpcode::Addpd => (LegacyPrefixes::_66, 0x0F58, 2),
SseOpcode::Addss => (LegacyPrefixes::_F3, 0x0F58, 2), SseOpcode::Addss => (LegacyPrefixes::_F3, 0x0F58, 2),
SseOpcode::Addsd => (LegacyPrefixes::_F2, 0x0F58, 2), SseOpcode::Addsd => (LegacyPrefixes::_F2, 0x0F58, 2),
SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2),
SseOpcode::Andps => (LegacyPrefixes::None, 0x0F54, 2), SseOpcode::Andps => (LegacyPrefixes::None, 0x0F54, 2),
SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2),
SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2), SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2),
SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2), SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2),
SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2), SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2),
@@ -1767,6 +1767,8 @@ pub(crate) fn emit(
SseOpcode::Paddsw => (LegacyPrefixes::_66, 0x0FED, 2), SseOpcode::Paddsw => (LegacyPrefixes::_66, 0x0FED, 2),
SseOpcode::Paddusb => (LegacyPrefixes::_66, 0x0FDC, 2), SseOpcode::Paddusb => (LegacyPrefixes::_66, 0x0FDC, 2),
SseOpcode::Paddusw => (LegacyPrefixes::_66, 0x0FDD, 2), SseOpcode::Paddusw => (LegacyPrefixes::_66, 0x0FDD, 2),
SseOpcode::Pand => (LegacyPrefixes::_66, 0x0FDB, 2),
SseOpcode::Pandn => (LegacyPrefixes::_66, 0x0FDF, 2),
SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2), SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),
SseOpcode::Pavgw => (LegacyPrefixes::_66, 0x0FE3, 2), SseOpcode::Pavgw => (LegacyPrefixes::_66, 0x0FE3, 2),
SseOpcode::Pcmpeqb => (LegacyPrefixes::_66, 0x0F74, 2), SseOpcode::Pcmpeqb => (LegacyPrefixes::_66, 0x0F74, 2),

View File

@@ -747,6 +747,21 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
types::I16X8 => SseOpcode::Pavgw, types::I16X8 => SseOpcode::Pavgw,
_ => panic!("Unsupported type for packed avg_round instruction: {}", ty), _ => panic!("Unsupported type for packed avg_round instruction: {}", ty),
}, },
// Bitwise AND: the two float vector types select the `ps`/`pd` opcode; every
// other type falls through to the packed-integer `pand`.
Opcode::Band => match ty {
types::F32X4 => SseOpcode::Andps,
types::F64X2 => SseOpcode::Andpd,
_ => SseOpcode::Pand,
},
// Bitwise OR: same per-type selection, with `por` as the fallback.
Opcode::Bor => match ty {
types::F32X4 => SseOpcode::Orps,
types::F64X2 => SseOpcode::Orpd,
_ => SseOpcode::Por,
},
// Bitwise XOR: same per-type selection, with `pxor` as the fallback.
Opcode::Bxor => match ty {
types::F32X4 => SseOpcode::Xorps,
types::F64X2 => SseOpcode::Xorpd,
_ => SseOpcode::Pxor,
},
_ => panic!("Unsupported packed instruction: {}", op), _ => panic!("Unsupported packed instruction: {}", op),
}; };
let lhs = put_input_in_reg(ctx, inputs[0]); let lhs = put_input_in_reg(ctx, inputs[0]);
@@ -799,6 +814,23 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
} }
} }
// Lowers `band_not` for 128-bit vector types: the second input is copied into
// the output register, then the selected and-not opcode is emitted with the
// first input as its source operand.
Opcode::BandNot => {
let ty = ty.unwrap();
// Only 16-byte vector types are expected here (checked in debug builds only).
debug_assert!(ty.is_vector() && ty.bytes() == 16);
let lhs = input_to_reg_mem(ctx, inputs[0]);
let rhs = put_input_in_reg(ctx, inputs[1]);
let dst = get_output_reg(ctx, outputs[0]);
// Float vector types use the `ps`/`pd` and-not opcode; every other type
// falls through to the packed-integer `pandn`.
let sse_op = match ty {
types::F32X4 => SseOpcode::Andnps,
types::F64X2 => SseOpcode::Andnpd,
_ => SseOpcode::Pandn,
};
// Note the flipping of operands: the `rhs` operand is used as the destination instead
// of the `lhs` as in the other bit operations above (e.g. `band`).
// NOTE(review): presumably required because the x86 and-not instructions invert
// their destination operand (`dst = !dst & src`) -- confirm against the ISA reference.
ctx.emit(Inst::gen_move(dst, rhs, ty));
ctx.emit(Inst::xmm_rm_r(sse_op, lhs, dst));
}
Opcode::Iabs => { Opcode::Iabs => {
let src = input_to_reg_mem(ctx, inputs[0]); let src = input_to_reg_mem(ctx, inputs[0]);
let dst = get_output_reg(ctx, outputs[0]); let dst = get_output_reg(ctx, outputs[0]);