[machinst x64]: implement packed and, and_not, xor, or
This commit is contained in:
@@ -413,6 +413,8 @@ pub enum SseOpcode {
|
|||||||
Paddsw,
|
Paddsw,
|
||||||
Paddusb,
|
Paddusb,
|
||||||
Paddusw,
|
Paddusw,
|
||||||
|
Pand,
|
||||||
|
Pandn,
|
||||||
Pavgb,
|
Pavgb,
|
||||||
Pavgw,
|
Pavgw,
|
||||||
Pcmpeqb,
|
Pcmpeqb,
|
||||||
@@ -556,6 +558,8 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Paddsw
|
| SseOpcode::Paddsw
|
||||||
| SseOpcode::Paddusb
|
| SseOpcode::Paddusb
|
||||||
| SseOpcode::Paddusw
|
| SseOpcode::Paddusw
|
||||||
|
| SseOpcode::Pand
|
||||||
|
| SseOpcode::Pandn
|
||||||
| SseOpcode::Pavgb
|
| SseOpcode::Pavgb
|
||||||
| SseOpcode::Pavgw
|
| SseOpcode::Pavgw
|
||||||
| SseOpcode::Pcmpeqb
|
| SseOpcode::Pcmpeqb
|
||||||
@@ -698,6 +702,8 @@ impl fmt::Debug for SseOpcode {
|
|||||||
SseOpcode::Paddsw => "paddsw",
|
SseOpcode::Paddsw => "paddsw",
|
||||||
SseOpcode::Paddusb => "paddusb",
|
SseOpcode::Paddusb => "paddusb",
|
||||||
SseOpcode::Paddusw => "paddusw",
|
SseOpcode::Paddusw => "paddusw",
|
||||||
|
SseOpcode::Pand => "pand",
|
||||||
|
SseOpcode::Pandn => "pandn",
|
||||||
SseOpcode::Pavgb => "pavgb",
|
SseOpcode::Pavgb => "pavgb",
|
||||||
SseOpcode::Pavgw => "pavgw",
|
SseOpcode::Pavgw => "pavgw",
|
||||||
SseOpcode::Pcmpeqb => "pcmpeqb",
|
SseOpcode::Pcmpeqb => "pcmpeqb",
|
||||||
|
|||||||
@@ -1735,8 +1735,8 @@ pub(crate) fn emit(
|
|||||||
SseOpcode::Addpd => (LegacyPrefixes::_66, 0x0F58, 2),
|
SseOpcode::Addpd => (LegacyPrefixes::_66, 0x0F58, 2),
|
||||||
SseOpcode::Addss => (LegacyPrefixes::_F3, 0x0F58, 2),
|
SseOpcode::Addss => (LegacyPrefixes::_F3, 0x0F58, 2),
|
||||||
SseOpcode::Addsd => (LegacyPrefixes::_F2, 0x0F58, 2),
|
SseOpcode::Addsd => (LegacyPrefixes::_F2, 0x0F58, 2),
|
||||||
SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2),
|
|
||||||
SseOpcode::Andps => (LegacyPrefixes::None, 0x0F54, 2),
|
SseOpcode::Andps => (LegacyPrefixes::None, 0x0F54, 2),
|
||||||
|
SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2),
|
||||||
SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2),
|
SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2),
|
||||||
SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2),
|
SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2),
|
||||||
SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2),
|
SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2),
|
||||||
@@ -1767,6 +1767,8 @@ pub(crate) fn emit(
|
|||||||
SseOpcode::Paddsw => (LegacyPrefixes::_66, 0x0FED, 2),
|
SseOpcode::Paddsw => (LegacyPrefixes::_66, 0x0FED, 2),
|
||||||
SseOpcode::Paddusb => (LegacyPrefixes::_66, 0x0FDC, 2),
|
SseOpcode::Paddusb => (LegacyPrefixes::_66, 0x0FDC, 2),
|
||||||
SseOpcode::Paddusw => (LegacyPrefixes::_66, 0x0FDD, 2),
|
SseOpcode::Paddusw => (LegacyPrefixes::_66, 0x0FDD, 2),
|
||||||
|
SseOpcode::Pand => (LegacyPrefixes::_66, 0x0FDB, 2),
|
||||||
|
SseOpcode::Pandn => (LegacyPrefixes::_66, 0x0FDF, 2),
|
||||||
SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),
|
SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),
|
||||||
SseOpcode::Pavgw => (LegacyPrefixes::_66, 0x0FE3, 2),
|
SseOpcode::Pavgw => (LegacyPrefixes::_66, 0x0FE3, 2),
|
||||||
SseOpcode::Pcmpeqb => (LegacyPrefixes::_66, 0x0F74, 2),
|
SseOpcode::Pcmpeqb => (LegacyPrefixes::_66, 0x0F74, 2),
|
||||||
|
|||||||
@@ -747,6 +747,21 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
types::I16X8 => SseOpcode::Pavgw,
|
types::I16X8 => SseOpcode::Pavgw,
|
||||||
_ => panic!("Unsupported type for packed avg_round instruction: {}", ty),
|
_ => panic!("Unsupported type for packed avg_round instruction: {}", ty),
|
||||||
},
|
},
|
||||||
|
Opcode::Band => match ty {
|
||||||
|
types::F32X4 => SseOpcode::Andps,
|
||||||
|
types::F64X2 => SseOpcode::Andpd,
|
||||||
|
_ => SseOpcode::Pand,
|
||||||
|
},
|
||||||
|
Opcode::Bor => match ty {
|
||||||
|
types::F32X4 => SseOpcode::Orps,
|
||||||
|
types::F64X2 => SseOpcode::Orpd,
|
||||||
|
_ => SseOpcode::Por,
|
||||||
|
},
|
||||||
|
Opcode::Bxor => match ty {
|
||||||
|
types::F32X4 => SseOpcode::Xorps,
|
||||||
|
types::F64X2 => SseOpcode::Xorpd,
|
||||||
|
_ => SseOpcode::Pxor,
|
||||||
|
},
|
||||||
_ => panic!("Unsupported packed instruction: {}", op),
|
_ => panic!("Unsupported packed instruction: {}", op),
|
||||||
};
|
};
|
||||||
let lhs = put_input_in_reg(ctx, inputs[0]);
|
let lhs = put_input_in_reg(ctx, inputs[0]);
|
||||||
@@ -799,6 +814,23 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Opcode::BandNot => {
|
||||||
|
let ty = ty.unwrap();
|
||||||
|
debug_assert!(ty.is_vector() && ty.bytes() == 16);
|
||||||
|
let lhs = input_to_reg_mem(ctx, inputs[0]);
|
||||||
|
let rhs = put_input_in_reg(ctx, inputs[1]);
|
||||||
|
let dst = get_output_reg(ctx, outputs[0]);
|
||||||
|
let sse_op = match ty {
|
||||||
|
types::F32X4 => SseOpcode::Andnps,
|
||||||
|
types::F64X2 => SseOpcode::Andnpd,
|
||||||
|
_ => SseOpcode::Pandn,
|
||||||
|
};
|
||||||
|
// Operands are flipped: x86 and-not computes `!dst & src`, so the negated `rhs` is
|
||||||
|
// moved into `dst`, rather than the `lhs` as in the bit operations above (e.g. `band`).
|
||||||
|
ctx.emit(Inst::gen_move(dst, rhs, ty));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(sse_op, lhs, dst));
|
||||||
|
}
|
||||||
|
|
||||||
Opcode::Iabs => {
|
Opcode::Iabs => {
|
||||||
let src = input_to_reg_mem(ctx, inputs[0]);
|
let src = input_to_reg_mem(ctx, inputs[0]);
|
||||||
let dst = get_output_reg(ctx, outputs[0]);
|
let dst = get_output_reg(ctx, outputs[0]);
|
||||||
|
|||||||
Reference in New Issue
Block a user