[machinst x64]: add packed min/max implementations
This commit is contained in:
@@ -403,6 +403,18 @@ pub enum SseOpcode {
|
||||
Paddw,
|
||||
Pavgb,
|
||||
Pavgw,
|
||||
Pmaxsb,
|
||||
Pmaxsw,
|
||||
Pmaxsd,
|
||||
Pmaxub,
|
||||
Pmaxuw,
|
||||
Pmaxud,
|
||||
Pminsb,
|
||||
Pminsw,
|
||||
Pminsd,
|
||||
Pminub,
|
||||
Pminuw,
|
||||
Pminud,
|
||||
Pmulld,
|
||||
Pmullw,
|
||||
Pmuludq,
|
||||
@@ -507,6 +519,10 @@ impl SseOpcode {
|
||||
| SseOpcode::Paddw
|
||||
| SseOpcode::Pavgb
|
||||
| SseOpcode::Pavgw
|
||||
| SseOpcode::Pmaxsw
|
||||
| SseOpcode::Pmaxub
|
||||
| SseOpcode::Pminsw
|
||||
| SseOpcode::Pminub
|
||||
| SseOpcode::Pmullw
|
||||
| SseOpcode::Pmuludq
|
||||
| SseOpcode::Psllw
|
||||
@@ -531,9 +547,18 @@ impl SseOpcode {
|
||||
|
||||
SseOpcode::Pabsb | SseOpcode::Pabsw | SseOpcode::Pabsd => SSSE3,
|
||||
|
||||
SseOpcode::Insertps | SseOpcode::Pmulld | SseOpcode::Roundss | SseOpcode::Roundsd => {
|
||||
SSE41
|
||||
}
|
||||
SseOpcode::Insertps
|
||||
| SseOpcode::Pmaxsb
|
||||
| SseOpcode::Pmaxsd
|
||||
| SseOpcode::Pmaxuw
|
||||
| SseOpcode::Pmaxud
|
||||
| SseOpcode::Pminsb
|
||||
| SseOpcode::Pminsd
|
||||
| SseOpcode::Pminuw
|
||||
| SseOpcode::Pminud
|
||||
| SseOpcode::Pmulld
|
||||
| SseOpcode::Roundss
|
||||
| SseOpcode::Roundsd => SSE41,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -609,6 +634,18 @@ impl fmt::Debug for SseOpcode {
|
||||
SseOpcode::Paddw => "paddw",
|
||||
SseOpcode::Pavgb => "pavgb",
|
||||
SseOpcode::Pavgw => "pavgw",
|
||||
SseOpcode::Pmaxsb => "pmaxsb",
|
||||
SseOpcode::Pmaxsw => "pmaxsw",
|
||||
SseOpcode::Pmaxsd => "pmaxsd",
|
||||
SseOpcode::Pmaxub => "pmaxub",
|
||||
SseOpcode::Pmaxuw => "pmaxuw",
|
||||
SseOpcode::Pmaxud => "pmaxud",
|
||||
SseOpcode::Pminsb => "pminsb",
|
||||
SseOpcode::Pminsw => "pminsw",
|
||||
SseOpcode::Pminsd => "pminsd",
|
||||
SseOpcode::Pminub => "pminub",
|
||||
SseOpcode::Pminuw => "pminuw",
|
||||
SseOpcode::Pminud => "pminud",
|
||||
SseOpcode::Pmulld => "pmulld",
|
||||
SseOpcode::Pmullw => "pmullw",
|
||||
SseOpcode::Pmuludq => "pmuludq",
|
||||
|
||||
@@ -1780,6 +1780,18 @@ pub(crate) fn emit(
|
||||
SseOpcode::Paddw => (LegacyPrefixes::_66, 0x0FFD, 2),
|
||||
SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),
|
||||
SseOpcode::Pavgw => (LegacyPrefixes::_66, 0x0FE3, 2),
|
||||
SseOpcode::Pmaxsb => (LegacyPrefixes::_66, 0x0F383C, 3),
|
||||
SseOpcode::Pmaxsw => (LegacyPrefixes::_66, 0x0FEE, 2),
|
||||
SseOpcode::Pmaxsd => (LegacyPrefixes::_66, 0x0F383D, 3),
|
||||
SseOpcode::Pmaxub => (LegacyPrefixes::_66, 0x0FDE, 2),
|
||||
SseOpcode::Pmaxuw => (LegacyPrefixes::_66, 0x0F383E, 3),
|
||||
SseOpcode::Pmaxud => (LegacyPrefixes::_66, 0x0F383F, 3),
|
||||
SseOpcode::Pminsb => (LegacyPrefixes::_66, 0x0F3838, 3),
|
||||
SseOpcode::Pminsw => (LegacyPrefixes::_66, 0x0FEA, 2),
|
||||
SseOpcode::Pminsd => (LegacyPrefixes::_66, 0x0F3839, 3),
|
||||
SseOpcode::Pminub => (LegacyPrefixes::_66, 0x0FDA, 2),
|
||||
SseOpcode::Pminuw => (LegacyPrefixes::_66, 0x0F383A, 3),
|
||||
SseOpcode::Pminud => (LegacyPrefixes::_66, 0x0F383B, 3),
|
||||
SseOpcode::Pmulld => (LegacyPrefixes::_66, 0x0F3840, 3),
|
||||
SseOpcode::Pmullw => (LegacyPrefixes::_66, 0x0FD5, 2),
|
||||
SseOpcode::Pmuludq => (LegacyPrefixes::_66, 0x0FF4, 2),
|
||||
|
||||
@@ -3165,6 +3165,78 @@ fn test_x64_emit() {
|
||||
"pmuludq %xmm8, %xmm9",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pmaxsb, RegMem::reg(xmm15), w_xmm6),
|
||||
"66410F383CF7",
|
||||
"pmaxsb %xmm15, %xmm6",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pmaxsw, RegMem::reg(xmm15), w_xmm6),
|
||||
"66410FEEF7",
|
||||
"pmaxsw %xmm15, %xmm6",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pmaxsd, RegMem::reg(xmm15), w_xmm6),
|
||||
"66410F383DF7",
|
||||
"pmaxsd %xmm15, %xmm6",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pmaxub, RegMem::reg(xmm14), w_xmm1),
|
||||
"66410FDECE",
|
||||
"pmaxub %xmm14, %xmm1",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pmaxuw, RegMem::reg(xmm14), w_xmm1),
|
||||
"66410F383ECE",
|
||||
"pmaxuw %xmm14, %xmm1",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pmaxud, RegMem::reg(xmm14), w_xmm1),
|
||||
"66410F383FCE",
|
||||
"pmaxud %xmm14, %xmm1",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pminsb, RegMem::reg(xmm8), w_xmm9),
|
||||
"66450F3838C8",
|
||||
"pminsb %xmm8, %xmm9",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pminsw, RegMem::reg(xmm8), w_xmm9),
|
||||
"66450FEAC8",
|
||||
"pminsw %xmm8, %xmm9",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pminsd, RegMem::reg(xmm8), w_xmm9),
|
||||
"66450F3839C8",
|
||||
"pminsd %xmm8, %xmm9",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pminub, RegMem::reg(xmm3), w_xmm2),
|
||||
"660FDAD3",
|
||||
"pminub %xmm3, %xmm2",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pminuw, RegMem::reg(xmm3), w_xmm2),
|
||||
"660F383AD3",
|
||||
"pminuw %xmm3, %xmm2",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pminud, RegMem::reg(xmm3), w_xmm2),
|
||||
"660F383BD3",
|
||||
"pminud %xmm3, %xmm2",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::reg(xmm11), w_xmm2),
|
||||
"66410FEFD3",
|
||||
|
||||
@@ -709,6 +709,48 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Imax | Opcode::Umax | Opcode::Imin | Opcode::Umin => {
    // Lowering for packed integer min/max.
    //
    // The operands and the output register are requested up front, before any type
    // checking, so whatever those helpers do to `ctx` happens ahead of the panics below.
    let first = put_input_in_reg(ctx, inputs[0]);
    let second = input_to_reg_mem(ctx, inputs[1]);
    let dst = get_output_reg(ctx, outputs[0]);
    let ty = ty.unwrap();

    // This arm only knows how to lower vector types.
    if !ty.is_vector() {
        panic!("Unsupported type for {} instruction: {}", op, ty);
    }

    // Choose the SSE instruction from the (operation, vector type) pair. `Imax`/`Imin`
    // select the `pmaxs*`/`pmins*` opcodes and `Umax`/`Umin` the `pmaxu*`/`pminu*` ones;
    // the `b`/`w`/`d` suffix follows the 8/16/32-bit lane type.
    let packed_op = match (op, ty) {
        (Opcode::Imax, types::I8X16) => SseOpcode::Pmaxsb,
        (Opcode::Imax, types::I16X8) => SseOpcode::Pmaxsw,
        (Opcode::Imax, types::I32X4) => SseOpcode::Pmaxsd,

        (Opcode::Umax, types::I8X16) => SseOpcode::Pmaxub,
        (Opcode::Umax, types::I16X8) => SseOpcode::Pmaxuw,
        (Opcode::Umax, types::I32X4) => SseOpcode::Pmaxud,

        (Opcode::Imin, types::I8X16) => SseOpcode::Pminsb,
        (Opcode::Imin, types::I16X8) => SseOpcode::Pminsw,
        (Opcode::Imin, types::I32X4) => SseOpcode::Pminsd,

        (Opcode::Umin, types::I8X16) => SseOpcode::Pminub,
        (Opcode::Umin, types::I16X8) => SseOpcode::Pminuw,
        (Opcode::Umin, types::I32X4) => SseOpcode::Pminud,

        // Any other vector type has no packed min/max mapping here.
        _ => panic!("Unsupported type for packed {} instruction: {}", op, ty),
    };

    // Seed `dst` with the first operand, then let the packed instruction combine the
    // second operand into that same register.
    ctx.emit(Inst::gen_move(dst, first, ty));
    ctx.emit(Inst::xmm_rm_r(packed_op, second, dst));
}
|
||||
|
||||
Opcode::Bnot => {
|
||||
let ty = ty.unwrap();
|
||||
if ty.is_vector() {
|
||||
|
||||
Reference in New Issue
Block a user