[machinst x64]: add packed min/max implementations
This commit is contained in:
@@ -403,6 +403,18 @@ pub enum SseOpcode {
|
|||||||
Paddw,
|
Paddw,
|
||||||
Pavgb,
|
Pavgb,
|
||||||
Pavgw,
|
Pavgw,
|
||||||
|
Pmaxsb,
|
||||||
|
Pmaxsw,
|
||||||
|
Pmaxsd,
|
||||||
|
Pmaxub,
|
||||||
|
Pmaxuw,
|
||||||
|
Pmaxud,
|
||||||
|
Pminsb,
|
||||||
|
Pminsw,
|
||||||
|
Pminsd,
|
||||||
|
Pminub,
|
||||||
|
Pminuw,
|
||||||
|
Pminud,
|
||||||
Pmulld,
|
Pmulld,
|
||||||
Pmullw,
|
Pmullw,
|
||||||
Pmuludq,
|
Pmuludq,
|
||||||
@@ -507,6 +519,10 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Paddw
|
| SseOpcode::Paddw
|
||||||
| SseOpcode::Pavgb
|
| SseOpcode::Pavgb
|
||||||
| SseOpcode::Pavgw
|
| SseOpcode::Pavgw
|
||||||
|
| SseOpcode::Pmaxsw
|
||||||
|
| SseOpcode::Pmaxub
|
||||||
|
| SseOpcode::Pminsw
|
||||||
|
| SseOpcode::Pminub
|
||||||
| SseOpcode::Pmullw
|
| SseOpcode::Pmullw
|
||||||
| SseOpcode::Pmuludq
|
| SseOpcode::Pmuludq
|
||||||
| SseOpcode::Psllw
|
| SseOpcode::Psllw
|
||||||
@@ -531,9 +547,18 @@ impl SseOpcode {
|
|||||||
|
|
||||||
SseOpcode::Pabsb | SseOpcode::Pabsw | SseOpcode::Pabsd => SSSE3,
|
SseOpcode::Pabsb | SseOpcode::Pabsw | SseOpcode::Pabsd => SSSE3,
|
||||||
|
|
||||||
SseOpcode::Insertps | SseOpcode::Pmulld | SseOpcode::Roundss | SseOpcode::Roundsd => {
|
SseOpcode::Insertps
|
||||||
SSE41
|
| SseOpcode::Pmaxsb
|
||||||
}
|
| SseOpcode::Pmaxsd
|
||||||
|
| SseOpcode::Pmaxuw
|
||||||
|
| SseOpcode::Pmaxud
|
||||||
|
| SseOpcode::Pminsb
|
||||||
|
| SseOpcode::Pminsd
|
||||||
|
| SseOpcode::Pminuw
|
||||||
|
| SseOpcode::Pminud
|
||||||
|
| SseOpcode::Pmulld
|
||||||
|
| SseOpcode::Roundss
|
||||||
|
| SseOpcode::Roundsd => SSE41,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -609,6 +634,18 @@ impl fmt::Debug for SseOpcode {
|
|||||||
SseOpcode::Paddw => "paddw",
|
SseOpcode::Paddw => "paddw",
|
||||||
SseOpcode::Pavgb => "pavgb",
|
SseOpcode::Pavgb => "pavgb",
|
||||||
SseOpcode::Pavgw => "pavgw",
|
SseOpcode::Pavgw => "pavgw",
|
||||||
|
SseOpcode::Pmaxsb => "pmaxsb",
|
||||||
|
SseOpcode::Pmaxsw => "pmaxsw",
|
||||||
|
SseOpcode::Pmaxsd => "pmaxsd",
|
||||||
|
SseOpcode::Pmaxub => "pmaxub",
|
||||||
|
SseOpcode::Pmaxuw => "pmaxuw",
|
||||||
|
SseOpcode::Pmaxud => "pmaxud",
|
||||||
|
SseOpcode::Pminsb => "pminsb",
|
||||||
|
SseOpcode::Pminsw => "pminsw",
|
||||||
|
SseOpcode::Pminsd => "pminsd",
|
||||||
|
SseOpcode::Pminub => "pminub",
|
||||||
|
SseOpcode::Pminuw => "pminuw",
|
||||||
|
SseOpcode::Pminud => "pminud",
|
||||||
SseOpcode::Pmulld => "pmulld",
|
SseOpcode::Pmulld => "pmulld",
|
||||||
SseOpcode::Pmullw => "pmullw",
|
SseOpcode::Pmullw => "pmullw",
|
||||||
SseOpcode::Pmuludq => "pmuludq",
|
SseOpcode::Pmuludq => "pmuludq",
|
||||||
|
|||||||
@@ -1780,6 +1780,18 @@ pub(crate) fn emit(
|
|||||||
SseOpcode::Paddw => (LegacyPrefixes::_66, 0x0FFD, 2),
|
SseOpcode::Paddw => (LegacyPrefixes::_66, 0x0FFD, 2),
|
||||||
SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),
|
SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),
|
||||||
SseOpcode::Pavgw => (LegacyPrefixes::_66, 0x0FE3, 2),
|
SseOpcode::Pavgw => (LegacyPrefixes::_66, 0x0FE3, 2),
|
||||||
|
SseOpcode::Pmaxsb => (LegacyPrefixes::_66, 0x0F383C, 3),
|
||||||
|
SseOpcode::Pmaxsw => (LegacyPrefixes::_66, 0x0FEE, 2),
|
||||||
|
SseOpcode::Pmaxsd => (LegacyPrefixes::_66, 0x0F383D, 3),
|
||||||
|
SseOpcode::Pmaxub => (LegacyPrefixes::_66, 0x0FDE, 2),
|
||||||
|
SseOpcode::Pmaxuw => (LegacyPrefixes::_66, 0x0F383E, 3),
|
||||||
|
SseOpcode::Pmaxud => (LegacyPrefixes::_66, 0x0F383F, 3),
|
||||||
|
SseOpcode::Pminsb => (LegacyPrefixes::_66, 0x0F3838, 3),
|
||||||
|
SseOpcode::Pminsw => (LegacyPrefixes::_66, 0x0FEA, 2),
|
||||||
|
SseOpcode::Pminsd => (LegacyPrefixes::_66, 0x0F3839, 3),
|
||||||
|
SseOpcode::Pminub => (LegacyPrefixes::_66, 0x0FDA, 2),
|
||||||
|
SseOpcode::Pminuw => (LegacyPrefixes::_66, 0x0F383A, 3),
|
||||||
|
SseOpcode::Pminud => (LegacyPrefixes::_66, 0x0F383B, 3),
|
||||||
SseOpcode::Pmulld => (LegacyPrefixes::_66, 0x0F3840, 3),
|
SseOpcode::Pmulld => (LegacyPrefixes::_66, 0x0F3840, 3),
|
||||||
SseOpcode::Pmullw => (LegacyPrefixes::_66, 0x0FD5, 2),
|
SseOpcode::Pmullw => (LegacyPrefixes::_66, 0x0FD5, 2),
|
||||||
SseOpcode::Pmuludq => (LegacyPrefixes::_66, 0x0FF4, 2),
|
SseOpcode::Pmuludq => (LegacyPrefixes::_66, 0x0FF4, 2),
|
||||||
|
|||||||
@@ -3165,6 +3165,78 @@ fn test_x64_emit() {
|
|||||||
"pmuludq %xmm8, %xmm9",
|
"pmuludq %xmm8, %xmm9",
|
||||||
));
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r(SseOpcode::Pmaxsb, RegMem::reg(xmm15), w_xmm6),
|
||||||
|
"66410F383CF7",
|
||||||
|
"pmaxsb %xmm15, %xmm6",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r(SseOpcode::Pmaxsw, RegMem::reg(xmm15), w_xmm6),
|
||||||
|
"66410FEEF7",
|
||||||
|
"pmaxsw %xmm15, %xmm6",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r(SseOpcode::Pmaxsd, RegMem::reg(xmm15), w_xmm6),
|
||||||
|
"66410F383DF7",
|
||||||
|
"pmaxsd %xmm15, %xmm6",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r(SseOpcode::Pmaxub, RegMem::reg(xmm14), w_xmm1),
|
||||||
|
"66410FDECE",
|
||||||
|
"pmaxub %xmm14, %xmm1",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r(SseOpcode::Pmaxuw, RegMem::reg(xmm14), w_xmm1),
|
||||||
|
"66410F383ECE",
|
||||||
|
"pmaxuw %xmm14, %xmm1",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r(SseOpcode::Pmaxud, RegMem::reg(xmm14), w_xmm1),
|
||||||
|
"66410F383FCE",
|
||||||
|
"pmaxud %xmm14, %xmm1",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r(SseOpcode::Pminsb, RegMem::reg(xmm8), w_xmm9),
|
||||||
|
"66450F3838C8",
|
||||||
|
"pminsb %xmm8, %xmm9",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r(SseOpcode::Pminsw, RegMem::reg(xmm8), w_xmm9),
|
||||||
|
"66450FEAC8",
|
||||||
|
"pminsw %xmm8, %xmm9",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r(SseOpcode::Pminsd, RegMem::reg(xmm8), w_xmm9),
|
||||||
|
"66450F3839C8",
|
||||||
|
"pminsd %xmm8, %xmm9",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r(SseOpcode::Pminub, RegMem::reg(xmm3), w_xmm2),
|
||||||
|
"660FDAD3",
|
||||||
|
"pminub %xmm3, %xmm2",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r(SseOpcode::Pminuw, RegMem::reg(xmm3), w_xmm2),
|
||||||
|
"660F383AD3",
|
||||||
|
"pminuw %xmm3, %xmm2",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r(SseOpcode::Pminud, RegMem::reg(xmm3), w_xmm2),
|
||||||
|
"660F383BD3",
|
||||||
|
"pminud %xmm3, %xmm2",
|
||||||
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::reg(xmm11), w_xmm2),
|
Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::reg(xmm11), w_xmm2),
|
||||||
"66410FEFD3",
|
"66410FEFD3",
|
||||||
|
|||||||
@@ -709,6 +709,48 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Opcode::Imax | Opcode::Umax | Opcode::Imin | Opcode::Umin => {
|
||||||
|
let lhs = put_input_in_reg(ctx, inputs[0]);
|
||||||
|
let rhs = input_to_reg_mem(ctx, inputs[1]);
|
||||||
|
let dst = get_output_reg(ctx, outputs[0]);
|
||||||
|
let ty = ty.unwrap();
|
||||||
|
if ty.is_vector() {
|
||||||
|
let sse_op = match op {
|
||||||
|
Opcode::Imax => match ty {
|
||||||
|
types::I8X16 => SseOpcode::Pmaxsb,
|
||||||
|
types::I16X8 => SseOpcode::Pmaxsw,
|
||||||
|
types::I32X4 => SseOpcode::Pmaxsd,
|
||||||
|
_ => panic!("Unsupported type for packed {} instruction: {}", op, ty),
|
||||||
|
},
|
||||||
|
Opcode::Umax => match ty {
|
||||||
|
types::I8X16 => SseOpcode::Pmaxub,
|
||||||
|
types::I16X8 => SseOpcode::Pmaxuw,
|
||||||
|
types::I32X4 => SseOpcode::Pmaxud,
|
||||||
|
_ => panic!("Unsupported type for packed {} instruction: {}", op, ty),
|
||||||
|
},
|
||||||
|
Opcode::Imin => match ty {
|
||||||
|
types::I8X16 => SseOpcode::Pminsb,
|
||||||
|
types::I16X8 => SseOpcode::Pminsw,
|
||||||
|
types::I32X4 => SseOpcode::Pminsd,
|
||||||
|
_ => panic!("Unsupported type for packed {} instruction: {}", op, ty),
|
||||||
|
},
|
||||||
|
Opcode::Umin => match ty {
|
||||||
|
types::I8X16 => SseOpcode::Pminub,
|
||||||
|
types::I16X8 => SseOpcode::Pminuw,
|
||||||
|
types::I32X4 => SseOpcode::Pminud,
|
||||||
|
_ => panic!("Unsupported type for packed {} instruction: {}", op, ty),
|
||||||
|
},
|
||||||
|
_ => unreachable!("This is a bug: the external and internal `match op` should be over the same opcodes."),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Move the `lhs` to the same register as `dst`.
|
||||||
|
ctx.emit(Inst::gen_move(dst, lhs, ty));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(sse_op, rhs, dst));
|
||||||
|
} else {
|
||||||
|
panic!("Unsupported type for {} instruction: {}", op, ty);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Opcode::Bnot => {
|
Opcode::Bnot => {
|
||||||
let ty = ty.unwrap();
|
let ty = ty.unwrap();
|
||||||
if ty.is_vector() {
|
if ty.is_vector() {
|
||||||
|
|||||||
Reference in New Issue
Block a user