[machinst x64]: add packed min/max implementations

This commit is contained in:
Andrew Brown
2020-09-21 12:32:32 -07:00
parent 7546d98844
commit ac2bf9d246
4 changed files with 166 additions and 3 deletions

View File

@@ -403,6 +403,18 @@ pub enum SseOpcode {
Paddw,
Pavgb,
Pavgw,
Pmaxsb,
Pmaxsw,
Pmaxsd,
Pmaxub,
Pmaxuw,
Pmaxud,
Pminsb,
Pminsw,
Pminsd,
Pminub,
Pminuw,
Pminud,
Pmulld,
Pmullw,
Pmuludq,
@@ -507,6 +519,10 @@ impl SseOpcode {
| SseOpcode::Paddw
| SseOpcode::Pavgb
| SseOpcode::Pavgw
| SseOpcode::Pmaxsw
| SseOpcode::Pmaxub
| SseOpcode::Pminsw
| SseOpcode::Pminub
| SseOpcode::Pmullw
| SseOpcode::Pmuludq
| SseOpcode::Psllw
@@ -531,9 +547,18 @@ impl SseOpcode {
SseOpcode::Pabsb | SseOpcode::Pabsw | SseOpcode::Pabsd => SSSE3,
SseOpcode::Insertps | SseOpcode::Pmulld | SseOpcode::Roundss | SseOpcode::Roundsd => {
SSE41
}
SseOpcode::Insertps
| SseOpcode::Pmaxsb
| SseOpcode::Pmaxsd
| SseOpcode::Pmaxuw
| SseOpcode::Pmaxud
| SseOpcode::Pminsb
| SseOpcode::Pminsd
| SseOpcode::Pminuw
| SseOpcode::Pminud
| SseOpcode::Pmulld
| SseOpcode::Roundss
| SseOpcode::Roundsd => SSE41,
}
}
@@ -609,6 +634,18 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Paddw => "paddw",
SseOpcode::Pavgb => "pavgb",
SseOpcode::Pavgw => "pavgw",
SseOpcode::Pmaxsb => "pmaxsb",
SseOpcode::Pmaxsw => "pmaxsw",
SseOpcode::Pmaxsd => "pmaxsd",
SseOpcode::Pmaxub => "pmaxub",
SseOpcode::Pmaxuw => "pmaxuw",
SseOpcode::Pmaxud => "pmaxud",
SseOpcode::Pminsb => "pminsb",
SseOpcode::Pminsw => "pminsw",
SseOpcode::Pminsd => "pminsd",
SseOpcode::Pminub => "pminub",
SseOpcode::Pminuw => "pminuw",
SseOpcode::Pminud => "pminud",
SseOpcode::Pmulld => "pmulld",
SseOpcode::Pmullw => "pmullw",
SseOpcode::Pmuludq => "pmuludq",

View File

@@ -1780,6 +1780,18 @@ pub(crate) fn emit(
SseOpcode::Paddw => (LegacyPrefixes::_66, 0x0FFD, 2),
SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),
SseOpcode::Pavgw => (LegacyPrefixes::_66, 0x0FE3, 2),
SseOpcode::Pmaxsb => (LegacyPrefixes::_66, 0x0F383C, 3),
SseOpcode::Pmaxsw => (LegacyPrefixes::_66, 0x0FEE, 2),
SseOpcode::Pmaxsd => (LegacyPrefixes::_66, 0x0F383D, 3),
SseOpcode::Pmaxub => (LegacyPrefixes::_66, 0x0FDE, 2),
SseOpcode::Pmaxuw => (LegacyPrefixes::_66, 0x0F383E, 3),
SseOpcode::Pmaxud => (LegacyPrefixes::_66, 0x0F383F, 3),
SseOpcode::Pminsb => (LegacyPrefixes::_66, 0x0F3838, 3),
SseOpcode::Pminsw => (LegacyPrefixes::_66, 0x0FEA, 2),
SseOpcode::Pminsd => (LegacyPrefixes::_66, 0x0F3839, 3),
SseOpcode::Pminub => (LegacyPrefixes::_66, 0x0FDA, 2),
SseOpcode::Pminuw => (LegacyPrefixes::_66, 0x0F383A, 3),
SseOpcode::Pminud => (LegacyPrefixes::_66, 0x0F383B, 3),
SseOpcode::Pmulld => (LegacyPrefixes::_66, 0x0F3840, 3),
SseOpcode::Pmullw => (LegacyPrefixes::_66, 0x0FD5, 2),
SseOpcode::Pmuludq => (LegacyPrefixes::_66, 0x0FF4, 2),

View File

@@ -3165,6 +3165,78 @@ fn test_x64_emit() {
"pmuludq %xmm8, %xmm9",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmaxsb, RegMem::reg(xmm15), w_xmm6),
"66410F383CF7",
"pmaxsb %xmm15, %xmm6",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmaxsw, RegMem::reg(xmm15), w_xmm6),
"66410FEEF7",
"pmaxsw %xmm15, %xmm6",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmaxsd, RegMem::reg(xmm15), w_xmm6),
"66410F383DF7",
"pmaxsd %xmm15, %xmm6",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmaxub, RegMem::reg(xmm14), w_xmm1),
"66410FDECE",
"pmaxub %xmm14, %xmm1",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmaxuw, RegMem::reg(xmm14), w_xmm1),
"66410F383ECE",
"pmaxuw %xmm14, %xmm1",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmaxud, RegMem::reg(xmm14), w_xmm1),
"66410F383FCE",
"pmaxud %xmm14, %xmm1",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pminsb, RegMem::reg(xmm8), w_xmm9),
"66450F3838C8",
"pminsb %xmm8, %xmm9",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pminsw, RegMem::reg(xmm8), w_xmm9),
"66450FEAC8",
"pminsw %xmm8, %xmm9",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pminsd, RegMem::reg(xmm8), w_xmm9),
"66450F3839C8",
"pminsd %xmm8, %xmm9",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pminub, RegMem::reg(xmm3), w_xmm2),
"660FDAD3",
"pminub %xmm3, %xmm2",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pminuw, RegMem::reg(xmm3), w_xmm2),
"660F383AD3",
"pminuw %xmm3, %xmm2",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pminud, RegMem::reg(xmm3), w_xmm2),
"660F383BD3",
"pminud %xmm3, %xmm2",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::reg(xmm11), w_xmm2),
"66410FEFD3",

View File

@@ -709,6 +709,48 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
}
// Lowering for the integer min/max opcodes. Only vector types are handled by this
// arm: a non-vector `ty` falls through to the panic in the `else` branch below.
Opcode::Imax | Opcode::Umax | Opcode::Imin | Opcode::Umin => {
// The first input is always placed in a register; the second may stay as a
// register-or-memory operand.
let lhs = put_input_in_reg(ctx, inputs[0]);
let rhs = input_to_reg_mem(ctx, inputs[1]);
let dst = get_output_reg(ctx, outputs[0]);
// Panics if this instruction has no associated type.
let ty = ty.unwrap();
if ty.is_vector() {
// Pick the packed SSE opcode from the (CLIF opcode, vector type) pair:
//   Imax -> pmaxs{b,w,d}    Umax -> pmaxu{b,w,d}
//   Imin -> pmins{b,w,d}    Umin -> pminu{b,w,d}
// where b/w/d correspond to I8X16/I16X8/I32X4. Any other vector type
// (e.g. one with 64-bit lanes) has no entry here and panics.
let sse_op = match op {
Opcode::Imax => match ty {
types::I8X16 => SseOpcode::Pmaxsb,
types::I16X8 => SseOpcode::Pmaxsw,
types::I32X4 => SseOpcode::Pmaxsd,
_ => panic!("Unsupported type for packed {} instruction: {}", op, ty),
},
Opcode::Umax => match ty {
types::I8X16 => SseOpcode::Pmaxub,
types::I16X8 => SseOpcode::Pmaxuw,
types::I32X4 => SseOpcode::Pmaxud,
_ => panic!("Unsupported type for packed {} instruction: {}", op, ty),
},
Opcode::Imin => match ty {
types::I8X16 => SseOpcode::Pminsb,
types::I16X8 => SseOpcode::Pminsw,
types::I32X4 => SseOpcode::Pminsd,
_ => panic!("Unsupported type for packed {} instruction: {}", op, ty),
},
Opcode::Umin => match ty {
types::I8X16 => SseOpcode::Pminub,
types::I16X8 => SseOpcode::Pminuw,
types::I32X4 => SseOpcode::Pminud,
_ => panic!("Unsupported type for packed {} instruction: {}", op, ty),
},
// The enclosing arm only admits the four opcodes matched above, so this
// arm can only be reached if the two opcode lists drift apart.
_ => unreachable!("This is a bug: the external and internal `match op` should be over the same opcodes."),
};
// Move the `lhs` to the same register as `dst`, then emit the packed
// instruction with `rhs` as the reg/mem operand and `dst` as the register
// operand. NOTE(review): presumably `xmm_rm_r` both reads and overwrites
// `dst` (two-operand SSE form), which is why the copy must come first —
// confirm against the `Inst::xmm_rm_r` definition.
ctx.emit(Inst::gen_move(dst, lhs, ty));
ctx.emit(Inst::xmm_rm_r(sse_op, rhs, dst));
} else {
// Scalar (non-vector) min/max is not lowered by this arm.
panic!("Unsupported type for {} instruction: {}", op, ty);
}
}
Opcode::Bnot => {
let ty = ty.unwrap();
if ty.is_vector() {