[machinst x64]: add saturating addition implementation

This commit is contained in:
Andrew Brown
2020-09-23 08:54:10 -07:00
parent a64abf9b76
commit 050f078f86
4 changed files with 57 additions and 5 deletions

View File

@@ -402,6 +402,10 @@ pub enum SseOpcode {
Paddd,
Paddq,
Paddw,
Paddsb,
Paddsw,
Paddusb,
Paddusw,
Pavgb,
Pavgw,
Pextrb,
@@ -527,6 +531,10 @@ impl SseOpcode {
| SseOpcode::Paddd
| SseOpcode::Paddq
| SseOpcode::Paddw
| SseOpcode::Paddsb
| SseOpcode::Paddsw
| SseOpcode::Paddusb
| SseOpcode::Paddusw
| SseOpcode::Pavgb
| SseOpcode::Pavgw
| SseOpcode::Pextrw
@@ -650,6 +658,10 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Paddd => "paddd",
SseOpcode::Paddq => "paddq",
SseOpcode::Paddw => "paddw",
SseOpcode::Paddsb => "paddsb",
SseOpcode::Paddsw => "paddsw",
SseOpcode::Paddusb => "paddusb",
SseOpcode::Paddusw => "paddusw",
SseOpcode::Pavgb => "pavgb",
SseOpcode::Pavgw => "pavgw",
SseOpcode::Pextrb => "pextrb",

View File

@@ -1780,6 +1780,10 @@ pub(crate) fn emit(
SseOpcode::Paddd => (LegacyPrefixes::_66, 0x0FFE, 2),
SseOpcode::Paddq => (LegacyPrefixes::_66, 0x0FD4, 2),
SseOpcode::Paddw => (LegacyPrefixes::_66, 0x0FFD, 2),
SseOpcode::Paddsb => (LegacyPrefixes::_66, 0x0FEC, 2),
SseOpcode::Paddsw => (LegacyPrefixes::_66, 0x0FED, 2),
SseOpcode::Paddusb => (LegacyPrefixes::_66, 0x0FDC, 2),
SseOpcode::Paddusw => (LegacyPrefixes::_66, 0x0FDD, 2),
SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),
SseOpcode::Pavgw => (LegacyPrefixes::_66, 0x0FE3, 2),
SseOpcode::Pmaxsb => (LegacyPrefixes::_66, 0x0F383C, 3),

View File

@@ -3111,6 +3111,30 @@ fn test_x64_emit() {
"paddq %xmm1, %xmm8",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Paddsb, RegMem::reg(xmm9), w_xmm5),
"66410FECE9",
"paddsb %xmm9, %xmm5",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Paddsw, RegMem::reg(xmm7), w_xmm6),
"660FEDF7",
"paddsw %xmm7, %xmm6",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Paddusb, RegMem::reg(xmm12), w_xmm13),
"66450FDCEC",
"paddusb %xmm12, %xmm13",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Paddusw, RegMem::reg(xmm1), w_xmm8),
"66440FDDC1",
"paddusw %xmm1, %xmm8",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pavgb, RegMem::reg(xmm12), w_xmm13),
"66450FE0EC",

View File

@@ -506,6 +506,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::Iadd
| Opcode::IaddIfcout
| Opcode::SaddSat
| Opcode::UaddSat
| Opcode::Isub
| Opcode::Imul
| Opcode::AvgRound
@@ -520,14 +522,24 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
  types::I16X8 => SseOpcode::Paddw,
  types::I32X4 => SseOpcode::Paddd,
  types::I64X2 => SseOpcode::Paddq,
- _ => panic!("Unsupported type for packed Iadd instruction"),
+ _ => panic!("Unsupported type for packed iadd instruction: {}", ty),
  },
+ Opcode::SaddSat => match ty {
+ types::I8X16 => SseOpcode::Paddsb,
+ types::I16X8 => SseOpcode::Paddsw,
+ _ => panic!("Unsupported type for packed sadd_sat instruction: {}", ty),
+ },
+ Opcode::UaddSat => match ty {
+ types::I8X16 => SseOpcode::Paddusb,
+ types::I16X8 => SseOpcode::Paddusw,
+ _ => panic!("Unsupported type for packed uadd_sat instruction: {}", ty),
+ },
  Opcode::Isub => match ty {
  types::I8X16 => SseOpcode::Psubb,
  types::I16X8 => SseOpcode::Psubw,
  types::I32X4 => SseOpcode::Psubd,
  types::I64X2 => SseOpcode::Psubq,
- _ => panic!("Unsupported type for packed Isub instruction"),
+ _ => panic!("Unsupported type for packed isub instruction: {}", ty),
  },
  Opcode::Imul => match ty {
  types::I16X8 => SseOpcode::Pmullw,
@@ -633,14 +645,14 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
  ctx.emit(Inst::gen_move(dst, rhs_1.to_reg(), ty));
  return Ok(());
  }
- _ => panic!("Unsupported type for packed Imul instruction"),
+ _ => panic!("Unsupported type for packed imul instruction: {}", ty),
  },
  Opcode::AvgRound => match ty {
  types::I8X16 => SseOpcode::Pavgb,
  types::I16X8 => SseOpcode::Pavgw,
- _ => panic!("Unsupported type for packed AvgRound instruction: {}", ty),
+ _ => panic!("Unsupported type for packed avg_round instruction: {}", ty),
  },
- _ => panic!("Unsupported packed instruction"),
+ _ => panic!("Unsupported packed instruction: {}", op),
  };
  let lhs = put_input_in_reg(ctx, inputs[0]);
  let rhs = input_to_reg_mem(ctx, inputs[1]);