[machinst x64]: add integer comparisons

This commit is contained in:
Andrew Brown
2020-09-29 13:17:46 -07:00
parent d7fda6ac0e
commit 74226d6781
4 changed files with 151 additions and 7 deletions

View File

@@ -344,6 +344,7 @@ pub(crate) enum InstructionSet {
SSE2, SSE2,
SSSE3, SSSE3,
SSE41, SSE41,
SSE42,
} }
/// Some SSE operations requiring 2 operands r/m and r. /// Some SSE operations requiring 2 operands r/m and r.
@@ -414,6 +415,14 @@ pub enum SseOpcode {
Paddusw, Paddusw,
Pavgb, Pavgb,
Pavgw, Pavgw,
Pcmpeqb,
Pcmpeqw,
Pcmpeqd,
Pcmpeqq,
Pcmpgtb,
Pcmpgtw,
Pcmpgtd,
Pcmpgtq,
Pextrb, Pextrb,
Pextrw, Pextrw,
Pextrd, Pextrd,
@@ -543,6 +552,12 @@ impl SseOpcode {
| SseOpcode::Paddusw | SseOpcode::Paddusw
| SseOpcode::Pavgb | SseOpcode::Pavgb
| SseOpcode::Pavgw | SseOpcode::Pavgw
| SseOpcode::Pcmpeqb
| SseOpcode::Pcmpeqw
| SseOpcode::Pcmpeqd
| SseOpcode::Pcmpgtb
| SseOpcode::Pcmpgtw
| SseOpcode::Pcmpgtd
| SseOpcode::Pextrw | SseOpcode::Pextrw
| SseOpcode::Pinsrw | SseOpcode::Pinsrw
| SseOpcode::Pmaxsw | SseOpcode::Pmaxsw
@@ -575,6 +590,7 @@ impl SseOpcode {
SseOpcode::Pabsb | SseOpcode::Pabsw | SseOpcode::Pabsd | SseOpcode::Pshufb => SSSE3, SseOpcode::Pabsb | SseOpcode::Pabsw | SseOpcode::Pabsd | SseOpcode::Pshufb => SSSE3,
SseOpcode::Insertps SseOpcode::Insertps
| SseOpcode::Pcmpeqq
| SseOpcode::Pextrb | SseOpcode::Pextrb
| SseOpcode::Pextrd | SseOpcode::Pextrd
| SseOpcode::Pinsrb | SseOpcode::Pinsrb
@@ -590,6 +606,8 @@ impl SseOpcode {
| SseOpcode::Pmulld | SseOpcode::Pmulld
| SseOpcode::Roundss | SseOpcode::Roundss
| SseOpcode::Roundsd => SSE41, | SseOpcode::Roundsd => SSE41,
SseOpcode::Pcmpgtq => SSE42,
} }
} }
@@ -670,6 +688,14 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Paddusw => "paddusw", SseOpcode::Paddusw => "paddusw",
SseOpcode::Pavgb => "pavgb", SseOpcode::Pavgb => "pavgb",
SseOpcode::Pavgw => "pavgw", SseOpcode::Pavgw => "pavgw",
SseOpcode::Pcmpeqb => "pcmpeqb",
SseOpcode::Pcmpeqw => "pcmpeqw",
SseOpcode::Pcmpeqd => "pcmpeqd",
SseOpcode::Pcmpeqq => "pcmpeqq",
SseOpcode::Pcmpgtb => "pcmpgtb",
SseOpcode::Pcmpgtw => "pcmpgtw",
SseOpcode::Pcmpgtd => "pcmpgtd",
SseOpcode::Pcmpgtq => "pcmpgtq",
SseOpcode::Pextrb => "pextrb", SseOpcode::Pextrb => "pextrb",
SseOpcode::Pextrw => "pextrw", SseOpcode::Pextrw => "pextrw",
SseOpcode::Pextrd => "pextrd", SseOpcode::Pextrd => "pextrd",

View File

@@ -1786,6 +1786,14 @@ pub(crate) fn emit(
SseOpcode::Paddusw => (LegacyPrefixes::_66, 0x0FDD, 2), SseOpcode::Paddusw => (LegacyPrefixes::_66, 0x0FDD, 2),
SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2), SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),
SseOpcode::Pavgw => (LegacyPrefixes::_66, 0x0FE3, 2), SseOpcode::Pavgw => (LegacyPrefixes::_66, 0x0FE3, 2),
SseOpcode::Pcmpeqb => (LegacyPrefixes::_66, 0x0F74, 2),
SseOpcode::Pcmpeqw => (LegacyPrefixes::_66, 0x0F75, 2),
SseOpcode::Pcmpeqd => (LegacyPrefixes::_66, 0x0F76, 2),
SseOpcode::Pcmpeqq => (LegacyPrefixes::_66, 0x0F3829, 3),
SseOpcode::Pcmpgtb => (LegacyPrefixes::_66, 0x0F64, 2),
SseOpcode::Pcmpgtw => (LegacyPrefixes::_66, 0x0F65, 2),
SseOpcode::Pcmpgtd => (LegacyPrefixes::_66, 0x0F66, 2),
SseOpcode::Pcmpgtq => (LegacyPrefixes::_66, 0x0F3837, 3),
SseOpcode::Pmaxsb => (LegacyPrefixes::_66, 0x0F383C, 3), SseOpcode::Pmaxsb => (LegacyPrefixes::_66, 0x0F383C, 3),
SseOpcode::Pmaxsw => (LegacyPrefixes::_66, 0x0FEE, 2), SseOpcode::Pmaxsw => (LegacyPrefixes::_66, 0x0FEE, 2),
SseOpcode::Pmaxsd => (LegacyPrefixes::_66, 0x0F383D, 3), SseOpcode::Pmaxsd => (LegacyPrefixes::_66, 0x0F383D, 3),

View File

@@ -1125,7 +1125,11 @@ impl Inst {
src.to_reg() == Some(dst.to_reg()) src.to_reg() == Some(dst.to_reg())
&& (*op == SseOpcode::Xorps && (*op == SseOpcode::Xorps
|| *op == SseOpcode::Xorpd || *op == SseOpcode::Xorpd
|| *op == SseOpcode::Pxor) || *op == SseOpcode::Pxor
|| *op == SseOpcode::Pcmpeqb
|| *op == SseOpcode::Pcmpeqw
|| *op == SseOpcode::Pcmpeqd
|| *op == SseOpcode::Pcmpeqq)
} }
Self::XmmRmRImm { Self::XmmRmRImm {

View File

@@ -3,8 +3,8 @@
#![allow(non_snake_case)] #![allow(non_snake_case)]
use crate::ir::{ use crate::ir::{
condcodes::FloatCC, types, AbiParam, ArgumentPurpose, ExternalName, Inst as IRInst, condcodes::FloatCC, condcodes::IntCC, types, AbiParam, ArgumentPurpose, ExternalName,
InstructionData, LibCall, Opcode, Signature, Type, Inst as IRInst, InstructionData, LibCall, Opcode, Signature, Type,
}; };
use crate::isa::x64::abi::*; use crate::isa::x64::abi::*;
use crate::isa::x64::inst::args::*; use crate::isa::x64::inst::args::*;
@@ -1297,12 +1297,118 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
} }
Opcode::Icmp => { Opcode::Icmp => {
emit_cmp(ctx, insn);
let condcode = ctx.data(insn).cond_code().unwrap(); let condcode = ctx.data(insn).cond_code().unwrap();
let cc = CC::from_intcc(condcode);
let dst = get_output_reg(ctx, outputs[0]); let dst = get_output_reg(ctx, outputs[0]);
let ty = ctx.input_ty(insn, 0);
if !ty.is_vector() {
emit_cmp(ctx, insn);
let cc = CC::from_intcc(condcode);
ctx.emit(Inst::setcc(cc, dst)); ctx.emit(Inst::setcc(cc, dst));
} else {
assert_eq!(ty.bits(), 128);
// Chooses the `PCMPEQ*` opcode that matches the lane layout of a 128-bit integer vector type.
// Any other type aborts lowering with a panic that names the condition code and the type.
let eq = |vec_ty| {
    match vec_ty {
        types::I64X2 => SseOpcode::Pcmpeqq,
        types::I32X4 => SseOpcode::Pcmpeqd,
        types::I16X8 => SseOpcode::Pcmpeqw,
        types::I8X16 => SseOpcode::Pcmpeqb,
        unsupported => panic!(
            "Unable to find an instruction for {} for type: {}",
            condcode, unsupported
        ),
    }
};
// Chooses the `PCMPGT*` opcode that matches the lane layout of a 128-bit integer vector type.
// Any other type aborts lowering with a panic that names the condition code and the type.
let gt = |vec_ty| {
    match vec_ty {
        types::I64X2 => SseOpcode::Pcmpgtq,
        types::I32X4 => SseOpcode::Pcmpgtd,
        types::I16X8 => SseOpcode::Pcmpgtw,
        types::I8X16 => SseOpcode::Pcmpgtb,
        unsupported => panic!(
            "Unable to find an instruction for {} for type: {}",
            condcode, unsupported
        ),
    }
};
// Chooses the `PMAXU*` opcode for the 8-, 16- and 32-bit lane vector types. There is no arm
// for `I64X2`, so it (like any other type) ends in the panic below.
let maxu = |vec_ty| {
    match vec_ty {
        types::I32X4 => SseOpcode::Pmaxud,
        types::I16X8 => SseOpcode::Pmaxuw,
        types::I8X16 => SseOpcode::Pmaxub,
        unsupported => panic!(
            "Unable to find an instruction for {} for type: {}",
            condcode, unsupported
        ),
    }
};
// Chooses the `PMINS*` opcode for the 8-, 16- and 32-bit lane vector types. There is no arm
// for `I64X2`, so it (like any other type) ends in the panic below.
let mins = |vec_ty| {
    match vec_ty {
        types::I32X4 => SseOpcode::Pminsd,
        types::I16X8 => SseOpcode::Pminsw,
        types::I8X16 => SseOpcode::Pminsb,
        unsupported => panic!(
            "Unable to find an instruction for {} for type: {}",
            condcode, unsupported
        ),
    }
};
// Chooses the `PMINU*` opcode for the 8-, 16- and 32-bit lane vector types. There is no arm
// for `I64X2`, so it (like any other type) ends in the panic below.
let minu = |vec_ty| {
    match vec_ty {
        types::I32X4 => SseOpcode::Pminud,
        types::I16X8 => SseOpcode::Pminuw,
        types::I8X16 => SseOpcode::Pminub,
        unsupported => panic!(
            "Unable to find an instruction for {} for type: {}",
            condcode, unsupported
        ),
    }
};
// Here we decide which operand to use as the read/write `dst` (ModRM reg field)
// and which to use as the read `input` (ModRM r/m field). In the normal case we
// use Cranelift's first operand, the `lhs`, as `dst` but we flip the operands for
// the less-than cases so that we can reuse the greater-than implementation.
//
// In both arms the operand that seeds `dst` is forced into a register and copied into `dst`
// with a move, while the other operand is obtained through `input_to_reg_mem` and becomes
// the value of this `match`.
let input = match condcode {
IntCC::SignedLessThan
| IntCC::SignedLessThanOrEqual
| IntCC::UnsignedLessThan
| IntCC::UnsignedLessThanOrEqual => {
// Flipped: `dst` starts out as a copy of `rhs`, and `lhs` is the r/m operand.
let lhs = input_to_reg_mem(ctx, inputs[0]);
let rhs = put_input_in_reg(ctx, inputs[1]);
ctx.emit(Inst::gen_move(dst, rhs, ty));
lhs
}
_ => {
// Normal: `dst` starts out as a copy of `lhs`, and `rhs` is the r/m operand.
let lhs = put_input_in_reg(ctx, inputs[0]);
let rhs = input_to_reg_mem(ctx, inputs[1]);
ctx.emit(Inst::gen_move(dst, lhs, ty));
rhs
}
};
// Emit the comparison proper. On entry `dst` already holds a copy of one operand and `input`
// is the other one; for the less-than condition codes the two were swapped beforehand, which
// is why each less-than code shares an arm with its greater-than counterpart. Every
// instruction below reads `input` and reads/writes `dst`.
match condcode {
IntCC::Equal => ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst)),
IntCC::NotEqual => {
// Compute "equal" first, then flip every bit of the result.
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst));
// Emit all 1s into the `tmp` register.
let tmp = ctx.alloc_tmp(RegClass::V128, ty);
ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp));
// Invert the result of the `PCMPEQ*`.
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), dst));
}
IntCC::SignedGreaterThan | IntCC::SignedLessThan => {
ctx.emit(Inst::xmm_rm_r(gt(ty), input, dst))
}
IntCC::SignedGreaterThanOrEqual | IntCC::SignedLessThanOrEqual => {
// `dst >= input` is computed as `min(dst, input) == input`: take the signed minimum
// into `dst`, then compare that minimum against `input` for equality.
ctx.emit(Inst::xmm_rm_r(mins(ty), input.clone(), dst));
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst))
}
IntCC::UnsignedGreaterThan | IntCC::UnsignedLessThan => {
// `dst > input` is computed as `!(max(dst, input) == input)`: take the unsigned maximum
// into `dst`, compare it against `input` for equality, then flip every bit.
ctx.emit(Inst::xmm_rm_r(maxu(ty), input.clone(), dst));
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst));
// Emit all 1s into the `tmp` register.
let tmp = ctx.alloc_tmp(RegClass::V128, ty);
ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp));
// Invert the result of the `PCMPEQ*`.
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), dst));
}
IntCC::UnsignedGreaterThanOrEqual | IntCC::UnsignedLessThanOrEqual => {
// Same trick as the signed case, using the unsigned minimum:
// `dst >= input` is computed as `min(dst, input) == input`.
ctx.emit(Inst::xmm_rm_r(minu(ty), input.clone(), dst));
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst))
}
// Any remaining condition code has no vector lowering here.
_ => unimplemented!("Unimplemented comparison code for icmp: {}", condcode),
}
}
} }
Opcode::Fcmp => { Opcode::Fcmp => {