[machinst x64]: add integer comparisons
This commit is contained in:
@@ -344,6 +344,7 @@ pub(crate) enum InstructionSet {
|
|||||||
SSE2,
|
SSE2,
|
||||||
SSSE3,
|
SSSE3,
|
||||||
SSE41,
|
SSE41,
|
||||||
|
SSE42,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Some SSE operations requiring 2 operands r/m and r.
|
/// Some SSE operations requiring 2 operands r/m and r.
|
||||||
@@ -414,6 +415,14 @@ pub enum SseOpcode {
|
|||||||
Paddusw,
|
Paddusw,
|
||||||
Pavgb,
|
Pavgb,
|
||||||
Pavgw,
|
Pavgw,
|
||||||
|
Pcmpeqb,
|
||||||
|
Pcmpeqw,
|
||||||
|
Pcmpeqd,
|
||||||
|
Pcmpeqq,
|
||||||
|
Pcmpgtb,
|
||||||
|
Pcmpgtw,
|
||||||
|
Pcmpgtd,
|
||||||
|
Pcmpgtq,
|
||||||
Pextrb,
|
Pextrb,
|
||||||
Pextrw,
|
Pextrw,
|
||||||
Pextrd,
|
Pextrd,
|
||||||
@@ -543,6 +552,12 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Paddusw
|
| SseOpcode::Paddusw
|
||||||
| SseOpcode::Pavgb
|
| SseOpcode::Pavgb
|
||||||
| SseOpcode::Pavgw
|
| SseOpcode::Pavgw
|
||||||
|
| SseOpcode::Pcmpeqb
|
||||||
|
| SseOpcode::Pcmpeqw
|
||||||
|
| SseOpcode::Pcmpeqd
|
||||||
|
| SseOpcode::Pcmpgtb
|
||||||
|
| SseOpcode::Pcmpgtw
|
||||||
|
| SseOpcode::Pcmpgtd
|
||||||
| SseOpcode::Pextrw
|
| SseOpcode::Pextrw
|
||||||
| SseOpcode::Pinsrw
|
| SseOpcode::Pinsrw
|
||||||
| SseOpcode::Pmaxsw
|
| SseOpcode::Pmaxsw
|
||||||
@@ -575,6 +590,7 @@ impl SseOpcode {
|
|||||||
SseOpcode::Pabsb | SseOpcode::Pabsw | SseOpcode::Pabsd | SseOpcode::Pshufb => SSSE3,
|
SseOpcode::Pabsb | SseOpcode::Pabsw | SseOpcode::Pabsd | SseOpcode::Pshufb => SSSE3,
|
||||||
|
|
||||||
SseOpcode::Insertps
|
SseOpcode::Insertps
|
||||||
|
| SseOpcode::Pcmpeqq
|
||||||
| SseOpcode::Pextrb
|
| SseOpcode::Pextrb
|
||||||
| SseOpcode::Pextrd
|
| SseOpcode::Pextrd
|
||||||
| SseOpcode::Pinsrb
|
| SseOpcode::Pinsrb
|
||||||
@@ -590,6 +606,8 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Pmulld
|
| SseOpcode::Pmulld
|
||||||
| SseOpcode::Roundss
|
| SseOpcode::Roundss
|
||||||
| SseOpcode::Roundsd => SSE41,
|
| SseOpcode::Roundsd => SSE41,
|
||||||
|
|
||||||
|
SseOpcode::Pcmpgtq => SSE42,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -670,6 +688,14 @@ impl fmt::Debug for SseOpcode {
|
|||||||
SseOpcode::Paddusw => "paddusw",
|
SseOpcode::Paddusw => "paddusw",
|
||||||
SseOpcode::Pavgb => "pavgb",
|
SseOpcode::Pavgb => "pavgb",
|
||||||
SseOpcode::Pavgw => "pavgw",
|
SseOpcode::Pavgw => "pavgw",
|
||||||
|
SseOpcode::Pcmpeqb => "pcmpeqb",
|
||||||
|
SseOpcode::Pcmpeqw => "pcmpeqw",
|
||||||
|
SseOpcode::Pcmpeqd => "pcmpeqd",
|
||||||
|
SseOpcode::Pcmpeqq => "pcmpeqq",
|
||||||
|
SseOpcode::Pcmpgtb => "pcmpgtb",
|
||||||
|
SseOpcode::Pcmpgtw => "pcmpgtw",
|
||||||
|
SseOpcode::Pcmpgtd => "pcmpgtd",
|
||||||
|
SseOpcode::Pcmpgtq => "pcmpgtq",
|
||||||
SseOpcode::Pextrb => "pextrb",
|
SseOpcode::Pextrb => "pextrb",
|
||||||
SseOpcode::Pextrw => "pextrw",
|
SseOpcode::Pextrw => "pextrw",
|
||||||
SseOpcode::Pextrd => "pextrd",
|
SseOpcode::Pextrd => "pextrd",
|
||||||
|
|||||||
@@ -1786,6 +1786,14 @@ pub(crate) fn emit(
|
|||||||
SseOpcode::Paddusw => (LegacyPrefixes::_66, 0x0FDD, 2),
|
SseOpcode::Paddusw => (LegacyPrefixes::_66, 0x0FDD, 2),
|
||||||
SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),
|
SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),
|
||||||
SseOpcode::Pavgw => (LegacyPrefixes::_66, 0x0FE3, 2),
|
SseOpcode::Pavgw => (LegacyPrefixes::_66, 0x0FE3, 2),
|
||||||
|
SseOpcode::Pcmpeqb => (LegacyPrefixes::_66, 0x0F74, 2),
|
||||||
|
SseOpcode::Pcmpeqw => (LegacyPrefixes::_66, 0x0F75, 2),
|
||||||
|
SseOpcode::Pcmpeqd => (LegacyPrefixes::_66, 0x0F76, 2),
|
||||||
|
SseOpcode::Pcmpeqq => (LegacyPrefixes::_66, 0x0F3829, 3),
|
||||||
|
SseOpcode::Pcmpgtb => (LegacyPrefixes::_66, 0x0F64, 2),
|
||||||
|
SseOpcode::Pcmpgtw => (LegacyPrefixes::_66, 0x0F65, 2),
|
||||||
|
SseOpcode::Pcmpgtd => (LegacyPrefixes::_66, 0x0F66, 2),
|
||||||
|
SseOpcode::Pcmpgtq => (LegacyPrefixes::_66, 0x0F3837, 3),
|
||||||
SseOpcode::Pmaxsb => (LegacyPrefixes::_66, 0x0F383C, 3),
|
SseOpcode::Pmaxsb => (LegacyPrefixes::_66, 0x0F383C, 3),
|
||||||
SseOpcode::Pmaxsw => (LegacyPrefixes::_66, 0x0FEE, 2),
|
SseOpcode::Pmaxsw => (LegacyPrefixes::_66, 0x0FEE, 2),
|
||||||
SseOpcode::Pmaxsd => (LegacyPrefixes::_66, 0x0F383D, 3),
|
SseOpcode::Pmaxsd => (LegacyPrefixes::_66, 0x0F383D, 3),
|
||||||
|
|||||||
@@ -1125,7 +1125,11 @@ impl Inst {
|
|||||||
src.to_reg() == Some(dst.to_reg())
|
src.to_reg() == Some(dst.to_reg())
|
||||||
&& (*op == SseOpcode::Xorps
|
&& (*op == SseOpcode::Xorps
|
||||||
|| *op == SseOpcode::Xorpd
|
|| *op == SseOpcode::Xorpd
|
||||||
|| *op == SseOpcode::Pxor)
|
|| *op == SseOpcode::Pxor
|
||||||
|
|| *op == SseOpcode::Pcmpeqb
|
||||||
|
|| *op == SseOpcode::Pcmpeqw
|
||||||
|
|| *op == SseOpcode::Pcmpeqd
|
||||||
|
|| *op == SseOpcode::Pcmpeqq)
|
||||||
}
|
}
|
||||||
|
|
||||||
Self::XmmRmRImm {
|
Self::XmmRmRImm {
|
||||||
|
|||||||
@@ -3,8 +3,8 @@
|
|||||||
#![allow(non_snake_case)]
|
#![allow(non_snake_case)]
|
||||||
|
|
||||||
use crate::ir::{
|
use crate::ir::{
|
||||||
condcodes::FloatCC, types, AbiParam, ArgumentPurpose, ExternalName, Inst as IRInst,
|
condcodes::FloatCC, condcodes::IntCC, types, AbiParam, ArgumentPurpose, ExternalName,
|
||||||
InstructionData, LibCall, Opcode, Signature, Type,
|
Inst as IRInst, InstructionData, LibCall, Opcode, Signature, Type,
|
||||||
};
|
};
|
||||||
use crate::isa::x64::abi::*;
|
use crate::isa::x64::abi::*;
|
||||||
use crate::isa::x64::inst::args::*;
|
use crate::isa::x64::inst::args::*;
|
||||||
@@ -1297,12 +1297,118 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Icmp => {
|
Opcode::Icmp => {
|
||||||
emit_cmp(ctx, insn);
|
|
||||||
|
|
||||||
let condcode = ctx.data(insn).cond_code().unwrap();
|
let condcode = ctx.data(insn).cond_code().unwrap();
|
||||||
let cc = CC::from_intcc(condcode);
|
|
||||||
let dst = get_output_reg(ctx, outputs[0]);
|
let dst = get_output_reg(ctx, outputs[0]);
|
||||||
|
let ty = ctx.input_ty(insn, 0);
|
||||||
|
if !ty.is_vector() {
|
||||||
|
emit_cmp(ctx, insn);
|
||||||
|
let cc = CC::from_intcc(condcode);
|
||||||
ctx.emit(Inst::setcc(cc, dst));
|
ctx.emit(Inst::setcc(cc, dst));
|
||||||
|
} else {
|
||||||
|
assert_eq!(ty.bits(), 128);
|
||||||
|
let eq = |ty| match ty {
|
||||||
|
types::I8X16 => SseOpcode::Pcmpeqb,
|
||||||
|
types::I16X8 => SseOpcode::Pcmpeqw,
|
||||||
|
types::I32X4 => SseOpcode::Pcmpeqd,
|
||||||
|
types::I64X2 => SseOpcode::Pcmpeqq,
|
||||||
|
_ => panic!(
|
||||||
|
"Unable to find an instruction for {} for type: {}",
|
||||||
|
condcode, ty
|
||||||
|
),
|
||||||
|
};
|
||||||
|
let gt = |ty| match ty {
|
||||||
|
types::I8X16 => SseOpcode::Pcmpgtb,
|
||||||
|
types::I16X8 => SseOpcode::Pcmpgtw,
|
||||||
|
types::I32X4 => SseOpcode::Pcmpgtd,
|
||||||
|
types::I64X2 => SseOpcode::Pcmpgtq,
|
||||||
|
_ => panic!(
|
||||||
|
"Unable to find an instruction for {} for type: {}",
|
||||||
|
condcode, ty
|
||||||
|
),
|
||||||
|
};
|
||||||
|
let maxu = |ty| match ty {
|
||||||
|
types::I8X16 => SseOpcode::Pmaxub,
|
||||||
|
types::I16X8 => SseOpcode::Pmaxuw,
|
||||||
|
types::I32X4 => SseOpcode::Pmaxud,
|
||||||
|
_ => panic!(
|
||||||
|
"Unable to find an instruction for {} for type: {}",
|
||||||
|
condcode, ty
|
||||||
|
),
|
||||||
|
};
|
||||||
|
let mins = |ty| match ty {
|
||||||
|
types::I8X16 => SseOpcode::Pminsb,
|
||||||
|
types::I16X8 => SseOpcode::Pminsw,
|
||||||
|
types::I32X4 => SseOpcode::Pminsd,
|
||||||
|
_ => panic!(
|
||||||
|
"Unable to find an instruction for {} for type: {}",
|
||||||
|
condcode, ty
|
||||||
|
),
|
||||||
|
};
|
||||||
|
let minu = |ty| match ty {
|
||||||
|
types::I8X16 => SseOpcode::Pminub,
|
||||||
|
types::I16X8 => SseOpcode::Pminuw,
|
||||||
|
types::I32X4 => SseOpcode::Pminud,
|
||||||
|
_ => panic!(
|
||||||
|
"Unable to find an instruction for {} for type: {}",
|
||||||
|
condcode, ty
|
||||||
|
),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Here we decide which operand to use as the read/write `dst` (ModRM reg field)
|
||||||
|
// and which to use as the read `input` (ModRM r/m field). In the normal case we
|
||||||
|
// use Cranelift's first operand, the `lhs`, as `dst` but we flip the operands for
|
||||||
|
// the less-than cases so that we can reuse the greater-than implementation.
|
||||||
|
let input = match condcode {
|
||||||
|
IntCC::SignedLessThan
|
||||||
|
| IntCC::SignedLessThanOrEqual
|
||||||
|
| IntCC::UnsignedLessThan
|
||||||
|
| IntCC::UnsignedLessThanOrEqual => {
|
||||||
|
let lhs = input_to_reg_mem(ctx, inputs[0]);
|
||||||
|
let rhs = put_input_in_reg(ctx, inputs[1]);
|
||||||
|
ctx.emit(Inst::gen_move(dst, rhs, ty));
|
||||||
|
lhs
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
let lhs = put_input_in_reg(ctx, inputs[0]);
|
||||||
|
let rhs = input_to_reg_mem(ctx, inputs[1]);
|
||||||
|
ctx.emit(Inst::gen_move(dst, lhs, ty));
|
||||||
|
rhs
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
match condcode {
|
||||||
|
IntCC::Equal => ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst)),
|
||||||
|
IntCC::NotEqual => {
|
||||||
|
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst));
|
||||||
|
// Emit all 1s into the `tmp` register.
|
||||||
|
let tmp = ctx.alloc_tmp(RegClass::V128, ty);
|
||||||
|
ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp));
|
||||||
|
// Invert the result of the `PCMPEQ*`.
|
||||||
|
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), dst));
|
||||||
|
}
|
||||||
|
IntCC::SignedGreaterThan | IntCC::SignedLessThan => {
|
||||||
|
ctx.emit(Inst::xmm_rm_r(gt(ty), input, dst))
|
||||||
|
}
|
||||||
|
IntCC::SignedGreaterThanOrEqual | IntCC::SignedLessThanOrEqual => {
|
||||||
|
ctx.emit(Inst::xmm_rm_r(mins(ty), input.clone(), dst));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst))
|
||||||
|
}
|
||||||
|
IntCC::UnsignedGreaterThan | IntCC::UnsignedLessThan => {
|
||||||
|
ctx.emit(Inst::xmm_rm_r(maxu(ty), input.clone(), dst));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst));
|
||||||
|
// Emit all 1s into the `tmp` register.
|
||||||
|
let tmp = ctx.alloc_tmp(RegClass::V128, ty);
|
||||||
|
ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp));
|
||||||
|
// Invert the result of the `PCMPEQ*`.
|
||||||
|
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), dst));
|
||||||
|
}
|
||||||
|
IntCC::UnsignedGreaterThanOrEqual | IntCC::UnsignedLessThanOrEqual => {
|
||||||
|
ctx.emit(Inst::xmm_rm_r(minu(ty), input.clone(), dst));
|
||||||
|
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst))
|
||||||
|
}
|
||||||
|
_ => unimplemented!("Unimplemented comparison code for icmp: {}", condcode),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Fcmp => {
|
Opcode::Fcmp => {
|
||||||
|
|||||||
Reference in New Issue
Block a user