x64: port icmp to ISLE (#3886)

* x64: port GPR-held `icmp` to ISLE
* x64: port equality `icmp` for i128 type
* x64: port `icmp` for vector types
* x64: rename from_intcc to intcc_to_cc
This commit is contained in:
Andrew Brown
2022-03-18 11:22:09 -07:00
committed by GitHub
parent 8cfb552090
commit e92cbfb283
8 changed files with 1145 additions and 549 deletions

View File

@@ -920,145 +920,12 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let condcode = ctx.data(insn).cond_code().unwrap();
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let ty = ctx.input_ty(insn, 0);
if !ty.is_vector() {
if ty == types::I128 && condcode != IntCC::Equal && condcode != IntCC::NotEqual {
let condcode = emit_cmp(ctx, insn, condcode);
let cc = CC::from_intcc(condcode);
ctx.emit(Inst::setcc(cc, dst));
} else {
assert_eq!(ty.bits(), 128);
let eq = |ty| match ty {
types::I8X16 => SseOpcode::Pcmpeqb,
types::I16X8 => SseOpcode::Pcmpeqw,
types::I32X4 => SseOpcode::Pcmpeqd,
types::I64X2 => SseOpcode::Pcmpeqq,
_ => panic!(
"Unable to find an instruction for {} for type: {}",
condcode, ty
),
};
let gt = |ty| match ty {
types::I8X16 => SseOpcode::Pcmpgtb,
types::I16X8 => SseOpcode::Pcmpgtw,
types::I32X4 => SseOpcode::Pcmpgtd,
types::I64X2 => SseOpcode::Pcmpgtq,
_ => panic!(
"Unable to find an instruction for {} for type: {}",
condcode, ty
),
};
let maxu = |ty| match ty {
types::I8X16 => SseOpcode::Pmaxub,
types::I16X8 => SseOpcode::Pmaxuw,
types::I32X4 => SseOpcode::Pmaxud,
_ => panic!(
"Unable to find an instruction for {} for type: {}",
condcode, ty
),
};
let mins = |ty| match ty {
types::I8X16 => SseOpcode::Pminsb,
types::I16X8 => SseOpcode::Pminsw,
types::I32X4 => SseOpcode::Pminsd,
_ => panic!(
"Unable to find an instruction for {} for type: {}",
condcode, ty
),
};
let minu = |ty| match ty {
types::I8X16 => SseOpcode::Pminub,
types::I16X8 => SseOpcode::Pminuw,
types::I32X4 => SseOpcode::Pminud,
_ => panic!(
"Unable to find an instruction for {} for type: {}",
condcode, ty
),
};
// Here we decide which operand to use as the read/write `dst` (ModRM reg field) and
// which to use as the read `input` (ModRM r/m field). In the normal case we use
// Cranelift's first operand, the `lhs`, as `dst` but we flip the operands for the
// less-than cases so that we can reuse the greater-than implementation.
//
// In a surprising twist, the operands for i64x2 `sge`/`sle` must also be flipped
// from the normal order because of the special-case lowering for these instructions
// (i.e. we use PCMPGTQ with flipped operands and negate the result).
let input = match condcode {
IntCC::SignedLessThanOrEqual if ty == types::I64X2 => {
let lhs = put_input_in_reg(ctx, inputs[0]);
let rhs = input_to_reg_mem(ctx, inputs[1]);
ctx.emit(Inst::gen_move(dst, lhs, ty));
rhs
}
IntCC::SignedGreaterThanOrEqual if ty == types::I64X2 => {
let lhs = input_to_reg_mem(ctx, inputs[0]);
let rhs = put_input_in_reg(ctx, inputs[1]);
ctx.emit(Inst::gen_move(dst, rhs, ty));
lhs
}
IntCC::SignedLessThan
| IntCC::SignedLessThanOrEqual
| IntCC::UnsignedLessThan
| IntCC::UnsignedLessThanOrEqual => {
let lhs = input_to_reg_mem(ctx, inputs[0]);
let rhs = put_input_in_reg(ctx, inputs[1]);
ctx.emit(Inst::gen_move(dst, rhs, ty));
lhs
}
_ => {
let lhs = put_input_in_reg(ctx, inputs[0]);
let rhs = input_to_reg_mem(ctx, inputs[1]);
ctx.emit(Inst::gen_move(dst, lhs, ty));
rhs
}
};
match condcode {
IntCC::Equal => ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst)),
IntCC::NotEqual => {
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst));
// Emit all 1s into the `tmp` register.
let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp));
// Invert the result of the `PCMPEQ*`.
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), dst));
}
IntCC::SignedGreaterThan | IntCC::SignedLessThan => {
ctx.emit(Inst::xmm_rm_r(gt(ty), input, dst))
}
IntCC::SignedGreaterThanOrEqual | IntCC::SignedLessThanOrEqual
if ty != types::I64X2 =>
{
ctx.emit(Inst::xmm_rm_r(mins(ty), input.clone(), dst));
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst))
}
IntCC::SignedGreaterThanOrEqual | IntCC::SignedLessThanOrEqual
if ty == types::I64X2 =>
{
// The 64-bit variant of PMINS* (PMINSQ) is only available with AVX512VL/F, so we
// must instead compare with flipped operands and negate the result (emitting one
// more instruction).
ctx.emit(Inst::xmm_rm_r(gt(ty), input, dst));
// Emit all 1s into the `tmp` register.
let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp));
// Invert the result of the `PCMPGT*`.
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), dst));
}
IntCC::UnsignedGreaterThan | IntCC::UnsignedLessThan => {
ctx.emit(Inst::xmm_rm_r(maxu(ty), input.clone(), dst));
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst));
// Emit all 1s into the `tmp` register.
let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp));
// Invert the result of the `PCMPEQ*`.
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), dst));
}
IntCC::UnsignedGreaterThanOrEqual | IntCC::UnsignedLessThanOrEqual => {
ctx.emit(Inst::xmm_rm_r(minu(ty), input.clone(), dst));
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst))
}
_ => unimplemented!("Unimplemented comparison code for icmp: {}", condcode),
}
implemented_in_isle(ctx);
}
}