[x64] Add the new i64x2 comparisons
This commit is contained in:
@@ -3185,11 +3185,27 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
),
|
||||
};
|
||||
|
||||
// Here we decide which operand to use as the read/write `dst` (ModRM reg field)
|
||||
// and which to use as the read `input` (ModRM r/m field). In the normal case we
|
||||
// use Cranelift's first operand, the `lhs`, as `dst` but we flip the operands for
|
||||
// the less-than cases so that we can reuse the greater-than implementation.
|
||||
// Here we decide which operand to use as the read/write `dst` (ModRM reg field) and
|
||||
// which to use as the read `input` (ModRM r/m field). In the normal case we use
|
||||
// Cranelift's first operand, the `lhs`, as `dst` but we flip the operands for the
|
||||
// less-than cases so that we can reuse the greater-than implementation.
|
||||
//
|
||||
// In a surprising twist, the operands for i64x2 `sge`/`sle` must also be flipped
|
||||
// from the normal order because of the special-case lowering for these instructions
|
||||
// (i.e. we use PCMPGTQ with flipped operands and negate the result).
|
||||
let input = match condcode {
|
||||
IntCC::SignedLessThanOrEqual if ty == types::I64X2 => {
|
||||
let lhs = put_input_in_reg(ctx, inputs[0]);
|
||||
let rhs = input_to_reg_mem(ctx, inputs[1]);
|
||||
ctx.emit(Inst::gen_move(dst, lhs, ty));
|
||||
rhs
|
||||
}
|
||||
IntCC::SignedGreaterThanOrEqual if ty == types::I64X2 => {
|
||||
let lhs = input_to_reg_mem(ctx, inputs[0]);
|
||||
let rhs = put_input_in_reg(ctx, inputs[1]);
|
||||
ctx.emit(Inst::gen_move(dst, rhs, ty));
|
||||
lhs
|
||||
}
|
||||
IntCC::SignedLessThan
|
||||
| IntCC::SignedLessThanOrEqual
|
||||
| IntCC::UnsignedLessThan
|
||||
@@ -3220,10 +3236,25 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
IntCC::SignedGreaterThan | IntCC::SignedLessThan => {
|
||||
ctx.emit(Inst::xmm_rm_r(gt(ty), input, dst))
|
||||
}
|
||||
IntCC::SignedGreaterThanOrEqual | IntCC::SignedLessThanOrEqual => {
|
||||
IntCC::SignedGreaterThanOrEqual | IntCC::SignedLessThanOrEqual
|
||||
if ty != types::I64X2 =>
|
||||
{
|
||||
ctx.emit(Inst::xmm_rm_r(mins(ty), input.clone(), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst))
|
||||
}
|
||||
IntCC::SignedGreaterThanOrEqual | IntCC::SignedLessThanOrEqual
|
||||
if ty == types::I64X2 =>
|
||||
{
|
||||
// The 64-bit PMINSQ instruction is only available in AVX512VL/F so we must instead
|
||||
// compare with flipped operands and negate the result (emitting one more
|
||||
// instruction).
|
||||
ctx.emit(Inst::xmm_rm_r(gt(ty), input, dst));
|
||||
// Emit all 1s into the `tmp` register.
|
||||
let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp));
|
||||
// Invert the result of the `PCMPGT*`.
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), dst));
|
||||
}
|
||||
IntCC::UnsignedGreaterThan | IntCC::UnsignedLessThan => {
|
||||
ctx.emit(Inst::xmm_rm_r(maxu(ty), input.clone(), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst));
|
||||
|
||||
Reference in New Issue
Block a user