x64: port icmp to ISLE (#3886)

* x64: port GPR-held `icmp` to ISLE
* x64: port equality `icmp` for i128 type
* x64: port `icmp` for vector types
* x64: rename from_intcc to intcc_to_cc
This commit is contained in:
Andrew Brown
2022-03-18 11:22:09 -07:00
committed by GitHub
parent 8cfb552090
commit e92cbfb283
8 changed files with 1145 additions and 549 deletions

View File

@@ -784,6 +784,7 @@
(decl put_masked_in_imm8_gpr (Value Type) Imm8Gpr)
(extern constructor put_masked_in_imm8_gpr put_masked_in_imm8_gpr)
;; Condition codes
(type CC extern
(enum O
NO
@@ -801,6 +802,8 @@
NLE
P
NP))
;; Convert a CLIF integer condition code (`IntCC`) into an x64 condition code
;; (`CC`). The constructor is external, i.e. implemented outside of ISLE.
(decl intcc_to_cc (IntCC) CC)
(extern constructor intcc_to_cc intcc_to_cc)
(type Avx512Opcode extern
(enum Vcvtudq2ps
@@ -1362,6 +1365,16 @@
src1
src2))
;; Helper for emitting an `and` instruction that is wanted for the flags it
;; sets: the instruction writes its result to a fresh temporary register, and
;; only the instruction itself (as a flags producer) is handed back.
(decl x64_and_with_flags_paired (Type Gpr GprMemImm) ProducesFlags)
(rule (x64_and_with_flags_paired ty lhs rhs)
      (let ((scratch WritableGpr (temp_writable_gpr)))
        (ProducesFlags.ProducesFlagsSideEffect
         (MInst.AluRmiR (operand_size_of_type_32_64 ty)
                        (AluRmiROpcode.And)
                        lhs
                        rhs
                        scratch))))
;; Helper for emitting `or` instructions.
(decl or (Type Gpr GprMemImm) Gpr)
(rule (or ty src1 src2)
@@ -1992,65 +2005,57 @@
(rule (movlhps src1 src2)
(xmm_rm_r $I8X16 (SseOpcode.Movlhps) src1 src2))
;; Helper for creating `pmaxsb` instructions.
;; Helpers for creating `pmaxs*` (packed signed maximum) instructions. `pmaxs`
;; picks the per-lane-width helper that matches the vector type.
(decl pmaxs (Type Xmm XmmMem) Xmm)
(rule (pmaxs $I8X16 lhs rhs) (pmaxsb lhs rhs))
(rule (pmaxs $I16X8 lhs rhs) (pmaxsw lhs rhs))
(rule (pmaxs $I32X4 lhs rhs) (pmaxsd lhs rhs))
;; $I64X2 has no rule here: SSE4.1 provides no PMAXSQ.
(decl pmaxsb (Xmm XmmMem) Xmm)
(rule (pmaxsb src1 src2)
(xmm_rm_r $I8X16 (SseOpcode.Pmaxsb) src1 src2))
;; Helper for creating `pmaxsw` instructions.
(rule (pmaxsb src1 src2) (xmm_rm_r $I8X16 (SseOpcode.Pmaxsb) src1 src2))
(decl pmaxsw (Xmm XmmMem) Xmm)
(rule (pmaxsw src1 src2)
(xmm_rm_r $I8X16 (SseOpcode.Pmaxsw) src1 src2))
;; Helper for creating `pmaxsd` instructions.
;; `pmaxsw` works on 16-bit lanes, hence the $I16X8 type tag (matching `pminsw`).
(rule (pmaxsw src1 src2) (xmm_rm_r $I16X8 (SseOpcode.Pmaxsw) src1 src2))
(decl pmaxsd (Xmm XmmMem) Xmm)
(rule (pmaxsd src1 src2)
(xmm_rm_r $I8X16 (SseOpcode.Pmaxsd) src1 src2))
;; `pmaxsd` works on 32-bit lanes, hence the $I32X4 type tag (matching `pminsd`).
(rule (pmaxsd src1 src2) (xmm_rm_r $I32X4 (SseOpcode.Pmaxsd) src1 src2))
;; Helper for creating `pminsb` instructions.
;; Helpers for creating `pmins*` (packed signed minimum) instructions. `pmins`
;; picks the per-lane-width helper that matches the vector type.
(decl pmins (Type Xmm XmmMem) Xmm)
(rule (pmins $I8X16 lhs rhs) (pminsb lhs rhs))
(rule (pmins $I16X8 lhs rhs) (pminsw lhs rhs))
(rule (pmins $I32X4 lhs rhs) (pminsd lhs rhs))
;; $I64X2 has no rule here: SSE4.1 provides no PMINSQ.
(decl pminsb (Xmm XmmMem) Xmm)
(rule (pminsb src1 src2)
(xmm_rm_r $I8X16 (SseOpcode.Pminsb) src1 src2))
;; Helper for creating `pminsw` instructions.
(rule (pminsb src1 src2) (xmm_rm_r $I8X16 (SseOpcode.Pminsb) src1 src2))
(decl pminsw (Xmm XmmMem) Xmm)
(rule (pminsw src1 src2)
(xmm_rm_r $I8X16 (SseOpcode.Pminsw) src1 src2))
;; Helper for creating `pminsd` instructions.
(rule (pminsw src1 src2) (xmm_rm_r $I16X8 (SseOpcode.Pminsw) src1 src2))
(decl pminsd (Xmm XmmMem) Xmm)
(rule (pminsd src1 src2)
(xmm_rm_r $I8X16 (SseOpcode.Pminsd) src1 src2))
(rule (pminsd src1 src2) (xmm_rm_r $I32X4 (SseOpcode.Pminsd) src1 src2))
;; Helper for creating `pmaxub` instructions.
;; Helpers for creating `pmaxu*` (packed unsigned maximum) instructions.
;; `pmaxu` picks the per-lane-width helper that matches the vector type.
(decl pmaxu (Type Xmm XmmMem) Xmm)
(rule (pmaxu $I8X16 lhs rhs) (pmaxub lhs rhs))
(rule (pmaxu $I16X8 lhs rhs) (pmaxuw lhs rhs))
(rule (pmaxu $I32X4 lhs rhs) (pmaxud lhs rhs))
;; $I64X2 has no rule here: SSE4.1 provides no PMAXUQ.
(decl pmaxub (Xmm XmmMem) Xmm)
(rule (pmaxub src1 src2)
(xmm_rm_r $I8X16 (SseOpcode.Pmaxub) src1 src2))
;; Helper for creating `pmaxuw` instructions.
(rule (pmaxub src1 src2) (xmm_rm_r $I8X16 (SseOpcode.Pmaxub) src1 src2))
(decl pmaxuw (Xmm XmmMem) Xmm)
(rule (pmaxuw src1 src2)
(xmm_rm_r $I8X16 (SseOpcode.Pmaxuw) src1 src2))
;; Helper for creating `pmaxud` instructions.
;; `pmaxuw` works on 16-bit lanes, hence the $I16X8 type tag.
(rule (pmaxuw src1 src2) (xmm_rm_r $I16X8 (SseOpcode.Pmaxuw) src1 src2))
(decl pmaxud (Xmm XmmMem) Xmm)
(rule (pmaxud src1 src2)
(xmm_rm_r $I8X16 (SseOpcode.Pmaxud) src1 src2))
;; `pmaxud` works on 32-bit lanes, hence the $I32X4 type tag.
(rule (pmaxud src1 src2) (xmm_rm_r $I32X4 (SseOpcode.Pmaxud) src1 src2))
;; Helper for creating `pminub` instructions.
;; Helpers for creating `pminu*` (packed unsigned minimum) instructions.
;; `pminu` picks the per-lane-width helper that matches the vector type.
(decl pminu (Type Xmm XmmMem) Xmm)
(rule (pminu $I8X16 lhs rhs) (pminub lhs rhs))
(rule (pminu $I16X8 lhs rhs) (pminuw lhs rhs))
(rule (pminu $I32X4 lhs rhs) (pminud lhs rhs))
;; $I64X2 has no rule here: SSE4.1 provides no PMINUQ.
(decl pminub (Xmm XmmMem) Xmm)
(rule (pminub src1 src2)
(xmm_rm_r $I8X16 (SseOpcode.Pminub) src1 src2))
;; Helper for creating `pminuw` instructions.
(rule (pminub src1 src2) (xmm_rm_r $I8X16 (SseOpcode.Pminub) src1 src2))
(decl pminuw (Xmm XmmMem) Xmm)
(rule (pminuw src1 src2)
(xmm_rm_r $I8X16 (SseOpcode.Pminuw) src1 src2))
;; Helper for creating `pminud` instructions.
;; `pminuw` works on 16-bit lanes, hence the $I16X8 type tag.
(rule (pminuw src1 src2) (xmm_rm_r $I16X8 (SseOpcode.Pminuw) src1 src2))
(decl pminud (Xmm XmmMem) Xmm)
(rule (pminud src1 src2)
(xmm_rm_r $I8X16 (SseOpcode.Pminud) src1 src2))
;; `pminud` works on 32-bit lanes, hence the $I32X4 type tag.
(rule (pminud src1 src2) (xmm_rm_r $I32X4 (SseOpcode.Pminud) src1 src2))
;; Helper for creating `punpcklbw` instructions.
(decl punpcklbw (Xmm XmmMem) Xmm)
@@ -2498,6 +2503,38 @@
(_ Unit (emit (MInst.XmmRmR (SseOpcode.Maxpd) x y dst))))
dst))
;; Helpers for creating `pcmpeq*` (packed compare-for-equality) instructions.
;; `pcmpeq` selects the variant from the vector type; each per-width helper
;; below emits one specific opcode.
(decl pcmpeq (Type Xmm XmmMem) Xmm)
(rule (pcmpeq $I8X16 lhs rhs) (pcmpeqb lhs rhs))
(rule (pcmpeq $I16X8 lhs rhs) (pcmpeqw lhs rhs))
(rule (pcmpeq $I32X4 lhs rhs) (pcmpeqd lhs rhs))
(rule (pcmpeq $I64X2 lhs rhs) (pcmpeqq lhs rhs))
;; 8-bit lanes.
(decl pcmpeqb (Xmm XmmMem) Xmm)
(rule (pcmpeqb lhs rhs)
      (xmm_rm_r $I8X16 (SseOpcode.Pcmpeqb) lhs rhs))
;; 16-bit lanes.
(decl pcmpeqw (Xmm XmmMem) Xmm)
(rule (pcmpeqw lhs rhs)
      (xmm_rm_r $I16X8 (SseOpcode.Pcmpeqw) lhs rhs))
;; 32-bit lanes.
(decl pcmpeqd (Xmm XmmMem) Xmm)
(rule (pcmpeqd lhs rhs)
      (xmm_rm_r $I32X4 (SseOpcode.Pcmpeqd) lhs rhs))
;; 64-bit lanes.
(decl pcmpeqq (Xmm XmmMem) Xmm)
(rule (pcmpeqq lhs rhs)
      (xmm_rm_r $I64X2 (SseOpcode.Pcmpeqq) lhs rhs))
;; Helpers for creating `pcmpgt*` (packed signed compare-greater-than)
;; instructions. `pcmpgt` selects the variant from the vector type; each
;; per-width helper below emits one specific opcode.
(decl pcmpgt (Type Xmm XmmMem) Xmm)
(rule (pcmpgt $I8X16 lhs rhs) (pcmpgtb lhs rhs))
(rule (pcmpgt $I16X8 lhs rhs) (pcmpgtw lhs rhs))
(rule (pcmpgt $I32X4 lhs rhs) (pcmpgtd lhs rhs))
(rule (pcmpgt $I64X2 lhs rhs) (pcmpgtq lhs rhs))
;; 8-bit lanes.
(decl pcmpgtb (Xmm XmmMem) Xmm)
(rule (pcmpgtb lhs rhs)
      (xmm_rm_r $I8X16 (SseOpcode.Pcmpgtb) lhs rhs))
;; 16-bit lanes.
(decl pcmpgtw (Xmm XmmMem) Xmm)
(rule (pcmpgtw lhs rhs)
      (xmm_rm_r $I16X8 (SseOpcode.Pcmpgtw) lhs rhs))
;; 32-bit lanes.
(decl pcmpgtd (Xmm XmmMem) Xmm)
(rule (pcmpgtd lhs rhs)
      (xmm_rm_r $I32X4 (SseOpcode.Pcmpgtd) lhs rhs))
;; 64-bit lanes.
(decl pcmpgtq (Xmm XmmMem) Xmm)
(rule (pcmpgtq lhs rhs)
      (xmm_rm_r $I64X2 (SseOpcode.Pcmpgtq) lhs rhs))
;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(convert Gpr InstOutput output_gpr)
@@ -2547,6 +2584,8 @@
(convert SyntheticAmode GprMem synthetic_amode_to_gpr_mem)
(convert SyntheticAmode XmmMem synthetic_amode_to_xmm_mem)
;; Let an `IntCC` be used wherever a `CC` is expected, converting it through
;; `intcc_to_cc`.
(convert IntCC CC intcc_to_cc)
;; Turn a plain `Reg` into an `XmmMem` operand: the register is first wrapped
;; with `xmm_new` and the result is then passed through `xmm_to_xmm_mem`.
(decl reg_to_xmm_mem (Reg) XmmMem)
(rule (reg_to_xmm_mem r)
(xmm_to_xmm_mem (xmm_new r)))

View File

@@ -1440,6 +1440,107 @@
;; `resumable_trap` lowers to a `ud2` carrying the trap code, wrapped in
;; `safepoint`.
(rule (lower (resumable_trap code))
(safepoint (ud2 code)))
;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Values held in a single GPR (types that fit in 64 bits) only need a
;; `CMP + SETCC` pair. Cranelift's verifier guarantees that both operands have
;; the same type, so only the first operand's type is inspected. Note that the
;; operands are handed to `cmp` in flipped order (second, then first), which
;; hurts readability (TODO).
(rule (lower (icmp cc x @ (value_type (fits_in_64 ty)) y))
      (with_flags (cmp (raw_operand_size_of_type ty) y x)
                  (setcc cc)))
;; Values held in XMM registers are compared with `PCMP*` instructions,
;; sometimes several of them. The shape of the result differs from the GPR
;; case: every lane is filled with all 1s or all 0s according to the
;; comparison, whereas a GPR-held comparison simply yields 0 or 1 (upper bits
;; unset).
(rule (lower (icmp (IntCC.Equal) x @ (value_type (vec128 ty)) y))
      (pcmpeq ty x y))
;; Not-equals is an equality comparison (PCMPEQ*) whose bits are then inverted
;; by XORing them with an all-1s vector (PXOR).
(rule (lower (icmp (IntCC.NotEqual) x @ (value_type (vec128 ty)) y))
      (let ((eq_mask Xmm (pcmpeq ty x y))
            (ones Xmm (vector_all_ones ty)))
        (pxor eq_mask ones)))
;; Signed greater-than maps onto a single PCMPGT*; signed less-than is the same
;; instruction with the operands swapped.
(rule (lower (icmp (IntCC.SignedGreaterThan) x @ (value_type (vec128 ty)) y))
      (pcmpgt ty x y))
(rule (lower (icmp (IntCC.SignedLessThan) x @ (value_type (vec128 ty)) y))
      (pcmpgt ty y x))
;; The unsigned counterparts take three steps: compute the unsigned max (or
;; min) of both operands (PMAXU*/PMINU*), test it for equality with `y`
;; (PCMPEQ*), and negate the result (PXOR with all 1s). This works because
;; `x > y` exactly when `max(x, y) != y`, and `x < y` exactly when
;; `min(x, y) != y`.
(rule (lower (icmp (IntCC.UnsignedGreaterThan) x @ (value_type (vec128 ty)) y))
      (let ((larger Xmm (pmaxu ty x y))
            (same Xmm (pcmpeq ty larger y))
            (ones Xmm (vector_all_ones ty)))
        (pxor same ones)))
(rule (lower (icmp (IntCC.UnsignedLessThan) x @ (value_type (vec128 ty)) y))
      (let ((smaller Xmm (pminu ty x y))
            (same Xmm (pcmpeq ty smaller y))
            (ones Xmm (vector_all_ones ty)))
        (pxor same ones)))
;; To lower the signed and unsigned *-or-equals comparisons, we compute the
;; lane-wise maximum (for `>=`, PMAX[S|U]*) or minimum (for `<=`, PMIN[S|U]*)
;; of both operands and compare it for equality with the first operand
;; (PCMPEQ*): `x >= y` exactly when `max(x, y) == x`, and `x <= y` exactly when
;; `min(x, y) == x`. Note that there is no 64x2 version of this lowering (see
;; below).
(rule (lower (icmp (IntCC.SignedGreaterThanOrEqual) x @ (value_type (vec128 ty)) y))
      (let ((larger Xmm (pmaxs ty x y)))
        (pcmpeq ty x larger)))
(rule (lower (icmp (IntCC.SignedLessThanOrEqual) x @ (value_type (vec128 ty)) y))
      (let ((smaller Xmm (pmins ty x y)))
        (pcmpeq ty x smaller)))
(rule (lower (icmp (IntCC.UnsignedGreaterThanOrEqual) x @ (value_type (vec128 ty)) y))
      (let ((larger Xmm (pmaxu ty x y)))
        (pcmpeq ty x larger)))
(rule (lower (icmp (IntCC.UnsignedLessThanOrEqual) x @ (value_type (vec128 ty)) y))
      (let ((smaller Xmm (pminu ty x y)))
        (pcmpeq ty x smaller)))
;; The PMIN[S|U]Q instruction is only available in AVX512VL/F, so for 64x2
;; lanes we instead compare with flipped operands (PCMPGT*) and negate the
;; result (PXOR with all 1s): `x >= y` is `!(y > x)` and `x <= y` is
;; `!(x > y)`. This emits one more instruction than the smaller-lane versions.
(rule (lower (icmp (IntCC.SignedGreaterThanOrEqual) x @ (value_type $I64X2) y))
      (let ((strictly_less Xmm (pcmpgt $I64X2 y x))
            (ones Xmm (vector_all_ones $I64X2)))
        (pxor strictly_less ones)))
(rule (lower (icmp (IntCC.SignedLessThanOrEqual) x @ (value_type $I64X2) y))
      (let ((strictly_greater Xmm (pcmpgt $I64X2 x y))
            (ones Xmm (vector_all_ones $I64X2)))
        (pxor strictly_greater ones)))
;; TODO: not used by WebAssembly translation
;; (rule (lower (icmp (IntCC.UnsignedGreaterThanOrEqual) a @ (value_type $I64X2) b))
;; TODO: not used by WebAssembly translation
;; (rule (lower (icmp (IntCC.UnsignedLessThanOrEqual) a @ (value_type $I64X2) b))
;; I128 values are held in two GPRs, and the instruction sequence depends on
;; the condition being tested. For equality, each 64-bit half is compared
;; separately and the two results are combined.
(rule (lower (icmp (IntCC.Equal) x @ (value_type $I128) y))
      (let ((x_lo Gpr (value_regs_get_gpr x 0))
            (x_hi Gpr (value_regs_get_gpr x 1))
            (y_lo Gpr (value_regs_get_gpr y 0))
            (y_hi Gpr (value_regs_get_gpr y 1))
            (lo_eq Reg (with_flags_reg (cmp (OperandSize.Size64) y_lo x_lo) (setcc (CC.Z))))
            (hi_eq Reg (with_flags_reg (cmp (OperandSize.Size64) y_hi x_hi) (setcc (CC.Z))))
            ;; `SETcc` guarantees that the lowest 8 bits of `lo_eq` and `hi_eq`
            ;; hold either 0 or 1, but the upper bits may be left unchanged, so
            ;; the final test below must compare against 1. This `AND` merges
            ;; the two halves for that final comparison.
            (both_eq Reg (x64_and $I64 lo_eq hi_eq)))
        ;; The flag-setting behavior of `AND` produces the final answer: `ZF`
        ;; is set when the result of the `AND` is zero. If either half
        ;; contributed a 0 the operands were not equal, so we `SETcc` with
        ;; `NZ`.
        (with_flags (x64_and_with_flags_paired $I64 both_eq (RegMemImm.Imm 1)) (setcc (CC.NZ)))))
;; I128 not-equals: compare each 64-bit half separately with `SETcc NZ`, then
;; combine the halves with `OR` (the values differ if either half differs).
(rule (lower (icmp (IntCC.NotEqual) x @ (value_type $I128) y))
      (let ((x_lo Gpr (value_regs_get_gpr x 0))
            (x_hi Gpr (value_regs_get_gpr x 1))
            (y_lo Gpr (value_regs_get_gpr y 0))
            (y_hi Gpr (value_regs_get_gpr y 1))
            (lo_ne Reg (with_flags_reg (cmp (OperandSize.Size64) y_lo x_lo) (setcc (CC.NZ))))
            (hi_ne Reg (with_flags_reg (cmp (OperandSize.Size64) y_hi x_hi) (setcc (CC.NZ))))
            ;; As in the `IntCC.Equal` rule, only the lowest 8 bits written by
            ;; `SETcc` are meaningful, hence the masking `AND` with 1 below.
            (any_ne Reg (or $I64 lo_ne hi_ne)))
        (with_flags (x64_and_with_flags_paired $I64 any_ne (RegMemImm.Imm 1)) (setcc (CC.NZ)))))
;;;; Rules for `select` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; CLIF `select` instructions receive a testable argument (i.e. boolean or
@@ -1715,8 +1816,8 @@
(mul Gpr (mul $I32 masked4 (RegMemImm.Imm 0x01010101)))
(final Gpr (shr $I32 mul (Imm8Reg.Imm8 24))))
final))
(rule 1 (lower (has_type (and
$I8X16
(avx512vl_enabled)
@@ -1725,7 +1826,7 @@
(vpopcntb src))
;; For SSE 4.2 we use Mula's algorithm (https://arxiv.org/pdf/1611.07612.pdf):
;;
;; __m128i count_bytes ( __m128i v) {
@@ -1807,7 +1908,7 @@
(shl ty lo4 (Imm8Reg.Imm8 4))
hi4)))
swap4))
(decl do_bitrev16 (Type Gpr) Gpr)
(rule (do_bitrev16 ty src)
(let ((src_ Gpr (do_bitrev8 ty src))
@@ -1819,7 +1920,7 @@
(shl ty lo8 (Imm8Reg.Imm8 8))
hi8)))
swap8))
(decl do_bitrev32 (Type Gpr) Gpr)
(rule (do_bitrev32 ty src)
(let ((src_ Gpr (do_bitrev16 ty src))

View File

@@ -920,145 +920,12 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let condcode = ctx.data(insn).cond_code().unwrap();
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let ty = ctx.input_ty(insn, 0);
if !ty.is_vector() {
if ty == types::I128 && condcode != IntCC::Equal && condcode != IntCC::NotEqual {
let condcode = emit_cmp(ctx, insn, condcode);
let cc = CC::from_intcc(condcode);
ctx.emit(Inst::setcc(cc, dst));
} else {
assert_eq!(ty.bits(), 128);
let eq = |ty| match ty {
types::I8X16 => SseOpcode::Pcmpeqb,
types::I16X8 => SseOpcode::Pcmpeqw,
types::I32X4 => SseOpcode::Pcmpeqd,
types::I64X2 => SseOpcode::Pcmpeqq,
_ => panic!(
"Unable to find an instruction for {} for type: {}",
condcode, ty
),
};
let gt = |ty| match ty {
types::I8X16 => SseOpcode::Pcmpgtb,
types::I16X8 => SseOpcode::Pcmpgtw,
types::I32X4 => SseOpcode::Pcmpgtd,
types::I64X2 => SseOpcode::Pcmpgtq,
_ => panic!(
"Unable to find an instruction for {} for type: {}",
condcode, ty
),
};
let maxu = |ty| match ty {
types::I8X16 => SseOpcode::Pmaxub,
types::I16X8 => SseOpcode::Pmaxuw,
types::I32X4 => SseOpcode::Pmaxud,
_ => panic!(
"Unable to find an instruction for {} for type: {}",
condcode, ty
),
};
let mins = |ty| match ty {
types::I8X16 => SseOpcode::Pminsb,
types::I16X8 => SseOpcode::Pminsw,
types::I32X4 => SseOpcode::Pminsd,
_ => panic!(
"Unable to find an instruction for {} for type: {}",
condcode, ty
),
};
let minu = |ty| match ty {
types::I8X16 => SseOpcode::Pminub,
types::I16X8 => SseOpcode::Pminuw,
types::I32X4 => SseOpcode::Pminud,
_ => panic!(
"Unable to find an instruction for {} for type: {}",
condcode, ty
),
};
// Here we decide which operand to use as the read/write `dst` (ModRM reg field) and
// which to use as the read `input` (ModRM r/m field). In the normal case we use
// Cranelift's first operand, the `lhs`, as `dst` but we flip the operands for the
// less-than cases so that we can reuse the greater-than implementation.
//
// In a surprising twist, the operands for i64x2 `gte`/`sle` must also be flipped
// from the normal order because of the special-case lowering for these instructions
// (i.e. we use PCMPGTQ with flipped operands and negate the result).
let input = match condcode {
IntCC::SignedLessThanOrEqual if ty == types::I64X2 => {
let lhs = put_input_in_reg(ctx, inputs[0]);
let rhs = input_to_reg_mem(ctx, inputs[1]);
ctx.emit(Inst::gen_move(dst, lhs, ty));
rhs
}
IntCC::SignedGreaterThanOrEqual if ty == types::I64X2 => {
let lhs = input_to_reg_mem(ctx, inputs[0]);
let rhs = put_input_in_reg(ctx, inputs[1]);
ctx.emit(Inst::gen_move(dst, rhs, ty));
lhs
}
IntCC::SignedLessThan
| IntCC::SignedLessThanOrEqual
| IntCC::UnsignedLessThan
| IntCC::UnsignedLessThanOrEqual => {
let lhs = input_to_reg_mem(ctx, inputs[0]);
let rhs = put_input_in_reg(ctx, inputs[1]);
ctx.emit(Inst::gen_move(dst, rhs, ty));
lhs
}
_ => {
let lhs = put_input_in_reg(ctx, inputs[0]);
let rhs = input_to_reg_mem(ctx, inputs[1]);
ctx.emit(Inst::gen_move(dst, lhs, ty));
rhs
}
};
match condcode {
IntCC::Equal => ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst)),
IntCC::NotEqual => {
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst));
// Emit all 1s into the `tmp` register.
let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp));
// Invert the result of the `PCMPEQ*`.
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), dst));
}
IntCC::SignedGreaterThan | IntCC::SignedLessThan => {
ctx.emit(Inst::xmm_rm_r(gt(ty), input, dst))
}
IntCC::SignedGreaterThanOrEqual | IntCC::SignedLessThanOrEqual
if ty != types::I64X2 =>
{
ctx.emit(Inst::xmm_rm_r(mins(ty), input.clone(), dst));
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst))
}
IntCC::SignedGreaterThanOrEqual | IntCC::SignedLessThanOrEqual
if ty == types::I64X2 =>
{
// The PMINS* instruction is only available in AVX512VL/F so we must instead
// compare with flipped operands and negate the result (emitting one more
// instruction).
ctx.emit(Inst::xmm_rm_r(gt(ty), input, dst));
// Emit all 1s into the `tmp` register.
let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp));
// Invert the result of the `PCMPGT*`.
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), dst));
}
IntCC::UnsignedGreaterThan | IntCC::UnsignedLessThan => {
ctx.emit(Inst::xmm_rm_r(maxu(ty), input.clone(), dst));
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst));
// Emit all 1s into the `tmp` register.
let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp));
// Invert the result of the `PCMPEQ*`.
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), dst));
}
IntCC::UnsignedGreaterThanOrEqual | IntCC::UnsignedLessThanOrEqual => {
ctx.emit(Inst::xmm_rm_r(minu(ty), input.clone(), dst));
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst))
}
_ => unimplemented!("Unimplemented comparison code for icmp: {}", condcode),
}
implemented_in_isle(ctx);
}
}

View File

@@ -9,8 +9,10 @@ use regalloc::Writable;
use super::{is_int_or_ref_ty, is_mergeable_load, lower_to_amode, Reg};
use crate::{
ir::{
condcodes::FloatCC, immediates::*, types::*, Inst, InstructionData, Opcode, TrapCode,
Value, ValueLabel, ValueList,
condcodes::{FloatCC, IntCC},
immediates::*,
types::*,
Inst, InstructionData, Opcode, TrapCode, Value, ValueLabel, ValueList,
},
isa::{
settings::Flags,
@@ -512,6 +514,11 @@ where
None
}
}
/// Converts a CLIF integer condition code into an x64 `CC` by delegating to
/// `CC::from_intcc`.
#[inline]
fn intcc_to_cc(&mut self, intcc: &IntCC) -> CC {
CC::from_intcc(*intcc)
}
}
// Since x64 doesn't have 8x16 shifts and we must use a 16x8 shift instead, we

View File

@@ -1,4 +1,4 @@
src/clif.isle 9ea75a6f790b5c03
src/prelude.isle b2bc986bcbbbb77
src/isa/x64/inst.isle cdd292107fb36cf
src/isa/x64/lower.isle c049f7d36db0e0fb
src/isa/x64/inst.isle 5644ccb29bff0b51
src/isa/x64/lower.isle 51d6ce13a3e47bc4

File diff suppressed because it is too large Load Diff

View File

@@ -241,20 +241,20 @@ block0(v0: i128, v1: i128):
; Inst 5: movq %r14, 32(%rsp)
; Inst 6: movq %rbx, 40(%rsp)
; Inst 7: movq %r15, 48(%rsp)
; Inst 8: cmpq %rcx, %rsi
; Inst 8: cmpq %rdx, %rdi
; Inst 9: setz %al
; Inst 10: cmpq %rdx, %rdi
; Inst 10: cmpq %rcx, %rsi
; Inst 11: setz %r8b
; Inst 12: andq %rax, %r8
; Inst 13: andq $1, %r8
; Inst 12: andq %r8, %rax
; Inst 13: andq $1, %rax
; Inst 14: setnz %al
; Inst 15: movq %rax, rsp(0 + virtual offset)
; Inst 16: cmpq %rcx, %rsi
; Inst 16: cmpq %rdx, %rdi
; Inst 17: setnz %al
; Inst 18: cmpq %rdx, %rdi
; Inst 18: cmpq %rcx, %rsi
; Inst 19: setnz %r8b
; Inst 20: orq %rax, %r8
; Inst 21: andq $1, %r8
; Inst 20: orq %r8, %rax
; Inst 21: andq $1, %rax
; Inst 22: setnz %r8b
; Inst 23: cmpq %rcx, %rsi
; Inst 24: setl %r9b

View File

@@ -55,14 +55,16 @@ block0(v0: i16x8, v1: i16x8):
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 7)
; (instruction range: 0 .. 9)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: pminsw %xmm1, %xmm0
; Inst 3: pcmpeqw %xmm1, %xmm0
; Inst 4: movq %rbp, %rsp
; Inst 5: popq %rbp
; Inst 6: ret
; Inst 2: movdqa %xmm1, %xmm2
; Inst 3: movdqa %xmm0, %xmm1
; Inst 4: pmaxsw %xmm2, %xmm1
; Inst 5: pcmpeqw %xmm1, %xmm0
; Inst 6: movq %rbp, %rsp
; Inst 7: popq %rbp
; Inst 8: ret
; }}
function %icmp_uge_i8x16(i8x16, i8x16) -> b8x16 {
@@ -75,13 +77,15 @@ block0(v0: i8x16, v1: i8x16):
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 7)
; (instruction range: 0 .. 9)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: pminub %xmm1, %xmm0
; Inst 3: pcmpeqb %xmm1, %xmm0
; Inst 4: movq %rbp, %rsp
; Inst 5: popq %rbp
; Inst 6: ret
; Inst 2: movdqa %xmm1, %xmm2
; Inst 3: movdqa %xmm0, %xmm1
; Inst 4: pmaxub %xmm2, %xmm1
; Inst 5: pcmpeqb %xmm1, %xmm0
; Inst 6: movq %rbp, %rsp
; Inst 7: popq %rbp
; Inst 8: ret
; }}