x64: port icmp to ISLE (#3886)
* x64: port GPR-held `icmp` to ISLE * x64: port equality `icmp` for i128 type * x64: port `icmp` for vector types * x64: rename from_intcc to intcc_to_cc
This commit is contained in:
@@ -784,6 +784,7 @@
|
||||
(decl put_masked_in_imm8_gpr (Value Type) Imm8Gpr)
|
||||
(extern constructor put_masked_in_imm8_gpr put_masked_in_imm8_gpr)
|
||||
|
||||
;; Condition codes
|
||||
(type CC extern
|
||||
(enum O
|
||||
NO
|
||||
@@ -801,6 +802,8 @@
|
||||
NLE
|
||||
P
|
||||
NP))
|
||||
(decl intcc_to_cc (IntCC) CC)
|
||||
(extern constructor intcc_to_cc intcc_to_cc)
|
||||
|
||||
(type Avx512Opcode extern
|
||||
(enum Vcvtudq2ps
|
||||
@@ -1362,6 +1365,16 @@
|
||||
src1
|
||||
src2))
|
||||
|
||||
(decl x64_and_with_flags_paired (Type Gpr GprMemImm) ProducesFlags)
|
||||
(rule (x64_and_with_flags_paired ty src1 src2)
|
||||
(let ((dst WritableGpr (temp_writable_gpr)))
|
||||
(ProducesFlags.ProducesFlagsSideEffect
|
||||
(MInst.AluRmiR (operand_size_of_type_32_64 ty)
|
||||
(AluRmiROpcode.And)
|
||||
src1
|
||||
src2
|
||||
dst))))
|
||||
|
||||
;; Helper for emitting `or` instructions.
|
||||
(decl or (Type Gpr GprMemImm) Gpr)
|
||||
(rule (or ty src1 src2)
|
||||
@@ -1992,65 +2005,57 @@
|
||||
(rule (movlhps src1 src2)
|
||||
(xmm_rm_r $I8X16 (SseOpcode.Movlhps) src1 src2))
|
||||
|
||||
;; Helper for creating `pmaxsb` instructions.
|
||||
;; Helpers for creating `pmaxs*` instructions.
|
||||
(decl pmaxs (Type Xmm XmmMem) Xmm)
|
||||
(rule (pmaxs $I8X16 x y) (pmaxsb x y))
|
||||
(rule (pmaxs $I16X8 x y) (pmaxsw x y))
|
||||
(rule (pmaxs $I32X4 x y) (pmaxsd x y))
|
||||
;; No $I64X2 version (PMAXSQ) in SSE4.1.
|
||||
(decl pmaxsb (Xmm XmmMem) Xmm)
|
||||
(rule (pmaxsb src1 src2)
|
||||
(xmm_rm_r $I8X16 (SseOpcode.Pmaxsb) src1 src2))
|
||||
|
||||
;; Helper for creating `pmaxsw` instructions.
|
||||
(rule (pmaxsb src1 src2) (xmm_rm_r $I8X16 (SseOpcode.Pmaxsb) src1 src2))
|
||||
(decl pmaxsw (Xmm XmmMem) Xmm)
|
||||
(rule (pmaxsw src1 src2)
|
||||
(xmm_rm_r $I8X16 (SseOpcode.Pmaxsw) src1 src2))
|
||||
|
||||
;; Helper for creating `pmaxsd` instructions.
|
||||
(rule (pmaxsw src1 src2) (xmm_rm_r $I8X16 (SseOpcode.Pmaxsw) src1 src2))
|
||||
(decl pmaxsd (Xmm XmmMem) Xmm)
|
||||
(rule (pmaxsd src1 src2)
|
||||
(xmm_rm_r $I8X16 (SseOpcode.Pmaxsd) src1 src2))
|
||||
(rule (pmaxsd src1 src2) (xmm_rm_r $I8X16 (SseOpcode.Pmaxsd) src1 src2))
|
||||
|
||||
;; Helper for creating `pminsb` instructions.
|
||||
;; Helpers for creating `pmins*` instructions.
|
||||
(decl pmins (Type Xmm XmmMem) Xmm)
|
||||
(rule (pmins $I8X16 x y) (pminsb x y))
|
||||
(rule (pmins $I16X8 x y) (pminsw x y))
|
||||
(rule (pmins $I32X4 x y) (pminsd x y))
|
||||
;; No $I64X2 version (PMINSQ) in SSE4.1.
|
||||
(decl pminsb (Xmm XmmMem) Xmm)
|
||||
(rule (pminsb src1 src2)
|
||||
(xmm_rm_r $I8X16 (SseOpcode.Pminsb) src1 src2))
|
||||
|
||||
;; Helper for creating `pminsw` instructions.
|
||||
(rule (pminsb src1 src2) (xmm_rm_r $I8X16 (SseOpcode.Pminsb) src1 src2))
|
||||
(decl pminsw (Xmm XmmMem) Xmm)
|
||||
(rule (pminsw src1 src2)
|
||||
(xmm_rm_r $I8X16 (SseOpcode.Pminsw) src1 src2))
|
||||
|
||||
;; Helper for creating `pminsd` instructions.
|
||||
(rule (pminsw src1 src2) (xmm_rm_r $I16X8 (SseOpcode.Pminsw) src1 src2))
|
||||
(decl pminsd (Xmm XmmMem) Xmm)
|
||||
(rule (pminsd src1 src2)
|
||||
(xmm_rm_r $I8X16 (SseOpcode.Pminsd) src1 src2))
|
||||
(rule (pminsd src1 src2) (xmm_rm_r $I32X4 (SseOpcode.Pminsd) src1 src2))
|
||||
|
||||
;; Helper for creating `pmaxub` instructions.
|
||||
;; Helpers for creating `pmaxu*` instructions.
|
||||
(decl pmaxu (Type Xmm XmmMem) Xmm)
|
||||
(rule (pmaxu $I8X16 x y) (pmaxub x y))
|
||||
(rule (pmaxu $I16X8 x y) (pmaxuw x y))
|
||||
(rule (pmaxu $I32X4 x y) (pmaxud x y))
|
||||
;; No $I64X2 version (PMAXUQ) in SSE4.1.
|
||||
(decl pmaxub (Xmm XmmMem) Xmm)
|
||||
(rule (pmaxub src1 src2)
|
||||
(xmm_rm_r $I8X16 (SseOpcode.Pmaxub) src1 src2))
|
||||
|
||||
;; Helper for creating `pmaxuw` instructions.
|
||||
(rule (pmaxub src1 src2) (xmm_rm_r $I8X16 (SseOpcode.Pmaxub) src1 src2))
|
||||
(decl pmaxuw (Xmm XmmMem) Xmm)
|
||||
(rule (pmaxuw src1 src2)
|
||||
(xmm_rm_r $I8X16 (SseOpcode.Pmaxuw) src1 src2))
|
||||
|
||||
;; Helper for creating `pmaxud` instructions.
|
||||
(rule (pmaxuw src1 src2) (xmm_rm_r $I8X16 (SseOpcode.Pmaxuw) src1 src2))
|
||||
(decl pmaxud (Xmm XmmMem) Xmm)
|
||||
(rule (pmaxud src1 src2)
|
||||
(xmm_rm_r $I8X16 (SseOpcode.Pmaxud) src1 src2))
|
||||
(rule (pmaxud src1 src2) (xmm_rm_r $I8X16 (SseOpcode.Pmaxud) src1 src2))
|
||||
|
||||
;; Helper for creating `pminub` instructions.
|
||||
;; Helpers for creating `pminu*` instructions.
|
||||
(decl pminu (Type Xmm XmmMem) Xmm)
|
||||
(rule (pminu $I8X16 x y) (pminub x y))
|
||||
(rule (pminu $I16X8 x y) (pminuw x y))
|
||||
(rule (pminu $I32X4 x y) (pminud x y))
|
||||
;; No $I64X2 version (PMINUQ) in SSE4.1.
|
||||
(decl pminub (Xmm XmmMem) Xmm)
|
||||
(rule (pminub src1 src2)
|
||||
(xmm_rm_r $I8X16 (SseOpcode.Pminub) src1 src2))
|
||||
|
||||
;; Helper for creating `pminuw` instructions.
|
||||
(rule (pminub src1 src2) (xmm_rm_r $I8X16 (SseOpcode.Pminub) src1 src2))
|
||||
(decl pminuw (Xmm XmmMem) Xmm)
|
||||
(rule (pminuw src1 src2)
|
||||
(xmm_rm_r $I8X16 (SseOpcode.Pminuw) src1 src2))
|
||||
|
||||
;; Helper for creating `pminud` instructions.
|
||||
(rule (pminuw src1 src2) (xmm_rm_r $I8X16 (SseOpcode.Pminuw) src1 src2))
|
||||
(decl pminud (Xmm XmmMem) Xmm)
|
||||
(rule (pminud src1 src2)
|
||||
(xmm_rm_r $I8X16 (SseOpcode.Pminud) src1 src2))
|
||||
(rule (pminud src1 src2) (xmm_rm_r $I8X16 (SseOpcode.Pminud) src1 src2))
|
||||
|
||||
;; Helper for creating `punpcklbw` instructions.
|
||||
(decl punpcklbw (Xmm XmmMem) Xmm)
|
||||
@@ -2498,6 +2503,38 @@
|
||||
(_ Unit (emit (MInst.XmmRmR (SseOpcode.Maxpd) x y dst))))
|
||||
dst))
|
||||
|
||||
;; Helpers for creating `pcmpeq*` instructions.
|
||||
(decl pcmpeq (Type Xmm XmmMem) Xmm)
|
||||
(rule (pcmpeq $I8X16 x y) (pcmpeqb x y))
|
||||
(rule (pcmpeq $I16X8 x y) (pcmpeqw x y))
|
||||
(rule (pcmpeq $I32X4 x y) (pcmpeqd x y))
|
||||
(rule (pcmpeq $I64X2 x y) (pcmpeqq x y))
|
||||
|
||||
(decl pcmpeqb (Xmm XmmMem) Xmm)
|
||||
(rule (pcmpeqb x y) (xmm_rm_r $I8X16 (SseOpcode.Pcmpeqb) x y))
|
||||
(decl pcmpeqw (Xmm XmmMem) Xmm)
|
||||
(rule (pcmpeqw x y) (xmm_rm_r $I16X8 (SseOpcode.Pcmpeqw) x y))
|
||||
(decl pcmpeqd (Xmm XmmMem) Xmm)
|
||||
(rule (pcmpeqd x y) (xmm_rm_r $I32X4 (SseOpcode.Pcmpeqd) x y))
|
||||
(decl pcmpeqq (Xmm XmmMem) Xmm)
|
||||
(rule (pcmpeqq x y) (xmm_rm_r $I64X2 (SseOpcode.Pcmpeqq) x y))
|
||||
|
||||
;; Helpers for creating `pcmpgt*` instructions.
|
||||
(decl pcmpgt (Type Xmm XmmMem) Xmm)
|
||||
(rule (pcmpgt $I8X16 x y) (pcmpgtb x y))
|
||||
(rule (pcmpgt $I16X8 x y) (pcmpgtw x y))
|
||||
(rule (pcmpgt $I32X4 x y) (pcmpgtd x y))
|
||||
(rule (pcmpgt $I64X2 x y) (pcmpgtq x y))
|
||||
|
||||
(decl pcmpgtb (Xmm XmmMem) Xmm)
|
||||
(rule (pcmpgtb x y) (xmm_rm_r $I8X16 (SseOpcode.Pcmpgtb) x y))
|
||||
(decl pcmpgtw (Xmm XmmMem) Xmm)
|
||||
(rule (pcmpgtw x y) (xmm_rm_r $I16X8 (SseOpcode.Pcmpgtw) x y))
|
||||
(decl pcmpgtd (Xmm XmmMem) Xmm)
|
||||
(rule (pcmpgtd x y) (xmm_rm_r $I32X4 (SseOpcode.Pcmpgtd) x y))
|
||||
(decl pcmpgtq (Xmm XmmMem) Xmm)
|
||||
(rule (pcmpgtq x y) (xmm_rm_r $I64X2 (SseOpcode.Pcmpgtq) x y))
|
||||
|
||||
;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(convert Gpr InstOutput output_gpr)
|
||||
@@ -2547,6 +2584,8 @@
|
||||
(convert SyntheticAmode GprMem synthetic_amode_to_gpr_mem)
|
||||
(convert SyntheticAmode XmmMem synthetic_amode_to_xmm_mem)
|
||||
|
||||
(convert IntCC CC intcc_to_cc)
|
||||
|
||||
(decl reg_to_xmm_mem (Reg) XmmMem)
|
||||
(rule (reg_to_xmm_mem r)
|
||||
(xmm_to_xmm_mem (xmm_new r)))
|
||||
|
||||
@@ -1440,6 +1440,107 @@
|
||||
(rule (lower (resumable_trap code))
|
||||
(safepoint (ud2 code)))
|
||||
|
||||
;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; For GPR-held values we only need to emit `CMP + SETCC`. We rely here on
|
||||
;; Cranelift's verification that `a` and `b` are of the same type.
|
||||
;; Unfortunately for clarity, the operands to `cmp` are flipped here: `b` comes first, then `a` (TODO).
|
||||
(rule (lower (icmp cc a @ (value_type (fits_in_64 ty)) b))
|
||||
(let ((size OperandSize (raw_operand_size_of_type ty)))
|
||||
(with_flags (cmp size b a) (setcc cc))))
|
||||
|
||||
;; For XMM-held values, we lower to `PCMP*` instructions, sometimes more than
|
||||
;; one. To note: what is different here about the output values is that each
|
||||
;; lane will be filled with all 1s or all 0s according to the comparison,
|
||||
;; whereas for GPR-held values, the result will be simply 0 or 1 (upper bits
|
||||
;; unset).
|
||||
(rule (lower (icmp (IntCC.Equal) a @ (value_type (vec128 ty)) b))
|
||||
(pcmpeq ty a b))
|
||||
;; To lower a not-equals comparison, we perform an equality comparison
|
||||
;; (PCMPEQ*) and then invert the bits (PXOR with all 1s).
|
||||
(rule (lower (icmp (IntCC.NotEqual) a @ (value_type (vec128 ty)) b))
|
||||
(let ((checked Xmm (pcmpeq ty a b))
|
||||
(all_ones Xmm (vector_all_ones ty)))
|
||||
(pxor checked all_ones)))
|
||||
;; Signed comparisons have a single-instruction lowering, unlike their unsigned
|
||||
;; counterparts. The unsigned lowerings take the unsigned min/max
|
||||
;; (PMINU*/PMAXU*), compare it to `b` (PCMPEQ*), and negate the result (PXOR with all 1s).
|
||||
(rule (lower (icmp (IntCC.SignedGreaterThan) a @ (value_type (vec128 ty)) b))
|
||||
(pcmpgt ty a b))
|
||||
(rule (lower (icmp (IntCC.SignedLessThan) a @ (value_type (vec128 ty)) b))
|
||||
(pcmpgt ty b a))
|
||||
(rule (lower (icmp (IntCC.UnsignedGreaterThan) a @ (value_type (vec128 ty)) b))
|
||||
(let ((max Xmm (pmaxu ty a b))
|
||||
(eq Xmm (pcmpeq ty max b))
|
||||
(all_ones Xmm (vector_all_ones ty)))
|
||||
(pxor eq all_ones)))
|
||||
(rule (lower (icmp (IntCC.UnsignedLessThan) a @ (value_type (vec128 ty)) b))
|
||||
(let ((min Xmm (pminu ty a b))
|
||||
(eq Xmm (pcmpeq ty min b))
|
||||
(all_ones Xmm (vector_all_ones ty)))
|
||||
(pxor eq all_ones)))
|
||||
;; To lower signed and unsigned *-or-equals comparisons, we find the minimum or
|
||||
;; maximum number (PMIN[U|S]* or PMAX[U|S]*) and compare that to one of the terms (PCMPEQ*). Note that
|
||||
;; there is no 64x2 version of this lowering (see below).
|
||||
(rule (lower (icmp (IntCC.SignedGreaterThanOrEqual) a @ (value_type (vec128 ty)) b))
|
||||
(let ((max Xmm (pmaxs ty a b)))
|
||||
(pcmpeq ty a max)))
|
||||
(rule (lower (icmp (IntCC.SignedLessThanOrEqual) a @ (value_type (vec128 ty)) b))
|
||||
(let ((min Xmm (pmins ty a b)))
|
||||
(pcmpeq ty a min)))
|
||||
(rule (lower (icmp (IntCC.UnsignedGreaterThanOrEqual) a @ (value_type (vec128 ty)) b))
|
||||
(let ((max Xmm (pmaxu ty a b)))
|
||||
(pcmpeq ty a max)))
|
||||
(rule (lower (icmp (IntCC.UnsignedLessThanOrEqual) a @ (value_type (vec128 ty)) b))
|
||||
(let ((min Xmm (pminu ty a b)))
|
||||
(pcmpeq ty a min)))
|
||||
;; The PMIN[S|U]Q instruction is only available in AVX512VL/F so we must instead
|
||||
;; compare with flipped operands (PCMPGT*) and negate the result (PXOR with all
|
||||
;; 1s), emitting one more instruction than the smaller-lane versions.
|
||||
(rule (lower (icmp (IntCC.SignedGreaterThanOrEqual) a @ (value_type $I64X2) b))
|
||||
(let ((checked Xmm (pcmpgt $I64X2 b a))
|
||||
(all_ones Xmm (vector_all_ones $I64X2)))
|
||||
(pxor checked all_ones)))
|
||||
(rule (lower (icmp (IntCC.SignedLessThanOrEqual) a @ (value_type $I64X2) b))
|
||||
(let ((checked Xmm (pcmpgt $I64X2 a b))
|
||||
(all_ones Xmm (vector_all_ones $I64X2)))
|
||||
(pxor checked all_ones)))
|
||||
;; TODO: not used by WebAssembly translation
|
||||
;; (rule (lower (icmp (IntCC.UnsignedGreaterThanOrEqual) a @ (value_type $I64X2) b))
|
||||
;; TODO: not used by WebAssembly translation
|
||||
;; (rule (lower (icmp (IntCC.UnsignedLessThanOrEqual) a @ (value_type $I64X2) b))
|
||||
|
||||
;; For I128 values (held in two GPRs), the instruction sequences depend on what
|
||||
;; kind of condition is tested.
|
||||
(rule (lower (icmp (IntCC.Equal) a @ (value_type $I128) b))
|
||||
(let ((a_lo Gpr (value_regs_get_gpr a 0))
|
||||
(a_hi Gpr (value_regs_get_gpr a 1))
|
||||
(b_lo Gpr (value_regs_get_gpr b 0))
|
||||
(b_hi Gpr (value_regs_get_gpr b 1))
|
||||
(cmp_lo Reg (with_flags_reg (cmp (OperandSize.Size64) b_lo a_lo) (setcc (CC.Z))))
|
||||
(cmp_hi Reg (with_flags_reg (cmp (OperandSize.Size64) b_hi a_hi) (setcc (CC.Z))))
|
||||
;; At this point, `cmp_lo` and `cmp_hi` contain either 0 or 1 in the
|
||||
;; lowest 8 bits--`SETcc` guarantees this. The upper bits may be
|
||||
;; unchanged so we must compare against 1; this instruction combines
|
||||
;; `cmp_lo` and `cmp_hi` for that final comparison.
|
||||
(cmp Reg (x64_and $I64 cmp_lo cmp_hi)))
|
||||
;; We can use the flag-setting behavior of `AND` to produce the final
|
||||
;; result. If the result of `AND` is zero, then `ZF` will be set,
|
||||
;; which means at least one pair of halves was not equal;
|
||||
;; therefore we `SETcc` with `NZ`.
|
||||
(with_flags (x64_and_with_flags_paired $I64 cmp (RegMemImm.Imm 1)) (setcc (CC.NZ)))))
|
||||
|
||||
(rule (lower (icmp (IntCC.NotEqual) a @ (value_type $I128) b))
|
||||
(let ((a_lo Gpr (value_regs_get_gpr a 0))
|
||||
(a_hi Gpr (value_regs_get_gpr a 1))
|
||||
(b_lo Gpr (value_regs_get_gpr b 0))
|
||||
(b_hi Gpr (value_regs_get_gpr b 1))
|
||||
(cmp_lo Reg (with_flags_reg (cmp (OperandSize.Size64) b_lo a_lo) (setcc (CC.NZ))))
|
||||
(cmp_hi Reg (with_flags_reg (cmp (OperandSize.Size64) b_hi a_hi) (setcc (CC.NZ))))
|
||||
;; See comments for `IntCC.Equal`.
|
||||
(cmp Reg (or $I64 cmp_lo cmp_hi)))
|
||||
(with_flags (x64_and_with_flags_paired $I64 cmp (RegMemImm.Imm 1)) (setcc (CC.NZ)))))
|
||||
|
||||
;;;; Rules for `select` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; CLIF `select` instructions receive a testable argument (i.e. boolean or
|
||||
@@ -1715,8 +1816,8 @@
|
||||
(mul Gpr (mul $I32 masked4 (RegMemImm.Imm 0x01010101)))
|
||||
(final Gpr (shr $I32 mul (Imm8Reg.Imm8 24))))
|
||||
final))
|
||||
|
||||
|
||||
|
||||
|
||||
(rule 1 (lower (has_type (and
|
||||
$I8X16
|
||||
(avx512vl_enabled)
|
||||
@@ -1725,7 +1826,7 @@
|
||||
(vpopcntb src))
|
||||
|
||||
|
||||
|
||||
|
||||
;; For SSE 4.2 we use Mula's algorithm (https://arxiv.org/pdf/1611.07612.pdf):
|
||||
;;
|
||||
;; __m128i count_bytes ( __m128i v) {
|
||||
@@ -1807,7 +1908,7 @@
|
||||
(shl ty lo4 (Imm8Reg.Imm8 4))
|
||||
hi4)))
|
||||
swap4))
|
||||
|
||||
|
||||
(decl do_bitrev16 (Type Gpr) Gpr)
|
||||
(rule (do_bitrev16 ty src)
|
||||
(let ((src_ Gpr (do_bitrev8 ty src))
|
||||
@@ -1819,7 +1920,7 @@
|
||||
(shl ty lo8 (Imm8Reg.Imm8 8))
|
||||
hi8)))
|
||||
swap8))
|
||||
|
||||
|
||||
(decl do_bitrev32 (Type Gpr) Gpr)
|
||||
(rule (do_bitrev32 ty src)
|
||||
(let ((src_ Gpr (do_bitrev16 ty src))
|
||||
|
||||
@@ -920,145 +920,12 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
let condcode = ctx.data(insn).cond_code().unwrap();
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
let ty = ctx.input_ty(insn, 0);
|
||||
if !ty.is_vector() {
|
||||
if ty == types::I128 && condcode != IntCC::Equal && condcode != IntCC::NotEqual {
|
||||
let condcode = emit_cmp(ctx, insn, condcode);
|
||||
let cc = CC::from_intcc(condcode);
|
||||
ctx.emit(Inst::setcc(cc, dst));
|
||||
} else {
|
||||
assert_eq!(ty.bits(), 128);
|
||||
let eq = |ty| match ty {
|
||||
types::I8X16 => SseOpcode::Pcmpeqb,
|
||||
types::I16X8 => SseOpcode::Pcmpeqw,
|
||||
types::I32X4 => SseOpcode::Pcmpeqd,
|
||||
types::I64X2 => SseOpcode::Pcmpeqq,
|
||||
_ => panic!(
|
||||
"Unable to find an instruction for {} for type: {}",
|
||||
condcode, ty
|
||||
),
|
||||
};
|
||||
let gt = |ty| match ty {
|
||||
types::I8X16 => SseOpcode::Pcmpgtb,
|
||||
types::I16X8 => SseOpcode::Pcmpgtw,
|
||||
types::I32X4 => SseOpcode::Pcmpgtd,
|
||||
types::I64X2 => SseOpcode::Pcmpgtq,
|
||||
_ => panic!(
|
||||
"Unable to find an instruction for {} for type: {}",
|
||||
condcode, ty
|
||||
),
|
||||
};
|
||||
let maxu = |ty| match ty {
|
||||
types::I8X16 => SseOpcode::Pmaxub,
|
||||
types::I16X8 => SseOpcode::Pmaxuw,
|
||||
types::I32X4 => SseOpcode::Pmaxud,
|
||||
_ => panic!(
|
||||
"Unable to find an instruction for {} for type: {}",
|
||||
condcode, ty
|
||||
),
|
||||
};
|
||||
let mins = |ty| match ty {
|
||||
types::I8X16 => SseOpcode::Pminsb,
|
||||
types::I16X8 => SseOpcode::Pminsw,
|
||||
types::I32X4 => SseOpcode::Pminsd,
|
||||
_ => panic!(
|
||||
"Unable to find an instruction for {} for type: {}",
|
||||
condcode, ty
|
||||
),
|
||||
};
|
||||
let minu = |ty| match ty {
|
||||
types::I8X16 => SseOpcode::Pminub,
|
||||
types::I16X8 => SseOpcode::Pminuw,
|
||||
types::I32X4 => SseOpcode::Pminud,
|
||||
_ => panic!(
|
||||
"Unable to find an instruction for {} for type: {}",
|
||||
condcode, ty
|
||||
),
|
||||
};
|
||||
|
||||
// Here we decide which operand to use as the read/write `dst` (ModRM reg field) and
|
||||
// which to use as the read `input` (ModRM r/m field). In the normal case we use
|
||||
// Cranelift's first operand, the `lhs`, as `dst` but we flip the operands for the
|
||||
// less-than cases so that we can reuse the greater-than implementation.
|
||||
//
|
||||
// In a surprising twist, the operands for i64x2 `sge`/`sle` must also be flipped
|
||||
// from the normal order because of the special-case lowering for these instructions
|
||||
// (i.e. we use PCMPGTQ with flipped operands and negate the result).
|
||||
let input = match condcode {
|
||||
IntCC::SignedLessThanOrEqual if ty == types::I64X2 => {
|
||||
let lhs = put_input_in_reg(ctx, inputs[0]);
|
||||
let rhs = input_to_reg_mem(ctx, inputs[1]);
|
||||
ctx.emit(Inst::gen_move(dst, lhs, ty));
|
||||
rhs
|
||||
}
|
||||
IntCC::SignedGreaterThanOrEqual if ty == types::I64X2 => {
|
||||
let lhs = input_to_reg_mem(ctx, inputs[0]);
|
||||
let rhs = put_input_in_reg(ctx, inputs[1]);
|
||||
ctx.emit(Inst::gen_move(dst, rhs, ty));
|
||||
lhs
|
||||
}
|
||||
IntCC::SignedLessThan
|
||||
| IntCC::SignedLessThanOrEqual
|
||||
| IntCC::UnsignedLessThan
|
||||
| IntCC::UnsignedLessThanOrEqual => {
|
||||
let lhs = input_to_reg_mem(ctx, inputs[0]);
|
||||
let rhs = put_input_in_reg(ctx, inputs[1]);
|
||||
ctx.emit(Inst::gen_move(dst, rhs, ty));
|
||||
lhs
|
||||
}
|
||||
_ => {
|
||||
let lhs = put_input_in_reg(ctx, inputs[0]);
|
||||
let rhs = input_to_reg_mem(ctx, inputs[1]);
|
||||
ctx.emit(Inst::gen_move(dst, lhs, ty));
|
||||
rhs
|
||||
}
|
||||
};
|
||||
|
||||
match condcode {
|
||||
IntCC::Equal => ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst)),
|
||||
IntCC::NotEqual => {
|
||||
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst));
|
||||
// Emit all 1s into the `tmp` register.
|
||||
let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp));
|
||||
// Invert the result of the `PCMPEQ*`.
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), dst));
|
||||
}
|
||||
IntCC::SignedGreaterThan | IntCC::SignedLessThan => {
|
||||
ctx.emit(Inst::xmm_rm_r(gt(ty), input, dst))
|
||||
}
|
||||
IntCC::SignedGreaterThanOrEqual | IntCC::SignedLessThanOrEqual
|
||||
if ty != types::I64X2 =>
|
||||
{
|
||||
ctx.emit(Inst::xmm_rm_r(mins(ty), input.clone(), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst))
|
||||
}
|
||||
IntCC::SignedGreaterThanOrEqual | IntCC::SignedLessThanOrEqual
|
||||
if ty == types::I64X2 =>
|
||||
{
|
||||
// The PMINS* instruction is only available in AVX512VL/F so we must instead
|
||||
// compare with flipped operands and negate the result (emitting one more
|
||||
// instruction).
|
||||
ctx.emit(Inst::xmm_rm_r(gt(ty), input, dst));
|
||||
// Emit all 1s into the `tmp` register.
|
||||
let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp));
|
||||
// Invert the result of the `PCMPGT*`.
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), dst));
|
||||
}
|
||||
IntCC::UnsignedGreaterThan | IntCC::UnsignedLessThan => {
|
||||
ctx.emit(Inst::xmm_rm_r(maxu(ty), input.clone(), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst));
|
||||
// Emit all 1s into the `tmp` register.
|
||||
let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp));
|
||||
// Invert the result of the `PCMPEQ*`.
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), dst));
|
||||
}
|
||||
IntCC::UnsignedGreaterThanOrEqual | IntCC::UnsignedLessThanOrEqual => {
|
||||
ctx.emit(Inst::xmm_rm_r(minu(ty), input.clone(), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst))
|
||||
}
|
||||
_ => unimplemented!("Unimplemented comparison code for icmp: {}", condcode),
|
||||
}
|
||||
implemented_in_isle(ctx);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -9,8 +9,10 @@ use regalloc::Writable;
|
||||
use super::{is_int_or_ref_ty, is_mergeable_load, lower_to_amode, Reg};
|
||||
use crate::{
|
||||
ir::{
|
||||
condcodes::FloatCC, immediates::*, types::*, Inst, InstructionData, Opcode, TrapCode,
|
||||
Value, ValueLabel, ValueList,
|
||||
condcodes::{FloatCC, IntCC},
|
||||
immediates::*,
|
||||
types::*,
|
||||
Inst, InstructionData, Opcode, TrapCode, Value, ValueLabel, ValueList,
|
||||
},
|
||||
isa::{
|
||||
settings::Flags,
|
||||
@@ -512,6 +514,11 @@ where
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn intcc_to_cc(&mut self, intcc: &IntCC) -> CC {
|
||||
CC::from_intcc(*intcc)
|
||||
}
|
||||
}
|
||||
|
||||
// Since x64 doesn't have 8x16 shifts and we must use a 16x8 shift instead, we
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
src/clif.isle 9ea75a6f790b5c03
|
||||
src/prelude.isle b2bc986bcbbbb77
|
||||
src/isa/x64/inst.isle cdd292107fb36cf
|
||||
src/isa/x64/lower.isle c049f7d36db0e0fb
|
||||
src/isa/x64/inst.isle 5644ccb29bff0b51
|
||||
src/isa/x64/lower.isle 51d6ce13a3e47bc4
|
||||
|
||||
1260
cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs
generated
1260
cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs
generated
File diff suppressed because it is too large
Load Diff
@@ -241,20 +241,20 @@ block0(v0: i128, v1: i128):
|
||||
; Inst 5: movq %r14, 32(%rsp)
|
||||
; Inst 6: movq %rbx, 40(%rsp)
|
||||
; Inst 7: movq %r15, 48(%rsp)
|
||||
; Inst 8: cmpq %rcx, %rsi
|
||||
; Inst 8: cmpq %rdx, %rdi
|
||||
; Inst 9: setz %al
|
||||
; Inst 10: cmpq %rdx, %rdi
|
||||
; Inst 10: cmpq %rcx, %rsi
|
||||
; Inst 11: setz %r8b
|
||||
; Inst 12: andq %rax, %r8
|
||||
; Inst 13: andq $1, %r8
|
||||
; Inst 12: andq %r8, %rax
|
||||
; Inst 13: andq $1, %rax
|
||||
; Inst 14: setnz %al
|
||||
; Inst 15: movq %rax, rsp(0 + virtual offset)
|
||||
; Inst 16: cmpq %rcx, %rsi
|
||||
; Inst 16: cmpq %rdx, %rdi
|
||||
; Inst 17: setnz %al
|
||||
; Inst 18: cmpq %rdx, %rdi
|
||||
; Inst 18: cmpq %rcx, %rsi
|
||||
; Inst 19: setnz %r8b
|
||||
; Inst 20: orq %rax, %r8
|
||||
; Inst 21: andq $1, %r8
|
||||
; Inst 20: orq %r8, %rax
|
||||
; Inst 21: andq $1, %rax
|
||||
; Inst 22: setnz %r8b
|
||||
; Inst 23: cmpq %rcx, %rsi
|
||||
; Inst 24: setl %r9b
|
||||
|
||||
@@ -55,14 +55,16 @@ block0(v0: i16x8, v1: i16x8):
|
||||
; Entry block: 0
|
||||
; Block 0:
|
||||
; (original IR block: block0)
|
||||
; (instruction range: 0 .. 7)
|
||||
; (instruction range: 0 .. 9)
|
||||
; Inst 0: pushq %rbp
|
||||
; Inst 1: movq %rsp, %rbp
|
||||
; Inst 2: pminsw %xmm1, %xmm0
|
||||
; Inst 3: pcmpeqw %xmm1, %xmm0
|
||||
; Inst 4: movq %rbp, %rsp
|
||||
; Inst 5: popq %rbp
|
||||
; Inst 6: ret
|
||||
; Inst 2: movdqa %xmm1, %xmm2
|
||||
; Inst 3: movdqa %xmm0, %xmm1
|
||||
; Inst 4: pmaxsw %xmm2, %xmm1
|
||||
; Inst 5: pcmpeqw %xmm1, %xmm0
|
||||
; Inst 6: movq %rbp, %rsp
|
||||
; Inst 7: popq %rbp
|
||||
; Inst 8: ret
|
||||
; }}
|
||||
|
||||
function %icmp_uge_i8x16(i8x16, i8x16) -> b8x16 {
|
||||
@@ -75,13 +77,15 @@ block0(v0: i8x16, v1: i8x16):
|
||||
; Entry block: 0
|
||||
; Block 0:
|
||||
; (original IR block: block0)
|
||||
; (instruction range: 0 .. 7)
|
||||
; (instruction range: 0 .. 9)
|
||||
; Inst 0: pushq %rbp
|
||||
; Inst 1: movq %rsp, %rbp
|
||||
; Inst 2: pminub %xmm1, %xmm0
|
||||
; Inst 3: pcmpeqb %xmm1, %xmm0
|
||||
; Inst 4: movq %rbp, %rsp
|
||||
; Inst 5: popq %rbp
|
||||
; Inst 6: ret
|
||||
; Inst 2: movdqa %xmm1, %xmm2
|
||||
; Inst 3: movdqa %xmm0, %xmm1
|
||||
; Inst 4: pmaxub %xmm2, %xmm1
|
||||
; Inst 5: pcmpeqb %xmm1, %xmm0
|
||||
; Inst 6: movq %rbp, %rsp
|
||||
; Inst 7: popq %rbp
|
||||
; Inst 8: ret
|
||||
; }}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user