From 7e0bb465d0205fd7942a48dce931a8221dccabb8 Mon Sep 17 00:00:00 2001
From: Trevor Elliott
Date: Mon, 13 Jun 2022 16:34:11 -0700
Subject: [PATCH] X64: port the rest of icmp to ISLE (#4254)

Finish migrating icmp to ISLE for x64
---
 cranelift/codegen/src/isa/x64/inst.isle     |  20 +++
 cranelift/codegen/src/isa/x64/lower.isle    |  25 ++++
 cranelift/codegen/src/isa/x64/lower.rs      |  11 +-
 cranelift/codegen/src/isa/x64/lower/isle.rs |  19 +++
 .../filetests/filetests/isa/x64/i128.clif   | 120 ++++++++----------
 5 files changed, 121 insertions(+), 74 deletions(-)

diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle
index be9733e486..a3c06d26ad 100644
--- a/cranelift/codegen/src/isa/x64/inst.isle
+++ b/cranelift/codegen/src/isa/x64/inst.isle
@@ -1271,6 +1271,26 @@
 (decl lo_gpr (Value) Gpr)
 (rule (lo_gpr regs) (gpr_new (lo_reg regs)))
 
+;;;; Helpers for Working With Integer Comparison Codes ;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+
+;; A pure constructor that fails if the two arguments are equal. The first
+;; argument is returned when it does not match the second.
+(decl pure intcc_neq (IntCC IntCC) IntCC)
+(extern constructor intcc_neq intcc_neq)
+
+;; This is a direct import of `IntCC::without_equal`.
+;; Get the corresponding IntCC with the equal component removed.
+;; For conditions without a zero component, this is a no-op.
+(decl intcc_without_eq (IntCC) IntCC)
+(extern constructor intcc_without_eq intcc_without_eq)
+
+;; This is a direct import of `IntCC::unsigned`.
+;; Get the corresponding IntCC with the signed component removed.
+;; For conditions without a signed component, this is a no-op.
+(decl intcc_unsigned (IntCC) IntCC)
+(extern constructor intcc_unsigned intcc_unsigned)
+
 ;;;; Helpers for Getting Particular Physical Registers ;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; These should only be used for legalization purposes, when we can't otherwise
diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle
index 68e61d6b46..ed1cfe5579 100644
--- a/cranelift/codegen/src/isa/x64/lower.isle
+++ b/cranelift/codegen/src/isa/x64/lower.isle
@@ -1459,6 +1459,7 @@
 ;; unset).
 (rule (lower (icmp (IntCC.Equal) a @ (value_type (ty_vec128 ty)) b))
       (x64_pcmpeq ty a b))
+
 ;; To lower a not-equals comparison, we perform an equality comparison
 ;; (PCMPEQ*) and then invert the bits (PXOR with all 1s).
(rule (lower (icmp (IntCC.NotEqual) a @ (value_type (ty_vec128 ty)) b)) @@ -1553,6 +1554,30 @@ (cmp Reg (x64_or $I64 cmp_lo cmp_hi))) (with_flags (x64_test (OperandSize.Size64) (RegMemImm.Imm 1) cmp) (x64_setcc (CC.NZ))))) +;; Result = (a_hi <> b_hi) || +;; (a_hi == b_hi && a_lo <> b_lo) +(rule (lower (icmp cc a @ (value_type $I128) b)) + (if (intcc_neq cc (IntCC.Equal))) + (if (intcc_neq cc (IntCC.NotEqual))) + (let ((a_lo Gpr (value_regs_get_gpr a 0)) + (a_hi Gpr (value_regs_get_gpr a 1)) + (b_lo Gpr (value_regs_get_gpr b 0)) + (b_hi Gpr (value_regs_get_gpr b 1)) + (cmp_hi ValueRegs (with_flags (x64_cmp (OperandSize.Size64) b_hi a_hi) + (consumes_flags_concat + (x64_setcc (intcc_without_eq cc)) + (x64_setcc (CC.Z))))) + (cc_hi Reg (value_regs_get cmp_hi 0)) + (eq_hi Reg (value_regs_get cmp_hi 1)) + + (cmp_lo Reg (with_flags_reg (x64_cmp (OperandSize.Size64) b_lo a_lo) + (x64_setcc (intcc_unsigned cc)))) + + (res_lo Reg (x64_and $I64 eq_hi cmp_lo)) + (res Reg (x64_or $I64 cc_hi res_lo))) + (x64_and $I64 res (RegMemImm.Imm 1)))) + + ;;;; Rules for `fcmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; CLIF's `fcmp` instruction always operates on XMM registers--both scalar and diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index fc8960be8f..9702df8d17 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -903,16 +903,7 @@ fn lower_insn_to_regs>( | Opcode::FmaxPseudo => implemented_in_isle(ctx), Opcode::Icmp => { - let condcode = ctx.data(insn).cond_code().unwrap(); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let ty = ctx.input_ty(insn, 0); - if ty == types::I128 && condcode != IntCC::Equal && condcode != IntCC::NotEqual { - let condcode = emit_cmp(ctx, insn, condcode); - let cc = CC::from_intcc(condcode); - ctx.emit(Inst::setcc(cc, dst)); - } else { - implemented_in_isle(ctx); - } + implemented_in_isle(ctx); } Opcode::Fcmp => { diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index 2c428ec292..f5948d75a5 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -511,6 +511,25 @@ where } } + #[inline] + fn intcc_neq(&mut self, x: &IntCC, y: &IntCC) -> Option { + if x != y { + Some(*x) + } else { + None + } + } + + #[inline] + fn intcc_without_eq(&mut self, x: &IntCC) -> IntCC { + x.without_equal() + } + + #[inline] + fn intcc_unsigned(&mut self, x: &IntCC) -> IntCC { + x.unsigned() + } + #[inline] fn intcc_to_cc(&mut self, intcc: &IntCC) -> CC { CC::from_intcc(*intcc) diff --git a/cranelift/filetests/filetests/isa/x64/i128.clif b/cranelift/filetests/filetests/isa/x64/i128.clif index e8ebeb5bb8..cc1c8e444e 100644 --- a/cranelift/filetests/filetests/isa/x64/i128.clif +++ b/cranelift/filetests/filetests/isa/x64/i128.clif @@ -193,100 +193,92 @@ block0(v0: i128, v1: i128): ; movq %r15, 48(%rsp) ; block0: ; cmpq %rdx, %rdi -; setz %r8b +; setz %al ; cmpq %rcx, %rsi -; setz %r9b -; andq %r8, %r9, %r8 -; testq $1, %r8 +; setz %r8b +; andq %rax, %r8, %rax +; testq $1, %rax ; setnz %al ; cmpq %rdx, %rdi -; setnz %r9b +; setnz %r8b ; cmpq %rcx, %rsi -; setnz %r10b -; orq %r9, %r10, %r9 -; testq $1, %r9 +; setnz %r9b +; orq %r8, %r9, %r8 +; testq $1, %r8 ; setnz %r8b ; movq %r8, rsp(0 + virtual offset) ; cmpq %rcx, %rsi -; setl %r11b +; setl %r8b ; setz %r10b ; cmpq %rdx, %rdi -; setb %r9b -; andq %r9, %r10, %r9 -; orq %r9, %r11, %r9 -; andq %r9, $1, %r9 -; setnz %r9b +; 
setb %r11b +; andq %r10, %r11, %r10 +; orq %r8, %r10, %r8 +; andq %r8, $1, %r8 ; cmpq %rcx, %rsi ; setl %r10b -; setz %bl +; setz %r11b ; cmpq %rdx, %rdi -; setbe %r11b -; andq %r11, %rbx, %r11 -; orq %r11, %r10, %r11 -; andq %r11, $1, %r11 -; setnz %r10b +; setbe %r13b +; andq %r11, %r13, %r11 +; orq %r10, %r11, %r10 +; andq %r10, $1, %r10 ; cmpq %rcx, %rsi ; setnle %r11b -; setz %r12b -; cmpq %rdx, %rdi -; setnbe %r15b -; andq %r15, %r12, %r15 -; orq %r15, %r11, %r15 -; andq %r15, $1, %r15 -; setnz %r11b -; cmpq %rcx, %rsi -; setnle %bl -; setz %r13b -; cmpq %rdx, %rdi -; setnb %r12b -; andq %r12, %r13, %r12 -; orq %r12, %rbx, %r12 -; andq %r12, $1, %r12 -; setnz %bl -; cmpq %rcx, %rsi -; setb %r12b ; setz %r14b ; cmpq %rdx, %rdi -; setb %r13b -; andq %r13, %r14, %r13 -; orq %r13, %r12, %r13 -; andq %r13, $1, %r13 -; setnz %r12b +; setnbe %r15b +; andq %r14, %r15, %r14 +; orq %r11, %r14, %r11 +; andq %r11, $1, %r11 +; cmpq %rcx, %rsi +; setnle %r12b +; setz %bl +; cmpq %rdx, %rdi +; setnb %r13b +; andq %rbx, %r13, %rbx +; orq %r12, %rbx, %r12 +; andq %r12, $1, %r12 ; cmpq %rcx, %rsi ; setb %r13b +; setz %r14b +; cmpq %rdx, %rdi +; setb %r15b +; andq %r14, %r15, %r14 +; orq %r13, %r14, %r13 +; andq %r13, $1, %r13 +; cmpq %rcx, %rsi +; setb %bl ; setz %r15b ; cmpq %rdx, %rdi ; setbe %r14b -; andq %r14, %r15, %r14 -; orq %r14, %r13, %r14 -; andq %r14, $1, %r14 -; setnz %r13b +; andq %r15, %r14, %r15 +; orq %rbx, %r15, %rbx +; andq %rbx, $1, %rbx ; cmpq %rcx, %rsi ; setnbe %r14b -; setz %r8b +; setz %r15b ; cmpq %rdx, %rdi -; setnbe %r15b -; andq %r15, %r8, %r15 -; orq %r15, %r14, %r15 -; andq %r15, $1, %r15 -; setnz %r14b +; setnbe %r9b +; andq %r15, %r9, %r15 +; orq %r14, %r15, %r14 +; andq %r14, $1, %r14 ; cmpq %rcx, %rsi ; setnbe %sil ; setz %cl ; cmpq %rdx, %rdi -; setnb %dil -; andq %rdi, %rcx, %rdi -; orq %rdi, %rsi, %rdi -; andq %rdi, $1, %rdi -; setnz %dil -; movq rsp(0 + virtual offset), %rsi -; andl %eax, %esi, %eax -; andl %r9d, %r10d, %r9d -; andl %r11d, %ebx, %r11d -; andl %r12d, %r13d, %r12d -; andl %r14d, %edi, %r14d +; setnb %dl +; andq %rcx, %rdx, %rcx +; orq %rsi, %rcx, %rsi +; andq %rsi, $1, %rsi +; movq rsp(0 + virtual offset), %r9 ; andl %eax, %r9d, %eax +; andl %r8d, %r10d, %r8d ; andl %r11d, %r12d, %r11d +; andl %r13d, %ebx, %r13d +; andl %r14d, %esi, %r14d +; andl %eax, %r8d, %eax +; andl %r11d, %r13d, %r11d ; andl %eax, %r11d, %eax ; andl %eax, %r14d, %eax ; movq 16(%rsp), %rbx
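
As a standalone illustration (not part of this patch), the Rust sketch below mirrors the decomposition the new `lower.isle` rule implements for a non-equality condition such as `IntCC.SignedLessThanOrEqual`: compare the high halves with the equal component removed (`IntCC::without_equal`), check the high halves for equality, and compare the low halves with the unsigned form of the condition (`IntCC::unsigned`). The function name `i128_sle` and the test constants are illustrative only, not Cranelift code.

// Standalone sketch of the i128 comparison decomposition used by the new
// ISLE rule; signed <= on i128 rebuilt from 64-bit halves (illustrative only).
fn i128_sle(a: i128, b: i128) -> bool {
    // Split into an unsigned low half and a signed high half.
    let (a_lo, a_hi) = (a as u64, (a >> 64) as i64);
    let (b_lo, b_hi) = (b as u64, (b >> 64) as i64);

    // High halves: the condition with its equal component removed
    // (`IntCC::without_equal`): SignedLessThanOrEqual -> SignedLessThan.
    let cc_hi = a_hi < b_hi;
    // High halves equal (the extra `setz` in the lowering).
    let eq_hi = a_hi == b_hi;
    // Low halves: the unsigned form of the condition (`IntCC::unsigned`):
    // SignedLessThanOrEqual -> UnsignedLessThanOrEqual.
    let cmp_lo = a_lo <= b_lo;

    // Result = (a_hi <> b_hi) || (a_hi == b_hi && a_lo <> b_lo)
    cc_hi || (eq_hi && cmp_lo)
}

fn main() {
    let samples = [i128::MIN, -1, 0, 1, u64::MAX as i128, 1 << 64, i128::MAX];
    for &a in &samples {
        for &b in &samples {
            assert_eq!(i128_sle(a, b), a <= b);
        }
    }
    println!("decomposition agrees with native i128 comparison");
}

The final `andq ..., $1` in the generated code corresponds to masking the combined `setcc` bytes down to a single boolean bit, which the sketch models by returning a `bool` directly.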