diff --git a/.github/actions/install-rust/action.yml b/.github/actions/install-rust/action.yml index e63fcc4eb2..d8016f78e9 100644 --- a/.github/actions/install-rust/action.yml +++ b/.github/actions/install-rust/action.yml @@ -18,11 +18,6 @@ runs: - name: Install Rust shell: bash run: | - - if [[ "${{ runner.os }}" = "Windows" ]]; then - rustup self update - fi - rustup set profile minimal rustup update "${{ inputs.toolchain }}" --no-self-update rustup default "${{ inputs.toolchain }}" diff --git a/cranelift/codegen/src/egraph/cost.rs b/cranelift/codegen/src/egraph/cost.rs index fd9a22ee23..9cfb0894ca 100644 --- a/cranelift/codegen/src/egraph/cost.rs +++ b/cranelift/codegen/src/egraph/cost.rs @@ -85,11 +85,8 @@ pub(crate) fn pure_op_cost(op: Opcode) -> Cost { Opcode::Iadd | Opcode::Isub | Opcode::Band - | Opcode::BandNot | Opcode::Bor - | Opcode::BorNot | Opcode::Bxor - | Opcode::BxorNot | Opcode::Bnot | Opcode::Ishl | Opcode::Ushr diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index 1238f463aa..eccda08b60 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -580,7 +580,7 @@ (sub ty (zero_reg) x)) ;; `i128` -(rule 2 (lower (has_type $I128 (ineg x))) +(rule 2 (lower (has_type $I128 (ineg x))) (sub_i128 (value_regs_zero) x)) ;; vectors. 
@@ -1054,75 +1054,74 @@ ;;;; Rules for `band` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule -1 (lower (has_type (fits_in_32 ty) (band x y))) +(rule -1 (lower (has_type (fits_in_64 ty) (band x y))) (alu_rs_imm_logic_commutative (ALUOp.And) ty x y)) -(rule (lower (has_type $I64 (band x y))) - (alu_rs_imm_logic_commutative (ALUOp.And) $I64 x y)) - (rule (lower (has_type $I128 (band x y))) (i128_alu_bitop (ALUOp.And) $I64 x y)) (rule -2 (lower (has_type (ty_vec128 ty) (band x y))) (and_vec x y (vector_size ty))) +;; Specialized lowerings for `(band x (bnot y))` which is additionally produced +;; by Cranelift's `band_not` instruction that is legalized into the simpler +;; forms early on. + +(rule 1 (lower (has_type (fits_in_64 ty) (band x (bnot y)))) + (alu_rs_imm_logic (ALUOp.AndNot) ty x y)) +(rule 2 (lower (has_type (fits_in_64 ty) (band (bnot y) x))) + (alu_rs_imm_logic (ALUOp.AndNot) ty x y)) + +(rule 3 (lower (has_type $I128 (band x (bnot y)))) (i128_alu_bitop (ALUOp.AndNot) $I64 x y)) +(rule 4 (lower (has_type $I128 (band (bnot y) x))) (i128_alu_bitop (ALUOp.AndNot) $I64 x y)) + +(rule 5 (lower (has_type (ty_vec128 ty) (band x (bnot y)))) + (bic_vec x y (vector_size ty))) +(rule 6 (lower (has_type (ty_vec128 ty) (band (bnot y) x))) + (bic_vec x y (vector_size ty))) + ;;;; Rules for `bor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule -1 (lower (has_type (fits_in_32 ty) (bor x y))) +(rule -1 (lower (has_type (fits_in_64 ty) (bor x y))) (alu_rs_imm_logic_commutative (ALUOp.Orr) ty x y)) -(rule (lower (has_type $I64 (bor x y))) - (alu_rs_imm_logic_commutative (ALUOp.Orr) $I64 x y)) - (rule (lower (has_type $I128 (bor x y))) (i128_alu_bitop (ALUOp.Orr) $I64 x y)) (rule -2 (lower (has_type (ty_vec128 ty) (bor x y))) (orr_vec x y (vector_size ty))) +;; Specialized lowerings for `(bor x (bnot y))` which is additionally produced +;; by Cranelift's `bor_not` instruction that is legalized into the simpler +;; forms early on. 
+ +(rule 1 (lower (has_type (fits_in_64 ty) (bor x (bnot y)))) + (alu_rs_imm_logic (ALUOp.OrrNot) ty x y)) +(rule 2 (lower (has_type (fits_in_64 ty) (bor (bnot y) x))) + (alu_rs_imm_logic (ALUOp.OrrNot) ty x y)) + +(rule 3 (lower (has_type $I128 (bor x (bnot y)))) (i128_alu_bitop (ALUOp.OrrNot) $I64 x y)) +(rule 4 (lower (has_type $I128 (bor (bnot y) x))) (i128_alu_bitop (ALUOp.OrrNot) $I64 x y)) + ;;;; Rules for `bxor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule -1 (lower (has_type (fits_in_32 ty) (bxor x y))) +(rule -1 (lower (has_type (fits_in_64 ty) (bxor x y))) (alu_rs_imm_logic_commutative (ALUOp.Eor) ty x y)) -(rule (lower (has_type $I64 (bxor x y))) - (alu_rs_imm_logic_commutative (ALUOp.Eor) $I64 x y)) - (rule (lower (has_type $I128 (bxor x y))) (i128_alu_bitop (ALUOp.Eor) $I64 x y)) (rule -2 (lower (has_type (ty_vec128 ty) (bxor x y))) (eor_vec x y (vector_size ty))) -;;;; Rules for `band_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Specialized lowerings for `(bxor x (bnot y))` which is additionally produced +;; by Cranelift's `bxor_not` instruction that is legalized into the simpler +;; forms early on. 
-(rule -1 (lower (has_type (fits_in_32 ty) (band_not x y))) - (alu_rs_imm_logic (ALUOp.AndNot) ty x y)) +(rule 1 (lower (has_type (fits_in_64 ty) (bxor x (bnot y)))) + (alu_rs_imm_logic (ALUOp.EorNot) ty x y)) +(rule 2 (lower (has_type (fits_in_64 ty) (bxor (bnot y) x))) + (alu_rs_imm_logic (ALUOp.EorNot) ty x y)) -(rule (lower (has_type $I64 (band_not x y))) - (alu_rs_imm_logic (ALUOp.AndNot) $I64 x y)) - -(rule (lower (has_type $I128 (band_not x y))) (i128_alu_bitop (ALUOp.AndNot) $I64 x y)) - -(rule -2 (lower (has_type (ty_vec128 ty) (band_not x y))) - (bic_vec x y (vector_size ty))) - -;;;; Rules for `bor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule -1 (lower (has_type (fits_in_32 ty) (bor_not x y))) - (alu_rs_imm_logic (ALUOp.OrrNot) ty x y)) - -(rule (lower (has_type $I64 (bor_not x y))) - (alu_rs_imm_logic (ALUOp.OrrNot) $I64 x y)) - -(rule (lower (has_type $I128 (bor_not x y))) (i128_alu_bitop (ALUOp.OrrNot) $I64 x y)) - -;;;; Rules for `bxor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule -1 (lower (has_type (fits_in_32 ty) (bxor_not x y))) - (alu_rs_imm_logic (ALUOp.EorNot) $I32 x y)) - -(rule (lower (has_type $I64 (bxor_not x y))) - (alu_rs_imm_logic (ALUOp.EorNot) $I64 x y)) - -(rule (lower (has_type $I128 (bxor_not x y))) (i128_alu_bitop (ALUOp.EorNot) $I64 x y)) +(rule 3 (lower (has_type $I128 (bxor x (bnot y)))) (i128_alu_bitop (ALUOp.EorNot) $I64 x y)) +(rule 4 (lower (has_type $I128 (bxor (bnot y) x))) (i128_alu_bitop (ALUOp.EorNot) $I64 x y)) ;;;; Rules for `ishl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -2407,7 +2406,7 @@ ;; sign extended. We then check if the output sign bit has flipped. (rule 0 (lower (has_type (fits_in_16 ty) (iadd_cout a b))) (let ((extend ExtendOp (lower_extend_op ty $true)) - + ;; Instead of emitting two `sxt{b,h}` we do one as an instruction and ;; the other as an extend operation in the `add` instruction. 
;; @@ -2417,7 +2416,7 @@ ;; cset out_carry, ne (a_sext Reg (put_in_reg_sext32 a)) (out Reg (add_extend_op ty a_sext b extend)) - (out_carry Reg (with_flags_reg + (out_carry Reg (with_flags_reg (cmp_extend (OperandSize.Size32) out out extend) (cset (Cond.Ne))))) (output_pair diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index 9a7e5faf9e..ef6f8dd379 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -1950,32 +1950,14 @@ ;;; (decl gen_andn (Reg Reg) Reg) -(rule 1 - (gen_andn rs1 rs2) - (if-let $true (has_b)) +(rule 1 (gen_andn rs1 rs2) (alu_rrr (AluOPRRR.Andn) rs1 rs2)) -(rule - (gen_andn rs1 rs2) - (if-let $false (has_b)) - (let - ((tmp Reg (gen_bit_not rs2))) - (alu_and rs1 tmp))) - ;;; (decl gen_orn (Reg Reg) Reg) -(rule 1 - (gen_orn rs1 rs2 ) - (if-let $true (has_b)) +(rule 1 (gen_orn rs1 rs2) (alu_rrr (AluOPRRR.Orn) rs1 rs2)) -(rule - (gen_orn rs1 rs2) - (if-let $false (has_b)) - (let - ((tmp Reg (gen_bit_not rs2))) - (alu_rrr (AluOPRRR.Or) rs1 tmp))) - (decl gen_rev8 (Reg) Reg) (rule 1 (gen_rev8 rs) @@ -2014,14 +1996,6 @@ (_ Unit (emit (MInst.Brev8 rs ty step tmp tmp2 rd)))) (writable_reg_to_reg rd))) -;;; x ^ ~y -(decl gen_xor_not (Reg Reg) Reg) -(rule - (gen_xor_not x y) - (let - ((tmp Reg (gen_bit_not y))) - (alu_rrr (AluOPRRR.Xor) x tmp))) - ;; Negates x ;; Equivalent to 0 - x (decl neg (Type ValueRegs) ValueRegs) diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 4d169abf5c..fdaa7102c4 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -40,14 +40,14 @@ (rule 2 (lower (has_type (fits_in_64 ty) (iadd (imm12_from_value x) y))) (alu_rr_imm12 (select_addi ty) y x)) -(rule +(rule (lower (has_type $I128 (iadd x y))) (let ( ;; low part. (low Reg (alu_add (value_regs_get x 0) (value_regs_get y 0))) ;; compute carry. 
(carry Reg (alu_rrr (AluOPRRR.SltU) low (value_regs_get y 0))) - ;; + ;; (high_tmp Reg (alu_add (value_regs_get x 1) (value_regs_get y 1))) ;; add carry. (high Reg (alu_add high_tmp carry))) @@ -158,19 +158,19 @@ (alu_rrr (AluOPRRR.Remuw) (ext_int_if_need $false x ty) y2))) (rule -1 (lower (has_type (fits_in_16 ty) (srem x y))) - (let + (let ((y2 Reg (ext_int_if_need $true y ty)) (_ InstOutput (gen_div_by_zero y2))) (alu_rrr (AluOPRRR.Remw) (ext_int_if_need $true x ty) y2))) (rule (lower (has_type $I32 (srem x y))) - (let + (let ((y2 Reg (ext_int_if_need $true y $I32)) (_ InstOutput (gen_div_by_zero y2))) (alu_rrr (AluOPRRR.Remw) x y2))) (rule (lower (has_type $I32 (urem x y))) - (let + (let ((y2 Reg (ext_int_if_need $false y $I32)) (_ InstOutput (gen_div_by_zero y2))) (alu_rrr (AluOPRRR.Remuw) x y2))) @@ -204,6 +204,29 @@ (rule (lower (has_type $F64 (band x y))) (lower_float_binary (AluOPRRR.And) x y $F64)) +;; Specialized lowerings for `(band x (bnot y))` which is additionally produced +;; by Cranelift's `band_not` instruction that is legalized into the simpler +;; forms early on. 
+ +(rule 3 (lower (has_type (fits_in_64 ty) (band x (bnot y)))) + (if-let $true (has_b)) + (gen_andn x y)) +(rule 4 (lower (has_type (fits_in_64 ty) (band (bnot y) x))) + (if-let $true (has_b)) + (gen_andn x y)) +(rule 5 (lower (has_type $I128 (band x (bnot y)))) + (if-let $true (has_b)) + (let + ((low Reg (gen_andn (value_regs_get x 0) (value_regs_get y 0))) + (high Reg (gen_andn (value_regs_get x 1) (value_regs_get y 1)))) + (value_regs low high))) +(rule 6 (lower (has_type $I128 (band (bnot y) x))) + (if-let $true (has_b)) + (let + ((low Reg (gen_andn (value_regs_get x 0) (value_regs_get y 0))) + (high Reg (gen_andn (value_regs_get x 1) (value_regs_get y 1)))) + (value_regs low high))) + ;;;; Rules for `or` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type (fits_in_64 ty) (bor x y))) @@ -222,6 +245,30 @@ (rule (lower (has_type $F64 (bor x y))) (lower_float_binary (AluOPRRR.Or) x y $F64)) +;; Specialized lowerings for `(bor x (bnot y))` which is additionally produced +;; by Cranelift's `bor_not` instruction that is legalized into the simpler +;; forms early on. 
+ +(rule 3 (lower (has_type (fits_in_64 ty) (bor x (bnot y)))) + (if-let $true (has_b)) + (gen_orn x y)) +(rule 4 (lower (has_type (fits_in_64 ty) (bor (bnot y) x))) + (if-let $true (has_b)) + (gen_orn x y)) + +(rule 5 (lower (has_type $I128 (bor x (bnot y)))) + (if-let $true (has_b)) + (let + ((low Reg (gen_orn (value_regs_get x 0) (value_regs_get y 0))) + (high Reg (gen_orn (value_regs_get x 1) (value_regs_get y 1)))) + (value_regs low high))) +(rule 6 (lower (has_type $I128 (bor (bnot y) x))) + (if-let $true (has_b)) + (let + ((low Reg (gen_orn (value_regs_get x 0) (value_regs_get y 0))) + (high Reg (gen_orn (value_regs_get x 1) (value_regs_get y 1)))) + (value_regs low high))) + ;;;; Rules for `xor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type (fits_in_64 ty) (bxor x y))) @@ -289,15 +336,6 @@ (lower_extend x $true (ty_bits in) (ty_bits out))) -;;;; Rules for `band_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (has_type (fits_in_64 ty) (band_not x y))) - (gen_andn x y)) -(rule 1 (lower (has_type $I128 (band_not x y))) - (let - ((low Reg (gen_andn (value_regs_get x 0) (value_regs_get y 0))) - (high Reg (gen_andn (value_regs_get x 1) (value_regs_get y 1)))) - (value_regs low high))) - ;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type (fits_in_64 ty) (popcnt x))) (lower_popcnt x ty)) @@ -397,29 +435,6 @@ (lower_i128_rotr x y)) -;;;; Rules for `bxor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; notice x y order!!! 
-(rule (lower (has_type (fits_in_64 ty) (bxor_not x y))) - (gen_xor_not x y)) -(rule 1 (lower (has_type $I128 (bxor_not x y))) - (let - ((low Reg (gen_xor_not (value_regs_get x 0) (value_regs_get y 0))) - (high Reg (gen_xor_not (value_regs_get x 1) (value_regs_get y 1)))) - (value_regs low high) - ) -) - -;;;; Rules for `bor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (has_type (fits_in_64 ty) (bor_not x y))) - (gen_orn x y)) - -(rule 1 (lower (has_type $I128 (bor_not x y))) - (let - ((low Reg (gen_orn (value_regs_get x 0) (value_regs_get y 0))) - (high Reg (gen_orn (value_regs_get x 1) (value_regs_get y 1)))) - (value_regs low high))) - - ;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type (fits_in_64 ty) (cls x))) (lower_cls x ty)) @@ -428,12 +443,12 @@ ;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule +(rule (lower (has_type ty (fabs x))) (gen_fabs x ty)) ;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule +(rule (lower (has_type ty (fneg x))) (fpu_rrr (f_copy_neg_sign_op ty) ty x x)) @@ -458,35 +473,35 @@ ;;;; Rules for `AtomicRMW` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 - ;; - (lower + ;; + (lower (has_type (valid_atomic_transaction ty) (atomic_rmw flags op addr x))) (gen_atomic (get_atomic_rmw_op ty op) addr x (atomic_amo))) ;;; for I8 and I16 (rule 1 - (lower + (lower (has_type (valid_atomic_transaction (fits_in_16 ty)) (atomic_rmw flags op addr x))) (gen_atomic_rmw_loop op ty addr x)) ;;;special for I8 and I16 max min etc. ;;;because I need uextend or sextend the value. 
(rule 2 - (lower + (lower (has_type (valid_atomic_transaction (fits_in_16 ty)) (atomic_rmw flags (is_atomic_rmw_max_etc op $true) addr x))) (gen_atomic_rmw_loop op ty addr (ext_int_if_need $true x ty))) (rule 2 ;; - (lower + (lower (has_type (valid_atomic_transaction (fits_in_16 ty)) (atomic_rmw flags (is_atomic_rmw_max_etc op $false) addr x))) ;; (gen_atomic_rmw_loop op ty addr (ext_int_if_need $false x ty))) ;;;;; Rules for `AtomicRmwOp.Sub` (rule - (lower + (lower (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Sub) addr x))) (let ((tmp WritableReg (temp_writable_reg ty)) @@ -504,7 +519,7 @@ ;;;;; Rules for `AtomicRmwOp.Nand` (rule - (lower + (lower (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Nand) addr x))) (gen_atomic_rmw_loop (AtomicRmwOp.Nand) ty addr x)) @@ -512,13 +527,13 @@ (extern extractor is_atomic_rmw_max_etc is_atomic_rmw_max_etc) ;;;;; Rules for `atomic load`;;;;;;;;;;;;;;;;; -(rule +(rule (lower (has_type (valid_atomic_transaction ty) (atomic_load flags p))) (gen_atomic_load p ty)) ;;;;; Rules for `atomic store`;;;;;;;;;;;;;;;;; -(rule +(rule (lower (atomic_store flags src @ (value_type (valid_atomic_transaction ty)) p)) (gen_atomic_store p ty src)) @@ -562,37 +577,37 @@ ;;;;; Rules for `for float arithmatic` -(rule +(rule (lower (has_type ty (fadd x y))) (fpu_rrr (f_arithmatic_op ty (Opcode.Fadd)) ty x y)) -(rule +(rule (lower (has_type ty (fsub x y))) (fpu_rrr (f_arithmatic_op ty (Opcode.Fsub)) ty x y)) -(rule +(rule (lower (has_type ty (fmul x y))) (fpu_rrr (f_arithmatic_op ty (Opcode.Fmul)) ty x y)) -(rule +(rule (lower (has_type ty (fdiv x y))) (fpu_rrr (f_arithmatic_op ty (Opcode.Fdiv)) ty x y)) -(rule +(rule (lower (has_type ty (fmin x y))) (gen_float_select (FloatSelectOP.Min) x y ty)) -(rule +(rule (lower (has_type ty (fmin_pseudo x y))) (gen_float_select_pseudo (FloatSelectOP.Min) x y ty)) -(rule +(rule (lower (has_type ty (fmax x y))) (gen_float_select (FloatSelectOP.Max) x y ty)) -(rule 
+(rule (lower (has_type ty (fmax_pseudo x y))) (gen_float_select_pseudo (FloatSelectOP.Max) x y ty)) ;;;;; Rules for `stack_addr`;;;;;;;;; -(rule +(rule (lower (stack_addr ss offset)) (gen_stack_addr ss offset)) @@ -624,7 +639,7 @@ (gen_bitselect ty c x y)) ;;;;; Rules for `isplit`;;;;;;;;; -(rule +(rule (lower (isplit x)) (let ((t1 Reg (gen_move2 (value_regs_get x 0) $I64 $I64)) @@ -632,7 +647,7 @@ (output_pair t1 t2))) ;;;;; Rules for `iconcat`;;;;;;;;; -(rule +(rule (lower (has_type $I128 (iconcat x y))) (let ((t1 Reg (gen_move2 x $I64 $I64)) @@ -716,16 +731,16 @@ (gen_load_128 p offset flags)) ;;;;; Rules for `istore8`;;;;;;;;; -(rule +(rule (lower (istore8 flags x p offset)) (gen_store p offset (StoreOP.Sb) flags x)) ;;;;; Rules for `istore16`;;;;;;;;; -(rule +(rule (lower (istore16 flags x p offset)) (gen_store p offset (StoreOP.Sh) flags x)) ;;;;; Rules for `istore32`;;;;;;;;; -(rule +(rule (lower (istore32 flags x p offset)) (gen_store p offset (StoreOP.Sw) flags x)) @@ -803,22 +818,22 @@ (gen_moves v in_ty out)) ;;;;; Rules for `ceil`;;;;;;;;; -(rule +(rule (lower (has_type ty (ceil x))) (gen_float_round (FloatRoundOP.Ceil) x ty) ) ;;;;; Rules for `floor`;;;;;;;;; -(rule +(rule (lower (has_type ty (floor x))) (gen_float_round (FloatRoundOP.Floor) x ty)) ;;;;; Rules for `trunc`;;;;;;;;; -(rule +(rule (lower (has_type ty (trunc x))) (gen_float_round (FloatRoundOP.Trunc) x ty)) ;;;;; Rules for `nearest`;;;;;;;;; -(rule +(rule (lower (has_type ty (nearest x))) (gen_float_round (FloatRoundOP.Nearest) x ty)) diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle index 9bbbc53f6c..1eed87da07 100644 --- a/cranelift/codegen/src/isa/s390x/lower.isle +++ b/cranelift/codegen/src/isa/s390x/lower.isle @@ -983,6 +983,22 @@ (rule 0 (lower (has_type (vr128_ty ty) (band x y))) (vec_and ty x y)) +;; Specialized lowerings for `(band x (bnot y))` which is additionally produced +;; by Cranelift's `band_not` instruction that is 
legalized into the simpler +;; forms early on. + +;; z15 version using a single instruction. +(rule 7 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (band x (bnot y)))) + (and_not_reg ty x y)) +(rule 8 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (band (bnot y) x))) + (and_not_reg ty x y)) + +;; And-not two vector registers. +(rule 9 (lower (has_type (vr128_ty ty) (band x (bnot y)))) + (vec_and_not ty x y)) +(rule 10 (lower (has_type (vr128_ty ty) (band (bnot y) x))) + (vec_and_not ty x y)) + ;;;; Rules for `bor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Or two registers. @@ -1009,6 +1025,22 @@ (rule 0 (lower (has_type (vr128_ty ty) (bor x y))) (vec_or ty x y)) +;; Specialized lowerings for `(bor x (bnot y))` which is additionally produced +;; by Cranelift's `bor_not` instruction that is legalized into the simpler +;; forms early on. + +;; z15 version using a single instruction. +(rule 7 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bor x (bnot y)))) + (or_not_reg ty x y)) +(rule 8 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bor (bnot y) x))) + (or_not_reg ty x y)) + +;; Or-not two vector registers. +(rule 9 (lower (has_type (vr128_ty ty) (bor x (bnot y)))) + (vec_or_not ty x y)) +(rule 10 (lower (has_type (vr128_ty ty) (bor (bnot y) x))) + (vec_or_not ty x y)) + ;;;; Rules for `bxor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -1032,49 +1064,20 @@ (rule 0 (lower (has_type (vr128_ty ty) (bxor x y))) (vec_xor ty x y)) - -;;;; Rules for `band_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Specialized lowerings for `(bxor x (bnot y))` which is additionally produced +;; by Cranelift's `bxor_not` instruction that is legalized into the simpler +;; forms early on. ;; z15 version using a single instruction. -(rule 2 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (band_not x y))) - (and_not_reg ty x y)) - -;; z14 version using XOR with -1. 
-(rule 1 (lower (has_type (and (mie2_disabled) (fits_in_64 ty)) (band_not x y))) - (and_reg ty x (not_reg ty y))) - -;; And-not two vector registers. -(rule (lower (has_type (vr128_ty ty) (band_not x y))) - (vec_and_not ty x y)) - - -;;;; Rules for `bor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; z15 version using a single instruction. -(rule 2 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bor_not x y))) - (or_not_reg ty x y)) - -;; z14 version using XOR with -1. -(rule 1 (lower (has_type (and (mie2_disabled) (fits_in_64 ty)) (bor_not x y))) - (or_reg ty x (not_reg ty y))) - -;; Or-not two vector registers. -(rule (lower (has_type (vr128_ty ty) (bor_not x y))) - (vec_or_not ty x y)) - - -;;;; Rules for `bxor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; z15 version using a single instruction. -(rule 2 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bxor_not x y))) +(rule 5 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bxor x (bnot y)))) + (not_xor_reg ty x y)) +(rule 6 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bxor (bnot y) x))) (not_xor_reg ty x y)) -;; z14 version using XOR with -1. -(rule 1 (lower (has_type (and (mie2_disabled) (fits_in_64 ty)) (bxor_not x y))) - (not_reg ty (xor_reg ty x y))) - ;; Xor-not two vector registers. 
-(rule (lower (has_type (vr128_ty ty) (bxor_not x y))) +(rule 7 (lower (has_type (vr128_ty ty) (bxor x (bnot y)))) + (vec_not_xor ty x y)) +(rule 8 (lower (has_type (vr128_ty ty) (bxor (bnot y) x))) (vec_not_xor ty x y)) diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 0f01e9cc53..9adc4e2fdb 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -269,6 +269,36 @@ (value_gprs (x64_and $I64 x_lo y_lo) (x64_and $I64 x_hi y_hi)))) +;; Specialized lowerings for `(band x (bnot y))` which is additionally produced +;; by Cranelift's `band_not` instruction that is legalized into the simpler +;; forms early on. + +(decl sse_and_not (Type Xmm XmmMem) Xmm) +(rule (sse_and_not $F32X4 x y) (x64_andnps x y)) +(rule (sse_and_not $F64X2 x y) (x64_andnpd x y)) +(rule -1 (sse_and_not (multi_lane _bits _lanes) x y) (x64_pandn x y)) + +;; Note the flipping of operands below as we're matching +;; +;; (band x (bnot y)) +;; +;; while x86 does +;; +;; pandn(x, y) = and(not(x), y) +(rule 8 (lower (has_type ty @ (multi_lane _bits _lane) (band x (bnot y)))) + (sse_and_not ty y x)) +(rule 9 (lower (has_type ty @ (multi_lane _bits _lane) (band (bnot y) x))) + (sse_and_not ty y x)) + +(rule 10 (lower (has_type ty @ (use_bmi1 $true) (band x (bnot y)))) + (if (ty_int_ref_scalar_64 ty)) + ;; the first argument is the one that gets inverted with andn + (x64_andn ty y x)) +(rule 11 (lower (has_type ty @ (use_bmi1 $true) (band (bnot y) x))) + (if (ty_int_ref_scalar_64 ty)) + (x64_andn ty y x)) + + ;;;; Rules for `bor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; `{i,b}64` and smaller. 
@@ -1085,52 +1115,6 @@ (OperandSize.Size32)))) (x64_pmuludq x2 y2))) -;;;; Rules for `band_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(decl sse_and_not (Type Xmm XmmMem) Xmm) -(rule (sse_and_not $F32X4 x y) (x64_andnps x y)) -(rule (sse_and_not $F64X2 x y) (x64_andnpd x y)) -(rule -1 (sse_and_not (multi_lane _bits _lanes) x y) (x64_pandn x y)) - -;; Note the flipping of operands below. CLIF specifies -;; -;; band_not(x, y) = and(x, not(y)) -;; -;; while x86 does -;; -;; pandn(x, y) = and(not(x), y) -(rule 0 (lower (has_type ty (band_not x y))) - (sse_and_not ty y x)) - - -(rule 1 (lower (has_type ty @ (use_bmi1 $false) (band_not x y))) - (if (ty_int_ref_scalar_64 ty)) - (x64_and ty - x - (x64_not ty y))) - -(rule 1 (lower (has_type ty @ (use_bmi1 $true) (band_not x y))) - (if (ty_int_ref_scalar_64 ty)) - ;; the first argument is the one that gets inverted with andn - (x64_andn ty y x)) - - -;;;; Rules for `bxor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule 0 (lower (has_type ty (bxor_not x y))) - (if (ty_int_ref_scalar_64 ty)) - (x64_xor ty - x - (x64_not ty y))) - -;;;; Rules for `bor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule 0 (lower (has_type ty (bor_not x y))) - (if (ty_int_ref_scalar_64 ty)) - (x64_or ty - x - (x64_not ty y))) - ;;;; Rules for `iabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $I8X16 (iabs x))) @@ -1167,7 +1151,7 @@ ;; it below, since we need to pass it into the cmove ;; before we pass the cmove to with_flags_reg. (neg_result Gpr (produces_flags_get_reg neg)) - ;; When the neg instruction sets the sign flag, + ;; When the neg instruction sets the sign flag, ;; takes the original (non-negative) value. 
(cmove ConsumesFlags (cmove ty (CC.S) src neg_result))) (with_flags_reg (produces_flags_ignore neg) cmove))) @@ -3586,7 +3570,7 @@ (u8_from_uimm8 lane)))) (x64_pshufd val lane (OperandSize.Size32))) -;; This is the only remaining case for F64X2 +;; This is the only remaining case for F64X2 (rule 1 (lower (has_type $F64 (extractlane val @ (value_type (ty_vec128 ty)) (u8_from_uimm8 1)))) ;; 0xee == 0b11_10_11_10 diff --git a/cranelift/codegen/src/legalizer/mod.rs b/cranelift/codegen/src/legalizer/mod.rs index af836a0622..6fa43e0552 100644 --- a/cranelift/codegen/src/legalizer/mod.rs +++ b/cranelift/codegen/src/legalizer/mod.rs @@ -224,6 +224,28 @@ pub fn simple_legalize(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: pos.func.dfg.replace(inst).icmp(cond, arg, imm); } + // Legalize the fused bitwise-plus-not instructions into simpler + // instructions to assist with optimizations. Lowering will + // pattern match this sequence regardless when architectures + // support the instruction natively. 
+ InstructionData::Binary { opcode, args } => { + match opcode { + ir::Opcode::BandNot => { + let neg = pos.ins().bnot(args[1]); + pos.func.dfg.replace(inst).band(args[0], neg); + } + ir::Opcode::BorNot => { + let neg = pos.ins().bnot(args[1]); + pos.func.dfg.replace(inst).bor(args[0], neg); + } + ir::Opcode::BxorNot => { + let neg = pos.ins().bnot(args[1]); + pos.func.dfg.replace(inst).bxor(args[0], neg); + } + _ => prev_pos = pos.position(), + }; + } + _ => { prev_pos = pos.position(); continue; diff --git a/cranelift/codegen/src/opts/algebraic.isle b/cranelift/codegen/src/opts/algebraic.isle index d889fd8faa..888dd51bbc 100644 --- a/cranelift/codegen/src/opts/algebraic.isle +++ b/cranelift/codegen/src/opts/algebraic.isle @@ -281,3 +281,8 @@ (rule (simplify (icmp (ty_int ty) (IntCC.SignedLessThanOrEqual) x x)) (iconst ty (imm64 1))) + +;; (x ^ -1) can be replaced with the `bnot` instruction +(rule (simplify (bxor ty x (iconst ty k))) + (if-let -1 (i64_sextend_imm64 ty k)) + (bnot ty x)) diff --git a/cranelift/filetests/filetests/egraph/algebraic.clif b/cranelift/filetests/filetests/egraph/algebraic.clif index ded7e52b0a..1962eb508f 100644 --- a/cranelift/filetests/filetests/egraph/algebraic.clif +++ b/cranelift/filetests/filetests/egraph/algebraic.clif @@ -221,3 +221,33 @@ block0(v1: i8): ; check: v3 = iconst.i8 0 ; check: return v3 + +function %bnot1(i8) -> i8 { +block0(v1: i8): + v2 = iconst.i8 -1 + v3 = bxor v1, v2 + return v3 +} + +; check: v4 = bnot v1 +; check: return v4 + +function %bnot2(i64) -> i64 { +block0(v1: i64): + v2 = iconst.i64 -1 + v3 = bxor v1, v2 + return v3 +} + +; check: v4 = bnot v1 +; check: return v4 + +function %bnot3(i64) -> i64 { +block0(v1: i64): + v2 = iconst.i64 -1 + v3 = bxor v2, v1 + return v3 +} + +; check: v5 = bnot v1 +; check: return v5 diff --git a/cranelift/filetests/filetests/isa/aarch64/bitopts-optimized.clif b/cranelift/filetests/filetests/isa/aarch64/bitopts-optimized.clif new file mode 100644 index 
0000000000..c46a729001 --- /dev/null +++ b/cranelift/filetests/filetests/isa/aarch64/bitopts-optimized.clif @@ -0,0 +1,37 @@ +test compile precise-output +set unwind_info=false +set opt_level=speed +target aarch64 + +function %band_not_i32_reversed(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bnot v0 + v3 = band v2, v1 + return v3 +} + +; block0: +; bic w0, w1, w0 +; ret + +function %bor_not_i32_reversed(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bnot v0 + v3 = bor v2, v1 + return v3 +} + +; block0: +; orn w0, w1, w0 +; ret + +function %bxor_not_i32_reversed(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bnot v0 + v3 = bxor v2, v1 + return v3 +} + +; block0: +; eon w0, w1, w0 +; ret diff --git a/cranelift/filetests/filetests/isa/riscv64/bitops-optimized.clif b/cranelift/filetests/filetests/isa/riscv64/bitops-optimized.clif new file mode 100644 index 0000000000..1f249ff3ed --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/bitops-optimized.clif @@ -0,0 +1,45 @@ +test compile precise-output +set opt_level=speed +target riscv64 has_b + +function %band_not_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = band_not.i32 v0, v1 + return v2 +} + +; block0: +; andn a0,a0,a1 +; ret + +function %band_not_i32_reversed(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bnot v0 + v3 = band v2, v1 + return v3 +} + +; block0: +; andn a0,a1,a0 +; ret + +function %bor_not_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bor_not.i32 v0, v1 + return v2 +} + +; block0: +; orn a0,a0,a1 +; ret + +function %bor_not_i32_reversed(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bnot v0 + v3 = bor v2, v1 + return v3 +} + +; block0: +; orn a0,a1,a0 +; ret diff --git a/cranelift/filetests/filetests/isa/riscv64/bitops.clif b/cranelift/filetests/filetests/isa/riscv64/bitops.clif index 48697c3a0b..2857ad8a12 100644 --- a/cranelift/filetests/filetests/isa/riscv64/bitops.clif +++ b/cranelift/filetests/filetests/isa/riscv64/bitops.clif @@ 
-631,9 +631,9 @@ block0(v0: i128, v1: i128): } ; block0: -; not a2,a2 -; and a0,a0,a2 +; not a4,a2 ; not a6,a3 +; and a0,a0,a4 ; and a1,a1,a6 ; ret @@ -645,9 +645,9 @@ block0(v0: i64): } ; block0: -; li t2,4 -; not a1,t2 -; and a0,a0,a1 +; li a1,4 +; not a2,a1 +; and a0,a0,a2 ; ret function %band_not_i64_constant_shift(i64, i64) -> i64 { @@ -660,8 +660,8 @@ block0(v0: i64, v1: i64): ; block0: ; slli a2,a1,4 -; not a1,a2 -; and a0,a0,a1 +; not a2,a2 +; and a0,a0,a2 ; ret function %bor_not_i32(i32, i32) -> i32 { @@ -693,9 +693,9 @@ block0(v0: i128, v1: i128): } ; block0: -; not a2,a2 -; or a0,a0,a2 +; not a4,a2 ; not a6,a3 +; or a0,a0,a4 ; or a1,a1,a6 ; ret @@ -707,9 +707,9 @@ block0(v0: i64): } ; block0: -; li t2,4 -; not a1,t2 -; or a0,a0,a1 +; li a1,4 +; not a2,a1 +; or a0,a0,a2 ; ret function %bor_not_i64_constant_shift(i64, i64) -> i64 { @@ -722,8 +722,8 @@ block0(v0: i64, v1: i64): ; block0: ; slli a2,a1,4 -; not a1,a2 -; or a0,a0,a1 +; not a2,a2 +; or a0,a0,a2 ; ret function %bxor_not_i32(i32, i32) -> i32 { @@ -755,9 +755,9 @@ block0(v0: i128, v1: i128): } ; block0: -; not a2,a2 -; xor a0,a0,a2 +; not a4,a2 ; not a6,a3 +; xor a0,a0,a4 ; xor a1,a1,a6 ; ret @@ -769,9 +769,9 @@ block0(v0: i64): } ; block0: -; li t2,4 -; not a1,t2 -; xor a0,a0,a1 +; li a1,4 +; not a2,a1 +; xor a0,a0,a2 ; ret function %bxor_not_i64_constant_shift(i64, i64) -> i64 { @@ -784,8 +784,8 @@ block0(v0: i64, v1: i64): ; block0: ; slli a2,a1,4 -; not a1,a2 -; xor a0,a0,a1 +; not a2,a2 +; xor a0,a0,a2 ; ret function %ishl_i128_i8(i128, i8) -> i128 { diff --git a/cranelift/filetests/filetests/isa/s390x/bitops-optimized.clif b/cranelift/filetests/filetests/isa/s390x/bitops-optimized.clif new file mode 100644 index 0000000000..091f1a6bf4 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/bitops-optimized.clif @@ -0,0 +1,66 @@ +test compile precise-output +set opt_level=speed +target s390x has_mie2 + +function %band_not_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = 
band_not.i32 v0, v1 + return v2 +} + +; block0: +; ncrk %r2, %r2, %r3 +; br %r14 + +function %band_not_i32_reversed(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bnot v0 + v3 = band v2, v1 + return v3 +} + +; block0: +; ncrk %r2, %r3, %r2 +; br %r14 + +function %bor_not_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bor_not.i32 v0, v1 + return v2 +} + +; block0: +; ocrk %r2, %r2, %r3 +; br %r14 + +function %bor_not_i32_reversed(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bnot v0 + v3 = bor v2, v1 + return v3 +} + +; block0: +; ocrk %r2, %r3, %r2 +; br %r14 + +function %bxor_not_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bxor_not.i32 v0, v1 + return v2 +} + +; block0: +; nxrk %r2, %r2, %r3 +; br %r14 + +function %bxor_not_i32_reversed(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bnot v0 + v3 = bxor v2, v1 + return v3 +} + +; block0: +; nxrk %r2, %r3, %r2 +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/bitwise.clif b/cranelift/filetests/filetests/isa/s390x/bitwise.clif index a2c1e6ccd2..ffa698326e 100644 --- a/cranelift/filetests/filetests/isa/s390x/bitwise.clif +++ b/cranelift/filetests/filetests/isa/s390x/bitwise.clif @@ -366,9 +366,8 @@ block0(v0: i32, v1: i32): } ; block0: -; lgr %r5, %r3 -; xilf %r5, 4294967295 -; nr %r2, %r5 +; xilf %r3, 4294967295 +; nr %r2, %r3 ; br %r14 function %band_not_i16(i16, i16) -> i16 { @@ -378,9 +377,8 @@ block0(v0: i16, v1: i16): } ; block0: -; lgr %r5, %r3 -; xilf %r5, 4294967295 -; nr %r2, %r5 +; xilf %r3, 4294967295 +; nr %r2, %r3 ; br %r14 function %band_not_i8(i8, i8) -> i8 { @@ -390,9 +388,8 @@ block0(v0: i8, v1: i8): } ; block0: -; lgr %r5, %r3 -; xilf %r5, 4294967295 -; nr %r2, %r5 +; xilf %r3, 4294967295 +; nr %r2, %r3 ; br %r14 function %bor_not_i128(i128, i128) -> i128 { @@ -427,9 +424,8 @@ block0(v0: i32, v1: i32): } ; block0: -; lgr %r5, %r3 -; xilf %r5, 4294967295 -; or %r2, %r5 +; xilf %r3, 4294967295 +; or %r2, %r3 ; br %r14 function %bor_not_i16(i16, 
i16) -> i16 { @@ -439,9 +435,8 @@ block0(v0: i16, v1: i16): } ; block0: -; lgr %r5, %r3 -; xilf %r5, 4294967295 -; or %r2, %r5 +; xilf %r3, 4294967295 +; or %r2, %r3 ; br %r14 function %bor_not_i8(i8, i8) -> i8 { @@ -451,9 +446,8 @@ block0(v0: i8, v1: i8): } ; block0: -; lgr %r5, %r3 -; xilf %r5, 4294967295 -; or %r2, %r5 +; xilf %r3, 4294967295 +; or %r2, %r3 ; br %r14 function %bxor_not_i128(i128, i128) -> i128 { @@ -476,9 +470,9 @@ block0(v0: i64, v1: i64): } ; block0: +; xilf %r3, 4294967295 +; xihf %r3, 4294967295 ; xgr %r2, %r3 -; xilf %r2, 4294967295 -; xihf %r2, 4294967295 ; br %r14 function %bxor_not_i32(i32, i32) -> i32 { @@ -488,8 +482,8 @@ block0(v0: i32, v1: i32): } ; block0: +; xilf %r3, 4294967295 ; xr %r2, %r3 -; xilf %r2, 4294967295 ; br %r14 function %bxor_not_i16(i16, i16) -> i16 { @@ -499,8 +493,8 @@ block0(v0: i16, v1: i16): } ; block0: +; xilf %r3, 4294967295 ; xr %r2, %r3 -; xilf %r2, 4294967295 ; br %r14 function %bxor_not_i8(i8, i8) -> i8 { @@ -510,8 +504,8 @@ block0(v0: i8, v1: i8): } ; block0: +; xilf %r3, 4294967295 ; xr %r2, %r3 -; xilf %r2, 4294967295 ; br %r14 function %bnot_i128(i128) -> i128 { diff --git a/cranelift/filetests/filetests/isa/x64/band_not_bmi1.clif b/cranelift/filetests/filetests/isa/x64/band_not_bmi1.clif index 6c448f42bb..29101eaff3 100644 --- a/cranelift/filetests/filetests/isa/x64/band_not_bmi1.clif +++ b/cranelift/filetests/filetests/isa/x64/band_not_bmi1.clif @@ -1,4 +1,5 @@ test compile precise-output +set opt_level=speed target x86_64 has_bmi1 function %f1(i8, i8) -> i8 { @@ -15,3 +16,19 @@ block0(v0: i8, v1: i8): ; popq %rbp ; ret + +function %reversed_operands(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = bnot v0 + v3 = band v2, v1 + return v3 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; andn %eax, %edi, %esi +; movq %rbp, %rsp +; popq %rbp +; ret + diff --git a/cranelift/filetests/filetests/wasm/i32-not-x64.wat b/cranelift/filetests/filetests/wasm/i32-not-x64.wat new file mode 100644 index 
0000000000..45d3798e74 --- /dev/null +++ b/cranelift/filetests/filetests/wasm/i32-not-x64.wat @@ -0,0 +1,46 @@ +;;!target = "x86_64" +;;!compile = true +;;!settings = ["opt_level=speed", "has_bmi1=true"] + +(module + ;; this should get optimized to a `bnot` in clif + (func (param i32) (result i32) + i32.const -1 + local.get 0 + i32.xor) + + ;; this should get optimized to a single `andn` instruction + (func (param i32 i32) (result i32) + local.get 0 + i32.const -1 + local.get 1 + i32.xor + i32.and) +) + +;; function u0:0: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; jmp label1 +;; block1: +;; movq %rdi, %rax +;; notl %eax, %eax +;; movq %rbp, %rsp +;; popq %rbp +;; ret +;; +;; function u0:1: +;; pushq %rbp +;; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +;; movq %rsp, %rbp +;; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +;; block0: +;; jmp label1 +;; block1: +;; andn %eax, %esi, %edi +;; movq %rbp, %rsp +;; popq %rbp +;; ret