diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle
index 316d6e75d8..865ed500d2 100644
--- a/cranelift/codegen/src/isa/x64/inst.isle
+++ b/cranelift/codegen/src/isa/x64/inst.isle
@@ -1531,6 +1531,8 @@
 (decl sse_xor_op (Type) SseOpcode)
 (rule 1 (sse_xor_op $F32X4) (SseOpcode.Xorps))
 (rule 1 (sse_xor_op $F64X2) (SseOpcode.Xorpd))
+(rule 1 (sse_xor_op $F32) (SseOpcode.Xorps))
+(rule 1 (sse_xor_op $F64) (SseOpcode.Xorpd))

 ;; Priority 0 because multi_lane overlaps with the previous two explicit type
 ;; patterns.
diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle
index 326a83258b..14b07ff141 100644
--- a/cranelift/codegen/src/isa/x64/lower.isle
+++ b/cranelift/codegen/src/isa/x64/lower.isle
@@ -242,46 +242,54 @@
 ;; `{i,b}64` and smaller.

 ;; And two registers.
-(rule 0 (lower (has_type (fits_in_64 ty) (band x y)))
+(rule 0 (lower (has_type ty (band x y)))
+      (if (ty_int_ref_scalar_64 ty))
       (x64_and ty x y))

 ;; And with a memory operand.
-(rule 1 (lower (has_type (fits_in_64 ty)
-                         (band x (sinkable_load y))))
+(rule 1 (lower (has_type ty (band x (sinkable_load y))))
+      (if (ty_int_ref_scalar_64 ty))
       (x64_and ty x (sink_load_to_gpr_mem_imm y)))

-(rule 2 (lower (has_type (fits_in_64 ty)
-                         (band (sinkable_load x) y)))
+(rule 2 (lower (has_type ty (band (sinkable_load x) y)))
+      (if (ty_int_ref_scalar_64 ty))
       (x64_and ty y (sink_load_to_gpr_mem_imm x)))

 ;; And with an immediate.
-(rule 3 (lower (has_type (fits_in_64 ty)
-                         (band x (simm32_from_value y))))
+(rule 3 (lower (has_type ty (band x (simm32_from_value y))))
+      (if (ty_int_ref_scalar_64 ty))
       (x64_and ty x y))

-(rule 4 (lower (has_type (fits_in_64 ty)
-                         (band (simm32_from_value x) y)))
+(rule 4 (lower (has_type ty (band (simm32_from_value x) y)))
+      (if (ty_int_ref_scalar_64 ty))
       (x64_and ty y x))

+;; f32 and f64
+
+(rule 5 (lower (has_type (ty_scalar_float ty) (band x y)))
+      (sse_and ty x y))
+
 ;; SSE.

 (decl sse_and (Type Xmm XmmMem) Xmm)
 (rule (sse_and $F32X4 x y) (x64_andps x y))
 (rule (sse_and $F64X2 x y) (x64_andpd x y))
+(rule (sse_and $F32 x y) (x64_andps x y))
+(rule (sse_and $F64 x y) (x64_andpd x y))
 (rule -1 (sse_and (multi_lane _bits _lanes) x y) (x64_pand x y))

-(rule 5 (lower (has_type ty @ (multi_lane _bits _lanes)
+(rule 6 (lower (has_type ty @ (multi_lane _bits _lanes)
                               (band x y)))
       (sse_and ty x y))

 ;; `i128`.

-(rule 6 (lower (has_type $I128 (band x y)))
+(rule 7 (lower (has_type $I128 (band x y)))
       (let ((x_regs ValueRegs x)
             (x_lo Gpr (value_regs_get_gpr x_regs 0))
             (x_hi Gpr (value_regs_get_gpr x_regs 1))
@@ -296,39 +304,47 @@
 ;; `{i,b}64` and smaller.

 ;; Or two registers.
-(rule 0 (lower (has_type (fits_in_64 ty) (bor x y)))
+(rule 0 (lower (has_type ty (bor x y)))
+      (if (ty_int_ref_scalar_64 ty))
       (x64_or ty x y))

 ;; Or with a memory operand.
-(rule 1 (lower (has_type (fits_in_64 ty)
-                         (bor x (sinkable_load y))))
+(rule 1 (lower (has_type ty (bor x (sinkable_load y))))
+      (if (ty_int_ref_scalar_64 ty))
       (x64_or ty x (sink_load_to_gpr_mem_imm y)))

-(rule 2 (lower (has_type (fits_in_64 ty)
-                         (bor (sinkable_load x) y)))
+(rule 2 (lower (has_type ty (bor (sinkable_load x) y)))
+      (if (ty_int_ref_scalar_64 ty))
       (x64_or ty y (sink_load_to_gpr_mem_imm x)))

 ;; Or with an immediate.
-(rule 3 (lower (has_type (fits_in_64 ty)
-                         (bor x (simm32_from_value y))))
+(rule 3 (lower (has_type ty (bor x (simm32_from_value y))))
+      (if (ty_int_ref_scalar_64 ty))
       (x64_or ty x y))

-(rule 4 (lower (has_type (fits_in_64 ty)
-                         (bor (simm32_from_value x) y)))
+(rule 4 (lower (has_type ty (bor (simm32_from_value x) y)))
+      (if (ty_int_ref_scalar_64 ty))
       (x64_or ty y x))

+;; f32 and f64
+
+(rule 5 (lower (has_type (ty_scalar_float ty) (bor x y)))
+      (sse_or ty x y))
+
 ;; SSE.

 (decl sse_or (Type Xmm XmmMem) Xmm)
 (rule (sse_or $F32X4 x y) (x64_orps x y))
 (rule (sse_or $F64X2 x y) (x64_orpd x y))
+(rule (sse_or $F32 x y) (x64_orps x y))
+(rule (sse_or $F64 x y) (x64_orpd x y))
 (rule -1 (sse_or (multi_lane _bits _lanes) x y) (x64_por x y))

-(rule 5 (lower (has_type ty @ (multi_lane _bits _lanes)
+(rule 6 (lower (has_type ty @ (multi_lane _bits _lanes)
                               (bor x y)))
       (sse_or ty x y))

@@ -343,7 +359,7 @@
         (value_gprs (x64_or $I64 x_lo y_lo)
                     (x64_or $I64 x_hi y_hi))))

-(rule 6 (lower (has_type $I128 (bor x y)))
+(rule 7 (lower (has_type $I128 (bor x y)))
       (or_i128 x y))

 ;;;; Rules for `bxor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -351,39 +367,45 @@
 ;; `{i,b}64` and smaller.

 ;; Xor two registers.
-(rule 0 (lower (has_type (fits_in_64 ty) (bxor x y)))
+(rule 0 (lower (has_type ty (bxor x y)))
+      (if (ty_int_ref_scalar_64 ty))
       (x64_xor ty x y))

 ;; Xor with a memory operand.
-(rule 1 (lower (has_type (fits_in_64 ty)
-                         (bxor x (sinkable_load y))))
+(rule 1 (lower (has_type ty (bxor x (sinkable_load y))))
+      (if (ty_int_ref_scalar_64 ty))
       (x64_xor ty x (sink_load_to_gpr_mem_imm y)))

-(rule 2 (lower (has_type (fits_in_64 ty)
-                         (bxor (sinkable_load x) y)))
+(rule 2 (lower (has_type ty (bxor (sinkable_load x) y)))
+      (if (ty_int_ref_scalar_64 ty))
       (x64_xor ty y (sink_load_to_gpr_mem_imm x)))

 ;; Xor with an immediate.
-(rule 3 (lower (has_type (fits_in_64 ty)
-                         (bxor x (simm32_from_value y))))
+(rule 3 (lower (has_type ty (bxor x (simm32_from_value y))))
+      (if (ty_int_ref_scalar_64 ty))
       (x64_xor ty x y))

-(rule 4 (lower (has_type (fits_in_64 ty)
-                         (bxor (simm32_from_value x) y)))
+(rule 4 (lower (has_type ty (bxor (simm32_from_value x) y)))
+      (if (ty_int_ref_scalar_64 ty))
       (x64_xor ty y x))

+;; f32 and f64
+
+(rule 5 (lower (has_type (ty_scalar_float ty) (bxor x y)))
+      (sse_xor ty x y))
+
 ;; SSE.

-(rule 5 (lower (has_type ty @ (multi_lane _bits _lanes) (bxor x y)))
+(rule 6 (lower (has_type ty @ (multi_lane _bits _lanes) (bxor x y)))
       (sse_xor ty x y))

 ;; `{i,b}128`.

-(rule 6 (lower (has_type $I128 (bxor x y)))
+(rule 7 (lower (has_type $I128 (bxor x y)))
       (let ((x_regs ValueRegs x)
             (x_lo Gpr (value_regs_get_gpr x_regs 0))
             (x_hi Gpr (value_regs_get_gpr x_regs 1))
@@ -1220,9 +1242,11 @@

 ;; `i64` and smaller.

-(rule -2 (lower (has_type (fits_in_64 ty) (bnot x)))
+(rule -2 (lower (has_type ty (bnot x)))
+      (if (ty_int_ref_scalar_64 ty))
       (x64_not ty x))
+
 ;; `i128`.

 (decl i128_not (Value) ValueRegs)
@@ -1236,6 +1260,11 @@
 (rule (lower (has_type $I128 (bnot x)))
       (i128_not x))

+;; f32 and f64
+
+(rule -3 (lower (has_type (ty_scalar_float ty) (bnot x)))
+      (sse_xor ty x (vector_all_ones)))
+
 ;; Special case for vector-types where bit-negation is an xor against an
 ;; all-one value
 (rule -1 (lower (has_type ty @ (multi_lane _bits _lanes) (bnot x)))
diff --git a/cranelift/filetests/filetests/runtests/float-bitops.clif b/cranelift/filetests/filetests/runtests/float-bitops.clif
index 9ccfa070e0..16977df949 100644
--- a/cranelift/filetests/filetests/runtests/float-bitops.clif
+++ b/cranelift/filetests/filetests/runtests/float-bitops.clif
@@ -1,4 +1,6 @@
 test interpret
+test run
+target x86_64

 function %bnot_f32(f32) -> f32 {
 block0(v0: f32):