x64: Deduplicate fcmp emission logic (#6113)

* x64: Deduplicate fcmp emission logic

  The `select`-of-`fcmp` lowering duplicated a good deal of `FloatCC` lowering logic that was already done by `emit_fcmp`, so this commit refactors these lowering rules to instead delegate to `emit_fcmp` and then handle that result.

* Swap order of condition codes

  Shouldn't affect the correctness of this operation and it's a bit more natural to write the lowering rule this way.

* Swap the order of comparison operands

  No need to swap `a b`, only the `x y` needs swapping.

* Fix x64 printing of `XmmCmove`
2023-03-29 11:24:25 -05:00
parent dcf0ea9ff3
commit afb417920d
6 changed files with 73 additions and 112 deletions
--- a/cranelift/codegen/src/isa/x64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -1589,20 +1589,19 @@ impl PrettyPrint for Inst {
                let alternative = pretty_print_reg(alternative.to_reg(), size, allocs);
                let dst = pretty_print_reg(dst.to_reg().to_reg(), size, allocs);
                let consequent = consequent.pretty_print(size, allocs);
+                let suffix = match *ty {
+                    types::F64 => "sd",
+                    types::F32 => "ss",
+                    types::F32X4 => "aps",
+                    types::F64X2 => "apd",
+                    _ => "dqa",
+                };
                format!(
-                    "mov {}, {}; j{} $next; mov{} {}, {}; $next: ",
+                    "mov{suffix} {alternative}, {dst}; \
+                    j{} $next; \
+                    mov{suffix} {consequent}, {dst}; \
+                    $next:",
                    cc.invert().to_string(),
-                    match *ty {
-                        types::F64 => "sd",
-                        types::F32 => "ss",
-                        types::F32X4 => "aps",
-                        types::F64X2 => "apd",
-                        _ => "dqa",
-                    },
-                    consequent,
-                    dst,
-                    alternative,
-                    dst,
                )
            }

--- a/cranelift/codegen/src/isa/x64/lower.isle
+++ b/cranelift/codegen/src/isa/x64/lower.isle
@@ -1570,90 +1570,25 @@

 ;;;; Rules for `select` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-;; CLIF `select` instructions receive a testable argument (i.e. boolean or
-;; integer) that determines which of the other two arguments is selected as
-;; output. Since Cranelift booleans are typically generated by a comparison, the
-;; lowerings in this section "look upwards in the tree" to emit the proper
-;; sequence of "selection" instructions.
+;; When a `select` has an `fcmp` as a condition then rely on `emit_fcmp` to
+;; figure out how to perform the comparison.
 ;;
-;; The following rules--for selecting on a floating-point comparison--emit a
-;; `UCOMIS*` instruction and then a conditional move, `cmove`. Note that for
-;; values contained in XMM registers, `cmove` and `cmove_or` may in fact emit a
-;; jump sequence, not `CMOV`. The `cmove` instruction operates on the flags set
-;; by `UCOMIS*`; the key to understanding these is the UCOMIS* documentation
-;; (see Intel's Software Developer's Manual, volume 2, chapter 4):
-;;  - unordered assigns    Z = 1, P = 1, C = 1
-;;  - greater than assigns Z = 0, P = 0, C = 0
-;;  - less than assigns    Z = 0, P = 0, C = 1
-;;  - equal assigns        Z = 1, P = 0, C = 0
-;;
-;; Note that prefixing the flag with `N` means "not," so that `CC.P -> P = 1`
-;; and `CC.NP -> P = 0`. Also, x86 uses mnemonics for certain combinations of
-;; flags; e.g.:
-;;  - `CC.B -> C = 1` (below)
-;;  - `CC.NB -> C = 0` (not below)
-;;  - `CC.BE -> C = 1 OR Z = 1` (below or equal)
-;;  - `CC.NBE -> C = 0 AND Z = 0` (not below or equal)
+;; Note, though, that `FloatCC.Equal` requires an "and" of two condition
+;; codes, which isn't the easiest thing to lower to a `cmove` instruction.
+;; For this reason a `select (fcmp eq a b) x y` is instead lowered as
+;; `select (fcmp ne a b) y x`: the comparison operands `a b` are kept and
+;; only the selected values `x y` are swapped. This will produce a
+;; `FcmpCondResult.OrCondition` which is easier to codegen for.
+(rule (lower (has_type ty (select (maybe_uextend (fcmp cc a b)) x y)))
+      (lower_select_fcmp ty (emit_fcmp cc a b) x y))
+(rule 1 (lower (has_type ty (select (maybe_uextend (fcmp (FloatCC.Equal) a b)) x y)))
+        (lower_select_fcmp ty (emit_fcmp (FloatCC.NotEqual) a b) y x))

-(rule (lower (has_type ty (select (maybe_uextend (fcmp (FloatCC.Ordered) a b)) x y)))
-      (with_flags (x64_ucomis b a) (cmove_from_values ty (CC.NP) x y)))
-
-(rule (lower (has_type ty (select (maybe_uextend (fcmp (FloatCC.Unordered) a b)) x y)))
-      (with_flags (x64_ucomis b a) (cmove_from_values ty (CC.P) x y)))
-
-(rule (lower (has_type ty (select (maybe_uextend (fcmp (FloatCC.GreaterThan) a b)) x y)))
-      (with_flags (x64_ucomis b a) (cmove_from_values ty (CC.NBE) x y)))
-
-(rule (lower (has_type ty (select (maybe_uextend (fcmp (FloatCC.GreaterThanOrEqual) a b)) x y)))
-      (with_flags (x64_ucomis b a) (cmove_from_values ty (CC.NB) x y)))
-
-(rule (lower (has_type ty (select (maybe_uextend (fcmp (FloatCC.UnorderedOrLessThan) a b)) x y)))
-      (with_flags (x64_ucomis b a) (cmove_from_values ty (CC.B) x y)))
-
-(rule (lower (has_type ty (select (maybe_uextend (fcmp (FloatCC.UnorderedOrLessThanOrEqual) a b)) x y)))
-      (with_flags (x64_ucomis b a) (cmove_from_values ty (CC.BE) x y)))
-
-;; Certain FloatCC variants are implemented by flipping the operands of the
-;; comparison (e.g., "greater than" is lowered the same as "less than" but the
-;; comparison is reversed). This allows us to use a single flag for the `cmove`,
-;; which involves fewer instructions than `cmove_or`.
-;;
-;; But why flip at all, you may ask? Can't we just use `CC.B` (i.e., below) for
-;; `FloatCC.LessThan`? Recall that in these floating-point lowerings, values may
-;; be unordered and we must we want to express that `FloatCC.LessThan` is `LT`,
-;; not `LT | UNO`. By flipping the operands AND inverting the comparison (e.g.,
-;; to `CC.NBE`), we also avoid these unordered cases.
-
-(rule (lower (has_type ty (select (maybe_uextend (fcmp (FloatCC.LessThan) a b)) x y)))
-      (with_flags (x64_ucomis a b) (cmove_from_values ty (CC.NBE) x y)))
-
-(rule (lower (has_type ty (select (maybe_uextend (fcmp (FloatCC.LessThanOrEqual) a b)) x y)))
-      (with_flags (x64_ucomis a b) (cmove_from_values ty (CC.NB) x y)))
-
-(rule (lower (has_type ty (select (maybe_uextend (fcmp (FloatCC.UnorderedOrGreaterThan) a b)) x y)))
-      (with_flags (x64_ucomis a b) (cmove_from_values ty (CC.B) x y)))
-
-(rule (lower (has_type ty (select (maybe_uextend (fcmp (FloatCC.UnorderedOrGreaterThanOrEqual) a b)) x y)))
-      (with_flags (x64_ucomis a b) (cmove_from_values ty (CC.BE) x y)))
-
-;; `FloatCC.Equal` and `FloatCC.NotEqual` can only be implemented with multiple
-;; flag checks. Recall from the flag assignment chart above that equality, e.g.,
-;; will assign `Z = 1`. But so does an unordered comparison: `Z = 1, P = 1, C =
-;; 1`. In order to avoid semantics like `EQ | UNO` for equality, we must ensure
-;; that the values are actually ordered, checking that `P = 0` (note that the
-;; `C` flag is irrelevant here). Since we cannot find a single instruction that
-;; implements a `Z = 1 AND P = 0` check, we invert the flag checks (i.e., `Z = 1
-;; AND P = 0` becomes `Z = 0 OR P = 1`) and also flip the select operands, `x`
-;; and `y`. The same argument applies to `FloatCC.NotEqual`.
-;;
-;; More details about the CLIF semantics for `fcmp` are available at
-;; https://docs.rs/cranelift-codegen/latest/cranelift_codegen/ir/trait.InstBuilder.html#method.fcmp.
-
-(rule (lower (has_type ty (select (maybe_uextend (fcmp (FloatCC.Equal) a b)) x y)))
-      (with_flags (x64_ucomis a b) (cmove_or_from_values ty (CC.NZ) (CC.P) y x)))
-
-(rule (lower (has_type ty (select (maybe_uextend (fcmp (FloatCC.NotEqual) a b)) x y)))
-      (with_flags (x64_ucomis a b) (cmove_or_from_values ty (CC.NZ) (CC.P) x y)))
+(decl lower_select_fcmp (Type FcmpCondResult Value Value) InstOutput)
+(rule (lower_select_fcmp ty (FcmpCondResult.Condition flags cc) x y)
+      (with_flags flags (cmove_from_values ty cc x y)))
+(rule (lower_select_fcmp ty (FcmpCondResult.OrCondition flags cc1 cc2) x y)
+      (with_flags flags (cmove_or_from_values ty cc1 cc2 x y)))

 ;; We also can lower `select`s that depend on an `icmp` test, but more simply
 ;; than the `fcmp` variants above. In these cases, we lower to a `CMP`