cranelift: Remove booleans (#5031)

Remove the boolean types from cranelift, and the associated instructions breduce, bextend, bconst, and bint. Standardize on using 1/0 for the return value from instructions that produce scalar boolean results, and -1/0 for boolean vector elements.

Fixes #3205

Co-authored-by: Afonso Bordado <afonso360@users.noreply.github.com>
Co-authored-by: Ulrich Weigand <ulrich.weigand@de.ibm.com>
Co-authored-by: Chris Fallin <chris@cfallin.org>
2022-10-17 16:00:27 -07:00
parent 766ecb561e
commit 32a7593c94
242 changed files with 7695 additions and 10010 deletions
--- a/cranelift/codegen/src/isa/x64/abi.rs
+++ b/cranelift/codegen/src/isa/x64/abi.rs
@@ -260,13 +260,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
        // For integer-typed values, we always load a full 64 bits (and we always spill a full 64
        // bits as well -- see `Inst::store()`).
        let ty = match ty {
-            types::B1
-            | types::B8
-            | types::I8
-            | types::B16
-            | types::I16
-            | types::B32
-            | types::I32 => types::I64,
+            types::I8 | types::I16 | types::I32 => types::I64,
            _ => ty,
        };
        Inst::load(ty, mem, into_reg, ExtKind::None)
--- a/cranelift/codegen/src/isa/x64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -2217,17 +2217,11 @@ impl MachInst for Inst {
            types::I16 => Ok((&[RegClass::Int], &[types::I16])),
            types::I32 => Ok((&[RegClass::Int], &[types::I32])),
            types::I64 => Ok((&[RegClass::Int], &[types::I64])),
-            types::B1 => Ok((&[RegClass::Int], &[types::B1])),
-            types::B8 => Ok((&[RegClass::Int], &[types::B8])),
-            types::B16 => Ok((&[RegClass::Int], &[types::B16])),
-            types::B32 => Ok((&[RegClass::Int], &[types::B32])),
-            types::B64 => Ok((&[RegClass::Int], &[types::B64])),
            types::R32 => panic!("32-bit reftype pointer should never be seen on x86-64"),
            types::R64 => Ok((&[RegClass::Int], &[types::R64])),
            types::F32 => Ok((&[RegClass::Float], &[types::F32])),
            types::F64 => Ok((&[RegClass::Float], &[types::F64])),
            types::I128 => Ok((&[RegClass::Int, RegClass::Int], &[types::I64, types::I64])),
-            types::B128 => Ok((&[RegClass::Int, RegClass::Int], &[types::B64, types::B64])),
            _ if ty.is_vector() => {
                assert!(ty.bits() <= 128);
                Ok((&[RegClass::Float], &[types::I8X16]))
@@ -2326,15 +2320,10 @@ impl MachInst for Inst {
            } else {
                // Must be an integer type.
                debug_assert!(
-                    ty == types::B1
-                        || ty == types::I8
-                        || ty == types::B8
+                    ty == types::I8
                        || ty == types::I16
-                        || ty == types::B16
                        || ty == types::I32
-                        || ty == types::B32
                        || ty == types::I64
-                        || ty == types::B64
                        || ty == types::R32
                        || ty == types::R64
                );
--- a/cranelift/codegen/src/isa/x64/lower.isle
+++ b/cranelift/codegen/src/isa/x64/lower.isle
@@ -22,30 +22,6 @@
      (value_regs (imm $I64 x)
                  (imm $I64 0)))

-;;;; Rules for `bconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; `b64` and smaller.
-
-(rule (lower (has_type (fits_in_64 ty)
-                       (bconst $false)))
-      (imm ty 0))
-
-(rule (lower (has_type (fits_in_64 ty)
-                       (bconst $true)))
-      (imm ty 1))
-
-;; `b128`
-
-(rule 1 (lower (has_type $B128
-                       (bconst $false)))
-      (value_regs (imm $B64 0)
-                  (imm $B64 0)))
-
-(rule 1 (lower (has_type $B128
-                       (bconst $true)))
-      (value_regs (imm $B64 1)
-                  (imm $B64 0)))
-
 ;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule (lower (f32const (u64_from_ieee32 x)))
@@ -303,7 +279,7 @@
                       (band x y)))
      (sse_and ty x y))

-;; `{i,b}128`.
+;; `i128`.

 (rule 6 (lower (has_type $I128 (band x y)))
      (let ((x_regs ValueRegs x)
@@ -315,17 +291,6 @@
        (value_gprs (x64_and $I64 x_lo y_lo)
                    (x64_and $I64 x_hi y_hi))))

-(rule 6 (lower (has_type $B128 (band x y)))
-      ;; Booleans are always `0` or `1`, so we only need to do the `and` on the
-      ;; low half. The high half is always zero but, rather than generate a new
-      ;; zero, we just reuse `x`'s high half which is already zero.
-      (let ((x_regs ValueRegs x)
-            (x_lo Gpr (value_regs_get_gpr x_regs 0))
-            (x_hi Gpr (value_regs_get_gpr x_regs 1))
-            (y_lo Gpr (lo_gpr y)))
-        (value_gprs (x64_and $I64 x_lo y_lo)
-                    x_hi)))
-
 ;;;; Rules for `bor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; `{i,b}64` and smaller.
@@ -381,17 +346,6 @@
 (rule 6 (lower (has_type $I128 (bor x y)))
      (or_i128 x y))

-(rule 6 (lower (has_type $B128 (bor x y)))
-      ;; Booleans are always `0` or `1`, so we only need to do the `or` on the
-      ;; low half. The high half is always zero but, rather than generate a new
-      ;; zero, we just reuse `x`'s high half which is already zero.
-      (let ((x_regs ValueRegs x)
-            (x_lo Gpr (value_regs_get_gpr x_regs 0))
-            (x_hi Gpr (value_regs_get_gpr x_regs 1))
-            (y_lo Gpr (lo_gpr y)))
-        (value_gprs (x64_or $I64 x_lo y_lo)
-                    x_hi)))
-
 ;;;; Rules for `bxor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; `{i,b}64` and smaller.
@@ -439,17 +393,6 @@
        (value_gprs (x64_xor $I64 x_lo y_lo)
                    (x64_xor $I64 x_hi y_hi))))

-(rule 6 (lower (has_type $B128 (bxor x y)))
-      ;; Booleans are always `0` or `1`, so we only need to do the `xor` on the
-      ;; low half. The high half is always zero but, rather than generate a new
-      ;; zero, we just reuse `x`'s high half which is already zero.
-      (let ((x_regs ValueRegs x)
-            (x_lo Gpr (value_regs_get_gpr x_regs 0))
-            (x_hi Gpr (value_regs_get_gpr x_regs 1))
-            (y_lo Gpr (lo_gpr y)))
-        (value_gprs (x64_xor $I64 x_lo y_lo)
-                    x_hi)))
-
 ;;;; Rules for `ishl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; `i64` and smaller.
@@ -1240,9 +1183,6 @@
 (rule (lower (has_type $I128 (bnot x)))
      (i128_not x))

-(rule (lower (has_type $B128 (bnot x)))
-      (i128_not x))
-
 ;; Special case for vector-types where bit-negation is an xor against an
 ;; all-one value
 (rule -1 (lower (has_type ty @ (multi_lane _bits _lanes) (bnot x)))
@@ -1450,35 +1390,35 @@
      (lower_icmp_bool (emit_cmp cc a b)))

 ;; Peephole optimization for `x < 0`, when x is a signed 64 bit value
-(rule 2 (lower (has_type $B1 (icmp (IntCC.SignedLessThan) x @ (value_type $I64) (u64_from_iconst 0))))
+(rule 2 (lower (has_type $I8 (icmp (IntCC.SignedLessThan) x @ (value_type $I64) (u64_from_iconst 0))))
      (x64_shr $I64 x (Imm8Reg.Imm8 63)))

 ;; Peephole optimization for `0 > x`, when x is a signed 64 bit value
-(rule 2 (lower (has_type $B1 (icmp (IntCC.SignedGreaterThan) (u64_from_iconst 0) x @ (value_type $I64))))
+(rule 2 (lower (has_type $I8 (icmp (IntCC.SignedGreaterThan) (u64_from_iconst 0) x @ (value_type $I64))))
      (x64_shr $I64 x (Imm8Reg.Imm8 63)))

 ;; Peephole optimization for `0 <= x`, when x is a signed 64 bit value
-(rule 2 (lower (has_type $B1 (icmp (IntCC.SignedLessThanOrEqual) (u64_from_iconst 0) x @ (value_type $I64))))
+(rule 2 (lower (has_type $I8 (icmp (IntCC.SignedLessThanOrEqual) (u64_from_iconst 0) x @ (value_type $I64))))
      (x64_shr $I64 (x64_not $I64 x) (Imm8Reg.Imm8 63)))

 ;; Peephole optimization for `x >= 0`, when x is a signed 64 bit value
-(rule 2 (lower (has_type $B1 (icmp (IntCC.SignedGreaterThanOrEqual) x @ (value_type $I64) (u64_from_iconst 0))))
+(rule 2 (lower (has_type $I8 (icmp (IntCC.SignedGreaterThanOrEqual) x @ (value_type $I64) (u64_from_iconst 0))))
      (x64_shr $I64 (x64_not $I64 x) (Imm8Reg.Imm8 63)))

 ;; Peephole optimization for `x < 0`, when x is a signed 32 bit value
-(rule 2 (lower (has_type $B1 (icmp (IntCC.SignedLessThan) x @ (value_type $I32) (u64_from_iconst 0))))
+(rule 2 (lower (has_type $I8 (icmp (IntCC.SignedLessThan) x @ (value_type $I32) (u64_from_iconst 0))))
      (x64_shr $I32 x (Imm8Reg.Imm8 31)))

 ;; Peephole optimization for `0 > x`, when x is a signed 32 bit value
-(rule 2 (lower (has_type $B1 (icmp (IntCC.SignedGreaterThan) (u64_from_iconst 0) x @ (value_type $I32))))
+(rule 2 (lower (has_type $I8 (icmp (IntCC.SignedGreaterThan) (u64_from_iconst 0) x @ (value_type $I32))))
      (x64_shr $I32 x (Imm8Reg.Imm8 31)))

 ;; Peephole optimization for `0 <= x`, when x is a signed 32 bit value
-(rule 2 (lower (has_type $B1 (icmp (IntCC.SignedLessThanOrEqual) (u64_from_iconst 0) x @ (value_type $I32))))
+(rule 2 (lower (has_type $I8 (icmp (IntCC.SignedLessThanOrEqual) (u64_from_iconst 0) x @ (value_type $I32))))
      (x64_shr $I32 (x64_not $I64 x) (Imm8Reg.Imm8 31)))

 ;; Peephole optimization for `x >= 0`, when x is a signed 32 bit value
-(rule 2 (lower (has_type $B1 (icmp (IntCC.SignedGreaterThanOrEqual) x @ (value_type $I32) (u64_from_iconst 0))))
+(rule 2 (lower (has_type $I8 (icmp (IntCC.SignedGreaterThanOrEqual) x @ (value_type $I32) (u64_from_iconst 0))))
      (x64_shr $I32 (x64_not $I64 x) (Imm8Reg.Imm8 31)))

 ;; For XMM-held values, we lower to `PCMP*` instructions, sometimes more than
@@ -1710,14 +1650,7 @@
 ;; Finally, we lower `select` from a condition value `c`. These rules are meant
 ;; to be the final, default lowerings if no other patterns matched above.

-(rule -1 (lower (has_type ty (select c @ (value_type $B1) x y)))
-      (let ((size OperandSize (raw_operand_size_of_type $B1))
-            ;; N.B.: disallow load-op fusion, see above. TODO:
-            ;; https://github.com/bytecodealliance/wasmtime/issues/3953.
-            (gpr_c Gpr (put_in_gpr c)))
-           (with_flags (x64_test size (RegMemImm.Imm 1) gpr_c) (cmove_from_values ty (CC.NZ) x y))))
-
-(rule -2 (lower (has_type ty (select c @ (value_type (fits_in_64 a_ty)) x y)))
+(rule -1 (lower (has_type ty (select c @ (value_type (fits_in_64 a_ty)) x y)))
      (let ((size OperandSize (raw_operand_size_of_type a_ty))
            ;; N.B.: disallow load-op fusion, see above. TODO:
            ;; https://github.com/bytecodealliance/wasmtime/issues/3953.
@@ -2125,7 +2058,7 @@
                       (uextend src @ (has_type $I32 (uload32 _ _ _)))))
      src)

-;; Rules for `sextend` / `bextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (decl generic_sextend (Value Type Type) InstOutput)

@@ -2140,17 +2073,17 @@
      (x64_sar $I64 src (Imm8Reg.Imm8 63)))

 ;; I64 -> I128.
-(rule 3 (generic_sextend src (ty_int_bool_64 _) (ty_int_bool_128 _))
+(rule 3 (generic_sextend src $I64 $I128)
      (value_regs src (spread_sign_bit src)))

 ;; I{8,16,32} -> I128.
-(rule 2 (generic_sextend src (fits_in_32 src_ty) (ty_int_bool_128 _))
+(rule 2 (generic_sextend src (fits_in_32 src_ty) $I128)
      (let ((lo Gpr (extend_to_gpr src $I64 (ExtendKind.Sign)))
            (hi Gpr (spread_sign_bit lo)))
      (value_regs lo hi)))

 ;; I{8,16,32} -> I64.
-(rule 1 (generic_sextend src (fits_in_32 src_ty) (ty_int_bool_64 _))
+(rule 1 (generic_sextend src (fits_in_32 src_ty) $I64)
      (extend_to_gpr src $I64 (ExtendKind.Sign)))

 ;; I8 -> I{16,32}, I16 -> I32.
@@ -2162,13 +2095,7 @@
                 (sextend src @ (value_type src_ty))))
      (generic_sextend src src_ty dst_ty))

-;; Bools are stored as 0/-1 so extends must sign-extend as well.
-(rule (lower
-       (has_type dst_ty
-                 (bextend src @ (value_type src_ty))))
-      (generic_sextend src src_ty dst_ty))
-
-;; Rules for `ireduce` / `breduce` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Rules for `ireduce` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; T -> T is always a no-op, even I128 -> I128.
 (rule (lower (has_type ty (ireduce src @ (value_type ty))))
@@ -2180,28 +2107,6 @@
 (rule 1 (lower (has_type (fits_in_64 ty) (ireduce src)))
      (value_regs_get_gpr src 0))

-;; Likewise for breduce.
-
-(rule (lower (has_type ty (breduce src @ (value_type ty))))
-      src)
-
-(rule 1 (lower (has_type (fits_in_64 ty) (breduce src)))
-      (value_regs_get_gpr src 0))
-
-;; Rules for `bint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; Booleans are stored as all-zeroes (0) or all-ones (-1). We AND out
-;; the LSB to give a 0 / 1-valued integer result.
-
-(rule (lower (has_type (fits_in_64 ty)
-                       (bint src)))
-      (x64_and ty src (RegMemImm.Imm 1)))
-(rule 1 (lower (has_type $I128
-                       (bint src)))
-      (value_regs
-       (x64_and $I64 src (RegMemImm.Imm 1))
-       (imm $I64 0)))
-
 ;; Rules for `debugtrap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule (lower (debugtrap))
@@ -2505,7 +2410,7 @@
      (x64_movzx (ext_mode (ty_bits_u16 ty) 64) (to_amode flags address offset)))
 ;; But if we know that both the `from` and `to` are 64 bits, we simply load with
 ;; no extension.
-(rule -1 (lower (has_type (ty_int_bool_ref_64 ty) (load flags address offset)))
+(rule -1 (lower (has_type (ty_int_ref_64 ty) (load flags address offset)))
      (x64_mov (to_amode flags address offset)))
 ;; Also, certain scalar loads have a specific `from` width and extension kind
 ;; (signed -> `sx`, zeroed -> `zx`). We overwrite the high bits of the 64-bit
@@ -2538,8 +2443,8 @@
 (rule -2 (lower (has_type (ty_vec128 ty) (load flags address offset)))
      (x64_movdqu (to_amode flags address offset)))

-;; We can load an I128/B128 by doing two 64-bit loads.
-(rule -3 (lower (has_type (ty_int_bool_128 _)
+;; We can load an I128 by doing two 64-bit loads.
+(rule -3 (lower (has_type $I128
                       (load flags address offset)))
      (let ((addr_lo Amode (to_amode flags address offset))
            (addr_hi Amode (amode_offset addr_lo 8))
@@ -2623,9 +2528,9 @@
      (side_effect
       (x64_xmm_movrm (SseOpcode.Movdqu) (to_amode flags address offset) value)))

-;; Stores of I128/B128 values: store the two 64-bit halves separately.
+;; Stores of I128 values: store the two 64-bit halves separately.
 (rule 0 (lower (store flags
-                    value @ (value_type (ty_int_bool_128 _))
+                    value @ (value_type $I128)
                    address
                    offset))
      (let ((value_reg ValueRegs value)
@@ -2918,8 +2823,6 @@


 (decl cmp_zero_int_bool_ref (Value) ProducesFlags)
-(rule 1 (cmp_zero_int_bool_ref val @ (value_type $B1))
-      (x64_test (OperandSize.Size8) (RegMemImm.Imm 1) val))
 (rule (cmp_zero_int_bool_ref val @ (value_type ty))
      (let ((size OperandSize (raw_operand_size_of_type ty))
            (src Gpr val))
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -22,7 +22,6 @@ use target_lexicon::Triple;
 fn is_int_or_ref_ty(ty: Type) -> bool {
    match ty {
        types::I8 | types::I16 | types::I32 | types::I64 | types::R64 => true,
-        types::B1 | types::B8 | types::B16 | types::B32 | types::B64 => true,
        types::R32 => panic!("shouldn't have 32-bits refs on x64"),
        _ => false,
    }
@@ -328,7 +327,6 @@ fn lower_insn_to_regs(
    let op = ctx.data(insn).opcode();
    match op {
        Opcode::Iconst
-        | Opcode::Bconst
        | Opcode::F32const
        | Opcode::F64const
        | Opcode::Null
@@ -369,10 +367,7 @@ fn lower_insn_to_regs(
        | Opcode::IsInvalid
        | Opcode::Uextend
        | Opcode::Sextend
-        | Opcode::Breduce
-        | Opcode::Bextend
        | Opcode::Ireduce
-        | Opcode::Bint
        | Opcode::Debugtrap
        | Opcode::WideningPairwiseDotProductS
        | Opcode::Fadd
--- a/cranelift/codegen/src/isa/x64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/x64/lower/isle.rs
@@ -549,7 +549,7 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {

    #[inline]
    fn type_register_class(&mut self, ty: Type) -> Option<RegisterClass> {
-        if is_int_or_ref_ty(ty) || ty == I128 || ty == B128 {
+        if is_int_or_ref_ty(ty) || ty == I128 {
            Some(RegisterClass::Gpr {
                single_register: ty != I128,
            })
@@ -564,7 +564,6 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
    fn ty_int_bool_or_ref(&mut self, ty: Type) -> Option<()> {
        match ty {
            types::I8 | types::I16 | types::I32 | types::I64 | types::R64 => Some(()),
-            types::B1 | types::B8 | types::B16 | types::B32 | types::B64 => Some(()),
            types::R32 => panic!("shouldn't have 32-bits refs on x64"),
            _ => None,
        }