cranelift: Remove booleans (#5031)

Remove the boolean types from cranelift, and the associated instructions breduce, bextend, bconst, and bint. Standardize on using 1/0 for the return value from instructions that produce scalar boolean results, and -1/0 for boolean vector elements.

Fixes #3205

Co-authored-by: Afonso Bordado <afonso360@users.noreply.github.com>
Co-authored-by: Ulrich Weigand <ulrich.weigand@de.ibm.com>
Co-authored-by: Chris Fallin <chris@cfallin.org>
2022-10-17 16:00:27 -07:00
parent 766ecb561e
commit 32a7593c94
242 changed files with 7695 additions and 10010 deletions
--- a/cranelift/codegen/src/isa/aarch64/inst.isle
+++ b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -1156,12 +1156,6 @@
 (rule (scalar_size $I64) (ScalarSize.Size64))
 (rule (scalar_size $I128) (ScalarSize.Size128))

-(rule (scalar_size $B8) (ScalarSize.Size8))
-(rule (scalar_size $B16) (ScalarSize.Size16))
-(rule (scalar_size $B32) (ScalarSize.Size32))
-(rule (scalar_size $B64) (ScalarSize.Size64))
-(rule (scalar_size $B128) (ScalarSize.Size128))
-
 (rule (scalar_size $F32) (ScalarSize.Size32))
 (rule (scalar_size $F64) (ScalarSize.Size64))

@@ -1947,19 +1941,13 @@

 ;; Helper for materializing a boolean value into a register from
 ;; flags.
-(decl materialize_bool_result (u8 Cond) ConsumesFlags)
-(rule (materialize_bool_result 1 cond)
+(decl materialize_bool_result (Cond) ConsumesFlags)
+(rule (materialize_bool_result cond)
      (let ((dst WritableReg (temp_writable_reg $I64)))
        (ConsumesFlags.ConsumesFlagsReturnsReg
         (MInst.CSet dst cond)
         dst)))

-(rule -1 (materialize_bool_result _ty_bits cond)
-      (let ((dst WritableReg (temp_writable_reg $I64)))
-        (ConsumesFlags.ConsumesFlagsReturnsReg
-         (MInst.CSetm dst cond)
-         dst)))
-
 (decl cmn_imm (OperandSize Reg Imm12) ProducesFlags)
 (rule (cmn_imm size src1 src2)
      (ProducesFlags.ProducesFlagsSideEffect
@@ -2224,6 +2212,18 @@
         (MInst.CSel dst cond if_true if_false)
         dst)))

+;; Helper for constructing `cset` instructions.
+(decl cset (Cond) ConsumesFlags)
+(rule (cset cond)
+      (let ((dst WritableReg (temp_writable_reg $I64)))
+        (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.CSet dst cond) dst)))
+
+;; Helper for constructing `csetm` instructions.
+(decl csetm (Cond) ConsumesFlags)
+(rule (csetm cond)
+      (let ((dst WritableReg (temp_writable_reg $I64)))
+        (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.CSetm dst cond) dst)))
+
 ;; Helper for generating a `CSNeg` instruction.
 ;;
 ;; Note that this doesn't actually emit anything, instead it produces a
@@ -2244,21 +2244,14 @@
      (produces_flags_append inst_input (MInst.CCmp size rn rm nzcv cond)))

 ;; Helper for generating `MInst.CCmpImm` instructions.
-(decl ccmp_imm (OperandSize u8 Reg UImm5 NZCV Cond) ConsumesFlags)
-(rule 1 (ccmp_imm size 1 rn imm nzcv cond)
+(decl ccmp_imm (OperandSize Reg UImm5 NZCV Cond) ConsumesFlags)
+(rule 1 (ccmp_imm size rn imm nzcv cond)
      (let ((dst WritableReg (temp_writable_reg $I64)))
        (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
         (MInst.CCmpImm size rn imm nzcv cond)
         (MInst.CSet dst cond)
         (value_reg dst))))

-(rule (ccmp_imm size _ty_bits rn imm nzcv cond)
-      (let ((dst WritableReg (temp_writable_reg $I64)))
-        (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
-         (MInst.CCmpImm size rn imm nzcv cond)
-         (MInst.CSetm dst cond)
-         (value_reg dst))))
-
 ;; Helpers for generating `add` instructions.

 (decl add (Type Reg Reg) Reg)
@@ -3381,11 +3374,11 @@

 ;; Integers <= 64-bits.
 (rule -2 (lower_icmp_into_reg cond rn rm in_ty out_ty)
-      (if (ty_int_bool_ref_scalar_64 in_ty))
+      (if (ty_int_ref_scalar_64 in_ty))
      (let ((cc Cond (cond_code cond)))
       (with_flags
        (lower_icmp cond rn rm in_ty)
-        (materialize_bool_result (ty_bits out_ty) cc))))
+        (materialize_bool_result cc))))

 (rule 1 (lower_icmp cond rn rm (fits_in_16 ty))
      (if (signed_cond_code cond))
@@ -3398,23 +3391,23 @@
      (let ((rn Reg (put_in_reg_zext32 rn)))
      (cmp_extend (operand_size ty) rn rm (lower_icmp_extend ty $false))))
 (rule -3 (lower_icmp cond rn (imm12_from_value rm) ty)
-      (if (ty_int_bool_ref_scalar_64 ty))
+      (if (ty_int_ref_scalar_64 ty))
      (cmp_imm (operand_size ty) rn rm))
 (rule -4 (lower_icmp cond rn rm ty)
-      (if (ty_int_bool_ref_scalar_64 ty))
+      (if (ty_int_ref_scalar_64 ty))
      (cmp (operand_size ty) rn rm))

 ;; 128-bit integers.
-(rule (lower_icmp_into_reg cond @ (IntCC.Equal) rn rm $I128 out_ty)
+(rule (lower_icmp_into_reg cond @ (IntCC.Equal) rn rm $I128 $I8)
      (let ((cc Cond (cond_code cond)))
       (with_flags
        (lower_icmp cond rn rm $I128)
-        (materialize_bool_result (ty_bits out_ty) cc))))
-(rule (lower_icmp_into_reg cond @ (IntCC.NotEqual) rn rm $I128 out_ty)
+        (materialize_bool_result cc))))
+(rule (lower_icmp_into_reg cond @ (IntCC.NotEqual) rn rm $I128 $I8)
      (let ((cc Cond (cond_code cond)))
       (with_flags
        (lower_icmp cond rn rm $I128)
-        (materialize_bool_result (ty_bits out_ty) cc))))
+        (materialize_bool_result cc))))

 ;; cmp lhs_lo, rhs_lo
 ;; ccmp lhs_hi, rhs_hi, #0, eq
@@ -3440,7 +3433,7 @@
 ;; cmp      lhs_hi, rhs_hi
 ;; cset     tmp2, cond
 ;; csel     dst, tmp1, tmp2, eq
-(rule -1 (lower_icmp_into_reg cond lhs rhs $I128 out_ty)
+(rule -1 (lower_icmp_into_reg cond lhs rhs $I128 $I8)
      (let ((unsigned_cond Cond (cond_code (intcc_unsigned cond)))
            (cond Cond (cond_code cond))
            (lhs ValueRegs (put_in_regs lhs))
@@ -3449,78 +3442,100 @@
            (lhs_hi Reg (value_regs_get lhs 1))
            (rhs_lo Reg (value_regs_get rhs 0))
            (rhs_hi Reg (value_regs_get rhs 1))
-            (tmp1 ValueRegs
-             (with_flags (cmp (OperandSize.Size64) lhs_lo rhs_lo)
-                         (materialize_bool_result
-                          (ty_bits out_ty) unsigned_cond)))
-            (tmp1 Reg (value_regs_get tmp1 0))
-            (dst ValueRegs
-                  (with_flags (cmp (OperandSize.Size64) lhs_hi rhs_hi)
-                   (lower_icmp_i128_consumer cond (ty_bits out_ty)
-                    tmp1 lhs_hi rhs_hi))))
-       dst))
+            (tmp1 Reg (with_flags_reg (cmp (OperandSize.Size64) lhs_lo rhs_lo)
+                                      (materialize_bool_result unsigned_cond))))
+        (with_flags (cmp (OperandSize.Size64) lhs_hi rhs_hi)
+                    (lower_icmp_i128_consumer cond tmp1))))

-(decl lower_icmp_i128_consumer (Cond u8 Reg Reg Reg) ConsumesFlags)
-(rule (lower_icmp_i128_consumer cond 1 tmp1 lhs_hi rhs_hi)
+(decl lower_icmp_i128_consumer (Cond Reg) ConsumesFlags)
+(rule (lower_icmp_i128_consumer cond tmp1)
      (let ((tmp2 WritableReg (temp_writable_reg $I64))
            (dst WritableReg (temp_writable_reg $I64)))
       (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
        (MInst.CSet tmp2 cond)
        (MInst.CSel dst (Cond.Eq) tmp1 tmp2)
        (value_reg dst))))
-(rule (lower_icmp_i128_consumer cond 128 tmp1 lhs_hi rhs_hi)
-      (let ((tmp2 WritableReg (temp_writable_reg $I64))
-            (dst WritableReg (temp_writable_reg $I64)))
-       (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
-        (MInst.CSetm tmp2 cond)
-        (MInst.CSel dst (Cond.Eq) tmp1 tmp2)
-        (value_regs dst dst))))
-(rule -1 (lower_icmp_i128_consumer cond _out_ty_bits tmp1 lhs_hi rhs_hi)
-      (let ((tmp2 WritableReg (temp_writable_reg $I64))
-            (dst WritableReg (temp_writable_reg $I64)))
-       (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
-        (MInst.CSetm tmp2 cond)
-        (MInst.CSel dst (Cond.Eq) tmp1 tmp2)
-        (value_reg dst))))
+
+(decl lower_bmask (Type Type ValueRegs) ValueRegs)
+
+;; For conversions that fit in a register, we can use csetm.
+;;
+;; cmp   val, #0
+;; csetm res, ne
+(rule 0
+      (lower_bmask (fits_in_64 _) (fits_in_64 _) val)
+      (with_flags_reg
+        (cmp64_imm (value_regs_get val 0) (u8_into_imm12 0))
+        (csetm (Cond.Ne))))
+
+;; For conversions from a 128-bit value into a 64-bit or smaller one, we or the
+;; two registers of the 128-bit value together, and then recurse with the
+;; combined value as a 64-bit test.
+;;
+;; orr   val, lo, hi
+;; cmp   val, #0
+;; csetm res, ne
+(rule 1
+      (lower_bmask (fits_in_64 ty) $I128 val)
+      (let ((lo Reg (value_regs_get val 0))
+            (hi Reg (value_regs_get val 1))
+            (combined Reg (orr $I64 lo hi)))
+        (lower_bmask ty $I64 (value_reg combined))))
+
+;; For converting from a smaller type into i128, duplicate the result of
+;; converting to i64.
+(rule 2
+      (lower_bmask $I128 (fits_in_64 ty) val)
+      (let ((res ValueRegs (lower_bmask $I64 ty val))
+            (res Reg (value_regs_get res 0)))
+        (value_regs res res)))
+
+;; For conversions from a 128-bit value to a 128-bit mask, we duplicate the
+;; result of converting to an I64.
+(rule 3
+      (lower_bmask $I128 $I128 val)
+      (let ((res ValueRegs (lower_bmask $I64 $I128 val))
+            (res Reg (value_regs_get res 0)))
+        (value_regs res res)))

 ;; Exceptional `lower_icmp_into_flags` rules.
 ;; We need to guarantee that the flags for `cond` are correct, so we
 ;; compare `dst` with 1.
 (rule (lower_icmp_into_flags cond @ (IntCC.SignedGreaterThanOrEqual) lhs rhs $I128)
-      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $B1))
+      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $I8))
            (dst Reg (value_regs_get dst 0))
            (tmp Reg (imm $I64 (ImmExtend.Sign) 1))) ;; mov tmp, #1
       (cmp (OperandSize.Size64) dst tmp)))
 (rule (lower_icmp_into_flags cond @ (IntCC.UnsignedGreaterThanOrEqual) lhs rhs $I128)
-      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $B1))
+      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $I8))
            (dst Reg (value_regs_get dst 0))
            (tmp Reg (imm $I64 (ImmExtend.Zero) 1)))
       (cmp (OperandSize.Size64) dst tmp)))
 (rule (lower_icmp_into_flags cond @ (IntCC.SignedLessThanOrEqual) lhs rhs $I128)
-      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $B1))
+      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $I8))
            (dst Reg (value_regs_get dst 0))
            (tmp Reg (imm $I64 (ImmExtend.Sign) 1)))
       (cmp (OperandSize.Size64) tmp dst)))
 (rule (lower_icmp_into_flags cond @ (IntCC.UnsignedLessThanOrEqual) lhs rhs $I128)
-      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $B1))
+      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $I8))
            (dst Reg (value_regs_get dst 0))
            (tmp Reg (imm $I64 (ImmExtend.Zero) 1)))
       (cmp (OperandSize.Size64) tmp dst)))
 ;; For strict comparisons, we compare with 0.
 (rule (lower_icmp_into_flags cond @ (IntCC.SignedGreaterThan) lhs rhs $I128)
-      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $B1))
+      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $I8))
            (dst Reg (value_regs_get dst 0)))
       (cmp (OperandSize.Size64) dst (zero_reg))))
 (rule (lower_icmp_into_flags cond @ (IntCC.UnsignedGreaterThan) lhs rhs $I128)
-      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $B1))
+      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $I8))
            (dst Reg (value_regs_get dst 0)))
       (cmp (OperandSize.Size64) dst (zero_reg))))
 (rule (lower_icmp_into_flags cond @ (IntCC.SignedLessThan) lhs rhs $I128)
-      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $B1))
+      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $I8))
            (dst Reg (value_regs_get dst 0)))
       (cmp (OperandSize.Size64) (zero_reg) dst)))
 (rule (lower_icmp_into_flags cond @ (IntCC.UnsignedLessThan) lhs rhs $I128)
-      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $B1))
+      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $I8))
            (dst Reg (value_regs_get dst 0)))
       (cmp (OperandSize.Size64) (zero_reg) dst)))

@@ -3548,7 +3563,7 @@
         (MInst.CSel dst_hi cond rn_hi rm_hi)
         (value_regs dst_lo dst_hi)))))
 (rule 1 (lower_select flags cond ty rn rm)
-      (if (ty_int_bool_ref_scalar_64 ty))
+      (if (ty_int_ref_scalar_64 ty))
      (with_flags flags (csel cond rn rm)))

 ;; Helper for emitting `MInst.Jump` instructions.
--- a/cranelift/codegen/src/isa/aarch64/inst/imms.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/imms.rs
@@ -221,9 +221,6 @@ impl UImm12Scaled {
    /// Create a UImm12Scaled from a raw offset and the known scale type, if
    /// possible.
    pub fn maybe_from_i64(value: i64, scale_ty: Type) -> Option<UImm12Scaled> {
-        // Ensure the type is at least one byte.
-        let scale_ty = if scale_ty == B1 { B8 } else { scale_ty };
-
        let scale = scale_ty.bytes();
        assert!(scale.is_power_of_two());
        let scale = scale as i64;
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -1,9 +1,7 @@
 //! This module defines aarch64-specific machine instruction types.

 use crate::binemit::{Addend, CodeOffset, Reloc};
-use crate::ir::types::{
-    B1, B128, B16, B32, B64, B8, F32, F64, FFLAGS, I128, I16, I32, I64, I8, I8X16, IFLAGS, R32, R64,
-};
+use crate::ir::types::{F32, F64, FFLAGS, I128, I16, I32, I64, I8, I8X16, IFLAGS, R32, R64};
 use crate::ir::{types, ExternalName, MemFlags, Opcode, Type};
 use crate::isa::CallConv;
 use crate::machinst::*;
@@ -440,22 +438,22 @@ impl Inst {
    /// Generic constructor for a load (zero-extending where appropriate).
    pub fn gen_load(into_reg: Writable<Reg>, mem: AMode, ty: Type, flags: MemFlags) -> Inst {
        match ty {
-            B1 | B8 | I8 => Inst::ULoad8 {
+            I8 => Inst::ULoad8 {
                rd: into_reg,
                mem,
                flags,
            },
-            B16 | I16 => Inst::ULoad16 {
+            I16 => Inst::ULoad16 {
                rd: into_reg,
                mem,
                flags,
            },
-            B32 | I32 | R32 => Inst::ULoad32 {
+            I32 | R32 => Inst::ULoad32 {
                rd: into_reg,
                mem,
                flags,
            },
-            B64 | I64 | R64 => Inst::ULoad64 {
+            I64 | R64 => Inst::ULoad64 {
                rd: into_reg,
                mem,
                flags,
@@ -491,22 +489,22 @@ impl Inst {
    /// Generic constructor for a store.
    pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst {
        match ty {
-            B1 | B8 | I8 => Inst::Store8 {
+            I8 => Inst::Store8 {
                rd: from_reg,
                mem,
                flags,
            },
-            B16 | I16 => Inst::Store16 {
+            I16 => Inst::Store16 {
                rd: from_reg,
                mem,
                flags,
            },
-            B32 | I32 | R32 => Inst::Store32 {
+            I32 | R32 => Inst::Store32 {
                rd: from_reg,
                mem,
                flags,
            },
-            B64 | I64 | R64 => Inst::Store64 {
+            I64 | R64 => Inst::Store64 {
                rd: from_reg,
                mem,
                flags,
@@ -1209,9 +1207,7 @@ impl MachInst for Inst {
        match ty {
            F64 => Inst::load_fp_constant64(to_reg.unwrap(), value as u64, alloc_tmp),
            F32 => Inst::load_fp_constant32(to_reg.unwrap(), value as u32, alloc_tmp),
-            B1 | B8 | B16 | B32 | B64 | I8 | I16 | I32 | I64 | R32 | R64 => {
-                Inst::load_constant(to_reg.unwrap(), value as u64)
-            }
+            I8 | I16 | I32 | I64 | R32 | R64 => Inst::load_constant(to_reg.unwrap(), value as u64),
            I128 => Inst::load_constant128(to_regs, value),
            _ => panic!("Cannot generate constant for type: {}", ty),
        }
@@ -1236,17 +1232,11 @@ impl MachInst for Inst {
            I16 => Ok((&[RegClass::Int], &[I16])),
            I32 => Ok((&[RegClass::Int], &[I32])),
            I64 => Ok((&[RegClass::Int], &[I64])),
-            B1 => Ok((&[RegClass::Int], &[B1])),
-            B8 => Ok((&[RegClass::Int], &[B8])),
-            B16 => Ok((&[RegClass::Int], &[B16])),
-            B32 => Ok((&[RegClass::Int], &[B32])),
-            B64 => Ok((&[RegClass::Int], &[B64])),
            R32 => panic!("32-bit reftype pointer should never be seen on AArch64"),
            R64 => Ok((&[RegClass::Int], &[R64])),
            F32 => Ok((&[RegClass::Float], &[F32])),
            F64 => Ok((&[RegClass::Float], &[F64])),
            I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])),
-            B128 => Ok((&[RegClass::Int, RegClass::Int], &[B64, B64])),
            _ if ty.is_vector() => {
                assert!(ty.bits() <= 128);
                Ok((&[RegClass::Float], &[I8X16]))
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -19,14 +19,6 @@
 (rule (lower (has_type ty (iconst (u64_from_imm64 n))))
      (imm ty (ImmExtend.Zero) n))

-;;;; Rules for `bconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(rule (lower (has_type ty (bconst $false)))
-      (imm ty (ImmExtend.Zero) 0))
-
-(rule (lower (has_type ty (bconst $true)))
-      (imm ty (ImmExtend.Zero) 1))
-
 ;;;; Rules for `null` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule (lower (has_type ty (null)))
@@ -142,10 +134,10 @@
 (rule (lower (has_type $F64X2 (scalar_to_vector x)))
      (fpu_extend x (ScalarSize.Size64)))

-(rule -1 (lower (scalar_to_vector x @ (value_type (ty_int_bool_64 _))))
+(rule -1 (lower (scalar_to_vector x @ (value_type $I64)))
      (mov_to_fpu x (ScalarSize.Size64)))

-(rule -2 (lower (scalar_to_vector x @ (value_type (int_bool_fits_in_32 _))))
+(rule -2 (lower (scalar_to_vector x @ (value_type (int_fits_in_32 _))))
      (mov_to_fpu (put_in_reg_zext32 x) (ScalarSize.Size32)))

 ;;;; Rules for `vall_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -159,18 +151,17 @@
 ;; 0 when all input elements are true, i.e. non-zero, or a NaN otherwise
 ;; (either -1 or -2 when represented as an integer); NaNs are the only
 ;; floating-point numbers that compare unequal to themselves.
-(rule (lower (has_type out_ty (vall_true x @ (value_type (multi_lane 64 2)))))
+(rule (lower (vall_true x @ (value_type (multi_lane 64 2))))
      (let ((x1 Reg (cmeq0 x (VectorSize.Size64x2)))
            (x2 Reg (addp x1 x1 (VectorSize.Size64x2))))
       (with_flags (fpu_cmp (ScalarSize.Size64) x2 x2)
-                   (materialize_bool_result (ty_bits out_ty) (Cond.Eq)))))
+                   (materialize_bool_result (Cond.Eq)))))

-(rule (lower (has_type out_ty (vall_true x @ (value_type (multi_lane 32 2)))))
+(rule (lower (vall_true x @ (value_type (multi_lane 32 2))))
      (let ((x1 Reg (mov_from_vec x 0 (ScalarSize.Size64))))
       (with_flags (cmp_rr_shift (OperandSize.Size64) (zero_reg) x1 32)
                   (ccmp_imm
                    (OperandSize.Size32)
-                    (ty_bits out_ty)
                    x1
                    (u8_into_uimm5 0)
                    (nzcv $false $true $false $false)
@@ -183,18 +174,18 @@
 ;; mov xm, vn.d[0]
 ;; cmp xm, #0
 ;; cset xm, ne
-(rule -1 (lower (has_type out_ty (vall_true x @ (value_type (lane_fits_in_32 ty)))))
+(rule -1 (lower (vall_true x @ (value_type (lane_fits_in_32 ty))))
      (if (not_vec32x2 ty))
      (let ((x1 Reg (vec_lanes (VecLanesOp.Uminv) x (vector_size ty)))
            (x2 Reg (mov_from_vec x1 0 (ScalarSize.Size64))))
       (with_flags (cmp_imm (OperandSize.Size64) x2 (u8_into_imm12 0))
-                   (materialize_bool_result (ty_bits out_ty) (Cond.Ne)))))
+                   (materialize_bool_result (Cond.Ne)))))

 ;;;; Rules for `vany_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-(rule (lower (has_type out_ty (vany_true x @ (value_type in_ty))))
+(rule (lower (vany_true x @ (value_type in_ty)))
      (with_flags (vanytrue x in_ty)
-                  (materialize_bool_result (ty_bits out_ty) (Cond.Ne))))
+                  (materialize_bool_result (Cond.Ne))))

 ;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

@@ -1536,60 +1527,11 @@
 (rule -1 (lower (has_type ty (cls x)))
      (a64_cls ty x))

-;;;; Rules for `bint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; Rules for `bmask` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-;; Booleans are stored as all-zeroes (0) or all-ones (-1). We AND
-;; out the LSB to give a 0 / 1-valued integer result.
-
-(rule 1 (lower (has_type $I128 (bint x)))
-      (let ((val ValueRegs x)
-            (in_lo Reg (value_regs_get val 0))
-            (dst_lo Reg (and_imm $I32 in_lo (u64_into_imm_logic $I32 1)))
-            (dst_hi Reg (imm $I64 (ImmExtend.Zero) 0)))
-        (value_regs dst_lo dst_hi)))
-
-(rule (lower (bint x))
-      (and_imm $I32 x (u64_into_imm_logic $I32 1)))
-
-;;;; Rules for `bmask`/`bextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; Bextend and Bmask both simply sign-extend. This works for:
-;; - Bextend, because booleans are stored as 0 / -1, so we
-;;   sign-extend the -1 to a -1 in the wider width.
-;; - Bmask, because the resulting integer mask value must be
-;;   all-ones (-1) if the argument is true.
-
-;; Use a common helper to type cast bools to either bool or integer types.
-(decl cast_bool (Type Type Value) InstOutput)
-(rule (lower (has_type out_ty (bextend x @ (value_type in_ty))))
-      (cast_bool in_ty out_ty x))
+;; Bmask tests the value against zero, and uses `csetm` to produce the mask.
 (rule (lower (has_type out_ty (bmask x @ (value_type in_ty))))
-      (cast_bool in_ty out_ty x))
-
-
-;; If the target has the same or a smaller size than the source, it's a no-op.
-(rule (cast_bool $B8 $I8 x) x)
-(rule (cast_bool $B16 (fits_in_16 _out) x) x)
-(rule (cast_bool $B32 (fits_in_32 _out) x) x)
-(rule (cast_bool $B64 (fits_in_64 _out) x) x)
-
-;; Casting between 128 bits is a noop
-(rule -1 (cast_bool (ty_int_bool_128 _in) (ty_int_bool_128 _out) x)
-    x)
-
-;; Converting from 128 bits to anything below we just ignore the top register
-(rule -2 (cast_bool (ty_int_bool_128 _in) (fits_in_64 _out) x)
-    (value_regs_get x 0))
-
-;; Extend to 64 bits first, then this will be all 0s or all 1s and we can
-;; duplicate to both halves of 128 bits
-(rule -3 (cast_bool in (ty_int_bool_128 _out) x)
-      (let ((tmp Reg (extend x $true (ty_bits in) 64)))
-        (value_regs tmp tmp)))
-
-;; Values that fit in a single register are sign extended normally
-(rule -4 (cast_bool (fits_in_64 in) (fits_in_64 out) x)
-      (extend x $true (ty_bits in) (ty_bits out)))
+      (lower_bmask out_ty in_ty x))

 ;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

@@ -1648,7 +1590,7 @@
 ;;;; Rules for `bitselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule (lower (has_type ty (bitselect c x y)))
-      (if (ty_int_bool_ref_scalar_64 ty))
+      (if (ty_int_ref_scalar_64 ty))
      (let ((tmp1 Reg (and_reg ty x c))
            (tmp2 Reg (bic ty y c)))
        (orr ty tmp1 tmp2)))
@@ -1661,22 +1603,15 @@
 (rule (lower (has_type (ty_vec128 ty) (vselect c x y)))
        (bsl ty c x y))

-;;;; Rules for `ireduce` / `breduce` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; Rules for `ireduce` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; T -> I{64,32,16,8}: We can simply pass through the value: values
 ;; are always stored with high bits undefined, so we can just leave
 ;; them be.
 (rule (lower (has_type ty (ireduce src)))
-    (if (ty_int_bool_ref_scalar_64 ty))
+    (if (ty_int_ref_scalar_64 ty))
    (value_regs_get src 0))

-;; Likewise for breduce.
-
-(rule (lower (has_type ty (breduce src)))
-      (if (ty_int_bool_ref_scalar_64 ty))
-      (value_regs_get src 0))
-
-
 ;;;; Rules for `fcmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule 4 (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) x y)))
@@ -1706,9 +1641,7 @@
 (rule 0 (lower (has_type out_ty
              (fcmp cond x @ (value_type (ty_scalar_float in_ty)) y)))
      (with_flags (fpu_cmp (scalar_size in_ty) x y)
-                  (materialize_bool_result
-                   (ty_bits out_ty)
-                   (fp_cond_code cond))))
+                  (materialize_bool_result (fp_cond_code cond))))

 (rule -1 (lower (has_type out_ty (fcmp cond x @ (value_type in_ty) y)))
      (if (ty_vector_float in_ty))
@@ -1740,8 +1673,8 @@
            (vec_size VectorSize (vector_size ty)))
          (value_reg (int_cmp_zero_swap cond rn vec_size))))

-(rule -1 (lower (has_type out_ty (icmp cond x @ (value_type in_ty) y)))
-      (lower_icmp_into_reg cond x y in_ty out_ty))
+(rule -1 (lower (icmp cond x @ (value_type in_ty) y))
+      (lower_icmp_into_reg cond x y in_ty $I8))

 ;;;; Rules for `trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

@@ -1783,10 +1716,10 @@
 ;;;; Rules for `trueff` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; Verification ensures the input is always a single-def ffcmp.
-(rule (lower (has_type ty (trueff cc insn @ (ffcmp x @ (value_type in_ty) y))))
+(rule (lower (trueff cc insn @ (ffcmp x @ (value_type in_ty) y)))
      (with_flags_reg
       (fpu_cmp (scalar_size in_ty) x y)
-       (materialize_bool_result (ty_bits ty) (fp_cond_code cc))))
+       (materialize_bool_result (fp_cond_code cc))))

 ;;;; Rules for `select` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

@@ -1797,13 +1730,6 @@
        (lower_icmp_into_flags cc x y in_ty)
        cond ty rn rm)))

-(rule (lower (has_type ty
-       (select _flags @ (bint (icmp cc x @ (value_type in_ty) y)) rn rm)))
-      (let ((cond Cond (cond_code cc)))
-       (lower_select
-        (lower_icmp_into_flags cc x y in_ty)
-        cond ty rn rm)))
-
 (rule (lower (has_type ty
       (select _flags @ (fcmp cc x @ (value_type in_ty) y) rn rm)))
      (let ((cond Cond (fp_cond_code cc)))
@@ -1811,20 +1737,19 @@
        (fpu_cmp (scalar_size in_ty) x y)
        cond ty rn rm)))

-(rule (lower (has_type ty
-       (select _flags @ (bint (fcmp cc x @ (value_type in_ty) y)) rn rm)))
-      (let ((cond Cond (fp_cond_code cc)))
+(rule -1 (lower (has_type ty (select rcond @ (value_type $I8) rn rm)))
+      (let ((rcond Reg rcond))
       (lower_select
-        (fpu_cmp (scalar_size in_ty) x y)
-        cond ty rn rm)))
+         (tst_imm $I32 rcond (u64_into_imm_logic $I32 255))
+         (Cond.Ne) ty rn rm)))

-(rule -1 (lower (has_type ty (select rcond @ (value_type (fits_in_32 _)) rn rm)))
+(rule -2 (lower (has_type ty (select rcond @ (value_type (fits_in_32 _)) rn rm)))
      (let ((rcond Reg (put_in_reg_zext32 rcond)))
       (lower_select
        (cmp (OperandSize.Size32) rcond (zero_reg))
        (Cond.Ne) ty rn rm)))

-(rule -2 (lower (has_type ty (select rcond rn rm)))
+(rule -3 (lower (has_type ty (select rcond rn rm)))
      (let ((rcond Reg (put_in_reg_zext64 rcond)))
       (lower_select
        (cmp (OperandSize.Size64) rcond (zero_reg))
@@ -1865,18 +1790,12 @@
 ;;;; Rules for `splat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule -1 (lower (has_type ty (splat x @ (value_type in_ty))))
-      (if (ty_int_bool_ref_scalar_64 in_ty))
+      (if (ty_int_ref_scalar_64 in_ty))
      (vec_dup x (vector_size ty)))

 (rule -2 (lower (has_type ty (splat x @ (value_type (ty_scalar_float _)))))
      (vec_dup_from_fpu x (vector_size ty)))

-(rule (lower (has_type ty (splat (bconst (u64_from_bool n)))))
-      (splat_const n (vector_size ty)))
-
-(rule (lower (has_type ty (splat (breduce (bconst (u64_from_bool n))))))
-      (splat_const n (vector_size ty)))
-
 (rule (lower (has_type ty (splat (f32const (u64_from_ieee32 n)))))
      (splat_const n (vector_size ty)))

@@ -2089,17 +2008,15 @@

 ;;;; Rules for `IsNull` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-(rule (lower (has_type out_ty (is_null x @ (value_type ty))))
+(rule (lower (is_null x @ (value_type ty)))
      (with_flags (cmp_imm (operand_size ty) x (u8_into_imm12 0))
-                  (materialize_bool_result
-                   (ty_bits out_ty) (Cond.Eq))))
+                  (materialize_bool_result (Cond.Eq))))

 ;;;; Rules for `IsInvalid` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-(rule (lower (has_type out_ty (is_invalid x @ (value_type ty))))
+(rule (lower (is_invalid x @ (value_type ty)))
      (with_flags (cmn_imm (operand_size ty) x (u8_into_imm12 1))
-                  (materialize_bool_result
-                   (ty_bits out_ty) (Cond.Eq))))
+                  (materialize_bool_result (Cond.Eq))))

 ;;;; Rules for `Debugtrap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

@@ -2325,18 +2242,18 @@

 ; GPR => SIMD&FP
 (rule 4 (lower (has_type (ty_float_or_vec _) (bitcast x @ (value_type in_ty))))
-      (if (ty_int_bool_ref_scalar_64 in_ty))
+      (if (ty_int_ref_scalar_64 in_ty))
      (mov_to_fpu x (scalar_size in_ty)))

 ; SIMD&FP => GPR
 (rule 3 (lower (has_type out_ty (bitcast x @ (value_type (fits_in_64 (ty_float_or_vec _))))))
-      (if (ty_int_bool_ref_scalar_64 out_ty))
+      (if (ty_int_ref_scalar_64 out_ty))
      (mov_from_vec x 0 (scalar_size out_ty)))

 ; GPR <=> GPR
 (rule 2 (lower (has_type out_ty (bitcast x @ (value_type in_ty))))
-      (if (ty_int_bool_ref_scalar_64 out_ty))
-      (if (ty_int_bool_ref_scalar_64 in_ty))
+      (if (ty_int_ref_scalar_64 out_ty))
+      (if (ty_int_ref_scalar_64 in_ty))
      x)
 (rule 1 (lower (has_type $I128 (bitcast x @ (value_type $I128)))) x)

@@ -2352,7 +2269,7 @@
 (rule 2 (lower (has_type (ty_scalar_float _) (extractlane val (u8_from_uimm8 0))))
      val)

-(rule 0 (lower (has_type (ty_int_bool ty)
+(rule 0 (lower (has_type (ty_int ty)
                       (extractlane val
                                    (u8_from_uimm8 lane))))
      (mov_from_vec val lane (scalar_size ty)))
@@ -2365,7 +2282,7 @@
 ;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule 1 (lower (insertlane vec @ (value_type vty)
-                         val @ (value_type (ty_int_bool _))
+                         val @ (value_type (ty_int _))
                         (u8_from_uimm8 lane)))
      (mov_to_vec vec val lane (vector_size vty)))

@@ -2507,7 +2424,7 @@

 ;;; Rules for `brz`/`brnz`/`brif`/`brff`/`bricmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-;; `brz` following `icmp`, possibly converted via `bint`.
+;; `brz` following `icmp`
 (rule (lower_branch (brz (icmp cc x @ (value_type ty) y) _ _) targets)
      (let ((cond Cond (cond_code cc))
            (cond Cond (invert_cond cond)) ;; negate for `brz`
@@ -2517,16 +2434,7 @@
        (with_flags_side_effect (lower_icmp_into_flags cc x y ty)
                                (cond_br taken not_taken
                                 (cond_br_cond cond))))))
-(rule (lower_branch (brz (bint (icmp cc x @ (value_type ty) y)) _ _) targets)
-      (let ((cond Cond (cond_code cc))
-            (cond Cond (invert_cond cond)) ;; negate for `brz`
-            (taken BranchTarget (branch_target targets 0))
-            (not_taken BranchTarget (branch_target targets 1)))
-       (side_effect
-        (with_flags_side_effect (lower_icmp_into_flags cc x y ty)
-                                (cond_br taken not_taken
-                                 (cond_br_cond cond))))))
-;; `brnz` following `icmp`, possibly converted via `bint`.
+;; `brnz` following `icmp`
 (rule (lower_branch (brnz (icmp cc x @ (value_type ty) y) _ _) targets)
      (let ((cond Cond (cond_code cc))
            (taken BranchTarget (branch_target targets 0))
@@ -2535,15 +2443,7 @@
        (with_flags_side_effect (lower_icmp_into_flags cc x y ty)
                                (cond_br taken not_taken
                                 (cond_br_cond cond))))))
-(rule (lower_branch (brnz (bint (icmp cc x @ (value_type ty) y)) _ _) targets)
-      (let ((cond Cond (cond_code cc))
-            (taken BranchTarget (branch_target targets 0))
-            (not_taken BranchTarget (branch_target targets 1)))
-       (side_effect
-        (with_flags_side_effect (lower_icmp_into_flags cc x y ty)
-                                (cond_br taken not_taken
-                                 (cond_br_cond cond))))))
-;; `brz` following `fcmp`, possibly converted via `bint`.
+;; `brz` following `fcmp`
 (rule (lower_branch (brz (fcmp cc x @ (value_type (ty_scalar_float ty)) y) _ _) targets)
      (let ((cond Cond (fp_cond_code cc))
            (cond Cond (invert_cond cond)) ;; negate for `brz`
@@ -2553,16 +2453,7 @@
        (with_flags_side_effect (fpu_cmp (scalar_size ty) x y)
                                (cond_br taken not_taken
                                 (cond_br_cond cond))))))
-(rule (lower_branch (brz (bint (fcmp cc x @ (value_type (ty_scalar_float ty)) y)) _ _) targets)
-      (let ((cond Cond (fp_cond_code cc))
-            (cond Cond (invert_cond cond)) ;; negate for `brz`
-            (taken BranchTarget (branch_target targets 0))
-            (not_taken BranchTarget (branch_target targets 1)))
-       (side_effect
-        (with_flags_side_effect (fpu_cmp (scalar_size ty) x y)
-                                (cond_br taken not_taken
-                                 (cond_br_cond cond))))))
-;; `brnz` following `fcmp`, possibly converted via `bint`.
+;; `brnz` following `fcmp`
 (rule (lower_branch (brnz (fcmp cc x @ (value_type (ty_scalar_float ty)) y) _ _) targets)
      (let ((cond Cond (fp_cond_code cc))
            (taken BranchTarget (branch_target targets 0))
@@ -2571,14 +2462,6 @@
        (with_flags_side_effect (fpu_cmp (scalar_size ty) x y)
                                (cond_br taken not_taken
                                 (cond_br_cond cond))))))
-(rule (lower_branch (brnz (bint (fcmp cc x @ (value_type (ty_scalar_float ty)) y)) _ _) targets)
-      (let ((cond Cond (fp_cond_code cc))
-            (taken BranchTarget (branch_target targets 0))
-            (not_taken BranchTarget (branch_target targets 1)))
-       (side_effect
-        (with_flags_side_effect (fpu_cmp (scalar_size ty) x y)
-                                (cond_br taken not_taken
-                                 (cond_br_cond cond))))))
 ;; standard `brz`
 (rule -1 (lower_branch (brz c @ (value_type $I128) _ _) targets)
      (let ((flags ProducesFlags (flags_to_producesflags c))
@@ -2592,7 +2475,7 @@
        (with_flags_side_effect flags
         (cond_br taken not_taken (cond_br_zero rt))))))
 (rule -2 (lower_branch (brz c @ (value_type ty) _ _) targets)
-      (if (ty_int_bool_ref_scalar_64 ty))
+      (if (ty_int_ref_scalar_64 ty))
      (let ((flags ProducesFlags (flags_to_producesflags c))
            (rt Reg (put_in_reg_zext64 c))
            (taken BranchTarget (branch_target targets 0))
@@ -2613,7 +2496,7 @@
        (with_flags_side_effect flags
         (cond_br taken not_taken (cond_br_not_zero rt))))))
 (rule -2 (lower_branch (brnz c @ (value_type ty) _ _) targets)
-      (if (ty_int_bool_ref_scalar_64 ty))
+      (if (ty_int_ref_scalar_64 ty))
      (let ((flags ProducesFlags (flags_to_producesflags c))
            (rt Reg (put_in_reg_zext64 c))
            (taken BranchTarget (branch_target targets 0))
--- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs
@@ -165,7 +165,6 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
    fn integral_ty(&mut self, ty: Type) -> Option<Type> {
        match ty {
            I8 | I16 | I32 | I64 | R64 => Some(ty),
-            ty if ty.is_bool() => Some(ty),
            _ => None,
        }
    }
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -39,7 +39,7 @@ pub(crate) fn lower_insn_to_regs(
    };

    match op {
-        Opcode::Iconst | Opcode::Bconst | Opcode::Null => implemented_in_isle(ctx),
+        Opcode::Iconst | Opcode::Null => implemented_in_isle(ctx),

        Opcode::F32const => {
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
@@ -163,11 +163,9 @@ pub(crate) fn lower_insn_to_regs(

        Opcode::Copy => implemented_in_isle(ctx),

-        Opcode::Breduce | Opcode::Ireduce => implemented_in_isle(ctx),
+        Opcode::Ireduce => implemented_in_isle(ctx),

-        Opcode::Bextend | Opcode::Bmask => implemented_in_isle(ctx),
-
-        Opcode::Bint => implemented_in_isle(ctx),
+        Opcode::Bmask => implemented_in_isle(ctx),

        Opcode::Bitcast => implemented_in_isle(ctx),