cranelift: Remove booleans (#5031)

Remove the boolean types from cranelift, and the associated instructions `breduce`, `bextend`, `bconst`, and `bint`. Standardize on using 1/0 for the return value from instructions that produce scalar boolean results, and -1/0 for boolean vector elements.

Fixes #3205

Co-authored-by: Afonso Bordado <afonso360@users.noreply.github.com>
Co-authored-by: Ulrich Weigand <ulrich.weigand@de.ibm.com>
Co-authored-by: Chris Fallin <chris@cfallin.org>
2022-10-17 16:00:27 -07:00
parent 766ecb561e
commit 32a7593c94
242 changed files with 7695 additions and 10010 deletions
--- a/cranelift/codegen/meta/src/cdsl/types.rs
+++ b/cranelift/codegen/meta/src/cdsl/types.rs
@@ -145,7 +145,6 @@ impl From<DynamicVectorType> for ValueType {
 /// A concrete scalar type that can appear as a vector lane too.
 #[derive(Clone, Copy, PartialEq, Eq, Hash)]
 pub(crate) enum LaneType {
-    Bool(shared_types::Bool),
    Float(shared_types::Float),
    Int(shared_types::Int),
 }
@@ -154,7 +153,6 @@ impl LaneType {
    /// Return a string containing the documentation comment for this lane type.
    pub fn doc(self) -> String {
        match self {
-            LaneType::Bool(_) => format!("A boolean type with {} bits.", self.lane_bits()),
            LaneType::Float(shared_types::Float::F32) => String::from(
                "A 32-bit floating point type represented in the IEEE 754-2008
                *binary32* interchange format. This corresponds to the :c:type:`float`
@@ -178,7 +176,6 @@ impl LaneType {
    /// Return the number of bits in a lane.
    pub fn lane_bits(self) -> u64 {
        match self {
-            LaneType::Bool(ref b) => *b as u64,
            LaneType::Float(ref f) => *f as u64,
            LaneType::Int(ref i) => *i as u64,
        }
@@ -188,12 +185,6 @@ impl LaneType {
    pub fn number(self) -> u16 {
        constants::LANE_BASE
            + match self {
-                LaneType::Bool(shared_types::Bool::B1) => 0,
-                LaneType::Bool(shared_types::Bool::B8) => 1,
-                LaneType::Bool(shared_types::Bool::B16) => 2,
-                LaneType::Bool(shared_types::Bool::B32) => 3,
-                LaneType::Bool(shared_types::Bool::B64) => 4,
-                LaneType::Bool(shared_types::Bool::B128) => 5,
                LaneType::Int(shared_types::Int::I8) => 6,
                LaneType::Int(shared_types::Int::I16) => 7,
                LaneType::Int(shared_types::Int::I32) => 8,
@@ -204,18 +195,6 @@ impl LaneType {
            }
    }

-    pub fn bool_from_bits(num_bits: u16) -> LaneType {
-        LaneType::Bool(match num_bits {
-            1 => shared_types::Bool::B1,
-            8 => shared_types::Bool::B8,
-            16 => shared_types::Bool::B16,
-            32 => shared_types::Bool::B32,
-            64 => shared_types::Bool::B64,
-            128 => shared_types::Bool::B128,
-            _ => unreachable!("unxpected num bits for bool"),
-        })
-    }
-
    pub fn int_from_bits(num_bits: u16) -> LaneType {
        LaneType::Int(match num_bits {
            8 => shared_types::Int::I8,
@@ -251,7 +230,6 @@ impl LaneType {
 impl fmt::Display for LaneType {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
-            LaneType::Bool(_) => write!(f, "b{}", self.lane_bits()),
            LaneType::Float(_) => write!(f, "f{}", self.lane_bits()),
            LaneType::Int(_) => write!(f, "i{}", self.lane_bits()),
        }
@@ -265,7 +243,6 @@ impl fmt::Debug for LaneType {
            f,
            "{}",
            match *self {
-                LaneType::Bool(_) => format!("BoolType({})", inner_msg),
                LaneType::Float(_) => format!("FloatType({})", inner_msg),
                LaneType::Int(_) => format!("IntType({})", inner_msg),
            }
@@ -273,13 +250,6 @@ impl fmt::Debug for LaneType {
    }
 }

-/// Create a LaneType from a given bool variant.
-impl From<shared_types::Bool> for LaneType {
-    fn from(b: shared_types::Bool) -> Self {
-        LaneType::Bool(b)
-    }
-}
-
 /// Create a LaneType from a given float variant.
 impl From<shared_types::Float> for LaneType {
    fn from(f: shared_types::Float) -> Self {
@@ -296,7 +266,6 @@ impl From<shared_types::Int> for LaneType {

 /// An iterator for different lane types.
 pub(crate) struct LaneTypeIterator {
-    bool_iter: shared_types::BoolIterator,
    int_iter: shared_types::IntIterator,
    float_iter: shared_types::FloatIterator,
 }
@@ -305,7 +274,6 @@ impl LaneTypeIterator {
    /// Create a new lane type iterator.
    fn new() -> Self {
        Self {
-            bool_iter: shared_types::BoolIterator::new(),
            int_iter: shared_types::IntIterator::new(),
            float_iter: shared_types::FloatIterator::new(),
        }
@@ -315,9 +283,7 @@ impl LaneTypeIterator {
 impl Iterator for LaneTypeIterator {
    type Item = LaneType;
    fn next(&mut self) -> Option<Self::Item> {
-        if let Some(b) = self.bool_iter.next() {
-            Some(LaneType::from(b))
-        } else if let Some(i) = self.int_iter.next() {
+        if let Some(i) = self.int_iter.next() {
            Some(LaneType::from(i))
        } else if let Some(f) = self.float_iter.next() {
            Some(LaneType::from(f))
--- a/cranelift/codegen/meta/src/cdsl/typevar.rs
+++ b/cranelift/codegen/meta/src/cdsl/typevar.rs
@@ -90,10 +90,6 @@ impl TypeVar {
                let bits = float_type as RangeBound;
                builder.floats(bits..bits)
            }
-            LaneType::Bool(bool_type) => {
-                let bits = bool_type as RangeBound;
-                builder.bools(bits..bits)
-            }
        };
        TypeVar::new(name, doc, builder.build())
    }
@@ -171,10 +167,6 @@ impl TypeVar {
                    ts.floats.is_empty() || *ts.floats.iter().min().unwrap() > 32,
                    "can't halve all float types"
                );
-                assert!(
-                    ts.bools.is_empty() || *ts.bools.iter().min().unwrap() > 8,
-                    "can't halve all boolean types"
-                );
            }
            DerivedFunc::DoubleWidth => {
                assert!(
@@ -185,10 +177,6 @@ impl TypeVar {
                    ts.floats.is_empty() || *ts.floats.iter().max().unwrap() < MAX_FLOAT_BITS,
                    "can't double all float types"
                );
-                assert!(
-                    ts.bools.is_empty() || *ts.bools.iter().max().unwrap() < MAX_BITS,
-                    "can't double all boolean types"
-                );
            }
            DerivedFunc::HalfVector => {
                assert!(
@@ -211,10 +199,6 @@ impl TypeVar {
                    ts.floats.is_empty() || *ts.floats.iter().min().unwrap() > 32,
                    "can't halve all float types"
                );
-                assert!(
-                    ts.bools.is_empty() || *ts.bools.iter().min().unwrap() > 8,
-                    "can't halve all boolean types"
-                );
                assert!(
                    *ts.lanes.iter().max().unwrap() < MAX_LANES,
                    "can't double 256 lanes"
@@ -229,10 +213,6 @@ impl TypeVar {
                    ts.floats.is_empty() || *ts.floats.iter().max().unwrap() < MAX_FLOAT_BITS,
                    "can't double all float types"
                );
-                assert!(
-                    ts.bools.is_empty() || *ts.bools.iter().max().unwrap() < MAX_BITS,
-                    "can't double all boolean types"
-                );
                assert!(
                    *ts.lanes.iter().min().unwrap() > 1,
                    "can't halve a scalar type"
@@ -404,7 +384,6 @@ pub(crate) struct TypeSet {
    pub dynamic_lanes: NumSet,
    pub ints: NumSet,
    pub floats: NumSet,
-    pub bools: NumSet,
    pub refs: NumSet,
    pub specials: Vec<SpecialType>,
 }
@@ -415,7 +394,6 @@ impl TypeSet {
        dynamic_lanes: NumSet,
        ints: NumSet,
        floats: NumSet,
-        bools: NumSet,
        refs: NumSet,
        specials: Vec<SpecialType>,
    ) -> Self {
@@ -424,7 +402,6 @@ impl TypeSet {
            dynamic_lanes,
            ints,
            floats,
-            bools,
            refs,
            specials,
        }
@@ -432,10 +409,8 @@ impl TypeSet {

    /// Return the number of concrete types represented by this typeset.
    pub fn size(&self) -> usize {
-        self.lanes.len()
-            * (self.ints.len() + self.floats.len() + self.bools.len() + self.refs.len())
-            + self.dynamic_lanes.len()
-                * (self.ints.len() + self.floats.len() + self.bools.len() + self.refs.len())
+        self.lanes.len() * (self.ints.len() + self.floats.len() + self.refs.len())
+            + self.dynamic_lanes.len() * (self.ints.len() + self.floats.len() + self.refs.len())
            + self.specials.len()
    }

@@ -467,13 +442,6 @@ impl TypeSet {
        copy.ints = NumSet::new();
        copy.floats = NumSet::new();
        copy.refs = NumSet::new();
-        if !(&self.lanes - &num_set![1]).is_empty() {
-            copy.bools = &self.ints | &self.floats;
-            copy.bools = &copy.bools | &self.bools;
-        }
-        if self.lanes.contains(&1) {
-            copy.bools.insert(1);
-        }
        copy
    }

@@ -482,7 +450,6 @@ impl TypeSet {
        let mut copy = self.clone();
        copy.ints = NumSet::from_iter(self.ints.iter().filter(|&&x| x > 8).map(|&x| x / 2));
        copy.floats = NumSet::from_iter(self.floats.iter().filter(|&&x| x > 32).map(|&x| x / 2));
-        copy.bools = NumSet::from_iter(self.bools.iter().filter(|&&x| x > 8).map(|&x| x / 2));
        copy.specials = Vec::new();
        copy
    }
@@ -497,13 +464,6 @@ impl TypeSet {
                .filter(|&&x| x < MAX_FLOAT_BITS)
                .map(|&x| x * 2),
        );
-        copy.bools = NumSet::from_iter(
-            self.bools
-                .iter()
-                .filter(|&&x| x < MAX_BITS)
-                .map(|&x| x * 2)
-                .filter(|x| legal_bool(*x)),
-        );
        copy.specials = Vec::new();
        copy
    }
@@ -551,9 +511,6 @@ impl TypeSet {
            for &bits in &self.floats {
                ret.push(LaneType::float_from_bits(bits).by(num_lanes));
            }
-            for &bits in &self.bools {
-                ret.push(LaneType::bool_from_bits(bits).by(num_lanes));
-            }
            for &bits in &self.refs {
                ret.push(ReferenceType::ref_from_bits(bits).into());
            }
@@ -565,9 +522,6 @@ impl TypeSet {
            for &bits in &self.floats {
                ret.push(LaneType::float_from_bits(bits).to_dynamic(num_lanes));
            }
-            for &bits in &self.bools {
-                ret.push(LaneType::bool_from_bits(bits).to_dynamic(num_lanes));
-            }
        }
        for &special in &self.specials {
            ret.push(special.into());
@@ -612,12 +566,6 @@ impl fmt::Debug for TypeSet {
                Vec::from_iter(self.floats.iter().map(|x| x.to_string())).join(", ")
            ));
        }
-        if !self.bools.is_empty() {
-            subsets.push(format!(
-                "bools={{{}}}",
-                Vec::from_iter(self.bools.iter().map(|x| x.to_string())).join(", ")
-            ));
-        }
        if !self.refs.is_empty() {
            subsets.push(format!(
                "refs={{{}}}",
@@ -639,7 +587,6 @@ impl fmt::Debug for TypeSet {
 pub(crate) struct TypeSetBuilder {
    ints: Interval,
    floats: Interval,
-    bools: Interval,
    refs: Interval,
    includes_scalars: bool,
    simd_lanes: Interval,
@@ -652,7 +599,6 @@ impl TypeSetBuilder {
        Self {
            ints: Interval::None,
            floats: Interval::None,
-            bools: Interval::None,
            refs: Interval::None,
            includes_scalars: true,
            simd_lanes: Interval::None,
@@ -671,11 +617,6 @@ impl TypeSetBuilder {
        self.floats = interval.into();
        self
    }
-    pub fn bools(mut self, interval: impl Into<Interval>) -> Self {
-        assert!(self.bools == Interval::None);
-        self.bools = interval.into();
-        self
-    }
    pub fn refs(mut self, interval: impl Into<Interval>) -> Self {
        assert!(self.refs == Interval::None);
        self.refs = interval.into();
@@ -704,17 +645,11 @@ impl TypeSetBuilder {
    pub fn build(self) -> TypeSet {
        let min_lanes = if self.includes_scalars { 1 } else { 2 };

-        let bools = range_to_set(self.bools.to_range(1..MAX_BITS, None))
-            .into_iter()
-            .filter(|x| legal_bool(*x))
-            .collect();
-
        TypeSet::new(
            range_to_set(self.simd_lanes.to_range(min_lanes..MAX_LANES, Some(1))),
            range_to_set(self.dynamic_simd_lanes.to_range(2..MAX_LANES, None)),
            range_to_set(self.ints.to_range(8..MAX_BITS, None)),
            range_to_set(self.floats.to_range(32..64, None)),
-            bools,
            range_to_set(self.refs.to_range(32..64, None)),
            self.specials,
        )
@@ -760,11 +695,6 @@ impl Into<Interval> for Range {
    }
 }

-fn legal_bool(bits: RangeBound) -> bool {
-    // Only allow legal bit widths for bool types.
-    bits == 1 || (bits >= 8 && bits <= MAX_BITS && bits.is_power_of_two())
-}
-
 /// Generates a set with all the powers of two included in the range.
 fn range_to_set(range: Option<Range>) -> NumSet {
    let mut set = NumSet::new();
@@ -791,21 +721,12 @@ fn test_typevar_builder() {
    assert_eq!(type_set.lanes, num_set![1]);
    assert!(type_set.floats.is_empty());
    assert_eq!(type_set.ints, num_set![8, 16, 32, 64, 128]);
-    assert!(type_set.bools.is_empty());
-    assert!(type_set.specials.is_empty());
-
-    let type_set = TypeSetBuilder::new().bools(Interval::All).build();
-    assert_eq!(type_set.lanes, num_set![1]);
-    assert!(type_set.floats.is_empty());
-    assert!(type_set.ints.is_empty());
-    assert_eq!(type_set.bools, num_set![1, 8, 16, 32, 64, 128]);
    assert!(type_set.specials.is_empty());

    let type_set = TypeSetBuilder::new().floats(Interval::All).build();
    assert_eq!(type_set.lanes, num_set![1]);
    assert_eq!(type_set.floats, num_set![32, 64]);
    assert!(type_set.ints.is_empty());
-    assert!(type_set.bools.is_empty());
    assert!(type_set.specials.is_empty());

    let type_set = TypeSetBuilder::new()
@@ -816,7 +737,6 @@ fn test_typevar_builder() {
    assert_eq!(type_set.lanes, num_set![2, 4, 8, 16, 32, 64, 128, 256]);
    assert_eq!(type_set.floats, num_set![32, 64]);
    assert!(type_set.ints.is_empty());
-    assert!(type_set.bools.is_empty());
    assert!(type_set.specials.is_empty());

    let type_set = TypeSetBuilder::new()
@@ -827,7 +747,6 @@ fn test_typevar_builder() {
    assert_eq!(type_set.lanes, num_set![1, 2, 4, 8, 16, 32, 64, 128, 256]);
    assert_eq!(type_set.floats, num_set![32, 64]);
    assert!(type_set.ints.is_empty());
-    assert!(type_set.bools.is_empty());
    assert!(type_set.specials.is_empty());

    let type_set = TypeSetBuilder::new()
@@ -839,12 +758,10 @@ fn test_typevar_builder() {
    assert_eq!(type_set.floats, num_set![32, 64]);
    assert!(type_set.dynamic_lanes.is_empty());
    assert!(type_set.ints.is_empty());
-    assert!(type_set.bools.is_empty());
    assert!(type_set.specials.is_empty());

    let type_set = TypeSetBuilder::new()
        .ints(Interval::All)
-        .bools(Interval::All)
        .floats(Interval::All)
        .dynamic_simd_lanes(Interval::All)
        .includes_scalars(false)
@@ -854,7 +771,6 @@ fn test_typevar_builder() {
        num_set![2, 4, 8, 16, 32, 64, 128, 256]
    );
    assert_eq!(type_set.ints, num_set![8, 16, 32, 64, 128]);
-    assert_eq!(type_set.bools, num_set![1, 8, 16, 32, 64, 128]);
    assert_eq!(type_set.floats, num_set![32, 64]);
    assert_eq!(type_set.lanes, num_set![1]);
    assert!(type_set.specials.is_empty());
@@ -871,14 +787,12 @@ fn test_typevar_builder() {
    assert_eq!(type_set.floats, num_set![32, 64]);
    assert_eq!(type_set.lanes, num_set![1]);
    assert!(type_set.ints.is_empty());
-    assert!(type_set.bools.is_empty());
    assert!(type_set.specials.is_empty());

    let type_set = TypeSetBuilder::new().ints(16..64).build();
    assert_eq!(type_set.lanes, num_set![1]);
    assert_eq!(type_set.ints, num_set![16, 32, 64]);
    assert!(type_set.floats.is_empty());
-    assert!(type_set.bools.is_empty());
    assert!(type_set.specials.is_empty());
 }

@@ -897,17 +811,6 @@ fn test_dynamic_to_vector() {
            .ints(Interval::All)
            .build()
    );
-    assert_eq!(
-        TypeSetBuilder::new()
-            .dynamic_simd_lanes(Interval::All)
-            .bools(Interval::All)
-            .build()
-            .dynamic_to_vector(),
-        TypeSetBuilder::new()
-            .simd_lanes(2..128)
-            .bools(Interval::All)
-            .build()
-    );
    assert_eq!(
        TypeSetBuilder::new()
            .dynamic_simd_lanes(Interval::All)
@@ -944,20 +847,6 @@ fn test_as_bool() {
        a.lane_of(),
        TypeSetBuilder::new().ints(8..8).floats(32..32).build()
    );
-
-    // Test as_bool with disjoint intervals.
-    let mut a_as_bool = TypeSetBuilder::new().simd_lanes(2..8).build();
-    a_as_bool.bools = num_set![8, 32];
-    assert_eq!(a.as_bool(), a_as_bool);
-
-    let b = TypeSetBuilder::new()
-        .simd_lanes(1..8)
-        .ints(8..8)
-        .floats(32..32)
-        .build();
-    let mut b_as_bool = TypeSetBuilder::new().simd_lanes(1..8).build();
-    b_as_bool.bools = num_set![1, 8, 32];
-    assert_eq!(b.as_bool(), b_as_bool);
 }

 #[test]
@@ -1002,14 +891,6 @@ fn test_forward_images() {
        TypeSetBuilder::new().floats(32..64).build().half_width(),
        TypeSetBuilder::new().floats(32..32).build()
    );
-    assert_eq!(
-        TypeSetBuilder::new().bools(1..8).build().half_width(),
-        empty_set
-    );
-    assert_eq!(
-        TypeSetBuilder::new().bools(1..32).build().half_width(),
-        TypeSetBuilder::new().bools(8..16).build()
-    );

    // Double width.
    assert_eq!(
@@ -1028,14 +909,6 @@ fn test_forward_images() {
        TypeSetBuilder::new().floats(32..64).build().double_width(),
        TypeSetBuilder::new().floats(64..64).build()
    );
-    assert_eq!(
-        TypeSetBuilder::new().bools(1..16).build().double_width(),
-        TypeSetBuilder::new().bools(16..32).build()
-    );
-    assert_eq!(
-        TypeSetBuilder::new().bools(32..64).build().double_width(),
-        TypeSetBuilder::new().bools(64..128).build()
-    );
 }

 #[test]
@@ -1069,10 +942,6 @@ fn test_typeset_singleton() {
        TypeSetBuilder::new().floats(64..64).build().get_singleton(),
        ValueType::Lane(shared_types::Float::F64.into())
    );
-    assert_eq!(
-        TypeSetBuilder::new().bools(1..1).build().get_singleton(),
-        ValueType::Lane(shared_types::Bool::B1.into())
-    );
    assert_eq!(
        TypeSetBuilder::new()
            .simd_lanes(4..4)
@@ -1110,7 +979,6 @@ fn test_typevar_singleton() {
    assert_eq!(typevar.name, "i32");
    assert_eq!(typevar.type_set.ints, num_set![32]);
    assert!(typevar.type_set.floats.is_empty());
-    assert!(typevar.type_set.bools.is_empty());
    assert!(typevar.type_set.specials.is_empty());
    assert_eq!(typevar.type_set.lanes, num_set![1]);

@@ -1123,6 +991,5 @@ fn test_typevar_singleton() {
    assert!(typevar.type_set.ints.is_empty());
    assert_eq!(typevar.type_set.floats, num_set![32]);
    assert_eq!(typevar.type_set.lanes, num_set![4]);
-    assert!(typevar.type_set.bools.is_empty());
    assert!(typevar.type_set.specials.is_empty());
 }
--- a/cranelift/codegen/meta/src/gen_inst.rs
+++ b/cranelift/codegen/meta/src/gen_inst.rs
@@ -769,9 +769,6 @@ fn typeset_to_string(ts: &TypeSet) -> String {
    if !ts.floats.is_empty() {
        result += &format!(", floats={}", iterable_to_string(&ts.floats));
    }
-    if !ts.bools.is_empty() {
-        result += &format!(", bools={}", iterable_to_string(&ts.bools));
-    }
    if !ts.specials.is_empty() {
        result += &format!(", specials=[{}]", iterable_to_string(&ts.specials));
    }
@@ -804,7 +801,6 @@ pub(crate) fn gen_typesets_table(type_sets: &UniqueTable<TypeSet>, fmt: &mut For
                gen_bitset(&ts.dynamic_lanes, "dynamic_lanes", 16, fmt);
                gen_bitset(&ts.ints, "ints", 8, fmt);
                gen_bitset(&ts.floats, "floats", 8, fmt);
-                gen_bitset(&ts.bools, "bools", 8, fmt);
                gen_bitset(&ts.refs, "refs", 8, fmt);
            });
            fmt.line("},");
--- a/cranelift/codegen/meta/src/shared/formats.rs
+++ b/cranelift/codegen/meta/src/shared/formats.rs
@@ -43,7 +43,6 @@ pub(crate) struct Formats {
    pub(crate) ternary_imm8: Rc<InstructionFormat>,
    pub(crate) trap: Rc<InstructionFormat>,
    pub(crate) unary: Rc<InstructionFormat>,
-    pub(crate) unary_bool: Rc<InstructionFormat>,
    pub(crate) unary_const: Rc<InstructionFormat>,
    pub(crate) unary_global_value: Rc<InstructionFormat>,
    pub(crate) unary_ieee32: Rc<InstructionFormat>,
@@ -62,8 +61,6 @@ impl Formats {

            unary_ieee64: Builder::new("UnaryIeee64").imm(&imm.ieee64).build(),

-            unary_bool: Builder::new("UnaryBool").imm(&imm.boolean).build(),
-
            unary_const: Builder::new("UnaryConst").imm(&imm.pool_constant).build(),

            unary_global_value: Builder::new("UnaryGlobalValue")
--- a/cranelift/codegen/meta/src/shared/immediates.rs
+++ b/cranelift/codegen/meta/src/shared/immediates.rs
@@ -44,11 +44,6 @@ pub(crate) struct Immediates {
    /// IEEE 754-2008 binary64 interchange format.
    pub ieee64: OperandKind,

-    /// An immediate boolean operand.
-    ///
-    /// This type of immediate boolean can interact with SSA values with any BoolType type.
-    pub boolean: OperandKind,
-
    /// A condition code for comparing integer values.
    ///
    /// This enumerated operand kind is used for the `icmp` instruction and corresponds to the
@@ -142,7 +137,6 @@ impl Immediates {
                "ir::immediates::Ieee64",
                "A 64-bit immediate floating point number.",
            ),
-            boolean: new_imm("imm", "bool", "An immediate boolean."),
            intcc: {
                let mut intcc_values = HashMap::new();
                intcc_values.insert("eq", "Equal");
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -37,17 +37,14 @@ fn define_control_flow(
        .is_branch(true),
    );

-    let Testable = &TypeVar::new(
-        "Testable",
-        "A scalar boolean or integer type",
-        TypeSetBuilder::new()
-            .ints(Interval::All)
-            .bools(Interval::All)
-            .build(),
+    let ScalarTruthy = &TypeVar::new(
+        "ScalarTruthy",
+        "A scalar truthy type",
+        TypeSetBuilder::new().ints(Interval::All).build(),
    );

    {
-        let c = &Operand::new("c", Testable).with_doc("Controlling value to test");
+        let c = &Operand::new("c", ScalarTruthy).with_doc("Controlling value to test");

        ig.push(
            Inst::new(
@@ -55,8 +52,7 @@ fn define_control_flow(
                r#"
        Branch when zero.

-        If ``c`` is a `b1` value, take the branch when ``c`` is false. If
-        ``c`` is an integer value, take the branch when ``c = 0``.
+        Take the branch when ``c = 0``.
        "#,
                &formats.branch,
            )
@@ -70,8 +66,7 @@ fn define_control_flow(
                r#"
        Branch when non-zero.

-        If ``c`` is a `b1` value, take the branch when ``c`` is true. If
-        ``c`` is an integer value, take the branch when ``c != 0``.
+        Take the branch when ``c != 0``.
        "#,
                &formats.branch,
            )
@@ -226,7 +221,7 @@ fn define_control_flow(
            .is_terminator(true),
        );

-        let c = &Operand::new("c", Testable).with_doc("Controlling value to test");
+        let c = &Operand::new("c", ScalarTruthy).with_doc("Controlling value to test");
        ig.push(
            Inst::new(
                "trapz",
@@ -255,7 +250,7 @@ fn define_control_flow(
            .can_trap(true),
        );

-        let c = &Operand::new("c", Testable).with_doc("Controlling value to test");
+        let c = &Operand::new("c", ScalarTruthy).with_doc("Controlling value to test");
        ig.push(
            Inst::new(
                "trapnz",
@@ -412,7 +407,6 @@ fn define_simd_lane_access(
        TypeSetBuilder::new()
            .ints(Interval::All)
            .floats(Interval::All)
-            .bools(Interval::All)
            .simd_lanes(Interval::All)
            .dynamic_simd_lanes(Interval::All)
            .includes_scalars(false)
@@ -685,7 +679,7 @@ pub(crate) fn define(
    let iflags: &TypeVar = &ValueType::Special(types::Flag::IFlags.into()).into();
    let fflags: &TypeVar = &ValueType::Special(types::Flag::FFlags.into()).into();

-    let b1: &TypeVar = &ValueType::from(LaneType::from(types::Bool::B1)).into();
+    let i8: &TypeVar = &ValueType::from(LaneType::from(types::Int::I8)).into();
    let f32_: &TypeVar = &ValueType::from(LaneType::from(types::Float::F32)).into();
    let f64_: &TypeVar = &ValueType::from(LaneType::from(types::Float::F64)).into();

@@ -700,19 +694,10 @@ pub(crate) fn define(
            .build(),
    );

-    let Bool = &TypeVar::new(
-        "Bool",
-        "A scalar or vector boolean type",
-        TypeSetBuilder::new()
-            .bools(Interval::All)
-            .simd_lanes(Interval::All)
-            .build(),
-    );
-
-    let ScalarBool = &TypeVar::new(
-        "ScalarBool",
-        "A scalar boolean type",
-        TypeSetBuilder::new().bools(Interval::All).build(),
+    let ScalarTruthy = &TypeVar::new(
+        "ScalarTruthy",
+        "A scalar truthy type",
+        TypeSetBuilder::new().ints(Interval::All).build(),
    );

    let iB = &TypeVar::new(
@@ -733,33 +718,22 @@ pub(crate) fn define(
        TypeSetBuilder::new().refs(Interval::All).build(),
    );

-    let Testable = &TypeVar::new(
-        "Testable",
-        "A scalar boolean or integer type",
-        TypeSetBuilder::new()
-            .ints(Interval::All)
-            .bools(Interval::All)
-            .build(),
-    );
-
    let TxN = &TypeVar::new(
        "TxN",
        "A SIMD vector type",
        TypeSetBuilder::new()
            .ints(Interval::All)
            .floats(Interval::All)
-            .bools(Interval::All)
            .simd_lanes(Interval::All)
            .includes_scalars(false)
            .build(),
    );
    let Any = &TypeVar::new(
        "Any",
-        "Any integer, float, boolean, or reference scalar or vector type",
+        "Any integer, float, or reference scalar or vector type",
        TypeSetBuilder::new()
            .ints(Interval::All)
            .floats(Interval::All)
-            .bools(Interval::All)
            .refs(Interval::All)
            .simd_lanes(Interval::All)
            .includes_scalars(true)
@@ -1419,24 +1393,6 @@ pub(crate) fn define(
        .operands_out(vec![a]),
    );

-    let N = &Operand::new("N", &imm.boolean);
-    let a = &Operand::new("a", Bool).with_doc("A constant boolean scalar or vector value");
-
-    ig.push(
-        Inst::new(
-            "bconst",
-            r#"
-        Boolean constant.
-
-        Create a scalar boolean SSA value with an immediate constant value, or
-        a boolean vector where all the lanes have the same value.
-        "#,
-            &formats.unary_bool,
-        )
-        .operands_in(vec![N])
-        .operands_out(vec![a]),
-    );
-
    let N = &Operand::new("N", &imm.pool_constant)
        .with_doc("The 16 immediate bytes of a 128-bit vector");
    let a = &Operand::new("a", TxN).with_doc("A constant vector value");
@@ -1463,7 +1419,6 @@ pub(crate) fn define(
         lane counts and widths",
        TypeSetBuilder::new()
            .ints(8..8)
-            .bools(8..8)
            .simd_lanes(16..16)
            .includes_scalars(false)
            .build(),
@@ -1513,7 +1468,7 @@ pub(crate) fn define(
        &formats.nullary,
    ));

-    let c = &Operand::new("c", Testable).with_doc("Controlling value to test");
+    let c = &Operand::new("c", ScalarTruthy).with_doc("Controlling value to test");
    let x = &Operand::new("x", Any).with_doc("Value to use when `c` is true");
    let y = &Operand::new("y", Any).with_doc("Value to use when `c` is false");
    let a = &Operand::new("a", Any);
@@ -1640,7 +1595,6 @@ pub(crate) fn define(
        TypeSetBuilder::new()
            .ints(Interval::All)
            .floats(Interval::All)
-            .bools(Interval::All)
            .simd_lanes(1..128)
            .includes_scalars(true)
            .build(),
@@ -1680,7 +1634,7 @@ pub(crate) fn define(
            r#"
        Vector lane select.

-        Select lanes from ``x`` or ``y`` controlled by the lanes of the boolean
+        Select lanes from ``x`` or ``y`` controlled by the lanes of the truthy
        vector ``c``.
        "#,
            &formats.ternary,
@@ -1689,7 +1643,7 @@ pub(crate) fn define(
        .operands_out(vec![a]),
    );

-    let s = &Operand::new("s", b1);
+    let s = &Operand::new("s", i8);

    ig.push(
        Inst::new(
@@ -1760,8 +1714,8 @@ pub(crate) fn define(
        | sgt    | ugt      | Greater than          |
        | sle    | ule      | Less than or equal    |

-        When this instruction compares integer vectors, it returns a boolean
-        vector of lane-wise comparisons.
+        When this instruction compares integer vectors, it returns a vector of
+        lane-wise comparisons.
        "#,
            &formats.int_compare,
        )
@@ -1769,7 +1723,7 @@ pub(crate) fn define(
        .operands_out(vec![a]),
    );

-    let a = &Operand::new("a", b1);
+    let a = &Operand::new("a", i8);
    let x = &Operand::new("x", iB);
    let Y = &Operand::new("Y", &imm.imm64);

@@ -2158,10 +2112,10 @@ pub(crate) fn define(
    let x = &Operand::new("x", iB);
    let y = &Operand::new("y", iB);

-    let c_in = &Operand::new("c_in", b1).with_doc("Input carry flag");
-    let c_out = &Operand::new("c_out", b1).with_doc("Output carry flag");
-    let b_in = &Operand::new("b_in", b1).with_doc("Input borrow flag");
-    let b_out = &Operand::new("b_out", b1).with_doc("Output borrow flag");
+    let c_in = &Operand::new("c_in", i8).with_doc("Input carry flag");
+    let c_out = &Operand::new("c_out", i8).with_doc("Output carry flag");
+    let b_in = &Operand::new("b_in", i8).with_doc("Input borrow flag");
+    let b_out = &Operand::new("b_out", i8).with_doc("Output borrow flag");

    let c_if_in = &Operand::new("c_in", iflags);
    let c_if_out = &Operand::new("c_out", iflags);
@@ -2430,11 +2384,10 @@ pub(crate) fn define(

    let bits = &TypeVar::new(
        "bits",
-        "Any integer, float, or boolean scalar or vector type",
+        "Any integer, float, or vector type",
        TypeSetBuilder::new()
            .ints(Interval::All)
            .floats(Interval::All)
-            .bools(Interval::All)
            .simd_lanes(Interval::All)
            .includes_scalars(true)
            .build(),
@@ -2916,7 +2869,7 @@ pub(crate) fn define(
        floating point comparisons of the same name.

        When this instruction compares floating point vectors, it returns a
-        boolean vector with the results of lane-wise comparisons.
+        vector with the results of lane-wise comparisons.
        "#,
            &formats.float_compare,
        )
@@ -3195,7 +3148,7 @@ pub(crate) fn define(
        .operands_out(vec![a]),
    );

-    let a = &Operand::new("a", b1);
+    let a = &Operand::new("a", i8);
    let x = &Operand::new("x", Ref);

    ig.push(
@@ -3213,7 +3166,7 @@ pub(crate) fn define(
        .operands_out(vec![a]),
    );

-    let a = &Operand::new("a", b1);
+    let a = &Operand::new("a", i8);
    let x = &Operand::new("x", Ref);

    ig.push(
@@ -3233,7 +3186,7 @@ pub(crate) fn define(

    let Cond = &Operand::new("Cond", &imm.intcc);
    let f = &Operand::new("f", iflags);
-    let a = &Operand::new("a", b1);
+    let a = &Operand::new("a", i8);

    ig.push(
        Inst::new(
@@ -3329,80 +3282,11 @@ pub(crate) fn define(
        .operands_out(vec![a]),
    );

-    let Bool = &TypeVar::new(
-        "Bool",
-        "A scalar boolean type",
-        TypeSetBuilder::new().bools(Interval::All).build(),
-    );
-
-    let BoolTo = &TypeVar::new(
-        "BoolTo",
-        "A smaller boolean type",
-        TypeSetBuilder::new().bools(Interval::All).build(),
-    );
-
-    let x = &Operand::new("x", Bool);
-    let a = &Operand::new("a", BoolTo);
-
-    ig.push(
-        Inst::new(
-            "breduce",
-            r#"
-        Convert `x` to a smaller boolean type by discarding the most significant bits.
-        "#,
-            &formats.unary,
-        )
-        .operands_in(vec![x])
-        .operands_out(vec![a]),
-    );
-
-    let BoolTo = &TypeVar::new(
-        "BoolTo",
-        "A larger boolean type",
-        TypeSetBuilder::new().bools(Interval::All).build(),
-    );
-    let x = &Operand::new("x", Bool);
-    let a = &Operand::new("a", BoolTo);
-
-    ig.push(
-        Inst::new(
-            "bextend",
-            r#"
-        Convert `x` to a larger boolean type
-        "#,
-            &formats.unary,
-        )
-        .operands_in(vec![x])
-        .operands_out(vec![a]),
-    );
-
-    let IntTo = &TypeVar::new(
-        "IntTo",
-        "A scalar integer type",
-        TypeSetBuilder::new().ints(Interval::All).build(),
-    );
-    let x = &Operand::new("x", ScalarBool);
-    let a = &Operand::new("a", IntTo);
-
-    ig.push(
-        Inst::new(
-            "bint",
-            r#"
-        Convert `x` to an integer.
-
-        True maps to 1 and false maps to 0.
-        "#,
-            &formats.unary,
-        )
-        .operands_in(vec![x])
-        .operands_out(vec![a]),
-    );
-
-    let Bool = &TypeVar::new(
-        "Bool",
-        "A scalar or vector boolean type",
+    let Truthy = &TypeVar::new(
+        "Truthy",
+        "A scalar or vector whose values are truthy",
        TypeSetBuilder::new()
-            .bools(Interval::All)
+            .ints(Interval::All)
            .simd_lanes(Interval::All)
            .build(),
    );
@@ -3414,7 +3298,7 @@ pub(crate) fn define(
            .simd_lanes(Interval::All)
            .build(),
    );
-    let x = &Operand::new("x", Bool);
+    let x = &Operand::new("x", Truthy);
    let a = &Operand::new("a", IntTo);

    ig.push(
@@ -4136,7 +4020,6 @@ pub(crate) fn define(
        TypeSetBuilder::new()
            .ints(Interval::All)
            .floats(Interval::All)
-            .bools(Interval::All)
            .dynamic_simd_lanes(Interval::All)
            .build(),
    );
--- a/cranelift/codegen/meta/src/shared/types.rs
+++ b/cranelift/codegen/meta/src/shared/types.rs
@@ -1,49 +1,5 @@
 //! This module predefines all the Cranelift scalar types.

-#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
-pub(crate) enum Bool {
-    /// 1-bit bool.
-    B1 = 1,
-    /// 8-bit bool.
-    B8 = 8,
-    /// 16-bit bool.
-    B16 = 16,
-    /// 32-bit bool.
-    B32 = 32,
-    /// 64-bit bool.
-    B64 = 64,
-    /// 128-bit bool.
-    B128 = 128,
-}
-
-/// This provides an iterator through all of the supported bool variants.
-pub(crate) struct BoolIterator {
-    index: u8,
-}
-
-impl BoolIterator {
-    pub fn new() -> Self {
-        Self { index: 0 }
-    }
-}
-
-impl Iterator for BoolIterator {
-    type Item = Bool;
-    fn next(&mut self) -> Option<Self::Item> {
-        let res = match self.index {
-            0 => Some(Bool::B1),
-            1 => Some(Bool::B8),
-            2 => Some(Bool::B16),
-            3 => Some(Bool::B32),
-            4 => Some(Bool::B64),
-            5 => Some(Bool::B128),
-            _ => return None,
-        };
-        self.index += 1;
-        res
-    }
-}
-
 #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
 pub(crate) enum Int {
    /// 8-bit int.
@@ -187,18 +143,6 @@ impl Iterator for ReferenceIterator {
 mod iter_tests {
    use super::*;

-    #[test]
-    fn bool_iter_works() {
-        let mut bool_iter = BoolIterator::new();
-        assert_eq!(bool_iter.next(), Some(Bool::B1));
-        assert_eq!(bool_iter.next(), Some(Bool::B8));
-        assert_eq!(bool_iter.next(), Some(Bool::B16));
-        assert_eq!(bool_iter.next(), Some(Bool::B32));
-        assert_eq!(bool_iter.next(), Some(Bool::B64));
-        assert_eq!(bool_iter.next(), Some(Bool::B128));
-        assert_eq!(bool_iter.next(), None);
-    }
-
    #[test]
    fn int_iter_works() {
        let mut int_iter = IntIterator::new();
--- a/cranelift/codegen/src/data_value.rs
+++ b/cranelift/codegen/src/data_value.rs
@@ -12,7 +12,6 @@ use core::fmt::{self, Display, Formatter};
 #[allow(missing_docs)]
 #[derive(Clone, Debug, PartialOrd)]
 pub enum DataValue {
-    B(bool),
    I8(i8),
    I16(i16),
    I32(i32),
@@ -33,8 +32,6 @@ impl PartialEq for DataValue {
    fn eq(&self, other: &Self) -> bool {
        use DataValue::*;
        match (self, other) {
-            (B(l), B(r)) => l == r,
-            (B(_), _) => false,
            (I8(l), I8(r)) => l == r,
            (I8(_), _) => false,
            (I16(l), I16(r)) => l == r,
@@ -84,7 +81,6 @@ impl DataValue {
    /// Return the Cranelift IR [Type] for this [DataValue].
    pub fn ty(&self) -> Type {
        match self {
-            DataValue::B(_) => types::B8, // A default type.
            DataValue::I8(_) | DataValue::U8(_) => types::I8,
            DataValue::I16(_) | DataValue::U16(_) => types::I16,
            DataValue::I32(_) | DataValue::U32(_) => types::I32,
@@ -105,14 +101,6 @@ impl DataValue {
        }
    }

-    /// Return true if the value is a bool (i.e. `DataValue::B`).
-    pub fn is_bool(&self) -> bool {
-        match self {
-            DataValue::B(_) => true,
-            _ => false,
-        }
-    }
-
    /// Write a [DataValue] to a slice.
    ///
    /// # Panics:
@@ -120,8 +108,6 @@ impl DataValue {
    /// Panics if the slice does not have enough space to accommodate the [DataValue]
    pub fn write_to_slice(&self, dst: &mut [u8]) {
        match self {
-            DataValue::B(true) => dst[..16].copy_from_slice(&[u8::MAX; 16][..]),
-            DataValue::B(false) => dst[..16].copy_from_slice(&[0; 16][..]),
            DataValue::I8(i) => dst[..1].copy_from_slice(&i.to_ne_bytes()[..]),
            DataValue::I16(i) => dst[..2].copy_from_slice(&i.to_ne_bytes()[..]),
            DataValue::I32(i) => dst[..4].copy_from_slice(&i.to_ne_bytes()[..]),
@@ -153,13 +139,6 @@ impl DataValue {
            types::F64 => DataValue::F64(Ieee64::with_bits(u64::from_ne_bytes(
                src[..8].try_into().unwrap(),
            ))),
-            _ if ty.is_bool() => {
-                // Only `ty.bytes()` are guaranteed to be written
-                // so we can only test the first n bytes of `src`
-
-                let size = ty.bytes() as usize;
-                DataValue::B(src[..size].iter().any(|&i| i != 0))
-            }
            _ if ty.is_vector() => {
                if ty.bytes() == 16 {
                    DataValue::V128(src[..16].try_into().unwrap())
@@ -175,13 +154,7 @@ impl DataValue {

    /// Write a [DataValue] to a memory location.
    pub unsafe fn write_value_to(&self, p: *mut u128) {
-        // Since `DataValue` does not have type info for bools we always
-        // write out a full 16 byte slot.
-        let size = match self.ty() {
-            ty if ty.is_bool() => 16,
-            ty => ty.bytes() as usize,
-        };
-
+        let size = self.ty().bytes() as usize;
        self.write_to_slice(std::slice::from_raw_parts_mut(p as *mut u8, size));
    }

@@ -270,7 +243,6 @@ macro_rules! build_conversion_impl {
        }
    };
 }
-build_conversion_impl!(bool, B, B8);
 build_conversion_impl!(i8, I8, I8);
 build_conversion_impl!(i16, I16, I16);
 build_conversion_impl!(i32, I32, I32);
@@ -294,7 +266,6 @@ impl From<Offset32> for DataValue {
 impl Display for DataValue {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        match self {
-            DataValue::B(dv) => write!(f, "{}", dv),
            DataValue::I8(dv) => write!(f, "{}", dv),
            DataValue::I16(dv) => write!(f, "{}", dv),
            DataValue::I32(dv) => write!(f, "{}", dv),
@@ -354,16 +325,6 @@ mod test {

    #[test]
    fn type_conversions() {
-        assert_eq!(DataValue::B(true).ty(), types::B8);
-        assert_eq!(
-            TryInto::<bool>::try_into(DataValue::B(false)).unwrap(),
-            false
-        );
-        assert_eq!(
-            TryInto::<i32>::try_into(DataValue::B(false)).unwrap_err(),
-            DataValueCastFailure::TryInto(types::B8, types::I32)
-        );
-
        assert_eq!(DataValue::V128([0; 16]).ty(), types::I8X16);
        assert_eq!(
            TryInto::<[u8; 16]>::try_into(DataValue::V128([0; 16])).unwrap(),
--- a/cranelift/codegen/src/egraph/node.rs
+++ b/cranelift/codegen/src/egraph/node.rs
@@ -322,15 +322,11 @@ impl std::ops::Add<Cost> for Cost {
 pub(crate) fn op_cost(op: &InstructionImms) -> Cost {
    match op.opcode() {
        // Constants.
-        Opcode::Iconst | Opcode::F32const | Opcode::F64const | Opcode::Bconst => Cost(0),
+        Opcode::Iconst | Opcode::F32const | Opcode::F64const => Cost(0),
        // Extends/reduces.
-        Opcode::Bextend
-        | Opcode::Breduce
-        | Opcode::Uextend
-        | Opcode::Sextend
-        | Opcode::Ireduce
-        | Opcode::Iconcat
-        | Opcode::Isplit => Cost(1),
+        Opcode::Uextend | Opcode::Sextend | Opcode::Ireduce | Opcode::Iconcat | Opcode::Isplit => {
+            Cost(1)
+        }
        // "Simple" arithmetic.
        Opcode::Iadd
        | Opcode::Isub
--- a/cranelift/codegen/src/inst_predicates.rs
+++ b/cranelift/codegen/src/inst_predicates.rs
@@ -2,7 +2,6 @@
 use crate::ir::immediates::Offset32;
 use crate::ir::instructions::BranchInfo;
 use crate::ir::{Block, DataFlowGraph, Function, Inst, InstructionData, Opcode, Type, Value};
-use crate::machinst::ty_bits;
 use cranelift_entity::EntityRef;

 /// Preserve instructions with used result values.
@@ -53,7 +52,7 @@ pub fn has_lowering_side_effect(func: &Function, inst: Inst) -> bool {
    op != Opcode::GetPinnedReg && (has_side_effect(func, inst) || op.can_load())
 }

-/// Is the given instruction a constant value (`iconst`, `fconst`, `bconst`) that can be
+/// Is the given instruction a constant value (`iconst`, `fconst`) that can be
 /// represented in 64 bits?
 pub fn is_constant_64bit(func: &Function, inst: Inst) -> Option<u64> {
    let data = &func.dfg[inst];
@@ -64,21 +63,6 @@ pub fn is_constant_64bit(func: &Function, inst: Inst) -> Option<u64> {
        &InstructionData::UnaryImm { imm, .. } => Some(imm.bits() as u64),
        &InstructionData::UnaryIeee32 { imm, .. } => Some(imm.bits() as u64),
        &InstructionData::UnaryIeee64 { imm, .. } => Some(imm.bits()),
-        &InstructionData::UnaryBool { imm, .. } => {
-            let imm = if imm {
-                let bits = ty_bits(func.dfg.value_type(func.dfg.inst_results(inst)[0]));
-
-                if bits < 64 {
-                    (1u64 << bits) - 1
-                } else {
-                    u64::MAX
-                }
-            } else {
-                0
-            };
-
-            Some(imm)
-        }
        _ => None,
    }
 }
--- a/cranelift/codegen/src/ir/builder.rs
+++ b/cranelift/codegen/src/ir/builder.rs
@@ -238,7 +238,7 @@ mod tests {

        // Formula.
        let cmp = pos.ins().icmp(IntCC::Equal, arg0, v0);
-        assert_eq!(pos.func.dfg.value_type(cmp), B1);
+        assert_eq!(pos.func.dfg.value_type(cmp), I8);
    }

    #[test]
--- a/cranelift/codegen/src/ir/entities.rs
+++ b/cranelift/codegen/src/ir/entities.rs
@@ -58,7 +58,6 @@ impl Block {
 /// - [`iconst`](super::InstBuilder::iconst) for integer constants
 /// - [`f32const`](super::InstBuilder::f32const) for 32-bit float constants
 /// - [`f64const`](super::InstBuilder::f64const) for 64-bit float constants
-/// - [`bconst`](super::InstBuilder::bconst) for boolean constants
 /// - [`vconst`](super::InstBuilder::vconst) for vector constants
 /// - [`null`](super::InstBuilder::null) for null reference constants
 ///
--- a/cranelift/codegen/src/ir/extfunc.rs
+++ b/cranelift/codegen/src/ir/extfunc.rs
@@ -372,7 +372,7 @@ impl<'a> fmt::Display for DisplayableExtFuncData<'a> {
 #[cfg(test)]
 mod tests {
    use super::*;
-    use crate::ir::types::{B8, F32, I32};
+    use crate::ir::types::{F32, I32, I8};
    use alloc::string::ToString;

    #[test]
@@ -424,7 +424,7 @@ mod tests {
        assert_eq!(sig.to_string(), "(i32) -> f32 windows_fastcall");
        sig.params.push(AbiParam::new(I32.by(4).unwrap()));
        assert_eq!(sig.to_string(), "(i32, i32x4) -> f32 windows_fastcall");
-        sig.returns.push(AbiParam::new(B8));
-        assert_eq!(sig.to_string(), "(i32, i32x4) -> f32, b8 windows_fastcall");
+        sig.returns.push(AbiParam::new(I8));
+        assert_eq!(sig.to_string(), "(i32, i32x4) -> f32, i8 windows_fastcall");
    }
 }
--- a/cranelift/codegen/src/ir/instructions.rs
+++ b/cranelift/codegen/src/ir/instructions.rs
@@ -575,8 +575,6 @@ pub struct ValueTypeSet {
    pub ints: BitSet8,
    /// Allowed float widths
    pub floats: BitSet8,
-    /// Allowed bool widths
-    pub bools: BitSet8,
    /// Allowed ref widths
    pub refs: BitSet8,
    /// Allowed dynamic vectors minimum lane sizes
@@ -593,8 +591,6 @@ impl ValueTypeSet {
            self.ints.contains(l2b)
        } else if scalar.is_float() {
            self.floats.contains(l2b)
-        } else if scalar.is_bool() {
-            self.bools.contains(l2b)
        } else if scalar.is_ref() {
            self.refs.contains(l2b)
        } else {
@@ -621,10 +617,8 @@ impl ValueTypeSet {
            types::I32
        } else if self.floats.max().unwrap_or(0) > 5 {
            types::F32
-        } else if self.bools.max().unwrap_or(0) > 5 {
-            types::B32
        } else {
-            types::B1
+            types::I8
        };
        t.by(1 << self.lanes.min().unwrap()).unwrap()
    }
@@ -860,7 +854,6 @@ mod tests {
            lanes: BitSet16::from_range(0, 8),
            ints: BitSet8::from_range(4, 7),
            floats: BitSet8::from_range(0, 0),
-            bools: BitSet8::from_range(3, 7),
            refs: BitSet8::from_range(5, 7),
            dynamic_lanes: BitSet16::from_range(0, 4),
        };
@@ -870,9 +863,6 @@ mod tests {
        assert!(vts.contains(I32X4));
        assert!(vts.contains(I32X4XN));
        assert!(!vts.contains(F32));
-        assert!(!vts.contains(B1));
-        assert!(vts.contains(B8));
-        assert!(vts.contains(B64));
        assert!(vts.contains(R32));
        assert!(vts.contains(R64));
        assert_eq!(vts.example().to_string(), "i32");
@@ -881,7 +871,6 @@ mod tests {
            lanes: BitSet16::from_range(0, 8),
            ints: BitSet8::from_range(0, 0),
            floats: BitSet8::from_range(5, 7),
-            bools: BitSet8::from_range(3, 7),
            refs: BitSet8::from_range(0, 0),
            dynamic_lanes: BitSet16::from_range(0, 8),
        };
@@ -891,7 +880,6 @@ mod tests {
            lanes: BitSet16::from_range(1, 8),
            ints: BitSet8::from_range(0, 0),
            floats: BitSet8::from_range(5, 7),
-            bools: BitSet8::from_range(3, 7),
            refs: BitSet8::from_range(0, 0),
            dynamic_lanes: BitSet16::from_range(0, 8),
        };
@@ -899,23 +887,18 @@ mod tests {

        let vts = ValueTypeSet {
            lanes: BitSet16::from_range(2, 8),
-            ints: BitSet8::from_range(0, 0),
+            ints: BitSet8::from_range(3, 7),
            floats: BitSet8::from_range(0, 0),
-            bools: BitSet8::from_range(3, 7),
            refs: BitSet8::from_range(0, 0),
            dynamic_lanes: BitSet16::from_range(0, 8),
        };
-        assert!(!vts.contains(B32X2));
-        assert!(vts.contains(B32X4));
-        assert!(vts.contains(B16X4XN));
-        assert_eq!(vts.example().to_string(), "b32x4");
+        assert_eq!(vts.example().to_string(), "i32x4");

        let vts = ValueTypeSet {
            // TypeSet(lanes=(1, 256), ints=(8, 64))
            lanes: BitSet16::from_range(0, 9),
            ints: BitSet8::from_range(3, 7),
            floats: BitSet8::from_range(0, 0),
-            bools: BitSet8::from_range(0, 0),
            refs: BitSet8::from_range(0, 0),
            dynamic_lanes: BitSet16::from_range(0, 8),
        };
--- a/cranelift/codegen/src/ir/types.rs
+++ b/cranelift/codegen/src/ir/types.rs
@@ -17,10 +17,7 @@ use target_lexicon::{PointerWidth, Triple};
 ///
 /// Basic floating point types: `F32` and `F64`. IEEE single and double precision.
 ///
-/// Boolean types: `B1`, `B8`, `B16`, `B32`, `B64`, and `B128`. These all encode 'true' or 'false'. The
-/// larger types use redundant bits.
-///
-/// SIMD vector types have power-of-two lanes, up to 256. Lanes can be any int/float/bool type.
+/// SIMD vector types have power-of-two lanes, up to 256. Lanes can be any int/float type.
 ///
 /// Note that this is encoded in a `u16` currently for extensibility,
 /// but allows only 14 bits to be used due to some bitpacking tricks
@@ -59,12 +56,11 @@ impl Type {
    /// Get log_2 of the number of bits in a lane.
    pub fn log2_lane_bits(self) -> u32 {
        match self.lane_type() {
-            B1 => 0,
-            B8 | I8 => 3,
-            B16 | I16 => 4,
-            B32 | I32 | F32 | R32 => 5,
-            B64 | I64 | F64 | R64 => 6,
-            B128 | I128 => 7,
+            I8 => 3,
+            I16 => 4,
+            I32 | F32 | R32 => 5,
+            I64 | F64 | R64 => 6,
+            I128 => 7,
            _ => 0,
        }
    }
@@ -72,12 +68,11 @@ impl Type {
    /// Get the number of bits in a lane.
    pub fn lane_bits(self) -> u32 {
        match self.lane_type() {
-            B1 => 1,
-            B8 | I8 => 8,
-            B16 | I16 => 16,
-            B32 | I32 | F32 | R32 => 32,
-            B64 | I64 | F64 | R64 => 64,
-            B128 | I128 => 128,
+            I8 => 8,
+            I16 => 16,
+            I32 | F32 | R32 => 32,
+            I64 | F64 | R64 => 64,
+            I128 => 128,
            _ => 0,
        }
    }
@@ -141,13 +136,13 @@ impl Type {
    pub fn as_bool_pedantic(self) -> Self {
-        // Replace the low 4 bits with the boolean version, preserve the high 4 bits.
+        // Map each lane type to the same-width integer type, which now stands in for booleans.
        self.replace_lanes(match self.lane_type() {
-            B8 | I8 => B8,
-            B16 | I16 => B16,
-            B32 | I32 | F32 => B32,
-            B64 | I64 | F64 => B64,
+            I8 => I8,
+            I16 => I16,
+            I32 | F32 => I32,
+            I64 | F64 => I64,
            R32 | R64 => panic!("Reference types should not convert to bool"),
-            B128 | I128 => B128,
-            _ => B1,
+            I128 => I128,
+            _ => I8,
        })
    }

@@ -157,7 +152,7 @@ impl Type {
-    /// Scalar types are all converted to `b1` which is usually what you want.
+    /// Scalar types are all converted to `i8` which is usually what you want.
    pub fn as_bool(self) -> Self {
        if !self.is_vector() {
-            B1
+            I8
        } else {
            self.as_bool_pedantic()
        }
@@ -169,11 +164,11 @@ impl Type {
-    /// Scalar types follow this same rule, but `b1` is converted into `i8`
+    /// Scalar types follow this same rule.
    pub fn as_int(self) -> Self {
        self.replace_lanes(match self.lane_type() {
-            I8 | B1 | B8 => I8,
-            I16 | B16 => I16,
-            I32 | B32 | F32 => I32,
-            I64 | B64 | F64 => I64,
-            I128 | B128 => I128,
+            I8 => I8,
+            I16 => I16,
+            I32 | F32 => I32,
+            I64 | F64 => I64,
+            I128 => I128,
            _ => unimplemented!(),
        })
    }
@@ -187,10 +182,6 @@ impl Type {
            I64 => I32,
            I128 => I64,
            F64 => F32,
-            B16 => B8,
-            B32 => B16,
-            B64 => B32,
-            B128 => B64,
            _ => return None,
        }))
    }
@@ -204,10 +195,6 @@ impl Type {
            I32 => I64,
            I64 => I128,
            F32 => F64,
-            B8 => B16,
-            B16 => B32,
-            B32 => B64,
-            B64 => B128,
            _ => return None,
        }))
    }
@@ -241,19 +228,6 @@ impl Type {
        self.0 >= constants::DYNAMIC_VECTOR_BASE
    }

-    /// Is this a scalar boolean type?
-    pub fn is_bool(self) -> bool {
-        match self {
-            B1 | B8 | B16 | B32 | B64 | B128 => true,
-            _ => false,
-        }
-    }
-
-    /// Is this a vector boolean type?
-    pub fn is_bool_vector(self) -> bool {
-        self.is_vector() && self.lane_type().is_bool()
-    }
-
    /// Is this a scalar integer type?
    pub fn is_int(self) -> bool {
        match self {
@@ -453,19 +427,6 @@ impl Type {
        }
    }

-    /// Coerces boolean types (scalar and vectors) into their integer counterparts.
-    /// B1 is converted into I8.
-    pub fn coerce_bools_to_ints(self) -> Self {
-        let is_scalar_bool = self.is_bool();
-        let is_vector_bool = self.is_vector() && self.lane_type().is_bool();
-
-        if is_scalar_bool || is_vector_bool {
-            self.as_int()
-        } else {
-            self
-        }
-    }
-
    /// Gets a bit-level representation of the type. Used only
    /// internally for efficiently storing types.
    pub(crate) fn repr(self) -> u16 {
@@ -481,9 +442,7 @@ impl Type {

 impl Display for Type {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
-        if self.is_bool() {
-            write!(f, "b{}", self.lane_bits())
-        } else if self.is_int() {
+        if self.is_int() {
            write!(f, "i{}", self.lane_bits())
        } else if self.is_float() {
            write!(f, "f{}", self.lane_bits())
@@ -506,9 +465,7 @@ impl Display for Type {

 impl Debug for Type {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
-        if self.is_bool() {
-            write!(f, "types::B{}", self.lane_bits())
-        } else if self.is_int() {
+        if self.is_int() {
            write!(f, "types::I{}", self.lane_bits())
        } else if self.is_float() {
            write!(f, "types::F{}", self.lane_bits())
@@ -548,12 +505,6 @@ mod tests {
        assert_eq!(0, IFLAGS.bits());
        assert_eq!(FFLAGS, FFLAGS.lane_type());
        assert_eq!(0, FFLAGS.bits());
-        assert_eq!(B1, B1.lane_type());
-        assert_eq!(B8, B8.lane_type());
-        assert_eq!(B16, B16.lane_type());
-        assert_eq!(B32, B32.lane_type());
-        assert_eq!(B64, B64.lane_type());
-        assert_eq!(B128, B128.lane_type());
        assert_eq!(I8, I8.lane_type());
        assert_eq!(I16, I16.lane_type());
        assert_eq!(I32, I32.lane_type());
@@ -561,7 +512,6 @@ mod tests {
        assert_eq!(I128, I128.lane_type());
        assert_eq!(F32, F32.lane_type());
        assert_eq!(F64, F64.lane_type());
-        assert_eq!(B1, B1.by(8).unwrap().lane_type());
        assert_eq!(I32, I32X4.lane_type());
        assert_eq!(F64, F64X2.lane_type());
        assert_eq!(R32, R32.lane_type());
@@ -570,12 +520,6 @@ mod tests {
        assert_eq!(INVALID.lane_bits(), 0);
        assert_eq!(IFLAGS.lane_bits(), 0);
        assert_eq!(FFLAGS.lane_bits(), 0);
-        assert_eq!(B1.lane_bits(), 1);
-        assert_eq!(B8.lane_bits(), 8);
-        assert_eq!(B16.lane_bits(), 16);
-        assert_eq!(B32.lane_bits(), 32);
-        assert_eq!(B64.lane_bits(), 64);
-        assert_eq!(B128.lane_bits(), 128);
        assert_eq!(I8.lane_bits(), 8);
        assert_eq!(I16.lane_bits(), 16);
        assert_eq!(I32.lane_bits(), 32);
@@ -592,12 +536,6 @@ mod tests {
        assert_eq!(INVALID.half_width(), None);
        assert_eq!(INVALID.half_width(), None);
        assert_eq!(FFLAGS.half_width(), None);
-        assert_eq!(B1.half_width(), None);
-        assert_eq!(B8.half_width(), None);
-        assert_eq!(B16.half_width(), Some(B8));
-        assert_eq!(B32.half_width(), Some(B16));
-        assert_eq!(B64.half_width(), Some(B32));
-        assert_eq!(B128.half_width(), Some(B64));
        assert_eq!(I8.half_width(), None);
        assert_eq!(I16.half_width(), Some(I8));
        assert_eq!(I32.half_width(), Some(I16));
@@ -610,12 +548,6 @@ mod tests {
        assert_eq!(INVALID.double_width(), None);
        assert_eq!(IFLAGS.double_width(), None);
        assert_eq!(FFLAGS.double_width(), None);
-        assert_eq!(B1.double_width(), None);
-        assert_eq!(B8.double_width(), Some(B16));
-        assert_eq!(B16.double_width(), Some(B32));
-        assert_eq!(B32.double_width(), Some(B64));
-        assert_eq!(B64.double_width(), Some(B128));
-        assert_eq!(B128.double_width(), None);
        assert_eq!(I8.double_width(), Some(I16));
        assert_eq!(I16.double_width(), Some(I32));
        assert_eq!(I32.double_width(), Some(I64));
@@ -634,7 +566,6 @@ mod tests {
        assert_eq!(big.bits(), 64 * 256);

        assert_eq!(big.half_vector().unwrap().to_string(), "f64x128");
-        assert_eq!(B1.by(2).unwrap().half_vector().unwrap().to_string(), "b1");
        assert_eq!(I32.half_vector(), None);
        assert_eq!(INVALID.half_vector(), None);

@@ -647,7 +578,6 @@ mod tests {
    fn dynamic_vectors() {
        // Identification.
        assert_eq!(I8X16XN.is_dynamic_vector(), true);
-        assert_eq!(B16X4XN.is_dynamic_vector(), true);
        assert_eq!(F32X8XN.is_dynamic_vector(), true);
        assert_eq!(F64X4XN.is_dynamic_vector(), true);
        assert_eq!(I128X2XN.is_dynamic_vector(), true);
@@ -656,28 +586,19 @@ mod tests {
        assert_eq!(I16X8XN.lane_count(), 0);
        assert_eq!(I16X8XN.min_lane_count(), 8);

-        // Size
-        assert_eq!(B32X2XN.bits(), 0);
-        assert_eq!(B32X2XN.min_bits(), 64);
-
        // Change lane counts
        assert_eq!(F64X4XN.half_vector(), None);
        assert_eq!(I8X8XN.by(2), None);

        // Conversions to and from vectors.
-        assert_eq!(B8.by(8).unwrap().vector_to_dynamic(), Some(B8X8XN));
        assert_eq!(I8.by(16).unwrap().vector_to_dynamic(), Some(I8X16XN));
        assert_eq!(I16.by(8).unwrap().vector_to_dynamic(), Some(I16X8XN));
-        assert_eq!(B16.by(16).unwrap().vector_to_dynamic(), Some(B16X16XN));
-        assert_eq!(B32.by(2).unwrap().vector_to_dynamic(), Some(B32X2XN));
-        assert_eq!(B32.by(8).unwrap().vector_to_dynamic(), Some(B32X8XN));
        assert_eq!(I32.by(4).unwrap().vector_to_dynamic(), Some(I32X4XN));
        assert_eq!(F32.by(4).unwrap().vector_to_dynamic(), Some(F32X4XN));
        assert_eq!(F64.by(2).unwrap().vector_to_dynamic(), Some(F64X2XN));
        assert_eq!(I128.by(2).unwrap().vector_to_dynamic(), Some(I128X2XN));

        assert_eq!(I128X2XN.dynamic_to_vector(), Some(I128X2));
-        assert_eq!(B64X2XN.dynamic_to_vector(), Some(B64X2));
        assert_eq!(F32X4XN.dynamic_to_vector(), Some(F32X4));
        assert_eq!(F64X4XN.dynamic_to_vector(), Some(F64X4));
        assert_eq!(I32X2XN.dynamic_to_vector(), Some(I32X2));
@@ -686,7 +607,6 @@ mod tests {
        assert_eq!(I8X32XN.dynamic_to_vector(), Some(I8X32));

        assert_eq!(I8X64.vector_to_dynamic(), None);
-        assert_eq!(B16X32.vector_to_dynamic(), None);
        assert_eq!(F32X16.vector_to_dynamic(), None);
        assert_eq!(I64X8.vector_to_dynamic(), None);
        assert_eq!(I128X4.vector_to_dynamic(), None);
@@ -696,12 +616,6 @@ mod tests {
    fn format_scalars() {
        assert_eq!(IFLAGS.to_string(), "iflags");
        assert_eq!(FFLAGS.to_string(), "fflags");
-        assert_eq!(B1.to_string(), "b1");
-        assert_eq!(B8.to_string(), "b8");
-        assert_eq!(B16.to_string(), "b16");
-        assert_eq!(B32.to_string(), "b32");
-        assert_eq!(B64.to_string(), "b64");
-        assert_eq!(B128.to_string(), "b128");
        assert_eq!(I8.to_string(), "i8");
        assert_eq!(I16.to_string(), "i16");
        assert_eq!(I32.to_string(), "i32");
@@ -715,11 +629,6 @@ mod tests {

    #[test]
    fn format_vectors() {
-        assert_eq!(B1.by(8).unwrap().to_string(), "b1x8");
-        assert_eq!(B8.by(1).unwrap().to_string(), "b8");
-        assert_eq!(B16.by(256).unwrap().to_string(), "b16x256");
-        assert_eq!(B32.by(4).unwrap().by(2).unwrap().to_string(), "b32x8");
-        assert_eq!(B64.by(8).unwrap().to_string(), "b64x8");
        assert_eq!(I8.by(64).unwrap().to_string(), "i8x64");
        assert_eq!(F64.by(2).unwrap().to_string(), "f64x2");
        assert_eq!(I8.by(3), None);
@@ -729,19 +638,10 @@ mod tests {

    #[test]
    fn as_bool() {
-        assert_eq!(I32X4.as_bool(), B32X4);
-        assert_eq!(I32.as_bool(), B1);
-        assert_eq!(I32X4.as_bool_pedantic(), B32X4);
-        assert_eq!(I32.as_bool_pedantic(), B32);
-    }
-
-    #[test]
-    fn as_int() {
-        assert_eq!(B32X4.as_int(), I32X4);
-        assert_eq!(B8X8.as_int(), I8X8);
-        assert_eq!(B1.as_int(), I8);
-        assert_eq!(B8.as_int(), I8);
-        assert_eq!(B128.as_int(), I128);
+        assert_eq!(I32X4.as_bool(), I32X4);
+        assert_eq!(I32.as_bool(), I8);
+        assert_eq!(I32X4.as_bool_pedantic(), I32X4);
+        assert_eq!(I32.as_bool_pedantic(), I32);
    }

    #[test]
--- a/cranelift/codegen/src/isa/aarch64/inst.isle
+++ b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -1156,12 +1156,6 @@
 (rule (scalar_size $I64) (ScalarSize.Size64))
 (rule (scalar_size $I128) (ScalarSize.Size128))

-(rule (scalar_size $B8) (ScalarSize.Size8))
-(rule (scalar_size $B16) (ScalarSize.Size16))
-(rule (scalar_size $B32) (ScalarSize.Size32))
-(rule (scalar_size $B64) (ScalarSize.Size64))
-(rule (scalar_size $B128) (ScalarSize.Size128))
-
 (rule (scalar_size $F32) (ScalarSize.Size32))
 (rule (scalar_size $F64) (ScalarSize.Size64))

@@ -1947,19 +1941,13 @@

 ;; Helper for materializing a boolean value into a register from
 ;; flags.
-(decl materialize_bool_result (u8 Cond) ConsumesFlags)
-(rule (materialize_bool_result 1 cond)
+(decl materialize_bool_result (Cond) ConsumesFlags)
+(rule (materialize_bool_result cond)
      (let ((dst WritableReg (temp_writable_reg $I64)))
        (ConsumesFlags.ConsumesFlagsReturnsReg
         (MInst.CSet dst cond)
         dst)))

-(rule -1 (materialize_bool_result _ty_bits cond)
-      (let ((dst WritableReg (temp_writable_reg $I64)))
-        (ConsumesFlags.ConsumesFlagsReturnsReg
-         (MInst.CSetm dst cond)
-         dst)))
-
 (decl cmn_imm (OperandSize Reg Imm12) ProducesFlags)
 (rule (cmn_imm size src1 src2)
      (ProducesFlags.ProducesFlagsSideEffect
@@ -2224,6 +2212,18 @@
         (MInst.CSel dst cond if_true if_false)
         dst)))

+;; Helper for constructing `cset` instructions.
+(decl cset (Cond) ConsumesFlags)
+(rule (cset cond)
+      (let ((dst WritableReg (temp_writable_reg $I64)))
+        (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.CSet dst cond) dst)))
+
+;; Helper for constructing `csetm` instructions.
+(decl csetm (Cond) ConsumesFlags)
+(rule (csetm cond)
+      (let ((dst WritableReg (temp_writable_reg $I64)))
+        (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.CSetm dst cond) dst)))
+
 ;; Helper for generating a `CSNeg` instruction.
 ;;
 ;; Note that this doesn't actually emit anything, instead it produces a
@@ -2244,21 +2244,14 @@
      (produces_flags_append inst_input (MInst.CCmp size rn rm nzcv cond)))

 ;; Helper for generating `MInst.CCmpImm` instructions.
-(decl ccmp_imm (OperandSize u8 Reg UImm5 NZCV Cond) ConsumesFlags)
-(rule 1 (ccmp_imm size 1 rn imm nzcv cond)
+(decl ccmp_imm (OperandSize Reg UImm5 NZCV Cond) ConsumesFlags)
+(rule 1 (ccmp_imm size rn imm nzcv cond)
      (let ((dst WritableReg (temp_writable_reg $I64)))
        (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
         (MInst.CCmpImm size rn imm nzcv cond)
         (MInst.CSet dst cond)
         (value_reg dst))))

-(rule (ccmp_imm size _ty_bits rn imm nzcv cond)
-      (let ((dst WritableReg (temp_writable_reg $I64)))
-        (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
-         (MInst.CCmpImm size rn imm nzcv cond)
-         (MInst.CSetm dst cond)
-         (value_reg dst))))
-
 ;; Helpers for generating `add` instructions.

 (decl add (Type Reg Reg) Reg)
@@ -3381,11 +3374,11 @@

 ;; Integers <= 64-bits.
 (rule -2 (lower_icmp_into_reg cond rn rm in_ty out_ty)
-      (if (ty_int_bool_ref_scalar_64 in_ty))
+      (if (ty_int_ref_scalar_64 in_ty))
      (let ((cc Cond (cond_code cond)))
       (with_flags
        (lower_icmp cond rn rm in_ty)
-        (materialize_bool_result (ty_bits out_ty) cc))))
+        (materialize_bool_result cc))))

 (rule 1 (lower_icmp cond rn rm (fits_in_16 ty))
      (if (signed_cond_code cond))
@@ -3398,23 +3391,23 @@
      (let ((rn Reg (put_in_reg_zext32 rn)))
      (cmp_extend (operand_size ty) rn rm (lower_icmp_extend ty $false))))
 (rule -3 (lower_icmp cond rn (imm12_from_value rm) ty)
-      (if (ty_int_bool_ref_scalar_64 ty))
+      (if (ty_int_ref_scalar_64 ty))
      (cmp_imm (operand_size ty) rn rm))
 (rule -4 (lower_icmp cond rn rm ty)
-      (if (ty_int_bool_ref_scalar_64 ty))
+      (if (ty_int_ref_scalar_64 ty))
      (cmp (operand_size ty) rn rm))

 ;; 128-bit integers.
-(rule (lower_icmp_into_reg cond @ (IntCC.Equal) rn rm $I128 out_ty)
+(rule (lower_icmp_into_reg cond @ (IntCC.Equal) rn rm $I128 $I8)
      (let ((cc Cond (cond_code cond)))
       (with_flags
        (lower_icmp cond rn rm $I128)
-        (materialize_bool_result (ty_bits out_ty) cc))))
-(rule (lower_icmp_into_reg cond @ (IntCC.NotEqual) rn rm $I128 out_ty)
+        (materialize_bool_result cc))))
+(rule (lower_icmp_into_reg cond @ (IntCC.NotEqual) rn rm $I128 $I8)
      (let ((cc Cond (cond_code cond)))
       (with_flags
        (lower_icmp cond rn rm $I128)
-        (materialize_bool_result (ty_bits out_ty) cc))))
+        (materialize_bool_result cc))))

 ;; cmp lhs_lo, rhs_lo
 ;; ccmp lhs_hi, rhs_hi, #0, eq
@@ -3440,7 +3433,7 @@
 ;; cmp      lhs_hi, rhs_hi
 ;; cset     tmp2, cond
 ;; csel     dst, tmp1, tmp2, eq
-(rule -1 (lower_icmp_into_reg cond lhs rhs $I128 out_ty)
+(rule -1 (lower_icmp_into_reg cond lhs rhs $I128 $I8)
      (let ((unsigned_cond Cond (cond_code (intcc_unsigned cond)))
            (cond Cond (cond_code cond))
            (lhs ValueRegs (put_in_regs lhs))
@@ -3449,78 +3442,100 @@
            (lhs_hi Reg (value_regs_get lhs 1))
            (rhs_lo Reg (value_regs_get rhs 0))
            (rhs_hi Reg (value_regs_get rhs 1))
-            (tmp1 ValueRegs
-             (with_flags (cmp (OperandSize.Size64) lhs_lo rhs_lo)
-                         (materialize_bool_result
-                          (ty_bits out_ty) unsigned_cond)))
-            (tmp1 Reg (value_regs_get tmp1 0))
-            (dst ValueRegs
-                  (with_flags (cmp (OperandSize.Size64) lhs_hi rhs_hi)
-                   (lower_icmp_i128_consumer cond (ty_bits out_ty)
-                    tmp1 lhs_hi rhs_hi))))
-       dst))
+            (tmp1 Reg (with_flags_reg (cmp (OperandSize.Size64) lhs_lo rhs_lo)
+                                      (materialize_bool_result unsigned_cond))))
+        (with_flags (cmp (OperandSize.Size64) lhs_hi rhs_hi)
+                    (lower_icmp_i128_consumer cond tmp1))))

-(decl lower_icmp_i128_consumer (Cond u8 Reg Reg Reg) ConsumesFlags)
-(rule (lower_icmp_i128_consumer cond 1 tmp1 lhs_hi rhs_hi)
+(decl lower_icmp_i128_consumer (Cond Reg) ConsumesFlags)
+(rule (lower_icmp_i128_consumer cond tmp1)
      (let ((tmp2 WritableReg (temp_writable_reg $I64))
            (dst WritableReg (temp_writable_reg $I64)))
       (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
        (MInst.CSet tmp2 cond)
        (MInst.CSel dst (Cond.Eq) tmp1 tmp2)
        (value_reg dst))))
-(rule (lower_icmp_i128_consumer cond 128 tmp1 lhs_hi rhs_hi)
-      (let ((tmp2 WritableReg (temp_writable_reg $I64))
-            (dst WritableReg (temp_writable_reg $I64)))
-       (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
-        (MInst.CSetm tmp2 cond)
-        (MInst.CSel dst (Cond.Eq) tmp1 tmp2)
-        (value_regs dst dst))))
-(rule -1 (lower_icmp_i128_consumer cond _out_ty_bits tmp1 lhs_hi rhs_hi)
-      (let ((tmp2 WritableReg (temp_writable_reg $I64))
-            (dst WritableReg (temp_writable_reg $I64)))
-       (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
-        (MInst.CSetm tmp2 cond)
-        (MInst.CSel dst (Cond.Eq) tmp1 tmp2)
-        (value_reg dst))))
+
+(decl lower_bmask (Type Type ValueRegs) ValueRegs)
+
+;; For conversions that fit in a register, we can use csetm. NOTE(review): the
+;; compare is 64-bit; confirm inputs narrower than 64 bits are extended first.
+;; cmp   val, #0
+;; csetm res, ne
+(rule 0
+      (lower_bmask (fits_in_64 _) (fits_in_64 _) val)
+      (with_flags_reg
+        (cmp64_imm (value_regs_get val 0) (u8_into_imm12 0))
+        (csetm (Cond.Ne))))
+
+;; For conversions from a 128-bit value into a 64-bit or smaller one, we OR the
+;; two registers of the 128-bit value together, and then recurse with the
+;; combined value as a 64-bit test.
+;;
+;; orr   val, lo, hi
+;; cmp   val, #0
+;; csetm res, ne
+(rule 1
+      (lower_bmask (fits_in_64 ty) $I128 val)
+      (let ((lo Reg (value_regs_get val 0))
+            (hi Reg (value_regs_get val 1))
+            (combined Reg (orr $I64 lo hi)))
+        (lower_bmask ty $I64 (value_reg combined))))
+
+;; For converting from a smaller type into i128, duplicate the result of
+;; converting to i64.
+(rule 2
+      (lower_bmask $I128 (fits_in_64 ty) val)
+      (let ((res ValueRegs (lower_bmask $I64 ty val))
+            (res Reg (value_regs_get res 0)))
+        (value_regs res res)))
+
+;; For 128-bit to 128-bit conversions, we reduce the input to a 64-bit mask
+;; first, and then duplicate that mask into both result registers.
+(rule 3
+      (lower_bmask $I128 $I128 val)
+      (let ((res ValueRegs (lower_bmask $I64 $I128 val))
+            (res Reg (value_regs_get res 0)))
+        (value_regs res res)))

 ;; Exceptional `lower_icmp_into_flags` rules.
 ;; We need to guarantee that the flags for `cond` are correct, so we
 ;; compare `dst` with 1.
 (rule (lower_icmp_into_flags cond @ (IntCC.SignedGreaterThanOrEqual) lhs rhs $I128)
-      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $B1))
+      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $I8))
            (dst Reg (value_regs_get dst 0))
            (tmp Reg (imm $I64 (ImmExtend.Sign) 1))) ;; mov tmp, #1
       (cmp (OperandSize.Size64) dst tmp)))
 (rule (lower_icmp_into_flags cond @ (IntCC.UnsignedGreaterThanOrEqual) lhs rhs $I128)
-      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $B1))
+      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $I8))
            (dst Reg (value_regs_get dst 0))
            (tmp Reg (imm $I64 (ImmExtend.Zero) 1)))
       (cmp (OperandSize.Size64) dst tmp)))
 (rule (lower_icmp_into_flags cond @ (IntCC.SignedLessThanOrEqual) lhs rhs $I128)
-      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $B1))
+      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $I8))
            (dst Reg (value_regs_get dst 0))
            (tmp Reg (imm $I64 (ImmExtend.Sign) 1)))
       (cmp (OperandSize.Size64) tmp dst)))
 (rule (lower_icmp_into_flags cond @ (IntCC.UnsignedLessThanOrEqual) lhs rhs $I128)
-      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $B1))
+      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $I8))
            (dst Reg (value_regs_get dst 0))
            (tmp Reg (imm $I64 (ImmExtend.Zero) 1)))
       (cmp (OperandSize.Size64) tmp dst)))
 ;; For strict comparisons, we compare with 0.
 (rule (lower_icmp_into_flags cond @ (IntCC.SignedGreaterThan) lhs rhs $I128)
-      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $B1))
+      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $I8))
            (dst Reg (value_regs_get dst 0)))
       (cmp (OperandSize.Size64) dst (zero_reg))))
 (rule (lower_icmp_into_flags cond @ (IntCC.UnsignedGreaterThan) lhs rhs $I128)
-      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $B1))
+      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $I8))
            (dst Reg (value_regs_get dst 0)))
       (cmp (OperandSize.Size64) dst (zero_reg))))
 (rule (lower_icmp_into_flags cond @ (IntCC.SignedLessThan) lhs rhs $I128)
-      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $B1))
+      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $I8))
            (dst Reg (value_regs_get dst 0)))
       (cmp (OperandSize.Size64) (zero_reg) dst)))
 (rule (lower_icmp_into_flags cond @ (IntCC.UnsignedLessThan) lhs rhs $I128)
-      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $B1))
+      (let ((dst ValueRegs (lower_icmp_into_reg cond lhs rhs $I128 $I8))
            (dst Reg (value_regs_get dst 0)))
       (cmp (OperandSize.Size64) (zero_reg) dst)))

@@ -3548,7 +3563,7 @@
         (MInst.CSel dst_hi cond rn_hi rm_hi)
         (value_regs dst_lo dst_hi)))))
 (rule 1 (lower_select flags cond ty rn rm)
-      (if (ty_int_bool_ref_scalar_64 ty))
+      (if (ty_int_ref_scalar_64 ty))
      (with_flags flags (csel cond rn rm)))

 ;; Helper for emitting `MInst.Jump` instructions.
--- a/cranelift/codegen/src/isa/aarch64/inst/imms.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/imms.rs
@@ -221,9 +221,6 @@ impl UImm12Scaled {
    /// Create a UImm12Scaled from a raw offset and the known scale type, if
    /// possible.
    pub fn maybe_from_i64(value: i64, scale_ty: Type) -> Option<UImm12Scaled> {
-        // Ensure the type is at least one byte.
-        let scale_ty = if scale_ty == B1 { B8 } else { scale_ty };
-
        let scale = scale_ty.bytes();
        assert!(scale.is_power_of_two());
        let scale = scale as i64;
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -1,9 +1,7 @@
 //! This module defines aarch64-specific machine instruction types.

 use crate::binemit::{Addend, CodeOffset, Reloc};
-use crate::ir::types::{
-    B1, B128, B16, B32, B64, B8, F32, F64, FFLAGS, I128, I16, I32, I64, I8, I8X16, IFLAGS, R32, R64,
-};
+use crate::ir::types::{F32, F64, FFLAGS, I128, I16, I32, I64, I8, I8X16, IFLAGS, R32, R64};
 use crate::ir::{types, ExternalName, MemFlags, Opcode, Type};
 use crate::isa::CallConv;
 use crate::machinst::*;
@@ -440,22 +438,22 @@ impl Inst {
    /// Generic constructor for a load (zero-extending where appropriate).
    pub fn gen_load(into_reg: Writable<Reg>, mem: AMode, ty: Type, flags: MemFlags) -> Inst {
        match ty {
-            B1 | B8 | I8 => Inst::ULoad8 {
+            I8 => Inst::ULoad8 {
                rd: into_reg,
                mem,
                flags,
            },
-            B16 | I16 => Inst::ULoad16 {
+            I16 => Inst::ULoad16 {
                rd: into_reg,
                mem,
                flags,
            },
-            B32 | I32 | R32 => Inst::ULoad32 {
+            I32 | R32 => Inst::ULoad32 {
                rd: into_reg,
                mem,
                flags,
            },
-            B64 | I64 | R64 => Inst::ULoad64 {
+            I64 | R64 => Inst::ULoad64 {
                rd: into_reg,
                mem,
                flags,
@@ -491,22 +489,22 @@ impl Inst {
    /// Generic constructor for a store.
    pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst {
        match ty {
-            B1 | B8 | I8 => Inst::Store8 {
+            I8 => Inst::Store8 {
                rd: from_reg,
                mem,
                flags,
            },
-            B16 | I16 => Inst::Store16 {
+            I16 => Inst::Store16 {
                rd: from_reg,
                mem,
                flags,
            },
-            B32 | I32 | R32 => Inst::Store32 {
+            I32 | R32 => Inst::Store32 {
                rd: from_reg,
                mem,
                flags,
            },
-            B64 | I64 | R64 => Inst::Store64 {
+            I64 | R64 => Inst::Store64 {
                rd: from_reg,
                mem,
                flags,
@@ -1209,9 +1207,7 @@ impl MachInst for Inst {
        match ty {
            F64 => Inst::load_fp_constant64(to_reg.unwrap(), value as u64, alloc_tmp),
            F32 => Inst::load_fp_constant32(to_reg.unwrap(), value as u32, alloc_tmp),
-            B1 | B8 | B16 | B32 | B64 | I8 | I16 | I32 | I64 | R32 | R64 => {
-                Inst::load_constant(to_reg.unwrap(), value as u64)
-            }
+            I8 | I16 | I32 | I64 | R32 | R64 => Inst::load_constant(to_reg.unwrap(), value as u64),
            I128 => Inst::load_constant128(to_regs, value),
            _ => panic!("Cannot generate constant for type: {}", ty),
        }
@@ -1236,17 +1232,11 @@ impl MachInst for Inst {
            I16 => Ok((&[RegClass::Int], &[I16])),
            I32 => Ok((&[RegClass::Int], &[I32])),
            I64 => Ok((&[RegClass::Int], &[I64])),
-            B1 => Ok((&[RegClass::Int], &[B1])),
-            B8 => Ok((&[RegClass::Int], &[B8])),
-            B16 => Ok((&[RegClass::Int], &[B16])),
-            B32 => Ok((&[RegClass::Int], &[B32])),
-            B64 => Ok((&[RegClass::Int], &[B64])),
            R32 => panic!("32-bit reftype pointer should never be seen on AArch64"),
            R64 => Ok((&[RegClass::Int], &[R64])),
            F32 => Ok((&[RegClass::Float], &[F32])),
            F64 => Ok((&[RegClass::Float], &[F64])),
            I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])),
-            B128 => Ok((&[RegClass::Int, RegClass::Int], &[B64, B64])),
            _ if ty.is_vector() => {
                assert!(ty.bits() <= 128);
                Ok((&[RegClass::Float], &[I8X16]))
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -19,14 +19,6 @@
 (rule (lower (has_type ty (iconst (u64_from_imm64 n))))
      (imm ty (ImmExtend.Zero) n))

-;;;; Rules for `bconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(rule (lower (has_type ty (bconst $false)))
-      (imm ty (ImmExtend.Zero) 0))
-
-(rule (lower (has_type ty (bconst $true)))
-      (imm ty (ImmExtend.Zero) 1))
-
 ;;;; Rules for `null` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule (lower (has_type ty (null)))
@@ -142,10 +134,10 @@
 (rule (lower (has_type $F64X2 (scalar_to_vector x)))
      (fpu_extend x (ScalarSize.Size64)))

-(rule -1 (lower (scalar_to_vector x @ (value_type (ty_int_bool_64 _))))
+(rule -1 (lower (scalar_to_vector x @ (value_type $I64)))
      (mov_to_fpu x (ScalarSize.Size64)))

-(rule -2 (lower (scalar_to_vector x @ (value_type (int_bool_fits_in_32 _))))
+(rule -2 (lower (scalar_to_vector x @ (value_type (int_fits_in_32 _))))
      (mov_to_fpu (put_in_reg_zext32 x) (ScalarSize.Size32)))

 ;;;; Rules for `vall_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -159,18 +151,17 @@
 ;; 0 when all input elements are true, i.e. non-zero, or a NaN otherwise
 ;; (either -1 or -2 when represented as an integer); NaNs are the only
 ;; floating-point numbers that compare unequal to themselves.
-(rule (lower (has_type out_ty (vall_true x @ (value_type (multi_lane 64 2)))))
+(rule (lower (vall_true x @ (value_type (multi_lane 64 2))))
      (let ((x1 Reg (cmeq0 x (VectorSize.Size64x2)))
            (x2 Reg (addp x1 x1 (VectorSize.Size64x2))))
       (with_flags (fpu_cmp (ScalarSize.Size64) x2 x2)
-                   (materialize_bool_result (ty_bits out_ty) (Cond.Eq)))))
+                   (materialize_bool_result (Cond.Eq)))))

-(rule (lower (has_type out_ty (vall_true x @ (value_type (multi_lane 32 2)))))
+(rule (lower (vall_true x @ (value_type (multi_lane 32 2))))
      (let ((x1 Reg (mov_from_vec x 0 (ScalarSize.Size64))))
       (with_flags (cmp_rr_shift (OperandSize.Size64) (zero_reg) x1 32)
                   (ccmp_imm
                    (OperandSize.Size32)
-                    (ty_bits out_ty)
                    x1
                    (u8_into_uimm5 0)
                    (nzcv $false $true $false $false)
@@ -183,18 +174,18 @@
 ;; mov xm, vn.d[0]
 ;; cmp xm, #0
 ;; cset xm, ne
-(rule -1 (lower (has_type out_ty (vall_true x @ (value_type (lane_fits_in_32 ty)))))
+(rule -1 (lower (vall_true x @ (value_type (lane_fits_in_32 ty))))
      (if (not_vec32x2 ty))
      (let ((x1 Reg (vec_lanes (VecLanesOp.Uminv) x (vector_size ty)))
            (x2 Reg (mov_from_vec x1 0 (ScalarSize.Size64))))
       (with_flags (cmp_imm (OperandSize.Size64) x2 (u8_into_imm12 0))
-                   (materialize_bool_result (ty_bits out_ty) (Cond.Ne)))))
+                   (materialize_bool_result (Cond.Ne)))))

 ;;;; Rules for `vany_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-(rule (lower (has_type out_ty (vany_true x @ (value_type in_ty))))
+(rule (lower (vany_true x @ (value_type in_ty)))
      (with_flags (vanytrue x in_ty)
-                  (materialize_bool_result (ty_bits out_ty) (Cond.Ne))))
+                  (materialize_bool_result (Cond.Ne))))

 ;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

@@ -1536,60 +1527,11 @@
 (rule -1 (lower (has_type ty (cls x)))
      (a64_cls ty x))

-;;;; Rules for `bint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; Rules for `bmask` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-;; Booleans are stored as all-zeroes (0) or all-ones (-1). We AND
-;; out the LSB to give a 0 / 1-valued integer result.
-
-(rule 1 (lower (has_type $I128 (bint x)))
-      (let ((val ValueRegs x)
-            (in_lo Reg (value_regs_get val 0))
-            (dst_lo Reg (and_imm $I32 in_lo (u64_into_imm_logic $I32 1)))
-            (dst_hi Reg (imm $I64 (ImmExtend.Zero) 0)))
-        (value_regs dst_lo dst_hi)))
-
-(rule (lower (bint x))
-      (and_imm $I32 x (u64_into_imm_logic $I32 1)))
-
-;;;; Rules for `bmask`/`bextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; Bextend and Bmask both simply sign-extend. This works for:
-;; - Bextend, because booleans are stored as 0 / -1, so we
-;;   sign-extend the -1 to a -1 in the wider width.
-;; - Bmask, because the resulting integer mask value must be
-;;   all-ones (-1) if the argument is true.
-
-;; Use a common helper to type cast bools to either bool or integer types.
-(decl cast_bool (Type Type Value) InstOutput)
-(rule (lower (has_type out_ty (bextend x @ (value_type in_ty))))
-      (cast_bool in_ty out_ty x))
+;; `bmask` tests the value against zero and uses `csetm` to materialize an all-ones (nonzero input) or all-zeros mask.
 (rule (lower (has_type out_ty (bmask x @ (value_type in_ty))))
-      (cast_bool in_ty out_ty x))
-
-
-;; If the target has the same or a smaller size than the source, it's a no-op.
-(rule (cast_bool $B8 $I8 x) x)
-(rule (cast_bool $B16 (fits_in_16 _out) x) x)
-(rule (cast_bool $B32 (fits_in_32 _out) x) x)
-(rule (cast_bool $B64 (fits_in_64 _out) x) x)
-
-;; Casting between 128 bits is a noop
-(rule -1 (cast_bool (ty_int_bool_128 _in) (ty_int_bool_128 _out) x)
-    x)
-
-;; Converting from 128 bits to anything below we just ignore the top register
-(rule -2 (cast_bool (ty_int_bool_128 _in) (fits_in_64 _out) x)
-    (value_regs_get x 0))
-
-;; Extend to 64 bits first, then this will be all 0s or all 1s and we can
-;; duplicate to both halves of 128 bits
-(rule -3 (cast_bool in (ty_int_bool_128 _out) x)
-      (let ((tmp Reg (extend x $true (ty_bits in) 64)))
-        (value_regs tmp tmp)))
-
-;; Values that fit in a single register are sign extended normally
-(rule -4 (cast_bool (fits_in_64 in) (fits_in_64 out) x)
-      (extend x $true (ty_bits in) (ty_bits out)))
+      (lower_bmask out_ty in_ty x))

 ;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

@@ -1648,7 +1590,7 @@
 ;;;; Rules for `bitselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule (lower (has_type ty (bitselect c x y)))
-      (if (ty_int_bool_ref_scalar_64 ty))
+      (if (ty_int_ref_scalar_64 ty))
      (let ((tmp1 Reg (and_reg ty x c))
            (tmp2 Reg (bic ty y c)))
        (orr ty tmp1 tmp2)))
@@ -1661,22 +1603,15 @@
 (rule (lower (has_type (ty_vec128 ty) (vselect c x y)))
        (bsl ty c x y))

-;;;; Rules for `ireduce` / `breduce` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; Rules for `ireduce` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; T -> I{64,32,16,8}: We can simply pass through the value: values
 ;; are always stored with high bits undefined, so we can just leave
 ;; them be.
 (rule (lower (has_type ty (ireduce src)))
-    (if (ty_int_bool_ref_scalar_64 ty))
+    (if (ty_int_ref_scalar_64 ty))
    (value_regs_get src 0))

-;; Likewise for breduce.
-
-(rule (lower (has_type ty (breduce src)))
-      (if (ty_int_bool_ref_scalar_64 ty))
-      (value_regs_get src 0))
-
-
 ;;;; Rules for `fcmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule 4 (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) x y)))
@@ -1706,9 +1641,7 @@
 (rule 0 (lower (has_type out_ty
              (fcmp cond x @ (value_type (ty_scalar_float in_ty)) y)))
      (with_flags (fpu_cmp (scalar_size in_ty) x y)
-                  (materialize_bool_result
-                   (ty_bits out_ty)
-                   (fp_cond_code cond))))
+                  (materialize_bool_result (fp_cond_code cond))))

 (rule -1 (lower (has_type out_ty (fcmp cond x @ (value_type in_ty) y)))
      (if (ty_vector_float in_ty))
@@ -1740,8 +1673,8 @@
            (vec_size VectorSize (vector_size ty)))
          (value_reg (int_cmp_zero_swap cond rn vec_size))))

-(rule -1 (lower (has_type out_ty (icmp cond x @ (value_type in_ty) y)))
-      (lower_icmp_into_reg cond x y in_ty out_ty))
+(rule -1 (lower (icmp cond x @ (value_type in_ty) y))
+      (lower_icmp_into_reg cond x y in_ty $I8))

 ;;;; Rules for `trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

@@ -1783,10 +1716,10 @@
 ;;;; Rules for `trueff` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; Verification ensures the input is always a single-def ffcmp.
-(rule (lower (has_type ty (trueff cc insn @ (ffcmp x @ (value_type in_ty) y))))
+(rule (lower (trueff cc insn @ (ffcmp x @ (value_type in_ty) y)))
      (with_flags_reg
       (fpu_cmp (scalar_size in_ty) x y)
-       (materialize_bool_result (ty_bits ty) (fp_cond_code cc))))
+       (materialize_bool_result (fp_cond_code cc))))

 ;;;; Rules for `select` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

@@ -1797,13 +1730,6 @@
        (lower_icmp_into_flags cc x y in_ty)
        cond ty rn rm)))

-(rule (lower (has_type ty
-       (select _flags @ (bint (icmp cc x @ (value_type in_ty) y)) rn rm)))
-      (let ((cond Cond (cond_code cc)))
-       (lower_select
-        (lower_icmp_into_flags cc x y in_ty)
-        cond ty rn rm)))
-
 (rule (lower (has_type ty
       (select _flags @ (fcmp cc x @ (value_type in_ty) y) rn rm)))
      (let ((cond Cond (fp_cond_code cc)))
@@ -1811,20 +1737,19 @@
        (fpu_cmp (scalar_size in_ty) x y)
        cond ty rn rm)))

-(rule (lower (has_type ty
-       (select _flags @ (bint (fcmp cc x @ (value_type in_ty) y)) rn rm)))
-      (let ((cond Cond (fp_cond_code cc)))
+(rule -1 (lower (has_type ty (select rcond @ (value_type $I8) rn rm)))
+      (let ((rcond Reg rcond))
       (lower_select
-        (fpu_cmp (scalar_size in_ty) x y)
-        cond ty rn rm)))
+         (tst_imm $I32 rcond (u64_into_imm_logic $I32 255))
+         (Cond.Ne) ty rn rm)))

-(rule -1 (lower (has_type ty (select rcond @ (value_type (fits_in_32 _)) rn rm)))
+(rule -2 (lower (has_type ty (select rcond @ (value_type (fits_in_32 _)) rn rm)))
      (let ((rcond Reg (put_in_reg_zext32 rcond)))
       (lower_select
        (cmp (OperandSize.Size32) rcond (zero_reg))
        (Cond.Ne) ty rn rm)))

-(rule -2 (lower (has_type ty (select rcond rn rm)))
+(rule -3 (lower (has_type ty (select rcond rn rm)))
      (let ((rcond Reg (put_in_reg_zext64 rcond)))
       (lower_select
        (cmp (OperandSize.Size64) rcond (zero_reg))
@@ -1865,18 +1790,12 @@
 ;;;; Rules for `splat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule -1 (lower (has_type ty (splat x @ (value_type in_ty))))
-      (if (ty_int_bool_ref_scalar_64 in_ty))
+      (if (ty_int_ref_scalar_64 in_ty))
      (vec_dup x (vector_size ty)))

 (rule -2 (lower (has_type ty (splat x @ (value_type (ty_scalar_float _)))))
      (vec_dup_from_fpu x (vector_size ty)))

-(rule (lower (has_type ty (splat (bconst (u64_from_bool n)))))
-      (splat_const n (vector_size ty)))
-
-(rule (lower (has_type ty (splat (breduce (bconst (u64_from_bool n))))))
-      (splat_const n (vector_size ty)))
-
 (rule (lower (has_type ty (splat (f32const (u64_from_ieee32 n)))))
      (splat_const n (vector_size ty)))

@@ -2089,17 +2008,15 @@

 ;;;; Rules for `IsNull` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-(rule (lower (has_type out_ty (is_null x @ (value_type ty))))
+(rule (lower (is_null x @ (value_type ty)))
      (with_flags (cmp_imm (operand_size ty) x (u8_into_imm12 0))
-                  (materialize_bool_result
-                   (ty_bits out_ty) (Cond.Eq))))
+                  (materialize_bool_result (Cond.Eq))))

 ;;;; Rules for `IsInvalid` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-(rule (lower (has_type out_ty (is_invalid x @ (value_type ty))))
+(rule (lower (is_invalid x @ (value_type ty)))
      (with_flags (cmn_imm (operand_size ty) x (u8_into_imm12 1))
-                  (materialize_bool_result
-                   (ty_bits out_ty) (Cond.Eq))))
+                  (materialize_bool_result (Cond.Eq))))

 ;;;; Rules for `Debugtrap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

@@ -2325,18 +2242,18 @@

 ; GPR => SIMD&FP
 (rule 4 (lower (has_type (ty_float_or_vec _) (bitcast x @ (value_type in_ty))))
-      (if (ty_int_bool_ref_scalar_64 in_ty))
+      (if (ty_int_ref_scalar_64 in_ty))
      (mov_to_fpu x (scalar_size in_ty)))

 ; SIMD&FP => GPR
 (rule 3 (lower (has_type out_ty (bitcast x @ (value_type (fits_in_64 (ty_float_or_vec _))))))
-      (if (ty_int_bool_ref_scalar_64 out_ty))
+      (if (ty_int_ref_scalar_64 out_ty))
      (mov_from_vec x 0 (scalar_size out_ty)))

 ; GPR <=> GPR
 (rule 2 (lower (has_type out_ty (bitcast x @ (value_type in_ty))))
-      (if (ty_int_bool_ref_scalar_64 out_ty))
-      (if (ty_int_bool_ref_scalar_64 in_ty))
+      (if (ty_int_ref_scalar_64 out_ty))
+      (if (ty_int_ref_scalar_64 in_ty))
      x)
 (rule 1 (lower (has_type $I128 (bitcast x @ (value_type $I128)))) x)

@@ -2352,7 +2269,7 @@
 (rule 2 (lower (has_type (ty_scalar_float _) (extractlane val (u8_from_uimm8 0))))
      val)

-(rule 0 (lower (has_type (ty_int_bool ty)
+(rule 0 (lower (has_type (ty_int ty)
                       (extractlane val
                                    (u8_from_uimm8 lane))))
      (mov_from_vec val lane (scalar_size ty)))
@@ -2365,7 +2282,7 @@
 ;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule 1 (lower (insertlane vec @ (value_type vty)
-                         val @ (value_type (ty_int_bool _))
+                         val @ (value_type (ty_int _))
                         (u8_from_uimm8 lane)))
      (mov_to_vec vec val lane (vector_size vty)))

@@ -2507,7 +2424,7 @@

 ;;; Rules for `brz`/`brnz`/`brif`/`brff`/`bricmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-;; `brz` following `icmp`, possibly converted via `bint`.
+;; `brz` following `icmp`
 (rule (lower_branch (brz (icmp cc x @ (value_type ty) y) _ _) targets)
      (let ((cond Cond (cond_code cc))
            (cond Cond (invert_cond cond)) ;; negate for `brz`
@@ -2517,16 +2434,7 @@
        (with_flags_side_effect (lower_icmp_into_flags cc x y ty)
                                (cond_br taken not_taken
                                 (cond_br_cond cond))))))
-(rule (lower_branch (brz (bint (icmp cc x @ (value_type ty) y)) _ _) targets)
-      (let ((cond Cond (cond_code cc))
-            (cond Cond (invert_cond cond)) ;; negate for `brz`
-            (taken BranchTarget (branch_target targets 0))
-            (not_taken BranchTarget (branch_target targets 1)))
-       (side_effect
-        (with_flags_side_effect (lower_icmp_into_flags cc x y ty)
-                                (cond_br taken not_taken
-                                 (cond_br_cond cond))))))
-;; `brnz` following `icmp`, possibly converted via `bint`.
+;; `brnz` following `icmp`
 (rule (lower_branch (brnz (icmp cc x @ (value_type ty) y) _ _) targets)
      (let ((cond Cond (cond_code cc))
            (taken BranchTarget (branch_target targets 0))
@@ -2535,15 +2443,7 @@
        (with_flags_side_effect (lower_icmp_into_flags cc x y ty)
                                (cond_br taken not_taken
                                 (cond_br_cond cond))))))
-(rule (lower_branch (brnz (bint (icmp cc x @ (value_type ty) y)) _ _) targets)
-      (let ((cond Cond (cond_code cc))
-            (taken BranchTarget (branch_target targets 0))
-            (not_taken BranchTarget (branch_target targets 1)))
-       (side_effect
-        (with_flags_side_effect (lower_icmp_into_flags cc x y ty)
-                                (cond_br taken not_taken
-                                 (cond_br_cond cond))))))
-;; `brz` following `fcmp`, possibly converted via `bint`.
+;; `brz` following `fcmp`
 (rule (lower_branch (brz (fcmp cc x @ (value_type (ty_scalar_float ty)) y) _ _) targets)
      (let ((cond Cond (fp_cond_code cc))
            (cond Cond (invert_cond cond)) ;; negate for `brz`
@@ -2553,16 +2453,7 @@
        (with_flags_side_effect (fpu_cmp (scalar_size ty) x y)
                                (cond_br taken not_taken
                                 (cond_br_cond cond))))))
-(rule (lower_branch (brz (bint (fcmp cc x @ (value_type (ty_scalar_float ty)) y)) _ _) targets)
-      (let ((cond Cond (fp_cond_code cc))
-            (cond Cond (invert_cond cond)) ;; negate for `brz`
-            (taken BranchTarget (branch_target targets 0))
-            (not_taken BranchTarget (branch_target targets 1)))
-       (side_effect
-        (with_flags_side_effect (fpu_cmp (scalar_size ty) x y)
-                                (cond_br taken not_taken
-                                 (cond_br_cond cond))))))
-;; `brnz` following `fcmp`, possibly converted via `bint`.
+;; `brnz` following `fcmp`
 (rule (lower_branch (brnz (fcmp cc x @ (value_type (ty_scalar_float ty)) y) _ _) targets)
      (let ((cond Cond (fp_cond_code cc))
            (taken BranchTarget (branch_target targets 0))
@@ -2571,14 +2462,6 @@
        (with_flags_side_effect (fpu_cmp (scalar_size ty) x y)
                                (cond_br taken not_taken
                                 (cond_br_cond cond))))))
-(rule (lower_branch (brnz (bint (fcmp cc x @ (value_type (ty_scalar_float ty)) y)) _ _) targets)
-      (let ((cond Cond (fp_cond_code cc))
-            (taken BranchTarget (branch_target targets 0))
-            (not_taken BranchTarget (branch_target targets 1)))
-       (side_effect
-        (with_flags_side_effect (fpu_cmp (scalar_size ty) x y)
-                                (cond_br taken not_taken
-                                 (cond_br_cond cond))))))
 ;; standard `brz`
 (rule -1 (lower_branch (brz c @ (value_type $I128) _ _) targets)
      (let ((flags ProducesFlags (flags_to_producesflags c))
@@ -2592,7 +2475,7 @@
        (with_flags_side_effect flags
         (cond_br taken not_taken (cond_br_zero rt))))))
 (rule -2 (lower_branch (brz c @ (value_type ty) _ _) targets)
-      (if (ty_int_bool_ref_scalar_64 ty))
+      (if (ty_int_ref_scalar_64 ty))
      (let ((flags ProducesFlags (flags_to_producesflags c))
            (rt Reg (put_in_reg_zext64 c))
            (taken BranchTarget (branch_target targets 0))
@@ -2613,7 +2496,7 @@
        (with_flags_side_effect flags
         (cond_br taken not_taken (cond_br_not_zero rt))))))
 (rule -2 (lower_branch (brnz c @ (value_type ty) _ _) targets)
-      (if (ty_int_bool_ref_scalar_64 ty))
+      (if (ty_int_ref_scalar_64 ty))
      (let ((flags ProducesFlags (flags_to_producesflags c))
            (rt Reg (put_in_reg_zext64 c))
            (taken BranchTarget (branch_target targets 0))
--- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs
@@ -165,7 +165,6 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
    fn integral_ty(&mut self, ty: Type) -> Option<Type> {
        match ty {
            I8 | I16 | I32 | I64 | R64 => Some(ty),
-            ty if ty.is_bool() => Some(ty),
            _ => None,
        }
    }
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -39,7 +39,7 @@ pub(crate) fn lower_insn_to_regs(
    };

    match op {
-        Opcode::Iconst | Opcode::Bconst | Opcode::Null => implemented_in_isle(ctx),
+        Opcode::Iconst | Opcode::Null => implemented_in_isle(ctx),

        Opcode::F32const => {
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
@@ -163,11 +163,9 @@ pub(crate) fn lower_insn_to_regs(

        Opcode::Copy => implemented_in_isle(ctx),

-        Opcode::Breduce | Opcode::Ireduce => implemented_in_isle(ctx),
+        Opcode::Ireduce => implemented_in_isle(ctx),

-        Opcode::Bextend | Opcode::Bmask => implemented_in_isle(ctx),
-
-        Opcode::Bint => implemented_in_isle(ctx),
+        Opcode::Bmask => implemented_in_isle(ctx),

        Opcode::Bitcast => implemented_in_isle(ctx),

--- a/cranelift/codegen/src/isa/riscv64/inst.isle
+++ b/cranelift/codegen/src/isa/riscv64/inst.isle
@@ -1659,11 +1659,6 @@
      (result Reg (alu_rrr (AluOPRRR.Or) tmp_x tmp_y)))
    result))

-(decl gen_bint (Reg) Reg)
-(rule 
-  (gen_bint r)
-  (alu_rr_imm12 (AluOPRRI.Andi) r (imm12_const 1)))
-
 (decl gen_int_select (Type IntSelectOP ValueRegs ValueRegs) ValueRegs)
 (rule
  (gen_int_select ty op x y)
@@ -1729,12 +1724,6 @@
      (_ Unit (emit (MInst.FcvtToInt is_sat result tmp rs is_signed in_type out_type))))
    result))

-;;;; in_type out_type
-;;;; out_type is returned.
-(decl pure valid_bextend_ty (Type Type) Type)
-(extern constructor valid_bextend_ty valid_bextend_ty)
-
-
 ;;; some float binary operation 
 ;;; 1. need move into x reister.
 ;;; 2. do the operation.
@@ -1907,14 +1896,29 @@
 (decl lower_brz_or_nz (IntCC ValueRegs VecMachLabel Type) InstOutput)
 (extern constructor lower_brz_or_nz lower_brz_or_nz)

+;; Normalize a value by masking to its bit-size.
+(decl normalize_value (Type ValueRegs) ValueRegs)
+
+(rule (normalize_value $I8 r)
+      (value_reg (alu_rr_imm12 (AluOPRRI.Andi) r (imm12_const 255))))
+(rule (normalize_value $I16 r)
+      (value_reg (alu_rrr (AluOPRRR.And) r (imm $I16 65535))))
+(rule (normalize_value $I32 r)
+      (value_reg (alu_rr_imm12 (AluOPRRI.Andi) r (imm12_const -1))))
+
+(rule (normalize_value $I64  r) r)
+(rule (normalize_value $I128 r) r)
+(rule (normalize_value $F32  r) r)
+(rule (normalize_value $F64  r) r)
+
 ;;;;; 
 (rule 
  (lower_branch (brz v @ (value_type ty) _ _) targets)
-  (lower_brz_or_nz (IntCC.Equal) v targets ty))
+  (lower_brz_or_nz (IntCC.Equal) (normalize_value ty v) targets ty))
 ;;;; 
 (rule 
  (lower_branch (brnz v @ (value_type ty) _ _) targets)
-  (lower_brz_or_nz (IntCC.NotEqual) v targets ty))
+  (lower_brz_or_nz (IntCC.NotEqual) (normalize_value ty v) targets ty))

 ;;; 
 (rule 
@@ -2082,3 +2086,43 @@
 (decl umulh (Reg Reg) Reg)
 (rule (umulh a b)
  (alu_rrr (AluOPRRR.Mulhu) a b))
+
+;;;; Helpers for bmask ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(decl lower_bmask (Type Type ValueRegs) ValueRegs)
+
+;; Produces -1 if the 64-bit value is non-zero, and 0 otherwise.
+(rule
+  0
+  (lower_bmask (fits_in_64 _) (fits_in_64 _) val)
+  (let ((input Reg val)
+        (zero Reg (zero_reg))
+        (ones Reg (load_imm12 -1)))
+  (value_reg (gen_select_reg (IntCC.Equal) zero input zero ones))))
+
+;; Bitwise-or the two registers that make up the 128-bit value, then recurse as
+;; though it was a 64-bit value.
+(rule
+  1
+  (lower_bmask (fits_in_64 ty) $I128 val)
+  (let ((lo Reg (value_regs_get val 0))
+        (hi Reg (value_regs_get val 1))
+        (combined Reg (alu_rrr (AluOPRRR.Or) lo hi)))
+    (lower_bmask ty $I64 (value_reg combined))))
+
+;; Conversion of one 64-bit value to a 128-bit one. Duplicate the result of the
+;; bmask of the 64-bit value into both result registers of the i128.
+(rule
+  2
+  (lower_bmask $I128 (fits_in_64 _) val)
+  (let ((res ValueRegs (lower_bmask $I64 $I64 val)))
+    (value_regs (value_regs_get res 0) (value_regs_get res 0))))
+
+;; Conversion of one 128-bit value to a 128-bit one. Duplicate the result of
+;; bmasking the 128-bit value to a 64-bit value into both registers of the
+;; 128-bit result.
+(rule
+  3
+  (lower_bmask $I128 $I128 val)
+  (let ((res ValueRegs (lower_bmask $I64 $I128 val)))
+    (value_regs (value_regs_get res 0) (value_regs_get res 0))))
--- a/cranelift/codegen/src/isa/riscv64/inst/args.rs
+++ b/cranelift/codegen/src/isa/riscv64/inst/args.rs
@@ -1189,10 +1189,8 @@ impl LoadOP {
            return if t == F32 { Self::Flw } else { Self::Fld };
        }
        match t {
-            B1 | B8 => Self::Lbu,
-            B16 => Self::Lhu,
-            B32 | R32 => Self::Lwu,
-            B64 | R64 | I64 => Self::Ld,
+            R32 => Self::Lwu,
+            R64 | I64 => Self::Ld,

            I8 => Self::Lb,
            I16 => Self::Lh,
--- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs
@@ -1039,9 +1039,8 @@ impl MachInstEmit for Inst {
            &Inst::CondBr {
                taken,
                not_taken,
-                kind,
+                mut kind,
            } => {
-                let mut kind = kind;
                kind.rs1 = allocs.next(kind.rs1);
                kind.rs2 = allocs.next(kind.rs2);
                match taken {
@@ -1385,13 +1384,13 @@ impl MachInstEmit for Inst {
                .for_each(|i| i.emit(&[], sink, emit_info, state));

                sink.bind_label(label_true);
-                Inst::load_imm12(rd, Imm12::from_bits(-1)).emit(&[], sink, emit_info, state);
+                Inst::load_imm12(rd, Imm12::TRUE).emit(&[], sink, emit_info, state);
                Inst::Jal {
                    dest: BranchTarget::offset(Inst::INSTRUCTION_SIZE * 2),
                }
                .emit(&[], sink, emit_info, state);
                sink.bind_label(label_false);
-                Inst::load_imm12(rd, Imm12::from_bits(0)).emit(&[], sink, emit_info, state);
+                Inst::load_imm12(rd, Imm12::FALSE).emit(&[], sink, emit_info, state);
            }
            &Inst::AtomicCas {
                offset,
--- a/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs
@@ -572,16 +572,6 @@ fn test_riscv64_binemit() {
        "lb a0,100(a1)",
        0x6458503,
    ));
-    insns.push(TestUnit::new(
-        Inst::Load {
-            rd: writable_a0(),
-            op: LoadOP::Lbu,
-            flags: MemFlags::new(),
-            from: AMode::RegOffset(a1(), 100, B8),
-        },
-        "lbu a0,100(a1)",
-        0x645c503,
-    ));
    insns.push(TestUnit::new(
        Inst::Load {
            rd: writable_a0(),
@@ -593,17 +583,6 @@ fn test_riscv64_binemit() {
        0x6459503,
    ));

-    insns.push(TestUnit::new(
-        Inst::Load {
-            rd: writable_a0(),
-            op: LoadOP::Lhu,
-            flags: MemFlags::new(),
-            from: AMode::RegOffset(a1(), 100, B16),
-        },
-        "lhu a0,100(a1)",
-        0x645d503,
-    ));
-
    insns.push(TestUnit::new(
        Inst::Load {
            rd: writable_a0(),
@@ -615,16 +594,6 @@ fn test_riscv64_binemit() {
        0x645a503,
    ));

-    insns.push(TestUnit::new(
-        Inst::Load {
-            rd: writable_a0(),
-            op: LoadOP::Lwu,
-            flags: MemFlags::new(),
-            from: AMode::RegOffset(a1(), 100, B32),
-        },
-        "lwu a0,100(a1)",
-        0x645e503,
-    ));
    insns.push(TestUnit::new(
        Inst::Load {
            rd: writable_a0(),
--- a/cranelift/codegen/src/isa/riscv64/inst/imms.rs
+++ b/cranelift/codegen/src/isa/riscv64/inst/imms.rs
@@ -12,7 +12,7 @@ pub struct Imm12 {

 impl Imm12 {
    pub(crate) const FALSE: Self = Self { bits: 0 };
-    pub(crate) const TRUE: Self = Self { bits: -1 };
+    pub(crate) const TRUE: Self = Self { bits: 1 };
    pub fn maybe_from_u64(val: u64) -> Option<Imm12> {
        let sign_bit = 1 << 11;
        if val == 0 {
--- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs
@@ -6,9 +6,7 @@

 use crate::binemit::{Addend, CodeOffset, Reloc};
 pub use crate::ir::condcodes::IntCC;
-use crate::ir::types::{
-    B1, B128, B16, B32, B64, B8, F32, F64, FFLAGS, I128, I16, I32, I64, I8, IFLAGS, R32, R64,
-};
+use crate::ir::types::{F32, F64, FFLAGS, I128, I16, I32, I64, I8, IFLAGS, R32, R64};

 pub use crate::ir::{ExternalName, MemFlags, Opcode, SourceLoc, Type, ValueLabel};
 use crate::isa::CallConv;
@@ -691,14 +689,11 @@ impl MachInst for Inst {

    fn gen_constant<F: FnMut(Type) -> Writable<Reg>>(
        to_regs: ValueRegs<Writable<Reg>>,
-        mut value: u128,
+        value: u128,
        ty: Type,
        mut alloc_tmp: F,
    ) -> SmallVec<[Inst; 4]> {
-        if ty.is_bool() && value != 0 {
-            value = !0;
-        }
-        if (ty.bits() <= 64 && (ty.is_bool() || ty.is_int())) || ty == R32 || ty == R64 {
+        if (ty.bits() <= 64 && ty.is_int()) || ty == R32 || ty == R64 {
            return Inst::load_constant_u64(to_regs.only_reg().unwrap(), value as u64);
        };
        match ty {
@@ -708,7 +703,7 @@ impl MachInst for Inst {
            F64 => {
                Inst::load_fp_constant64(to_regs.only_reg().unwrap(), value as u64, alloc_tmp(I64))
            }
-            I128 | B128 => {
+            I128 => {
                let mut insts = SmallInstVec::new();
                insts.extend(Inst::load_constant_u64(
                    to_regs.regs()[0],
@@ -736,17 +731,11 @@ impl MachInst for Inst {
            I16 => Ok((&[RegClass::Int], &[I16])),
            I32 => Ok((&[RegClass::Int], &[I32])),
            I64 => Ok((&[RegClass::Int], &[I64])),
-            B1 => Ok((&[RegClass::Int], &[B1])),
-            B8 => Ok((&[RegClass::Int], &[B8])),
-            B16 => Ok((&[RegClass::Int], &[B16])),
-            B32 => Ok((&[RegClass::Int], &[B32])),
-            B64 => Ok((&[RegClass::Int], &[B64])),
            R32 => panic!("32-bit reftype pointer should never be seen on riscv64"),
            R64 => Ok((&[RegClass::Int], &[R64])),
            F32 => Ok((&[RegClass::Float], &[F32])),
            F64 => Ok((&[RegClass::Float], &[F64])),
            I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])),
-            B128 => Ok((&[RegClass::Int, RegClass::Int], &[B64, B64])),
            IFLAGS => Ok((&[RegClass::Int], &[IFLAGS])),
            FFLAGS => Ok((&[RegClass::Int], &[FFLAGS])),
            _ => Err(CodegenError::Unsupported(format!(
--- a/cranelift/codegen/src/isa/riscv64/inst/unwind/systemv.rs
+++ b/cranelift/codegen/src/isa/riscv64/inst/unwind/systemv.rs
@@ -143,7 +143,7 @@ mod tests {

        assert_eq!(
            format!("{:?}", fde),
-            "FrameDescriptionEntry { address: Constant(4321), length: 12, lsda: None, instructions: [] }"
+            "FrameDescriptionEntry { address: Constant(4321), length: 16, lsda: None, instructions: [] }"
        );
    }

--- a/cranelift/codegen/src/isa/riscv64/lower.isle
+++ b/cranelift/codegen/src/isa/riscv64/lower.isle
@@ -9,15 +9,6 @@
 (rule (lower (has_type ty (iconst (u64_from_imm64 n))))
  (imm ty n))

-;;;; Rules for `bconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(rule (lower (has_type ty (bconst $false)))
-  (imm ty 0))
-
-(rule (lower (has_type ty (bconst $true)))
-  (imm ty 1))
-
-
 ;;;; Rules for `null` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule (lower (has_type ty (null)))
@@ -194,8 +185,6 @@
 (rule 1 (lower (has_type (fits_in_64 ty) (band (imm12_from_value x) y)))
  (alu_rr_imm12 (AluOPRRI.Andi) y x))

-(rule (lower (has_type $B128 (band x y)))
-  (lower_b128_binary (AluOPRRR.And) x y))
 (rule (lower (has_type $I128 (band x y)))
  (lower_b128_binary (AluOPRRR.And) x y))

@@ -215,8 +204,6 @@

 (rule 1 (lower (has_type (fits_in_64 ty) (bor (imm12_from_value x) y)))
  (alu_rr_imm12 (AluOPRRI.Ori) y x))
-(rule (lower (has_type $B128 (bor x y)))
-  (lower_b128_binary (AluOPRRR.Or) x y))
 (rule (lower (has_type $I128 (bor x y)))
  (lower_b128_binary (AluOPRRR.Or) x y))
 (rule (lower (has_type $F32 (bor x y)))
@@ -235,8 +222,6 @@

 (rule 1 (lower (has_type (fits_in_64 ty) (bxor (imm12_from_value x) y)))
  (alu_rr_imm12 (AluOPRRI.Xori) y x))
-(rule (lower (has_type $B128 (bxor x y)))
-  (lower_b128_binary (AluOPRRR.Xor) x y))
 (rule (lower (has_type $I128 (bxor x y)))
  (lower_b128_binary (AluOPRRR.Xor) x y))
 (rule (lower (has_type $F32 (bxor x y)))
@@ -251,8 +236,6 @@

 (rule (lower (has_type $I128 (bnot x)))
  (bnot_128 x))
-(rule (lower (has_type $B128 (bnot x)))
-  (bnot_128 x))
 (rule
  (lower (has_type $F32 (bnot x)))
  (lower_float_bnot x $F32)
@@ -556,11 +539,6 @@
 (rule (lower (has_type ty (copy x)))
  (gen_move2 x ty ty))

-;;;;;  Rules for `breduce`;;;;;;;;;;;;;;;;;
-(rule
-  (lower (has_type ty (breduce x)))
-  (gen_move2 (value_regs_get x 0) ty ty))
-
 ;;;;;  Rules for `ireduce`;;;;;;;;;;;;;;;;;
 (rule
  (lower (has_type ty (ireduce x)))
@@ -623,8 +601,8 @@

 ;;;;;  Rules for `select`;;;;;;;;;
 (rule
-  (lower (has_type ty (select c x y)))
-  (gen_select ty c x y)
+  (lower (has_type ty (select c @ (value_type cty) x y)))
+  (gen_select ty (normalize_value cty c) x y)
 )

 ;;;;;  Rules for `bitselect`;;;;;;;;;
@@ -633,15 +611,6 @@
  (lower (has_type ty (bitselect c x y)))
  (gen_bitselect ty c x y))

-;;;;;  Rules for `bint`;;;;;;;;;
-(rule
-  (lower (has_type (fits_in_64 ty) (bint (valueregs_2_reg x))))
-  (gen_bint x))
-(rule 1
-  (lower (has_type $I128 (bint (valueregs_2_reg x))))
-  (let ((tmp Reg (gen_bint x)))
-   (value_regs tmp (load_u64_constant 0))))
-
 ;;;;;  Rules for `isplit`;;;;;;;;;
 (rule 
  (lower (isplit x))
@@ -733,10 +702,6 @@
 (rule 1
  (lower (has_type $I128 (load flags p offset)))
  (gen_load_128 p offset flags))
-;;;; for B128
-(rule 1
-  (lower (has_type $B128 (load flags p offset)))
-  (gen_load_128 p offset flags))

 ;;;;;  Rules for `istore8`;;;;;;;;;
 (rule 
@@ -762,11 +727,6 @@
  (lower (store flags x @ (value_type $I128 ) p offset))
  (gen_store_128 p offset flags x))

-;;; special for B128
-(rule 1
-  (lower (store flags x @ (value_type $B128 ) p offset))
-  (gen_store_128 p offset flags x))
-
 (decl gen_icmp (IntCC ValueRegs ValueRegs Type) Reg)
 (rule
  (gen_icmp cc x y ty)
@@ -923,34 +883,8 @@

 ;;;;;  Rules for `bmask`;;;;;;;;;
 (rule
-  ;; because we encode bool all 1s.
-  ;; move is just ok.
-  (lower (has_type (fits_in_64 ty) (bmask x @ (value_type ity))))
-  (gen_move2 (value_regs_get x 0) ity ty))
-;;; for i128 
-(rule 1
-  ;; because we encode bool all 1s.
-  ;; move is just ok.
-  (lower (has_type $I128 (bmask x @ (value_type ity))))
-  (value_regs (gen_move2 (value_regs_get x 0) $I64 $I64) (gen_move2 (value_regs_get x 0) $I64 $I64)))
-
-;;;;;  Rules for `bextend`;;;;;;;;;
-(rule
-  ;; because we encode bool all 1s.
-  ;; move is just ok.
-  (lower (has_type ty (bextend x @ (value_type ity))))
-  ;;extra checks.
-  (if-let _ (valid_bextend_ty ity ty))
-  (gen_moves x ity ty))
-
-;;; for B128
-(rule 1
-  ;; because we encode bool all 1s.
-  ;; move is just ok.
-  (lower (has_type ty (bextend x @ (value_type ity))))
-  ;;extra checks.
-  (if-let $B128 (valid_bextend_ty ity ty))
-  (value_regs (gen_moves x $I64 $I64) (gen_moves x $I64 $I64)))
+  (lower (has_type oty (bmask x @ (value_type ity))))
+  (lower_bmask oty ity x))

 ;; N.B.: the Ret itself is generated by the ABI.
 (rule (lower (return args))
--- a/cranelift/codegen/src/isa/riscv64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/riscv64/lower/isle.rs
@@ -71,13 +71,6 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6>
        }
    }

-    fn valid_bextend_ty(&mut self, from: Type, to: Type) -> Option<Type> {
-        if from.is_bool() && to.is_bool() && from.bits() < to.bits() {
-            Some(to)
-        } else {
-            None
-        }
-    }
    fn lower_br_fcmp(
        &mut self,
        cc: &FloatCC,
@@ -155,7 +148,7 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6>
        }
    }
    fn int_zero_reg(&mut self, ty: Type) -> ValueRegs {
-        assert!(ty.is_int() || ty.is_bool(), "{:?}", ty);
+        assert!(ty.is_int(), "{:?}", ty);
        if ty.bits() == 128 {
            ValueRegs::two(self.zero_reg(), self.zero_reg())
        } else {
@@ -190,7 +183,7 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6>
        Imm12::from_bits(imm.as_i16() & (x as i16))
    }
    fn alloc_vec_writable(&mut self, ty: Type) -> VecWritableReg {
-        if ty.is_int() || ty.is_bool() || ty == R32 || ty == R64 {
+        if ty.is_int() || ty == R32 || ty == R64 {
            if ty.bits() <= 64 {
                vec![self.temp_writable_reg(I64)]
            } else {
@@ -203,26 +196,7 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6>
        }
    }

-    fn imm(&mut self, ty: Type, mut val: u64) -> Reg {
-        // Boolean types
-        // Boolean values are either true or false.
-
-        // The b1 type represents an abstract boolean value. It can only exist as an SSA value, and can't be directly stored in memory. It can, however, be converted into an integer with value 0 or 1 by the bint instruction (and converted back with icmp_imm with 0).
-
-        // Several larger boolean types are also defined, primarily to be used as SIMD element types. They can be stored in memory, and are represented as either all zero bits or all one bits.
-
-        // b1
-        // b8
-        // b16
-        // b32
-        // b64
-        // ///////////////////////////////////////////////////////////
-        // "represented as either all zero bits or all one bits."
-        // \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
-        if ty.is_bool() && val != 0 {
-            // need all be one
-            val = !0;
-        }
+    fn imm(&mut self, ty: Type, val: u64) -> Reg {
        let tmp = self.temp_writable_reg(ty);
        self.emit_list(&MInst::load_constant_u64(tmp, val));
        tmp.to_reg()
@@ -296,7 +270,11 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6>
        rd.to_reg()
    }
    fn imm12_const(&mut self, val: i32) -> Imm12 {
-        Imm12::maybe_from_u64(val as u64).unwrap()
+        if let Some(res) = Imm12::maybe_from_u64(val as u64) {
+            res
+        } else {
+            panic!("Unable to make an Imm12 value from {}", val)
+        }
    }
    fn imm12_const_add(&mut self, val: i32, add: i32) -> Imm12 {
        Imm12::maybe_from_u64((val + add) as u64).unwrap()
@@ -526,7 +504,7 @@ fn construct_dest<F: std::ops::FnMut(Type) -> WritableReg>(
    mut alloc: F,
    ty: Type,
 ) -> WritableValueRegs {
-    if ty.is_bool() || ty.is_int() {
+    if ty.is_int() {
        if ty.bits() == 128 {
            WritableValueRegs::two(alloc(I64), alloc(I64))
        } else {
--- a/cranelift/codegen/src/isa/s390x/abi.rs
+++ b/cranelift/codegen/src/isa/s390x/abi.rs
@@ -94,7 +94,6 @@ pub type S390xCallee = Callee<S390xMachineDeps>;
 fn in_int_reg(ty: Type) -> bool {
    match ty {
        types::I8 | types::I16 | types::I32 | types::I64 | types::R64 => true,
-        types::B1 | types::B8 | types::B16 | types::B32 | types::B64 => true,
        _ => false,
    }
 }
--- a/cranelift/codegen/src/isa/s390x/inst.isle
+++ b/cranelift/codegen/src/isa/s390x/inst.isle
@@ -3129,7 +3129,6 @@
        dst))

 ;; Sign-extend a register from a smaller `Type` into a 32-bit register.
-;; This handles both integer and boolean input types (except $B1).
 (decl sext32_reg (Type Reg) Reg)
 (rule (sext32_reg ty src)
      (let ((dst WritableReg (temp_writable_reg $I32))
@@ -3137,7 +3136,6 @@
        dst))

 ;; Zero-extend a register from a smaller `Type` into a 64-bit register.
-;; This handles both integer and boolean input types (except $B1).
 (decl zext64_reg (Type Reg) Reg)
 (rule (zext64_reg ty src)
      (let ((dst WritableReg (temp_writable_reg $I64))
@@ -3145,7 +3143,6 @@
        dst))

 ;; Sign-extend a register from a smaller `Type` into a 64-bit register.
-;; This handles both integer and boolean input types (except $B1).
 (decl sext64_reg (Type Reg) Reg)
 (rule (sext64_reg ty src)
      (let ((dst WritableReg (temp_writable_reg $I64))
@@ -3477,14 +3474,19 @@
            (_ Unit (emit_consumer (emit_cmov_imm ty dst cond imm_true))))
        dst))

-;; Lower a boolean condition to a boolean type.  The value used to represent
-;; "true" is -1 for all result types except for $B1, which uses 1.
+;; Lower a boolean condition to the values 1/0. This rule is only used in the
+;; context of instructions that return $I8 results.
 (decl lower_bool (Type ProducesBool) Reg)
-(rule (lower_bool $B1 cond) (select_bool_imm $B1 cond 1 0))
-(rule (lower_bool $B8 cond) (select_bool_imm $B8 cond -1 0))
-(rule (lower_bool $B16 cond) (select_bool_imm $B16 cond -1 0))
-(rule (lower_bool $B32 cond) (select_bool_imm $B32 cond -1 0))
-(rule (lower_bool $B64 cond) (select_bool_imm $B64 cond -1 0))
+(rule (lower_bool $I8 cond) (select_bool_imm $I8 cond 1 0))
+
+;; Lower a boolean condition to the values -1/0.
+(decl lower_bool_to_mask (Type ProducesBool) Reg)
+(rule 0 (lower_bool_to_mask (fits_in_64 ty) producer)
+      (select_bool_imm ty producer -1 0))
+
+(rule 1 (lower_bool_to_mask $I128 producer)
+      (let ((res Reg (lower_bool_to_mask $I64 producer)))
+        (mov_to_vec128 $I128 res res)))

 ;; Emit a conditional branch based on a boolean condition.
 (decl cond_br_bool (ProducesBool MachLabel MachLabel) SideEffectNoResult)
--- a/cranelift/codegen/src/isa/s390x/inst/mod.rs
+++ b/cranelift/codegen/src/isa/s390x/inst/mod.rs
@@ -397,10 +397,10 @@ impl Inst {
    /// Generic constructor for a load (zero-extending where appropriate).
    pub fn gen_load(into_reg: Writable<Reg>, mem: MemArg, ty: Type) -> Inst {
        match ty {
-            types::B1 | types::B8 | types::I8 => Inst::Load64ZExt8 { rd: into_reg, mem },
-            types::B16 | types::I16 => Inst::Load64ZExt16 { rd: into_reg, mem },
-            types::B32 | types::I32 => Inst::Load64ZExt32 { rd: into_reg, mem },
-            types::B64 | types::I64 | types::R64 => Inst::Load64 { rd: into_reg, mem },
+            types::I8 => Inst::Load64ZExt8 { rd: into_reg, mem },
+            types::I16 => Inst::Load64ZExt16 { rd: into_reg, mem },
+            types::I32 => Inst::Load64ZExt32 { rd: into_reg, mem },
+            types::I64 | types::R64 => Inst::Load64 { rd: into_reg, mem },
            types::F32 => Inst::VecLoadLaneUndef {
                size: 32,
                rd: into_reg,
@@ -414,7 +414,7 @@ impl Inst {
                lane_imm: 0,
            },
            _ if ty.is_vector() && ty.bits() == 128 => Inst::VecLoad { rd: into_reg, mem },
-            types::B128 | types::I128 => Inst::VecLoad { rd: into_reg, mem },
+            types::I128 => Inst::VecLoad { rd: into_reg, mem },
            _ => unimplemented!("gen_load({})", ty),
        }
    }
@@ -422,10 +422,10 @@ impl Inst {
    /// Generic constructor for a store.
    pub fn gen_store(mem: MemArg, from_reg: Reg, ty: Type) -> Inst {
        match ty {
-            types::B1 | types::B8 | types::I8 => Inst::Store8 { rd: from_reg, mem },
-            types::B16 | types::I16 => Inst::Store16 { rd: from_reg, mem },
-            types::B32 | types::I32 => Inst::Store32 { rd: from_reg, mem },
-            types::B64 | types::I64 | types::R64 => Inst::Store64 { rd: from_reg, mem },
+            types::I8 => Inst::Store8 { rd: from_reg, mem },
+            types::I16 => Inst::Store16 { rd: from_reg, mem },
+            types::I32 => Inst::Store32 { rd: from_reg, mem },
+            types::I64 | types::R64 => Inst::Store64 { rd: from_reg, mem },
            types::F32 => Inst::VecStoreLane {
                size: 32,
                rd: from_reg,
@@ -439,7 +439,7 @@ impl Inst {
                lane_imm: 0,
            },
            _ if ty.is_vector() && ty.bits() == 128 => Inst::VecStore { rd: from_reg, mem },
-            types::B128 | types::I128 => Inst::VecStore { rd: from_reg, mem },
+            types::I128 => Inst::VecStore { rd: from_reg, mem },
            _ => unimplemented!("gen_store({})", ty),
        }
    }
@@ -1086,7 +1086,7 @@ impl MachInst for Inst {
            .only_reg()
            .expect("multi-reg values not supported yet");
        match ty {
-            types::I128 | types::B128 => {
+            types::I128 => {
                let mut ret = SmallVec::new();
                ret.push(Inst::load_vec_constant(to_reg, value));
                ret
@@ -1112,14 +1112,8 @@ impl MachInst for Inst {
                ));
                ret
            }
-            types::I64 | types::B64 | types::R64 => Inst::load_constant64(to_reg, value as u64),
-            types::B1
-            | types::I8
-            | types::B8
-            | types::I16
-            | types::B16
-            | types::I32
-            | types::B32 => Inst::load_constant32(to_reg, value as u32),
+            types::I64 | types::R64 => Inst::load_constant64(to_reg, value as u64),
+            types::I8 | types::I16 | types::I32 => Inst::load_constant32(to_reg, value as u32),
            _ => unreachable!(),
        }
    }
@@ -1140,17 +1134,11 @@ impl MachInst for Inst {
            types::I16 => Ok((&[RegClass::Int], &[types::I16])),
            types::I32 => Ok((&[RegClass::Int], &[types::I32])),
            types::I64 => Ok((&[RegClass::Int], &[types::I64])),
-            types::B1 => Ok((&[RegClass::Int], &[types::B1])),
-            types::B8 => Ok((&[RegClass::Int], &[types::B8])),
-            types::B16 => Ok((&[RegClass::Int], &[types::B16])),
-            types::B32 => Ok((&[RegClass::Int], &[types::B32])),
-            types::B64 => Ok((&[RegClass::Int], &[types::B64])),
            types::R32 => panic!("32-bit reftype pointer should never be seen on s390x"),
            types::R64 => Ok((&[RegClass::Int], &[types::R64])),
            types::F32 => Ok((&[RegClass::Float], &[types::F32])),
            types::F64 => Ok((&[RegClass::Float], &[types::F64])),
            types::I128 => Ok((&[RegClass::Float], &[types::I128])),
-            types::B128 => Ok((&[RegClass::Float], &[types::B128])),
            _ if ty.is_vector() && ty.bits() == 128 => Ok((&[RegClass::Float], &[types::I8X16])),
            // FIXME: We don't really have IFLAGS, but need to allow it here
            // for now to support the SelectifSpectreGuard instruction.
--- a/cranelift/codegen/src/isa/s390x/lower.isle
+++ b/cranelift/codegen/src/isa/s390x/lower.isle
@@ -16,14 +16,6 @@
      (imm ty n))


-;;;; Rules for `bconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(rule (lower (has_type ty (bconst $false)))
-      (imm ty 0))
-(rule (lower (has_type ty (bconst $true)))
-      (imm ty 1))
-
-
 ;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule (lower (f32const (u64_from_ieee32 x)))
@@ -1163,92 +1155,10 @@
      (vec_select ty y z x))


-;;;; Rules for `breduce` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;; Rules for `bmask` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-;; Up to 64-bit source type: Always a no-op.
-(rule 1 (lower (breduce x @ (value_type (fits_in_64 _ty))))
-      x)
-
-;; 128-bit source type: Extract the low half.
-(rule (lower (breduce x @ (value_type (vr128_ty _ty))))
-      (vec_extract_lane $I64X2 x 1 (zero_reg)))
-
-
-;;;; Rules for `bextend` and `bmask` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; Use a common helper to type cast bools to either bool or integer types.
-(decl cast_bool (Type Value) Reg)
-(rule (lower (has_type ty (bextend x)))
-      (cast_bool ty x))
 (rule (lower (has_type ty (bmask x)))
-      (cast_bool ty x))
-
-;; If the target has the same or a smaller size than the source, it's a no-op.
-(rule 8 (cast_bool $B1 x @ (value_type $B1)) x)
-(rule 8 (cast_bool $B1 x @ (value_type $B8)) x)
-(rule 8 (cast_bool $B8 x @ (value_type $B8)) x)
-(rule 8 (cast_bool $I8 x @ (value_type $B8)) x)
-(rule 7 (cast_bool (fits_in_16 _ty) x @ (value_type $B16)) x)
-(rule 6 (cast_bool (fits_in_32 _ty) x @ (value_type $B32)) x)
-(rule 5 (cast_bool (fits_in_64 _ty) x @ (value_type $B64)) x)
-(rule 4 (cast_bool (vr128_ty _ty) x @ (value_type $B128)) x)
-(rule 5 (cast_bool (fits_in_64 _ty) x @ (value_type $B128))
-      (vec_extract_lane $I64X2 x 1 (zero_reg)))
-
-;; Single-bit values are sign-extended via a pair of shifts.
-(rule 0 (cast_bool (gpr32_ty ty) x @ (value_type $B1))
-      (ashr_imm $I32 (lshl_imm $I32 x 31) 31))
-(rule 1 (cast_bool (gpr64_ty ty) x @ (value_type $B1))
-      (ashr_imm $I64 (lshl_imm $I64 x 63) 63))
-(rule 4 (cast_bool (vr128_ty ty) x @ (value_type $B1))
-      (let ((gpr Reg (ashr_imm $I64 (lshl_imm $I64 x 63) 63)))
-        (mov_to_vec128 ty gpr gpr)))
-
-;; Other values are just sign-extended normally.
-(rule 0 (cast_bool (gpr32_ty _ty) x @ (value_type $B8))
-      (sext32_reg $I8 x))
-(rule 0 (cast_bool (gpr32_ty _ty) x @ (value_type $B16))
-      (sext32_reg $I16 x))
-(rule 1(cast_bool (gpr64_ty _ty) x @ (value_type $B8))
-      (sext64_reg $I8 x))
-(rule 1(cast_bool (gpr64_ty _ty) x @ (value_type $B16))
-      (sext64_reg $I16 x))
-(rule 1(cast_bool (gpr64_ty _ty) x @ (value_type $B32))
-      (sext64_reg $I32 x))
-(rule 3 (cast_bool (vr128_ty ty) x @ (value_type (gpr32_ty src_ty)))
-      (let ((x_ext Reg (sext64_reg src_ty x)))
-        (mov_to_vec128 ty x_ext x_ext)))
-(rule 2 (cast_bool (vr128_ty ty) x @ (value_type (gpr64_ty src_ty)))
-      (mov_to_vec128 ty x x))
-
-
-;;;; Rules for `bint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; Mask with 1 to get a 0/1 result (8- or 16-bit result types).
-(rule 5 (lower (has_type (fits_in_16 ty) (bint x @ (value_type (fits_in_64 _)))))
-      (and_uimm16shifted ty x (uimm16shifted 1 0)))
-
-;; Mask with 1 to get a 0/1 result (32-bit result types).
-(rule 4 (lower (has_type (fits_in_32 ty) (bint x @ (value_type (fits_in_64 _)))))
-      (and_uimm32shifted ty x (uimm32shifted 1 0)))
-
-;; Mask with 1 to get a 0/1 result (64-bit result types).
-(rule 3 (lower (has_type (fits_in_64 ty) (bint x @ (value_type (fits_in_64 _)))))
-      (and_reg ty x (imm ty 1)))
-
-;; Mask with 1 to get a 0/1 result (128-bit result types).
-(rule 1 (lower (has_type (vr128_ty ty) (bint x @ (value_type (fits_in_64 _)))))
-      (let ((x_ext Reg (and_uimm16shifted $I8 x (uimm16shifted 1 0))))
-        (vec_insert_lane $I8X16 (vec_imm ty 0) x_ext 15 (zero_reg))))
-
-;; Mask with 1 to get a 0/1 result (128-bit source types).
-(rule 2 (lower (has_type (fits_in_64 ty) (bint x @ (value_type (vr128_ty _)))))
-      (let ((x_gpr Reg (vec_extract_lane $I8X16 x 15 (zero_reg))))
-        (and_uimm16shifted ty x_gpr (uimm16shifted 1 0))))
-
-;; Mask with 1 to get a 0/1 result (128-bit source and result types).
-(rule 0 (lower (has_type (vr128_ty ty) (bint x @ (value_type (vr128_ty _)))))
-      (vec_and ty x (vec_imm ty 1)))
+      (lower_bool_to_mask ty (value_nonzero x)))


 ;;;; Rules for `bitrev` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -1864,7 +1774,7 @@
 (rule 1 (lower (insertlane x @ (value_type ty)
                         y @ (value_type in_ty)
                         (u8_from_uimm8 idx)))
-      (if (ty_int_bool_ref_scalar_64 in_ty))
+      (if (ty_int_ref_scalar_64 in_ty))
      (vec_insert_lane ty x y (be_lane_idx ty idx) (zero_reg)))

 ;; Insert vector lane from floating-point register.
@@ -1980,7 +1890,7 @@
 ;; Extract vector lane to general-purpose register.
 (rule 1 (lower (has_type out_ty
                       (extractlane x @ (value_type ty) (u8_from_uimm8 idx))))
-      (if (ty_int_bool_ref_scalar_64 out_ty))
+      (if (ty_int_ref_scalar_64 out_ty))
      (vec_extract_lane ty x (be_lane_idx ty idx) (zero_reg)))

 ;; Extract vector lane to floating-point register.
@@ -2037,7 +1947,7 @@

 ;; Load replicated value from general-purpose register.
 (rule 1 (lower (has_type ty (splat x @ (value_type in_ty))))
-      (if (ty_int_bool_ref_scalar_64 in_ty))
+      (if (ty_int_ref_scalar_64 in_ty))
      (vec_replicate_lane ty (vec_insert_lane_undef ty x 0 (zero_reg)) 0))

 ;; Load replicated value from floating-point register.
@@ -2097,7 +2007,7 @@
 ;; Load scalar value from general-purpose register.
 (rule 1 (lower (has_type ty (scalar_to_vector
                             x @ (value_type in_ty))))
-      (if (ty_int_bool_ref_scalar_64 in_ty))
+      (if (ty_int_ref_scalar_64 in_ty))
      (vec_insert_lane ty (vec_imm ty 0) x (be_lane_idx ty 0) (zero_reg)))

 ;; Load scalar value from floating-point register.
@@ -3783,14 +3693,14 @@
 ;;;; Rules for `is_null` and `is_invalid`  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; Null references are represented by the constant value 0.
-(rule (lower (has_type $B1 (is_null x @ (value_type $R64))))
-      (lower_bool $B1 (bool (icmps_simm16 $I64 x 0)
+(rule (lower (has_type $I8 (is_null x @ (value_type $R64))))
+      (lower_bool $I8 (bool (icmps_simm16 $I64 x 0)
                            (intcc_as_cond (IntCC.Equal)))))


 ;; Invalid references are represented by the constant value -1.
-(rule (lower (has_type $B1 (is_invalid x @ (value_type $R64))))
-      (lower_bool $B1 (bool (icmps_simm16 $I64 x -1)
+(rule (lower (has_type $I8 (is_invalid x @ (value_type $R64))))
+      (lower_bool $I8 (bool (icmps_simm16 $I64 x -1)
                            (intcc_as_cond (IntCC.Equal)))))


@@ -3798,10 +3708,9 @@

 ;; Return a `ProducesBool` to capture the fact that the input value is nonzero.
 ;; In the common case where that input is the result of an `icmp` or `fcmp`
-;; instruction (possibly via an intermediate `bint`), directly use that compare.
-;; Note that it is not safe to sink memory loads here, see the `icmp` comment.
+;; instruction, directly use that compare. Note that it is not safe to sink
+;; memory loads here, see the `icmp` comment.
 (decl value_nonzero (Value) ProducesBool)
-(rule (value_nonzero (bint val)) (value_nonzero val))
 (rule (value_nonzero (icmp int_cc x y)) (icmp_val $false int_cc x y))
 (rule (value_nonzero (fcmp float_cc x y)) (fcmp_val float_cc x y))
 (rule -1 (value_nonzero val @ (value_type (gpr32_ty ty)))
--- a/cranelift/codegen/src/isa/s390x/lower.rs
+++ b/cranelift/codegen/src/isa/s390x/lower.rs
@@ -45,7 +45,6 @@ impl LowerBackend for S390xBackend {
            Opcode::Nop
            | Opcode::Copy
            | Opcode::Iconst
-            | Opcode::Bconst
            | Opcode::F32const
            | Opcode::F64const
            | Opcode::Vconst
@@ -100,10 +99,7 @@ impl LowerBackend for S390xBackend {
            | Opcode::BxorNot
            | Opcode::Bitselect
            | Opcode::Vselect
-            | Opcode::Breduce
-            | Opcode::Bextend
            | Opcode::Bmask
-            | Opcode::Bint
            | Opcode::Bitrev
            | Opcode::Clz
            | Opcode::Cls
--- a/cranelift/codegen/src/isa/s390x/lower/isle.rs
+++ b/cranelift/codegen/src/isa/s390x/lower/isle.rs
@@ -252,7 +252,7 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6>
    #[inline]
    fn gpr32_ty(&mut self, ty: Type) -> Option<Type> {
        match ty {
-            I8 | I16 | I32 | B1 | B8 | B16 | B32 => Some(ty),
+            I8 | I16 | I32 => Some(ty),
            _ => None,
        }
    }
@@ -260,7 +260,7 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6>
    #[inline]
    fn gpr64_ty(&mut self, ty: Type) -> Option<Type> {
        match ty {
-            I64 | B64 | R64 => Some(ty),
+            I64 | R64 => Some(ty),
            _ => None,
        }
    }
@@ -268,7 +268,7 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6>
    #[inline]
    fn vr128_ty(&mut self, ty: Type) -> Option<Type> {
        match ty {
-            I128 | B128 => Some(ty),
+            I128 => Some(ty),
            _ if ty.is_vector() && ty.bits() == 128 => Some(ty),
            _ => None,
        }
--- a/cranelift/codegen/src/isa/x64/abi.rs
+++ b/cranelift/codegen/src/isa/x64/abi.rs
@@ -260,13 +260,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
        // For integer-typed values, we always load a full 64 bits (and we always spill a full 64
        // bits as well -- see `Inst::store()`).
        let ty = match ty {
-            types::B1
-            | types::B8
-            | types::I8
-            | types::B16
-            | types::I16
-            | types::B32
-            | types::I32 => types::I64,
+            types::I8 | types::I16 | types::I32 => types::I64,
            _ => ty,
        };
        Inst::load(ty, mem, into_reg, ExtKind::None)
--- a/cranelift/codegen/src/isa/x64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -2217,17 +2217,11 @@ impl MachInst for Inst {
            types::I16 => Ok((&[RegClass::Int], &[types::I16])),
            types::I32 => Ok((&[RegClass::Int], &[types::I32])),
            types::I64 => Ok((&[RegClass::Int], &[types::I64])),
-            types::B1 => Ok((&[RegClass::Int], &[types::B1])),
-            types::B8 => Ok((&[RegClass::Int], &[types::B8])),
-            types::B16 => Ok((&[RegClass::Int], &[types::B16])),
-            types::B32 => Ok((&[RegClass::Int], &[types::B32])),
-            types::B64 => Ok((&[RegClass::Int], &[types::B64])),
            types::R32 => panic!("32-bit reftype pointer should never be seen on x86-64"),
            types::R64 => Ok((&[RegClass::Int], &[types::R64])),
            types::F32 => Ok((&[RegClass::Float], &[types::F32])),
            types::F64 => Ok((&[RegClass::Float], &[types::F64])),
            types::I128 => Ok((&[RegClass::Int, RegClass::Int], &[types::I64, types::I64])),
-            types::B128 => Ok((&[RegClass::Int, RegClass::Int], &[types::B64, types::B64])),
            _ if ty.is_vector() => {
                assert!(ty.bits() <= 128);
                Ok((&[RegClass::Float], &[types::I8X16]))
@@ -2326,15 +2320,10 @@ impl MachInst for Inst {
            } else {
                // Must be an integer type.
                debug_assert!(
-                    ty == types::B1
-                        || ty == types::I8
-                        || ty == types::B8
+                    ty == types::I8
                        || ty == types::I16
-                        || ty == types::B16
                        || ty == types::I32
-                        || ty == types::B32
                        || ty == types::I64
-                        || ty == types::B64
                        || ty == types::R32
                        || ty == types::R64
                );
--- a/cranelift/codegen/src/isa/x64/lower.isle
+++ b/cranelift/codegen/src/isa/x64/lower.isle
@@ -22,30 +22,6 @@
      (value_regs (imm $I64 x)
                  (imm $I64 0)))

-;;;; Rules for `bconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; `b64` and smaller.
-
-(rule (lower (has_type (fits_in_64 ty)
-                       (bconst $false)))
-      (imm ty 0))
-
-(rule (lower (has_type (fits_in_64 ty)
-                       (bconst $true)))
-      (imm ty 1))
-
-;; `b128`
-
-(rule 1 (lower (has_type $B128
-                       (bconst $false)))
-      (value_regs (imm $B64 0)
-                  (imm $B64 0)))
-
-(rule 1 (lower (has_type $B128
-                       (bconst $true)))
-      (value_regs (imm $B64 1)
-                  (imm $B64 0)))
-
 ;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule (lower (f32const (u64_from_ieee32 x)))
@@ -303,7 +279,7 @@
                       (band x y)))
      (sse_and ty x y))

-;; `{i,b}128`.
+;; `i128`.

 (rule 6 (lower (has_type $I128 (band x y)))
      (let ((x_regs ValueRegs x)
@@ -315,17 +291,6 @@
        (value_gprs (x64_and $I64 x_lo y_lo)
                    (x64_and $I64 x_hi y_hi))))

-(rule 6 (lower (has_type $B128 (band x y)))
-      ;; Booleans are always `0` or `1`, so we only need to do the `and` on the
-      ;; low half. The high half is always zero but, rather than generate a new
-      ;; zero, we just reuse `x`'s high half which is already zero.
-      (let ((x_regs ValueRegs x)
-            (x_lo Gpr (value_regs_get_gpr x_regs 0))
-            (x_hi Gpr (value_regs_get_gpr x_regs 1))
-            (y_lo Gpr (lo_gpr y)))
-        (value_gprs (x64_and $I64 x_lo y_lo)
-                    x_hi)))
-
 ;;;; Rules for `bor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; `{i,b}64` and smaller.
@@ -381,17 +346,6 @@
 (rule 6 (lower (has_type $I128 (bor x y)))
      (or_i128 x y))

-(rule 6 (lower (has_type $B128 (bor x y)))
-      ;; Booleans are always `0` or `1`, so we only need to do the `or` on the
-      ;; low half. The high half is always zero but, rather than generate a new
-      ;; zero, we just reuse `x`'s high half which is already zero.
-      (let ((x_regs ValueRegs x)
-            (x_lo Gpr (value_regs_get_gpr x_regs 0))
-            (x_hi Gpr (value_regs_get_gpr x_regs 1))
-            (y_lo Gpr (lo_gpr y)))
-        (value_gprs (x64_or $I64 x_lo y_lo)
-                    x_hi)))
-
 ;;;; Rules for `bxor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; `{i,b}64` and smaller.
@@ -439,17 +393,6 @@
        (value_gprs (x64_xor $I64 x_lo y_lo)
                    (x64_xor $I64 x_hi y_hi))))

-(rule 6 (lower (has_type $B128 (bxor x y)))
-      ;; Booleans are always `0` or `1`, so we only need to do the `xor` on the
-      ;; low half. The high half is always zero but, rather than generate a new
-      ;; zero, we just reuse `x`'s high half which is already zero.
-      (let ((x_regs ValueRegs x)
-            (x_lo Gpr (value_regs_get_gpr x_regs 0))
-            (x_hi Gpr (value_regs_get_gpr x_regs 1))
-            (y_lo Gpr (lo_gpr y)))
-        (value_gprs (x64_xor $I64 x_lo y_lo)
-                    x_hi)))
-
 ;;;; Rules for `ishl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; `i64` and smaller.
@@ -1240,9 +1183,6 @@
 (rule (lower (has_type $I128 (bnot x)))
      (i128_not x))

-(rule (lower (has_type $B128 (bnot x)))
-      (i128_not x))
-
 ;; Special case for vector-types where bit-negation is an xor against an
 ;; all-one value
 (rule -1 (lower (has_type ty @ (multi_lane _bits _lanes) (bnot x)))
@@ -1450,35 +1390,35 @@
      (lower_icmp_bool (emit_cmp cc a b)))

 ;; Peephole optimization for `x < 0`, when x is a signed 64 bit value
-(rule 2 (lower (has_type $B1 (icmp (IntCC.SignedLessThan) x @ (value_type $I64) (u64_from_iconst 0))))
+(rule 2 (lower (has_type $I8 (icmp (IntCC.SignedLessThan) x @ (value_type $I64) (u64_from_iconst 0))))
      (x64_shr $I64 x (Imm8Reg.Imm8 63)))

 ;; Peephole optimization for `0 > x`, when x is a signed 64 bit value
-(rule 2 (lower (has_type $B1 (icmp (IntCC.SignedGreaterThan) (u64_from_iconst 0) x @ (value_type $I64))))
+(rule 2 (lower (has_type $I8 (icmp (IntCC.SignedGreaterThan) (u64_from_iconst 0) x @ (value_type $I64))))
      (x64_shr $I64 x (Imm8Reg.Imm8 63)))

 ;; Peephole optimization for `0 <= x`, when x is a signed 64 bit value
-(rule 2 (lower (has_type $B1 (icmp (IntCC.SignedLessThanOrEqual) (u64_from_iconst 0) x @ (value_type $I64))))
+(rule 2 (lower (has_type $I8 (icmp (IntCC.SignedLessThanOrEqual) (u64_from_iconst 0) x @ (value_type $I64))))
      (x64_shr $I64 (x64_not $I64 x) (Imm8Reg.Imm8 63)))

 ;; Peephole optimization for `x >= 0`, when x is a signed 64 bit value
-(rule 2 (lower (has_type $B1 (icmp (IntCC.SignedGreaterThanOrEqual) x @ (value_type $I64) (u64_from_iconst 0))))
+(rule 2 (lower (has_type $I8 (icmp (IntCC.SignedGreaterThanOrEqual) x @ (value_type $I64) (u64_from_iconst 0))))
      (x64_shr $I64 (x64_not $I64 x) (Imm8Reg.Imm8 63)))

 ;; Peephole optimization for `x < 0`, when x is a signed 32 bit value
-(rule 2 (lower (has_type $B1 (icmp (IntCC.SignedLessThan) x @ (value_type $I32) (u64_from_iconst 0))))
+(rule 2 (lower (has_type $I8 (icmp (IntCC.SignedLessThan) x @ (value_type $I32) (u64_from_iconst 0))))
      (x64_shr $I32 x (Imm8Reg.Imm8 31)))

 ;; Peephole optimization for `0 > x`, when x is a signed 32 bit value
-(rule 2 (lower (has_type $B1 (icmp (IntCC.SignedGreaterThan) (u64_from_iconst 0) x @ (value_type $I32))))
+(rule 2 (lower (has_type $I8 (icmp (IntCC.SignedGreaterThan) (u64_from_iconst 0) x @ (value_type $I32))))
      (x64_shr $I32 x (Imm8Reg.Imm8 31)))

 ;; Peephole optimization for `0 <= x`, when x is a signed 32 bit value
-(rule 2 (lower (has_type $B1 (icmp (IntCC.SignedLessThanOrEqual) (u64_from_iconst 0) x @ (value_type $I32))))
+(rule 2 (lower (has_type $I8 (icmp (IntCC.SignedLessThanOrEqual) (u64_from_iconst 0) x @ (value_type $I32))))
      (x64_shr $I32 (x64_not $I64 x) (Imm8Reg.Imm8 31)))

 ;; Peephole optimization for `x >= 0`, when x is a signed 32 bit value
-(rule 2 (lower (has_type $B1 (icmp (IntCC.SignedGreaterThanOrEqual) x @ (value_type $I32) (u64_from_iconst 0))))
+(rule 2 (lower (has_type $I8 (icmp (IntCC.SignedGreaterThanOrEqual) x @ (value_type $I32) (u64_from_iconst 0))))
      (x64_shr $I32 (x64_not $I64 x) (Imm8Reg.Imm8 31)))

 ;; For XMM-held values, we lower to `PCMP*` instructions, sometimes more than
@@ -1710,14 +1650,7 @@
 ;; Finally, we lower `select` from a condition value `c`. These rules are meant
 ;; to be the final, default lowerings if no other patterns matched above.

-(rule -1 (lower (has_type ty (select c @ (value_type $B1) x y)))
-      (let ((size OperandSize (raw_operand_size_of_type $B1))
-            ;; N.B.: disallow load-op fusion, see above. TODO:
-            ;; https://github.com/bytecodealliance/wasmtime/issues/3953.
-            (gpr_c Gpr (put_in_gpr c)))
-           (with_flags (x64_test size (RegMemImm.Imm 1) gpr_c) (cmove_from_values ty (CC.NZ) x y))))
-
-(rule -2 (lower (has_type ty (select c @ (value_type (fits_in_64 a_ty)) x y)))
+(rule -1 (lower (has_type ty (select c @ (value_type (fits_in_64 a_ty)) x y)))
      (let ((size OperandSize (raw_operand_size_of_type a_ty))
            ;; N.B.: disallow load-op fusion, see above. TODO:
            ;; https://github.com/bytecodealliance/wasmtime/issues/3953.
@@ -2125,7 +2058,7 @@
                       (uextend src @ (has_type $I32 (uload32 _ _ _)))))
      src)

-;; Rules for `sextend` / `bextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (decl generic_sextend (Value Type Type) InstOutput)

@@ -2140,17 +2073,17 @@
      (x64_sar $I64 src (Imm8Reg.Imm8 63)))

 ;; I64 -> I128.
-(rule 3 (generic_sextend src (ty_int_bool_64 _) (ty_int_bool_128 _))
+(rule 3 (generic_sextend src $I64 $I128)
      (value_regs src (spread_sign_bit src)))

 ;; I{8,16,32} -> I128.
-(rule 2 (generic_sextend src (fits_in_32 src_ty) (ty_int_bool_128 _))
+(rule 2 (generic_sextend src (fits_in_32 src_ty) $I128)
      (let ((lo Gpr (extend_to_gpr src $I64 (ExtendKind.Sign)))
            (hi Gpr (spread_sign_bit lo)))
      (value_regs lo hi)))

 ;; I{8,16,32} -> I64.
-(rule 1 (generic_sextend src (fits_in_32 src_ty) (ty_int_bool_64 _))
+(rule 1 (generic_sextend src (fits_in_32 src_ty) $I64)
      (extend_to_gpr src $I64 (ExtendKind.Sign)))

 ;; I8 -> I{16,32}, I16 -> I32.
@@ -2162,13 +2095,7 @@
                 (sextend src @ (value_type src_ty))))
      (generic_sextend src src_ty dst_ty))

-;; Bools are stored as 0/-1 so extends must sign-extend as well.
-(rule (lower
-       (has_type dst_ty
-                 (bextend src @ (value_type src_ty))))
-      (generic_sextend src src_ty dst_ty))
-
-;; Rules for `ireduce` / `breduce` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Rules for `ireduce` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; T -> T is always a no-op, even I128 -> I128.
 (rule (lower (has_type ty (ireduce src @ (value_type ty))))
@@ -2180,28 +2107,6 @@
 (rule 1 (lower (has_type (fits_in_64 ty) (ireduce src)))
      (value_regs_get_gpr src 0))

-;; Likewise for breduce.
-
-(rule (lower (has_type ty (breduce src @ (value_type ty))))
-      src)
-
-(rule 1 (lower (has_type (fits_in_64 ty) (breduce src)))
-      (value_regs_get_gpr src 0))
-
-;; Rules for `bint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; Booleans are stored as all-zeroes (0) or all-ones (-1). We AND out
-;; the LSB to give a 0 / 1-valued integer result.
-
-(rule (lower (has_type (fits_in_64 ty)
-                       (bint src)))
-      (x64_and ty src (RegMemImm.Imm 1)))
-(rule 1 (lower (has_type $I128
-                       (bint src)))
-      (value_regs
-       (x64_and $I64 src (RegMemImm.Imm 1))
-       (imm $I64 0)))
-
 ;; Rules for `debugtrap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule (lower (debugtrap))
@@ -2505,7 +2410,7 @@
      (x64_movzx (ext_mode (ty_bits_u16 ty) 64) (to_amode flags address offset)))
 ;; But if we know that both the `from` and `to` are 64 bits, we simply load with
 ;; no extension.
-(rule -1 (lower (has_type (ty_int_bool_ref_64 ty) (load flags address offset)))
+(rule -1 (lower (has_type (ty_int_ref_64 ty) (load flags address offset)))
      (x64_mov (to_amode flags address offset)))
 ;; Also, certain scalar loads have a specific `from` width and extension kind
 ;; (signed -> `sx`, zeroed -> `zx`). We overwrite the high bits of the 64-bit
@@ -2538,8 +2443,8 @@
 (rule -2 (lower (has_type (ty_vec128 ty) (load flags address offset)))
      (x64_movdqu (to_amode flags address offset)))

-;; We can load an I128/B128 by doing two 64-bit loads.
-(rule -3 (lower (has_type (ty_int_bool_128 _)
+;; We can load an I128 by doing two 64-bit loads.
+(rule -3 (lower (has_type $I128
                       (load flags address offset)))
      (let ((addr_lo Amode (to_amode flags address offset))
            (addr_hi Amode (amode_offset addr_lo 8))
@@ -2623,9 +2528,9 @@
      (side_effect
       (x64_xmm_movrm (SseOpcode.Movdqu) (to_amode flags address offset) value)))

-;; Stores of I128/B128 values: store the two 64-bit halves separately.
+;; Stores of I128 values: store the two 64-bit halves separately.
 (rule 0 (lower (store flags
-                    value @ (value_type (ty_int_bool_128 _))
+                    value @ (value_type $I128)
                    address
                    offset))
      (let ((value_reg ValueRegs value)
@@ -2918,8 +2823,6 @@


 (decl cmp_zero_int_bool_ref (Value) ProducesFlags)
-(rule 1 (cmp_zero_int_bool_ref val @ (value_type $B1))
-      (x64_test (OperandSize.Size8) (RegMemImm.Imm 1) val))
 (rule (cmp_zero_int_bool_ref val @ (value_type ty))
      (let ((size OperandSize (raw_operand_size_of_type ty))
            (src Gpr val))
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -22,7 +22,6 @@ use target_lexicon::Triple;
 fn is_int_or_ref_ty(ty: Type) -> bool {
    match ty {
        types::I8 | types::I16 | types::I32 | types::I64 | types::R64 => true,
-        types::B1 | types::B8 | types::B16 | types::B32 | types::B64 => true,
        types::R32 => panic!("shouldn't have 32-bits refs on x64"),
        _ => false,
    }
@@ -328,7 +327,6 @@ fn lower_insn_to_regs(
    let op = ctx.data(insn).opcode();
    match op {
        Opcode::Iconst
-        | Opcode::Bconst
        | Opcode::F32const
        | Opcode::F64const
        | Opcode::Null
@@ -369,10 +367,7 @@ fn lower_insn_to_regs(
        | Opcode::IsInvalid
        | Opcode::Uextend
        | Opcode::Sextend
-        | Opcode::Breduce
-        | Opcode::Bextend
        | Opcode::Ireduce
-        | Opcode::Bint
        | Opcode::Debugtrap
        | Opcode::WideningPairwiseDotProductS
        | Opcode::Fadd
--- a/cranelift/codegen/src/isa/x64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/x64/lower/isle.rs
@@ -549,7 +549,7 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {

    #[inline]
    fn type_register_class(&mut self, ty: Type) -> Option<RegisterClass> {
-        if is_int_or_ref_ty(ty) || ty == I128 || ty == B128 {
+        if is_int_or_ref_ty(ty) || ty == I128 {
            Some(RegisterClass::Gpr {
                single_register: ty != I128,
            })
@@ -564,7 +564,6 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
    fn ty_int_bool_or_ref(&mut self, ty: Type) -> Option<()> {
        match ty {
            types::I8 | types::I16 | types::I32 | types::I64 | types::R64 => Some(()),
-            types::B1 | types::B8 | types::B16 | types::B32 | types::B64 => Some(()),
            types::R32 => panic!("shouldn't have 32-bits refs on x64"),
            _ => None,
        }
--- a/cranelift/codegen/src/isle_prelude.rs
+++ b/cranelift/codegen/src/isle_prelude.rs
@@ -171,7 +171,7 @@ macro_rules! isle_common_prelude_methods {
        }

        #[inline]
-        fn ty_int_bool_ref_scalar_64(&mut self, ty: Type) -> Option<Type> {
+        fn ty_int_ref_scalar_64(&mut self, ty: Type) -> Option<Type> {
            if ty.bits() <= 64 && !ty.is_float() && !ty.is_vector() {
                Some(ty)
            } else {
@@ -216,33 +216,17 @@ macro_rules! isle_common_prelude_methods {
        }

        #[inline]
-        fn int_bool_fits_in_32(&mut self, ty: Type) -> Option<Type> {
+        fn int_fits_in_32(&mut self, ty: Type) -> Option<Type> {
            match ty {
-                I8 | I16 | I32 | B8 | B16 | B32 => Some(ty),
+                I8 | I16 | I32 => Some(ty),
                _ => None,
            }
        }

        #[inline]
-        fn ty_int_bool_64(&mut self, ty: Type) -> Option<Type> {
+        fn ty_int_ref_64(&mut self, ty: Type) -> Option<Type> {
            match ty {
-                I64 | B64 => Some(ty),
-                _ => None,
-            }
-        }
-
-        #[inline]
-        fn ty_int_bool_ref_64(&mut self, ty: Type) -> Option<Type> {
-            match ty {
-                I64 | B64 | R64 => Some(ty),
-                _ => None,
-            }
-        }
-
-        #[inline]
-        fn ty_int_bool_128(&mut self, ty: Type) -> Option<Type> {
-            match ty {
-                I128 | B128 => Some(ty),
+                I64 | R64 => Some(ty),
                _ => None,
            }
        }
@@ -252,15 +236,6 @@ macro_rules! isle_common_prelude_methods {
            ty.is_int().then(|| ty)
        }

-        #[inline]
-        fn ty_int_bool(&mut self, ty: Type) -> Option<Type> {
-            if ty.is_int() || ty.is_bool() {
-                Some(ty)
-            } else {
-                None
-            }
-        }
-
        #[inline]
        fn ty_scalar_float(&mut self, ty: Type) -> Option<Type> {
            match ty {
--- a/cranelift/codegen/src/machinst/helpers.rs
+++ b/cranelift/codegen/src/machinst/helpers.rs
@@ -12,7 +12,7 @@ pub fn ty_bits(ty: Type) -> usize {

 /// Is the type represented by an integer (not float) at the machine level?
 pub(crate) fn ty_has_int_representation(ty: Type) -> bool {
-    ty.is_int() || ty.is_bool() || ty.is_ref()
+    ty.is_int() || ty.is_ref()
 }

 /// Is the type represented by a float or vector value at the machine level?
--- a/cranelift/codegen/src/opts/algebraic.isle
+++ b/cranelift/codegen/src/opts/algebraic.isle
@@ -170,10 +170,6 @@
      (if (u8_lt lz lx))
      (iadd ty (iadd ty y z) x))

-;; Select's selector input doesn't need bint; remove the redundant op.
-(rule (simplify (select ty (bint _ b) x y))
-      (subsume (select ty b x y)))
-
 ;; Rematerialize ALU-op-with-imm and iconsts in each block where they're
 ;; used. This is neutral (add-with-imm) or positive (iconst) for
 ;; register pressure, and these ops are very cheap.
--- a/cranelift/codegen/src/prelude.isle
+++ b/cranelift/codegen/src/prelude.isle
@@ -12,7 +12,7 @@
 (decl unit () Unit)
 (extern constructor unit unit)

-;; `bool` is declared in `clif.isle`.
+(type bool (primitive bool))
 (extern const $true bool)
 (extern const $false bool)

@@ -139,13 +139,6 @@

 ;;;; `cranelift_codegen::ir::Type` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-(extern const $B1 Type)
-(extern const $B8 Type)
-(extern const $B16 Type)
-(extern const $B32 Type)
-(extern const $B64 Type)
-(extern const $B128 Type)
-
 (extern const $I8 Type)
 (extern const $I16 Type)
 (extern const $I32 Type)
@@ -158,11 +151,6 @@
 (extern const $F32 Type)
 (extern const $F64 Type)

-(extern const $B8X16 Type)
-(extern const $B16X8 Type)
-(extern const $B32X4 Type)
-(extern const $B64X2 Type)
-
 (extern const $I8X8 Type)
 (extern const $I8X16 Type)
 (extern const $I16X4 Type)
@@ -232,10 +220,10 @@
 (decl ty_64 (Type) Type)
 (extern extractor ty_64 ty_64)

-;; A pure constructor that only matches scalar booleans, integers, and
-;; references that can fit in 64 bits.
-(decl pure ty_int_bool_ref_scalar_64 (Type) Type)
-(extern constructor ty_int_bool_ref_scalar_64 ty_int_bool_ref_scalar_64)
+;; A pure constructor that only matches scalar integers and references that
+;; fit in 64 bits.
+(decl pure ty_int_ref_scalar_64 (Type) Type)
+(extern constructor ty_int_ref_scalar_64 ty_int_ref_scalar_64)

 ;; An extractor that matches 32- and 64-bit types only.
 (decl ty_32_or_64 (Type) Type)
@@ -245,25 +233,13 @@
 (decl ty_8_or_16 (Type) Type)
 (extern extractor ty_8_or_16 ty_8_or_16)

-;; An extractor that matches int and bool types that fit in 32 bits.
-(decl int_bool_fits_in_32 (Type) Type)
-(extern extractor int_bool_fits_in_32 int_bool_fits_in_32)
+;; An extractor that matches int types that fit in 32 bits.
+(decl int_fits_in_32 (Type) Type)
+(extern extractor int_fits_in_32 int_fits_in_32)

-;; An extractor that matches I64 or B64.
-(decl ty_int_bool_64 (Type) Type)
-(extern extractor ty_int_bool_64 ty_int_bool_64)
-
-;; An extractor that matches I64 or B64 or R64.
-(decl ty_int_bool_ref_64 (Type) Type)
-(extern extractor ty_int_bool_ref_64 ty_int_bool_ref_64)
-
-;; An extractor that matches I128 or B128.
-(decl ty_int_bool_128 (Type) Type)
-(extern extractor ty_int_bool_128 ty_int_bool_128)
-
-;; An extractor that matches any int or bool.
-(decl ty_int_bool (Type) Type)
-(extern extractor ty_int_bool ty_int_bool)
+;; An extractor that matches I64 or R64.
+(decl ty_int_ref_64 (Type) Type)
+(extern extractor ty_int_ref_64 ty_int_ref_64)

 ;; An extractor that only matches integers.
 (decl ty_int (Type) Type)
--- a/cranelift/codegen/src/simple_preopt.rs
+++ b/cranelift/codegen/src/simple_preopt.rs
@@ -614,7 +614,7 @@ mod simplify {
        dfg::ValueDef,
        immediates,
        instructions::{Opcode, ValueList},
-        types::{B8, I16, I32, I8},
+        types::{I16, I32, I8},
    };
    use std::marker::PhantomData;

@@ -861,29 +861,6 @@ mod simplify {
                }
            }

-            InstructionData::CondTrap { .. }
-            | InstructionData::Branch { .. }
-            | InstructionData::Ternary {
-                opcode: Opcode::Select,
-                ..
-            } => {
-                // Fold away a redundant `bint`.
-                let condition_def = {
-                    let args = pos.func.dfg.inst_args(inst);
-                    pos.func.dfg.value_def(args[0])
-                };
-                if let ValueDef::Result(def_inst, _) = condition_def {
-                    if let InstructionData::Unary {
-                        opcode: Opcode::Bint,
-                        arg: bool_val,
-                    } = pos.func.dfg[def_inst]
-                    {
-                        let args = pos.func.dfg.inst_args_mut(inst);
-                        args[0] = bool_val;
-                    }
-                }
-            }
-
            InstructionData::Ternary {
                opcode: Opcode::Bitselect,
                args,
@@ -898,15 +875,13 @@ mod simplify {
                // while vselect can be encoded using single BLEND instruction.
                if let ValueDef::Result(def_inst, _) = pos.func.dfg.value_def(args[0]) {
                    let (cond_val, cond_type) = match pos.func.dfg[def_inst] {
-                        InstructionData::Unary {
-                            opcode: Opcode::RawBitcast,
-                            arg,
-                        } => {
-                            // If controlling mask is raw-bitcasted boolean vector then
-                            // we know each lane is either all zeroes or ones,
-                            // so we can use vselect instruction instead.
+                        InstructionData::IntCompare { .. }
+                        | InstructionData::FloatCompare { .. } => {
+                            // If the controlling mask is produced by a comparison, each
+                            // output lane will be all zeros or all ones.
+                            let arg = args[0];
                            let arg_type = pos.func.dfg.value_type(arg);
-                            if !arg_type.is_vector() || !arg_type.lane_type().is_bool() {
+                            if !arg_type.is_vector() {
                                return;
                            }
                            (arg, arg_type)
@@ -916,13 +891,13 @@ mod simplify {
                            constant_handle,
                        } => {
                            // If each byte of controlling mask is 0x00 or 0xFF then
+                            // we will always bitcast our way to vselect(I8x16, I8x16, I8x16).
+                            // we will always bitcast our way to vselect(I8x16, I8x16).
                            // Bitselect operates at bit level, so the lane types don't matter.
                            let const_data = pos.func.dfg.constants.get(constant_handle);
                            if !const_data.iter().all(|&b| b == 0 || b == 0xFF) {
                                return;
                            }
-                            let new_type = B8.by(old_cond_type.bytes()).unwrap();
+                            let new_type = I8.by(old_cond_type.bytes()).unwrap();
                            (pos.ins().raw_bitcast(new_type, args[0]), new_type)
                        }
                        _ => return,
--- a/cranelift/codegen/src/souper_harvest.rs
+++ b/cranelift/codegen/src/souper_harvest.rs
@@ -150,11 +150,11 @@ fn harvest_candidate_lhs(
                        a.into()
                    } else {
                        // The only arguments we get that we haven't already
-                        // converted into a souper instruction are `iconst`s and
-                        // `bconst`s. This is because souper only allows
+                        // converted into a souper instruction are `iconst`s.
+                        // This is because souper only allows
                        // constants as operands, and it doesn't allow assigning
                        // constants to a variable name. So we lazily convert
-                        // `iconst`s and `bconst`s into souper operands here,
+                        // `iconst`s into souper operands here,
                        // when they are actually used.
                        match func.dfg.value_def(arg) {
                            ir::ValueDef::Result(inst, 0) => match func.dfg[inst] {
@@ -166,20 +166,13 @@ fn harvest_candidate_lhs(
                                        r#type: souper_type_of(&func.dfg, arg),
                                    })
                                }
-                                ir::InstructionData::UnaryBool { opcode, imm } => {
-                                    debug_assert_eq!(opcode, ir::Opcode::Iconst);
-                                    ast::Operand::Constant(ast::Constant {
-                                        value: imm.into(),
-                                        r#type: souper_type_of(&func.dfg, arg),
-                                    })
-                                }
                                _ => unreachable!(
-                                    "only iconst and bconst instructions \
+                                    "only iconst instructions \
                                     aren't in `ir_to_souper_val`"
                                ),
                            },
                            _ => unreachable!(
-                                "only iconst and bconst instructions \
+                                "only iconst instructions \
                                 aren't in `ir_to_souper_val`"
                            ),
                        }
@@ -487,11 +480,11 @@ fn harvest_candidate_lhs(
                    }
                    // Because Souper doesn't allow constants to be on the right
                    // hand side of an assignment (i.e. `%0:i32 = 1234` is
-                    // disallowed) we have to ignore `iconst` and `bconst`
+                    // disallowed) we have to ignore `iconst`
                    // instructions until we process them as operands for some
                    // other instruction. See the `arg` closure above for
                    // details.
-                    (ir::Opcode::Iconst, _) | (ir::Opcode::Bconst, _) => return,
+                    (ir::Opcode::Iconst, _) => return,
                    _ => ast::AssignmentRhs::Var,
                }
            }
@@ -533,7 +526,7 @@ fn harvest_candidate_lhs(

 fn souper_type_of(dfg: &ir::DataFlowGraph, val: ir::Value) -> Option<ast::Type> {
    let ty = dfg.value_type(val);
-    assert!(ty.is_int() || ty.is_bool());
+    assert!(ty.is_int());
    assert_eq!(ty.lane_count(), 1);
    Some(ast::Type {
        width: ty.bits().try_into().unwrap(),
--- a/cranelift/codegen/src/verifier/mod.rs
+++ b/cranelift/codegen/src/verifier/mod.rs
@@ -768,7 +768,6 @@ impl<'a> Verifier<'a> {
            | UnaryImm { .. }
            | UnaryIeee32 { .. }
            | UnaryIeee64 { .. }
-            | UnaryBool { .. }
            | Binary { .. }
            | BinaryImm8 { .. }
            | BinaryImm64 { .. }
@@ -1514,7 +1513,7 @@ impl<'a> Verifier<'a> {
            ir::InstructionData::Unary { opcode, arg } => {
                let arg_type = self.func.dfg.value_type(arg);
                match opcode {
-                    Opcode::Bextend | Opcode::Uextend | Opcode::Sextend | Opcode::Fpromote => {
+                    Opcode::Uextend | Opcode::Sextend | Opcode::Fpromote => {
                        if arg_type.lane_count() != ctrl_type.lane_count() {
                            return errors.nonfatal((
                                inst,
@@ -1536,7 +1535,7 @@ impl<'a> Verifier<'a> {
                            ));
                        }
                    }
-                    Opcode::Breduce | Opcode::Ireduce | Opcode::Fdemote => {
+                    Opcode::Ireduce | Opcode::Fdemote => {
                        if arg_type.lane_count() != ctrl_type.lane_count() {
                            return errors.nonfatal((
                                inst,
--- a/cranelift/codegen/src/write.rs
+++ b/cranelift/codegen/src/write.rs
@@ -393,7 +393,6 @@ pub fn write_operands(w: &mut dyn Write, dfg: &DataFlowGraph, inst: Inst) -> fmt
        UnaryImm { imm, .. } => write!(w, " {}", imm),
        UnaryIeee32 { imm, .. } => write!(w, " {}", imm),
        UnaryIeee64 { imm, .. } => write!(w, " {}", imm),
-        UnaryBool { imm, .. } => write!(w, " {}", imm),
        UnaryGlobalValue { global_value, .. } => write!(w, " {}", global_value),
        UnaryConst {
            constant_handle, ..
@@ -539,7 +538,6 @@ pub fn write_operands(w: &mut dyn Write, dfg: &DataFlowGraph, inst: Inst) -> fmt
                UnaryImm { imm, .. } => imm.to_string(),
                UnaryIeee32 { imm, .. } => imm.to_string(),
                UnaryIeee64 { imm, .. } => imm.to_string(),
-                UnaryBool { imm, .. } => imm.to_string(),
                UnaryConst {
                    constant_handle, ..
                } => constant_handle.to_string(),
--- a/cranelift/docs/ir.md
+++ b/cranelift/docs/ir.md
@@ -138,25 +138,6 @@ All SSA values have a type which determines the size and shape (for SIMD
 vectors) of the value. Many instructions are polymorphic -- they can operate on
 different types.

-### Boolean types
-
-Boolean values are either true or false.
-
-The `b1` type represents an abstract boolean value. It can only exist as
-an SSA value, and can't be directly stored in memory. It can, however, be
-converted into an integer with value 0 or 1 by the `bint` instruction (and
-converted back with `icmp_imm` with 0).
-
-Several larger boolean types are also defined, primarily to be used as SIMD
-element types. They can be stored in memory, and are represented as either all
-zero bits or all one bits.
-
- b1
- b8
- b16
- b32
- b64
-
 ### Integer types

 Integer values have a fixed size and can be interpreted as either signed or
@@ -219,8 +200,8 @@ instructions either. The verifier enforces these rules.
 ### SIMD vector types

 A SIMD vector type represents a vector of values from one of the scalar types
-(boolean, integer, and floating point). Each scalar value in a SIMD type is
-called a *lane*. The number of lanes must be a power of two in the range 2-256.
+(integer and floating point). Each scalar value in a SIMD type is called a
+*lane*. The number of lanes must be a power of two in the range 2-256.

 i%Bx%N
    A SIMD vector of integers. The lane type `iB` is one of the integer
@@ -247,14 +228,6 @@ f64x%N

    The size of a `f64` vector in memory is :math:`8N` bytes.

-b1x%N
-    A boolean SIMD vector.
-
-    Boolean vectors are used when comparing SIMD vectors. For example,
-    comparing two `i32x4` values would produce a `b1x4` result.
-
-    Like the `b1` type, a boolean vector cannot be stored in memory.
-
 ### Pseudo-types and type classes

 These are not concrete types, but convenient names used to refer to real types
@@ -314,12 +287,6 @@ ieee64
    A 64-bit immediate floating point number in the IEEE 754-2008 binary64
    interchange format. All bit patterns are allowed.

-bool
-    A boolean immediate value, either false or true.
-
-    In the textual format, `bool` immediates appear as 'false'
-    and 'true'.
-
 intcc
    An integer condition code. See the `icmp` instruction for details.

@@ -790,10 +757,9 @@ an instruction is required to load a constant into an SSA value: `iconst`,

 ### Bitwise operations

-The bitwise operations and operate on any value type: Integers, floating point
-numbers, and booleans. When operating on integer or floating point types, the
-bitwise operations are working on the binary representation of the values. When
-operating on boolean values, the bitwise operations work as logical operators.
+The bitwise operations operate on any value type: integers and floating point
+numbers. When operating on integer or floating point types, the bitwise
+operations work on the binary representation of the values.

 The shift and rotate operations only work on integer types (scalar and vector).
 The shift amount does not have to be the same type as the value being shifted.
--- a/cranelift/filetests/filetests/isa/aarch64/atomic-cas.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/atomic-cas.clif
@@ -7,8 +7,7 @@ function u0:0(i64, i32, i32) -> i8 system_v {
 block0(v0: i64, v1: i32, v2: i32):
    v6 = atomic_cas.i32 v0, v1, v2
    v7 = icmp eq v6, v1
-    v8 = bint.i8 v7
-    return v8
+    return v7
 }

 ;   stp fp, lr, [sp, #-16]!
@@ -22,8 +21,7 @@ block0(v0: i64, v1: i32, v2: i32):
 ;   mov x28, x2
 ;   atomic_cas_loop_32 addr=x25, expect=x26, replacement=x28, oldval=x27, scratch=x24
 ;   subs wzr, w27, w26
-;   cset x8, eq
-;   and w0, w8, #1
+;   cset x0, eq
 ;   ldp x24, x25, [sp], #16
 ;   ldp x26, x27, [sp], #16
 ;   ldr x28, [sp], #16
--- a/cranelift/filetests/filetests/isa/aarch64/bitops.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/bitops.clif
@@ -304,28 +304,28 @@ block0(v0: i8):
 ;   umov w0, v5.b[0]
 ;   ret

-function %bextend_b8() -> b32 {
+function %sextend_i8() -> i32 {
 block0:
-    v1 = bconst.b8 true
-    v2 = bextend.b32 v1
+    v1 = iconst.i8 -1
+    v2 = sextend.i32 v1
    return v2
 }

 ; block0:
-;   movz x1, #255
+;   movn x1, #0
 ;   sxtb w0, w1
 ;   ret

-function %bextend_b1() -> b32 {
+function %sextend_i8() -> i32 {
 block0:
-    v1 = bconst.b1 true
-    v2 = bextend.b32 v1
+    v1 = iconst.i8 -1
+    v2 = sextend.i32 v1
    return v2
 }

 ; block0:
-;   movz x1, #1
-;   sbfx w0, w1, #0, #1
+;   movn x1, #0
+;   sxtb w0, w1
 ;   ret

 function %bnot_i32(i32) -> i32 {
--- a/cranelift/filetests/filetests/isa/aarch64/compare_zero.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/compare_zero.clif
@@ -2,7 +2,7 @@ test compile precise-output
 set unwind_info=false
 target aarch64

-function %f0(i8x16) -> b8x16 {
+function %f0(i8x16) -> i8x16 {
 block0(v0: i8x16):
  v1 = iconst.i8 0
  v2 = splat.i8x16 v1
@@ -14,7 +14,7 @@ block0(v0: i8x16):
 ;   cmeq v0.16b, v0.16b, #0
 ;   ret

-function %f0_vconst(i8x16) -> b8x16 {
+function %f0_vconst(i8x16) -> i8x16 {
 block0(v0: i8x16):
  v1 = vconst.i8x16 0x00
  v2 = icmp eq v0, v1
@@ -25,7 +25,7 @@ block0(v0: i8x16):
 ;   cmeq v0.16b, v0.16b, #0
 ;   ret

-function %f1(i16x8) -> b16x8 {
+function %f1(i16x8) -> i16x8 {
 block0(v0: i16x8):
  v1 = iconst.i16 0
  v2 = splat.i16x8 v1
@@ -37,7 +37,7 @@ block0(v0: i16x8):
 ;   cmeq v0.8h, v0.8h, #0
 ;   ret

-function %f1_vconst(i16x8) -> b16x8 {
+function %f1_vconst(i16x8) -> i16x8 {
 block0(v0: i16x8):
  v1 = vconst.i16x8 0x00
  v2 = icmp eq v1, v0
@@ -48,7 +48,7 @@ block0(v0: i16x8):
 ;   cmeq v0.8h, v0.8h, #0
 ;   ret

-function %f2(i32x4) -> b32x4 {
+function %f2(i32x4) -> i32x4 {
 block0(v0: i32x4):
  v1 = iconst.i32 0
  v2 = splat.i32x4 v1
@@ -61,7 +61,7 @@ block0(v0: i32x4):
 ;   mvn v0.16b, v3.16b
 ;   ret

-function %f2_vconst(i32x4) -> b32x4 {
+function %f2_vconst(i32x4) -> i32x4 {
 block0(v0: i32x4):
  v1 = vconst.i32x4 0x00
  v2 = icmp ne v0, v1
@@ -73,7 +73,7 @@ block0(v0: i32x4):
 ;   mvn v0.16b, v3.16b
 ;   ret

-function %f3(i64x2) -> b64x2 {
+function %f3(i64x2) -> i64x2 {
 block0(v0: i64x2):
  v1 = iconst.i64 0
  v2 = splat.i64x2 v1
@@ -86,7 +86,7 @@ block0(v0: i64x2):
 ;   mvn v0.16b, v3.16b
 ;   ret

-function %f3_vconst(i64x2) -> b64x2 {
+function %f3_vconst(i64x2) -> i64x2 {
 block0(v0: i64x2):
  v1 = vconst.i64x2 0x00
  v2 = icmp ne v1, v0
@@ -98,7 +98,7 @@ block0(v0: i64x2):
 ;   mvn v0.16b, v3.16b
 ;   ret

-function %f4(i8x16) -> b8x16 {
+function %f4(i8x16) -> i8x16 {
 block0(v0: i8x16):
  v1 = iconst.i8 0
  v2 = splat.i8x16 v1
@@ -110,7 +110,7 @@ block0(v0: i8x16):
 ;   cmle v0.16b, v0.16b, #0
 ;   ret

-function %f4_vconst(i8x16) -> b8x16 {
+function %f4_vconst(i8x16) -> i8x16 {
 block0(v0: i8x16):
  v1 = vconst.i8x16 0x00
  v2 = icmp sle v0, v1
@@ -121,7 +121,7 @@ block0(v0: i8x16):
 ;   cmle v0.16b, v0.16b, #0
 ;   ret

-function %f5(i16x8) -> b16x8 {
+function %f5(i16x8) -> i16x8 {
 block0(v0: i16x8):
  v1 = iconst.i16 0
  v2 = splat.i16x8 v1
@@ -133,7 +133,7 @@ block0(v0: i16x8):
 ;   cmge v0.8h, v0.8h, #0
 ;   ret

-function %f5_vconst(i16x8) -> b16x8 {
+function %f5_vconst(i16x8) -> i16x8 {
 block0(v0: i16x8):
  v1 = vconst.i16x8 0x00
  v2 = icmp sle v1, v0
@@ -144,7 +144,7 @@ block0(v0: i16x8):
 ;   cmge v0.8h, v0.8h, #0
 ;   ret

-function %f6(i32x4) -> b32x4 {
+function %f6(i32x4) -> i32x4 {
 block0(v0: i32x4):
  v1 = iconst.i32 0
  v2 = splat.i32x4 v1
@@ -156,7 +156,7 @@ block0(v0: i32x4):
 ;   cmge v0.4s, v0.4s, #0
 ;   ret

-function %f6_vconst(i32x4) -> b32x4 {
+function %f6_vconst(i32x4) -> i32x4 {
 block0(v0: i32x4):
  v1 = vconst.i32x4 0x00
  v2 = icmp sge v0, v1
@@ -167,7 +167,7 @@ block0(v0: i32x4):
 ;   cmge v0.4s, v0.4s, #0
 ;   ret

-function %f7(i64x2) -> b64x2 {
+function %f7(i64x2) -> i64x2 {
 block0(v0: i64x2):
  v1 = iconst.i64 0
  v2 = splat.i64x2 v1
@@ -179,7 +179,7 @@ block0(v0: i64x2):
 ;   cmle v0.2d, v0.2d, #0
 ;   ret

-function %f7_vconst(i64x2) -> b64x2 {
+function %f7_vconst(i64x2) -> i64x2 {
 block0(v0: i64x2):
  v1 = vconst.i64x2 0x00
  v2 = icmp sge v1, v0
@@ -190,7 +190,7 @@ block0(v0: i64x2):
 ;   cmle v0.2d, v0.2d, #0
 ;   ret

-function %f8(i8x16) -> b8x16 {
+function %f8(i8x16) -> i8x16 {
 block0(v0: i8x16):
  v1 = iconst.i8 0
  v2 = splat.i8x16 v1
@@ -202,7 +202,7 @@ block0(v0: i8x16):
 ;   cmlt v0.16b, v0.16b, #0
 ;   ret

-function %f8_vconst(i8x16) -> b8x16 {
+function %f8_vconst(i8x16) -> i8x16 {
 block0(v0: i8x16):
  v1 = vconst.i8x16 0x00
  v2 = icmp slt v0, v1
@@ -213,7 +213,7 @@ block0(v0: i8x16):
 ;   cmlt v0.16b, v0.16b, #0
 ;   ret

-function %f9(i16x8) -> b16x8 {
+function %f9(i16x8) -> i16x8 {
 block0(v0: i16x8):
  v1 = iconst.i16 0
  v2 = splat.i16x8 v1
@@ -225,7 +225,7 @@ block0(v0: i16x8):
 ;   cmgt v0.8h, v0.8h, #0
 ;   ret

-function %f9_vconst(i16x8) -> b16x8 {
+function %f9_vconst(i16x8) -> i16x8 {
 block0(v0: i16x8):
  v1 = vconst.i16x8 0x00
  v2 = icmp slt v1, v0
@@ -236,7 +236,7 @@ block0(v0: i16x8):
 ;   cmgt v0.8h, v0.8h, #0
 ;   ret

-function %f10(i32x4) -> b32x4 {
+function %f10(i32x4) -> i32x4 {
 block0(v0: i32x4):
  v1 = iconst.i32 0
  v2 = splat.i32x4 v1
@@ -248,7 +248,7 @@ block0(v0: i32x4):
 ;   cmgt v0.4s, v0.4s, #0
 ;   ret

-function %f10_vconst(i32x4) -> b32x4 {
+function %f10_vconst(i32x4) -> i32x4 {
 block0(v0: i32x4):
  v1 = vconst.i32x4 0x00
  v2 = icmp sgt v0, v1
@@ -259,7 +259,7 @@ block0(v0: i32x4):
 ;   cmgt v0.4s, v0.4s, #0
 ;   ret

-function %f11(i64x2) -> b64x2 {
+function %f11(i64x2) -> i64x2 {
 block0(v0: i64x2):
  v1 = iconst.i64 0
  v2 = splat.i64x2 v1
@@ -271,7 +271,7 @@ block0(v0: i64x2):
 ;   cmlt v0.2d, v0.2d, #0
 ;   ret

-function %f11_vconst(i64x2) -> b64x2 {
+function %f11_vconst(i64x2) -> i64x2 {
 block0(v0: i64x2):
  v1 = vconst.i64x2 0x00
  v2 = icmp sgt v1, v0
@@ -282,7 +282,7 @@ block0(v0: i64x2):
 ;   cmlt v0.2d, v0.2d, #0
 ;   ret

-function %f12(f32x4) -> b32x4 {
+function %f12(f32x4) -> i32x4 {
 block0(v0: f32x4):
  v1 = f32const 0.0
  v2 = splat.f32x4 v1
@@ -294,7 +294,7 @@ block0(v0: f32x4):
 ;   fcmeq v0.4s, v0.4s, #0.0
 ;   ret

-function %f12_vconst(f32x4) -> b32x4 {
+function %f12_vconst(f32x4) -> i32x4 {
 block0(v0: f32x4):
  v1 = vconst.f32x4 [0.0 0.0 0.0 0.0]
  v2 = fcmp eq v0, v1
@@ -305,7 +305,7 @@ block0(v0: f32x4):
 ;   fcmeq v0.4s, v0.4s, #0.0
 ;   ret

-function %f13(f64x2) -> b64x2 {
+function %f13(f64x2) -> i64x2 {
 block0(v0: f64x2):
  v1 = f64const 0.0
  v2 = splat.f64x2 v1
@@ -317,7 +317,7 @@ block0(v0: f64x2):
 ;   fcmeq v0.2d, v0.2d, #0.0
 ;   ret

-function %f13_vconst(f64x2) -> b64x2 {
+function %f13_vconst(f64x2) -> i64x2 {
 block0(v0: f64x2):
  v1 = vconst.f64x2 [0.0 0.0]
  v2 = fcmp eq v1, v0
@@ -328,7 +328,7 @@ block0(v0: f64x2):
 ;   fcmeq v0.2d, v0.2d, #0.0
 ;   ret

-function %f14(f64x2) -> b64x2 {
+function %f14(f64x2) -> i64x2 {
 block0(v0: f64x2):
  v1 = f64const 0.0
  v2 = splat.f64x2 v1
@@ -341,7 +341,7 @@ block0(v0: f64x2):
 ;   mvn v0.16b, v3.16b
 ;   ret

-function %f14_vconst(f64x2) -> b64x2 {
+function %f14_vconst(f64x2) -> i64x2 {
 block0(v0: f64x2):
  v1 = vconst.f64x2 [0.0 0.0]
  v2 = fcmp ne v0, v1
@@ -353,7 +353,7 @@ block0(v0: f64x2):
 ;   mvn v0.16b, v3.16b
 ;   ret

-function %f15(f32x4) -> b32x4 {
+function %f15(f32x4) -> i32x4 {
 block0(v0: f32x4):
  v1 = f32const 0.0
  v2 = splat.f32x4 v1
@@ -366,7 +366,7 @@ block0(v0: f32x4):
 ;   mvn v0.16b, v3.16b
 ;   ret

-function %f15_vconst(f32x4) -> b32x4 {
+function %f15_vconst(f32x4) -> i32x4 {
 block0(v0: f32x4):
  v1 = vconst.f32x4 [0.0 0.0 0.0 0.0]
  v2 = fcmp ne v1, v0
@@ -378,7 +378,7 @@ block0(v0: f32x4):
 ;   mvn v0.16b, v3.16b
 ;   ret

-function %f16(f32x4) -> b32x4 {
+function %f16(f32x4) -> i32x4 {
 block0(v0: f32x4):
  v1 = f32const 0.0
  v2 = splat.f32x4 v1
@@ -390,7 +390,7 @@ block0(v0: f32x4):
 ;   fcmle v0.4s, v0.4s, #0.0
 ;   ret

-function %f16_vconst(f32x4) -> b32x4 {
+function %f16_vconst(f32x4) -> i32x4 {
 block0(v0: f32x4):
  v1 = vconst.f32x4 [0.0 0.0 0.0 0.0]
  v2 = fcmp le v0, v1
@@ -401,7 +401,7 @@ block0(v0: f32x4):
 ;   fcmle v0.4s, v0.4s, #0.0
 ;   ret

-function %f17(f64x2) -> b64x2 {
+function %f17(f64x2) -> i64x2 {
 block0(v0: f64x2):
  v1 = f64const 0.0
  v2 = splat.f64x2 v1
@@ -413,7 +413,7 @@ block0(v0: f64x2):
 ;   fcmge v0.2d, v0.2d, #0.0
 ;   ret

-function %f17_vconst(f64x2) -> b64x2 {
+function %f17_vconst(f64x2) -> i64x2 {
 block0(v0: f64x2):
  v1 = vconst.f64x2 [0.0 0.0]
  v2 = fcmp le v1, v0
@@ -424,7 +424,7 @@ block0(v0: f64x2):
 ;   fcmge v0.2d, v0.2d, #0.0
 ;   ret

-function %f18(f64x2) -> b64x2 {
+function %f18(f64x2) -> i64x2 {
 block0(v0: f64x2):
  v1 = f64const 0.0
  v2 = splat.f64x2 v1
@@ -436,7 +436,7 @@ block0(v0: f64x2):
 ;   fcmge v0.2d, v0.2d, #0.0
 ;   ret

-function %f18_vconst(f64x2) -> b64x2 {
+function %f18_vconst(f64x2) -> i64x2 {
 block0(v0: f64x2):
  v1 = vconst.f64x2 [0.0 0.0]
  v2 = fcmp ge v0, v1
@@ -447,7 +447,7 @@ block0(v0: f64x2):
 ;   fcmge v0.2d, v0.2d, #0.0
 ;   ret

-function %f19(f32x4) -> b32x4 {
+function %f19(f32x4) -> i32x4 {
 block0(v0: f32x4):
  v1 = f32const 0.0
  v2 = splat.f32x4 v1
@@ -459,7 +459,7 @@ block0(v0: f32x4):
 ;   fcmle v0.4s, v0.4s, #0.0
 ;   ret

-function %f19_vconst(f32x4) -> b32x4 {
+function %f19_vconst(f32x4) -> i32x4 {
 block0(v0: f32x4):
  v1 = vconst.f32x4 [0.0 0.0 0.0 0.0]
  v2 = fcmp ge v1, v0
@@ -470,7 +470,7 @@ block0(v0: f32x4):
 ;   fcmle v0.4s, v0.4s, #0.0
 ;   ret

-function %f20(f32x4) -> b32x4 {
+function %f20(f32x4) -> i32x4 {
 block0(v0: f32x4):
  v1 = f32const 0.0
  v2 = splat.f32x4 v1
@@ -482,7 +482,7 @@ block0(v0: f32x4):
 ;   fcmlt v0.4s, v0.4s, #0.0
 ;   ret

-function %f20_vconst(f32x4) -> b32x4 {
+function %f20_vconst(f32x4) -> i32x4 {
 block0(v0: f32x4):
  v1 = vconst.f32x4 [0.0 0.0 0.0 0.0]
  v2 = fcmp lt v0, v1
@@ -493,7 +493,7 @@ block0(v0: f32x4):
 ;   fcmlt v0.4s, v0.4s, #0.0
 ;   ret

-function %f21(f64x2) -> b64x2 {
+function %f21(f64x2) -> i64x2 {
 block0(v0: f64x2):
  v1 = f64const 0.0
  v2 = splat.f64x2 v1
@@ -505,7 +505,7 @@ block0(v0: f64x2):
 ;   fcmgt v0.2d, v0.2d, #0.0
 ;   ret

-function %f21_vconst(f64x2) -> b64x2 {
+function %f21_vconst(f64x2) -> i64x2 {
 block0(v0: f64x2):
  v1 = vconst.f64x2 [0.0 0.0]
  v2 = fcmp lt v1, v0
@@ -516,7 +516,7 @@ block0(v0: f64x2):
 ;   fcmgt v0.2d, v0.2d, #0.0
 ;   ret

-function %f22(f64x2) -> b64x2 {
+function %f22(f64x2) -> i64x2 {
 block0(v0: f64x2):
  v1 = f64const 0.0
  v2 = splat.f64x2 v1
@@ -528,7 +528,7 @@ block0(v0: f64x2):
 ;   fcmgt v0.2d, v0.2d, #0.0
 ;   ret

-function %f22_vconst(f64x2) -> b64x2 {
+function %f22_vconst(f64x2) -> i64x2 {
 block0(v0: f64x2):
  v1 = vconst.f64x2 [0.0 0.0]
  v2 = fcmp gt v0, v1
@@ -539,7 +539,7 @@ block0(v0: f64x2):
 ;   fcmgt v0.2d, v0.2d, #0.0
 ;   ret

-function %f23(f32x4) -> b32x4 {
+function %f23(f32x4) -> i32x4 {
 block0(v0: f32x4):
  v1 = f32const 0.0
  v2 = splat.f32x4 v1
@@ -551,7 +551,7 @@ block0(v0: f32x4):
 ;   fcmlt v0.4s, v0.4s, #0.0
 ;   ret

-function %f23_vconst(f32x4) -> b32x4 {
+function %f23_vconst(f32x4) -> i32x4 {
 block0(v0: f32x4):
  v1 = vconst.f32x4 [0.0 0.0 0.0 0.0]
  v2 = fcmp gt v1, v0
--- a/cranelift/filetests/filetests/isa/aarch64/condbr.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/condbr.clif
@@ -2,7 +2,7 @@ test compile precise-output
 set unwind_info=false
 target aarch64

-function %f(i64, i64) -> b1 {
+function %f(i64, i64) -> i8 {
 block0(v0: i64, v1: i64):
  v2 = icmp eq v0, v1
  return v2
@@ -13,7 +13,7 @@ block0(v0: i64, v1: i64):
 ;   cset x0, eq
 ;   ret

-function %icmp_eq_i128(i128, i128) -> b1 {
+function %icmp_eq_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp eq v0, v1
  return v2
@@ -25,7 +25,7 @@ block0(v0: i128, v1: i128):
 ;   cset x0, eq
 ;   ret

-function %icmp_ne_i128(i128, i128) -> b1 {
+function %icmp_ne_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp ne v0, v1
  return v2
@@ -37,7 +37,7 @@ block0(v0: i128, v1: i128):
 ;   cset x0, ne
 ;   ret

-function %icmp_slt_i128(i128, i128) -> b1 {
+function %icmp_slt_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp slt v0, v1
  return v2
@@ -51,7 +51,7 @@ block0(v0: i128, v1: i128):
 ;   csel x0, x7, x10, eq
 ;   ret

-function %icmp_ult_i128(i128, i128) -> b1 {
+function %icmp_ult_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp ult v0, v1
  return v2
@@ -65,7 +65,7 @@ block0(v0: i128, v1: i128):
 ;   csel x0, x7, x10, eq
 ;   ret

-function %icmp_sle_i128(i128, i128) -> b1 {
+function %icmp_sle_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp sle v0, v1
  return v2
@@ -79,7 +79,7 @@ block0(v0: i128, v1: i128):
 ;   csel x0, x7, x10, eq
 ;   ret

-function %icmp_ule_i128(i128, i128) -> b1 {
+function %icmp_ule_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp ule v0, v1
  return v2
@@ -93,7 +93,7 @@ block0(v0: i128, v1: i128):
 ;   csel x0, x7, x10, eq
 ;   ret

-function %icmp_sgt_i128(i128, i128) -> b1 {
+function %icmp_sgt_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp sgt v0, v1
  return v2
@@ -107,7 +107,7 @@ block0(v0: i128, v1: i128):
 ;   csel x0, x7, x10, eq
 ;   ret

-function %icmp_ugt_i128(i128, i128) -> b1 {
+function %icmp_ugt_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp ugt v0, v1
  return v2
@@ -121,7 +121,7 @@ block0(v0: i128, v1: i128):
 ;   csel x0, x7, x10, eq
 ;   ret

-function %icmp_sge_i128(i128, i128) -> b1 {
+function %icmp_sge_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp sge v0, v1
  return v2
@@ -135,7 +135,7 @@ block0(v0: i128, v1: i128):
 ;   csel x0, x7, x10, eq
 ;   ret

-function %icmp_uge_i128(i128, i128) -> b1 {
+function %icmp_uge_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp uge v0, v1
  return v2
@@ -471,3 +471,4 @@ block1:
 ;   b label3
 ; block3:
 ;   ret
+
--- a/cranelift/filetests/filetests/isa/aarch64/condops.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/condops.clif
@@ -737,7 +737,7 @@ block0(v0: i128, v1: i128, v2: i128):
 ;   csdb
 ;   ret

-function %g(i8) -> b1 {
+function %g(i8) -> i8 {
 block0(v0: i8):
  v3 = iconst.i8 42
  v4 = ifcmp v0, v3
@@ -763,15 +763,14 @@ block0(v0: i8, v1: i8, v2: i8):
 ;   orr w0, w5, w7
 ;   ret

-function %i(b1, i8, i8) -> i8 {
-block0(v0: b1, v1: i8, v2: i8):
+function %i(i8, i8, i8) -> i8 {
+block0(v0: i8, v1: i8, v2: i8):
  v3 = select.i8 v0, v1, v2
  return v3
 }

 ; block0:
-;   and w5, w0, #1
-;   subs wzr, w5, wzr
+;   ands wzr, w0, #255
 ;   csel x0, x1, x2, ne
 ;   ret

@@ -788,15 +787,14 @@ block0(v0: i32, v1: i8, v2: i8):
 ;   csel x0, x1, x2, eq
 ;   ret

-function %i128_select(b1, i128, i128) -> i128 {
-block0(v0: b1, v1: i128, v2: i128):
+function %i128_select(i8, i128, i128) -> i128 {
+block0(v0: i8, v1: i128, v2: i128):
  v3 = select.i128 v0, v1, v2
  return v3
 }

 ; block0:
-;   and w8, w0, #1
-;   subs wzr, w8, wzr
+;   ands wzr, w0, #255
 ;   csel x0, x2, x4, ne
 ;   csel x1, x3, x5, ne
 ;   ret
--- a/cranelift/filetests/filetests/isa/aarch64/constants.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/constants.clif
@@ -2,19 +2,19 @@ test compile precise-output
 set unwind_info=false
 target aarch64

-function %f() -> b8 {
+function %f() -> i8 {
 block0:
-  v0 = bconst.b8 true
+  v0 = iconst.i8 -1
  return v0
 }

 ; block0:
-;   movz x0, #255
+;   movn x0, #0
 ;   ret

-function %f() -> b16 {
+function %f() -> i16 {
 block0:
-  v0 = bconst.b16 false
+  v0 = iconst.i16 0
  return v0
 }

--- a/cranelift/filetests/filetests/isa/aarch64/i128-bmask.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/i128-bmask.clif
@@ -0,0 +1,112 @@
+test compile precise-output
+target aarch64
+
+function %bmask_i128_i128(i128) -> i128 {
+block0(v0: i128):
+  v1 = bmask.i128 v0
+  return v1
+}
+
+; block0:
+;   orr x5, x0, x1
+;   subs xzr, x5, #0
+;   csetm x1, ne
+;   mov x0, x1
+;   ret
+
+function %bmask_i128_i64(i128) -> i64 {
+block0(v0: i128):
+  v1 = bmask.i64 v0
+  return v1
+}
+
+; block0:
+;   orr x4, x0, x1
+;   subs xzr, x4, #0
+;   csetm x0, ne
+;   ret
+
+function %bmask_i128_i32(i128) -> i32 {
+block0(v0: i128):
+  v1 = bmask.i32 v0
+  return v1
+}
+
+; block0:
+;   orr x4, x0, x1
+;   subs xzr, x4, #0
+;   csetm x0, ne
+;   ret
+
+function %bmask_i128_i16(i128) -> i16 {
+block0(v0: i128):
+  v1 = bmask.i16 v0
+  return v1
+}
+
+; block0:
+;   orr x4, x0, x1
+;   subs xzr, x4, #0
+;   csetm x0, ne
+;   ret
+
+function %bmask_i128_i8(i128) -> i8 {
+block0(v0: i128):
+  v1 = bmask.i8 v0
+  return v1
+}
+
+; block0:
+;   orr x4, x0, x1
+;   subs xzr, x4, #0
+;   csetm x0, ne
+;   ret
+
+function %bmask_i64_i128(i64) -> i128 {
+block0(v0: i64):
+  v1 = bmask.i128 v0
+  return v1
+}
+
+; block0:
+;   subs xzr, x0, #0
+;   csetm x1, ne
+;   mov x0, x1
+;   ret
+
+function %bmask_i32_i128(i32) -> i128 {
+block0(v0: i32):
+  v1 = bmask.i128 v0
+  return v1
+}
+
+; block0:
+;   subs xzr, x0, #0
+;   csetm x1, ne
+;   mov x0, x1
+;   ret
+
+function %bmask_i16_i128(i16) -> i128 {
+block0(v0: i16):
+  v1 = bmask.i128 v0
+  return v1
+}
+
+; block0:
+;   subs xzr, x0, #0
+;   csetm x1, ne
+;   mov x0, x1
+;   ret
+
+function %bmask_i8_i128(i8) -> i128 {
+block0(v0: i8):
+  v1 = bmask.i128 v0
+  return v1
+}
+
+; block0:
+;   subs xzr, x0, #0
+;   csetm x1, ne
+;   mov x0, x1
+;   ret
+
--- a/cranelift/filetests/filetests/isa/aarch64/iconst-icmp-small.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/iconst-icmp-small.clif
@@ -10,16 +10,14 @@ function u0:0() -> i8 system_v {
 block0:
    v0 = iconst.i16 0xddcc
    v1 = icmp.i16 ne v0, v0
-    v2 = bint.i8 v1
-    return v2
+    return v1
 }

 ; block0:
-;   movz x2, #56780
-;   uxth w4, w2
-;   movz x6, #56780
-;   subs wzr, w4, w6, UXTH
-;   cset x9, ne
-;   and w0, w9, #1
+;   movz x1, #56780
+;   uxth w3, w1
+;   movz x5, #56780
+;   subs wzr, w3, w5, UXTH
+;   cset x0, ne
 ;   ret

--- a/cranelift/filetests/filetests/isa/aarch64/reftypes.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/reftypes.clif
@@ -10,7 +10,7 @@ block0(v0: r64):
 ; block0:
 ;   ret

-function %f1(r64) -> b1 {
+function %f1(r64) -> i8 {
 block0(v0: r64):
  v1 = is_null v0
  return v1
@@ -21,7 +21,7 @@ block0(v0: r64):
 ;   cset x0, eq
 ;   ret

-function %f2(r64) -> b1 {
+function %f2(r64) -> i8 {
 block0(v0: r64):
  v1 = is_invalid v0
  return v1
@@ -43,7 +43,7 @@ block0:
 ;   ret

 function %f4(r64, r64) -> r64, r64, r64 {
-    fn0 = %f(r64) -> b1
+    fn0 = %f(r64) -> i8
    ss0 = explicit_slot 8

 block0(v0: r64, v1: r64):
@@ -74,7 +74,7 @@ block3(v7: r64, v8: r64):
 ;   mov x2, sp
 ;   ldr x9, [sp, #8]
 ;   str x9, [x2]
-;   and w3, w0, #1
+;   uxtb w3, w0
 ;   cbz x3, label1 ; b label3
 ; block1:
 ;   b label2
--- a/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif
@@ -108,8 +108,8 @@ block0:
 ;   bsl v0.16b, v0.16b, v4.16b, v5.16b
 ;   ret

-function %vselect_i16x8(b16x8, i16x8, i16x8) -> i16x8 {
-block0(v0: b16x8, v1: i16x8, v2: i16x8):
+function %vselect_i16x8(i16x8, i16x8, i16x8) -> i16x8 {
+block0(v0: i16x8, v1: i16x8, v2: i16x8):
    v3 = vselect v0, v1, v2
    return v3
 }
@@ -118,8 +118,8 @@ block0(v0: b16x8, v1: i16x8, v2: i16x8):
 ;   bsl v0.16b, v0.16b, v1.16b, v2.16b
 ;   ret

-function %vselect_f32x4(b32x4, f32x4, f32x4) -> f32x4 {
-block0(v0: b32x4, v1: f32x4, v2: f32x4):
+function %vselect_f32x4(i32x4, f32x4, f32x4) -> f32x4 {
+block0(v0: i32x4, v1: f32x4, v2: f32x4):
    v3 = vselect v0, v1, v2
    return v3
 }
@@ -128,8 +128,8 @@ block0(v0: b32x4, v1: f32x4, v2: f32x4):
 ;   bsl v0.16b, v0.16b, v1.16b, v2.16b
 ;   ret

-function %vselect_f64x2(b64x2, f64x2, f64x2) -> f64x2 {
-block0(v0: b64x2, v1: f64x2, v2: f64x2):
+function %vselect_f64x2(i64x2, f64x2, f64x2) -> f64x2 {
+block0(v0: i64x2, v1: f64x2, v2: f64x2):
    v3 = vselect v0, v1, v2
    return v3
 }
--- a/cranelift/filetests/filetests/isa/aarch64/simd-comparison-legalize.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd-comparison-legalize.clif
@@ -2,7 +2,7 @@ test compile precise-output
 set enable_simd
 target aarch64

-function %icmp_ne_32x4(i32x4, i32x4) -> b32x4 {
+function %icmp_ne_32x4(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
    v2 = icmp ne v0, v1
    return v2
@@ -13,7 +13,7 @@ block0(v0: i32x4, v1: i32x4):
 ;   mvn v0.16b, v4.16b
 ;   ret

-function %icmp_ugt_i32x4(i32x4, i32x4) -> b32x4 {
+function %icmp_ugt_i32x4(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
    v2 = icmp ugt v0, v1
    return v2
@@ -23,7 +23,7 @@ block0(v0: i32x4, v1: i32x4):
 ;   cmhi v0.4s, v0.4s, v1.4s
 ;   ret

-function %icmp_sge_i16x8(i16x8, i16x8) -> b16x8 {
+function %icmp_sge_i16x8(i16x8, i16x8) -> i16x8 {
 block0(v0: i16x8, v1: i16x8):
    v2 = icmp sge v0, v1
    return v2
@@ -33,7 +33,7 @@ block0(v0: i16x8, v1: i16x8):
 ;   cmge v0.8h, v0.8h, v1.8h
 ;   ret

-function %icmp_uge_i8x16(i8x16, i8x16) -> b8x16 {
+function %icmp_uge_i8x16(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
    v2 = icmp uge v0, v1
    return v2
--- a/cranelift/filetests/filetests/isa/aarch64/simd-lane-access-compile.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd-lane-access-compile.clif
@@ -59,10 +59,10 @@ block0(v0: i8):
 ;   dup v0.16b, w0
 ;   ret

-function %splat_b16() -> b16x8 {
+function %splat_i16() -> i16x8 {
 block0:
-    v0 = bconst.b16 true
-    v1 = splat.b16x8 v0
+    v0 = iconst.i16 -1
+    v1 = splat.i16x8 v0
    return v1
 }

--- a/cranelift/filetests/filetests/isa/aarch64/simd-logical-compile.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd-logical-compile.clif
@@ -2,8 +2,8 @@ test compile precise-output
 set enable_simd
 target aarch64

-function %bnot_b32x4(b32x4) -> b32x4 {
-block0(v0: b32x4):
+function %bnot_i32x4(i32x4) -> i32x4 {
+block0(v0: i32x4):
    v1 = bnot v0
    return v1
 }
@@ -12,8 +12,8 @@ block0(v0: b32x4):
 ;   mvn v0.16b, v0.16b
 ;   ret

-function %vany_true_b32x4(b32x4) -> b1 {
-block0(v0: b32x4):
+function %vany_true_i32x4(i32x4) -> i8 {
+block0(v0: i32x4):
    v1 = vany_true v0
    return v1
 }
@@ -25,7 +25,7 @@ block0(v0: b32x4):
 ;   cset x0, ne
 ;   ret

-function %vall_true_i64x2(i64x2) -> b1 {
+function %vall_true_i64x2(i64x2) -> i8 {
 block0(v0: i64x2):
    v1 = vall_true v0
    return v1
--- a/cranelift/filetests/filetests/isa/aarch64/simd-valltrue.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd-valltrue.clif
@@ -2,8 +2,8 @@ test compile precise-output
 set unwind_info=false
 target aarch64

-function %fn0(b8x8) -> b1 {
-block0(v0: b8x8):
+function %fn0(i8x8) -> i8 {
+block0(v0: i8x8):
    v1 = vall_true v0
    return v1
 }
@@ -15,8 +15,8 @@ block0(v0: b8x8):
 ;   cset x0, ne
 ;   ret

-function %fn1(b8x16) -> b1 {
-block0(v0: b8x16):
+function %fn1(i8x16) -> i8 {
+block0(v0: i8x16):
    v1 = vall_true v0
    return v1
 }
@@ -28,8 +28,8 @@ block0(v0: b8x16):
 ;   cset x0, ne
 ;   ret

-function %fn2(b16x4) -> b1 {
-block0(v0: b16x4):
+function %fn2(i16x4) -> i8 {
+block0(v0: i16x4):
    v1 = vall_true v0
    return v1
 }
@@ -41,8 +41,8 @@ block0(v0: b16x4):
 ;   cset x0, ne
 ;   ret

-function %fn3(b16x8) -> b1 {
-block0(v0: b16x8):
+function %fn3(i16x8) -> i8 {
+block0(v0: i16x8):
    v1 = vall_true v0
    return v1
 }
@@ -54,8 +54,8 @@ block0(v0: b16x8):
 ;   cset x0, ne
 ;   ret

-function %fn4(b32x2) -> b1 {
-block0(v0: b32x2):
+function %fn4(i32x2) -> i8 {
+block0(v0: i32x2):
    v1 = vall_true v0
    return v1
 }
@@ -67,8 +67,8 @@ block0(v0: b32x2):
 ;   cset x0, ne
 ;   ret

-function %fn5(b32x4) -> b1 {
-block0(v0: b32x4):
+function %fn5(i32x4) -> i8 {
+block0(v0: i32x4):
    v1 = vall_true v0
    return v1
 }
@@ -80,8 +80,8 @@ block0(v0: b32x4):
 ;   cset x0, ne
 ;   ret

-function %fn6(b64x2) -> b1 {
-block0(v0: b64x2):
+function %fn6(i64x2) -> i8 {
+block0(v0: i64x2):
    v1 = vall_true v0
    return v1
 }
@@ -92,3 +92,4 @@ block0(v0: b64x2):
 ;   fcmp d5, d5
 ;   cset x0, eq
 ;   ret
+
--- a/cranelift/filetests/filetests/isa/aarch64/simd.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd.clif
@@ -28,18 +28,6 @@ block0:
 ;   dup v0.8h, w2
 ;   ret

-function %f3() -> b8x16 {
-block0:
-  v0 = bconst.b32 true
-  v1 = breduce.b8 v0
-  v2 = splat.b8x16 v1
-  return v2
-}
-
-; block0:
-;   movi v0.16b, #255
-;   ret
-
 function %f4(i32, i8x16, i8x16) -> i8x16 {
 block0(v0: i32, v1: i8x16, v2: i8x16):
   v3 = select v0, v1, v2
--- a/cranelift/filetests/filetests/isa/aarch64/stack.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/stack.clif
@@ -123,10 +123,10 @@ block0(v0: i64):
 ;   ldp fp, lr, [sp], #16
 ;   ret

-function %b1_spill_slot(b1) -> b1, i64 {
+function %i8_spill_slot(i8) -> i8, i64 {
    ss0 = explicit_slot 1000

-block0(v0: b1):
+block0(v0: i8):
  v1 = iconst.i64 1
  v2 = iconst.i64 2
  v3 = iconst.i64 3
--- a/cranelift/filetests/filetests/isa/riscv64/bitops.clif
+++ b/cranelift/filetests/filetests/isa/riscv64/bitops.clif
@@ -315,28 +315,6 @@ block0(v0: i8):
 ;   mv a0,a3
 ;   ret

-function %bextend_b8() -> b32 {
-block0:
-    v1 = bconst.b8 true
-    v2 = bextend.b32 v1
-    return v2
-}
-
-; block0:
-;   li a0,-1
-;   ret
-
-function %bextend_b1() -> b32 {
-block0:
-    v1 = bconst.b1 true
-    v2 = bextend.b32 v1
-    return v2
-}
-
-; block0:
-;   li a0,-1
-;   ret
-
 function %bnot_i32(i32) -> i32 {
 block0(v0: i32):
    v1 = bnot v0
--- a/cranelift/filetests/filetests/isa/riscv64/condbr.clif
+++ b/cranelift/filetests/filetests/isa/riscv64/condbr.clif
@@ -2,7 +2,7 @@ test compile precise-output
 set unwind_info=false
 target riscv64

-function %f(i64, i64) -> b1 {
+function %f(i64, i64) -> i8 {
 block0(v0: i64, v1: i64):
  v2 = icmp eq v0, v1
  return v2
@@ -12,7 +12,7 @@ block0(v0: i64, v1: i64):
 ;   eq a0,a0,a1##ty=i64
 ;   ret

-function %icmp_eq_i128(i128, i128) -> b1 {
+function %icmp_eq_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp eq v0, v1
  return v2
@@ -22,7 +22,7 @@ block0(v0: i128, v1: i128):
 ;   eq a0,[a0,a1],[a2,a3]##ty=i128
 ;   ret

-function %icmp_ne_i128(i128, i128) -> b1 {
+function %icmp_ne_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp ne v0, v1
  return v2
@@ -32,7 +32,7 @@ block0(v0: i128, v1: i128):
 ;   ne a0,[a0,a1],[a2,a3]##ty=i128
 ;   ret

-function %icmp_slt_i128(i128, i128) -> b1 {
+function %icmp_slt_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp slt v0, v1
  return v2
@@ -42,7 +42,7 @@ block0(v0: i128, v1: i128):
 ;   slt a0,[a0,a1],[a2,a3]##ty=i128
 ;   ret

-function %icmp_ult_i128(i128, i128) -> b1 {
+function %icmp_ult_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp ult v0, v1
  return v2
@@ -52,7 +52,7 @@ block0(v0: i128, v1: i128):
 ;   ult a0,[a0,a1],[a2,a3]##ty=i128
 ;   ret

-function %icmp_sle_i128(i128, i128) -> b1 {
+function %icmp_sle_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp sle v0, v1
  return v2
@@ -62,7 +62,7 @@ block0(v0: i128, v1: i128):
 ;   sle a0,[a0,a1],[a2,a3]##ty=i128
 ;   ret

-function %icmp_ule_i128(i128, i128) -> b1 {
+function %icmp_ule_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp ule v0, v1
  return v2
@@ -72,7 +72,7 @@ block0(v0: i128, v1: i128):
 ;   ule a0,[a0,a1],[a2,a3]##ty=i128
 ;   ret

-function %icmp_sgt_i128(i128, i128) -> b1 {
+function %icmp_sgt_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp sgt v0, v1
  return v2
@@ -82,7 +82,7 @@ block0(v0: i128, v1: i128):
 ;   sgt a0,[a0,a1],[a2,a3]##ty=i128
 ;   ret

-function %icmp_ugt_i128(i128, i128) -> b1 {
+function %icmp_ugt_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp ugt v0, v1
  return v2
@@ -92,7 +92,7 @@ block0(v0: i128, v1: i128):
 ;   ugt a0,[a0,a1],[a2,a3]##ty=i128
 ;   ret

-function %icmp_sge_i128(i128, i128) -> b1 {
+function %icmp_sge_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp sge v0, v1
  return v2
@@ -102,7 +102,7 @@ block0(v0: i128, v1: i128):
 ;   sge a0,[a0,a1],[a2,a3]##ty=i128
 ;   ret

-function %icmp_uge_i128(i128, i128) -> b1 {
+function %icmp_uge_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp uge v0, v1
  return v2
@@ -209,8 +209,9 @@ block1:
 }

 ; block0:
-;   eq a2,[a0,a1],[a2,a3]##ty=i128
-;   bne a2,zero,taken(label1),not_taken(label2)
+;   eq a3,[a0,a1],[a2,a3]##ty=i128
+;   andi a3,a3,255
+;   bne a3,zero,taken(label1),not_taken(label2)
 ; block1:
 ;   j label3
 ; block2:
@@ -228,8 +229,9 @@ block1:
 }

 ; block0:
-;   ne a2,[a0,a1],[a2,a3]##ty=i128
-;   bne a2,zero,taken(label1),not_taken(label2)
+;   ne a3,[a0,a1],[a2,a3]##ty=i128
+;   andi a3,a3,255
+;   bne a3,zero,taken(label1),not_taken(label2)
 ; block1:
 ;   j label3
 ; block2:
@@ -247,8 +249,9 @@ block1:
 }

 ; block0:
-;   slt a2,[a0,a1],[a2,a3]##ty=i128
-;   bne a2,zero,taken(label1),not_taken(label2)
+;   slt a3,[a0,a1],[a2,a3]##ty=i128
+;   andi a3,a3,255
+;   bne a3,zero,taken(label1),not_taken(label2)
 ; block1:
 ;   j label3
 ; block2:
@@ -266,8 +269,9 @@ block1:
 }

 ; block0:
-;   ult a2,[a0,a1],[a2,a3]##ty=i128
-;   bne a2,zero,taken(label1),not_taken(label2)
+;   ult a3,[a0,a1],[a2,a3]##ty=i128
+;   andi a3,a3,255
+;   bne a3,zero,taken(label1),not_taken(label2)
 ; block1:
 ;   j label3
 ; block2:
@@ -285,8 +289,9 @@ block1:
 }

 ; block0:
-;   sle a2,[a0,a1],[a2,a3]##ty=i128
-;   bne a2,zero,taken(label1),not_taken(label2)
+;   sle a3,[a0,a1],[a2,a3]##ty=i128
+;   andi a3,a3,255
+;   bne a3,zero,taken(label1),not_taken(label2)
 ; block1:
 ;   j label3
 ; block2:
@@ -304,8 +309,9 @@ block1:
 }

 ; block0:
-;   ule a2,[a0,a1],[a2,a3]##ty=i128
-;   bne a2,zero,taken(label1),not_taken(label2)
+;   ule a3,[a0,a1],[a2,a3]##ty=i128
+;   andi a3,a3,255
+;   bne a3,zero,taken(label1),not_taken(label2)
 ; block1:
 ;   j label3
 ; block2:
@@ -323,8 +329,9 @@ block1:
 }

 ; block0:
-;   sgt a2,[a0,a1],[a2,a3]##ty=i128
-;   bne a2,zero,taken(label1),not_taken(label2)
+;   sgt a3,[a0,a1],[a2,a3]##ty=i128
+;   andi a3,a3,255
+;   bne a3,zero,taken(label1),not_taken(label2)
 ; block1:
 ;   j label3
 ; block2:
@@ -342,8 +349,9 @@ block1:
 }

 ; block0:
-;   ugt a2,[a0,a1],[a2,a3]##ty=i128
-;   bne a2,zero,taken(label1),not_taken(label2)
+;   ugt a3,[a0,a1],[a2,a3]##ty=i128
+;   andi a3,a3,255
+;   bne a3,zero,taken(label1),not_taken(label2)
 ; block1:
 ;   j label3
 ; block2:
@@ -361,8 +369,9 @@ block1:
 }

 ; block0:
-;   sge a2,[a0,a1],[a2,a3]##ty=i128
-;   bne a2,zero,taken(label1),not_taken(label2)
+;   sge a3,[a0,a1],[a2,a3]##ty=i128
+;   andi a3,a3,255
+;   bne a3,zero,taken(label1),not_taken(label2)
 ; block1:
 ;   j label3
 ; block2:
@@ -380,8 +389,9 @@ block1:
 }

 ; block0:
-;   uge a2,[a0,a1],[a2,a3]##ty=i128
-;   bne a2,zero,taken(label1),not_taken(label2)
+;   uge a3,[a0,a1],[a2,a3]##ty=i128
+;   andi a3,a3,255
+;   bne a3,zero,taken(label1),not_taken(label2)
 ; block1:
 ;   j label3
 ; block2:
--- a/cranelift/filetests/filetests/isa/riscv64/condops.clif
+++ b/cranelift/filetests/filetests/isa/riscv64/condops.clif
@@ -18,7 +18,7 @@ block0(v0: i8, v1: i64, v2: i64):
 ;   selectif a0,a1,a2##test=t4
 ;   ret

-function %g(i8) -> b1 {
+function %g(i8) -> i8 {
 block0(v0: i8):
  v3 = iconst.i8 42
  v4 = ifcmp v0, v3
@@ -48,14 +48,15 @@ block0(v0: i8, v1: i8, v2: i8):
 ;   or a0,a2,a6
 ;   ret

-function %i(b1, i8, i8) -> i8 {
-block0(v0: b1, v1: i8, v2: i8):
+function %i(i8, i8, i8) -> i8 {
+block0(v0: i8, v1: i8, v2: i8):
  v3 = select.i8 v0, v1, v2
  return v3
 }

 ; block0:
-;   select_i8 a0,a1,a2##condition=a0
+;   andi a3,a0,255
+;   select_i8 a0,a1,a2##condition=a3
 ;   ret

 function %i(i32, i8, i8) -> i8 {
@@ -67,20 +68,22 @@ block0(v0: i32, v1: i8, v2: i8):
 }

 ; block0:
-;   li a3,42
-;   uext.w a5,a0
-;   uext.w a7,a3
-;   eq t4,a5,a7##ty=i32
-;   select_i8 a0,a1,a2##condition=t4
+;   li a4,42
+;   uext.w a6,a0
+;   uext.w t3,a4
+;   eq t0,a6,t3##ty=i32
+;   andi a6,t0,255
+;   select_i8 a0,a1,a2##condition=a6
 ;   ret

-function %i128_select(b1, i128, i128) -> i128 {
-block0(v0: b1, v1: i128, v2: i128):
+function %i128_select(i8, i128, i128) -> i128 {
+block0(v0: i8, v1: i128, v2: i128):
  v3 = select.i128 v0, v1, v2
  return v3
 }

 ; block0:
-;   select_i128 [a0,a1],[a1,a2],[a3,a4]##condition=a0
+;   andi a5,a0,255
+;   select_i128 [a0,a1],[a1,a2],[a3,a4]##condition=a5
 ;   ret

--- a/cranelift/filetests/filetests/isa/riscv64/constants.clif
+++ b/cranelift/filetests/filetests/isa/riscv64/constants.clif
@@ -2,9 +2,9 @@ test compile precise-output
 set unwind_info=false
 target riscv64

-function %f() -> b8 {
+function %f() -> i8 {
 block0:
-  v0 = bconst.b8 true
+  v0 = iconst.i8 -1
  return v0
 }

@@ -12,9 +12,9 @@ block0:
 ;   li a0,-1
 ;   ret

-function %f() -> b16 {
+function %f() -> i16 {
 block0:
-  v0 = bconst.b16 false
+  v0 = iconst.i16 0
  return v0
 }

--- a/cranelift/filetests/filetests/isa/riscv64/heap-addr.clif
+++ b/cranelift/filetests/filetests/isa/riscv64/heap-addr.clif
@@ -13,16 +13,17 @@ block0(v0: i64, v1: i32):
 }

 ; block0:
-;   uext.w t3,a1
-;   ld t4,0(a0)
-;   addi t4,t4,0
-;   ugt t0,t3,t4##ty=i64
-;   beq t0,zero,taken(label1),not_taken(label2)
+;   uext.w t4,a1
+;   ld t0,0(a0)
+;   addi t0,t0,0
+;   ugt t1,t4,t0##ty=i64
+;   andi t1,t1,255
+;   beq t1,zero,taken(label1),not_taken(label2)
 ; block1:
-;   add t0,a0,t3
-;   ugt t3,t3,t4##ty=i64
-;   li t1,0
-;   selectif_spectre_guard a0,t1,t0##test=t3
+;   add t1,a0,t4
+;   ugt t4,t4,t0##ty=i64
+;   li t2,0
+;   selectif_spectre_guard a0,t2,t1##test=t4
 ;   ret
 ; block2:
 ;   udf##trap_code=heap_oob
@@ -37,16 +38,17 @@ block0(v0: i64, v1: i32):
 }

 ; block0:
-;   uext.w t3,a1
-;   lui a7,16
-;   ugt t4,t3,a7##ty=i64
-;   beq t4,zero,taken(label1),not_taken(label2)
+;   uext.w t4,a1
+;   lui t3,16
+;   ugt t0,t4,t3##ty=i64
+;   andi t0,t0,255
+;   beq t0,zero,taken(label1),not_taken(label2)
 ; block1:
-;   add t4,a0,t3
-;   lui a7,16
-;   ugt t0,t3,a7##ty=i64
-;   li t1,0
-;   selectif_spectre_guard a0,t1,t4##test=t0
+;   add t0,a0,t4
+;   lui t3,16
+;   ugt t1,t4,t3##ty=i64
+;   li t2,0
+;   selectif_spectre_guard a0,t2,t0##test=t1
 ;   ret
 ; block2:
 ;   udf##trap_code=heap_oob
--- a/cranelift/filetests/filetests/isa/riscv64/i128-bmask.clif
+++ b/cranelift/filetests/filetests/isa/riscv64/i128-bmask.clif
@@ -0,0 +1,113 @@
+test compile precise-output
+set unwind_info=false
+target riscv64
+
+function %bmask_i128_i128(i128) -> i128 {
+block0(v0: i128):
+  v1 = bmask.i128 v0
+  return v1
+}
+
+; block0:
+;   or a2,a0,a1
+;   li a4,-1
+;   select_reg a1,zero,a4##condition=(zero eq a2)
+;   mv a0,a1
+;   ret
+
+function %bmask_i128_i64(i128) -> i64 {
+block0(v0: i128):
+  v1 = bmask.i64 v0
+  return v1
+}
+
+; block0:
+;   or a1,a0,a1
+;   li a3,-1
+;   select_reg a0,zero,a3##condition=(zero eq a1)
+;   ret
+
+function %bmask_i128_i32(i128) -> i32 {
+block0(v0: i128):
+  v1 = bmask.i32 v0
+  return v1
+}
+
+; block0:
+;   or a1,a0,a1
+;   li a3,-1
+;   select_reg a0,zero,a3##condition=(zero eq a1)
+;   ret
+
+function %bmask_i128_i16(i128) -> i16 {
+block0(v0: i128):
+  v1 = bmask.i16 v0
+  return v1
+}
+
+; block0:
+;   or a1,a0,a1
+;   li a3,-1
+;   select_reg a0,zero,a3##condition=(zero eq a1)
+;   ret
+
+function %bmask_i128_i8(i128) -> i8 {
+block0(v0: i128):
+  v1 = bmask.i8 v0
+  return v1
+}
+
+; block0:
+;   or a1,a0,a1
+;   li a3,-1
+;   select_reg a0,zero,a3##condition=(zero eq a1)
+;   ret
+
+function %bmask_i64_i128(i64) -> i128 {
+block0(v0: i64):
+  v1 = bmask.i128 v0
+  return v1
+}
+
+; block0:
+;   li a1,-1
+;   select_reg a1,zero,a1##condition=(zero eq a0)
+;   mv a0,a1
+;   ret
+
+function %bmask_i32_i128(i32) -> i128 {
+block0(v0: i32):
+  v1 = bmask.i128 v0
+  return v1
+}
+
+; block0:
+;   li a1,-1
+;   select_reg a1,zero,a1##condition=(zero eq a0)
+;   mv a0,a1
+;   ret
+
+function %bmask_i16_i128(i16) -> i128 {
+block0(v0: i16):
+  v1 = bmask.i128 v0
+  return v1
+}
+
+; block0:
+;   li a1,-1
+;   select_reg a1,zero,a1##condition=(zero eq a0)
+;   mv a0,a1
+;   ret
+
+function %bmask_i8_i128(i8) -> i128 {
+block0(v0: i8):
+  v1 = bmask.i128 v0
+  return v1
+}
+
+; block0:
+;   li a1,-1
+;   select_reg a1,zero,a1##condition=(zero eq a0)
+;   mv a0,a1
+;   ret
+
--- a/cranelift/filetests/filetests/isa/riscv64/iconst-icmp-small.clif
+++ b/cranelift/filetests/filetests/isa/riscv64/iconst-icmp-small.clif
@@ -7,18 +7,16 @@ function u0:0() -> i8 system_v {
 block0:
    v0 = iconst.i16 0xddcc
    v1 = icmp.i16 ne v0, v0
-    v2 = bint.i8 v1
-    return v2
+    return v1
 }

 ; block0:
-;   lui t2,14
-;   addi t2,t2,3532
-;   lui a2,14
-;   addi a2,a2,3532
-;   uext.h a5,t2
-;   uext.h a7,a2
-;   ne t4,a5,a7##ty=i16
-;   andi a0,t4,1
+;   lui t1,14
+;   addi t1,t1,3532
+;   lui a1,14
+;   addi a1,a1,3532
+;   uext.h a4,t1
+;   uext.h a6,a1
+;   ne a0,a4,a6##ty=i16
 ;   ret

--- a/cranelift/filetests/filetests/isa/riscv64/reftypes.clif
+++ b/cranelift/filetests/filetests/isa/riscv64/reftypes.clif
@@ -10,7 +10,7 @@ block0(v0: r64):
 ; block0:
 ;   ret

-function %f1(r64) -> b1 {
+function %f1(r64) -> i8 {
 block0(v0: r64):
  v1 = is_null v0
  return v1
@@ -20,7 +20,7 @@ block0(v0: r64):
 ;   is_null a0,a0
 ;   ret

-function %f2(r64) -> b1 {
+function %f2(r64) -> i8 {
 block0(v0: r64):
  v1 = is_invalid v0
  return v1
@@ -41,7 +41,7 @@ block0:
 ;   ret

 function %f4(r64, r64) -> r64, r64, r64 {
-    fn0 = %f(r64) -> b1
+    fn0 = %f(r64) -> i8
    ss0 = explicit_slot 8

 block0(v0: r64, v1: r64):
@@ -65,37 +65,38 @@ block3(v7: r64, v8: r64):
 ;   sd ra,8(sp)
 ;   sd fp,0(sp)
 ;   mv fp,sp
-;   sd s9,-8(sp)
+;   sd s10,-8(sp)
 ;   add sp,-48
 ; block0:
 ;   sd a0,8(nominal_sp)
 ;   sd a1,16(nominal_sp)
-;   mv s9,a2
-;   load_sym a3,%f+0
-;   callind a3
-;   load_addr a2,nsp+0
-;   ld t1,8(nominal_sp)
-;   sd t1,0(a2)
-;   beq a0,zero,taken(label1),not_taken(label3)
+;   mv s10,a2
+;   load_sym a4,%f+0
+;   callind a4
+;   load_addr a3,nsp+0
+;   ld t2,8(nominal_sp)
+;   sd t2,0(a3)
+;   andi a4,a0,255
+;   beq a4,zero,taken(label1),not_taken(label3)
 ; block1:
 ;   j label2
 ; block2:
-;   mv a1,t1
+;   mv a1,t2
 ;   ld a0,16(nominal_sp)
 ;   j label5
 ; block3:
 ;   j label4
 ; block4:
-;   mv a0,t1
+;   mv a0,t2
 ;   ld a1,16(nominal_sp)
 ;   j label5
 ; block5:
-;   load_addr a4,nsp+0
-;   ld a4,0(a4)
-;   mv a2,s9
-;   sd a4,0(a2)
+;   load_addr a5,nsp+0
+;   ld a5,0(a5)
+;   mv a2,s10
+;   sd a5,0(a2)
 ;   add sp,+48
-;   ld s9,-8(sp)
+;   ld s10,-8(sp)
 ;   ld ra,8(sp)
 ;   ld fp,0(sp)
 ;   add sp,+16
--- a/cranelift/filetests/filetests/isa/riscv64/stack.clif
+++ b/cranelift/filetests/filetests/isa/riscv64/stack.clif
@@ -144,10 +144,10 @@ block0(v0: i64):
 ;   add sp,+16
 ;   ret

-function %b1_spill_slot(b1) -> b1, i64 {
+function %i8_spill_slot(i8) -> i8, i64 {
    ss0 = explicit_slot 1000

-block0(v0: b1):
+block0(v0: i8):
  v1 = iconst.i64 1
  v2 = iconst.i64 2
  v3 = iconst.i64 3
--- a/cranelift/filetests/filetests/isa/s390x/condbr.clif
+++ b/cranelift/filetests/filetests/isa/s390x/condbr.clif
@@ -1,7 +1,7 @@
 test compile precise-output
 target s390x

-function %f(i64, i64) -> b1 {
+function %f(i64, i64) -> i8 {
 block0(v0: i64, v1: i64):
  v2 = icmp eq v0, v1
  return v2
--- a/cranelift/filetests/filetests/isa/s390x/condops.clif
+++ b/cranelift/filetests/filetests/isa/s390x/condops.clif
@@ -16,14 +16,14 @@ block0(v0: i8, v1: i64, v2: i64):
 ;   locgre %r2, %r3
 ;   br %r14

-function %g(b1, i8, i8) -> i8 {
-block0(v0: b1, v1: i8, v2: i8):
+function %g(i8, i8, i8) -> i8 {
+block0(v0: i8, v1: i8, v2: i8):
  v3 = select.i8 v0, v1, v2
  return v3
 }

 ; block0:
-;   llcr %r5, %r2
+;   lbr %r5, %r2
 ;   chi %r5, 0
 ;   lgr %r2, %r4
 ;   locrlh %r2, %r3
--- a/cranelift/filetests/filetests/isa/s390x/constants.clif
+++ b/cranelift/filetests/filetests/isa/s390x/constants.clif
@@ -1,19 +1,19 @@
 test compile precise-output
 target s390x

-function %f() -> b8 {
+function %f() -> i8 {
 block0:
-  v0 = bconst.b8 true
+  v0 = iconst.i8 -1
  return v0
 }

 ; block0:
-;   lhi %r2, 255
+;   lhi %r2, -1
 ;   br %r14

-function %f() -> b16 {
+function %f() -> i16 {
 block0:
-  v0 = bconst.b16 false
+  v0 = iconst.i16 0
  return v0
 }

--- a/cranelift/filetests/filetests/isa/s390x/conversions.clif
+++ b/cranelift/filetests/filetests/isa/s390x/conversions.clif
--- a/cranelift/filetests/filetests/isa/s390x/icmp-i128.clif
+++ b/cranelift/filetests/filetests/isa/s390x/icmp-i128.clif
@@ -1,7 +1,7 @@
 test compile precise-output
 target s390x

-function %icmp_eq_i128(i128, i128) -> b1 {
+function %icmp_eq_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp.i128 eq v0, v1
  return v2
@@ -15,7 +15,7 @@ block0(v0: i128, v1: i128):
 ;   lochie %r2, 1
 ;   br %r14

-function %icmp_ne_i128(i128, i128) -> b1 {
+function %icmp_ne_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp.i128 ne v0, v1
  return v2
@@ -29,7 +29,7 @@ block0(v0: i128, v1: i128):
 ;   lochine %r2, 1
 ;   br %r14

-function %icmp_slt_i128(i128, i128) -> b1 {
+function %icmp_slt_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp.i128 slt v0, v1
  return v2
@@ -43,7 +43,7 @@ block0(v0: i128, v1: i128):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_sgt_i128(i128, i128) -> b1 {
+function %icmp_sgt_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp.i128 sgt v0, v1
  return v2
@@ -57,7 +57,7 @@ block0(v0: i128, v1: i128):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_sle_i128(i128, i128) -> b1 {
+function %icmp_sle_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp.i128 sle v0, v1
  return v2
@@ -71,7 +71,7 @@ block0(v0: i128, v1: i128):
 ;   lochinl %r2, 1
 ;   br %r14

-function %icmp_sge_i128(i128, i128) -> b1 {
+function %icmp_sge_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp.i128 sge v0, v1
  return v2
@@ -85,7 +85,7 @@ block0(v0: i128, v1: i128):
 ;   lochinl %r2, 1
 ;   br %r14

-function %icmp_ult_i128(i128, i128) -> b1 {
+function %icmp_ult_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp.i128 ult v0, v1
  return v2
@@ -99,7 +99,7 @@ block0(v0: i128, v1: i128):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_ugt_i128(i128, i128) -> b1 {
+function %icmp_ugt_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp.i128 ugt v0, v1
  return v2
@@ -113,7 +113,7 @@ block0(v0: i128, v1: i128):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_ule_i128(i128, i128) -> b1 {
+function %icmp_ule_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp.i128 ule v0, v1
  return v2
@@ -127,7 +127,7 @@ block0(v0: i128, v1: i128):
 ;   lochinl %r2, 1
 ;   br %r14

-function %icmp_uge_i128(i128, i128) -> b1 {
+function %icmp_uge_i128(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
  v2 = icmp.i128 uge v0, v1
  return v2
--- a/cranelift/filetests/filetests/isa/s390x/icmp.clif
+++ b/cranelift/filetests/filetests/isa/s390x/icmp.clif
@@ -1,7 +1,7 @@
 test compile precise-output
 target s390x

-function %icmp_slt_i64(i64, i64) -> b1 {
+function %icmp_slt_i64(i64, i64) -> i8 {
 block0(v0: i64, v1: i64):
  v2 = icmp.i64 slt v0, v1
  return v2
@@ -13,7 +13,7 @@ block0(v0: i64, v1: i64):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_slt_i64_ext32(i64, i32) -> b1 {
+function %icmp_slt_i64_ext32(i64, i32) -> i8 {
 block0(v0: i64, v1: i32):
  v2 = sextend.i64 v1
  v3 = icmp.i64 slt v0, v2
@@ -26,7 +26,7 @@ block0(v0: i64, v1: i32):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_slt_i64_imm16(i64) -> b1 {
+function %icmp_slt_i64_imm16(i64) -> i8 {
 block0(v0: i64):
  v1 = iconst.i64 1
  v2 = icmp.i64 slt v0, v1
@@ -39,7 +39,7 @@ block0(v0: i64):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_slt_i64_imm32(i64) -> b1 {
+function %icmp_slt_i64_imm32(i64) -> i8 {
 block0(v0: i64):
  v1 = iconst.i64 32768
  v2 = icmp.i64 slt v0, v1
@@ -52,7 +52,7 @@ block0(v0: i64):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_slt_i64_mem(i64, i64) -> b1 {
+function %icmp_slt_i64_mem(i64, i64) -> i8 {
 block0(v0: i64, v1: i64):
  v2 = load.i64 v1
  v3 = icmp.i64 slt v0, v2
@@ -65,7 +65,7 @@ block0(v0: i64, v1: i64):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_slt_i64_sym(i64) -> b1 {
+function %icmp_slt_i64_sym(i64) -> i8 {
  gv0 = symbol colocated %sym
 block0(v0: i64):
  v1 = symbol_value.i64 gv0
@@ -80,7 +80,7 @@ block0(v0: i64):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_slt_i64_mem_ext16(i64, i64) -> b1 {
+function %icmp_slt_i64_mem_ext16(i64, i64) -> i8 {
 block0(v0: i64, v1: i64):
  v2 = sload16.i64 v1
  v3 = icmp.i64 slt v0, v2
@@ -93,7 +93,7 @@ block0(v0: i64, v1: i64):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_slt_i64_sym_ext16(i64) -> b1 {
+function %icmp_slt_i64_sym_ext16(i64) -> i8 {
  gv0 = symbol colocated %sym
 block0(v0: i64):
  v1 = symbol_value.i64 gv0
@@ -108,7 +108,7 @@ block0(v0: i64):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_slt_i64_mem_ext32(i64, i64) -> b1 {
+function %icmp_slt_i64_mem_ext32(i64, i64) -> i8 {
 block0(v0: i64, v1: i64):
  v2 = sload32.i64 v1
  v3 = icmp.i64 slt v0, v2
@@ -121,7 +121,7 @@ block0(v0: i64, v1: i64):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_slt_i64_sym_ext32(i64) -> b1 {
+function %icmp_slt_i64_sym_ext32(i64) -> i8 {
  gv0 = symbol colocated %sym
 block0(v0: i64):
  v1 = symbol_value.i64 gv0
@@ -136,7 +136,7 @@ block0(v0: i64):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_slt_i32(i32, i32) -> b1 {
+function %icmp_slt_i32(i32, i32) -> i8 {
 block0(v0: i32, v1: i32):
  v2 = icmp.i32 slt v0, v1
  return v2
@@ -148,7 +148,7 @@ block0(v0: i32, v1: i32):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_slt_i32_imm16(i32) -> b1 {
+function %icmp_slt_i32_imm16(i32) -> i8 {
 block0(v0: i32):
  v1 = iconst.i32 1
  v2 = icmp.i32 slt v0, v1
@@ -161,7 +161,7 @@ block0(v0: i32):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_slt_i32_imm(i32) -> b1 {
+function %icmp_slt_i32_imm(i32) -> i8 {
 block0(v0: i32):
  v1 = iconst.i32 32768
  v2 = icmp.i32 slt v0, v1
@@ -174,7 +174,7 @@ block0(v0: i32):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_slt_i32_mem(i32, i64) -> b1 {
+function %icmp_slt_i32_mem(i32, i64) -> i8 {
 block0(v0: i32, v1: i64):
  v2 = load.i32 v1
  v3 = icmp.i32 slt v0, v2
@@ -187,7 +187,7 @@ block0(v0: i32, v1: i64):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_slt_i32_memoff(i32, i64) -> b1 {
+function %icmp_slt_i32_memoff(i32, i64) -> i8 {
 block0(v0: i32, v1: i64):
  v2 = load.i32 v1+4096
  v3 = icmp.i32 slt v0, v2
@@ -200,7 +200,7 @@ block0(v0: i32, v1: i64):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_slt_i32_sym(i32) -> b1 {
+function %icmp_slt_i32_sym(i32) -> i8 {
  gv0 = symbol colocated %sym
 block0(v0: i32):
  v1 = symbol_value.i64 gv0
@@ -215,7 +215,7 @@ block0(v0: i32):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_slt_i32_mem_ext16(i32, i64) -> b1 {
+function %icmp_slt_i32_mem_ext16(i32, i64) -> i8 {
 block0(v0: i32, v1: i64):
  v2 = sload16.i32 v1
  v3 = icmp.i32 slt v0, v2
@@ -228,7 +228,7 @@ block0(v0: i32, v1: i64):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_slt_i32_memoff_ext16(i32, i64) -> b1 {
+function %icmp_slt_i32_memoff_ext16(i32, i64) -> i8 {
 block0(v0: i32, v1: i64):
  v2 = sload16.i32 v1+4096
  v3 = icmp.i32 slt v0, v2
@@ -241,7 +241,7 @@ block0(v0: i32, v1: i64):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_slt_i32_sym_ext16(i32) -> b1 {
+function %icmp_slt_i32_sym_ext16(i32) -> i8 {
  gv0 = symbol colocated %sym
 block0(v0: i32):
  v1 = symbol_value.i64 gv0
@@ -256,7 +256,7 @@ block0(v0: i32):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_slt_i16(i16, i16) -> b1 {
+function %icmp_slt_i16(i16, i16) -> i8 {
 block0(v0: i16, v1: i16):
  v2 = icmp.i16 slt v0, v1
  return v2
@@ -270,7 +270,7 @@ block0(v0: i16, v1: i16):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_slt_i16_imm(i16) -> b1 {
+function %icmp_slt_i16_imm(i16) -> i8 {
 block0(v0: i16):
  v1 = iconst.i16 1
  v2 = icmp.i16 slt v0, v1
@@ -284,7 +284,7 @@ block0(v0: i16):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_slt_i16_mem(i16, i64) -> b1 {
+function %icmp_slt_i16_mem(i16, i64) -> i8 {
 block0(v0: i16, v1: i64):
  v2 = load.i16 v1
  v3 = icmp.i16 slt v0, v2
@@ -298,7 +298,7 @@ block0(v0: i16, v1: i64):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_slt_i16_sym(i16) -> b1 {
+function %icmp_slt_i16_sym(i16) -> i8 {
  gv0 = symbol colocated %sym
 block0(v0: i16):
  v1 = symbol_value.i64 gv0
@@ -314,7 +314,7 @@ block0(v0: i16):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_slt_i8(i8, i8) -> b1 {
+function %icmp_slt_i8(i8, i8) -> i8 {
 block0(v0: i8, v1: i8):
  v2 = icmp.i8 slt v0, v1
  return v2
@@ -328,7 +328,7 @@ block0(v0: i8, v1: i8):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_slt_i8_imm(i8) -> b1 {
+function %icmp_slt_i8_imm(i8) -> i8 {
 block0(v0: i8):
  v1 = iconst.i8 1
  v2 = icmp.i8 slt v0, v1
@@ -342,7 +342,7 @@ block0(v0: i8):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_slt_i8_mem(i8, i64) -> b1 {
+function %icmp_slt_i8_mem(i8, i64) -> i8 {
 block0(v0: i8, v1: i64):
  v2 = load.i8 v1
  v3 = icmp.i8 slt v0, v2
@@ -357,7 +357,7 @@ block0(v0: i8, v1: i64):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_ult_i64(i64, i64) -> b1 {
+function %icmp_ult_i64(i64, i64) -> i8 {
 block0(v0: i64, v1: i64):
  v2 = icmp.i64 ult v0, v1
  return v2
@@ -369,7 +369,7 @@ block0(v0: i64, v1: i64):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_ult_i64_ext32(i64, i32) -> b1 {
+function %icmp_ult_i64_ext32(i64, i32) -> i8 {
 block0(v0: i64, v1: i32):
  v2 = uextend.i64 v1
  v3 = icmp.i64 ult v0, v2
@@ -382,7 +382,7 @@ block0(v0: i64, v1: i32):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_ult_i64_imm(i64) -> b1 {
+function %icmp_ult_i64_imm(i64) -> i8 {
 block0(v0: i64):
  v1 = iconst.i64 1
  v2 = icmp.i64 ult v0, v1
@@ -395,7 +395,7 @@ block0(v0: i64):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_ult_i64_mem(i64, i64) -> b1 {
+function %icmp_ult_i64_mem(i64, i64) -> i8 {
 block0(v0: i64, v1: i64):
  v2 = load.i64 v1
  v3 = icmp.i64 ult v0, v2
@@ -408,7 +408,7 @@ block0(v0: i64, v1: i64):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_ult_i64_sym(i64) -> b1 {
+function %icmp_ult_i64_sym(i64) -> i8 {
  gv0 = symbol colocated %sym
 block0(v0: i64):
  v1 = symbol_value.i64 gv0
@@ -423,7 +423,7 @@ block0(v0: i64):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_ult_i64_mem_ext32(i64, i64) -> b1 {
+function %icmp_ult_i64_mem_ext32(i64, i64) -> i8 {
 block0(v0: i64, v1: i64):
  v2 = uload32.i64 v1
  v3 = icmp.i64 ult v0, v2
@@ -436,7 +436,7 @@ block0(v0: i64, v1: i64):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_ult_i64_sym_ext32(i64) -> b1 {
+function %icmp_ult_i64_sym_ext32(i64) -> i8 {
  gv0 = symbol colocated %sym
 block0(v0: i64):
  v1 = symbol_value.i64 gv0
@@ -451,7 +451,7 @@ block0(v0: i64):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_ult_i64_mem_ext16(i64, i64) -> b1 {
+function %icmp_ult_i64_mem_ext16(i64, i64) -> i8 {
 block0(v0: i64, v1: i64):
  v2 = uload16.i64 v1
  v3 = icmp.i64 ult v0, v2
@@ -465,7 +465,7 @@ block0(v0: i64, v1: i64):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_ult_i64_sym_ext16(i64) -> b1 {
+function %icmp_ult_i64_sym_ext16(i64) -> i8 {
  gv0 = symbol colocated %sym
 block0(v0: i64):
  v1 = symbol_value.i64 gv0
@@ -480,7 +480,7 @@ block0(v0: i64):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_ult_i32(i32, i32) -> b1 {
+function %icmp_ult_i32(i32, i32) -> i8 {
 block0(v0: i32, v1: i32):
  v2 = icmp.i32 ult v0, v1
  return v2
@@ -492,7 +492,7 @@ block0(v0: i32, v1: i32):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_ult_i32_imm(i32) -> b1 {
+function %icmp_ult_i32_imm(i32) -> i8 {
 block0(v0: i32):
  v1 = iconst.i32 1
  v2 = icmp.i32 ult v0, v1
@@ -505,7 +505,7 @@ block0(v0: i32):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_ult_i32_mem(i32, i64) -> b1 {
+function %icmp_ult_i32_mem(i32, i64) -> i8 {
 block0(v0: i32, v1: i64):
  v2 = load.i32 v1
  v3 = icmp.i32 ult v0, v2
@@ -518,7 +518,7 @@ block0(v0: i32, v1: i64):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_ult_i32_memoff(i32, i64) -> b1 {
+function %icmp_ult_i32_memoff(i32, i64) -> i8 {
 block0(v0: i32, v1: i64):
  v2 = load.i32 v1+4096
  v3 = icmp.i32 ult v0, v2
@@ -531,7 +531,7 @@ block0(v0: i32, v1: i64):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_ult_i32_sym(i32) -> b1 {
+function %icmp_ult_i32_sym(i32) -> i8 {
  gv0 = symbol colocated %sym
 block0(v0: i32):
  v1 = symbol_value.i64 gv0
@@ -546,7 +546,7 @@ block0(v0: i32):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_ult_i32_mem_ext16(i32, i64) -> b1 {
+function %icmp_ult_i32_mem_ext16(i32, i64) -> i8 {
 block0(v0: i32, v1: i64):
  v2 = uload16.i32 v1
  v3 = icmp.i32 ult v0, v2
@@ -560,7 +560,7 @@ block0(v0: i32, v1: i64):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_ult_i32_sym_ext16(i32) -> b1 {
+function %icmp_ult_i32_sym_ext16(i32) -> i8 {
  gv0 = symbol colocated %sym
 block0(v0: i32):
  v1 = symbol_value.i64 gv0
@@ -575,7 +575,7 @@ block0(v0: i32):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_ult_i16(i16, i16) -> b1 {
+function %icmp_ult_i16(i16, i16) -> i8 {
 block0(v0: i16, v1: i16):
  v2 = icmp.i16 ult v0, v1
  return v2
@@ -589,7 +589,7 @@ block0(v0: i16, v1: i16):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_ult_i16_imm(i16) -> b1 {
+function %icmp_ult_i16_imm(i16) -> i8 {
 block0(v0: i16):
  v1 = iconst.i16 1
  v2 = icmp.i16 ult v0, v1
@@ -603,7 +603,7 @@ block0(v0: i16):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_ult_i16_mem(i16, i64) -> b1 {
+function %icmp_ult_i16_mem(i16, i64) -> i8 {
 block0(v0: i16, v1: i64):
  v2 = load.i16 v1
  v3 = icmp.i16 ult v0, v2
@@ -618,7 +618,7 @@ block0(v0: i16, v1: i64):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_ult_i16_mem(i16) -> b1 {
+function %icmp_ult_i16_mem(i16) -> i8 {
  gv0 = symbol colocated %sym
 block0(v0: i16):
  v1 = symbol_value.i64 gv0
@@ -634,7 +634,7 @@ block0(v0: i16):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_ult_i8(i8, i8) -> b1 {
+function %icmp_ult_i8(i8, i8) -> i8 {
 block0(v0: i8, v1: i8):
  v2 = icmp.i8 ult v0, v1
  return v2
@@ -648,7 +648,7 @@ block0(v0: i8, v1: i8):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_ult_i8_imm(i8) -> b1 {
+function %icmp_ult_i8_imm(i8) -> i8 {
 block0(v0: i8):
  v1 = iconst.i8 1
  v2 = icmp.i8 ult v0, v1
@@ -662,7 +662,7 @@ block0(v0: i8):
 ;   lochil %r2, 1
 ;   br %r14

-function %icmp_ult_i8_mem(i8, i64) -> b1 {
+function %icmp_ult_i8_mem(i8, i64) -> i8 {
 block0(v0: i8, v1: i64):
  v2 = load.i8 v1
  v3 = icmp.i8 ult v0, v2
--- a/cranelift/filetests/filetests/isa/s390x/reftypes.clif
+++ b/cranelift/filetests/filetests/isa/s390x/reftypes.clif
@@ -10,7 +10,7 @@ block0(v0: r64, v1: r64):
 ;   lgr %r2, %r3
 ;   br %r14

-function %f1(r64) -> b1 {
+function %f1(r64) -> i8 {
 block0(v0: r64):
  v1 = is_null v0
  return v1
@@ -22,7 +22,7 @@ block0(v0: r64):
 ;   lochie %r2, 1
 ;   br %r14

-function %f2(r64) -> b1 {
+function %f2(r64) -> i8 {
 block0(v0: r64):
  v1 = is_invalid v0
  return v1
@@ -45,7 +45,7 @@ block0:
 ;   br %r14

 function %f4(r64, r64) -> r64, r64, r64 {
-    fn0 = %f(r64) -> b1
+    fn0 = %f(r64) -> i8
    ss0 = explicit_slot 8

 block0(v0: r64, v1: r64):
@@ -76,7 +76,7 @@ block3(v7: r64, v8: r64):
 ;   la %r5, 160(%r15)
 ;   lg %r3, 168(%r15)
 ;   stg %r3, 0(%r5)
-;   llcr %r2, %r2
+;   lbr %r2, %r2
 ;   chi %r2, 0
 ;   jgnlh label1 ; jg label3
 ; block1:
--- a/cranelift/filetests/filetests/isa/s390x/vec-bitwise.clif
+++ b/cranelift/filetests/filetests/isa/s390x/vec-bitwise.clif
@@ -322,8 +322,8 @@ block0(v0: i8x16, v1: i8x16, v2: i8x16):
 ;   vsel %v24, %v25, %v26, %v24
 ;   br %r14

-function %vselect_i64x2(b64x2, i64x2, i64x2) -> i64x2 {
-block0(v0: b64x2, v1: i64x2, v2: i64x2):
+function %vselect_i64x2(i64x2, i64x2, i64x2) -> i64x2 {
+block0(v0: i64x2, v1: i64x2, v2: i64x2):
  v3 = vselect.i64x2 v0, v1, v2
  return v3
 }
@@ -332,8 +332,8 @@ block0(v0: b64x2, v1: i64x2, v2: i64x2):
 ;   vsel %v24, %v25, %v26, %v24
 ;   br %r14

-function %vselect_i32x4(b32x4, i32x4, i32x4) -> i32x4 {
-block0(v0: b32x4, v1: i32x4, v2: i32x4):
+function %vselect_i32x4(i32x4, i32x4, i32x4) -> i32x4 {
+block0(v0: i32x4, v1: i32x4, v2: i32x4):
  v3 = vselect.i32x4 v0, v1, v2
  return v3
 }
@@ -342,8 +342,8 @@ block0(v0: b32x4, v1: i32x4, v2: i32x4):
 ;   vsel %v24, %v25, %v26, %v24
 ;   br %r14

-function %vselect_i16x8(b16x8, i16x8, i16x8) -> i16x8 {
-block0(v0: b16x8, v1: i16x8, v2: i16x8):
+function %vselect_i16x8(i16x8, i16x8, i16x8) -> i16x8 {
+block0(v0: i16x8, v1: i16x8, v2: i16x8):
  v3 = vselect.i16x8 v0, v1, v2
  return v3
 }
@@ -352,8 +352,8 @@ block0(v0: b16x8, v1: i16x8, v2: i16x8):
 ;   vsel %v24, %v25, %v26, %v24
 ;   br %r14

-function %vselect_i8x16(b8x16, i8x16, i8x16) -> i8x16 {
-block0(v0: b8x16, v1: i8x16, v2: i8x16):
+function %vselect_i8x16(i8x16, i8x16, i8x16) -> i8x16 {
+block0(v0: i8x16, v1: i8x16, v2: i8x16):
  v3 = vselect.i8x16 v0, v1, v2
  return v3
 }
--- a/cranelift/filetests/filetests/isa/s390x/vec-fcmp.clif
+++ b/cranelift/filetests/filetests/isa/s390x/vec-fcmp.clif
@@ -1,7 +1,7 @@
 test compile precise-output
 target s390x

-function %fcmp_eq_f64x2(f64x2, f64x2) -> b64x2 {
+function %fcmp_eq_f64x2(f64x2, f64x2) -> i64x2 {
 block0(v0: f64x2, v1: f64x2):
  v2 = fcmp.f64x2 eq v0, v1
  return v2
@@ -11,7 +11,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   vfcedb %v24, %v24, %v25
 ;   br %r14

-function %fcmp_ne_f64x2(f64x2, f64x2) -> b64x2 {
+function %fcmp_ne_f64x2(f64x2, f64x2) -> i64x2 {
 block0(v0: f64x2, v1: f64x2):
  v2 = fcmp.f64x2 ne v0, v1
  return v2
@@ -22,7 +22,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %fcmp_gt_f64x2(f64x2, f64x2) -> b64x2 {
+function %fcmp_gt_f64x2(f64x2, f64x2) -> i64x2 {
 block0(v0: f64x2, v1: f64x2):
  v2 = fcmp.f64x2 gt v0, v1
  return v2
@@ -32,7 +32,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   vfchdb %v24, %v24, %v25
 ;   br %r14

-function %fcmp_lt_f64x2(f64x2, f64x2) -> b64x2 {
+function %fcmp_lt_f64x2(f64x2, f64x2) -> i64x2 {
 block0(v0: f64x2, v1: f64x2):
  v2 = fcmp.f64x2 lt v0, v1
  return v2
@@ -42,7 +42,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   vfchdb %v24, %v25, %v24
 ;   br %r14

-function %fcmp_ge_f64x2(f64x2, f64x2) -> b64x2 {
+function %fcmp_ge_f64x2(f64x2, f64x2) -> i64x2 {
 block0(v0: f64x2, v1: f64x2):
  v2 = fcmp.f64x2 ge v0, v1
  return v2
@@ -52,7 +52,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   vfchedb %v24, %v24, %v25
 ;   br %r14

-function %fcmp_le_f64x2(f64x2, f64x2) -> b64x2 {
+function %fcmp_le_f64x2(f64x2, f64x2) -> i64x2 {
 block0(v0: f64x2, v1: f64x2):
  v2 = fcmp.f64x2 le v0, v1
  return v2
@@ -62,7 +62,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   vfchedb %v24, %v25, %v24
 ;   br %r14

-function %fcmp_ueq_f64x2(f64x2, f64x2) -> b64x2 {
+function %fcmp_ueq_f64x2(f64x2, f64x2) -> i64x2 {
 block0(v0: f64x2, v1: f64x2):
  v2 = fcmp.f64x2 ueq v0, v1
  return v2
@@ -74,7 +74,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   vno %v24, %v4, %v6
 ;   br %r14

-function %fcmp_one_f64x2(f64x2, f64x2) -> b64x2 {
+function %fcmp_one_f64x2(f64x2, f64x2) -> i64x2 {
 block0(v0: f64x2, v1: f64x2):
  v2 = fcmp.f64x2 one v0, v1
  return v2
@@ -86,7 +86,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   vo %v24, %v4, %v6
 ;   br %r14

-function %fcmp_ugt_f64x2(f64x2, f64x2) -> b64x2 {
+function %fcmp_ugt_f64x2(f64x2, f64x2) -> i64x2 {
 block0(v0: f64x2, v1: f64x2):
  v2 = fcmp.f64x2 ugt v0, v1
  return v2
@@ -97,7 +97,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %fcmp_ult_f64x2(f64x2, f64x2) -> b64x2 {
+function %fcmp_ult_f64x2(f64x2, f64x2) -> i64x2 {
 block0(v0: f64x2, v1: f64x2):
  v2 = fcmp.f64x2 ult v0, v1
  return v2
@@ -108,7 +108,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %fcmp_uge_f64x2(f64x2, f64x2) -> b64x2 {
+function %fcmp_uge_f64x2(f64x2, f64x2) -> i64x2 {
 block0(v0: f64x2, v1: f64x2):
  v2 = fcmp.f64x2 uge v0, v1
  return v2
@@ -119,7 +119,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %fcmp_ule_f64x2(f64x2, f64x2) -> b64x2 {
+function %fcmp_ule_f64x2(f64x2, f64x2) -> i64x2 {
 block0(v0: f64x2, v1: f64x2):
  v2 = fcmp.f64x2 ule v0, v1
  return v2
@@ -130,7 +130,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %fcmp_ord_f64x2(f64x2, f64x2) -> b64x2 {
+function %fcmp_ord_f64x2(f64x2, f64x2) -> i64x2 {
 block0(v0: f64x2, v1: f64x2):
  v2 = fcmp.f64x2 ord v0, v1
  return v2
@@ -142,7 +142,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   vo %v24, %v4, %v6
 ;   br %r14

-function %fcmp_uno_f64x2(f64x2, f64x2) -> b64x2 {
+function %fcmp_uno_f64x2(f64x2, f64x2) -> i64x2 {
 block0(v0: f64x2, v1: f64x2):
  v2 = fcmp.f64x2 uno v0, v1
  return v2
@@ -154,7 +154,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   vno %v24, %v4, %v6
 ;   br %r14

-function %fcmp_eq_f32x4(f32x4, f32x4) -> b32x4 {
+function %fcmp_eq_f32x4(f32x4, f32x4) -> i32x4 {
 block0(v0: f32x4, v1: f32x4):
  v2 = fcmp.f32x4 eq v0, v1
  return v2
@@ -164,7 +164,7 @@ block0(v0: f32x4, v1: f32x4):
 ;   vfcesb %v24, %v24, %v25
 ;   br %r14

-function %fcmp_ne_f32x4(f32x4, f32x4) -> b32x4 {
+function %fcmp_ne_f32x4(f32x4, f32x4) -> i32x4 {
 block0(v0: f32x4, v1: f32x4):
  v2 = fcmp.f32x4 ne v0, v1
  return v2
@@ -175,7 +175,7 @@ block0(v0: f32x4, v1: f32x4):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %fcmp_gt_f32x4(f32x4, f32x4) -> b32x4 {
+function %fcmp_gt_f32x4(f32x4, f32x4) -> i32x4 {
 block0(v0: f32x4, v1: f32x4):
  v2 = fcmp.f32x4 gt v0, v1
  return v2
@@ -185,7 +185,7 @@ block0(v0: f32x4, v1: f32x4):
 ;   vfchsb %v24, %v24, %v25
 ;   br %r14

-function %fcmp_lt_f32x4(f32x4, f32x4) -> b32x4 {
+function %fcmp_lt_f32x4(f32x4, f32x4) -> i32x4 {
 block0(v0: f32x4, v1: f32x4):
  v2 = fcmp.f32x4 lt v0, v1
  return v2
@@ -195,7 +195,7 @@ block0(v0: f32x4, v1: f32x4):
 ;   vfchsb %v24, %v25, %v24
 ;   br %r14

-function %fcmp_ge_f32x4(f32x4, f32x4) -> b32x4 {
+function %fcmp_ge_f32x4(f32x4, f32x4) -> i32x4 {
 block0(v0: f32x4, v1: f32x4):
  v2 = fcmp.f32x4 ge v0, v1
  return v2
@@ -205,7 +205,7 @@ block0(v0: f32x4, v1: f32x4):
 ;   vfchesb %v24, %v24, %v25
 ;   br %r14

-function %fcmp_le_f32x4(f32x4, f32x4) -> b32x4 {
+function %fcmp_le_f32x4(f32x4, f32x4) -> i32x4 {
 block0(v0: f32x4, v1: f32x4):
  v2 = fcmp.f32x4 le v0, v1
  return v2
@@ -215,7 +215,7 @@ block0(v0: f32x4, v1: f32x4):
 ;   vfchesb %v24, %v25, %v24
 ;   br %r14

-function %fcmp_ueq_f32x4(f32x4, f32x4) -> b32x4 {
+function %fcmp_ueq_f32x4(f32x4, f32x4) -> i32x4 {
 block0(v0: f32x4, v1: f32x4):
  v2 = fcmp.f32x4 ueq v0, v1
  return v2
@@ -227,7 +227,7 @@ block0(v0: f32x4, v1: f32x4):
 ;   vno %v24, %v4, %v6
 ;   br %r14

-function %fcmp_one_f32x4(f32x4, f32x4) -> b32x4 {
+function %fcmp_one_f32x4(f32x4, f32x4) -> i32x4 {
 block0(v0: f32x4, v1: f32x4):
  v2 = fcmp.f32x4 one v0, v1
  return v2
@@ -239,7 +239,7 @@ block0(v0: f32x4, v1: f32x4):
 ;   vo %v24, %v4, %v6
 ;   br %r14

-function %fcmp_ugt_f32x4(f32x4, f32x4) -> b32x4 {
+function %fcmp_ugt_f32x4(f32x4, f32x4) -> i32x4 {
 block0(v0: f32x4, v1: f32x4):
  v2 = fcmp.f32x4 ugt v0, v1
  return v2
@@ -250,7 +250,7 @@ block0(v0: f32x4, v1: f32x4):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %fcmp_ult_f32x4(f32x4, f32x4) -> b32x4 {
+function %fcmp_ult_f32x4(f32x4, f32x4) -> i32x4 {
 block0(v0: f32x4, v1: f32x4):
  v2 = fcmp.f32x4 ult v0, v1
  return v2
@@ -261,7 +261,7 @@ block0(v0: f32x4, v1: f32x4):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %fcmp_uge_f32x4(f32x4, f32x4) -> b32x4 {
+function %fcmp_uge_f32x4(f32x4, f32x4) -> i32x4 {
 block0(v0: f32x4, v1: f32x4):
  v2 = fcmp.f32x4 uge v0, v1
  return v2
@@ -272,7 +272,7 @@ block0(v0: f32x4, v1: f32x4):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %fcmp_ule_f32x4(f32x4, f32x4) -> b32x4 {
+function %fcmp_ule_f32x4(f32x4, f32x4) -> i32x4 {
 block0(v0: f32x4, v1: f32x4):
  v2 = fcmp.f32x4 ule v0, v1
  return v2
@@ -283,7 +283,7 @@ block0(v0: f32x4, v1: f32x4):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %fcmp_ord_f32x4(f32x4, f32x4) -> b32x4 {
+function %fcmp_ord_f32x4(f32x4, f32x4) -> i32x4 {
 block0(v0: f32x4, v1: f32x4):
  v2 = fcmp.f32x4 ord v0, v1
  return v2
@@ -295,7 +295,7 @@ block0(v0: f32x4, v1: f32x4):
 ;   vo %v24, %v4, %v6
 ;   br %r14

-function %fcmp_uno_f32x4(f32x4, f32x4) -> b32x4 {
+function %fcmp_uno_f32x4(f32x4, f32x4) -> i32x4 {
 block0(v0: f32x4, v1: f32x4):
  v2 = fcmp.f32x4 uno v0, v1
  return v2
--- a/cranelift/filetests/filetests/isa/s390x/vec-icmp.clif
+++ b/cranelift/filetests/filetests/isa/s390x/vec-icmp.clif
@@ -1,7 +1,7 @@
 test compile precise-output
 target s390x

-function %icmp_eq_i64x2(i64x2, i64x2) -> b64x2 {
+function %icmp_eq_i64x2(i64x2, i64x2) -> i64x2 {
 block0(v0: i64x2, v1: i64x2):
  v2 = icmp.i64x2 eq v0, v1
  return v2
@@ -11,7 +11,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   vceqg %v24, %v24, %v25
 ;   br %r14

-function %icmp_ne_i64x2(i64x2, i64x2) -> b64x2 {
+function %icmp_ne_i64x2(i64x2, i64x2) -> i64x2 {
 block0(v0: i64x2, v1: i64x2):
  v2 = icmp.i64x2 ne v0, v1
  return v2
@@ -22,7 +22,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %icmp_sgt_i64x2(i64x2, i64x2) -> b64x2 {
+function %icmp_sgt_i64x2(i64x2, i64x2) -> i64x2 {
 block0(v0: i64x2, v1: i64x2):
  v2 = icmp.i64x2 sgt v0, v1
  return v2
@@ -32,7 +32,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   vchg %v24, %v24, %v25
 ;   br %r14

-function %icmp_slt_i64x2(i64x2, i64x2) -> b64x2 {
+function %icmp_slt_i64x2(i64x2, i64x2) -> i64x2 {
 block0(v0: i64x2, v1: i64x2):
  v2 = icmp.i64x2 slt v0, v1
  return v2
@@ -42,7 +42,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   vchg %v24, %v25, %v24
 ;   br %r14

-function %icmp_sge_i64x2(i64x2, i64x2) -> b64x2 {
+function %icmp_sge_i64x2(i64x2, i64x2) -> i64x2 {
 block0(v0: i64x2, v1: i64x2):
  v2 = icmp.i64x2 sge v0, v1
  return v2
@@ -53,7 +53,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %icmp_sle_i64x2(i64x2, i64x2) -> b64x2 {
+function %icmp_sle_i64x2(i64x2, i64x2) -> i64x2 {
 block0(v0: i64x2, v1: i64x2):
  v2 = icmp.i64x2 sle v0, v1
  return v2
@@ -64,7 +64,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %icmp_ugt_i64x2(i64x2, i64x2) -> b64x2 {
+function %icmp_ugt_i64x2(i64x2, i64x2) -> i64x2 {
 block0(v0: i64x2, v1: i64x2):
  v2 = icmp.i64x2 ugt v0, v1
  return v2
@@ -74,7 +74,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   vchlg %v24, %v24, %v25
 ;   br %r14

-function %icmp_ult_i64x2(i64x2, i64x2) -> b64x2 {
+function %icmp_ult_i64x2(i64x2, i64x2) -> i64x2 {
 block0(v0: i64x2, v1: i64x2):
  v2 = icmp.i64x2 ult v0, v1
  return v2
@@ -84,7 +84,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   vchlg %v24, %v25, %v24
 ;   br %r14

-function %icmp_uge_i64x2(i64x2, i64x2) -> b64x2 {
+function %icmp_uge_i64x2(i64x2, i64x2) -> i64x2 {
 block0(v0: i64x2, v1: i64x2):
  v2 = icmp.i64x2 uge v0, v1
  return v2
@@ -95,7 +95,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %icmp_ule_i64x2(i64x2, i64x2) -> b64x2 {
+function %icmp_ule_i64x2(i64x2, i64x2) -> i64x2 {
 block0(v0: i64x2, v1: i64x2):
  v2 = icmp.i64x2 ule v0, v1
  return v2
@@ -106,7 +106,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %icmp_eq_i32x4(i32x4, i32x4) -> b32x4 {
+function %icmp_eq_i32x4(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
  v2 = icmp.i32x4 eq v0, v1
  return v2
@@ -116,7 +116,7 @@ block0(v0: i32x4, v1: i32x4):
 ;   vceqf %v24, %v24, %v25
 ;   br %r14

-function %icmp_ne_i32x4(i32x4, i32x4) -> b32x4 {
+function %icmp_ne_i32x4(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
  v2 = icmp.i32x4 ne v0, v1
  return v2
@@ -127,7 +127,7 @@ block0(v0: i32x4, v1: i32x4):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %icmp_sgt_i32x4(i32x4, i32x4) -> b32x4 {
+function %icmp_sgt_i32x4(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
  v2 = icmp.i32x4 sgt v0, v1
  return v2
@@ -137,7 +137,7 @@ block0(v0: i32x4, v1: i32x4):
 ;   vchf %v24, %v24, %v25
 ;   br %r14

-function %icmp_slt_i32x4(i32x4, i32x4) -> b32x4 {
+function %icmp_slt_i32x4(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
  v2 = icmp.i32x4 slt v0, v1
  return v2
@@ -147,7 +147,7 @@ block0(v0: i32x4, v1: i32x4):
 ;   vchf %v24, %v25, %v24
 ;   br %r14

-function %icmp_sge_i32x4(i32x4, i32x4) -> b32x4 {
+function %icmp_sge_i32x4(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
  v2 = icmp.i32x4 sge v0, v1
  return v2
@@ -158,7 +158,7 @@ block0(v0: i32x4, v1: i32x4):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %icmp_sle_i32x4(i32x4, i32x4) -> b32x4 {
+function %icmp_sle_i32x4(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
  v2 = icmp.i32x4 sle v0, v1
  return v2
@@ -169,7 +169,7 @@ block0(v0: i32x4, v1: i32x4):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %icmp_ugt_i32x4(i32x4, i32x4) -> b32x4 {
+function %icmp_ugt_i32x4(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
  v2 = icmp.i32x4 ugt v0, v1
  return v2
@@ -179,7 +179,7 @@ block0(v0: i32x4, v1: i32x4):
 ;   vchlf %v24, %v24, %v25
 ;   br %r14

-function %icmp_ult_i32x4(i32x4, i32x4) -> b32x4 {
+function %icmp_ult_i32x4(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
  v2 = icmp.i32x4 ult v0, v1
  return v2
@@ -189,7 +189,7 @@ block0(v0: i32x4, v1: i32x4):
 ;   vchlf %v24, %v25, %v24
 ;   br %r14

-function %icmp_uge_i32x4(i32x4, i32x4) -> b32x4 {
+function %icmp_uge_i32x4(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
  v2 = icmp.i32x4 uge v0, v1
  return v2
@@ -200,7 +200,7 @@ block0(v0: i32x4, v1: i32x4):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %icmp_ule_i32x4(i32x4, i32x4) -> b32x4 {
+function %icmp_ule_i32x4(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
  v2 = icmp.i32x4 ule v0, v1
  return v2
@@ -211,7 +211,7 @@ block0(v0: i32x4, v1: i32x4):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %icmp_eq_i16x8(i16x8, i16x8) -> b16x8 {
+function %icmp_eq_i16x8(i16x8, i16x8) -> i16x8 {
 block0(v0: i16x8, v1: i16x8):
  v2 = icmp.i16x8 eq v0, v1
  return v2
@@ -221,7 +221,7 @@ block0(v0: i16x8, v1: i16x8):
 ;   vceqh %v24, %v24, %v25
 ;   br %r14

-function %icmp_ne_i16x8(i16x8, i16x8) -> b16x8 {
+function %icmp_ne_i16x8(i16x8, i16x8) -> i16x8 {
 block0(v0: i16x8, v1: i16x8):
  v2 = icmp.i16x8 ne v0, v1
  return v2
@@ -232,7 +232,7 @@ block0(v0: i16x8, v1: i16x8):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %icmp_sgt_i16x8(i16x8, i16x8) -> b16x8 {
+function %icmp_sgt_i16x8(i16x8, i16x8) -> i16x8 {
 block0(v0: i16x8, v1: i16x8):
  v2 = icmp.i16x8 sgt v0, v1
  return v2
@@ -242,7 +242,7 @@ block0(v0: i16x8, v1: i16x8):
 ;   vchh %v24, %v24, %v25
 ;   br %r14

-function %icmp_slt_i16x8(i16x8, i16x8) -> b16x8 {
+function %icmp_slt_i16x8(i16x8, i16x8) -> i16x8 {
 block0(v0: i16x8, v1: i16x8):
  v2 = icmp.i16x8 slt v0, v1
  return v2
@@ -252,7 +252,7 @@ block0(v0: i16x8, v1: i16x8):
 ;   vchh %v24, %v25, %v24
 ;   br %r14

-function %icmp_sge_i16x8(i16x8, i16x8) -> b16x8 {
+function %icmp_sge_i16x8(i16x8, i16x8) -> i16x8 {
 block0(v0: i16x8, v1: i16x8):
  v2 = icmp.i16x8 sge v0, v1
  return v2
@@ -263,7 +263,7 @@ block0(v0: i16x8, v1: i16x8):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %icmp_sle_i16x8(i16x8, i16x8) -> b16x8 {
+function %icmp_sle_i16x8(i16x8, i16x8) -> i16x8 {
 block0(v0: i16x8, v1: i16x8):
  v2 = icmp.i16x8 sle v0, v1
  return v2
@@ -274,7 +274,7 @@ block0(v0: i16x8, v1: i16x8):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %icmp_ugt_i16x8(i16x8, i16x8) -> b16x8 {
+function %icmp_ugt_i16x8(i16x8, i16x8) -> i16x8 {
 block0(v0: i16x8, v1: i16x8):
  v2 = icmp.i16x8 ugt v0, v1
  return v2
@@ -284,7 +284,7 @@ block0(v0: i16x8, v1: i16x8):
 ;   vchlh %v24, %v24, %v25
 ;   br %r14

-function %icmp_ult_i16x8(i16x8, i16x8) -> b16x8 {
+function %icmp_ult_i16x8(i16x8, i16x8) -> i16x8 {
 block0(v0: i16x8, v1: i16x8):
  v2 = icmp.i16x8 ult v0, v1
  return v2
@@ -294,7 +294,7 @@ block0(v0: i16x8, v1: i16x8):
 ;   vchlh %v24, %v25, %v24
 ;   br %r14

-function %icmp_uge_i16x8(i16x8, i16x8) -> b16x8 {
+function %icmp_uge_i16x8(i16x8, i16x8) -> i16x8 {
 block0(v0: i16x8, v1: i16x8):
  v2 = icmp.i16x8 uge v0, v1
  return v2
@@ -305,7 +305,7 @@ block0(v0: i16x8, v1: i16x8):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %icmp_ule_i16x8(i16x8, i16x8) -> b16x8 {
+function %icmp_ule_i16x8(i16x8, i16x8) -> i16x8 {
 block0(v0: i16x8, v1: i16x8):
  v2 = icmp.i16x8 ule v0, v1
  return v2
@@ -316,7 +316,7 @@ block0(v0: i16x8, v1: i16x8):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %icmp_eq_i8x16(i8x16, i8x16) -> b8x16 {
+function %icmp_eq_i8x16(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
  v2 = icmp.i8x16 eq v0, v1
  return v2
@@ -326,7 +326,7 @@ block0(v0: i8x16, v1: i8x16):
 ;   vceqb %v24, %v24, %v25
 ;   br %r14

-function %icmp_ne_i8x16(i8x16, i8x16) -> b8x16 {
+function %icmp_ne_i8x16(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
  v2 = icmp.i8x16 ne v0, v1
  return v2
@@ -337,7 +337,7 @@ block0(v0: i8x16, v1: i8x16):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %icmp_sgt_i8x16(i8x16, i8x16) -> b8x16 {
+function %icmp_sgt_i8x16(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
  v2 = icmp.i8x16 sgt v0, v1
  return v2
@@ -347,7 +347,7 @@ block0(v0: i8x16, v1: i8x16):
 ;   vchb %v24, %v24, %v25
 ;   br %r14

-function %icmp_slt_i8x16(i8x16, i8x16) -> b8x16 {
+function %icmp_slt_i8x16(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
  v2 = icmp.i8x16 slt v0, v1
  return v2
@@ -357,7 +357,7 @@ block0(v0: i8x16, v1: i8x16):
 ;   vchb %v24, %v25, %v24
 ;   br %r14

-function %icmp_sge_i8x16(i8x16, i8x16) -> b8x16 {
+function %icmp_sge_i8x16(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
  v2 = icmp.i8x16 sge v0, v1
  return v2
@@ -368,7 +368,7 @@ block0(v0: i8x16, v1: i8x16):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %icmp_sle_i8x16(i8x16, i8x16) -> b8x16 {
+function %icmp_sle_i8x16(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
  v2 = icmp.i8x16 sle v0, v1
  return v2
@@ -379,7 +379,7 @@ block0(v0: i8x16, v1: i8x16):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %icmp_ugt_i8x16(i8x16, i8x16) -> b8x16 {
+function %icmp_ugt_i8x16(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
  v2 = icmp.i8x16 ugt v0, v1
  return v2
@@ -389,7 +389,7 @@ block0(v0: i8x16, v1: i8x16):
 ;   vchlb %v24, %v24, %v25
 ;   br %r14

-function %icmp_ult_i8x16(i8x16, i8x16) -> b8x16 {
+function %icmp_ult_i8x16(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
  v2 = icmp.i8x16 ult v0, v1
  return v2
@@ -399,7 +399,7 @@ block0(v0: i8x16, v1: i8x16):
 ;   vchlb %v24, %v25, %v24
 ;   br %r14

-function %icmp_uge_i8x16(i8x16, i8x16) -> b8x16 {
+function %icmp_uge_i8x16(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
  v2 = icmp.i8x16 uge v0, v1
  return v2
@@ -410,7 +410,7 @@ block0(v0: i8x16, v1: i8x16):
 ;   vno %v24, %v4, %v4
 ;   br %r14

-function %icmp_ule_i8x16(i8x16, i8x16) -> b8x16 {
+function %icmp_ule_i8x16(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
  v2 = icmp.i8x16 ule v0, v1
  return v2
--- a/cranelift/filetests/filetests/isa/s390x/vec-logical.clif
+++ b/cranelift/filetests/filetests/isa/s390x/vec-logical.clif
@@ -1,7 +1,7 @@
 test compile precise-output
 target s390x

-function %vany_true_i64x2(i64x2) -> b1 {
+function %vany_true_i64x2(i64x2) -> i8 {
 block0(v0: i64x2):
    v1 = vany_true v0
    return v1
@@ -14,7 +14,7 @@ block0(v0: i64x2):
 ;   lochine %r2, 1
 ;   br %r14

-function %vany_true_i32x4(i32x4) -> b1 {
+function %vany_true_i32x4(i32x4) -> i8 {
 block0(v0: i32x4):
    v1 = vany_true v0
    return v1
@@ -27,7 +27,7 @@ block0(v0: i32x4):
 ;   lochine %r2, 1
 ;   br %r14

-function %vany_true_i16x8(i16x8) -> b1 {
+function %vany_true_i16x8(i16x8) -> i8 {
 block0(v0: i16x8):
    v1 = vany_true v0
    return v1
@@ -40,7 +40,7 @@ block0(v0: i16x8):
 ;   lochine %r2, 1
 ;   br %r14

-function %vany_true_i8x16(i8x16) -> b1 {
+function %vany_true_i8x16(i8x16) -> i8 {
 block0(v0: i8x16):
    v1 = vany_true v0
    return v1
@@ -53,7 +53,7 @@ block0(v0: i8x16):
 ;   lochine %r2, 1
 ;   br %r14

-function %vall_true_i64x2(i64x2) -> b1 {
+function %vall_true_i64x2(i64x2) -> i8 {
 block0(v0: i64x2):
    v1 = vall_true v0
    return v1
@@ -66,7 +66,7 @@ block0(v0: i64x2):
 ;   lochio %r2, 1
 ;   br %r14

-function %vall_true_i32x4(i32x4) -> b1 {
+function %vall_true_i32x4(i32x4) -> i8 {
 block0(v0: i32x4):
    v1 = vall_true v0
    return v1
@@ -79,7 +79,7 @@ block0(v0: i32x4):
 ;   lochio %r2, 1
 ;   br %r14

-function %vall_true_i16x8(i16x8) -> b1 {
+function %vall_true_i16x8(i16x8) -> i8 {
 block0(v0: i16x8):
    v1 = vall_true v0
    return v1
@@ -92,7 +92,7 @@ block0(v0: i16x8):
 ;   lochio %r2, 1
 ;   br %r14

-function %vall_true_i8x16(i8x16) -> b1 {
+function %vall_true_i8x16(i8x16) -> i8 {
 block0(v0: i8x16):
    v1 = vall_true v0
    return v1
@@ -105,7 +105,7 @@ block0(v0: i8x16):
 ;   lochio %r2, 1
 ;   br %r14

-function %vany_true_icmp_eq_i64x2(i64x2, i64x2) -> b1 {
+function %vany_true_icmp_eq_i64x2(i64x2, i64x2) -> i8 {
 block0(v0: i64x2, v1: i64x2):
    v2 = icmp eq v0, v1
    v3 = vany_true v2
@@ -118,7 +118,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   lochino %r2, 1
 ;   br %r14

-function %vany_true_icmp_ne_i64x2(i64x2, i64x2) -> b1 {
+function %vany_true_icmp_ne_i64x2(i64x2, i64x2) -> i8 {
 block0(v0: i64x2, v1: i64x2):
    v2 = icmp ne v0, v1
    v3 = vany_true v2
@@ -131,7 +131,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   lochine %r2, 1
 ;   br %r14

-function %vany_true_icmp_sgt_i64x2(i64x2, i64x2) -> b1 {
+function %vany_true_icmp_sgt_i64x2(i64x2, i64x2) -> i8 {
 block0(v0: i64x2, v1: i64x2):
    v2 = icmp sgt v0, v1
    v3 = vany_true v2
@@ -144,7 +144,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   lochino %r2, 1
 ;   br %r14

-function %vany_true_icmp_sle_i64x2(i64x2, i64x2) -> b1 {
+function %vany_true_icmp_sle_i64x2(i64x2, i64x2) -> i8 {
 block0(v0: i64x2, v1: i64x2):
    v2 = icmp sle v0, v1
    v3 = vany_true v2
@@ -157,7 +157,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   lochine %r2, 1
 ;   br %r14

-function %vany_true_icmp_slt_i64x2(i64x2, i64x2) -> b1 {
+function %vany_true_icmp_slt_i64x2(i64x2, i64x2) -> i8 {
 block0(v0: i64x2, v1: i64x2):
    v2 = icmp slt v0, v1
    v3 = vany_true v2
@@ -170,7 +170,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   lochino %r2, 1
 ;   br %r14

-function %vany_true_icmp_sge_i64x2(i64x2, i64x2) -> b1 {
+function %vany_true_icmp_sge_i64x2(i64x2, i64x2) -> i8 {
 block0(v0: i64x2, v1: i64x2):
    v2 = icmp sge v0, v1
    v3 = vany_true v2
@@ -183,7 +183,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   lochine %r2, 1
 ;   br %r14

-function %vany_true_icmp_ugt_i64x2(i64x2, i64x2) -> b1 {
+function %vany_true_icmp_ugt_i64x2(i64x2, i64x2) -> i8 {
 block0(v0: i64x2, v1: i64x2):
    v2 = icmp ugt v0, v1
    v3 = vany_true v2
@@ -196,7 +196,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   lochino %r2, 1
 ;   br %r14

-function %vany_true_icmp_ule_i64x2(i64x2, i64x2) -> b1 {
+function %vany_true_icmp_ule_i64x2(i64x2, i64x2) -> i8 {
 block0(v0: i64x2, v1: i64x2):
    v2 = icmp ule v0, v1
    v3 = vany_true v2
@@ -209,7 +209,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   lochine %r2, 1
 ;   br %r14

-function %vany_true_icmp_ult_i64x2(i64x2, i64x2) -> b1 {
+function %vany_true_icmp_ult_i64x2(i64x2, i64x2) -> i8 {
 block0(v0: i64x2, v1: i64x2):
    v2 = icmp ult v0, v1
    v3 = vany_true v2
@@ -222,7 +222,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   lochino %r2, 1
 ;   br %r14

-function %vany_true_icmp_uge_i64x2(i64x2, i64x2) -> b1 {
+function %vany_true_icmp_uge_i64x2(i64x2, i64x2) -> i8 {
 block0(v0: i64x2, v1: i64x2):
    v2 = icmp uge v0, v1
    v3 = vany_true v2
@@ -235,7 +235,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   lochine %r2, 1
 ;   br %r14

-function %vany_true_fcmp_eq_f64x2(f64x2, f64x2) -> b1 {
+function %vany_true_fcmp_eq_f64x2(f64x2, f64x2) -> i8 {
 block0(v0: f64x2, v1: f64x2):
    v2 = fcmp eq v0, v1
    v3 = vany_true v2
@@ -248,7 +248,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   lochino %r2, 1
 ;   br %r14

-function %vany_true_fcmp_ne_f64x2(f64x2, f64x2) -> b1 {
+function %vany_true_fcmp_ne_f64x2(f64x2, f64x2) -> i8 {
 block0(v0: f64x2, v1: f64x2):
    v2 = fcmp ne v0, v1
    v3 = vany_true v2
@@ -261,7 +261,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   lochine %r2, 1
 ;   br %r14

-function %vany_true_fcmp_gt_f64x2(f64x2, f64x2) -> b1 {
+function %vany_true_fcmp_gt_f64x2(f64x2, f64x2) -> i8 {
 block0(v0: f64x2, v1: f64x2):
    v2 = fcmp gt v0, v1
    v3 = vany_true v2
@@ -274,7 +274,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   lochino %r2, 1
 ;   br %r14

-function %vany_true_fcmp_ule_f64x2(f64x2, f64x2) -> b1 {
+function %vany_true_fcmp_ule_f64x2(f64x2, f64x2) -> i8 {
 block0(v0: f64x2, v1: f64x2):
    v2 = fcmp ule v0, v1
    v3 = vany_true v2
@@ -287,7 +287,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   lochine %r2, 1
 ;   br %r14

-function %vany_true_fcmp_ge_f64x2(f64x2, f64x2) -> b1 {
+function %vany_true_fcmp_ge_f64x2(f64x2, f64x2) -> i8 {
 block0(v0: f64x2, v1: f64x2):
    v2 = fcmp ge v0, v1
    v3 = vany_true v2
@@ -300,7 +300,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   lochino %r2, 1
 ;   br %r14

-function %vany_true_fcmp_ult_f64x2(f64x2, f64x2) -> b1 {
+function %vany_true_fcmp_ult_f64x2(f64x2, f64x2) -> i8 {
 block0(v0: f64x2, v1: f64x2):
    v2 = fcmp ult v0, v1
    v3 = vany_true v2
@@ -313,7 +313,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   lochine %r2, 1
 ;   br %r14

-function %vany_true_fcmp_lt_f64x2(f64x2, f64x2) -> b1 {
+function %vany_true_fcmp_lt_f64x2(f64x2, f64x2) -> i8 {
 block0(v0: f64x2, v1: f64x2):
    v2 = fcmp lt v0, v1
    v3 = vany_true v2
@@ -326,7 +326,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   lochino %r2, 1
 ;   br %r14

-function %vany_true_fcmp_uge_f64x2(f64x2, f64x2) -> b1 {
+function %vany_true_fcmp_uge_f64x2(f64x2, f64x2) -> i8 {
 block0(v0: f64x2, v1: f64x2):
    v2 = fcmp uge v0, v1
    v3 = vany_true v2
@@ -339,7 +339,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   lochine %r2, 1
 ;   br %r14

-function %vany_true_fcmp_le_f64x2(f64x2, f64x2) -> b1 {
+function %vany_true_fcmp_le_f64x2(f64x2, f64x2) -> i8 {
 block0(v0: f64x2, v1: f64x2):
    v2 = fcmp le v0, v1
    v3 = vany_true v2
@@ -352,7 +352,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   lochino %r2, 1
 ;   br %r14

-function %vany_true_fcmp_ugt_f64x2(f64x2, f64x2) -> b1 {
+function %vany_true_fcmp_ugt_f64x2(f64x2, f64x2) -> i8 {
 block0(v0: f64x2, v1: f64x2):
    v2 = fcmp ugt v0, v1
    v3 = vany_true v2
@@ -365,7 +365,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   lochine %r2, 1
 ;   br %r14

-function %vall_true_icmp_eq_i64x2(i64x2, i64x2) -> b1 {
+function %vall_true_icmp_eq_i64x2(i64x2, i64x2) -> i8 {
 block0(v0: i64x2, v1: i64x2):
    v2 = icmp eq v0, v1
    v3 = vall_true v2
@@ -378,7 +378,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   lochie %r2, 1
 ;   br %r14

-function %vall_true_icmp_ne_i64x2(i64x2, i64x2) -> b1 {
+function %vall_true_icmp_ne_i64x2(i64x2, i64x2) -> i8 {
 block0(v0: i64x2, v1: i64x2):
    v2 = icmp ne v0, v1
    v3 = vall_true v2
@@ -391,7 +391,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   lochio %r2, 1
 ;   br %r14

-function %vall_true_icmp_sgt_i64x2(i64x2, i64x2) -> b1 {
+function %vall_true_icmp_sgt_i64x2(i64x2, i64x2) -> i8 {
 block0(v0: i64x2, v1: i64x2):
    v2 = icmp sgt v0, v1
    v3 = vall_true v2
@@ -404,7 +404,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   lochie %r2, 1
 ;   br %r14

-function %vall_true_icmp_sle_i64x2(i64x2, i64x2) -> b1 {
+function %vall_true_icmp_sle_i64x2(i64x2, i64x2) -> i8 {
 block0(v0: i64x2, v1: i64x2):
    v2 = icmp sle v0, v1
    v3 = vall_true v2
@@ -417,7 +417,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   lochio %r2, 1
 ;   br %r14

-function %vall_true_icmp_slt_i64x2(i64x2, i64x2) -> b1 {
+function %vall_true_icmp_slt_i64x2(i64x2, i64x2) -> i8 {
 block0(v0: i64x2, v1: i64x2):
    v2 = icmp slt v0, v1
    v3 = vall_true v2
@@ -430,7 +430,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   lochie %r2, 1
 ;   br %r14

-function %vall_true_icmp_sge_i64x2(i64x2, i64x2) -> b1 {
+function %vall_true_icmp_sge_i64x2(i64x2, i64x2) -> i8 {
 block0(v0: i64x2, v1: i64x2):
    v2 = icmp sge v0, v1
    v3 = vall_true v2
@@ -443,7 +443,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   lochio %r2, 1
 ;   br %r14

-function %vall_true_icmp_ugt_i64x2(i64x2, i64x2) -> b1 {
+function %vall_true_icmp_ugt_i64x2(i64x2, i64x2) -> i8 {
 block0(v0: i64x2, v1: i64x2):
    v2 = icmp ugt v0, v1
    v3 = vall_true v2
@@ -456,7 +456,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   lochie %r2, 1
 ;   br %r14

-function %vall_true_icmp_ule_i64x2(i64x2, i64x2) -> b1 {
+function %vall_true_icmp_ule_i64x2(i64x2, i64x2) -> i8 {
 block0(v0: i64x2, v1: i64x2):
    v2 = icmp ule v0, v1
    v3 = vall_true v2
@@ -469,7 +469,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   lochio %r2, 1
 ;   br %r14

-function %vall_true_icmp_ult_i64x2(i64x2, i64x2) -> b1 {
+function %vall_true_icmp_ult_i64x2(i64x2, i64x2) -> i8 {
 block0(v0: i64x2, v1: i64x2):
    v2 = icmp ult v0, v1
    v3 = vall_true v2
@@ -482,7 +482,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   lochie %r2, 1
 ;   br %r14

-function %vall_true_icmp_uge_i64x2(i64x2, i64x2) -> b1 {
+function %vall_true_icmp_uge_i64x2(i64x2, i64x2) -> i8 {
 block0(v0: i64x2, v1: i64x2):
    v2 = icmp uge v0, v1
    v3 = vall_true v2
@@ -495,7 +495,7 @@ block0(v0: i64x2, v1: i64x2):
 ;   lochio %r2, 1
 ;   br %r14

-function %vall_true_fcmp_eq_f64x2(f64x2, f64x2) -> b1 {
+function %vall_true_fcmp_eq_f64x2(f64x2, f64x2) -> i8 {
 block0(v0: f64x2, v1: f64x2):
    v2 = fcmp eq v0, v1
    v3 = vall_true v2
@@ -508,7 +508,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   lochie %r2, 1
 ;   br %r14

-function %vall_true_fcmp_ne_f64x2(f64x2, f64x2) -> b1 {
+function %vall_true_fcmp_ne_f64x2(f64x2, f64x2) -> i8 {
 block0(v0: f64x2, v1: f64x2):
    v2 = fcmp ne v0, v1
    v3 = vall_true v2
@@ -521,7 +521,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   lochio %r2, 1
 ;   br %r14

-function %vall_true_fcmp_gt_f64x2(f64x2, f64x2) -> b1 {
+function %vall_true_fcmp_gt_f64x2(f64x2, f64x2) -> i8 {
 block0(v0: f64x2, v1: f64x2):
    v2 = fcmp gt v0, v1
    v3 = vall_true v2
@@ -534,7 +534,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   lochie %r2, 1
 ;   br %r14

-function %vall_true_fcmp_ule_f64x2(f64x2, f64x2) -> b1 {
+function %vall_true_fcmp_ule_f64x2(f64x2, f64x2) -> i8 {
 block0(v0: f64x2, v1: f64x2):
    v2 = fcmp ule v0, v1
    v3 = vall_true v2
@@ -547,7 +547,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   lochio %r2, 1
 ;   br %r14

-function %vall_true_fcmp_ge_f64x2(f64x2, f64x2) -> b1 {
+function %vall_true_fcmp_ge_f64x2(f64x2, f64x2) -> i8 {
 block0(v0: f64x2, v1: f64x2):
    v2 = fcmp ge v0, v1
    v3 = vall_true v2
@@ -560,7 +560,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   lochie %r2, 1
 ;   br %r14

-function %vall_true_fcmp_ult_f64x2(f64x2, f64x2) -> b1 {
+function %vall_true_fcmp_ult_f64x2(f64x2, f64x2) -> i8 {
 block0(v0: f64x2, v1: f64x2):
    v2 = fcmp ult v0, v1
    v3 = vall_true v2
@@ -573,7 +573,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   lochio %r2, 1
 ;   br %r14

-function %vall_true_fcmp_lt_f64x2(f64x2, f64x2) -> b1 {
+function %vall_true_fcmp_lt_f64x2(f64x2, f64x2) -> i8 {
 block0(v0: f64x2, v1: f64x2):
    v2 = fcmp lt v0, v1
    v3 = vall_true v2
@@ -586,7 +586,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   lochie %r2, 1
 ;   br %r14

-function %vall_true_fcmp_uge_f64x2(f64x2, f64x2) -> b1 {
+function %vall_true_fcmp_uge_f64x2(f64x2, f64x2) -> i8 {
 block0(v0: f64x2, v1: f64x2):
    v2 = fcmp uge v0, v1
    v3 = vall_true v2
@@ -599,7 +599,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   lochio %r2, 1
 ;   br %r14

-function %vall_true_fcmp_le_f64x2(f64x2, f64x2) -> b1 {
+function %vall_true_fcmp_le_f64x2(f64x2, f64x2) -> i8 {
 block0(v0: f64x2, v1: f64x2):
    v2 = fcmp le v0, v1
    v3 = vall_true v2
@@ -612,7 +612,7 @@ block0(v0: f64x2, v1: f64x2):
 ;   lochie %r2, 1
 ;   br %r14

-function %vall_true_fcmp_ugt_f64x2(f64x2, f64x2) -> b1 {
+function %vall_true_fcmp_ugt_f64x2(f64x2, f64x2) -> i8 {
 block0(v0: f64x2, v1: f64x2):
    v2 = fcmp ugt v0, v1
    v3 = vall_true v2
--- a/cranelift/filetests/filetests/isa/s390x/vec-permute-le-lane.clif
+++ b/cranelift/filetests/filetests/isa/s390x/vec-permute-le-lane.clif
@@ -17,7 +17,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_0(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
+    v2 = shuffle v0, v1, [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
    return v2
 }

@@ -28,7 +28,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_1(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [3 0 31 26 4 6 12 11 23 13 24 4 2 15 17 5]
+    v2 = shuffle v0, v1, [3 0 31 26 4 6 12 11 23 13 24 4 2 15 17 5]
    return v2
 }

@@ -39,7 +39,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_2(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [0 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47]
+    v2 = shuffle v0, v1, [0 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47]
    return v2
 }

@@ -52,7 +52,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhg_xy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [24 25 26 27 28 29 30 31 8 9 10 11 12 13 14 15]
+    v2 = shuffle v0, v1, [24 25 26 27 28 29 30 31 8 9 10 11 12 13 14 15]
    return v2
 }

@@ -62,7 +62,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhf_xy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [24 25 26 27 8 9 10 11 28 29 30 31 12 13 14 15]
+    v2 = shuffle v0, v1, [24 25 26 27 8 9 10 11 28 29 30 31 12 13 14 15]
    return v2
 }

@@ -72,7 +72,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhh_xy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [24 25 8 9 26 27 10 11 28 29 12 13 30 31 14 15]
+    v2 = shuffle v0, v1, [24 25 8 9 26 27 10 11 28 29 12 13 30 31 14 15]
    return v2
 }

@@ -82,7 +82,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhb_xy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [24 8 25 9 26 10 27 11 28 12 29 13 30 14 31 15]
+    v2 = shuffle v0, v1, [24 8 25 9 26 10 27 11 28 12 29 13 30 14 31 15]
    return v2
 }

@@ -92,7 +92,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhg_yx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31]
+    v2 = shuffle v0, v1, [8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31]
    return v2
 }

@@ -102,7 +102,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhf_yx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [8 9 10 11 24 25 26 27 12 13 14 15 28 29 30 31]
+    v2 = shuffle v0, v1, [8 9 10 11 24 25 26 27 12 13 14 15 28 29 30 31]
    return v2
 }

@@ -112,7 +112,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhh_yx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [8 9 24 25 10 11 26 27 12 13 28 29 14 15 30 31]
+    v2 = shuffle v0, v1, [8 9 24 25 10 11 26 27 12 13 28 29 14 15 30 31]
    return v2
 }

@@ -122,7 +122,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhb_yx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [8 24 9 25 10 26 11 27 12 28 13 29 14 30 15 31]
+    v2 = shuffle v0, v1, [8 24 9 25 10 26 11 27 12 28 13 29 14 30 15 31]
    return v2
 }

@@ -132,7 +132,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhg_xx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [8 9 10 11 12 13 14 15 8 9 10 11 12 13 14 15]
+    v2 = shuffle v0, v1, [8 9 10 11 12 13 14 15 8 9 10 11 12 13 14 15]
    return v2
 }

@@ -142,7 +142,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhf_xx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [8 9 10 11 8 9 10 11 12 13 14 15 12 13 14 15]
+    v2 = shuffle v0, v1, [8 9 10 11 8 9 10 11 12 13 14 15 12 13 14 15]
    return v2
 }

@@ -152,7 +152,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhh_xx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [8 9 8 9 10 11 10 11 12 13 12 13 14 15 14 15]
+    v2 = shuffle v0, v1, [8 9 8 9 10 11 10 11 12 13 12 13 14 15 14 15]
    return v2
 }

@@ -162,7 +162,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhb_xx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [8 8 9 9 10 10 11 11 12 12 13 13 14 14 15 15]
+    v2 = shuffle v0, v1, [8 8 9 9 10 10 11 11 12 12 13 13 14 14 15 15]
    return v2
 }

@@ -172,7 +172,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhg_yy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [24 25 26 27 28 29 30 31 24 25 26 27 28 29 30 31]
+    v2 = shuffle v0, v1, [24 25 26 27 28 29 30 31 24 25 26 27 28 29 30 31]
    return v2
 }

@@ -182,7 +182,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhf_yy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [24 25 26 27 24 25 26 27 28 29 30 31 28 29 30 31]
+    v2 = shuffle v0, v1, [24 25 26 27 24 25 26 27 28 29 30 31 28 29 30 31]
    return v2
 }

@@ -192,7 +192,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhh_yy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [24 25 24 25 26 27 26 27 28 29 28 29 30 31 30 31]
+    v2 = shuffle v0, v1, [24 25 24 25 26 27 26 27 28 29 28 29 30 31 30 31]
    return v2
 }

@@ -202,7 +202,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhb_yy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [24 24 25 25 26 26 27 27 28 28 29 29 30 30 31 31]
+    v2 = shuffle v0, v1, [24 24 25 25 26 26 27 27 28 28 29 29 30 30 31 31]
    return v2
 }

@@ -212,7 +212,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlg_xy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [16 17 18 19 20 21 22 23 0 1 2 3 4 5 6 7]
+    v2 = shuffle v0, v1, [16 17 18 19 20 21 22 23 0 1 2 3 4 5 6 7]
    return v2
 }

@@ -222,7 +222,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlf_xy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [16 17 18 19 0 1 2 3 20 21 22 23 4 5 6 7]
+    v2 = shuffle v0, v1, [16 17 18 19 0 1 2 3 20 21 22 23 4 5 6 7]
    return v2
 }

@@ -232,7 +232,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlh_xy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [16 17 0 1 18 19 2 3 20 21 4 5 22 23 6 7]
+    v2 = shuffle v0, v1, [16 17 0 1 18 19 2 3 20 21 4 5 22 23 6 7]
    return v2
 }

@@ -242,7 +242,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlb_xy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [16 0 17 1 18 2 19 3 20 4 21 5 22 6 23 7]
+    v2 = shuffle v0, v1, [16 0 17 1 18 2 19 3 20 4 21 5 22 6 23 7]
    return v2
 }

@@ -252,7 +252,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlg_yx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23]
+    v2 = shuffle v0, v1, [0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23]
    return v2
 }

@@ -262,7 +262,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlf_yx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [0 1 2 3 16 17 18 19 4 5 6 7 20 21 22 23]
+    v2 = shuffle v0, v1, [0 1 2 3 16 17 18 19 4 5 6 7 20 21 22 23]
    return v2
 }

@@ -272,7 +272,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlh_yx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [0 1 16 17 2 3 18 19 4 5 20 21 6 7 22 23]
+    v2 = shuffle v0, v1, [0 1 16 17 2 3 18 19 4 5 20 21 6 7 22 23]
    return v2
 }

@@ -282,7 +282,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlb_yx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [0 16 1 17 2 18 3 19 4 20 5 21 6 22 7 23]
+    v2 = shuffle v0, v1, [0 16 1 17 2 18 3 19 4 20 5 21 6 22 7 23]
    return v2
 }

@@ -292,7 +292,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlg_xx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7]
+    v2 = shuffle v0, v1, [0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7]
    return v2
 }

@@ -302,7 +302,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlf_xx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [0 1 2 3 0 1 2 3 4 5 6 7 4 5 6 7]
+    v2 = shuffle v0, v1, [0 1 2 3 0 1 2 3 4 5 6 7 4 5 6 7]
    return v2
 }

@@ -312,7 +312,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlh_xx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [0 1 0 1 2 3 2 3 4 5 4 5 6 7 6 7]
+    v2 = shuffle v0, v1, [0 1 0 1 2 3 2 3 4 5 4 5 6 7 6 7]
    return v2
 }

@@ -322,7 +322,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlb_xx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7]
+    v2 = shuffle v0, v1, [0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7]
    return v2
 }

@@ -332,7 +332,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlg_yy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [16 17 18 19 20 21 22 23 16 17 18 19 20 21 22 23]
+    v2 = shuffle v0, v1, [16 17 18 19 20 21 22 23 16 17 18 19 20 21 22 23]
    return v2
 }

@@ -342,7 +342,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlf_yy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [16 17 18 19 16 17 18 19 20 21 22 23 20 21 22 23]
+    v2 = shuffle v0, v1, [16 17 18 19 16 17 18 19 20 21 22 23 20 21 22 23]
    return v2
 }

@@ -352,7 +352,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlh_yy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [16 17 16 17 18 19 18 19 20 21 20 21 22 23 22 23]
+    v2 = shuffle v0, v1, [16 17 16 17 18 19 18 19 20 21 20 21 22 23 22 23]
    return v2
 }

@@ -362,7 +362,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlb_yy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [16 16 17 17 18 18 19 19 20 20 21 21 22 22 23 23]
+    v2 = shuffle v0, v1, [16 16 17 17 18 18 19 19 20 20 21 21 22 22 23 23]
    return v2
 }

@@ -373,7 +373,7 @@ block0(v0: i8x16, v1: i8x16):
 ;; Special patterns that can be implemented via PACK.
 function %shuffle_vpkg_xy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [16 17 18 19 24 25 26 27 0 1 2 3 8 9 10 11]
+    v2 = shuffle v0, v1, [16 17 18 19 24 25 26 27 0 1 2 3 8 9 10 11]
    return v2
 }

@@ -383,7 +383,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vpkf_xy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [16 17 20 21 24 25 28 29 0 1 4 5 8 9 12 13]
+    v2 = shuffle v0, v1, [16 17 20 21 24 25 28 29 0 1 4 5 8 9 12 13]
    return v2
 }

@@ -393,7 +393,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vpkh_xy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [16 18 20 22 24 26 28 30 0 2 4 6 8 10 12 14]
+    v2 = shuffle v0, v1, [16 18 20 22 24 26 28 30 0 2 4 6 8 10 12 14]
    return v2
 }

@@ -403,7 +403,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vpkg_yx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [0 1 2 3 8 9 10 11 16 17 18 19 24 25 26 27]
+    v2 = shuffle v0, v1, [0 1 2 3 8 9 10 11 16 17 18 19 24 25 26 27]
    return v2
 }

@@ -413,7 +413,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vpkf_yx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [0 1 4 5 8 9 12 13 16 17 20 21 24 25 28 29]
+    v2 = shuffle v0, v1, [0 1 4 5 8 9 12 13 16 17 20 21 24 25 28 29]
    return v2
 }

@@ -423,7 +423,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vpkh_yx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30]
+    v2 = shuffle v0, v1, [0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30]
    return v2
 }

@@ -433,7 +433,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vpkg_xx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [0 1 2 3 8 9 10 11 0 1 2 3 8 9 10 11]
+    v2 = shuffle v0, v1, [0 1 2 3 8 9 10 11 0 1 2 3 8 9 10 11]
    return v2
 }

@@ -443,7 +443,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vpkf_xx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [0 1 4 5 8 9 12 13 0 1 4 5 8 9 12 13]
+    v2 = shuffle v0, v1, [0 1 4 5 8 9 12 13 0 1 4 5 8 9 12 13]
    return v2
 }

@@ -453,7 +453,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vpkh_xx(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [0 2 4 6 8 10 12 14 0 2 4 6 8 10 12 14]
+    v2 = shuffle v0, v1, [0 2 4 6 8 10 12 14 0 2 4 6 8 10 12 14]
    return v2
 }

@@ -463,7 +463,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vpkg_yy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [16 17 18 19 24 25 26 27 16 17 18 19 24 25 26 27]
+    v2 = shuffle v0, v1, [16 17 18 19 24 25 26 27 16 17 18 19 24 25 26 27]
    return v2
 }

@@ -473,7 +473,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vpkf_yy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [16 17 20 21 24 25 28 29 16 17 20 21 24 25 28 29]
+    v2 = shuffle v0, v1, [16 17 20 21 24 25 28 29 16 17 20 21 24 25 28 29]
    return v2
 }

@@ -483,7 +483,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vpkh_yy(i8x16, i8x16) -> i8x16 wasmtime_system_v {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [16 18 20 22 24 26 28 30 16 18 20 22 24 26 28 30]
+    v2 = shuffle v0, v1, [16 18 20 22 24 26 28 30 16 18 20 22 24 26 28 30]
    return v2
 }

--- a/cranelift/filetests/filetests/isa/s390x/vec-permute.clif
+++ b/cranelift/filetests/filetests/isa/s390x/vec-permute.clif
@@ -16,7 +16,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_0(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
+    v2 = shuffle v0, v1, [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
    return v2
 }

@@ -27,7 +27,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_1(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [3 0 31 26 4 6 12 11 23 13 24 4 2 15 17 5]
+    v2 = shuffle v0, v1, [3 0 31 26 4 6 12 11 23 13 24 4 2 15 17 5]
    return v2
 }

@@ -38,7 +38,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_2(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [0 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47]
+    v2 = shuffle v0, v1, [0 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47]
    return v2
 }

@@ -51,7 +51,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhg_xy(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23]
+    v2 = shuffle v0, v1, [0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23]
    return v2
 }

@@ -61,7 +61,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhf_xy(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [0 1 2 3 16 17 18 19 4 5 6 7 20 21 22 23]
+    v2 = shuffle v0, v1, [0 1 2 3 16 17 18 19 4 5 6 7 20 21 22 23]
    return v2
 }

@@ -71,7 +71,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhh_xy(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [0 1 16 17 2 3 18 19 4 5 20 21 6 7 22 23]
+    v2 = shuffle v0, v1, [0 1 16 17 2 3 18 19 4 5 20 21 6 7 22 23]
    return v2
 }

@@ -81,7 +81,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhb_xy(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [0 16 1 17 2 18 3 19 4 20 5 21 6 22 7 23]
+    v2 = shuffle v0, v1, [0 16 1 17 2 18 3 19 4 20 5 21 6 22 7 23]
    return v2
 }

@@ -91,7 +91,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhg_yx(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [16 17 18 19 20 21 22 23 0 1 2 3 4 5 6 7]
+    v2 = shuffle v0, v1, [16 17 18 19 20 21 22 23 0 1 2 3 4 5 6 7]
    return v2
 }

@@ -101,7 +101,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhf_yx(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [16 17 18 19 0 1 2 3 20 21 22 23 4 5 6 7]
+    v2 = shuffle v0, v1, [16 17 18 19 0 1 2 3 20 21 22 23 4 5 6 7]
    return v2
 }

@@ -111,7 +111,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhh_yx(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [16 17 0 1 18 19 2 3 20 21 4 5 22 23 6 7]
+    v2 = shuffle v0, v1, [16 17 0 1 18 19 2 3 20 21 4 5 22 23 6 7]
    return v2
 }

@@ -121,7 +121,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhb_yx(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [16 0 17 1 18 2 19 3 20 4 21 5 22 6 23 7]
+    v2 = shuffle v0, v1, [16 0 17 1 18 2 19 3 20 4 21 5 22 6 23 7]
    return v2
 }

@@ -131,7 +131,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhg_xx(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7]
+    v2 = shuffle v0, v1, [0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7]
    return v2
 }

@@ -141,7 +141,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhf_xx(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [0 1 2 3 0 1 2 3 4 5 6 7 4 5 6 7]
+    v2 = shuffle v0, v1, [0 1 2 3 0 1 2 3 4 5 6 7 4 5 6 7]
    return v2
 }

@@ -151,7 +151,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhh_xx(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [0 1 0 1 2 3 2 3 4 5 4 5 6 7 6 7]
+    v2 = shuffle v0, v1, [0 1 0 1 2 3 2 3 4 5 4 5 6 7 6 7]
    return v2
 }

@@ -161,7 +161,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhb_xx(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7]
+    v2 = shuffle v0, v1, [0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7]
    return v2
 }

@@ -171,7 +171,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhg_yy(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [16 17 18 19 20 21 22 23 16 17 18 19 20 21 22 23]
+    v2 = shuffle v0, v1, [16 17 18 19 20 21 22 23 16 17 18 19 20 21 22 23]
    return v2
 }

@@ -181,7 +181,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhf_yy(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [16 17 18 19 16 17 18 19 20 21 22 23 20 21 22 23]
+    v2 = shuffle v0, v1, [16 17 18 19 16 17 18 19 20 21 22 23 20 21 22 23]
    return v2
 }

@@ -191,7 +191,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhh_yy(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [16 17 16 17 18 19 18 19 20 21 20 21 22 23 22 23]
+    v2 = shuffle v0, v1, [16 17 16 17 18 19 18 19 20 21 20 21 22 23 22 23]
    return v2
 }

@@ -201,7 +201,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrhb_yy(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [16 16 17 17 18 18 19 19 20 20 21 21 22 22 23 23]
+    v2 = shuffle v0, v1, [16 16 17 17 18 18 19 19 20 20 21 21 22 22 23 23]
    return v2
 }

@@ -211,7 +211,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlg_xy(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31]
+    v2 = shuffle v0, v1, [8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31]
    return v2
 }

@@ -221,7 +221,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlf_xy(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [8 9 10 11 24 25 26 27 12 13 14 15 28 29 30 31]
+    v2 = shuffle v0, v1, [8 9 10 11 24 25 26 27 12 13 14 15 28 29 30 31]
    return v2
 }

@@ -231,7 +231,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlh_xy(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [8 9 24 25 10 11 26 27 12 13 28 29 14 15 30 31]
+    v2 = shuffle v0, v1, [8 9 24 25 10 11 26 27 12 13 28 29 14 15 30 31]
    return v2
 }

@@ -241,7 +241,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlb_xy(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [8 24 9 25 10 26 11 27 12 28 13 29 14 30 15 31]
+    v2 = shuffle v0, v1, [8 24 9 25 10 26 11 27 12 28 13 29 14 30 15 31]
    return v2
 }

@@ -251,7 +251,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlg_yx(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [24 25 26 27 28 29 30 31 8 9 10 11 12 13 14 15]
+    v2 = shuffle v0, v1, [24 25 26 27 28 29 30 31 8 9 10 11 12 13 14 15]
    return v2
 }

@@ -261,7 +261,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlf_yx(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [24 25 26 27 8 9 10 11 28 29 30 31 12 13 14 15]
+    v2 = shuffle v0, v1, [24 25 26 27 8 9 10 11 28 29 30 31 12 13 14 15]
    return v2
 }

@@ -271,7 +271,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlh_yx(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [24 25 8 9 26 27 10 11 28 29 12 13 30 31 14 15]
+    v2 = shuffle v0, v1, [24 25 8 9 26 27 10 11 28 29 12 13 30 31 14 15]
    return v2
 }

@@ -281,7 +281,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlb_yx(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [24 8 25 9 26 10 27 11 28 12 29 13 30 14 31 15]
+    v2 = shuffle v0, v1, [24 8 25 9 26 10 27 11 28 12 29 13 30 14 31 15]
    return v2
 }

@@ -291,7 +291,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlg_xx(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [8 9 10 11 12 13 14 15 8 9 10 11 12 13 14 15]
+    v2 = shuffle v0, v1, [8 9 10 11 12 13 14 15 8 9 10 11 12 13 14 15]
    return v2
 }

@@ -301,7 +301,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlf_xx(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [8 9 10 11 8 9 10 11 12 13 14 15 12 13 14 15]
+    v2 = shuffle v0, v1, [8 9 10 11 8 9 10 11 12 13 14 15 12 13 14 15]
    return v2
 }

@@ -311,7 +311,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlh_xx(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [8 9 8 9 10 11 10 11 12 13 12 13 14 15 14 15]
+    v2 = shuffle v0, v1, [8 9 8 9 10 11 10 11 12 13 12 13 14 15 14 15]
    return v2
 }

@@ -321,7 +321,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlb_xx(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [8 8 9 9 10 10 11 11 12 12 13 13 14 14 15 15]
+    v2 = shuffle v0, v1, [8 8 9 9 10 10 11 11 12 12 13 13 14 14 15 15]
    return v2
 }

@@ -331,7 +331,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlg_yy(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [24 25 26 27 28 29 30 31 24 25 26 27 28 29 30 31]
+    v2 = shuffle v0, v1, [24 25 26 27 28 29 30 31 24 25 26 27 28 29 30 31]
    return v2
 }

@@ -341,7 +341,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlf_yy(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [24 25 26 27 24 25 26 27 28 29 30 31 28 29 30 31]
+    v2 = shuffle v0, v1, [24 25 26 27 24 25 26 27 28 29 30 31 28 29 30 31]
    return v2
 }

@@ -351,7 +351,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlh_yy(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [24 25 24 25 26 27 26 27 28 29 28 29 30 31 30 31]
+    v2 = shuffle v0, v1, [24 25 24 25 26 27 26 27 28 29 28 29 30 31 30 31]
    return v2
 }

@@ -361,7 +361,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vmrlb_yy(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [24 24 25 25 26 26 27 27 28 28 29 29 30 30 31 31]
+    v2 = shuffle v0, v1, [24 24 25 25 26 26 27 27 28 28 29 29 30 30 31 31]
    return v2
 }

@@ -372,7 +372,7 @@ block0(v0: i8x16, v1: i8x16):
 ;; Special patterns that can be implemented via PACK.
 function %shuffle_vpkg_xy(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [4 5 6 7 12 13 14 15 20 21 22 23 28 29 30 31]
+    v2 = shuffle v0, v1, [4 5 6 7 12 13 14 15 20 21 22 23 28 29 30 31]
    return v2
 }

@@ -382,7 +382,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vpkf_xy(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [2 3 6 7 10 11 14 15 18 19 22 23 26 27 30 31]
+    v2 = shuffle v0, v1, [2 3 6 7 10 11 14 15 18 19 22 23 26 27 30 31]
    return v2
 }

@@ -392,7 +392,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vpkh_xy(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31]
+    v2 = shuffle v0, v1, [1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31]
    return v2
 }

@@ -402,7 +402,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vpkg_yx(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [20 21 22 23 28 29 30 31 4 5 6 7 12 13 14 15]
+    v2 = shuffle v0, v1, [20 21 22 23 28 29 30 31 4 5 6 7 12 13 14 15]
    return v2
 }

@@ -412,7 +412,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vpkf_yx(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [18 19 22 23 26 27 30 31 2 3 6 7 10 11 14 15]
+    v2 = shuffle v0, v1, [18 19 22 23 26 27 30 31 2 3 6 7 10 11 14 15]
    return v2
 }

@@ -422,7 +422,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vpkh_yx(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [17 19 21 23 25 27 29 31 1 3 5 7 9 11 13 15]
+    v2 = shuffle v0, v1, [17 19 21 23 25 27 29 31 1 3 5 7 9 11 13 15]
    return v2
 }

@@ -432,7 +432,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vpkg_xx(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [4 5 6 7 12 13 14 15 4 5 6 7 12 13 14 15]
+    v2 = shuffle v0, v1, [4 5 6 7 12 13 14 15 4 5 6 7 12 13 14 15]
    return v2
 }

@@ -442,7 +442,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vpkf_xx(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [2 3 6 7 10 11 14 15 2 3 6 7 10 11 14 15]
+    v2 = shuffle v0, v1, [2 3 6 7 10 11 14 15 2 3 6 7 10 11 14 15]
    return v2
 }

@@ -452,7 +452,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vpkh_xx(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [1 3 5 7 9 11 13 15 1 3 5 7 9 11 13 15]
+    v2 = shuffle v0, v1, [1 3 5 7 9 11 13 15 1 3 5 7 9 11 13 15]
    return v2
 }

@@ -462,7 +462,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vpkg_yy(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [20 21 22 23 28 29 30 31 20 21 22 23 28 29 30 31]
+    v2 = shuffle v0, v1, [20 21 22 23 28 29 30 31 20 21 22 23 28 29 30 31]
    return v2
 }

@@ -472,7 +472,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vpkf_yy(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [18 19 22 23 26 27 30 31 18 19 22 23 26 27 30 31]
+    v2 = shuffle v0, v1, [18 19 22 23 26 27 30 31 18 19 22 23 26 27 30 31]
    return v2
 }

@@ -482,7 +482,7 @@ block0(v0: i8x16, v1: i8x16):

 function %shuffle_vpkh_yy(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
-    v2 = shuffle.i8x16 v0, v1, [17 19 21 23 25 27 29 31 17 19 21 23 25 27 29 31]
+    v2 = shuffle v0, v1, [17 19 21 23 25 27 29 31 17 19 21 23 25 27 29 31]
    return v2
 }

--- a/cranelift/filetests/filetests/isa/x64/atomic-cas-bug.clif
+++ b/cranelift/filetests/filetests/isa/x64/atomic-cas-bug.clif
@@ -158,83 +158,74 @@ function u0:31(i64, i32, i32, i8, i8) -> i32, i32 system_v {
                                block9:
@000d                               v37 = atomic_cas.i32 v34, v35, v36
@000d                               v38 = icmp eq v37, v35
-@000d                               v39 = bint.i8 v38
@000d                               jump block10

                                block10:
-@000e                               jump block32(v37, v39)
+@000e                               jump block32(v37, v38)

                                block11:
@0012                               v43 = atomic_cas.i32 v40, v41, v42
@0012                               v44 = icmp eq v43, v41
-@0012                               v45 = bint.i8 v44
@0012                               jump block12

                                block12:
-@0013                               jump block32(v43, v45)
+@0013                               jump block32(v43, v44)

                                block13:
@0017                               v49 = atomic_cas.i32 v46, v47, v48
@0017                               v50 = icmp eq v49, v47
-@0017                               v51 = bint.i8 v50
@0017                               jump block14

                                block14:
-@0018                               jump block32(v49, v51)
+@0018                               jump block32(v49, v50)

                                block15:
@001c                               v55 = atomic_cas.i32 v52, v53, v54
@001c                               v56 = icmp eq v55, v53
-@001c                               v57 = bint.i8 v56
@001c                               jump block16

                                block16:
-@001d                               jump block32(v55, v57)
+@001d                               jump block32(v55, v56)

                                block17:
@0021                               v61 = atomic_cas.i32 v58, v59, v60
@0021                               v62 = icmp eq v61, v59
-@0021                               v63 = bint.i8 v62
@0021                               jump block18

                                block18:
-@0022                               jump block32(v61, v63)
+@0022                               jump block32(v61, v62)

                                block19:
@0026                               v67 = atomic_cas.i32 v64, v65, v66
@0026                               v68 = icmp eq v67, v65
-@0026                               v69 = bint.i8 v68
@0026                               jump block20

                                block20:
-@0027                               jump block32(v67, v69)
+@0027                               jump block32(v67, v68)

                                block21:
@002b                               v73 = atomic_cas.i32 v70, v71, v72
@002b                               v74 = icmp eq v73, v71
-@002b                               v75 = bint.i8 v74
@002b                               jump block22

                                block22:
-@002c                               jump block32(v73, v75)
+@002c                               jump block32(v73, v74)

                                block23:
@0030                               v79 = atomic_cas.i32 v76, v77, v78
@0030                               v80 = icmp eq v79, v77
-@0030                               v81 = bint.i8 v80
@0030                               jump block24

                                block24:
-@0031                               jump block32(v79, v81)
+@0031                               jump block32(v79, v80)

                                block25:
@0035                               v85 = atomic_cas.i32 v82, v83, v84
@0035                               v86 = icmp eq v85, v83
-@0035                               v87 = bint.i8 v86
@0035                               jump block26

                                block26:
-@0036                               jump block32(v85, v87)
+@0036                               jump block32(v85, v86)

                                block27:
@0038                               v88 = global_value.i64 gv2
--- a/cranelift/filetests/filetests/isa/x64/atomic_cas_const_addr.clif
+++ b/cranelift/filetests/filetests/isa/x64/atomic_cas_const_addr.clif
@@ -10,7 +10,7 @@ function u0:31() -> i32, i32 system_v {
                                    v0 = iconst.i64 0
                                    v1 = iconst.i32 0
                                    v2 = iconst.i32 0
-@0004                               v28 = bconst.b1 false
+@0004                               v28 = iconst.i8 0
@0005                               brnz v28, block25
                                    jump block1

--- a/cranelift/filetests/filetests/isa/x64/branches.clif
+++ b/cranelift/filetests/filetests/isa/x64/branches.clif
@@ -126,16 +126,16 @@ block2:
 ;   popq    %rbp
 ;   ret

-function %f4(f32, f32) -> b1 {
+function %f4(f32, f32) -> i8 {
 block0(v0: f32, v1: f32):
  v2 = fcmp eq v0, v1
  brz v2, block1
  jump block2
 block1:
-  v3 = bconst.b1 true
+  v3 = iconst.i8 1
  return v3
 block2:
-  v4 = bconst.b1 false
+  v4 = iconst.i8 0
  return v4
 }

@@ -156,16 +156,16 @@ block2:
 ;   popq    %rbp
 ;   ret

-function %f4(f32, f32) -> b1 {
+function %f4(f32, f32) -> i8 {
 block0(v0: f32, v1: f32):
  v2 = fcmp ne v0, v1
  brz v2, block1
  jump block2
 block1:
-  v3 = bconst.b1 true
+  v3 = iconst.i8 1
  return v3
 block2:
-  v4 = bconst.b1 false
+  v4 = iconst.i8 0
  return v4
 }

@@ -187,18 +187,18 @@ block2:
 ;   ret


-function %f5(i32) -> b1 {
+function %f5(i32) -> i8 {
  jt0 = jump_table [block1, block2]

 block0(v0: i32):
  br_table v0, block1, jt0

 block1:
-  v1 = bconst.b1 true
+  v1 = iconst.i8 1
  return v1

 block2:
-  v2 = bconst.b1 false
+  v2 = iconst.i8 0
  return v2
 }

@@ -222,17 +222,17 @@ block2:
 ;   popq    %rbp
 ;   ret

-function %f6(i64) -> b1 {
+function %f6(i64) -> i8 {
 block0(v0: i64):
  v1 = iconst.i64 0
  v2 = icmp slt v0, v1
  brnz v2, block1
  jump block2
 block1:
-  v3 = bconst.b1 true
+  v3 = iconst.i8 1
  return v3
 block2:
-  v4 = bconst.b1 false
+  v4 = iconst.i8 0
  return v4
 }

@@ -252,17 +252,17 @@ block2:
 ;   popq    %rbp
 ;   ret

-function %f7(i32) -> b1 {
+function %f7(i32) -> i8 {
 block0(v0: i32):
  v1 = iconst.i32 0
  v2 = icmp slt v0, v1
  brnz v2, block1
  jump block2
 block1:
-  v3 = bconst.b1 true
+  v3 = iconst.i8 1
  return v3
 block2:
-  v4 = bconst.b1 false
+  v4 = iconst.i8 0
  return v4
 }

--- a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif
+++ b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif
@@ -5,7 +5,7 @@ function %f0(i64, i64) -> i64, i64 {
 block0(v0: i64, v1: i64):
    v2 = load.i64 v1
    v3 = icmp eq v0, v2
-    v4 = bint.i64 v3
+    v4 = uextend.i64 v3
    v5 = select.i64 v3, v0, v1
    return v4, v5
 }
@@ -16,7 +16,7 @@ block0(v0: i64, v1: i64):
 ;   movq    0(%rsi), %r11
 ;   cmpq    %r11, %rdi
 ;   setz    %al
-;   andq    %rax, $1, %rax
+;   movzbq  %al, %rax
 ;   cmpq    %r11, %rdi
 ;   movq    %rsi, %rdx
 ;   cmovzq  %rdi, %rdx, %rdx
@@ -28,7 +28,7 @@ function %f1(f64, i64) -> i64, f64 {
 block0(v0: f64, v1: i64):
    v2 = load.f64 v1
    v3 = fcmp eq v0, v2
-    v4 = bint.i64 v3
+    v4 = uextend.i64 v3
    v5 = select.f64 v3, v0, v0
    return v4, v5
 }
@@ -38,10 +38,10 @@ block0(v0: f64, v1: i64):
 ; block0:
 ;   movsd   0(%rdi), %xmm11
 ;   ucomisd %xmm11, %xmm0
-;   setnp   %al
-;   setz    %cl
-;   andl    %eax, %ecx, %eax
-;   andq    %rax, $1, %rax
+;   setnp   %cl
+;   setz    %dl
+;   andl    %ecx, %edx, %ecx
+;   movzbq  %cl, %rax
 ;   ucomisd %xmm0, %xmm11
 ;   movdqa  %xmm0, %xmm12
 ;   mov z, sd; j%xmm0 $next; mov%xmm12 %xmm12, %xmm12; $next: 
--- a/cranelift/filetests/filetests/isa/x64/conditional-values.clif
+++ b/cranelift/filetests/filetests/isa/x64/conditional-values.clif
@@ -1,8 +1,8 @@
 test compile precise-output
 target x86_64

-function %f0(b1, i32, i32) -> i32 {
-block0(v0: b1, v1: i32, v2: i32):
+function %f0(i8, i32, i32) -> i32 {
+block0(v0: i8, v1: i32, v2: i32):
    v3 = select.i32 v0, v1, v2
    return v3
 }
@@ -10,15 +10,15 @@ block0(v0: b1, v1: i32, v2: i32):
 ;   pushq   %rbp
 ;   movq    %rsp, %rbp
 ; block0:
-;   testb   $1, %dil
+;   testb   %dil, %dil
 ;   movq    %rdx, %rax
 ;   cmovnzl %esi, %eax, %eax
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
 ;   ret

-function %f1(b1) -> i32 {
-block0(v0: b1):
+function %f1(i8) -> i32 {
+block0(v0: i8):
    brnz v0, block1
    jump block2
 block1:
@@ -32,7 +32,7 @@ block2:
 ;   pushq   %rbp
 ;   movq    %rsp, %rbp
 ; block0:
-;   testb   $1, %dil
+;   testb   %dil, %dil
 ;   jnz     label1; j label2
 ; block1:
 ;   movl    $1, %eax
@@ -45,8 +45,8 @@ block2:
 ;   popq    %rbp
 ;   ret

-function %f2(b1) -> i32 {
-block0(v0: b1):
+function %f2(i8) -> i32 {
+block0(v0: i8):
    brz v0, block1
    jump block2
 block1:
@@ -60,7 +60,7 @@ block2:
 ;   pushq   %rbp
 ;   movq    %rsp, %rbp
 ; block0:
-;   testb   $1, %dil
+;   testb   %dil, %dil
 ;   jz      label1; j label2
 ; block1:
 ;   movl    $1, %eax
@@ -137,7 +137,7 @@ block2:
 ;   popq    %rbp
 ;   ret

-function %test_x_slt_0_i64(i64) -> b1 {
+function %test_x_slt_0_i64(i64) -> i8 {
 block0(v0: i64):
    v1 = iconst.i64 0
    v2 = icmp slt v0, v1
@@ -153,7 +153,7 @@ block0(v0: i64):
 ;   popq    %rbp
 ;   ret

-function %test_x_slt_0_i32f4(i32) -> b1 {
+function %test_x_slt_0_i32f4(i32) -> i8 {
 block0(v0: i32):
    v1 = iconst.i32 0
    v2 = icmp slt v0, v1
@@ -169,7 +169,7 @@ block0(v0: i32):
 ;   popq    %rbp
 ;   ret

-function %test_0_sgt_x_i64(i64) -> b1 {
+function %test_0_sgt_x_i64(i64) -> i8 {
 block0(v0: i64):
    v1 = iconst.i64 0
    v2 = icmp sgt v1, v0
@@ -185,7 +185,7 @@ block0(v0: i64):
 ;   popq    %rbp
 ;   ret

-function %test_0_sgt_x_i32f4(i32) -> b1 {
+function %test_0_sgt_x_i32f4(i32) -> i8 {
 block0(v0: i32):
    v1 = iconst.i32 0
    v2 = icmp sgt v1, v0
@@ -201,7 +201,7 @@ block0(v0: i32):
 ;   popq    %rbp
 ;   ret

-function %test_0_sle_x_i64(i64) -> b1 {
+function %test_0_sle_x_i64(i64) -> i8 {
 block0(v0: i64):
    v1 = iconst.i64 0
    v2 = icmp sle v1, v0
@@ -218,7 +218,7 @@ block0(v0: i64):
 ;   popq    %rbp
 ;   ret

-function %test_0_sle_x_i32f4(i32) -> b1 {
+function %test_0_sle_x_i32f4(i32) -> i8 {
 block0(v0: i32):
    v1 = iconst.i32 0
    v2 = icmp sle v1, v0
@@ -235,7 +235,7 @@ block0(v0: i32):
 ;   popq    %rbp
 ;   ret

-function %test_x_sge_x_i64(i64) -> b1 {
+function %test_x_sge_x_i64(i64) -> i8 {
 block0(v0: i64):
    v1 = iconst.i64 0
    v2 = icmp sge v0, v1
@@ -252,7 +252,7 @@ block0(v0: i64):
 ;   popq    %rbp
 ;   ret

-function %test_x_sge_x_i32f4(i32) -> b1 {
+function %test_x_sge_x_i32f4(i32) -> i8 {
 block0(v0: i32):
    v1 = iconst.i32 0
    v2 = icmp sge v0, v1
--- a/cranelift/filetests/filetests/isa/x64/fcmp-mem-bug.clif
+++ b/cranelift/filetests/filetests/isa/x64/fcmp-mem-bug.clif
@@ -223,9 +223,9 @@ function u0:11335(i64 vmctx, i64, i32, i32, i32, i32, i32, i32, i32, i32) fast {
                                    v394 -> v99
                                    v395 -> v356
@4b666c                             v16 = icmp sle v14, v15
-@4b666c                             v17 = bint.i32 v16
+@4b666c                             v17 = uextend.i32 v16
@4b6671                             v19 = icmp sle v18, v15
-@4b6671                             v20 = bint.i32 v19
+@4b6671                             v20 = uextend.i32 v19
@4b6672                             v21 = bor v17, v20
@4b6674                             brnz v21, block9
@4b6674                             jump block10
@@ -272,8 +272,7 @@ function u0:11335(i64 vmctx, i64, i32, i32, i32, i32, i32, i32, i32, i32) fast {
@4b6695                             v37 = iadd.i64 v438, v443
@4b6695                             v38 = load.f32 little v37+68
@4b6698                             v39 = fcmp.f32 gt v32, v38
-@4b6698                             v40 = bint.i32 v39
-@4b669a                             brnz v40, block14
+@4b669a                             brnz v39, block14
@4b669a                             jump block15

                                block15:
@@ -685,7 +684,7 @@ function u0:11335(i64 vmctx, i64, i32, i32, i32, i32, i32, i32, i32, i32) fast {
@4b69f3                             v366 = iadd.i64 v438, v534
@4b69f3                             v367 = load.f32 little v366+68
@4b69f6                             v368 = fcmp gt v362, v367
-@4b69f6                             v369 = bint.i32 v368
+@4b69f6                             v369 = uextend.i32 v368
@4b69f9                             v371 = bxor v369, v468
@4b69fb                             brnz v371, block71
@4b69fb                             jump block72
--- a/cranelift/filetests/filetests/isa/x64/i128.clif
+++ b/cranelift/filetests/filetests/isa/x64/i128.clif
@@ -159,7 +159,7 @@ block0(v0: i128):
 ;   popq    %rbp
 ;   ret

-function %f9(i128, i128) -> b1 {
+function %f9(i128, i128) -> i8 {
 block0(v0: i128, v1: i128):
    v2 = icmp eq v0, v1
    v3 = icmp ne v0, v1
@@ -457,17 +457,16 @@ block0(v0: i128):
 ;   popq    %rbp
 ;   ret

-function %f18(b1) -> i128 {
-block0(v0: b1):
-    v1 = bint.i128 v0
+function %f18(i8) -> i128 {
+block0(v0: i8):
+    v1 = uextend.i128 v0
    return v1
 }

 ;   pushq   %rbp
 ;   movq    %rsp, %rbp
 ; block0:
-;   movq    %rdi, %rax
-;   andq    %rax, $1, %rax
+;   movzbq  %dil, %rax
 ;   xorq    %rdx, %rdx, %rdx
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
@@ -655,8 +654,8 @@ block0(v0: i64):
 ;   popq    %rbp
 ;   ret

-function %f23(i128, b1) -> i128 {
-block0(v0: i128, v1: b1):
+function %f23(i128, i8) -> i128 {
+block0(v0: i128, v1: i8):
    v2 = iconst.i128 0
    brnz v1, block1(v2)
    jump block2(v2)
@@ -675,7 +674,7 @@ block2(v6: i128):
 ;   pushq   %rbp
 ;   movq    %rsp, %rbp
 ; block0:
-;   testb   $1, %dl
+;   testb   %dl, %dl
 ;   jnz     label1; j label2
 ; block1:
 ;   xorq    %rax, %rax, %rax
--- a/cranelift/filetests/filetests/isa/x64/load-op.clif
+++ b/cranelift/filetests/filetests/isa/x64/load-op.clif
@@ -126,7 +126,7 @@ function %cmp_mem(i64) -> i64 {
 block0(v0: i64):
  v1 = load.i64 v0
  v2 = icmp eq v0, v1
-  v3 = bint.i64 v2
+  v3 = uextend.i64 v2
  return v3
 }

@@ -134,8 +134,8 @@ block0(v0: i64):
 ;   movq    %rsp, %rbp
 ; block0:
 ;   cmpq    0(%rdi), %rdi
-;   setz    %al
-;   andq    %rax, $1, %rax
+;   setz    %r8b
+;   movzbq  %r8b, %rax
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
 ;   ret
--- a/cranelift/filetests/filetests/isa/x64/move-elision.clif
+++ b/cranelift/filetests/filetests/isa/x64/move-elision.clif
@@ -2,14 +2,14 @@ test compile precise-output
 set enable_simd
 target x86_64 skylake

-function %move_registers(i32x4) -> b8x16 {
+function %move_registers(i32x4) -> i8x16 {
 block0(v0: i32x4):
    ;; In the x64 backend, all of these pseudo-instructions are lowered to moves between registers (e.g. MOVAPD, MOVDQA,
    ;; etc.). Because these have been marked as moves, no instructions are emitted by this function besides the prologue
    ;; and epilogue.
    v1 = raw_bitcast.f32x4 v0
    v2 = raw_bitcast.f64x2 v1
-    v3 = raw_bitcast.b8x16 v2
+    v3 = raw_bitcast.i8x16 v2
    return v3
 }

--- a/cranelift/filetests/filetests/isa/x64/sextend.clif
+++ b/cranelift/filetests/filetests/isa/x64/sextend.clif
@@ -1,9 +1,9 @@
 test compile precise-output
 target x86_64

-function %f0(b8) -> b64 {
-block0(v0: b8):
-  v1 = bextend.b64 v0
+function %f0(i8) -> i64 {
+block0(v0: i8):
+  v1 = sextend.i64 v0
  return v1
 }

--- a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif
+++ b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif
@@ -150,8 +150,8 @@ block0:
 ;   popq    %rbp
 ;   ret

-function %vselect_i16x8(b16x8, i16x8, i16x8) -> i16x8 {
-block0(v0: b16x8, v1: i16x8, v2: i16x8):
+function %vselect_i16x8(i16x8, i16x8, i16x8) -> i16x8 {
+block0(v0: i16x8, v1: i16x8, v2: i16x8):
    v3 = vselect v0, v1, v2
    return v3
 }
@@ -166,8 +166,8 @@ block0(v0: b16x8, v1: i16x8, v2: i16x8):
 ;   popq    %rbp
 ;   ret

-function %vselect_f32x4(b32x4, f32x4, f32x4) -> f32x4 {
-block0(v0: b32x4, v1: f32x4, v2: f32x4):
+function %vselect_f32x4(i32x4, f32x4, f32x4) -> f32x4 {
+block0(v0: i32x4, v1: f32x4, v2: f32x4):
    v3 = vselect v0, v1, v2
    return v3
 }
@@ -182,8 +182,8 @@ block0(v0: b32x4, v1: f32x4, v2: f32x4):
 ;   popq    %rbp
 ;   ret

-function %vselect_f64x2(b64x2, f64x2, f64x2) -> f64x2 {
-block0(v0: b64x2, v1: f64x2, v2: f64x2):
+function %vselect_f64x2(i64x2, f64x2, f64x2) -> f64x2 {
+block0(v0: i64x2, v1: f64x2, v2: f64x2):
    v3 = vselect v0, v1, v2
    return v3
 }
--- a/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif
+++ b/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif
@@ -2,7 +2,7 @@ test compile precise-output
 set enable_simd
 target x86_64 skylake

-function %icmp_ne_32x4(i32x4, i32x4) -> b32x4 {
+function %icmp_ne_32x4(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
    v2 = icmp ne v0, v1
    return v2
@@ -18,7 +18,7 @@ block0(v0: i32x4, v1: i32x4):
 ;   popq    %rbp
 ;   ret

-function %icmp_ugt_i32x4(i32x4, i32x4) -> b32x4 {
+function %icmp_ugt_i32x4(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
    v2 = icmp ugt v0, v1
    return v2
@@ -35,7 +35,7 @@ block0(v0: i32x4, v1: i32x4):
 ;   popq    %rbp
 ;   ret

-function %icmp_sge_i16x8(i16x8, i16x8) -> b16x8 {
+function %icmp_sge_i16x8(i16x8, i16x8) -> i16x8 {
 block0(v0: i16x8, v1: i16x8):
    v2 = icmp sge v0, v1
    return v2
@@ -51,7 +51,7 @@ block0(v0: i16x8, v1: i16x8):
 ;   popq    %rbp
 ;   ret

-function %icmp_uge_i8x16(i8x16, i8x16) -> b8x16 {
+function %icmp_uge_i8x16(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
    v2 = icmp uge v0, v1
    return v2
--- a/Show More
+++ b/Show More