diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index 79797fff4c..cdba177578 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -3298,21 +3298,10 @@ pub(crate) fn define( .operands_out(vec![a]), ); - let Bool = &TypeVar::new( - "Bool", - "A scalar or vector boolean type", - TypeSetBuilder::new() - .bools(Interval::All) - .simd_lanes(Interval::All) - .build(), - ); let BoolTo = &TypeVar::new( "BoolTo", - "A larger boolean type with the same number of lanes", - TypeSetBuilder::new() - .bools(Interval::All) - .simd_lanes(Interval::All) - .build(), + "A larger boolean type", + TypeSetBuilder::new().bools(Interval::All).build(), ); let x = &Operand::new("x", Bool); let a = &Operand::new("a", BoolTo); @@ -3321,11 +3310,7 @@ pub(crate) fn define( Inst::new( "bextend", r#" - Convert `x` to a larger boolean type in the platform-defined way. - - The result type must have the same number of vector lanes as the input, - and each lane must not have fewer bits that the input lanes. If the - input and output types are the same, this is a no-op. 
+ Convert `x` to a larger boolean type "#, &formats.unary, ) @@ -3355,6 +3340,14 @@ pub(crate) fn define( .operands_out(vec![a]), ); + let Bool = &TypeVar::new( + "Bool", + "A scalar or vector boolean type", + TypeSetBuilder::new() + .bools(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); let IntTo = &TypeVar::new( "IntTo", "An integer type with the same number of lanes", diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index 4730f3262a..feb8eee83e 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -1124,6 +1124,46 @@ (rule (lower (bint x)) (and_imm $I32 x (u64_into_imm_logic $I32 1))) +;;;; Rules for `bmask`/`bextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Bextend and Bmask both simply sign-extend. This works for: +;; - Bextend, because booleans are stored as 0 / -1, so we +;; sign-extend the -1 to a -1 in the wider width. +;; - Bmask, because the resulting integer mask value must be +;; all-ones (-1) if the argument is true. + +;; Use a common helper to type cast bools to either bool or integer types. +(decl cast_bool (Type Type Value) InstOutput) +(rule (lower (has_type out_ty (bextend x @ (value_type in_ty)))) + (cast_bool in_ty out_ty x)) +(rule (lower (has_type out_ty (bmask x @ (value_type in_ty)))) + (cast_bool in_ty out_ty x)) + + +;; If the target has the same or a smaller size than the source, it's a no-op. 
+(rule (cast_bool $B8 $I8 x) x) +(rule (cast_bool $B16 (fits_in_16 _out) x) x) +(rule (cast_bool $B32 (fits_in_32 _out) x) x) +(rule (cast_bool $B64 (fits_in_64 _out) x) x) + +;; Casting between two 128-bit types is a no-op. +(rule (cast_bool (ty_int_bool_128 _in) (ty_int_bool_128 _out) x) + x) + +;; Converting from 128 bits to anything narrower: keep register 0, ignore the top register. +(rule (cast_bool (ty_int_bool_128 _in) (fits_in_64 _out) x) + (value_regs_get x 0)) + +;; Sign-extend to 64 bits first; the result is then all 0s or all 1s, so we can +;; duplicate it into both halves of the 128-bit value. +(rule (cast_bool in (ty_int_bool_128 _out) x) + (let ((tmp Reg (extend x $true (ty_bits in) 64))) + (value_regs tmp tmp))) + +;; Values that fit in a single register are sign-extended normally. +(rule (cast_bool (fits_in_64 in) (fits_in_64 out) x) + (extend x $true (ty_bits in) (ty_bits out))) + ;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; The implementation of `popcnt` for scalar types is done by moving the value diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 85f1803086..e255af98d6 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -425,44 +425,7 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Breduce | Opcode::Ireduce => implemented_in_isle(ctx), - Opcode::Bextend | Opcode::Bmask => { - // Bextend and Bmask both simply sign-extend. This works for: - // - Bextend, because booleans are stored as 0 / -1, so we - // sign-extend the -1 to a -1 in the wider width. - // - Bmask, because the resulting integer mask value must be - // all-ones (-1) if the argument is true. 
- - let from_ty = ctx.input_ty(insn, 0); - let to_ty = ctx.output_ty(insn, 0); - let from_bits = ty_bits(from_ty); - let to_bits = ty_bits(to_ty); - - if from_ty.is_vector() || from_bits > 64 || to_bits > 64 { - return Err(CodegenError::Unsupported(format!( - "{}: Unsupported type: {:?}", - op, from_ty - ))); - } - - assert!(from_bits <= to_bits); - - let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); - - if from_bits == to_bits { - ctx.emit(Inst::gen_move(rd, rn, to_ty)); - } else { - let to_bits = if to_bits > 32 { 64 } else { 32 }; - let from_bits = from_bits as u8; - ctx.emit(Inst::Extend { - rd, - rn, - signed: true, - from_bits, - to_bits, - }); - } - } + Opcode::Bextend | Opcode::Bmask => implemented_in_isle(ctx), Opcode::Bint => implemented_in_isle(ctx), diff --git a/cranelift/filetests/filetests/isa/aarch64/bitops.clif b/cranelift/filetests/filetests/isa/aarch64/bitops.clif index da88426aab..c45d89ebd9 100644 --- a/cranelift/filetests/filetests/isa/aarch64/bitops.clif +++ b/cranelift/filetests/filetests/isa/aarch64/bitops.clif @@ -317,8 +317,8 @@ block0: } ; block0: -; movz x2, #255 -; sxtb w0, w2 +; movz x1, #255 +; sxtb w0, w1 ; ret function %bextend_b1() -> b32 { @@ -329,8 +329,8 @@ block0: } ; block0: -; movz x2, #1 -; sbfx w0, w2, #0, #1 +; movz x1, #1 +; sbfx w0, w1, #0, #1 ; ret function %bnot_i32(i32) -> i32 { diff --git a/cranelift/filetests/filetests/runtests/bmask.clif b/cranelift/filetests/filetests/runtests/bmask.clif index d68e59ec00..fb87c02166 100644 --- a/cranelift/filetests/filetests/runtests/bmask.clif +++ b/cranelift/filetests/filetests/runtests/bmask.clif @@ -1,4 +1,7 @@ test interpret +test run +target aarch64 +target s390x function %bmask_b64_i64(b64) -> i64 { block0(v0: b64): diff --git a/cranelift/filetests/filetests/runtests/i128-bextend.clif b/cranelift/filetests/filetests/runtests/i128-bextend.clif index 34372f98e4..4d53917932 100644 --- 
a/cranelift/filetests/filetests/runtests/i128-bextend.clif +++ b/cranelift/filetests/filetests/runtests/i128-bextend.clif @@ -1,4 +1,6 @@ test interpret +test run +target aarch64 function %bextend_b1_b128(b1) -> b128 { block0(v0: b1): diff --git a/cranelift/filetests/filetests/runtests/i128-bmask.clif b/cranelift/filetests/filetests/runtests/i128-bmask.clif index f87df7f2ab..2d282ccce9 100644 --- a/cranelift/filetests/filetests/runtests/i128-bmask.clif +++ b/cranelift/filetests/filetests/runtests/i128-bmask.clif @@ -1,4 +1,6 @@ test interpret +test run +target aarch64 function %bmask_b128_i128(b128) -> i128 { block0(v0: b128):