aarch64: Implement bmask/bextend in ISLE (#4358)

* aarch64: Implement `bmask`/`bextend` in ISLE

* cranelift: Remove vector versions of `bextend`

* aarch64: Cleanup `bmask`/`bextend` documentation
This commit is contained in:
Afonso Bordado
2022-07-01 17:37:18 +01:00
committed by GitHub
parent 64759f04a4
commit 38ecd3744f
7 changed files with 63 additions and 60 deletions

View File

@@ -3298,21 +3298,10 @@ pub(crate) fn define(
.operands_out(vec![a]), .operands_out(vec![a]),
); );
let Bool = &TypeVar::new(
"Bool",
"A scalar or vector boolean type",
TypeSetBuilder::new()
.bools(Interval::All)
.simd_lanes(Interval::All)
.build(),
);
let BoolTo = &TypeVar::new( let BoolTo = &TypeVar::new(
"BoolTo", "BoolTo",
"A larger boolean type with the same number of lanes", "A larger boolean type",
TypeSetBuilder::new() TypeSetBuilder::new().bools(Interval::All).build(),
.bools(Interval::All)
.simd_lanes(Interval::All)
.build(),
); );
let x = &Operand::new("x", Bool); let x = &Operand::new("x", Bool);
let a = &Operand::new("a", BoolTo); let a = &Operand::new("a", BoolTo);
@@ -3321,11 +3310,7 @@ pub(crate) fn define(
Inst::new( Inst::new(
"bextend", "bextend",
r#" r#"
Convert `x` to a larger boolean type in the platform-defined way. Convert `x` to a larger boolean type
The result type must have the same number of vector lanes as the input,
and each lane must not have fewer bits that the input lanes. If the
input and output types are the same, this is a no-op.
"#, "#,
&formats.unary, &formats.unary,
) )
@@ -3355,6 +3340,14 @@ pub(crate) fn define(
.operands_out(vec![a]), .operands_out(vec![a]),
); );
let Bool = &TypeVar::new(
"Bool",
"A scalar or vector boolean type",
TypeSetBuilder::new()
.bools(Interval::All)
.simd_lanes(Interval::All)
.build(),
);
let IntTo = &TypeVar::new( let IntTo = &TypeVar::new(
"IntTo", "IntTo",
"An integer type with the same number of lanes", "An integer type with the same number of lanes",

View File

@@ -1124,6 +1124,46 @@
(rule (lower (bint x)) (rule (lower (bint x))
(and_imm $I32 x (u64_into_imm_logic $I32 1))) (and_imm $I32 x (u64_into_imm_logic $I32 1)))
;;;; Rules for `bmask`/`bextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Bextend and Bmask both simply sign-extend. This works for:
;; - Bextend, because booleans are stored as 0 / -1, so we
;; sign-extend the -1 to a -1 in the wider width.
;; - Bmask, because the resulting integer mask value must be
;; all-ones (-1) if the argument is true.
;; Use a common helper to type cast bools to either bool or integer types.
;; `cast_bool` takes, in order: the input type, the output type, and the
;; value being cast.
(decl cast_bool (Type Type Value) InstOutput)
;; Both opcodes lower identically: the operand's type, the result type and the
;; operand itself are handed to `cast_bool`.
(rule (lower (has_type out_ty (bextend x @ (value_type in_ty))))
(cast_bool in_ty out_ty x))
(rule (lower (has_type out_ty (bmask x @ (value_type in_ty))))
(cast_bool in_ty out_ty x))
;; If the target has the same or a smaller size than the source, it's a no-op.
;; NOTE(review): the `$B8` rule only matches an `$I8` target, whereas the wider
;; sources use `fits_in_*` patterns -- confirm this asymmetry is intentional.
(rule (cast_bool $B8 $I8 x) x)
(rule (cast_bool $B16 (fits_in_16 _out) x) x)
(rule (cast_bool $B32 (fits_in_32 _out) x) x)
(rule (cast_bool $B64 (fits_in_64 _out) x) x)
;; Casting between two 128-bit types is a no-op.
(rule (cast_bool (ty_int_bool_128 _in) (ty_int_bool_128 _out) x)
x)
;; When converting from 128 bits to anything that fits in 64 bits, we just
;; ignore the top register and keep only register 0.
(rule (cast_bool (ty_int_bool_128 _in) (fits_in_64 _out) x)
(value_regs_get x 0))
;; Extend to 64 bits first, then this will be all 0s or all 1s and we can
;; duplicate to both halves of 128 bits.
;; NOTE(review): `$true` is presumably the "signed" flag of `extend`, matching
;; the sign-extension described above -- confirm against the helper's
;; declaration.
(rule (cast_bool in (ty_int_bool_128 _out) x)
(let ((tmp Reg (extend x $true (ty_bits in) 64)))
(value_regs tmp tmp)))
;; Values that fit in a single register are sign extended normally, from the
;; input type's bit width to the output type's bit width.
(rule (cast_bool (fits_in_64 in) (fits_in_64 out) x)
(extend x $true (ty_bits in) (ty_bits out)))
;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The implementation of `popcnt` for scalar types is done by moving the value ;; The implementation of `popcnt` for scalar types is done by moving the value

View File

@@ -425,44 +425,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::Breduce | Opcode::Ireduce => implemented_in_isle(ctx), Opcode::Breduce | Opcode::Ireduce => implemented_in_isle(ctx),
Opcode::Bextend | Opcode::Bmask => { Opcode::Bextend | Opcode::Bmask => implemented_in_isle(ctx),
// Bextend and Bmask both simply sign-extend. This works for:
// - Bextend, because booleans are stored as 0 / -1, so we
// sign-extend the -1 to a -1 in the wider width.
// - Bmask, because the resulting integer mask value must be
// all-ones (-1) if the argument is true.
let from_ty = ctx.input_ty(insn, 0);
let to_ty = ctx.output_ty(insn, 0);
let from_bits = ty_bits(from_ty);
let to_bits = ty_bits(to_ty);
if from_ty.is_vector() || from_bits > 64 || to_bits > 64 {
return Err(CodegenError::Unsupported(format!(
"{}: Unsupported type: {:?}",
op, from_ty
)));
}
assert!(from_bits <= to_bits);
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
if from_bits == to_bits {
ctx.emit(Inst::gen_move(rd, rn, to_ty));
} else {
let to_bits = if to_bits > 32 { 64 } else { 32 };
let from_bits = from_bits as u8;
ctx.emit(Inst::Extend {
rd,
rn,
signed: true,
from_bits,
to_bits,
});
}
}
Opcode::Bint => implemented_in_isle(ctx), Opcode::Bint => implemented_in_isle(ctx),

View File

@@ -317,8 +317,8 @@ block0:
} }
; block0: ; block0:
; movz x2, #255 ; movz x1, #255
; sxtb w0, w2 ; sxtb w0, w1
; ret ; ret
function %bextend_b1() -> b32 { function %bextend_b1() -> b32 {
@@ -329,8 +329,8 @@ block0:
} }
; block0: ; block0:
; movz x2, #1 ; movz x1, #1
; sbfx w0, w2, #0, #1 ; sbfx w0, w1, #0, #1
; ret ; ret
function %bnot_i32(i32) -> i32 { function %bnot_i32(i32) -> i32 {

View File

@@ -1,4 +1,7 @@
test interpret test interpret
test run
target aarch64
target s390x
function %bmask_b64_i64(b64) -> i64 { function %bmask_b64_i64(b64) -> i64 {
block0(v0: b64): block0(v0: b64):

View File

@@ -1,4 +1,6 @@
test interpret test interpret
test run
target aarch64
function %bextend_b1_b128(b1) -> b128 { function %bextend_b1_b128(b1) -> b128 {
block0(v0: b1): block0(v0: b1):

View File

@@ -1,4 +1,6 @@
test interpret test interpret
test run
target aarch64
function %bmask_b128_i128(b128) -> i128 { function %bmask_b128_i128(b128) -> i128 {
block0(v0: b128): block0(v0: b128):