[AArch64] Port IaddPairwise to ISLE (#4201)

Copyright (c) 2022, Arm Limited.
This commit is contained in:
Sam Parker
2022-06-06 15:37:13 +01:00
committed by GitHub
parent 7148882867
commit acfeda4d80
7 changed files with 150 additions and 50 deletions

View File

@@ -920,7 +920,9 @@
;; Helper for calculating the `VectorSize` corresponding to a type.
;;
;; Each rule matches one `multi_lane` shape and yields the `VectorSize`
;; variant whose name carries the same pair of numbers, e.g.
;; `(multi_lane 8 16)` => `VectorSize.Size8x16`.
(decl vector_size (Type) VectorSize)
(rule (vector_size (multi_lane 8 8)) (VectorSize.Size8x8))
(rule (vector_size (multi_lane 8 16)) (VectorSize.Size8x16))
(rule (vector_size (multi_lane 16 4)) (VectorSize.Size16x4))
(rule (vector_size (multi_lane 16 8)) (VectorSize.Size16x8))
(rule (vector_size (multi_lane 32 4)) (VectorSize.Size32x4))
(rule (vector_size (multi_lane 64 2)) (VectorSize.Size64x2))
@@ -1540,6 +1542,13 @@
(_ Unit (emit (MInst.VecRRRLong op dst src1 src2 high_half))))
dst))
;; Helper for emitting `MInst.VecRRPairLong` instructions.
;;
;; Obtains a temporary writable register from `temp_writable_reg $I8X16`,
;; emits a `VecRRPairLong` that applies `op` to `src` with that temporary as
;; the destination, and yields the destination register.
(decl vec_rr_pair_long (VecRRPairLongOp Reg) Reg)
(rule (vec_rr_pair_long op src)
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.VecRRPairLong op dst src))))
dst))
;; Helper for emitting `MInst.VecRRRLong` instructions, but for variants
;; where the operation both reads and modifies the destination register.
;;
@@ -1729,6 +1738,20 @@
(decl shll32 (Reg bool) Reg)
(rule (shll32 x high_half) (vec_rr_long (VecRRLongOp.Shll32) x high_half))
;; Helpers for generating `saddlp` and `uaddlp` instructions.
;;
;; Each helper takes a single source register and forwards it to
;; `vec_rr_pair_long` together with the `VecRRPairLongOp` variant of the same
;; name (`Saddlp8`, `Saddlp16`, `Uaddlp8`, `Uaddlp16`).
(decl saddlp8 (Reg) Reg)
(rule (saddlp8 x) (vec_rr_pair_long (VecRRPairLongOp.Saddlp8) x))
(decl saddlp16 (Reg) Reg)
(rule (saddlp16 x) (vec_rr_pair_long (VecRRPairLongOp.Saddlp16) x))
(decl uaddlp8 (Reg) Reg)
(rule (uaddlp8 x) (vec_rr_pair_long (VecRRPairLongOp.Uaddlp8) x))
(decl uaddlp16 (Reg) Reg)
(rule (uaddlp16 x) (vec_rr_pair_long (VecRRPairLongOp.Uaddlp16) x))
;; Helper for generating `umlal32` instructions.
;;
;; Forwards `x`, `y`, `z` and the `high_half` flag unchanged to
;; `vec_rrrr_long` with the `VecRRRLongOp.Umlal32` operation.
(decl umlal32 (Reg Reg Reg bool) Reg)
(rule (umlal32 x y z high_half) (vec_rrrr_long (VecRRRLongOp.Umlal32) x y z high_half))

View File

@@ -99,6 +99,27 @@
(add_with_flags_paired $I64 x_lo y_lo)
(adc_paired $I64 x_hi y_hi))))
;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Signed widening case: when the operands are `swiden_low x` and
;; `swiden_high y`, and `same_value` confirms that `x` and `y` are the same
;; value, the whole expression is lowered to a single `saddlp` on that value
;; (`saddlp8` for an `$I16X8` result, `saddlp16` for an `$I32X4` result).
(rule (lower (has_type $I16X8 (iadd_pairwise (swiden_low x) (swiden_high y))))
(if-let z (same_value x y))
(saddlp8 z))
(rule (lower (has_type $I32X4 (iadd_pairwise (swiden_low x) (swiden_high y))))
(if-let z (same_value x y))
(saddlp16 z))
;; Unsigned widening case: same shape as above with `uwiden_low` /
;; `uwiden_high`, lowered to `uaddlp8` / `uaddlp16`.
(rule (lower (has_type $I16X8 (iadd_pairwise (uwiden_low x) (uwiden_high y))))
(if-let z (same_value x y))
(uaddlp8 z))
(rule (lower (has_type $I32X4 (iadd_pairwise (uwiden_low x) (uwiden_high y))))
(if-let z (same_value x y))
(uaddlp16 z))
;; General case: lower `iadd_pairwise` to `addp` on both operands, with the
;; vector size derived from the result type via `vector_size`.
;; NOTE(review): presumably selected only when the more specific rules above
;; do not match -- confirm against ISLE rule prioritisation.
(rule (lower (has_type ty (iadd_pairwise x y)))
(addp x y (vector_size ty)))
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `i64` and smaller

View File

@@ -1357,56 +1357,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
});
}
Opcode::IaddPairwise => {
let ty = ty.unwrap();
let lane_type = ty.lane_type();
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let mut match_long_pair = |ext_low_op, ext_high_op| -> Option<(VecRRPairLongOp, Reg)> {
if let Some(lhs) = maybe_input_insn(ctx, inputs[0], ext_low_op) {
if let Some(rhs) = maybe_input_insn(ctx, inputs[1], ext_high_op) {
let lhs_inputs = insn_inputs(ctx, lhs);
let rhs_inputs = insn_inputs(ctx, rhs);
let low = put_input_in_reg(ctx, lhs_inputs[0], NarrowValueMode::None);
let high = put_input_in_reg(ctx, rhs_inputs[0], NarrowValueMode::None);
if low == high {
match (lane_type, ext_low_op) {
(I16, Opcode::SwidenLow) => {
return Some((VecRRPairLongOp::Saddlp8, low))
}
(I32, Opcode::SwidenLow) => {
return Some((VecRRPairLongOp::Saddlp16, low))
}
(I16, Opcode::UwidenLow) => {
return Some((VecRRPairLongOp::Uaddlp8, low))
}
(I32, Opcode::UwidenLow) => {
return Some((VecRRPairLongOp::Uaddlp16, low))
}
_ => (),
};
}
}
}
None
};
if let Some((op, rn)) = match_long_pair(Opcode::SwidenLow, Opcode::SwidenHigh) {
ctx.emit(Inst::VecRRPairLong { op, rd, rn });
} else if let Some((op, rn)) = match_long_pair(Opcode::UwidenLow, Opcode::UwidenHigh) {
ctx.emit(Inst::VecRRPairLong { op, rd, rn });
} else {
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
ctx.emit(Inst::VecRRR {
alu_op: VecALUOp::Addp,
rd,
rn,
rm,
size: VectorSize::from_ty(ty),
});
}
}
Opcode::IaddPairwise => implemented_in_isle(ctx),
Opcode::WideningPairwiseDotProductS => {
let r_y = get_output_reg(ctx, outputs[0]).only_reg().unwrap();

View File

@@ -29,6 +29,15 @@ pub type BoxExternalName = Box<ExternalName>;
#[doc(hidden)]
macro_rules! isle_prelude_methods {
() => {
/// Yields the common value when `a` and `b` compare equal, and `None`
/// when they differ.
#[inline]
fn same_value(&mut self, a: Value, b: Value) -> Option<Value> {
    // Guard clause: distinct values never match.
    if a != b {
        return None;
    }
    Some(a)
}
#[inline]
fn unpack_value_array_2(&mut self, arr: &ValueArray2) -> (Value, Value) {
let [a, b] = *arr;

View File

@@ -371,6 +371,10 @@
(extractor (unwrap_head_value_list_2 head1 head2 tail)
(value_list_slice (value_slice_unwrap head1 (value_slice_unwrap head2 tail))))
;; Constructor to test whether two values are the same.
;;
;; Declared `pure` and implemented externally under the name `same_value`;
;; the external implementation returns the shared value when both arguments
;; are equal and nothing otherwise.
(decl pure same_value (Value Value) Value)
(extern constructor same_value same_value)
;; Turn a `Writable<Reg>` into a `Reg` via `Writable::to_reg`.
;; Implemented externally (`extern constructor`) under the same name.
(decl writable_reg_to_reg (WritableReg) Reg)
(extern constructor writable_reg_to_reg writable_reg_to_reg)

View File

@@ -107,3 +107,53 @@ block0(v0: i8x16):
; addp v0.8h, v2.8h, v4.8h
; ret
function %fn9(i8x8, i8x8) -> i8x8 {
block0(v0: i8x8, v1: i8x8):
v2 = iadd_pairwise v0, v1
return v2
}
; block0:
; addp v0.8b, v0.8b, v1.8b
; ret
function %fn10(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = iadd_pairwise v0, v1
return v2
}
; block0:
; addp v0.16b, v0.16b, v1.16b
; ret
function %fn11(i16x4, i16x4) -> i16x4 {
block0(v0: i16x4, v1: i16x4):
v2 = iadd_pairwise v0, v1
return v2
}
; block0:
; addp v0.4h, v0.4h, v1.4h
; ret
function %fn12(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = iadd_pairwise v0, v1
return v2
}
; block0:
; addp v0.8h, v0.8h, v1.8h
; ret
function %fn14(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = iadd_pairwise v0, v1
return v2
}
; block0:
; addp v0.4s, v0.4s, v1.4s
; ret

View File

@@ -23,3 +23,45 @@ block0(v0: i32x4, v1: i32x4):
}
; run: %iaddp_i32x4([1 2 3 4], [5 6 7 8]) == [3 7 11 15]
; run: %iaddp_i32x4([4294967290 5 4294967290 5], [100 100 100 100]) == [4294967295 4294967295 200 200]
function %swiden_i8x16(i8x16) -> i16x8 {
block0(v0: i8x16):
v1 = swiden_low v0
v2 = swiden_high v0
v3 = iadd_pairwise v1, v2
return v3
}
; run: %swiden_i8x16([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]) == [3 7 11 15 19 23 27 31]
; run: %swiden_i8x16([-1 2 -3 4 -5 6 -7 8 -9 10 -11 12 -13 14 -15 16]) == [1 1 1 1 1 1 1 1]
; run: %swiden_i8x16([127 1 126 2 125 3 124 4 123 5 122 6 121 7 120 8]) == [128 128 128 128 128 128 128 128]
function %uwiden_i8x16(i8x16) -> i16x8 {
block0(v0: i8x16):
v1 = uwiden_low v0
v2 = uwiden_high v0
v3 = iadd_pairwise v1, v2
return v3
}
; run: %uwiden_i8x16([17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32]) == [35 39 43 47 51 55 59 63]
; run: %uwiden_i8x16([2 254 3 253 4 252 5 251 6 250 7 249 8 248 9 247]) == [256 256 256 256 256 256 256 256]
function %swiden_i16x8(i16x8) -> i32x4 {
block0(v0: i16x8):
v1 = swiden_low v0
v2 = swiden_high v0
v3 = iadd_pairwise v1, v2
return v3
}
; run: %swiden_i16x8([1 2 3 4 5 6 7 8]) == [3 7 11 15]
; run: %swiden_i16x8([32767 1 32766 3 32765 5 32764 8]) == [32768 32769 32770 32772]
; run: %swiden_i16x8([-32768 -1 32766 3 32765 5 -32764 -8]) == [-32769 32769 32770 -32772]
function %uwiden_i16x8(i16x8) -> i32x4 {
block0(v0: i16x8):
v1 = uwiden_low v0
v2 = uwiden_high v0
v3 = iadd_pairwise v1, v2
return v3
}
; run: %uwiden_i16x8([100 99 98 97 96 95 94 93]) == [199 195 191 187]
; run: %uwiden_i16x8([65535 1 65534 3 65533 5 65532 8]) == [65536 65537 65538 65540]