[AArch64] Port IaddPairwise to ISLE (#4201)
Copyright (c) 2022, Arm Limited.
This commit is contained in:
@@ -920,7 +920,9 @@
|
||||
|
||||
;; Helper for calculating the `VectorSize` corresponding to a type
|
||||
(decl vector_size (Type) VectorSize)
|
||||
(rule (vector_size (multi_lane 8 8)) (VectorSize.Size8x8))
|
||||
(rule (vector_size (multi_lane 8 16)) (VectorSize.Size8x16))
|
||||
(rule (vector_size (multi_lane 16 4)) (VectorSize.Size16x4))
|
||||
(rule (vector_size (multi_lane 16 8)) (VectorSize.Size16x8))
|
||||
(rule (vector_size (multi_lane 32 4)) (VectorSize.Size32x4))
|
||||
(rule (vector_size (multi_lane 64 2)) (VectorSize.Size64x2))
|
||||
@@ -1540,6 +1542,13 @@
|
||||
(_ Unit (emit (MInst.VecRRRLong op dst src1 src2 high_half))))
|
||||
dst))
|
||||
|
||||
;; Helper for emitting `MInst.VecRRPairLong` instructions.
;;
;; Allocates a fresh temporary register via `temp_writable_reg $I8X16`, emits
;; the instruction with `op` reading from `rn`, and returns that temporary.
(decl vec_rr_pair_long (VecRRPairLongOp Reg) Reg)
(rule (vec_rr_pair_long op rn)
      (let ((rd WritableReg (temp_writable_reg $I8X16))
            (_ Unit (emit (MInst.VecRRPairLong op rd rn))))
        rd))
|
||||
|
||||
;; Helper for emitting `MInst.VecRRRLong` instructions, but for variants
|
||||
;; where the operation both reads and modifies the destination register.
|
||||
;;
|
||||
@@ -1729,6 +1738,20 @@
|
||||
(decl shll32 (Reg bool) Reg)
|
||||
(rule (shll32 x high_half) (vec_rr_long (VecRRLongOp.Shll32) x high_half))
|
||||
|
||||
;; Helpers for generating `saddlp` and `uaddlp` instructions.
;;
;; Each helper forwards its single operand to `vec_rr_pair_long` together with
;; the `VecRRPairLongOp` variant of the same name.

;; Uses `VecRRPairLongOp.Saddlp8`.
(decl saddlp8 (Reg) Reg)
(rule (saddlp8 src) (vec_rr_pair_long (VecRRPairLongOp.Saddlp8) src))

;; Uses `VecRRPairLongOp.Saddlp16`.
(decl saddlp16 (Reg) Reg)
(rule (saddlp16 src) (vec_rr_pair_long (VecRRPairLongOp.Saddlp16) src))

;; Uses `VecRRPairLongOp.Uaddlp8`.
(decl uaddlp8 (Reg) Reg)
(rule (uaddlp8 src) (vec_rr_pair_long (VecRRPairLongOp.Uaddlp8) src))

;; Uses `VecRRPairLongOp.Uaddlp16`.
(decl uaddlp16 (Reg) Reg)
(rule (uaddlp16 src) (vec_rr_pair_long (VecRRPairLongOp.Uaddlp16) src))
|
||||
|
||||
;; Helper for generating `umlal32` instructions.
|
||||
(decl umlal32 (Reg Reg Reg bool) Reg)
|
||||
(rule (umlal32 x y z high_half) (vec_rrrr_long (VecRRRLongOp.Umlal32) x y z high_half))
|
||||
|
||||
@@ -99,6 +99,27 @@
|
||||
(add_with_flags_paired $I64 x_lo y_lo)
|
||||
(adc_paired $I64 x_hi y_hi))))
|
||||
|
||||
;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Signed case, `$I16X8` result: when the `swiden_low` and `swiden_high`
;; operands read the same value (checked by `same_value`), lower to `saddlp8`
;; applied to that value.
(rule (lower (has_type $I16X8 (iadd_pairwise (swiden_low lo_src) (swiden_high hi_src))))
      (if-let src (same_value lo_src hi_src))
      (saddlp8 src))

;; Signed case, `$I32X4` result: same pattern, lowered to `saddlp16`.
(rule (lower (has_type $I32X4 (iadd_pairwise (swiden_low lo_src) (swiden_high hi_src))))
      (if-let src (same_value lo_src hi_src))
      (saddlp16 src))

;; Unsigned case, `$I16X8` result: `uwiden_low`/`uwiden_high` of the same
;; value, lowered to `uaddlp8`.
(rule (lower (has_type $I16X8 (iadd_pairwise (uwiden_low lo_src) (uwiden_high hi_src))))
      (if-let src (same_value lo_src hi_src))
      (uaddlp8 src))

;; Unsigned case, `$I32X4` result: same pattern, lowered to `uaddlp16`.
(rule (lower (has_type $I32X4 (iadd_pairwise (uwiden_low lo_src) (uwiden_high hi_src))))
      (if-let src (same_value lo_src hi_src))
      (uaddlp16 src))

;; General case: `addp` on both operands, with the vector size derived from
;; the result type.
(rule (lower (has_type ty (iadd_pairwise lhs rhs)))
      (addp lhs rhs (vector_size ty)))
|
||||
|
||||
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `i64` and smaller
|
||||
|
||||
@@ -1357,56 +1357,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
});
|
||||
}
|
||||
|
||||
Opcode::IaddPairwise => {
|
||||
let ty = ty.unwrap();
|
||||
let lane_type = ty.lane_type();
|
||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
|
||||
let mut match_long_pair = |ext_low_op, ext_high_op| -> Option<(VecRRPairLongOp, Reg)> {
|
||||
if let Some(lhs) = maybe_input_insn(ctx, inputs[0], ext_low_op) {
|
||||
if let Some(rhs) = maybe_input_insn(ctx, inputs[1], ext_high_op) {
|
||||
let lhs_inputs = insn_inputs(ctx, lhs);
|
||||
let rhs_inputs = insn_inputs(ctx, rhs);
|
||||
let low = put_input_in_reg(ctx, lhs_inputs[0], NarrowValueMode::None);
|
||||
let high = put_input_in_reg(ctx, rhs_inputs[0], NarrowValueMode::None);
|
||||
if low == high {
|
||||
match (lane_type, ext_low_op) {
|
||||
(I16, Opcode::SwidenLow) => {
|
||||
return Some((VecRRPairLongOp::Saddlp8, low))
|
||||
}
|
||||
(I32, Opcode::SwidenLow) => {
|
||||
return Some((VecRRPairLongOp::Saddlp16, low))
|
||||
}
|
||||
(I16, Opcode::UwidenLow) => {
|
||||
return Some((VecRRPairLongOp::Uaddlp8, low))
|
||||
}
|
||||
(I32, Opcode::UwidenLow) => {
|
||||
return Some((VecRRPairLongOp::Uaddlp16, low))
|
||||
}
|
||||
_ => (),
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
};
|
||||
|
||||
if let Some((op, rn)) = match_long_pair(Opcode::SwidenLow, Opcode::SwidenHigh) {
|
||||
ctx.emit(Inst::VecRRPairLong { op, rd, rn });
|
||||
} else if let Some((op, rn)) = match_long_pair(Opcode::UwidenLow, Opcode::UwidenHigh) {
|
||||
ctx.emit(Inst::VecRRPairLong { op, rd, rn });
|
||||
} else {
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
ctx.emit(Inst::VecRRR {
|
||||
alu_op: VecALUOp::Addp,
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
}
|
||||
Opcode::IaddPairwise => implemented_in_isle(ctx),
|
||||
|
||||
Opcode::WideningPairwiseDotProductS => {
|
||||
let r_y = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
|
||||
@@ -29,6 +29,15 @@ pub type BoxExternalName = Box<ExternalName>;
|
||||
#[doc(hidden)]
|
||||
macro_rules! isle_prelude_methods {
|
||||
() => {
|
||||
#[inline]
|
||||
fn same_value(&mut self, a: Value, b: Value) -> Option<Value> {
|
||||
if a == b {
|
||||
Some(a)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn unpack_value_array_2(&mut self, arr: &ValueArray2) -> (Value, Value) {
|
||||
let [a, b] = *arr;
|
||||
|
||||
@@ -371,6 +371,10 @@
|
||||
(extractor (unwrap_head_value_list_2 head1 head2 tail)
|
||||
(value_list_slice (value_slice_unwrap head1 (value_slice_unwrap head2 tail))))
|
||||
|
||||
;; Constructor to test whether two values are the same.
|
||||
(decl pure same_value (Value Value) Value)
|
||||
(extern constructor same_value same_value)
|
||||
|
||||
;; Turn a `Writable<Reg>` into a `Reg` via `Writable::to_reg`.
|
||||
(decl writable_reg_to_reg (WritableReg) Reg)
|
||||
(extern constructor writable_reg_to_reg writable_reg_to_reg)
|
||||
|
||||
@@ -107,3 +107,53 @@ block0(v0: i8x16):
|
||||
; addp v0.8h, v2.8h, v4.8h
|
||||
; ret
|
||||
|
||||
function %fn9(i8x8, i8x8) -> i8x8 {
|
||||
block0(v0: i8x8, v1: i8x8):
|
||||
v2 = iadd_pairwise v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; block0:
|
||||
; addp v0.8b, v0.8b, v1.8b
|
||||
; ret
|
||||
|
||||
function %fn10(i8x16, i8x16) -> i8x16 {
|
||||
block0(v0: i8x16, v1: i8x16):
|
||||
v2 = iadd_pairwise v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; block0:
|
||||
; addp v0.16b, v0.16b, v1.16b
|
||||
; ret
|
||||
|
||||
function %fn11(i16x4, i16x4) -> i16x4 {
|
||||
block0(v0: i16x4, v1: i16x4):
|
||||
v2 = iadd_pairwise v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; block0:
|
||||
; addp v0.4h, v0.4h, v1.4h
|
||||
; ret
|
||||
|
||||
function %fn12(i16x8, i16x8) -> i16x8 {
|
||||
block0(v0: i16x8, v1: i16x8):
|
||||
v2 = iadd_pairwise v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; block0:
|
||||
; addp v0.8h, v0.8h, v1.8h
|
||||
; ret
|
||||
|
||||
function %fn14(i32x4, i32x4) -> i32x4 {
|
||||
block0(v0: i32x4, v1: i32x4):
|
||||
v2 = iadd_pairwise v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; block0:
|
||||
; addp v0.4s, v0.4s, v1.4s
|
||||
; ret
|
||||
|
||||
|
||||
@@ -23,3 +23,45 @@ block0(v0: i32x4, v1: i32x4):
|
||||
}
|
||||
; run: %iaddp_i32x4([1 2 3 4], [5 6 7 8]) == [3 7 11 15]
|
||||
; run: %iaddp_i32x4([4294967290 5 4294967290 5], [100 100 100 100]) == [4294967295 4294967295 200 200]
|
||||
|
||||
function %swiden_i8x16(i8x16) -> i16x8 {
|
||||
block0(v0: i8x16):
|
||||
v1 = swiden_low v0
|
||||
v2 = swiden_high v0
|
||||
v3 = iadd_pairwise v1, v2
|
||||
return v3
|
||||
}
|
||||
; run: %swiden_i8x16([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]) == [3 7 11 15 19 23 27 31]
|
||||
; run: %swiden_i8x16([-1 2 -3 4 -5 6 -7 8 -9 10 -11 12 -13 14 -15 16]) == [1 1 1 1 1 1 1 1]
|
||||
; run: %swiden_i8x16([127 1 126 2 125 3 124 4 123 5 122 6 121 7 120 8]) == [128 128 128 128 128 128 128 128]
|
||||
|
||||
function %uwiden_i8x16(i8x16) -> i16x8 {
|
||||
block0(v0: i8x16):
|
||||
v1 = uwiden_low v0
|
||||
v2 = uwiden_high v0
|
||||
v3 = iadd_pairwise v1, v2
|
||||
return v3
|
||||
}
|
||||
; run: %uwiden_i8x16([17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32]) == [35 39 43 47 51 55 59 63]
|
||||
; run: %uwiden_i8x16([2 254 3 253 4 252 5 251 6 250 7 249 8 248 9 247]) == [256 256 256 256 256 256 256 256]
|
||||
|
||||
function %swiden_i16x8(i16x8) -> i32x4 {
|
||||
block0(v0: i16x8):
|
||||
v1 = swiden_low v0
|
||||
v2 = swiden_high v0
|
||||
v3 = iadd_pairwise v1, v2
|
||||
return v3
|
||||
}
|
||||
; run: %swiden_i16x8([1 2 3 4 5 6 7 8]) == [3 7 11 15]
|
||||
; run: %swiden_i16x8([32767 1 32766 3 32765 5 32764 8]) == [32768 32769 32770 32772]
|
||||
; run: %swiden_i16x8([-32768 -1 32766 3 32765 5 -32764 -8]) == [-32769 32769 32770 -32772]
|
||||
|
||||
function %uwiden_i16x8(i16x8) -> i32x4 {
|
||||
block0(v0: i16x8):
|
||||
v1 = uwiden_low v0
|
||||
v2 = uwiden_high v0
|
||||
v3 = iadd_pairwise v1, v2
|
||||
return v3
|
||||
}
|
||||
; run: %uwiden_i16x8([100 99 98 97 96 95 94 93]) == [199 195 191 187]
|
||||
; run: %uwiden_i16x8([65535 1 65534 3 65533 5 65532 8]) == [65536 65537 65538 65540]
|
||||
|
||||
Reference in New Issue
Block a user