aarch64: lower `iadd_pairwise` through ISLE

Summary of this patch:
  * inst.isle: adds `vector_size` rules for `(multi_lane 8 8)` and `(multi_lane 16 4)`,
    the `vec_rr_pair_long` emit helper for `MInst.VecRRPairLong`, and the
    `saddlp8` / `saddlp16` / `uaddlp8` / `uaddlp16` wrappers built on it.
  * lower.isle: adds the `iadd_pairwise` rules. When both operands are the low/high
    widenings of the same value the rule emits a single `saddlp`/`uaddlp`; every other
    case falls back to `addp`.
  * lower_inst.rs: removes the hand-written `Opcode::IaddPairwise` lowering and defers
    to `implemented_in_isle(ctx)`.
  * machinst/isle.rs, prelude.isle: add the pure `same_value` constructor, which yields
    the value only when both arguments are equal.
  * filetests: add compile tests for plain `addp` and run tests for the widening forms.

Note: line breaks and indentation in this file were reconstructed from a
whitespace-collapsed copy. If a context line does not match your tree, apply with
`git apply --ignore-whitespace`.

diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle
index 12324ab7db..18398ca736 100644
--- a/cranelift/codegen/src/isa/aarch64/inst.isle
+++ b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -920,7 +920,9 @@
 
 ;; Helper for calculating the `VectorSize` corresponding to a type
 (decl vector_size (Type) VectorSize)
+(rule (vector_size (multi_lane 8 8)) (VectorSize.Size8x8))
 (rule (vector_size (multi_lane 8 16)) (VectorSize.Size8x16))
+(rule (vector_size (multi_lane 16 4)) (VectorSize.Size16x4))
 (rule (vector_size (multi_lane 16 8)) (VectorSize.Size16x8))
 (rule (vector_size (multi_lane 32 4)) (VectorSize.Size32x4))
 (rule (vector_size (multi_lane 64 2)) (VectorSize.Size64x2))
@@ -1540,6 +1542,13 @@
             (_ Unit (emit (MInst.VecRRRLong op dst src1 src2 high_half))))
         dst))
 
+;; Helper for emitting `MInst.VecRRPairLong` instructions.
+(decl vec_rr_pair_long (VecRRPairLongOp Reg) Reg)
+(rule (vec_rr_pair_long op src)
+      (let ((dst WritableReg (temp_writable_reg $I8X16))
+            (_ Unit (emit (MInst.VecRRPairLong op dst src))))
+        dst))
+
 ;; Helper for emitting `MInst.VecRRRLong` instructions, but for variants
 ;; where the operation both reads and modifies the destination register.
 ;;
@@ -1729,6 +1738,20 @@
 (decl shll32 (Reg bool) Reg)
 (rule (shll32 x high_half) (vec_rr_long (VecRRLongOp.Shll32) x high_half))
 
+;; Helpers for generating `addlp` instructions.
+
+(decl saddlp8 (Reg) Reg)
+(rule (saddlp8 x) (vec_rr_pair_long (VecRRPairLongOp.Saddlp8) x))
+
+(decl saddlp16 (Reg) Reg)
+(rule (saddlp16 x) (vec_rr_pair_long (VecRRPairLongOp.Saddlp16) x))
+
+(decl uaddlp8 (Reg) Reg)
+(rule (uaddlp8 x) (vec_rr_pair_long (VecRRPairLongOp.Uaddlp8) x))
+
+(decl uaddlp16 (Reg) Reg)
+(rule (uaddlp16 x) (vec_rr_pair_long (VecRRPairLongOp.Uaddlp16) x))
+
 ;; Helper for generating `umlal32` instructions.
 (decl umlal32 (Reg Reg Reg bool) Reg)
 (rule (umlal32 x y z high_half) (vec_rrrr_long (VecRRRLongOp.Umlal32) x y z high_half))
diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle
index b298a30509..de30bff355 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -99,6 +99,27 @@
         (add_with_flags_paired $I64 x_lo y_lo)
         (adc_paired $I64 x_hi y_hi))))
 
+;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $I16X8 (iadd_pairwise (swiden_low x) (swiden_high y))))
+      (if-let z (same_value x y))
+      (saddlp8 z))
+
+(rule (lower (has_type $I32X4 (iadd_pairwise (swiden_low x) (swiden_high y))))
+      (if-let z (same_value x y))
+      (saddlp16 z))
+
+(rule (lower (has_type $I16X8 (iadd_pairwise (uwiden_low x) (uwiden_high y))))
+      (if-let z (same_value x y))
+      (uaddlp8 z))
+
+(rule (lower (has_type $I32X4 (iadd_pairwise (uwiden_low x) (uwiden_high y))))
+      (if-let z (same_value x y))
+      (uaddlp16 z))
+
+(rule (lower (has_type ty (iadd_pairwise x y)))
+      (addp x y (vector_size ty)))
+
 ;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; `i64` and smaller
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index 7618ed1b30..f13f09cf40 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -1357,56 +1357,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             });
         }
 
-        Opcode::IaddPairwise => {
-            let ty = ty.unwrap();
-            let lane_type = ty.lane_type();
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-
-            let mut match_long_pair = |ext_low_op, ext_high_op| -> Option<(VecRRPairLongOp, Reg)> {
-                if let Some(lhs) = maybe_input_insn(ctx, inputs[0], ext_low_op) {
-                    if let Some(rhs) = maybe_input_insn(ctx, inputs[1], ext_high_op) {
-                        let lhs_inputs = insn_inputs(ctx, lhs);
-                        let rhs_inputs = insn_inputs(ctx, rhs);
-                        let low = put_input_in_reg(ctx, lhs_inputs[0], NarrowValueMode::None);
-                        let high = put_input_in_reg(ctx, rhs_inputs[0], NarrowValueMode::None);
-                        if low == high {
-                            match (lane_type, ext_low_op) {
-                                (I16, Opcode::SwidenLow) => {
-                                    return Some((VecRRPairLongOp::Saddlp8, low))
-                                }
-                                (I32, Opcode::SwidenLow) => {
-                                    return Some((VecRRPairLongOp::Saddlp16, low))
-                                }
-                                (I16, Opcode::UwidenLow) => {
-                                    return Some((VecRRPairLongOp::Uaddlp8, low))
-                                }
-                                (I32, Opcode::UwidenLow) => {
-                                    return Some((VecRRPairLongOp::Uaddlp16, low))
-                                }
-                                _ => (),
-                            };
-                        }
-                    }
-                }
-                None
-            };
-
-            if let Some((op, rn)) = match_long_pair(Opcode::SwidenLow, Opcode::SwidenHigh) {
-                ctx.emit(Inst::VecRRPairLong { op, rd, rn });
-            } else if let Some((op, rn)) = match_long_pair(Opcode::UwidenLow, Opcode::UwidenHigh) {
-                ctx.emit(Inst::VecRRPairLong { op, rd, rn });
-            } else {
-                let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-                let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
-                ctx.emit(Inst::VecRRR {
-                    alu_op: VecALUOp::Addp,
-                    rd,
-                    rn,
-                    rm,
-                    size: VectorSize::from_ty(ty),
-                });
-            }
-        }
+        Opcode::IaddPairwise => implemented_in_isle(ctx),
 
         Opcode::WideningPairwiseDotProductS => {
             let r_y = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs
index 3e26be3d4f..24cee437a8 100644
--- a/cranelift/codegen/src/machinst/isle.rs
+++ b/cranelift/codegen/src/machinst/isle.rs
@@ -29,6 +29,15 @@ pub type BoxExternalName = Box<ExternalName>;
 #[doc(hidden)]
 macro_rules! isle_prelude_methods {
     () => {
+        #[inline]
+        fn same_value(&mut self, a: Value, b: Value) -> Option<Value> {
+            if a == b {
+                Some(a)
+            } else {
+                None
+            }
+        }
+
         #[inline]
         fn unpack_value_array_2(&mut self, arr: &ValueArray2) -> (Value, Value) {
             let [a, b] = *arr;
diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle
index 1f9641a31d..d387a77fac 100644
--- a/cranelift/codegen/src/prelude.isle
+++ b/cranelift/codegen/src/prelude.isle
@@ -371,6 +371,10 @@
 (extractor (unwrap_head_value_list_2 head1 head2 tail)
            (value_list_slice (value_slice_unwrap head1 (value_slice_unwrap head2 tail))))
 
+;; Constructor to test whether two values are the same.
+(decl pure same_value (Value Value) Value)
+(extern constructor same_value same_value)
+
 ;; Turn a `Writable<Reg>` into a `Reg` via `Writable::to_reg`.
 (decl writable_reg_to_reg (WritableReg) Reg)
 (extern constructor writable_reg_to_reg writable_reg_to_reg)
diff --git a/cranelift/filetests/filetests/isa/aarch64/simd-pairwise-add.clif b/cranelift/filetests/filetests/isa/aarch64/simd-pairwise-add.clif
index d1d81c61f7..33942d371c 100644
--- a/cranelift/filetests/filetests/isa/aarch64/simd-pairwise-add.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd-pairwise-add.clif
@@ -107,3 +107,53 @@ block0(v0: i8x16):
 ;   addp v0.8h, v2.8h, v4.8h
 ;   ret
 
+function %fn9(i8x8, i8x8) -> i8x8 {
+block0(v0: i8x8, v1: i8x8):
+    v2 = iadd_pairwise v0, v1
+    return v2
+}
+
+; block0:
+;   addp v0.8b, v0.8b, v1.8b
+;   ret
+
+function %fn10(i8x16, i8x16) -> i8x16 {
+block0(v0: i8x16, v1: i8x16):
+    v2 = iadd_pairwise v0, v1
+    return v2
+}
+
+; block0:
+;   addp v0.16b, v0.16b, v1.16b
+;   ret
+
+function %fn11(i16x4, i16x4) -> i16x4 {
+block0(v0: i16x4, v1: i16x4):
+    v2 = iadd_pairwise v0, v1
+    return v2
+}
+
+; block0:
+;   addp v0.4h, v0.4h, v1.4h
+;   ret
+
+function %fn12(i16x8, i16x8) -> i16x8 {
+block0(v0: i16x8, v1: i16x8):
+    v2 = iadd_pairwise v0, v1
+    return v2
+}
+
+; block0:
+;   addp v0.8h, v0.8h, v1.8h
+;   ret
+
+function %fn14(i32x4, i32x4) -> i32x4 {
+block0(v0: i32x4, v1: i32x4):
+    v2 = iadd_pairwise v0, v1
+    return v2
+}
+
+; block0:
+;   addp v0.4s, v0.4s, v1.4s
+;   ret
+
diff --git a/cranelift/filetests/filetests/runtests/simd-iaddpairwise.clif b/cranelift/filetests/filetests/runtests/simd-iaddpairwise.clif
index 7884ba3b02..92f5d776fe 100644
--- a/cranelift/filetests/filetests/runtests/simd-iaddpairwise.clif
+++ b/cranelift/filetests/filetests/runtests/simd-iaddpairwise.clif
@@ -23,3 +23,45 @@ block0(v0: i32x4, v1: i32x4):
 }
 ; run: %iaddp_i32x4([1 2 3 4], [5 6 7 8]) == [3 7 11 15]
 ; run: %iaddp_i32x4([4294967290 5 4294967290 5], [100 100 100 100]) == [4294967295 4294967295 200 200]
+
+function %swiden_i8x16(i8x16) -> i16x8 {
+block0(v0: i8x16):
+    v1 = swiden_low v0
+    v2 = swiden_high v0
+    v3 = iadd_pairwise v1, v2
+    return v3
+}
+; run: %swiden_i8x16([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]) == [3 7 11 15 19 23 27 31]
+; run: %swiden_i8x16([-1 2 -3 4 -5 6 -7 8 -9 10 -11 12 -13 14 -15 16]) == [1 1 1 1 1 1 1 1]
+; run: %swiden_i8x16([127 1 126 2 125 3 124 4 123 5 122 6 121 7 120 8]) == [128 128 128 128 128 128 128 128]
+
+function %uwiden_i8x16(i8x16) -> i16x8 {
+block0(v0: i8x16):
+    v1 = uwiden_low v0
+    v2 = uwiden_high v0
+    v3 = iadd_pairwise v1, v2
+    return v3
+}
+; run: %uwiden_i8x16([17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32]) == [35 39 43 47 51 55 59 63]
+; run: %uwiden_i8x16([2 254 3 253 4 252 5 251 6 250 7 249 8 248 9 247]) == [256 256 256 256 256 256 256 256]
+
+function %swiden_i16x8(i16x8) -> i32x4 {
+block0(v0: i16x8):
+    v1 = swiden_low v0
+    v2 = swiden_high v0
+    v3 = iadd_pairwise v1, v2
+    return v3
+}
+; run: %swiden_i16x8([1 2 3 4 5 6 7 8]) == [3 7 11 15]
+; run: %swiden_i16x8([32767 1 32766 3 32765 5 32764 8]) == [32768 32769 32770 32772]
+; run: %swiden_i16x8([-32768 -1 32766 3 32765 5 -32764 -8]) == [-32769 32769 32770 -32772]
+
+function %uwiden_i16x8(i16x8) -> i32x4 {
+block0(v0: i16x8):
+    v1 = uwiden_low v0
+    v2 = uwiden_high v0
+    v3 = iadd_pairwise v1, v2
+    return v3
+}
+; run: %uwiden_i16x8([100 99 98 97 96 95 94 93]) == [199 195 191 187]
+; run: %uwiden_i16x8([65535 1 65534 3 65533 5 65532 8]) == [65536 65537 65538 65540]