aarch64: Add specialized shuffle lowerings (#5977)
* aarch64: Add `shuffle` lowerings for the `uzp{1,2}` instructions
This commit uses the same style of patterns as the x64 backend to start
adding specialized lowerings of the Cranelift `shuffle` instruction to
particular AArch64 instructions.
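As a rough illustration (not part of the patch), `uzp1`/`uzp2` gather the even- or odd-numbered lanes of the two inputs; the Rust sketch below models `uzp1.16b` and the kind of `shuffle` mask the new rules recognize for it:

    // Model of `uzp1.16b`: gather the even-numbered byte lanes of the
    // 32-byte concatenation `a:b` (illustrative only, not compiler code).
    fn uzp1_bytes(a: [u8; 16], b: [u8; 16]) -> [u8; 16] {
        let concat: Vec<u8> = a.iter().chain(b.iter()).copied().collect();
        let mut out = [0u8; 16];
        for i in 0..16 {
            out[i] = concat[2 * i]; // lanes 0, 2, 4, ..., 30
        }
        out
    }

The matching shuffle mask is therefore the byte sequence 0, 2, 4, ..., 0x1e, which is the `0x1e1c_..._0200` immediate used by the byte-lane rule in the diff below.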
* aarch64: Add `shuffle` lowerings to the `zip{1,2}` instructions
These instructions match the `punpck*` family of instructions on x64 and
should help provide more efficient lowerings than the current `shuffle`
fallback.
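For reference (an illustrative sketch, not part of the patch), `zip1` interleaves the low halves of the two inputs and `zip2` the high halves:

    // Model of `zip1.16b`: interleave the low halves of `a` and `b`,
    // producing a[0], b[0], a[1], b[1], ... (illustrative only).
    fn zip1_bytes(a: [u8; 16], b: [u8; 16]) -> [u8; 16] {
        let mut out = [0u8; 16];
        for i in 0..8 {
            out[2 * i] = a[i];
            out[2 * i + 1] = b[i];
        }
        out
    }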
* aarch64: Add `shuffle` lowerings for `trn{1,2}`
Along the lines of the prior commits, this adds specific shuffle-mask
patterns that lower to the individual AArch64 `trn1` and `trn2`
instructions.
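For reference (an illustrative sketch, not part of the patch), `trn1` interleaves the even-numbered lanes of the two inputs and `trn2` the odd-numbered lanes:

    // Model of `trn1.16b`: out[2i] = a[2i], out[2i+1] = b[2i]
    // (`trn2` does the same with the odd-numbered lanes). Illustrative only.
    fn trn1_bytes(a: [u8; 16], b: [u8; 16]) -> [u8; 16] {
        let mut out = [0u8; 16];
        for i in 0..8 {
            out[2 * i] = a[2 * i];
            out[2 * i + 1] = b[2 * i];
        }
        out
    }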
* aarch64: Add a `shuffle` lowering for the `ext` instruction
This instruction more-or-less concatenates two 128-bit vector registers
into a 256-bit value, shifts it right by a byte amount, and places the
low 128 bits into the destination. This can be modeled with a `shuffle`
whose mask selects consecutive bytes, so this adds a lowering rule to
generate this instruction for such masks.
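A small sketch of that model (illustrative only, not part of the patch): with byte offset `n`, `ext` produces bytes `n..n+16` of the concatenation of the two inputs, which is exactly a `shuffle` whose mask is the consecutive byte indices `n, n+1, ..., n+15`:

    // Model of `ext` with immediate `n`: take bytes n..n+16 of the 32-byte
    // concatenation `a:b` (illustrative only).
    fn ext_bytes(a: [u8; 16], b: [u8; 16], n: usize) -> [u8; 16] {
        assert!(n < 16);
        let concat: Vec<u8> = a.iter().chain(b.iter()).copied().collect();
        let mut out = [0u8; 16];
        out.copy_from_slice(&concat[n..n + 16]);
        out
    }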
* aarch64: Add `shuffle` special case for `dup`
This commit adds special cases for Cranelift's `shuffle` on AArch64 when
the lowering can be represented with a `dup` instruction which
broadcasts one vector's lane into all lanes of the destination.
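A shuffle mask matches this case when every byte of the mask names the same lane; a minimal sketch of the idea (not part of the patch):

    // Model of `dup`: broadcast one byte lane of `a` to every destination
    // lane; the corresponding shuffle mask is `[lane; 16]` (illustrative only).
    fn dup_byte_lane(a: [u8; 16], lane: usize) -> [u8; 16] {
        assert!(lane < 16);
        [a[lane]; 16]
    }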
* aarch64: Add `shuffle` specializations for `rev` instructions
This commit adds shuffle mask specializations for the `rev{16,32,64}`
family of instructions on AArch64 which can be used to reverse bytes,
16-bit values, or 32-bit values within larger values.
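For example (an illustrative sketch, not part of the patch), `rev16.16b` swaps the two bytes inside every 16-bit chunk, which corresponds to the shuffle mask 1, 0, 3, 2, 5, 4, ...:

    // Model of `rev16.16b`: reverse the bytes within each 16-bit chunk of `a`
    // (illustrative only).
    fn rev16_bytes(a: [u8; 16]) -> [u8; 16] {
        let mut out = [0u8; 16];
        for i in (0..16).step_by(2) {
            out[i] = a[i + 1];
            out[i + 1] = a[i];
        }
        out
    }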
* Fix tests
* Add doc-comments in ISLE
@@ -553,7 +553,8 @@
   (VecDupFromFpu
     (rd WritableReg)
     (rn Reg)
-    (size VectorSize))
+    (size VectorSize)
+    (lane u8))

   ;; Duplicate FP immediate to vector.
   (VecDupFPImm
@@ -1390,8 +1391,18 @@
     (Addp)
     ;; Zip vectors (primary) [meaning, high halves]
     (Zip1)
+    ;; Zip vectors (secondary)
+    (Zip2)
     ;; Signed saturating rounding doubling multiply returning high half
     (Sqrdmulh)
+    ;; Unzip vectors (primary)
+    (Uzp1)
+    ;; Unzip vectors (secondary)
+    (Uzp2)
+    ;; Transpose vectors (primary)
+    (Trn1)
+    ;; Transpose vectors (secondary)
+    (Trn2)
 ))

 ;; A Vector ALU operation which modifies a source register.
@@ -1420,6 +1431,10 @@
     (Fneg)
     ;; Floating-point square root
     (Fsqrt)
+    ;; Reverse elements in 16-bit lanes
+    (Rev16)
+    ;; Reverse elements in 32-bit lanes
+    (Rev32)
     ;; Reverse elements in 64-bit doublewords
     (Rev64)
     ;; Floating-point convert to signed integer, rounding toward zero
@@ -1887,10 +1902,10 @@
     dst))

 ;; Helper for emitting `MInst.VecDupFromFpu` instructions.
-(decl vec_dup_from_fpu (Reg VectorSize) Reg)
-(rule (vec_dup_from_fpu src size)
+(decl vec_dup_from_fpu (Reg VectorSize u8) Reg)
+(rule (vec_dup_from_fpu src size lane)
   (let ((dst WritableReg (temp_writable_reg $I8X16))
-        (_ Unit (emit (MInst.VecDupFromFpu dst src size))))
+        (_ Unit (emit (MInst.VecDupFromFpu dst src size lane))))
     dst))

 ;; Helper for emitting `MInst.AluRRImm12` instructions.
@@ -2386,6 +2401,14 @@
 (decl neg (Reg VectorSize) Reg)
 (rule (neg x size) (vec_misc (VecMisc2.Neg) x size))

+;; Helper for generating `rev16` instructions.
+(decl rev16 (Reg VectorSize) Reg)
+(rule (rev16 x size) (vec_misc (VecMisc2.Rev16) x size))
+
+;; Helper for generating `rev32` instructions.
+(decl rev32 (Reg VectorSize) Reg)
+(rule (rev32 x size) (vec_misc (VecMisc2.Rev32) x size))
+
 ;; Helper for generating `rev64` instructions.
 (decl rev64 (Reg VectorSize) Reg)
 (rule (rev64 x size) (vec_misc (VecMisc2.Rev64) x size))
@@ -3767,3 +3790,27 @@
   (emit_side_effect (with_flags_side_effect
                       (cmp (OperandSize.Size32) ridx jt_size)
                       (jt_sequence ridx jt_info)))))
+
+;; Helper for emitting the `uzp1` instruction
+(decl vec_uzp1 (Reg Reg VectorSize) Reg)
+(rule (vec_uzp1 rn rm size) (vec_rrr (VecALUOp.Uzp1) rn rm size))
+
+;; Helper for emitting the `uzp2` instruction
+(decl vec_uzp2 (Reg Reg VectorSize) Reg)
+(rule (vec_uzp2 rn rm size) (vec_rrr (VecALUOp.Uzp2) rn rm size))
+
+;; Helper for emitting the `zip1` instruction
+(decl vec_zip1 (Reg Reg VectorSize) Reg)
+(rule (vec_zip1 rn rm size) (vec_rrr (VecALUOp.Zip1) rn rm size))
+
+;; Helper for emitting the `zip2` instruction
+(decl vec_zip2 (Reg Reg VectorSize) Reg)
+(rule (vec_zip2 rn rm size) (vec_rrr (VecALUOp.Zip2) rn rm size))
+
+;; Helper for emitting the `trn1` instruction
+(decl vec_trn1 (Reg Reg VectorSize) Reg)
+(rule (vec_trn1 rn rm size) (vec_rrr (VecALUOp.Trn1) rn rm size))
+
+;; Helper for emitting the `trn2` instruction
+(decl vec_trn2 (Reg Reg VectorSize) Reg)
+(rule (vec_trn2 rn rm size) (vec_rrr (VecALUOp.Trn2) rn rm size))
@@ -1977,8 +1977,20 @@ impl MachInstEmit for Inst {
                     );
                     (0b1, 0b11111, enc_size)
                 }
+                VecMisc2::Rev16 => {
+                    debug_assert_eq!(size, VectorSize::Size8x16);
+                    (0b0, 0b00001, enc_size)
+                }
+                VecMisc2::Rev32 => {
+                    debug_assert!(size == VectorSize::Size8x16 || size == VectorSize::Size16x8);
+                    (0b1, 0b00000, enc_size)
+                }
                 VecMisc2::Rev64 => {
-                    debug_assert_ne!(VectorSize::Size64x2, size);
+                    debug_assert!(
+                        size == VectorSize::Size8x16
+                            || size == VectorSize::Size16x8
+                            || size == VectorSize::Size32x4
+                    );
                     (0b0, 0b00000, enc_size)
                 }
                 VecMisc2::Fcvtzs => {
@@ -2493,13 +2505,27 @@ impl MachInstEmit for Inst {
                         | machreg_to_vec(rd.to_reg()),
                 );
             }
-            &Inst::VecDupFromFpu { rd, rn, size } => {
+            &Inst::VecDupFromFpu { rd, rn, size, lane } => {
                 let rd = allocs.next_writable(rd);
                 let rn = allocs.next(rn);
                 let q = size.is_128bits() as u32;
                 let imm5 = match size.lane_size() {
-                    ScalarSize::Size32 => 0b00100,
-                    ScalarSize::Size64 => 0b01000,
+                    ScalarSize::Size8 => {
+                        assert!(lane < 16);
+                        0b00001 | (u32::from(lane) << 1)
+                    }
+                    ScalarSize::Size16 => {
+                        assert!(lane < 8);
+                        0b00010 | (u32::from(lane) << 2)
+                    }
+                    ScalarSize::Size32 => {
+                        assert!(lane < 4);
+                        0b00100 | (u32::from(lane) << 3)
+                    }
+                    ScalarSize::Size64 => {
+                        assert!(lane < 2);
+                        0b01000 | (u32::from(lane) << 4)
+                    }
                     _ => unimplemented!(),
                 };
                 sink.put4(
@@ -2870,6 +2896,7 @@
             VecALUOp::Fmul => (0b001_01110_00_1, 0b110111),
             VecALUOp::Addp => (0b000_01110_00_1 | enc_size << 1, 0b101111),
             VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
+            VecALUOp::Zip2 => (0b01001110_00_0 | enc_size << 1, 0b011110),
             VecALUOp::Sqrdmulh => {
                 debug_assert!(
                     size.lane_size() == ScalarSize::Size16
@@ -2878,6 +2905,10 @@

                 (0b001_01110_00_1 | enc_size << 1, 0b101101)
             }
+            VecALUOp::Uzp1 => (0b01001110_00_0 | enc_size << 1, 0b000110),
+            VecALUOp::Uzp2 => (0b01001110_00_0 | enc_size << 1, 0b010110),
+            VecALUOp::Trn1 => (0b01001110_00_0 | enc_size << 1, 0b001010),
+            VecALUOp::Trn2 => (0b01001110_00_0 | enc_size << 1, 0b011010),
         };
         let top11 = if is_float {
             top11 | size.enc_float_size() << 1
@@ -2657,6 +2657,7 @@ fn test_aarch64_binemit() {
             rd: writable_vreg(14),
             rn: vreg(19),
             size: VectorSize::Size32x4,
+            lane: 0,
         },
         "6E06044E",
         "dup v14.4s, v19.s[0]",
@@ -2666,6 +2667,7 @@
             rd: writable_vreg(18),
             rn: vreg(10),
             size: VectorSize::Size64x2,
+            lane: 0,
         },
         "5205084E",
         "dup v18.2d, v10.d[0]",
@@ -2123,9 +2123,9 @@ impl Inst {
                 let rn = pretty_print_ireg(rn, size.operand_size(), allocs);
                 format!("dup {}, {}", rd, rn)
             }
-            &Inst::VecDupFromFpu { rd, rn, size } => {
+            &Inst::VecDupFromFpu { rd, rn, size, lane } => {
                 let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs);
-                let rn = pretty_print_vreg_element(rn, 0, size.lane_size(), allocs);
+                let rn = pretty_print_vreg_element(rn, lane.into(), size.lane_size(), allocs);
                 format!("dup {}, {}", rd, rn)
             }
             &Inst::VecDupFPImm { rd, imm, size } => {
@@ -2345,7 +2345,12 @@
                     VecALUOp::Fmul => ("fmul", size),
                     VecALUOp::Addp => ("addp", size),
                     VecALUOp::Zip1 => ("zip1", size),
+                    VecALUOp::Zip2 => ("zip2", size),
                     VecALUOp::Sqrdmulh => ("sqrdmulh", size),
+                    VecALUOp::Uzp1 => ("uzp1", size),
+                    VecALUOp::Uzp2 => ("uzp2", size),
+                    VecALUOp::Trn1 => ("trn1", size),
+                    VecALUOp::Trn2 => ("trn2", size),
                 };
                 let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs);
                 let rn = pretty_print_vreg_vector(rn, size, allocs);
@@ -2471,6 +2476,8 @@
                     VecMisc2::Fabs => ("fabs", size, ""),
                     VecMisc2::Fneg => ("fneg", size, ""),
                     VecMisc2::Fsqrt => ("fsqrt", size, ""),
+                    VecMisc2::Rev16 => ("rev16", size, ""),
+                    VecMisc2::Rev32 => ("rev32", size, ""),
                     VecMisc2::Rev64 => ("rev64", size, ""),
                     VecMisc2::Fcvtzs => ("fcvtzs", size, ""),
                     VecMisc2::Fcvtzu => ("fcvtzu", size, ""),
@@ -118,6 +118,118 @@

 ;;;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

+;; When a single element of one vector is broadcast to all the destination
+;; lanes then the `dup` instruction can be used for this operation. Note that
+;; for now this only matches lane selection from the first vector `a`, but
+;; if necessary in the future rules can be added to select from `b` as well.
+(rule 6 (lower (shuffle a b (shuffle_dup8_from_imm n)))
+      (vec_dup_from_fpu a (VectorSize.Size8x16) n))
+(rule 5 (lower (shuffle a b (shuffle_dup16_from_imm n)))
+      (vec_dup_from_fpu a (VectorSize.Size16x8) n))
+(rule 4 (lower (shuffle a b (shuffle_dup32_from_imm n)))
+      (vec_dup_from_fpu a (VectorSize.Size32x4) n))
+(rule 3 (lower (shuffle a b (shuffle_dup64_from_imm n)))
+      (vec_dup_from_fpu a (VectorSize.Size64x2) n))
+
+;; If the `Immediate` specified to the extractor looks like a duplication of the
+;; `n`th lane of the first vector of size K-byte lanes, then each extractor
+;; returns the `n` value as a `u8` to be used as part of a `vec_dup_from_fpu`
+;; instruction. Note that there's a different extractor for each bit-width of
+;; lane.
+(decl shuffle_dup8_from_imm (u8) Immediate)
+(extern extractor shuffle_dup8_from_imm shuffle_dup8_from_imm)
+(decl shuffle_dup16_from_imm (u8) Immediate)
+(extern extractor shuffle_dup16_from_imm shuffle_dup16_from_imm)
+(decl shuffle_dup32_from_imm (u8) Immediate)
+(extern extractor shuffle_dup32_from_imm shuffle_dup32_from_imm)
+(decl shuffle_dup64_from_imm (u8) Immediate)
+(extern extractor shuffle_dup64_from_imm shuffle_dup64_from_imm)
+
+;; When the shuffle looks like "concatenate `a` and `b` and shift right by n*8
+;; bytes", that's an `ext` instruction.
+(rule 2 (lower (shuffle a b (vec_extract_imm4_from_immediate n)))
+      (vec_extract a b n))
+
+;; Attempts to extract `n` from the specified shuffle `Immediate` where each
+;; byte of the `Immediate` is a consecutive sequence starting from `n`. This
+;; value of `n` is used as part of the `vec_extract` instruction which extracts
+;; consecutive bytes from two vectors into one final vector, offset by `n`
+;; bytes.
+(decl vec_extract_imm4_from_immediate (u8) Immediate)
+(extern extractor vec_extract_imm4_from_immediate vec_extract_imm4_from_immediate)
+
+;; Rules for the `uzp1` and `uzp2` instructions which gather even-numbered lanes
+;; or odd-numbered lanes
+(rule 1 (lower (shuffle a b (u128_from_immediate 0x1e1c_1a18_1614_1210_0e0c_0a08_0604_0200)))
+      (vec_uzp1 a b (VectorSize.Size8x16)))
+(rule 1 (lower (shuffle a b (u128_from_immediate 0x1f1d_1b19_1715_1311_0f0d_0b09_0705_0301)))
+      (vec_uzp2 a b (VectorSize.Size8x16)))
+(rule 1 (lower (shuffle a b (u128_from_immediate 0x1d1c_1918_1514_1110_0d0c_0908_0504_0100)))
+      (vec_uzp1 a b (VectorSize.Size16x8)))
+(rule 1 (lower (shuffle a b (u128_from_immediate 0x1f1e_1b1a_1716_1312_0f0e_0b0a_0706_0302)))
+      (vec_uzp2 a b (VectorSize.Size16x8)))
+(rule 1 (lower (shuffle a b (u128_from_immediate 0x1b1a1918_13121110_0b0a0908_03020100)))
+      (vec_uzp1 a b (VectorSize.Size32x4)))
+(rule 1 (lower (shuffle a b (u128_from_immediate 0x1f1e1d1c_17161514_0f0e0d0c_07060504)))
+      (vec_uzp2 a b (VectorSize.Size32x4)))
+(rule 1 (lower (shuffle a b (u128_from_immediate 0x1716151413121110_0706050403020100)))
+      (vec_uzp1 a b (VectorSize.Size64x2)))
+(rule 1 (lower (shuffle a b (u128_from_immediate 0x1f1e1d1c1b1a1918_0f0e0d0c0b0a0908)))
+      (vec_uzp2 a b (VectorSize.Size64x2)))
+
+;; Rules for the `zip1` and `zip2` instructions which interleave lanes in the
+;; low or high halves of the two input vectors.
+(rule 1 (lower (shuffle a b (u128_from_immediate 0x1707_1606_1505_1404_1303_1202_1101_1000)))
+      (vec_zip1 a b (VectorSize.Size8x16)))
+(rule 1 (lower (shuffle a b (u128_from_immediate 0x1f0f_1e0e_1d0d_1c0c_1b0b_1a0a_1909_1808)))
+      (vec_zip2 a b (VectorSize.Size8x16)))
+(rule 1 (lower (shuffle a b (u128_from_immediate 0x1716_0706_1514_0504_1312_0302_1110_0100)))
+      (vec_zip1 a b (VectorSize.Size16x8)))
+(rule 1 (lower (shuffle a b (u128_from_immediate 0x1f1e_0f0e_1d1c_0d0c_1b1a_0b0a_1918_0908)))
+      (vec_zip2 a b (VectorSize.Size16x8)))
+(rule 1 (lower (shuffle a b (u128_from_immediate 0x17161514_07060504_13121110_03020100)))
+      (vec_zip1 a b (VectorSize.Size32x4)))
+(rule 1 (lower (shuffle a b (u128_from_immediate 0x1f1e1d1c_0f0e0d0c_1b1a1918_0b0a0908)))
+      (vec_zip2 a b (VectorSize.Size32x4)))
+;; Note that zip1/zip2 for i64x2 vectors is omitted since it's already covered
+;; by the i64x2 cases of uzp1/uzp2 above where both zip and uzp have the same
+;; semantics for 64-bit lanes.
+
+;; Rules for the `trn1` and `trn2` instructions which interleave odd or even
+;; lanes in the two input vectors.
+(rule 1 (lower (shuffle a b (u128_from_immediate 0x1e0e_1c0c_1a0a_1808_1606_1404_1202_1000)))
+      (vec_trn1 a b (VectorSize.Size8x16)))
+(rule 1 (lower (shuffle a b (u128_from_immediate 0x1f0f_1d0d_1b0b_1909_1707_1505_1303_1101)))
+      (vec_trn2 a b (VectorSize.Size8x16)))
+(rule 1 (lower (shuffle a b (u128_from_immediate 0x1d1c_0d0c_1918_0908_1514_0504_1110_0100)))
+      (vec_trn1 a b (VectorSize.Size16x8)))
+(rule 1 (lower (shuffle a b (u128_from_immediate 0x1f1e_0f0e_1b1a_0b0a_1716_0706_1312_0302)))
+      (vec_trn2 a b (VectorSize.Size16x8)))
+(rule 1 (lower (shuffle a b (u128_from_immediate 0x1b1a1918_0b0a0908_13121110_03020100)))
+      (vec_trn1 a b (VectorSize.Size32x4)))
+(rule 1 (lower (shuffle a b (u128_from_immediate 0x1f1e1d1c_0f0e0d0c_17161514_07060504)))
+      (vec_trn2 a b (VectorSize.Size32x4)))
+;; Note that trn1/trn2 for i64x2 vectors is omitted since it's already covered
+;; by the i64x2 cases of uzp1/uzp2 above where both trn and uzp have the same
+;; semantics for 64-bit lanes.
+
+;; Rules for the `rev{16,32,64}` instructions where reversals happen at either
+;; the byte level, the 16-bit level, or 32-bit level. Note that all of these
+;; patterns only match reversals in the first operand, but they can
+;; theoretically be extended if necessary to reversals in the second operand.
+(rule 1 (lower (shuffle a b (u128_from_immediate 0x0e0f_0c0d_0a0b_0809_0607_0405_0203_0001)))
+      (rev16 a (VectorSize.Size8x16)))
+(rule 1 (lower (shuffle a b (u128_from_immediate 0x0c0d0e0f_08090a0b_04050607_00010203)))
+      (rev32 a (VectorSize.Size8x16)))
+(rule 1 (lower (shuffle a b (u128_from_immediate 0x0d0c0f0e_09080b0a_05040706_01000302)))
+      (rev32 a (VectorSize.Size16x8)))
+(rule 1 (lower (shuffle a b (u128_from_immediate 0x08090a0b0c0d0e0f_0001020304050607)))
+      (rev64 a (VectorSize.Size8x16)))
+(rule 1 (lower (shuffle a b (u128_from_immediate 0x09080b0a0d0c0f0e_0100030205040706)))
+      (rev64 a (VectorSize.Size16x8)))
+(rule 1 (lower (shuffle a b (u128_from_immediate 0x0b0a09080f0e0d0c_0302010007060504)))
+      (rev64 a (VectorSize.Size32x4)))
+
 (rule (lower (has_type ty (shuffle rn rn2 (u128_from_immediate mask))))
       (let ((mask_reg Reg (constant_f128 mask)))
         (vec_tbl2 rn rn2 mask_reg ty)))
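To make the mask encodings above concrete, here is a small standalone sketch (illustrative only, not part of the patch) that decodes the i64x2 `uzp1` immediate into its little-endian byte indices and checks which 64-bit lanes of the concatenated input it selects:

    // Decode the shuffle immediate used by the i64x2 `uzp1` rule above and
    // verify it picks 64-bit lane 0 of `a` and 64-bit lane 0 of `b`
    // (lanes 0 and 2 of the concatenation). Illustrative only.
    fn main() {
        let mask: u128 = 0x1716151413121110_0706050403020100;
        let bytes: Vec<u8> = (0..16).map(|i| (mask >> (8 * i)) as u8).collect();
        assert_eq!(&bytes[0..8], &[0u8, 1, 2, 3, 4, 5, 6, 7]); // 64-bit lane 0 (from `a`)
        assert_eq!(&bytes[8..16], &[16u8, 17, 18, 19, 20, 21, 22, 23]); // 64-bit lane 2 (from `b`)
        println!("mask selects lanes 0 and 2 -> uzp1.2d");
    }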
@@ -1840,7 +1952,7 @@
   (vec_dup x (vector_size ty)))

 (rule -2 (lower (has_type ty (splat x @ (value_type (ty_scalar_float _)))))
-  (vec_dup_from_fpu x (vector_size ty)))
+  (vec_dup_from_fpu x (vector_size ty) 0))

 (rule (lower (has_type ty (splat (f32const (u64_from_ieee32 n)))))
   (splat_const n (vector_size ty)))
@@ -742,4 +742,47 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
             );
         }
     }
+
+    fn vec_extract_imm4_from_immediate(&mut self, imm: Immediate) -> Option<u8> {
+        let bytes = self.lower_ctx.get_immediate_data(imm).as_slice();
+
+        if bytes.windows(2).all(|a| a[0] + 1 == a[1]) && bytes[0] < 16 {
+            Some(bytes[0])
+        } else {
+            None
+        }
+    }
+
+    fn shuffle_dup8_from_imm(&mut self, imm: Immediate) -> Option<u8> {
+        let bytes = self.lower_ctx.get_immediate_data(imm).as_slice();
+        if bytes.iter().all(|b| *b == bytes[0]) && bytes[0] < 16 {
+            Some(bytes[0])
+        } else {
+            None
+        }
+    }
+    fn shuffle_dup16_from_imm(&mut self, imm: Immediate) -> Option<u8> {
+        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
+        if a == b && b == c && c == d && d == e && e == f && f == g && g == h && a < 8 {
+            Some(a)
+        } else {
+            None
+        }
+    }
+    fn shuffle_dup32_from_imm(&mut self, imm: Immediate) -> Option<u8> {
+        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
+        if a == b && b == c && c == d && a < 4 {
+            Some(a)
+        } else {
+            None
+        }
+    }
+    fn shuffle_dup64_from_imm(&mut self, imm: Immediate) -> Option<u8> {
+        let (a, b) = self.shuffle64_from_imm(imm)?;
+        if a == b && a < 2 {
+            Some(a)
+        } else {
+            None
+        }
+    }
 }
@@ -586,6 +586,17 @@ macro_rules! isle_lower_prelude_methods {
             self.lower_ctx.gen_return(rets);
         }

+        /// Same as `shuffle32_from_imm`, but for 64-bit lane shuffles.
+        fn shuffle64_from_imm(&mut self, imm: Immediate) -> Option<(u8, u8)> {
+            use crate::machinst::isle::shuffle_imm_as_le_lane_idx;
+
+            let bytes = self.lower_ctx.get_immediate_data(imm).as_slice();
+            Some((
+                shuffle_imm_as_le_lane_idx(8, &bytes[0..8])?,
+                shuffle_imm_as_le_lane_idx(8, &bytes[8..16])?,
+            ))
+        }
+
         /// Attempts to interpret the shuffle immediate `imm` as a shuffle of
         /// 32-bit lanes, returning four integers, each of which is less than 8,
         /// which represents a permutation of 32-bit lanes as specified by
@@ -597,6 +597,8 @@
 ;; returned will be in the range of 0 to (256/N)-1, inclusive, and index the
 ;; N-bit chunks of two concatenated 128-bit vectors starting from the
 ;; least-significant bits.
+(decl shuffle64_from_imm (u8 u8) Immediate)
+(extern extractor shuffle64_from_imm shuffle64_from_imm)
 (decl shuffle32_from_imm (u8 u8 u8 u8) Immediate)
 (extern extractor shuffle32_from_imm shuffle32_from_imm)
 (decl shuffle16_from_imm (u8 u8 u8 u8 u8 u8 u8 u8) Immediate)