Add vector compare to 0 optims (#3887)

Signed-off-by: Freddie Liardet <frederick.liardet@arm.com>
This commit is contained in:
FreddieLiardet
2022-03-10 00:20:06 +00:00
committed by GitHub
parent 8b48ce7fb7
commit 13b9396931
10 changed files with 1748 additions and 162 deletions

View File

@@ -1158,6 +1158,24 @@
(Cnt)
;; Compare bitwise equal to 0
(Cmeq0)
;; Compare signed greater than or equal to 0
(Cmge0)
;; Compare signed greater than 0
(Cmgt0)
;; Compare signed less than or equal to 0
(Cmle0)
;; Compare signed less than 0
(Cmlt0)
;; Floating point compare equal to 0
(Fcmeq0)
;; Floating point compare greater than or equal to 0
(Fcmge0)
;; Floating point compare greater than 0
(Fcmgt0)
;; Floating point compare less than or equal to 0
(Fcmle0)
;; Floating point compare less than 0
(Fcmlt0)
))
;; A vector widening operation with one argument.
@@ -1997,3 +2015,79 @@
(value_regs
(alu_rrr op ty x_lo y_lo)
(alu_rrr op ty x_hi y_hi))))
;; Float vector compare helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Match 32 bit float 0 value
;;
;; Extractor over an `Ieee32` immediate. On a match it yields the immediate
;; itself. The matching logic is provided externally under the same name; that
;; implementation accepts the immediate only when its raw bits are all zero.
(decl zero_value_f32 (Ieee32) Ieee32)
(extern extractor zero_value_f32 zero_value_f32)
;; Match 64 bit float 0 value
;;
;; Extractor over an `Ieee64` immediate. On a match it yields the immediate
;; itself. The matching logic is provided externally under the same name; that
;; implementation accepts the immediate only when its raw bits are all zero.
(decl zero_value_f64 (Ieee64) Ieee64)
(extern extractor zero_value_f64 zero_value_f64)
;; Generate comparison to zero operator from input condition code
;;
;; External constructor: turns a `FloatCC` into the `VecMisc2` compare-to-zero
;; operation for a comparison whose second operand is the zero. Used by
;; `float_cmp_zero`.
(decl float_cc_cmp_zero_to_vec_misc_op (FloatCC) VecMisc2)
(extern constructor float_cc_cmp_zero_to_vec_misc_op float_cc_cmp_zero_to_vec_misc_op)
;; External constructor: like `float_cc_cmp_zero_to_vec_misc_op`, but for a
;; comparison whose first operand is the zero. The external implementation
;; mirrors the ordering conditions (e.g. "greater than" selects the "less than
;; zero" operation) and leaves equality unchanged. Used by `float_cmp_zero_swap`.
(decl float_cc_cmp_zero_to_vec_misc_op_swap (FloatCC) VecMisc2)
(extern constructor float_cc_cmp_zero_to_vec_misc_op_swap float_cc_cmp_zero_to_vec_misc_op_swap)
;; Match valid generic compare to zero cases
;;
;; Extractor over a `FloatCC` that yields the condition code back on a match.
;; The external implementation accepts `Equal`, `GreaterThanOrEqual`,
;; `GreaterThan`, `LessThanOrEqual` and `LessThan`; `NotEqual` is matched by
;; `fcmp_zero_cond_not_eq` instead.
(decl fcmp_zero_cond (FloatCC) FloatCC)
(extern extractor fcmp_zero_cond fcmp_zero_cond)
;; Match not equal compare to zero separately as it requires two output instructions
;;
;; Extractor over a `FloatCC`; the external implementation matches only
;; `NotEqual` and yields that condition code.
(decl fcmp_zero_cond_not_eq (FloatCC) FloatCC)
(extern extractor fcmp_zero_cond_not_eq fcmp_zero_cond_not_eq)
;; Emits a vector float compare-against-zero for a comparison whose second
;; operand is the zero. The condition code selects the `VecMisc2` operation,
;; which is then applied to the non-zero operand with the given vector size.
(decl float_cmp_zero (FloatCC Reg VectorSize) Reg)
(rule (float_cmp_zero cc src lanes)
      (let ((op VecMisc2 (float_cc_cmp_zero_to_vec_misc_op cc)))
        (vec_misc op src lanes)))
;; Emits a vector float compare-against-zero for a comparison whose first
;; operand is the zero. The operation is chosen by the `_swap` constructor so
;; that it can still be applied to the non-zero operand.
(decl float_cmp_zero_swap (FloatCC Reg VectorSize) Reg)
(rule (float_cmp_zero_swap cc src lanes)
      (let ((op VecMisc2 (float_cc_cmp_zero_to_vec_misc_op_swap cc)))
        (vec_misc op src lanes)))
;; Emits the vector float "compare equal to zero" operation (`VecMisc2.Fcmeq0`)
;; on the given register and vector size.
(decl fcmeq0 (Reg VectorSize) Reg)
(rule (fcmeq0 src lanes)
      (let ((op VecMisc2 (VecMisc2.Fcmeq0)))
        (vec_misc op src lanes)))
;; Int vector compare helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Match integer 0 value
;;
;; Extractor over an `Imm64` immediate. On a match it yields the immediate
;; itself. The matching logic is provided externally under the same name; that
;; implementation accepts the immediate only when its bits equal 0.
(decl zero_value (Imm64) Imm64)
(extern extractor zero_value zero_value)
;; Generate comparison to zero operator from input condition code
;;
;; External constructor: turns an `IntCC` into the `VecMisc2` compare-to-zero
;; operation for a comparison whose second operand is the zero. Used by
;; `int_cmp_zero`.
(decl int_cc_cmp_zero_to_vec_misc_op (IntCC) VecMisc2)
(extern constructor int_cc_cmp_zero_to_vec_misc_op int_cc_cmp_zero_to_vec_misc_op)
;; External constructor: like `int_cc_cmp_zero_to_vec_misc_op`, but for a
;; comparison whose first operand is the zero. The external implementation
;; mirrors the signed ordering conditions (e.g. "signed greater than" selects
;; the "less than zero" operation) and leaves equality unchanged. Used by
;; `int_cmp_zero_swap`.
(decl int_cc_cmp_zero_to_vec_misc_op_swap (IntCC) VecMisc2)
(extern constructor int_cc_cmp_zero_to_vec_misc_op_swap int_cc_cmp_zero_to_vec_misc_op_swap)
;; Match valid generic compare to zero cases
;;
;; Extractor over an `IntCC` that yields the condition code back on a match.
;; The external implementation accepts `Equal` and the four signed ordering
;; conditions; `NotEqual` is matched by `icmp_zero_cond_not_eq` instead, and
;; all remaining conditions do not match.
(decl icmp_zero_cond (IntCC) IntCC)
(extern extractor icmp_zero_cond icmp_zero_cond)
;; Match not equal compare to zero separately as it requires two output instructions
;;
;; Extractor over an `IntCC`; the external implementation matches only
;; `NotEqual` and yields that condition code.
(decl icmp_zero_cond_not_eq (IntCC) IntCC)
(extern extractor icmp_zero_cond_not_eq icmp_zero_cond_not_eq)
;; Emits a vector integer compare-against-zero for a comparison whose second
;; operand is the zero. The condition code selects the `VecMisc2` operation,
;; which is then applied to the non-zero operand with the given vector size.
(decl int_cmp_zero (IntCC Reg VectorSize) Reg)
(rule (int_cmp_zero cc src lanes)
      (let ((op VecMisc2 (int_cc_cmp_zero_to_vec_misc_op cc)))
        (vec_misc op src lanes)))
;; Emits a vector integer compare-against-zero for a comparison whose first
;; operand is the zero. The operation is chosen by the `_swap` constructor so
;; that it can still be applied to the non-zero operand.
(decl int_cmp_zero_swap (IntCC Reg VectorSize) Reg)
(rule (int_cmp_zero_swap cc src lanes)
      (let ((op VecMisc2 (int_cc_cmp_zero_to_vec_misc_op_swap cc)))
        (vec_misc op src lanes)))
;; Emits the vector integer "compare equal to zero" operation
;; (`VecMisc2.Cmeq0`) on the given register and vector size.
(decl cmeq0 (Reg VectorSize) Reg)
(rule (cmeq0 src lanes)
      (let ((op VecMisc2 (VecMisc2.Cmeq0)))
        (vec_misc op src lanes)))

View File

@@ -1765,6 +1765,50 @@ impl MachInstEmit for Inst {
(0b0, 0b00101, enc_size)
}
VecMisc2::Cmeq0 => (0b0, 0b01001, enc_size),
VecMisc2::Cmge0 => (0b1, 0b01000, enc_size),
VecMisc2::Cmgt0 => (0b0, 0b01000, enc_size),
VecMisc2::Cmle0 => (0b1, 0b01001, enc_size),
VecMisc2::Cmlt0 => (0b0, 0b01010, enc_size),
VecMisc2::Fcmeq0 => {
debug_assert!(
size == VectorSize::Size32x2
|| size == VectorSize::Size32x4
|| size == VectorSize::Size64x2
);
(0b0, 0b01101, enc_size)
}
VecMisc2::Fcmge0 => {
debug_assert!(
size == VectorSize::Size32x2
|| size == VectorSize::Size32x4
|| size == VectorSize::Size64x2
);
(0b1, 0b01100, enc_size)
}
VecMisc2::Fcmgt0 => {
debug_assert!(
size == VectorSize::Size32x2
|| size == VectorSize::Size32x4
|| size == VectorSize::Size64x2
);
(0b0, 0b01100, enc_size)
}
VecMisc2::Fcmle0 => {
debug_assert!(
size == VectorSize::Size32x2
|| size == VectorSize::Size32x4
|| size == VectorSize::Size64x2
);
(0b1, 0b01101, enc_size)
}
VecMisc2::Fcmlt0 => {
debug_assert!(
size == VectorSize::Size32x2
|| size == VectorSize::Size32x4
|| size == VectorSize::Size64x2
);
(0b0, 0b01110, enc_size)
}
};
sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));
}

View File

@@ -4518,15 +4518,114 @@ fn test_aarch64_binemit() {
"cnt v23.8b, v5.8b",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Fcmeq0,
rd: writable_vreg(5),
rn: vreg(2),
size: VectorSize::Size32x4,
},
"45D8A04E",
"fcmeq v5.4s, v2.4s, #0.0",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Fcmge0,
rd: writable_vreg(3),
rn: vreg(1),
size: VectorSize::Size64x2,
},
"23C8E06E",
"fcmge v3.2d, v1.2d, #0.0",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Fcmgt0,
rd: writable_vreg(5),
rn: vreg(7),
size: VectorSize::Size32x4,
},
"E5C8A04E",
"fcmgt v5.4s, v7.4s, #0.0",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Fcmle0,
rd: writable_vreg(10),
rn: vreg(2),
size: VectorSize::Size32x4,
},
"4AD8A06E",
"fcmle v10.4s, v2.4s, #0.0",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Fcmlt0,
rd: writable_vreg(12),
rn: vreg(12),
size: VectorSize::Size64x2,
},
"8CE9E04E",
"fcmlt v12.2d, v12.2d, #0.0",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Cmeq0,
rd: writable_vreg(22),
rn: vreg(27),
size: VectorSize::Size16x8,
},
"769B604E",
"cmeq v22.8h, v27.8h, #0",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Cmge0,
rd: writable_vreg(12),
rn: vreg(27),
size: VectorSize::Size16x8,
},
"6C9B604E",
"cmeq v12.8h, v27.8h, #0",
"6C8B606E",
"cmge v12.8h, v27.8h, #0",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Cmgt0,
rd: writable_vreg(12),
rn: vreg(27),
size: VectorSize::Size8x16,
},
"6C8B204E",
"cmgt v12.16b, v27.16b, #0",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Cmle0,
rd: writable_vreg(1),
rn: vreg(27),
size: VectorSize::Size32x4,
},
"619BA06E",
"cmle v1.4s, v27.4s, #0",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Cmlt0,
rd: writable_vreg(0),
rn: vreg(7),
size: VectorSize::Size64x2,
},
"E0A8E04E",
"cmlt v0.2d, v7.2d, #0",
));
insns.push((

View File

@@ -3175,6 +3175,15 @@ impl Inst {
VecMisc2::Frintp => ("frintp", size, ""),
VecMisc2::Cnt => ("cnt", size, ""),
VecMisc2::Cmeq0 => ("cmeq", size, ", #0"),
VecMisc2::Cmge0 => ("cmge", size, ", #0"),
VecMisc2::Cmgt0 => ("cmgt", size, ", #0"),
VecMisc2::Cmle0 => ("cmle", size, ", #0"),
VecMisc2::Cmlt0 => ("cmlt", size, ", #0"),
VecMisc2::Fcmeq0 => ("fcmeq", size, ", #0.0"),
VecMisc2::Fcmge0 => ("fcmge", size, ", #0.0"),
VecMisc2::Fcmgt0 => ("fcmgt", size, ", #0.0"),
VecMisc2::Fcmle0 => ("fcmle", size, ", #0.0"),
VecMisc2::Fcmlt0 => ("fcmlt", size, ", #0.0"),
};
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
let rn = show_vreg_vector(rn, mb_rru, size);

View File

@@ -1124,3 +1124,69 @@
(rule (lower (has_type $I8X16 (popcnt x)))
(vec_cnt x (VectorSize.Size8x16)))
;;;; Rules for `fcmp` 32 bit ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `fcmp ne x, splat(f32 zero)`: compare-equal-to-zero, then invert the result.
(rule (lower (has_type ty @ (multi_lane _ _)
                       (fcmp (fcmp_zero_cond_not_eq _)
                             x
                             (splat (f32const (zero_value_f32 _))))))
      (let ((src Reg x)
            (lanes VectorSize (vector_size ty))
            (is_zero Reg (fcmeq0 src lanes)))
        (value_reg (not is_zero lanes))))
;; `fcmp <cc> x, splat(f32 zero)` for the directly supported conditions: a
;; single compare-against-zero on `x`.
(rule (lower (has_type ty @ (multi_lane _ _)
                       (fcmp (fcmp_zero_cond cc)
                             x
                             (splat (f32const (zero_value_f32 _))))))
      (let ((src Reg x)
            (lanes VectorSize (vector_size ty))
            (mask Reg (float_cmp_zero cc src lanes)))
        (value_reg mask)))
;; `fcmp ne splat(f32 zero), y`: compare-equal-to-zero on `y`, then invert the
;; result.
(rule (lower (has_type ty @ (multi_lane _ _)
                       (fcmp (fcmp_zero_cond_not_eq _)
                             (splat (f32const (zero_value_f32 _)))
                             y)))
      (let ((src Reg y)
            (lanes VectorSize (vector_size ty))
            (is_zero Reg (fcmeq0 src lanes)))
        (value_reg (not is_zero lanes))))
;; `fcmp <cc> splat(f32 zero), y` for the directly supported conditions: a
;; single compare-against-zero on `y`, using the operand-swapped operation.
(rule (lower (has_type ty @ (multi_lane _ _)
                       (fcmp (fcmp_zero_cond cc)
                             (splat (f32const (zero_value_f32 _)))
                             y)))
      (let ((src Reg y)
            (lanes VectorSize (vector_size ty))
            (mask Reg (float_cmp_zero_swap cc src lanes)))
        (value_reg mask)))
;;;; Rules for `fcmp` 64 bit ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `fcmp ne x, splat(f64 zero)`: compare-equal-to-zero, then invert the result.
(rule (lower (has_type ty @ (multi_lane _ _)
                       (fcmp (fcmp_zero_cond_not_eq _)
                             x
                             (splat (f64const (zero_value_f64 _))))))
      (let ((src Reg x)
            (lanes VectorSize (vector_size ty))
            (is_zero Reg (fcmeq0 src lanes)))
        (value_reg (not is_zero lanes))))
;; `fcmp <cc> x, splat(f64 zero)` for the directly supported conditions: a
;; single compare-against-zero on `x`.
(rule (lower (has_type ty @ (multi_lane _ _)
                       (fcmp (fcmp_zero_cond cc)
                             x
                             (splat (f64const (zero_value_f64 _))))))
      (let ((src Reg x)
            (lanes VectorSize (vector_size ty))
            (mask Reg (float_cmp_zero cc src lanes)))
        (value_reg mask)))
;; `fcmp ne splat(f64 zero), y`: compare-equal-to-zero on `y`, then invert the
;; result.
(rule (lower (has_type ty @ (multi_lane _ _)
                       (fcmp (fcmp_zero_cond_not_eq _)
                             (splat (f64const (zero_value_f64 _)))
                             y)))
      (let ((src Reg y)
            (lanes VectorSize (vector_size ty))
            (is_zero Reg (fcmeq0 src lanes)))
        (value_reg (not is_zero lanes))))
;; `fcmp <cc> splat(f64 zero), y` for the directly supported conditions: a
;; single compare-against-zero on `y`, using the operand-swapped operation.
(rule (lower (has_type ty @ (multi_lane _ _)
                       (fcmp (fcmp_zero_cond cc)
                             (splat (f64const (zero_value_f64 _)))
                             y)))
      (let ((src Reg y)
            (lanes VectorSize (vector_size ty))
            (mask Reg (float_cmp_zero_swap cc src lanes)))
        (value_reg mask)))
;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `icmp ne x, splat(iconst 0)`: compare-equal-to-zero, then invert the result.
(rule (lower (has_type ty @ (multi_lane _ _)
                       (icmp (icmp_zero_cond_not_eq _)
                             x
                             (splat (iconst (zero_value _))))))
      (let ((src Reg x)
            (lanes VectorSize (vector_size ty))
            (is_zero Reg (cmeq0 src lanes)))
        (value_reg (not is_zero lanes))))
;; `icmp <cc> x, splat(iconst 0)` for the directly supported conditions: a
;; single compare-against-zero on `x`.
(rule (lower (has_type ty @ (multi_lane _ _)
                       (icmp (icmp_zero_cond cc)
                             x
                             (splat (iconst (zero_value _))))))
      (let ((src Reg x)
            (lanes VectorSize (vector_size ty))
            (mask Reg (int_cmp_zero cc src lanes)))
        (value_reg mask)))
;; `icmp ne splat(iconst 0), y`: compare-equal-to-zero on `y`, then invert the
;; result.
(rule (lower (has_type ty @ (multi_lane _ _)
                       (icmp (icmp_zero_cond_not_eq _)
                             (splat (iconst (zero_value _)))
                             y)))
      (let ((src Reg y)
            (lanes VectorSize (vector_size ty))
            (is_zero Reg (cmeq0 src lanes)))
        (value_reg (not is_zero lanes))))
;; `icmp <cc> splat(iconst 0), y` for the directly supported conditions: a
;; single compare-against-zero on `y`, using the operand-swapped operation.
(rule (lower (has_type ty @ (multi_lane _ _)
                       (icmp (icmp_zero_cond cc)
                             (splat (iconst (zero_value _)))
                             y)))
      (let ((src Reg y)
            (lanes VectorSize (vector_size ty))
            (mask Reg (int_cmp_zero_swap cc src lanes)))
        (value_reg mask)))

View File

@@ -6,9 +6,9 @@ pub mod generated_code;
// Types that the generated ISLE code uses via `use super::*`.
use super::{
writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, AtomicRmwOp, BranchTarget,
CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, Imm12, ImmLogic, ImmShift,
Inst as MInst, JTSequenceInfo, MachLabel, MoveWideConst, NarrowValueMode, Opcode, OperandSize,
PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VectorSize, NZCV,
CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift,
Inst as MInst, IntCC, JTSequenceInfo, MachLabel, MoveWideConst, NarrowValueMode, Opcode,
OperandSize, PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize, NZCV,
};
use crate::isa::aarch64::settings::Flags as IsaFlags;
use crate::machinst::isle::*;
@@ -286,4 +286,105 @@ where
let amount = val.value() & u8::try_from(ty.bits() - 1).unwrap();
ImmShift::maybe_from_u64(u64::from(ty.bits()) - u64::from(amount)).unwrap()
}
/// Extractor: accepts the integer condition codes that map directly onto a
/// vector compare-against-zero operation and yields the code back.
///
/// `Equal` and the four signed orderings match. Every other `IntCC` gives
/// `None`; `NotEqual` is handled by `icmp_zero_cond_not_eq`.
fn icmp_zero_cond(&mut self, cond: &IntCC) -> Option<IntCC> {
    let has_zero_form = matches!(
        *cond,
        IntCC::Equal
            | IntCC::SignedGreaterThanOrEqual
            | IntCC::SignedGreaterThan
            | IntCC::SignedLessThanOrEqual
            | IntCC::SignedLessThan
    );
    if has_zero_form {
        Some(*cond)
    } else {
        None
    }
}
/// Extractor: accepts the float condition codes that map directly onto a
/// vector compare-against-zero operation and yields the code back.
///
/// `Equal`, `GreaterThanOrEqual`, `GreaterThan`, `LessThanOrEqual` and
/// `LessThan` match. Every other `FloatCC` gives `None`; `NotEqual` is
/// handled by `fcmp_zero_cond_not_eq`.
fn fcmp_zero_cond(&mut self, cond: &FloatCC) -> Option<FloatCC> {
    let has_zero_form = matches!(
        *cond,
        FloatCC::Equal
            | FloatCC::GreaterThanOrEqual
            | FloatCC::GreaterThan
            | FloatCC::LessThanOrEqual
            | FloatCC::LessThan
    );
    if has_zero_form {
        Some(*cond)
    } else {
        None
    }
}
/// Extractor: matches only `FloatCC::NotEqual` and yields it; any other
/// condition gives `None`.
fn fcmp_zero_cond_not_eq(&mut self, cond: &FloatCC) -> Option<FloatCC> {
    if let FloatCC::NotEqual = *cond {
        Some(FloatCC::NotEqual)
    } else {
        None
    }
}
/// Extractor: matches only `IntCC::NotEqual` and yields it; any other
/// condition gives `None`.
fn icmp_zero_cond_not_eq(&mut self, cond: &IntCC) -> Option<IntCC> {
    if let IntCC::NotEqual = *cond {
        Some(IntCC::NotEqual)
    } else {
        None
    }
}
/// Maps a float condition code to the vector compare-against-zero operation
/// for a comparison whose second operand is the zero.
///
/// # Panics
///
/// Panics for any condition other than `Equal`, `GreaterThanOrEqual`,
/// `LessThanOrEqual`, `GreaterThan` or `LessThan`. The lowering rules added
/// alongside this helper only reach it after `fcmp_zero_cond` has accepted
/// the condition, so hitting the fallback arm indicates a bug in a caller.
fn float_cc_cmp_zero_to_vec_misc_op(&mut self, cond: &FloatCC) -> VecMisc2 {
    match *cond {
        FloatCC::Equal => VecMisc2::Fcmeq0,
        FloatCC::GreaterThanOrEqual => VecMisc2::Fcmge0,
        FloatCC::LessThanOrEqual => VecMisc2::Fcmle0,
        FloatCC::GreaterThan => VecMisc2::Fcmgt0,
        FloatCC::LessThan => VecMisc2::Fcmlt0,
        // Name the offending condition so an invariant violation is diagnosable.
        _ => unreachable!(
            "no vector compare-to-zero operation for float condition {:?}",
            cond
        ),
    }
}
/// Maps an integer condition code to the vector compare-against-zero
/// operation for a comparison whose second operand is the zero.
///
/// # Panics
///
/// Panics for any condition other than `Equal` and the four signed
/// orderings. The lowering rules added alongside this helper only reach it
/// after `icmp_zero_cond` has accepted the condition, so hitting the fallback
/// arm indicates a bug in a caller.
fn int_cc_cmp_zero_to_vec_misc_op(&mut self, cond: &IntCC) -> VecMisc2 {
    match *cond {
        IntCC::Equal => VecMisc2::Cmeq0,
        IntCC::SignedGreaterThanOrEqual => VecMisc2::Cmge0,
        IntCC::SignedLessThanOrEqual => VecMisc2::Cmle0,
        IntCC::SignedGreaterThan => VecMisc2::Cmgt0,
        IntCC::SignedLessThan => VecMisc2::Cmlt0,
        // Name the offending condition so an invariant violation is diagnosable.
        _ => unreachable!(
            "no vector compare-to-zero operation for integer condition {:?}",
            cond
        ),
    }
}
/// Maps a float condition code to the vector compare-against-zero operation
/// for a comparison whose first operand is the zero.
///
/// Because the operation is applied to the non-zero operand, the ordering
/// conditions are mirrored (`>=` selects `Fcmle0`, `<` selects `Fcmgt0`, and
/// so on); `Equal` is symmetric and still selects `Fcmeq0`.
///
/// # Panics
///
/// Panics for any condition other than `Equal`, `GreaterThanOrEqual`,
/// `LessThanOrEqual`, `GreaterThan` or `LessThan`. The lowering rules added
/// alongside this helper only reach it after `fcmp_zero_cond` has accepted
/// the condition, so hitting the fallback arm indicates a bug in a caller.
fn float_cc_cmp_zero_to_vec_misc_op_swap(&mut self, cond: &FloatCC) -> VecMisc2 {
    match *cond {
        FloatCC::Equal => VecMisc2::Fcmeq0,
        FloatCC::GreaterThanOrEqual => VecMisc2::Fcmle0,
        FloatCC::LessThanOrEqual => VecMisc2::Fcmge0,
        FloatCC::GreaterThan => VecMisc2::Fcmlt0,
        FloatCC::LessThan => VecMisc2::Fcmgt0,
        // Name the offending condition so an invariant violation is diagnosable.
        _ => unreachable!(
            "no swapped vector compare-to-zero operation for float condition {:?}",
            cond
        ),
    }
}
/// Maps an integer condition code to the vector compare-against-zero
/// operation for a comparison whose first operand is the zero.
///
/// Because the operation is applied to the non-zero operand, the signed
/// ordering conditions are mirrored (`>=` selects `Cmle0`, `<` selects
/// `Cmgt0`, and so on); `Equal` is symmetric and still selects `Cmeq0`.
///
/// # Panics
///
/// Panics for any condition other than `Equal` and the four signed
/// orderings. The lowering rules added alongside this helper only reach it
/// after `icmp_zero_cond` has accepted the condition, so hitting the fallback
/// arm indicates a bug in a caller.
fn int_cc_cmp_zero_to_vec_misc_op_swap(&mut self, cond: &IntCC) -> VecMisc2 {
    match *cond {
        IntCC::Equal => VecMisc2::Cmeq0,
        IntCC::SignedGreaterThanOrEqual => VecMisc2::Cmle0,
        IntCC::SignedLessThanOrEqual => VecMisc2::Cmge0,
        IntCC::SignedGreaterThan => VecMisc2::Cmlt0,
        IntCC::SignedLessThan => VecMisc2::Cmgt0,
        // Name the offending condition so an invariant violation is diagnosable.
        _ => unreachable!(
            "no swapped vector compare-to-zero operation for integer condition {:?}",
            cond
        ),
    }
}
/// Extractor: yields the immediate unchanged when its bits equal 0, and
/// `None` otherwise.
fn zero_value(&mut self, value: Imm64) -> Option<Imm64> {
    match value.bits() {
        0 => Some(value),
        _ => None,
    }
}
/// Extractor: yields the 32-bit float immediate unchanged when its raw bits
/// equal 0, and `None` otherwise.
fn zero_value_f32(&mut self, value: Ieee32) -> Option<Ieee32> {
    match value.bits() {
        0 => Some(value),
        _ => None,
    }
}
/// Extractor: yields the 64-bit float immediate unchanged when its raw bits
/// equal 0, and `None` otherwise.
fn zero_value_f64(&mut self, value: Ieee64) -> Option<Ieee64> {
    match value.bits() {
        0 => Some(value),
        _ => None,
    }
}
}

View File

@@ -1,4 +1,4 @@
src/clif.isle 9ea75a6f790b5c03
src/prelude.isle b2bc986bcbbbb77
src/isa/aarch64/inst.isle 3678d0a37bdb4cff
src/isa/aarch64/lower.isle 90accbfcadaea46d
src/isa/aarch64/inst.isle 19ccefb6a496d392
src/isa/aarch64/lower.isle 90ead921762336d2

File diff suppressed because it is too large Load Diff