Add vector compare to 0 optims (#3887)

Signed-off-by: Freddie Liardet <frederick.liardet@arm.com>
This commit is contained in:
FreddieLiardet
2022-03-10 00:20:06 +00:00
committed by GitHub
parent 8b48ce7fb7
commit 13b9396931
10 changed files with 1748 additions and 162 deletions

View File

@@ -1158,6 +1158,24 @@
(Cnt)
;; Compare bitwise equal to 0
(Cmeq0)
;; Compare signed greater than or equal to 0
(Cmge0)
;; Compare signed greater than 0
(Cmgt0)
;; Compare signed less than or equal to 0
(Cmle0)
;; Compare signed less than 0
(Cmlt0)
;; Floating point compare equal to 0
(Fcmeq0)
;; Floating point compare greater than or equal to 0
(Fcmge0)
;; Floating point compare greater than 0
(Fcmgt0)
;; Floating point compare less than or equal to 0
(Fcmle0)
;; Floating point compare less than 0
(Fcmlt0)
))
;; A vector widening operation with one argument.
@@ -1997,3 +2015,79 @@
(value_regs
(alu_rrr op ty x_lo y_lo)
(alu_rrr op ty x_hi y_hi))))
;; Float vector compare helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; These helpers support lowering a vector `fcmp` in which one operand is a
;; splat of +0.0 into a single AArch64 compare-against-zero instruction
;; (the `#0.0` forms of fcmeq/fcmge/fcmgt/fcmle/fcmlt).
;; Match 32 bit float 0 value
(decl zero_value_f32 (Ieee32) Ieee32)
(extern extractor zero_value_f32 zero_value_f32)
;; Match 64 bit float 0 value
(decl zero_value_f64 (Ieee64) Ieee64)
(extern extractor zero_value_f64 zero_value_f64)
;; Generate comparison to zero operator from input condition code
(decl float_cc_cmp_zero_to_vec_misc_op (FloatCC) VecMisc2)
(extern constructor float_cc_cmp_zero_to_vec_misc_op float_cc_cmp_zero_to_vec_misc_op)
;; As above, but with the comparison direction reversed, for the case where
;; the zero constant is the first operand (e.g. `0 >= x` becomes `x <= 0`).
(decl float_cc_cmp_zero_to_vec_misc_op_swap (FloatCC) VecMisc2)
(extern constructor float_cc_cmp_zero_to_vec_misc_op_swap float_cc_cmp_zero_to_vec_misc_op_swap)
;; Match valid generic compare to zero cases, i.e. the condition codes that
;; lower to exactly one compare-against-zero instruction
(decl fcmp_zero_cond (FloatCC) FloatCC)
(extern extractor fcmp_zero_cond fcmp_zero_cond)
;; Match not equal compare to zero separately as it requires two output
;; instructions (compare-equal-to-zero followed by a bitwise NOT)
(decl fcmp_zero_cond_not_eq (FloatCC) FloatCC)
(extern extractor fcmp_zero_cond_not_eq fcmp_zero_cond_not_eq)
;; Helper for generating float compare to zero instructions where 2nd argument is zero
(decl float_cmp_zero (FloatCC Reg VectorSize) Reg)
(rule (float_cmp_zero cond rn size)
(vec_misc (float_cc_cmp_zero_to_vec_misc_op cond) rn size))
;; Helper for generating float compare to zero instructions in case where 1st argument is zero
(decl float_cmp_zero_swap (FloatCC Reg VectorSize) Reg)
(rule (float_cmp_zero_swap cond rn size)
(vec_misc (float_cc_cmp_zero_to_vec_misc_op_swap cond) rn size))
;; Helper for generating float compare equal to zero instruction; used with a
;; NOT to implement `fcmp ne` against zero
(decl fcmeq0 (Reg VectorSize) Reg)
(rule (fcmeq0 rn size)
(vec_misc (VecMisc2.Fcmeq0) rn size))
;; Int vector compare helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Same scheme as the float helpers above, for integer `icmp` against a splat
;; of zero (the `#0` forms of cmeq/cmge/cmgt/cmle/cmlt).
;; Match integer 0 value
(decl zero_value (Imm64) Imm64)
(extern extractor zero_value zero_value)
;; Generate comparison to zero operator from input condition code
(decl int_cc_cmp_zero_to_vec_misc_op (IntCC) VecMisc2)
(extern constructor int_cc_cmp_zero_to_vec_misc_op int_cc_cmp_zero_to_vec_misc_op)
;; As above, but with the comparison direction reversed, for the case where
;; the zero constant is the first operand.
(decl int_cc_cmp_zero_to_vec_misc_op_swap (IntCC) VecMisc2)
(extern constructor int_cc_cmp_zero_to_vec_misc_op_swap int_cc_cmp_zero_to_vec_misc_op_swap)
;; Match valid generic compare to zero cases (equality and the signed
;; orderings, which lower to exactly one instruction)
(decl icmp_zero_cond (IntCC) IntCC)
(extern extractor icmp_zero_cond icmp_zero_cond)
;; Match not equal compare to zero separately as it requires two output
;; instructions (compare-equal-to-zero followed by a bitwise NOT)
(decl icmp_zero_cond_not_eq (IntCC) IntCC)
(extern extractor icmp_zero_cond_not_eq icmp_zero_cond_not_eq)
;; Helper for generating int compare to zero instructions where 2nd argument is zero
(decl int_cmp_zero (IntCC Reg VectorSize) Reg)
(rule (int_cmp_zero cond rn size)
(vec_misc (int_cc_cmp_zero_to_vec_misc_op cond) rn size))
;; Helper for generating int compare to zero instructions in case where 1st argument is zero
(decl int_cmp_zero_swap (IntCC Reg VectorSize) Reg)
(rule (int_cmp_zero_swap cond rn size)
(vec_misc (int_cc_cmp_zero_to_vec_misc_op_swap cond) rn size))
;; Helper for generating int compare equal to zero instruction; used with a
;; NOT to implement `icmp ne` against zero
(decl cmeq0 (Reg VectorSize) Reg)
(rule (cmeq0 rn size)
(vec_misc (VecMisc2.Cmeq0) rn size))

View File

@@ -1765,6 +1765,50 @@ impl MachInstEmit for Inst {
(0b0, 0b00101, enc_size)
}
// Integer compare-against-zero forms. Each arm yields the tuple
// (U bit, bits 12..16 of the opcode, size encoding) consumed by
// enc_vec_rr_misc below.
VecMisc2::Cmeq0 => (0b0, 0b01001, enc_size),
VecMisc2::Cmge0 => (0b1, 0b01000, enc_size),
VecMisc2::Cmgt0 => (0b0, 0b01000, enc_size),
VecMisc2::Cmle0 => (0b1, 0b01001, enc_size),
VecMisc2::Cmlt0 => (0b0, 0b01010, enc_size),
// Floating-point compare-against-zero forms. The debug asserts restrict
// these to vector arrangements with 32- or 64-bit lanes, the only lane
// widths the FP zero-compare instructions accept.
VecMisc2::Fcmeq0 => {
debug_assert!(
size == VectorSize::Size32x2
|| size == VectorSize::Size32x4
|| size == VectorSize::Size64x2
);
(0b0, 0b01101, enc_size)
}
VecMisc2::Fcmge0 => {
debug_assert!(
size == VectorSize::Size32x2
|| size == VectorSize::Size32x4
|| size == VectorSize::Size64x2
);
(0b1, 0b01100, enc_size)
}
VecMisc2::Fcmgt0 => {
debug_assert!(
size == VectorSize::Size32x2
|| size == VectorSize::Size32x4
|| size == VectorSize::Size64x2
);
(0b0, 0b01100, enc_size)
}
VecMisc2::Fcmle0 => {
debug_assert!(
size == VectorSize::Size32x2
|| size == VectorSize::Size32x4
|| size == VectorSize::Size64x2
);
(0b1, 0b01101, enc_size)
}
VecMisc2::Fcmlt0 => {
debug_assert!(
size == VectorSize::Size32x2
|| size == VectorSize::Size32x4
|| size == VectorSize::Size64x2
);
(0b0, 0b01110, enc_size)
}
};
sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));
}

View File

@@ -4518,15 +4518,114 @@ fn test_aarch64_binemit() {
"cnt v23.8b, v5.8b",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Fcmeq0,
rd: writable_vreg(5),
rn: vreg(2),
size: VectorSize::Size32x4,
},
"45D8A04E",
"fcmeq v5.4s, v2.4s, #0.0",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Fcmge0,
rd: writable_vreg(3),
rn: vreg(1),
size: VectorSize::Size64x2,
},
"23C8E06E",
"fcmge v3.2d, v1.2d, #0.0",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Fcmgt0,
rd: writable_vreg(5),
rn: vreg(7),
size: VectorSize::Size32x4,
},
"E5C8A04E",
"fcmgt v5.4s, v7.4s, #0.0",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Fcmle0,
rd: writable_vreg(10),
rn: vreg(2),
size: VectorSize::Size32x4,
},
"4AD8A06E",
"fcmle v10.4s, v2.4s, #0.0",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Fcmlt0,
rd: writable_vreg(12),
rn: vreg(12),
size: VectorSize::Size64x2,
},
"8CE9E04E",
"fcmlt v12.2d, v12.2d, #0.0",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Cmeq0,
rd: writable_vreg(22),
rn: vreg(27),
size: VectorSize::Size16x8,
},
"769B604E",
"cmeq v22.8h, v27.8h, #0",
));
insns.push((
    Inst::VecMisc {
        op: VecMisc2::Cmge0,
        rd: writable_vreg(12),
        rn: vreg(27),
        size: VectorSize::Size16x8,
    },
    // CMGE (zero), .8h arrangement: U=1, opcode 0b01000.
    // (Removed a stray leftover `cmeq` encoding/disassembly pair that made
    // this tuple carry four strings instead of the expected two.)
    "6C8B606E",
    "cmge v12.8h, v27.8h, #0",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Cmgt0,
rd: writable_vreg(12),
rn: vreg(27),
size: VectorSize::Size8x16,
},
"6C8B204E",
"cmgt v12.16b, v27.16b, #0",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Cmle0,
rd: writable_vreg(1),
rn: vreg(27),
size: VectorSize::Size32x4,
},
"619BA06E",
"cmle v1.4s, v27.4s, #0",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Cmlt0,
rd: writable_vreg(0),
rn: vreg(7),
size: VectorSize::Size64x2,
},
"E0A8E04E",
"cmlt v0.2d, v7.2d, #0",
));
insns.push((

View File

@@ -3175,6 +3175,15 @@ impl Inst {
VecMisc2::Frintp => ("frintp", size, ""),
VecMisc2::Cnt => ("cnt", size, ""),
// Compare-against-zero forms print a literal zero as the final operand:
// `, #0` for the integer compares, `, #0.0` for the floating-point ones.
VecMisc2::Cmeq0 => ("cmeq", size, ", #0"),
VecMisc2::Cmge0 => ("cmge", size, ", #0"),
VecMisc2::Cmgt0 => ("cmgt", size, ", #0"),
VecMisc2::Cmle0 => ("cmle", size, ", #0"),
VecMisc2::Cmlt0 => ("cmlt", size, ", #0"),
VecMisc2::Fcmeq0 => ("fcmeq", size, ", #0.0"),
VecMisc2::Fcmge0 => ("fcmge", size, ", #0.0"),
VecMisc2::Fcmgt0 => ("fcmgt", size, ", #0.0"),
VecMisc2::Fcmle0 => ("fcmle", size, ", #0.0"),
VecMisc2::Fcmlt0 => ("fcmlt", size, ", #0.0"),
};
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
let rn = show_vreg_vector(rn, mb_rru, size);

View File

@@ -1124,3 +1124,69 @@
;; Popcount on 16 byte lanes maps directly onto the vector CNT instruction.
(rule (lower (has_type $I8X16 (popcnt x)))
(vec_cnt x (VectorSize.Size8x16)))
;;;; Rules for `fcmp` 32 bit ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Vector `fcmp` where the 2nd operand is a splat of a 32-bit +0.0.
;; `ne` needs two instructions: compare-equal-to-zero, then a bitwise NOT.
(rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) x (splat (f32const (zero_value_f32 y))))))
(let ((rn Reg x)
(vec_size VectorSize (vector_size ty)))
(value_reg (not (fcmeq0 rn vec_size) vec_size))))
;; Remaining supported conditions lower to one compare-to-zero instruction.
(rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond cond) x (splat (f32const (zero_value_f32 y))))))
(let ((rn Reg x)
(vec_size VectorSize (vector_size ty)))
(value_reg (float_cmp_zero cond rn vec_size))))
;; Zero splat as the 1st operand; `ne` is symmetric, so the same
;; fcmeq0 + NOT sequence applies.
(rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) (splat (f32const (zero_value_f32 x))) y)))
(let ((rn Reg y)
(vec_size VectorSize (vector_size ty)))
(value_reg (not (fcmeq0 rn vec_size) vec_size))))
;; Zero splat as the 1st operand for the ordered conditions: use the
;; swapped operator so the comparison direction is reversed.
(rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond cond) (splat (f32const (zero_value_f32 x))) y)))
(let ((rn Reg y)
(vec_size VectorSize (vector_size ty)))
(value_reg (float_cmp_zero_swap cond rn vec_size))))
;;;; Rules for `fcmp` 64 bit ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Same four cases as the 32-bit rules above, matching a 64-bit zero constant.
(rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) x (splat (f64const (zero_value_f64 y))))))
(let ((rn Reg x)
(vec_size VectorSize (vector_size ty)))
(value_reg (not (fcmeq0 rn vec_size) vec_size))))
(rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond cond) x (splat (f64const (zero_value_f64 y))))))
(let ((rn Reg x)
(vec_size VectorSize (vector_size ty)))
(value_reg (float_cmp_zero cond rn vec_size))))
(rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) (splat (f64const (zero_value_f64 x))) y)))
(let ((rn Reg y)
(vec_size VectorSize (vector_size ty)))
(value_reg (not (fcmeq0 rn vec_size) vec_size))))
(rule (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond cond) (splat (f64const (zero_value_f64 x))) y)))
(let ((rn Reg y)
(vec_size VectorSize (vector_size ty)))
(value_reg (float_cmp_zero_swap cond rn vec_size))))
;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer analogue of the above: `icmp` against a splat of integer zero.
;; `ne` lowers to cmeq-to-zero followed by a bitwise NOT.
(rule (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond_not_eq cond) x (splat (iconst (zero_value y))))))
(let ((rn Reg x)
(vec_size VectorSize (vector_size ty)))
(value_reg (not (cmeq0 rn vec_size) vec_size))))
;; Equality and signed orderings lower to one compare-to-zero instruction.
(rule (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond cond) x (splat (iconst (zero_value y))))))
(let ((rn Reg x)
(vec_size VectorSize (vector_size ty)))
(value_reg (int_cmp_zero cond rn vec_size))))
;; Zero splat as the 1st operand; `ne` is symmetric.
(rule (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond_not_eq cond) (splat (iconst (zero_value x))) y)))
(let ((rn Reg y)
(vec_size VectorSize (vector_size ty)))
(value_reg (not (cmeq0 rn vec_size) vec_size))))
;; Zero splat as the 1st operand: reverse the comparison direction.
(rule (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond cond) (splat (iconst (zero_value x))) y)))
(let ((rn Reg y)
(vec_size VectorSize (vector_size ty)))
(value_reg (int_cmp_zero_swap cond rn vec_size))))

View File

@@ -6,9 +6,9 @@ pub mod generated_code;
// Types that the generated ISLE code uses via `use super::*`.
use super::{
writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, AtomicRmwOp, BranchTarget,
CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, Imm12, ImmLogic, ImmShift,
Inst as MInst, JTSequenceInfo, MachLabel, MoveWideConst, NarrowValueMode, Opcode, OperandSize,
PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VectorSize, NZCV,
CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift,
Inst as MInst, IntCC, JTSequenceInfo, MachLabel, MoveWideConst, NarrowValueMode, Opcode,
OperandSize, PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize, NZCV,
};
use crate::isa::aarch64::settings::Flags as IsaFlags;
use crate::machinst::isle::*;
@@ -286,4 +286,105 @@ where
let amount = val.value() & u8::try_from(ty.bits() - 1).unwrap();
ImmShift::maybe_from_u64(u64::from(ty.bits()) - u64::from(amount)).unwrap()
}
/// Filter for `icmp` condition codes that lower to a single vector
/// compare-against-zero instruction: equality plus the four signed
/// orderings. Returns `None` for everything else (e.g. unsigned compares).
fn icmp_zero_cond(&mut self, cond: &IntCC) -> Option<IntCC> {
    let single_instruction = matches!(
        cond,
        IntCC::Equal
            | IntCC::SignedGreaterThanOrEqual
            | IntCC::SignedGreaterThan
            | IntCC::SignedLessThanOrEqual
            | IntCC::SignedLessThan
    );
    if single_instruction {
        Some(*cond)
    } else {
        None
    }
}
/// Filter for `fcmp` condition codes that lower to a single vector
/// compare-against-zero instruction: equality plus the four ordered
/// comparisons. Returns `None` for everything else.
fn fcmp_zero_cond(&mut self, cond: &FloatCC) -> Option<FloatCC> {
    let single_instruction = matches!(
        cond,
        FloatCC::Equal
            | FloatCC::GreaterThanOrEqual
            | FloatCC::GreaterThan
            | FloatCC::LessThanOrEqual
            | FloatCC::LessThan
    );
    if single_instruction {
        Some(*cond)
    } else {
        None
    }
}
/// Matches only `FloatCC::NotEqual`. It is handled apart from
/// `fcmp_zero_cond` because the lowering needs two instructions
/// (compare-equal-to-zero, then a bitwise NOT).
fn fcmp_zero_cond_not_eq(&mut self, cond: &FloatCC) -> Option<FloatCC> {
    if let FloatCC::NotEqual = cond {
        Some(FloatCC::NotEqual)
    } else {
        None
    }
}
/// Matches only `IntCC::NotEqual`. It is handled apart from
/// `icmp_zero_cond` because the lowering needs two instructions
/// (compare-equal-to-zero, then a bitwise NOT).
fn icmp_zero_cond_not_eq(&mut self, cond: &IntCC) -> Option<IntCC> {
    if let IntCC::NotEqual = cond {
        Some(IntCC::NotEqual)
    } else {
        None
    }
}
/// Maps a float condition code to the vector compare-against-zero opcode
/// for the case where the zero is the second operand. Callers must have
/// filtered the condition through `fcmp_zero_cond` first; any other
/// condition code is a lowering bug, so panic with a diagnostic instead
/// of a bare `panic!()`.
fn float_cc_cmp_zero_to_vec_misc_op(&mut self, cond: &FloatCC) -> VecMisc2 {
    match cond {
        &FloatCC::Equal => VecMisc2::Fcmeq0,
        &FloatCC::GreaterThanOrEqual => VecMisc2::Fcmge0,
        &FloatCC::LessThanOrEqual => VecMisc2::Fcmle0,
        &FloatCC::GreaterThan => VecMisc2::Fcmgt0,
        &FloatCC::LessThan => VecMisc2::Fcmlt0,
        _ => panic!("float_cc_cmp_zero_to_vec_misc_op: unhandled FloatCC"),
    }
}
/// Maps an integer condition code to the vector compare-against-zero opcode
/// for the case where the zero is the second operand. Callers must have
/// filtered the condition through `icmp_zero_cond` first; any other
/// condition code is a lowering bug, so panic with a diagnostic instead
/// of a bare `panic!()`.
fn int_cc_cmp_zero_to_vec_misc_op(&mut self, cond: &IntCC) -> VecMisc2 {
    match cond {
        &IntCC::Equal => VecMisc2::Cmeq0,
        &IntCC::SignedGreaterThanOrEqual => VecMisc2::Cmge0,
        &IntCC::SignedLessThanOrEqual => VecMisc2::Cmle0,
        &IntCC::SignedGreaterThan => VecMisc2::Cmgt0,
        &IntCC::SignedLessThan => VecMisc2::Cmlt0,
        _ => panic!("int_cc_cmp_zero_to_vec_misc_op: unhandled IntCC"),
    }
}
/// As `float_cc_cmp_zero_to_vec_misc_op`, but for the case where the zero
/// is the FIRST operand, so the comparison direction is reversed
/// (`0 >= x` becomes `x <= 0`, etc.). Equality is symmetric and maps to
/// itself. Panics with a diagnostic on an unfiltered condition code.
fn float_cc_cmp_zero_to_vec_misc_op_swap(&mut self, cond: &FloatCC) -> VecMisc2 {
    match cond {
        &FloatCC::Equal => VecMisc2::Fcmeq0,
        &FloatCC::GreaterThanOrEqual => VecMisc2::Fcmle0,
        &FloatCC::LessThanOrEqual => VecMisc2::Fcmge0,
        &FloatCC::GreaterThan => VecMisc2::Fcmlt0,
        &FloatCC::LessThan => VecMisc2::Fcmgt0,
        _ => panic!("float_cc_cmp_zero_to_vec_misc_op_swap: unhandled FloatCC"),
    }
}
/// As `int_cc_cmp_zero_to_vec_misc_op`, but for the case where the zero
/// is the FIRST operand, so the comparison direction is reversed
/// (`0 >= x` becomes `x <= 0`, etc.). Equality is symmetric and maps to
/// itself. Panics with a diagnostic on an unfiltered condition code.
fn int_cc_cmp_zero_to_vec_misc_op_swap(&mut self, cond: &IntCC) -> VecMisc2 {
    match cond {
        &IntCC::Equal => VecMisc2::Cmeq0,
        &IntCC::SignedGreaterThanOrEqual => VecMisc2::Cmle0,
        &IntCC::SignedLessThanOrEqual => VecMisc2::Cmge0,
        &IntCC::SignedGreaterThan => VecMisc2::Cmlt0,
        &IntCC::SignedLessThan => VecMisc2::Cmgt0,
        _ => panic!("int_cc_cmp_zero_to_vec_misc_op_swap: unhandled IntCC"),
    }
}
/// Extractor: succeeds (yielding the immediate back) only when every bit
/// of the integer immediate is zero.
fn zero_value(&mut self, value: Imm64) -> Option<Imm64> {
    match value.bits() {
        0 => Some(value),
        _ => None,
    }
}
/// Extractor: succeeds only when the f32 constant's bit pattern is all
/// zeros, i.e. exactly +0.0 (-0.0 has the sign bit set and is rejected).
fn zero_value_f32(&mut self, value: Ieee32) -> Option<Ieee32> {
    match value.bits() {
        0 => Some(value),
        _ => None,
    }
}
/// Extractor: succeeds only when the f64 constant's bit pattern is all
/// zeros, i.e. exactly +0.0 (-0.0 has the sign bit set and is rejected).
fn zero_value_f64(&mut self, value: Ieee64) -> Option<Ieee64> {
    match value.bits() {
        0 => Some(value),
        _ => None,
    }
}
}

View File

@@ -1,4 +1,4 @@
src/clif.isle 9ea75a6f790b5c03
src/prelude.isle b2bc986bcbbbb77
src/isa/aarch64/inst.isle 3678d0a37bdb4cff
src/isa/aarch64/lower.isle 90accbfcadaea46d
src/isa/aarch64/inst.isle 19ccefb6a496d392
src/isa/aarch64/lower.isle 90ead921762336d2

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,415 @@
test compile precise-output
set unwind_info=false
target aarch64
function %f0(i8x16) -> b8x16 {
block0(v0: i8x16):
v1 = iconst.i8 0
v2 = splat.i8x16 v1
v3 = icmp eq v0, v2
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 2)
; Inst 0: cmeq v0.16b, v0.16b, #0
; Inst 1: ret
; }}
function %f1(i16x8) -> b16x8 {
block0(v0: i16x8):
v1 = iconst.i16 0
v2 = splat.i16x8 v1
v3 = icmp eq v2, v0
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 2)
; Inst 0: cmeq v0.8h, v0.8h, #0
; Inst 1: ret
; }}
function %f2(i32x4) -> b32x4 {
block0(v0: i32x4):
v1 = iconst.i32 0
v2 = splat.i32x4 v1
v3 = icmp ne v0, v2
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 3)
; Inst 0: cmeq v0.4s, v0.4s, #0
; Inst 1: mvn v0.16b, v0.16b
; Inst 2: ret
; }}
function %f3(i64x2) -> b64x2 {
block0(v0: i64x2):
v1 = iconst.i64 0
v2 = splat.i64x2 v1
v3 = icmp ne v2, v0
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 3)
; Inst 0: cmeq v0.2d, v0.2d, #0
; Inst 1: mvn v0.16b, v0.16b
; Inst 2: ret
; }}
function %f4(i8x16) -> b8x16 {
block0(v0: i8x16):
v1 = iconst.i8 0
v2 = splat.i8x16 v1
v3 = icmp sle v0, v2
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 2)
; Inst 0: cmle v0.16b, v0.16b, #0
; Inst 1: ret
; }}
function %f5(i16x8) -> b16x8 {
block0(v0: i16x8):
v1 = iconst.i16 0
v2 = splat.i16x8 v1
v3 = icmp sle v2, v0
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 2)
; Inst 0: cmge v0.8h, v0.8h, #0
; Inst 1: ret
; }}
function %f6(i32x4) -> b32x4 {
block0(v0: i32x4):
v1 = iconst.i32 0
v2 = splat.i32x4 v1
v3 = icmp sge v0, v2
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 2)
; Inst 0: cmge v0.4s, v0.4s, #0
; Inst 1: ret
; }}
function %f7(i64x2) -> b64x2 {
block0(v0: i64x2):
v1 = iconst.i64 0
v2 = splat.i64x2 v1
v3 = icmp sge v2, v0
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 2)
; Inst 0: cmle v0.2d, v0.2d, #0
; Inst 1: ret
; }}
function %f8(i8x16) -> b8x16 {
block0(v0: i8x16):
v1 = iconst.i8 0
v2 = splat.i8x16 v1
v3 = icmp slt v0, v2
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 2)
; Inst 0: cmlt v0.16b, v0.16b, #0
; Inst 1: ret
; }}
function %f9(i16x8) -> b16x8 {
block0(v0: i16x8):
v1 = iconst.i16 0
v2 = splat.i16x8 v1
v3 = icmp slt v2, v0
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 2)
; Inst 0: cmgt v0.8h, v0.8h, #0
; Inst 1: ret
; }}
function %f10(i32x4) -> b32x4 {
block0(v0: i32x4):
v1 = iconst.i32 0
v2 = splat.i32x4 v1
v3 = icmp sgt v0, v2
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 2)
; Inst 0: cmgt v0.4s, v0.4s, #0
; Inst 1: ret
; }}
function %f11(i64x2) -> b64x2 {
block0(v0: i64x2):
v1 = iconst.i64 0
v2 = splat.i64x2 v1
v3 = icmp sgt v2, v0
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 2)
; Inst 0: cmlt v0.2d, v0.2d, #0
; Inst 1: ret
; }}
function %f12(f32x4) -> b32x4 {
block0(v0: f32x4):
v1 = f32const 0.0
v2 = splat.f32x4 v1
v3 = fcmp eq v0, v2
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 2)
; Inst 0: fcmeq v0.4s, v0.4s, #0.0
; Inst 1: ret
; }}
function %f13(f64x2) -> b64x2 {
block0(v0: f64x2):
v1 = f64const 0.0
v2 = splat.f64x2 v1
v3 = fcmp eq v2, v0
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 2)
; Inst 0: fcmeq v0.2d, v0.2d, #0.0
; Inst 1: ret
; }}
function %f14(f64x2) -> b64x2 {
block0(v0: f64x2):
v1 = f64const 0.0
v2 = splat.f64x2 v1
v3 = fcmp ne v0, v2
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 3)
; Inst 0: fcmeq v0.2d, v0.2d, #0.0
; Inst 1: mvn v0.16b, v0.16b
; Inst 2: ret
; }}
function %f15(f32x4) -> b32x4 {
block0(v0: f32x4):
v1 = f32const 0.0
v2 = splat.f32x4 v1
v3 = fcmp ne v2, v0
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 3)
; Inst 0: fcmeq v0.4s, v0.4s, #0.0
; Inst 1: mvn v0.16b, v0.16b
; Inst 2: ret
; }}
function %f16(f32x4) -> b32x4 {
block0(v0: f32x4):
v1 = f32const 0.0
v2 = splat.f32x4 v1
v3 = fcmp le v0, v2
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 2)
; Inst 0: fcmle v0.4s, v0.4s, #0.0
; Inst 1: ret
; }}
function %f17(f64x2) -> b64x2 {
block0(v0: f64x2):
v1 = f64const 0.0
v2 = splat.f64x2 v1
v3 = fcmp le v2, v0
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 2)
; Inst 0: fcmge v0.2d, v0.2d, #0.0
; Inst 1: ret
; }}
function %f18(f64x2) -> b64x2 {
block0(v0: f64x2):
v1 = f64const 0.0
v2 = splat.f64x2 v1
v3 = fcmp ge v0, v2
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 2)
; Inst 0: fcmge v0.2d, v0.2d, #0.0
; Inst 1: ret
; }}
function %f19(f32x4) -> b32x4 {
block0(v0: f32x4):
v1 = f32const 0.0
v2 = splat.f32x4 v1
v3 = fcmp ge v2, v0
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 2)
; Inst 0: fcmle v0.4s, v0.4s, #0.0
; Inst 1: ret
; }}
function %f20(f32x4) -> b32x4 {
block0(v0: f32x4):
v1 = f32const 0.0
v2 = splat.f32x4 v1
v3 = fcmp lt v0, v2
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 2)
; Inst 0: fcmlt v0.4s, v0.4s, #0.0
; Inst 1: ret
; }}
function %f21(f64x2) -> b64x2 {
block0(v0: f64x2):
v1 = f64const 0.0
v2 = splat.f64x2 v1
v3 = fcmp lt v2, v0
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 2)
; Inst 0: fcmgt v0.2d, v0.2d, #0.0
; Inst 1: ret
; }}
function %f22(f64x2) -> b64x2 {
block0(v0: f64x2):
v1 = f64const 0.0
v2 = splat.f64x2 v1
v3 = fcmp gt v0, v2
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 2)
; Inst 0: fcmgt v0.2d, v0.2d, #0.0
; Inst 1: ret
; }}
function %f23(f32x4) -> b32x4 {
block0(v0: f32x4):
v1 = f32const 0.0
v2 = splat.f32x4 v1
v3 = fcmp gt v2, v0
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 2)
; Inst 0: fcmlt v0.4s, v0.4s, #0.0
; Inst 1: ret
; }}

View File

@@ -0,0 +1,255 @@
test run
target aarch64
; raw_bitcast is needed to get around issue with "bint" on aarch64
function %simd_icmp_eq_i8(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = iconst.i8 0
v3 = splat.i8x16 v1
v2 = icmp eq v0, v3
v4 = raw_bitcast.i8x16 v2
return v4
}
; run: %simd_icmp_eq_i8([-1 0 1 100 -1 0 1 100 -1 0 1 100 -1 0 1 100]) == [0 0xff 0 0 0 0xff 0 0 0 0xff 0 0 0 0xff 0 0]
function %simd_icmp_ne_i16(i16x8) -> i16x8 {
block0(v0: i16x8):
v1 = iconst.i16 0
v3 = splat.i16x8 v1
v2 = icmp ne v0, v3
v4 = raw_bitcast.i16x8 v2
return v4
}
; run: %simd_icmp_ne_i16([-1 0 1 100 -1 0 1 100]) == [0xffff 0 0xffff 0xffff 0xffff 0 0xffff 0xffff]
function %simd_icmp_le_i32(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = iconst.i32 0
v3 = splat.i32x4 v1
v2 = icmp sle v0, v3
v4 = raw_bitcast.i32x4 v2
return v4
}
; run: %simd_icmp_le_i32([-1 0 1 100]) == [0xffffffff 0xffffffff 0 0]
function %simd_icmp_ge_i64(i64x2) -> i64x2 {
block0(v0: i64x2):
v1 = iconst.i64 0
v3 = splat.i64x2 v1
v2 = icmp sge v0, v3
v4 = raw_bitcast.i64x2 v2
return v4
}
; run: %simd_icmp_ge_i64([-1 0]) == [0 0xffffffffffffffff]
; run: %simd_icmp_ge_i64([1 100]) == [0xffffffffffffffff 0xffffffffffffffff]
function %simd_icmp_lt_i8(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = iconst.i8 0
v3 = splat.i8x16 v1
v2 = icmp slt v0, v3
v4 = raw_bitcast.i8x16 v2
return v4
}
; run: %simd_icmp_lt_i8([-1 0 1 100 -1 0 1 100 -1 0 1 100 -1 0 1 100]) == [0xff 0 0 0 0xff 0 0 0 0xff 0 0 0 0xff 0 0 0]
function %simd_icmp_gt_i16(i16x8) -> i16x8 {
block0(v0: i16x8):
v1 = iconst.i16 0
v3 = splat.i16x8 v1
v2 = icmp sgt v0, v3
v4 = raw_bitcast.i16x8 v2
return v4
}
; run: %simd_icmp_gt_i16([-1 0 1 100 -1 0 1 100]) == [0 0 0xffff 0xffff 0 0 0xffff 0xffff]
function %simd_fcmp_eq_f32(f32x4) -> i32x4 {
block0(v0: f32x4):
v1 = f32const 0.0
v3 = splat.f32x4 v1
v2 = fcmp eq v0, v3
v4 = raw_bitcast.i32x4 v2
return v4
}
; run: %simd_fcmp_eq_f32([-0x1.0 0x0.0 0x1.0 NaN]) == [0 0xffffffff 0 0]
function %simd_fcmp_ne_f64(f64x2) -> i64x2 {
block0(v0: f64x2):
v1 = f64const 0.0
v3 = splat.f64x2 v1
v2 = fcmp ne v0, v3
v4 = raw_bitcast.i64x2 v2
return v4
}
; run: %simd_fcmp_ne_f64([-0x1.0 0x0.0]) == [0xffffffffffffffff 0]
; run: %simd_fcmp_ne_f64([0x1.0 NaN]) == [0xffffffffffffffff 0xffffffffffffffff]
function %simd_fcmp_le_f32(f32x4) -> i32x4 {
block0(v0: f32x4):
v1 = f32const 0.0
v3 = splat.f32x4 v1
v2 = fcmp le v0, v3
v4 = raw_bitcast.i32x4 v2
return v4
}
; run: %simd_fcmp_le_f32([-0x1.0 0x0.0 0x1.0 NaN]) == [0xffffffff 0xffffffff 0 0]
function %simd_fcmp_ge_f64(f64x2) -> i64x2 {
block0(v0: f64x2):
v1 = f64const 0.0
v3 = splat.f64x2 v1
v2 = fcmp ge v0, v3
v4 = raw_bitcast.i64x2 v2
return v4
}
; run: %simd_fcmp_ge_f64([-0x1.0 0x0.0]) == [0 0xffffffffffffffff]
; run: %simd_fcmp_ge_f64([0x1.0 NaN]) == [0xffffffffffffffff 0]
function %simd_fcmp_lt_f32(f32x4) -> i32x4 {
block0(v0: f32x4):
v1 = f32const 0.0
v3 = splat.f32x4 v1
v2 = fcmp lt v0, v3
v4 = raw_bitcast.i32x4 v2
return v4
}
; run: %simd_fcmp_lt_f32([-0x1.0 0x0.0 0x1.0 NaN]) == [0xffffffff 0 0 0]
function %simd_fcmp_gt_f64(f64x2) -> i64x2 {
block0(v0: f64x2):
v1 = f64const 0.0
v3 = splat.f64x2 v1
v2 = fcmp gt v0, v3
v4 = raw_bitcast.i64x2 v2
return v4
}
; run: %simd_fcmp_gt_f64([-0x1.0 0x0.0]) == [0 0]
; run: %simd_fcmp_gt_f64([0x1.0 NaN]) == [0xffffffffffffffff 0]
function %simd_icmp_eq_i32(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = iconst.i32 0
v3 = splat.i32x4 v1
v2 = icmp eq v3, v0
v4 = raw_bitcast.i32x4 v2
return v4
}
; run: %simd_icmp_eq_i32([1 0 -1 100]) == [0 0xffffffff 0 0]
function %simd_icmp_ne_i64(i64x2) -> i64x2 {
block0(v0: i64x2):
v1 = iconst.i64 0
v3 = splat.i64x2 v1
v2 = icmp ne v3, v0
v4 = raw_bitcast.i64x2 v2
return v4
}
; run: %simd_icmp_ne_i64([-1 0]) == [0xffffffffffffffff 0]
; run: %simd_icmp_ne_i64([1 100]) == [0xffffffffffffffff 0xffffffffffffffff]
function %simd_icmp_le_i8(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = iconst.i8 0
v3 = splat.i8x16 v1
v2 = icmp sle v3, v0
v4 = raw_bitcast.i8x16 v2
return v4
}
; run: %simd_icmp_le_i8([-1 0 1 100 -1 0 1 100 -1 0 1 100 -1 0 1 100]) == [0 0xff 0xff 0xff 0 0xff 0xff 0xff 0 0xff 0xff 0xff 0 0xff 0xff 0xff]
function %simd_icmp_ge_i16(i16x8) -> i16x8 {
block0(v0: i16x8):
v1 = iconst.i16 0
v3 = splat.i16x8 v1
v2 = icmp sge v3, v0
v4 = raw_bitcast.i16x8 v2
return v4
}
; run: %simd_icmp_ge_i16([-1 0 1 100 -1 0 1 100]) == [0xffff 0xffff 0 0 0xffff 0xffff 0 0]
function %simd_icmp_lt_i32(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = iconst.i32 0
v3 = splat.i32x4 v1
v2 = icmp slt v3, v0
v4 = raw_bitcast.i32x4 v2
return v4
}
; run: %simd_icmp_lt_i32([-1 0 1 100]) == [0 0 0xffffffff 0xffffffff]
function %simd_icmp_gt_i64(i64x2) -> i64x2 {
block0(v0: i64x2):
v1 = iconst.i64 0
v3 = splat.i64x2 v1
v2 = icmp sgt v3, v0
v4 = raw_bitcast.i64x2 v2
return v4
}
; run: %simd_icmp_gt_i64([-1 0]) == [0xffffffffffffffff 0]
; run: %simd_icmp_gt_i64([1 100]) == [0 0]
function %simd_fcmp_eq_f64(f64x2) -> i64x2 {
block0(v0: f64x2):
v1 = f64const 0.0
v3 = splat.f64x2 v1
v2 = fcmp eq v3, v0
v4 = raw_bitcast.i64x2 v2
return v4
}
; run: %simd_fcmp_eq_f64([-0x1.0 0x0.0]) == [0 0xffffffffffffffff]
; run: %simd_fcmp_eq_f64([0x1.0 NaN]) == [0 0]
function %simd_fcmp_ne_f32(f32x4) -> i32x4 {
block0(v0: f32x4):
v1 = f32const 0.0
v3 = splat.f32x4 v1
v2 = fcmp ne v3, v0
v4 = raw_bitcast.i32x4 v2
return v4
}
; run: %simd_fcmp_ne_f32([-0x1.0 0x0.0 0x1.0 NaN]) == [0xffffffff 0 0xffffffff 0xffffffff]
function %simd_fcmp_le_f64(f64x2) -> i64x2 {
block0(v0: f64x2):
v1 = f64const 0.0
v3 = splat.f64x2 v1
v2 = fcmp le v3, v0
v4 = raw_bitcast.i64x2 v2
return v4
}
; run: %simd_fcmp_le_f64([-0x1.0 0x0.0]) == [0 0xffffffffffffffff]
; run: %simd_fcmp_le_f64([0x1.0 NaN]) == [0xffffffffffffffff 0]
function %simd_fcmp_ge_f32(f32x4) -> i32x4 {
block0(v0: f32x4):
v1 = f32const 0.0
v3 = splat.f32x4 v1
v2 = fcmp ge v3, v0
v4 = raw_bitcast.i32x4 v2
return v4
}
; run: %simd_fcmp_ge_f32([-0x1.0 0x0.0 0x1.0 NaN]) == [0xffffffff 0xffffffff 0 0]
function %simd_fcmp_lt_f64(f64x2) -> i64x2 {
block0(v0: f64x2):
v1 = f64const 0.0
v3 = splat.f64x2 v1
v2 = fcmp lt v3, v0
v4 = raw_bitcast.i64x2 v2
return v4
}
; run: %simd_fcmp_lt_f64([-0x1.0 0x0.0]) == [0 0]
; run: %simd_fcmp_lt_f64([0x1.0 NaN]) == [0xffffffffffffffff 0]
function %simd_fcmp_gt_f32(f32x4) -> i32x4 {
block0(v0: f32x4):
v1 = f32const 0.0
v3 = splat.f32x4 v1
v2 = fcmp gt v3, v0
v4 = raw_bitcast.i32x4 v2
return v4
}
; run: %simd_fcmp_gt_f32([-0x1.0 0x0.0 0x1.0 NaN]) == [0xffffffff 0 0 0]