[AArch64] i64x2 support for min/max (#4575)

2022-08-02 19:42:05 +01:00
parent c77bec4dcb
commit 37cd96beff
4 changed files with 117 additions and 32 deletions
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -693,15 +693,27 @@
 ;; `imin` on any type matched by `not_i64x2`: a single `vec_rrr` with
 ;; `VecALUOp.Smin`, using the vector size derived from `ty`.
 (rule (lower (has_type ty @ (not_i64x2) (imin x y)))
      (vec_rrr (VecALUOp.Smin) x y (vector_size ty)))
 ;; `imin` on `$I64X2` is lowered as a compare followed by `bsl` rather than
 ;; `Smin`. The `Cmgt` compare receives its operands as `y x`, and its result is
 ;; the first `bsl` operand, followed by `x` and `y`.
 ;; NOTE(review): presumably `bsl` takes `x` in lanes where the compare is true
 ;; (signed `y > x`) and `y` otherwise, giving the lane-wise minimum -- confirm
 ;; against the `bsl` and `Cmgt` definitions.
 (rule (lower (has_type $I64X2 (imin x y)))
      (bsl $I64X2 (vec_rrr (VecALUOp.Cmgt) y x (VectorSize.Size64x2)) x y))
 ;; `umin` on any type matched by `not_i64x2`: a single `vec_rrr` with
 ;; `VecALUOp.Umin`, using the vector size derived from `ty`.
 (rule (lower (has_type ty @ (not_i64x2) (umin x y)))
      (vec_rrr (VecALUOp.Umin) x y (vector_size ty)))
 ;; `umin` on `$I64X2` is lowered as a compare followed by `bsl` rather than
 ;; `Umin`. The `Cmhi` compare receives its operands as `y x`, and its result is
 ;; the first `bsl` operand, followed by `x` and `y`.
 ;; NOTE(review): presumably `bsl` takes `x` in lanes where the compare is true
 ;; (unsigned `y > x`) and `y` otherwise, giving the lane-wise minimum --
 ;; confirm against the `bsl` and `Cmhi` definitions.
 (rule (lower (has_type $I64X2 (umin x y)))
      (bsl $I64X2 (vec_rrr (VecALUOp.Cmhi) y x (VectorSize.Size64x2)) x y))
 ;; `imax` on any type matched by `not_i64x2`: a single `vec_rrr` with
 ;; `VecALUOp.Smax`, using the vector size derived from `ty`.
 (rule (lower (has_type ty @ (not_i64x2) (imax x y)))
      (vec_rrr (VecALUOp.Smax) x y (vector_size ty)))
 ;; `imax` on `$I64X2` is lowered as a compare followed by `bsl` rather than
 ;; `Smax`. Unlike the `imin` rule, the `Cmgt` compare receives its operands as
 ;; `x y`; its result is the first `bsl` operand, followed by `x` and `y`.
 ;; NOTE(review): presumably `bsl` takes `x` in lanes where the compare is true
 ;; (signed `x > y`) and `y` otherwise, giving the lane-wise maximum -- confirm
 ;; against the `bsl` and `Cmgt` definitions.
 (rule (lower (has_type $I64X2 (imax x y)))
      (bsl $I64X2 (vec_rrr (VecALUOp.Cmgt) x y (VectorSize.Size64x2)) x y))
 ;; `umax` on any type matched by `not_i64x2`: a single `vec_rrr` with
 ;; `VecALUOp.Umax`, using the vector size derived from `ty`.
 (rule (lower (has_type ty @ (not_i64x2) (umax x y)))
      (vec_rrr (VecALUOp.Umax) x y (vector_size ty)))
 ;; `umax` on `$I64X2` is lowered as a compare followed by `bsl` rather than
 ;; `Umax`. Unlike the `umin` rule, the `Cmhi` compare receives its operands as
 ;; `x y`; its result is the first `bsl` operand, followed by `x` and `y`.
 ;; NOTE(review): presumably `bsl` takes `x` in lanes where the compare is true
 ;; (unsigned `x > y`) and `y` otherwise, giving the lane-wise maximum --
 ;; confirm against the `bsl` and `Cmhi` definitions.
 (rule (lower (has_type $I64X2 (umax x y)))
      (bsl $I64X2 (vec_rrr (VecALUOp.Cmhi) x y (VectorSize.Size64x2)) x y))
 ;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; General rule for extending input to an output which fits in a single
--- a/cranelift/filetests/filetests/runtests/simd-min-max-aarch64.clif
+++ b/cranelift/filetests/filetests/runtests/simd-min-max-aarch64.clif
@@ -0,0 +1,39 @@
 test run
 test interpret
 target aarch64
 ; Applies `imin` to two i64x2 vectors and returns the result.
 ; The first case below uses lanes whose upper 32 bits are all zero. The second
 ; case sets the top bit of lane 0 of the first operand (0x80000000C00FFFEE),
 ; and the expected result keeps that lane, so the comparison has to treat it
 ; as a negative (signed) 64-bit value.
 function %imin_i64x2(i64x2, i64x2) -> i64x2 {
 block0(v0: i64x2, v1: i64x2):
  v2 = imin v0, v1
  return v2
 }
 ; run: %imin_i64x2([0xC00FFFEE 0xBADAB00F], [0x98763210 0x43216789]) == [ 0x98763210 0x43216789 ]
 ; run: %imin_i64x2([0x80000000C00FFFEE 0xBADAB00F], [0x98763210 0x43216789]) == [ 0x80000000C00FFFEE 0x43216789 ]
 ; Applies `imax` to two i64x2 vectors and returns the result.
 ; The first case below uses lanes whose upper 32 bits are all zero. The second
 ; case sets the top bit of lane 1 of the first operand (0x80000000BADAB00F),
 ; and the expected result takes lane 1 from the second operand, so the
 ; comparison has to treat the first operand's lane as negative (signed).
 function %imax_i64x2(i64x2, i64x2) -> i64x2 {
 block0(v0: i64x2, v1: i64x2):
  v2 = imax v0, v1
  return v2
 }
 ; run: %imax_i64x2([0xC00FFFEE 0xBADAB00F], [0x98763210 0x43216789]) == [ 0xC00FFFEE 0xBADAB00F ]
 ; run: %imax_i64x2([0xC00FFFEE 0x80000000BADAB00F], [0x98763210 0x43216789]) == [ 0xC00FFFEE 0x43216789 ]
 ; Applies `umin` to two i64x2 vectors and returns the result.
 ; The first case below uses lanes whose upper 32 bits are all zero. The second
 ; case sets the top bit of lane 1 of the first operand (0x80000000BADAB00F),
 ; and the expected result takes lane 1 from the second operand, so the
 ; comparison has to treat the first operand's lane as a large unsigned value.
 function %umin_i64x2(i64x2, i64x2) -> i64x2 {
 block0(v0: i64x2, v1: i64x2):
  v2 = umin v0, v1
  return v2
 }
 ; run: %umin_i64x2([0xDEADBEEF 0xBADAB00F], [0x12349876 0x43216789]) == [ 0x12349876 0x43216789 ]
 ; run: %umin_i64x2([0xC00FFFEE 0x80000000BADAB00F], [0x98763210 0x43216789]) == [ 0x98763210 0x43216789 ]
 ; Applies `umax` to two i64x2 vectors and returns the result.
 ; The first case below uses lanes whose upper 32 bits are all zero. The second
 ; case sets the top bit of lane 1 of the first operand (0x80000000BADAB00F),
 ; and the expected result keeps that lane, so the comparison has to treat it
 ; as a large unsigned value.
 function %umax_i64x2(i64x2, i64x2) -> i64x2 {
 block0(v0: i64x2, v1: i64x2):
  v2 = umax v0, v1
  return v2
 }
 ; run: %umax_i64x2([0xBAADF00D 0xBADAB00F], [0xCA11ACAB 0x43216789]) == [ 0xCA11ACAB 0xBADAB00F ]
 ; run: %umax_i64x2([0xC00FFFEE 0x80000000BADAB00F], [0x98763210 0x43216789]) == [ 0xC00FFFEE 0x80000000BADAB00F ]
--- a/cranelift/filetests/filetests/runtests/simd-min-max.clif
+++ b/cranelift/filetests/filetests/runtests/simd-min-max.clif
@@ -1,4 +1,5 @@
 test run
 test interpret
 target aarch64
 target x86_64
 target s390x
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -488,24 +488,52 @@ where
            }
            ControlFlow::Continue
        }
-        Opcode::Imin => choose(Value::gt(&arg(1)?, &arg(0)?)?, arg(0)?, arg(1)?),
+        Opcode::Imin => {
-        Opcode::Umin => choose(
+            if ctrl_ty.is_vector() {
-            Value::gt(
+                let icmp = icmp(ctrl_ty, IntCC::SignedGreaterThan, &arg(1)?, &arg(0)?)?;
-                &arg(1)?.convert(ValueConversionKind::ToUnsigned)?,
+                assign(vselect(&icmp, &arg(0)?, &arg(1)?, ctrl_ty)?)
-                &arg(0)?.convert(ValueConversionKind::ToUnsigned)?,
+            } else {
-            )?,
+                choose(Value::gt(&arg(1)?, &arg(0)?)?, arg(0)?, arg(1)?)
-            arg(0)?,
+            }
-            arg(1)?,
+        }
-        ),
+        Opcode::Umin => {
-        Opcode::Imax => choose(Value::gt(&arg(0)?, &arg(1)?)?, arg(0)?, arg(1)?),
+            if ctrl_ty.is_vector() {
-        Opcode::Umax => choose(
+                let icmp = icmp(ctrl_ty, IntCC::UnsignedGreaterThan, &arg(1)?, &arg(0)?)?;
-            Value::gt(
+                assign(vselect(&icmp, &arg(0)?, &arg(1)?, ctrl_ty)?)
-                &arg(0)?.convert(ValueConversionKind::ToUnsigned)?,
+            } else {
-                &arg(1)?.convert(ValueConversionKind::ToUnsigned)?,
+                choose(
-            )?,
+                    Value::gt(
-            arg(0)?,
+                        &arg(1)?.convert(ValueConversionKind::ToUnsigned)?,
-            arg(1)?,
+                        &arg(0)?.convert(ValueConversionKind::ToUnsigned)?,
-        ),
+                    )?,
                    arg(0)?,
                    arg(1)?,
                )
            }
        }
        Opcode::Imax => {
            if ctrl_ty.is_vector() {
                let icmp = icmp(ctrl_ty, IntCC::SignedGreaterThan, &arg(0)?, &arg(1)?)?;
                assign(vselect(&icmp, &arg(0)?, &arg(1)?, ctrl_ty)?)
            } else {
                choose(Value::gt(&arg(0)?, &arg(1)?)?, arg(0)?, arg(1)?)
            }
        }
        Opcode::Umax => {
            if ctrl_ty.is_vector() {
                let icmp = icmp(ctrl_ty, IntCC::UnsignedGreaterThan, &arg(0)?, &arg(1)?)?;
                assign(vselect(&icmp, &arg(0)?, &arg(1)?, ctrl_ty)?)
            } else {
                choose(
                    Value::gt(
                        &arg(0)?.convert(ValueConversionKind::ToUnsigned)?,
                        &arg(1)?.convert(ValueConversionKind::ToUnsigned)?,
                    )?,
                    arg(0)?,
                    arg(1)?,
                )
            }
        }
        Opcode::AvgRound => {
            let sum = Value::add(arg(0)?, arg(1)?)?;
            let one = Value::int(1, arg(0)?.ty())?;
@@ -897,20 +925,7 @@ where
        }
        Opcode::Vsplit => unimplemented!("Vsplit"),
        Opcode::Vconcat => unimplemented!("Vconcat"),
-        Opcode::Vselect => {
+        Opcode::Vselect => assign(vselect(&arg(0)?, &arg(1)?, &arg(2)?, ctrl_ty)?),
            let c = extractlanes(&arg(0)?, ctrl_ty)?;
            let x = extractlanes(&arg(1)?, ctrl_ty)?;
            let y = extractlanes(&arg(2)?, ctrl_ty)?;
            let mut new_vec = SimdVec::new();
            for (c, (x, y)) in c.into_iter().zip(x.into_iter().zip(y.into_iter())) {
                if Value::eq(&c, &Value::int(0, ctrl_ty.lane_type())?)? {
                    new_vec.push(y);
                } else {
                    new_vec.push(x);
                }
            }
            assign(vectorizelanes(&new_vec, ctrl_ty)?)
        }
        Opcode::VanyTrue => assign(fold_vector(
            arg(0)?,
            ctrl_ty,
@@ -1296,3 +1311,21 @@ where
    vectorizelanes(&result, vector_type)
 }
 /// Lane-wise select between `x` and `y`, driven by the lanes of `c`.
 ///
 /// All three operands are split into lanes using `vector_type`. For each lane
 /// position the result takes the lane of `y` when the matching lane of `c`
 /// compares equal to the integer zero of `vector_type`'s lane type, and the
 /// lane of `x` otherwise. The chosen lanes are then packed back into a single
 /// value with `vectorizelanes`.
 ///
 /// Any error from lane extraction, zero construction, comparison or
 /// re-vectorization is propagated to the caller.
 fn vselect<V>(c: &V, x: &V, y: &V, vector_type: types::Type) -> ValueResult<V>
 where
    V: Value,
 {
    let mask_lanes = extractlanes(c, vector_type)?;
    let taken_lanes = extractlanes(x, vector_type)?;
    let fallback_lanes = extractlanes(y, vector_type)?;

    let mut selected = SimdVec::new();
    let triples = mask_lanes
        .into_iter()
        .zip(taken_lanes.into_iter())
        .zip(fallback_lanes.into_iter());
    for ((mask, taken), fallback) in triples {
        // A zero mask lane means "not selected", so the lane comes from `y`.
        let mask_is_zero = Value::eq(&mask, &Value::int(0, vector_type.lane_type())?)?;
        selected.push(if mask_is_zero { fallback } else { taken });
    }

    vectorizelanes(&selected, vector_type)
 }