[AArch64] i64x2 support for min/max (#4575)
Also added interpreter support for vector min/max. Copyright (c) 2022, Arm Limited.
This commit is contained in:
@@ -693,15 +693,27 @@
|
|||||||
(rule (lower (has_type ty @ (not_i64x2) (imin x y)))
|
(rule (lower (has_type ty @ (not_i64x2) (imin x y)))
|
||||||
(vec_rrr (VecALUOp.Smin) x y (vector_size ty)))
|
(vec_rrr (VecALUOp.Smin) x y (vector_size ty)))
|
||||||
|
|
||||||
|
;; `imin` on i64x2. The `Smin` rule for this opcode is restricted to
;; `not_i64x2`, so 64-bit lanes are lowered as a compare followed by `bsl`.
;; Presumably `Cmgt y x` yields an all-ones lane mask where y > x (signed) and
;; `bsl` then takes `x` in those lanes and `y` elsewhere -- confirm against the
;; `bsl` / `vec_rrr` helper definitions.
(rule (lower (has_type $I64X2 (imin x y)))
|
||||||
|
(bsl $I64X2 (vec_rrr (VecALUOp.Cmgt) y x (VectorSize.Size64x2)) x y))
|
||||||
|
|
||||||
(rule (lower (has_type ty @ (not_i64x2) (umin x y)))
|
(rule (lower (has_type ty @ (not_i64x2) (umin x y)))
|
||||||
(vec_rrr (VecALUOp.Umin) x y (vector_size ty)))
|
(vec_rrr (VecALUOp.Umin) x y (vector_size ty)))
|
||||||
|
|
||||||
|
;; `umin` on i64x2. The `Umin` rule for this opcode is restricted to
;; `not_i64x2`, so 64-bit lanes are lowered as a compare followed by `bsl`.
;; Presumably `Cmhi y x` yields an all-ones lane mask where y > x (unsigned)
;; and `bsl` then takes `x` in those lanes and `y` elsewhere -- confirm against
;; the `bsl` / `vec_rrr` helper definitions.
(rule (lower (has_type $I64X2 (umin x y)))
|
||||||
|
(bsl $I64X2 (vec_rrr (VecALUOp.Cmhi) y x (VectorSize.Size64x2)) x y))
|
||||||
|
|
||||||
(rule (lower (has_type ty @ (not_i64x2) (imax x y)))
|
(rule (lower (has_type ty @ (not_i64x2) (imax x y)))
|
||||||
(vec_rrr (VecALUOp.Smax) x y (vector_size ty)))
|
(vec_rrr (VecALUOp.Smax) x y (vector_size ty)))
|
||||||
|
|
||||||
|
;; `imax` on i64x2. The `Smax` rule for this opcode is restricted to
;; `not_i64x2`, so 64-bit lanes are lowered as a compare followed by `bsl`.
;; Presumably `Cmgt x y` yields an all-ones lane mask where x > y (signed) and
;; `bsl` then takes `x` in those lanes and `y` elsewhere -- confirm against the
;; `bsl` / `vec_rrr` helper definitions.
(rule (lower (has_type $I64X2 (imax x y)))
|
||||||
|
(bsl $I64X2 (vec_rrr (VecALUOp.Cmgt) x y (VectorSize.Size64x2)) x y))
|
||||||
|
|
||||||
(rule (lower (has_type ty @ (not_i64x2) (umax x y)))
|
(rule (lower (has_type ty @ (not_i64x2) (umax x y)))
|
||||||
(vec_rrr (VecALUOp.Umax) x y (vector_size ty)))
|
(vec_rrr (VecALUOp.Umax) x y (vector_size ty)))
|
||||||
|
|
||||||
|
;; `umax` on i64x2. The `Umax` rule for this opcode is restricted to
;; `not_i64x2`, so 64-bit lanes are lowered as a compare followed by `bsl`.
;; Presumably `Cmhi x y` yields an all-ones lane mask where x > y (unsigned)
;; and `bsl` then takes `x` in those lanes and `y` elsewhere -- confirm against
;; the `bsl` / `vec_rrr` helper definitions.
(rule (lower (has_type $I64X2 (umax x y)))
|
||||||
|
(bsl $I64X2 (vec_rrr (VecALUOp.Cmhi) x y (VectorSize.Size64x2)) x y))
|
||||||
|
|
||||||
;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
;; General rule for extending input to an output which fits in a single
|
;; General rule for extending input to an output which fits in a single
|
||||||
|
|||||||
@@ -0,0 +1,39 @@
|
|||||||
|
test run
|
||||||
|
test interpret
|
||||||
|
target aarch64
|
||||||
|
|
||||||
|
; Signed lane-wise minimum of two i64x2 vectors.
function %imin_i64x2(i64x2, i64x2) -> i64x2 {
|
||||||
|
block0(v0: i64x2, v1: i64x2):
|
||||||
|
v2 = imin v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; All lanes below fit in 32 bits, so every value is non-negative as an i64.
; run: %imin_i64x2([0xC00FFFEE 0xBADAB00F], [0x98763210 0x43216789]) == [ 0x98763210 0x43216789 ]
|
||||||
|
; Lane 0 of the first operand has bit 63 set, so it is negative as a signed
; i64 and is expected to be selected as the minimum.
; run: %imin_i64x2([0x80000000C00FFFEE 0xBADAB00F], [0x98763210 0x43216789]) == [ 0x80000000C00FFFEE 0x43216789 ]
|
||||||
|
|
||||||
|
; Signed lane-wise maximum of two i64x2 vectors.
function %imax_i64x2(i64x2, i64x2) -> i64x2 {
|
||||||
|
block0(v0: i64x2, v1: i64x2):
|
||||||
|
v2 = imax v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; All lanes below fit in 32 bits, so every value is non-negative as an i64.
; run: %imax_i64x2([0xC00FFFEE 0xBADAB00F], [0x98763210 0x43216789]) == [ 0xC00FFFEE 0xBADAB00F ]
|
||||||
|
; Lane 1 of the first operand has bit 63 set, so it is negative as a signed
; i64 and the second operand's lane is expected to be selected as the maximum.
; run: %imax_i64x2([0xC00FFFEE 0x80000000BADAB00F], [0x98763210 0x43216789]) == [ 0xC00FFFEE 0x43216789 ]
|
||||||
|
|
||||||
|
; Unsigned lane-wise minimum of two i64x2 vectors.
function %umin_i64x2(i64x2, i64x2) -> i64x2 {
|
||||||
|
block0(v0: i64x2, v1: i64x2):
|
||||||
|
v2 = umin v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; run: %umin_i64x2([0xDEADBEEF 0xBADAB00F], [0x12349876 0x43216789]) == [ 0x12349876 0x43216789 ]
|
||||||
|
; Lane 1 of the first operand has bit 63 set; as an unsigned value it is the
; larger one, so the second operand's lane is expected as the minimum.
; run: %umin_i64x2([0xC00FFFEE 0x80000000BADAB00F], [0x98763210 0x43216789]) == [ 0x98763210 0x43216789 ]
|
||||||
|
|
||||||
|
; Unsigned lane-wise maximum of two i64x2 vectors.
function %umax_i64x2(i64x2, i64x2) -> i64x2 {
|
||||||
|
block0(v0: i64x2, v1: i64x2):
|
||||||
|
v2 = umax v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; run: %umax_i64x2([0xBAADF00D 0xBADAB00F], [0xCA11ACAB 0x43216789]) == [ 0xCA11ACAB 0xBADAB00F ]
|
||||||
|
; Lane 1 of the first operand has bit 63 set; as an unsigned value it is the
; larger one, so it is expected to be selected as the maximum.
; run: %umax_i64x2([0xC00FFFEE 0x80000000BADAB00F], [0x98763210 0x43216789]) == [ 0xC00FFFEE 0x80000000BADAB00F ]
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
test run
|
test run
|
||||||
|
test interpret
|
||||||
target aarch64
|
target aarch64
|
||||||
target x86_64
|
target x86_64
|
||||||
target s390x
|
target s390x
|
||||||
|
|||||||
@@ -488,24 +488,52 @@ where
|
|||||||
}
|
}
|
||||||
ControlFlow::Continue
|
ControlFlow::Continue
|
||||||
}
|
}
|
||||||
Opcode::Imin => choose(Value::gt(&arg(1)?, &arg(0)?)?, arg(0)?, arg(1)?),
|
Opcode::Imin => {
|
||||||
Opcode::Umin => choose(
|
if ctrl_ty.is_vector() {
|
||||||
|
let icmp = icmp(ctrl_ty, IntCC::SignedGreaterThan, &arg(1)?, &arg(0)?)?;
|
||||||
|
assign(vselect(&icmp, &arg(0)?, &arg(1)?, ctrl_ty)?)
|
||||||
|
} else {
|
||||||
|
choose(Value::gt(&arg(1)?, &arg(0)?)?, arg(0)?, arg(1)?)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Opcode::Umin => {
|
||||||
|
if ctrl_ty.is_vector() {
|
||||||
|
let icmp = icmp(ctrl_ty, IntCC::UnsignedGreaterThan, &arg(1)?, &arg(0)?)?;
|
||||||
|
assign(vselect(&icmp, &arg(0)?, &arg(1)?, ctrl_ty)?)
|
||||||
|
} else {
|
||||||
|
choose(
|
||||||
Value::gt(
|
Value::gt(
|
||||||
&arg(1)?.convert(ValueConversionKind::ToUnsigned)?,
|
&arg(1)?.convert(ValueConversionKind::ToUnsigned)?,
|
||||||
&arg(0)?.convert(ValueConversionKind::ToUnsigned)?,
|
&arg(0)?.convert(ValueConversionKind::ToUnsigned)?,
|
||||||
)?,
|
)?,
|
||||||
arg(0)?,
|
arg(0)?,
|
||||||
arg(1)?,
|
arg(1)?,
|
||||||
),
|
)
|
||||||
Opcode::Imax => choose(Value::gt(&arg(0)?, &arg(1)?)?, arg(0)?, arg(1)?),
|
}
|
||||||
Opcode::Umax => choose(
|
}
|
||||||
|
Opcode::Imax => {
|
||||||
|
if ctrl_ty.is_vector() {
|
||||||
|
let icmp = icmp(ctrl_ty, IntCC::SignedGreaterThan, &arg(0)?, &arg(1)?)?;
|
||||||
|
assign(vselect(&icmp, &arg(0)?, &arg(1)?, ctrl_ty)?)
|
||||||
|
} else {
|
||||||
|
choose(Value::gt(&arg(0)?, &arg(1)?)?, arg(0)?, arg(1)?)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Opcode::Umax => {
|
||||||
|
if ctrl_ty.is_vector() {
|
||||||
|
let icmp = icmp(ctrl_ty, IntCC::UnsignedGreaterThan, &arg(0)?, &arg(1)?)?;
|
||||||
|
assign(vselect(&icmp, &arg(0)?, &arg(1)?, ctrl_ty)?)
|
||||||
|
} else {
|
||||||
|
choose(
|
||||||
Value::gt(
|
Value::gt(
|
||||||
&arg(0)?.convert(ValueConversionKind::ToUnsigned)?,
|
&arg(0)?.convert(ValueConversionKind::ToUnsigned)?,
|
||||||
&arg(1)?.convert(ValueConversionKind::ToUnsigned)?,
|
&arg(1)?.convert(ValueConversionKind::ToUnsigned)?,
|
||||||
)?,
|
)?,
|
||||||
arg(0)?,
|
arg(0)?,
|
||||||
arg(1)?,
|
arg(1)?,
|
||||||
),
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
Opcode::AvgRound => {
|
Opcode::AvgRound => {
|
||||||
let sum = Value::add(arg(0)?, arg(1)?)?;
|
let sum = Value::add(arg(0)?, arg(1)?)?;
|
||||||
let one = Value::int(1, arg(0)?.ty())?;
|
let one = Value::int(1, arg(0)?.ty())?;
|
||||||
@@ -897,20 +925,7 @@ where
|
|||||||
}
|
}
|
||||||
Opcode::Vsplit => unimplemented!("Vsplit"),
|
Opcode::Vsplit => unimplemented!("Vsplit"),
|
||||||
Opcode::Vconcat => unimplemented!("Vconcat"),
|
Opcode::Vconcat => unimplemented!("Vconcat"),
|
||||||
Opcode::Vselect => {
|
Opcode::Vselect => assign(vselect(&arg(0)?, &arg(1)?, &arg(2)?, ctrl_ty)?),
|
||||||
let c = extractlanes(&arg(0)?, ctrl_ty)?;
|
|
||||||
let x = extractlanes(&arg(1)?, ctrl_ty)?;
|
|
||||||
let y = extractlanes(&arg(2)?, ctrl_ty)?;
|
|
||||||
let mut new_vec = SimdVec::new();
|
|
||||||
for (c, (x, y)) in c.into_iter().zip(x.into_iter().zip(y.into_iter())) {
|
|
||||||
if Value::eq(&c, &Value::int(0, ctrl_ty.lane_type())?)? {
|
|
||||||
new_vec.push(y);
|
|
||||||
} else {
|
|
||||||
new_vec.push(x);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
assign(vectorizelanes(&new_vec, ctrl_ty)?)
|
|
||||||
}
|
|
||||||
Opcode::VanyTrue => assign(fold_vector(
|
Opcode::VanyTrue => assign(fold_vector(
|
||||||
arg(0)?,
|
arg(0)?,
|
||||||
ctrl_ty,
|
ctrl_ty,
|
||||||
@@ -1296,3 +1311,21 @@ where
|
|||||||
|
|
||||||
vectorizelanes(&result, vector_type)
|
vectorizelanes(&result, vector_type)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Lane-wise select between two vectors.
///
/// Splits `c`, `x` and `y` into lanes of `vector_type` and builds a new vector
/// that takes each lane from `y` when the matching lane of `c` compares equal
/// to integer zero, and from `x` otherwise.
///
/// Errors from `extractlanes`, `Value::int`, `Value::eq` and `vectorizelanes`
/// are propagated to the caller via `?`.
fn vselect<V>(c: &V, x: &V, y: &V, vector_type: types::Type) -> ValueResult<V>
|
||||||
|
where
|
||||||
|
V: Value,
|
||||||
|
{
|
||||||
|
let c = extractlanes(c, vector_type)?;
|
||||||
|
let x = extractlanes(x, vector_type)?;
|
||||||
|
let y = extractlanes(y, vector_type)?;
|
||||||
|
let mut new_vec = SimdVec::new();
|
||||||
|
// Walk the three lane lists in lock-step; `zip` ends at the shortest one.
for (c, (x, y)) in c.into_iter().zip(x.into_iter().zip(y.into_iter())) {
|
||||||
|
// A condition lane equal to zero selects `y`; any other value selects `x`.
if Value::eq(&c, &Value::int(0, vector_type.lane_type())?)? {
|
||||||
|
new_vec.push(y);
|
||||||
|
} else {
|
||||||
|
new_vec.push(x);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Reassemble the chosen lanes into a single vector value.
vectorizelanes(&new_vec, vector_type)
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user