diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle
index ce536c1198..f46a103fdd 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -693,15 +693,31 @@
 (rule (lower (has_type ty @ (not_i64x2) (imin x y)))
       (vec_rrr (VecALUOp.Smin) x y (vector_size ty)))
 
+;; i64x2 `imin`: where `y > x` (signed `Cmgt`) take `x`, otherwise `y`.
+(rule (lower (has_type $I64X2 (imin x y)))
+      (bsl $I64X2 (vec_rrr (VecALUOp.Cmgt) y x (VectorSize.Size64x2)) x y))
+
 (rule (lower (has_type ty @ (not_i64x2) (umin x y)))
       (vec_rrr (VecALUOp.Umin) x y (vector_size ty)))
 
+;; i64x2 `umin`: where `y > x` (unsigned `Cmhi`) take `x`, otherwise `y`.
+(rule (lower (has_type $I64X2 (umin x y)))
+      (bsl $I64X2 (vec_rrr (VecALUOp.Cmhi) y x (VectorSize.Size64x2)) x y))
+
 (rule (lower (has_type ty @ (not_i64x2) (imax x y)))
       (vec_rrr (VecALUOp.Smax) x y (vector_size ty)))
 
+;; i64x2 `imax`: where `x > y` (signed `Cmgt`) take `x`, otherwise `y`.
+(rule (lower (has_type $I64X2 (imax x y)))
+      (bsl $I64X2 (vec_rrr (VecALUOp.Cmgt) x y (VectorSize.Size64x2)) x y))
+
 (rule (lower (has_type ty @ (not_i64x2) (umax x y)))
       (vec_rrr (VecALUOp.Umax) x y (vector_size ty)))
 
+;; i64x2 `umax`: where `x > y` (unsigned `Cmhi`) take `x`, otherwise `y`.
+(rule (lower (has_type $I64X2 (umax x y)))
+      (bsl $I64X2 (vec_rrr (VecALUOp.Cmhi) x y (VectorSize.Size64x2)) x y))
+
 ;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; General rule for extending input to an output which fits in a single
diff --git a/cranelift/filetests/filetests/runtests/simd-min-max-aarch64.clif b/cranelift/filetests/filetests/runtests/simd-min-max-aarch64.clif
new file mode 100644
index 0000000000..ca78e14883
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-min-max-aarch64.clif
@@ -0,0 +1,39 @@
+test run
+test interpret
+target aarch64
+
+function %imin_i64x2(i64x2, i64x2) -> i64x2 {
+block0(v0: i64x2, v1: i64x2):
+    v2 = imin v0, v1
+    return v2
+}
+
+; run: %imin_i64x2([0xC00FFFEE 0xBADAB00F], [0x98763210 0x43216789]) == [ 0x98763210 0x43216789 ]
+; run: %imin_i64x2([0x80000000C00FFFEE 0xBADAB00F], [0x98763210 0x43216789]) == [ 0x80000000C00FFFEE 0x43216789 ]
+
+function %imax_i64x2(i64x2, i64x2) -> i64x2 {
+block0(v0: i64x2, v1: i64x2):
+    v2 = imax v0, v1
+    return v2
+}
+
+; run: %imax_i64x2([0xC00FFFEE 0xBADAB00F], [0x98763210 0x43216789]) == [ 0xC00FFFEE 0xBADAB00F ]
+; run: %imax_i64x2([0xC00FFFEE 0x80000000BADAB00F], [0x98763210 0x43216789]) == [ 0xC00FFFEE 0x43216789 ]
+
+function %umin_i64x2(i64x2, i64x2) -> i64x2 {
+block0(v0: i64x2, v1: i64x2):
+    v2 = umin v0, v1
+    return v2
+}
+
+; run: %umin_i64x2([0xDEADBEEF 0xBADAB00F], [0x12349876 0x43216789]) == [ 0x12349876 0x43216789 ]
+; run: %umin_i64x2([0xC00FFFEE 0x80000000BADAB00F], [0x98763210 0x43216789]) == [ 0x98763210 0x43216789 ]
+
+function %umax_i64x2(i64x2, i64x2) -> i64x2 {
+block0(v0: i64x2, v1: i64x2):
+    v2 = umax v0, v1
+    return v2
+}
+
+; run: %umax_i64x2([0xBAADF00D 0xBADAB00F], [0xCA11ACAB 0x43216789]) == [ 0xCA11ACAB 0xBADAB00F ]
+; run: %umax_i64x2([0xC00FFFEE 0x80000000BADAB00F], [0x98763210 0x43216789]) == [ 0xC00FFFEE 0x80000000BADAB00F ]
diff --git a/cranelift/filetests/filetests/runtests/simd-min-max.clif b/cranelift/filetests/filetests/runtests/simd-min-max.clif
index 7a4cc0a078..b9a0904f4a 100644
--- a/cranelift/filetests/filetests/runtests/simd-min-max.clif
+++ b/cranelift/filetests/filetests/runtests/simd-min-max.clif
@@ -1,4 +1,5 @@
 test run
+test interpret
 target aarch64
 target x86_64
 target s390x
diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs
index 38ebcae11c..1cc704a994 100644
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -488,24 +488,52 @@ where
             }
             ControlFlow::Continue
         }
-        Opcode::Imin => choose(Value::gt(&arg(1)?, &arg(0)?)?, arg(0)?, arg(1)?),
-        Opcode::Umin => choose(
-            Value::gt(
-                &arg(1)?.convert(ValueConversionKind::ToUnsigned)?,
-                &arg(0)?.convert(ValueConversionKind::ToUnsigned)?,
-            )?,
-            arg(0)?,
-            arg(1)?,
-        ),
-        Opcode::Imax => choose(Value::gt(&arg(0)?, &arg(1)?)?, arg(0)?, arg(1)?),
-        Opcode::Umax => choose(
-            Value::gt(
-                &arg(0)?.convert(ValueConversionKind::ToUnsigned)?,
-                &arg(1)?.convert(ValueConversionKind::ToUnsigned)?,
-            )?,
-            arg(0)?,
-            arg(1)?,
-        ),
+        Opcode::Imin => {
+            if ctrl_ty.is_vector() {
+                let icmp = icmp(ctrl_ty, IntCC::SignedGreaterThan, &arg(1)?, &arg(0)?)?;
+                assign(vselect(&icmp, &arg(0)?, &arg(1)?, ctrl_ty)?)
+            } else {
+                choose(Value::gt(&arg(1)?, &arg(0)?)?, arg(0)?, arg(1)?)
+            }
+        }
+        Opcode::Umin => {
+            if ctrl_ty.is_vector() {
+                let icmp = icmp(ctrl_ty, IntCC::UnsignedGreaterThan, &arg(1)?, &arg(0)?)?;
+                assign(vselect(&icmp, &arg(0)?, &arg(1)?, ctrl_ty)?)
+            } else {
+                choose(
+                    Value::gt(
+                        &arg(1)?.convert(ValueConversionKind::ToUnsigned)?,
+                        &arg(0)?.convert(ValueConversionKind::ToUnsigned)?,
+                    )?,
+                    arg(0)?,
+                    arg(1)?,
+                )
+            }
+        }
+        Opcode::Imax => {
+            if ctrl_ty.is_vector() {
+                let icmp = icmp(ctrl_ty, IntCC::SignedGreaterThan, &arg(0)?, &arg(1)?)?;
+                assign(vselect(&icmp, &arg(0)?, &arg(1)?, ctrl_ty)?)
+            } else {
+                choose(Value::gt(&arg(0)?, &arg(1)?)?, arg(0)?, arg(1)?)
+            }
+        }
+        Opcode::Umax => {
+            if ctrl_ty.is_vector() {
+                let icmp = icmp(ctrl_ty, IntCC::UnsignedGreaterThan, &arg(0)?, &arg(1)?)?;
+                assign(vselect(&icmp, &arg(0)?, &arg(1)?, ctrl_ty)?)
+            } else {
+                choose(
+                    Value::gt(
+                        &arg(0)?.convert(ValueConversionKind::ToUnsigned)?,
+                        &arg(1)?.convert(ValueConversionKind::ToUnsigned)?,
+                    )?,
+                    arg(0)?,
+                    arg(1)?,
+                )
+            }
+        }
         Opcode::AvgRound => {
             let sum = Value::add(arg(0)?, arg(1)?)?;
             let one = Value::int(1, arg(0)?.ty())?;
@@ -897,20 +925,7 @@ where
         }
         Opcode::Vsplit => unimplemented!("Vsplit"),
         Opcode::Vconcat => unimplemented!("Vconcat"),
-        Opcode::Vselect => {
-            let c = extractlanes(&arg(0)?, ctrl_ty)?;
-            let x = extractlanes(&arg(1)?, ctrl_ty)?;
-            let y = extractlanes(&arg(2)?, ctrl_ty)?;
-            let mut new_vec = SimdVec::new();
-            for (c, (x, y)) in c.into_iter().zip(x.into_iter().zip(y.into_iter())) {
-                if Value::eq(&c, &Value::int(0, ctrl_ty.lane_type())?)? {
-                    new_vec.push(y);
-                } else {
-                    new_vec.push(x);
-                }
-            }
-            assign(vectorizelanes(&new_vec, ctrl_ty)?)
-        }
+        Opcode::Vselect => assign(vselect(&arg(0)?, &arg(1)?, &arg(2)?, ctrl_ty)?),
         Opcode::VanyTrue => assign(fold_vector(
             arg(0)?,
             ctrl_ty,
@@ -1296,3 +1311,23 @@ where
 
     vectorizelanes(&result, vector_type)
 }
+
+/// Lane-wise select: for every lane of `vector_type`, yields the lane of `x`
+/// when the matching lane of `c` is non-zero and the lane of `y` otherwise.
+fn vselect<V>(c: &V, x: &V, y: &V, vector_type: types::Type) -> ValueResult<V>
+where
+    V: Value,
+{
+    let c = extractlanes(c, vector_type)?;
+    let x = extractlanes(x, vector_type)?;
+    let y = extractlanes(y, vector_type)?;
+    let mut new_vec = SimdVec::new();
+    for (c, (x, y)) in c.into_iter().zip(x.into_iter().zip(y.into_iter())) {
+        if Value::eq(&c, &Value::int(0, vector_type.lane_type())?)? {
+            new_vec.push(y);
+        } else {
+            new_vec.push(x);
+        }
+    }
+    vectorizelanes(&new_vec, vector_type)
+}