From 84b9c7bb8a0602b0a9568d789b9bf80d346cd752 Mon Sep 17 00:00:00 2001 From: Mrmaxmeier Date: Mon, 14 Feb 2022 19:21:19 +0100 Subject: [PATCH] cranelift/x64: lower min and max for <= `i64` (#3748) * cranelift/x64: lower min and max for <= `i64` * cranelift: add runtests for integer min/max --- cranelift/codegen/src/isa/x64/lower.isle | 32 ++++- .../x64/lower/isle/generated_code.manifest | 2 +- .../src/isa/x64/lower/isle/generated_code.rs | 96 +++++++++++-- .../filetests/runtests/integer-minmax.clif | 135 ++++++++++++++++++ 4 files changed, 246 insertions(+), 19 deletions(-) create mode 100644 cranelift/filetests/filetests/runtests/integer-minmax.clif diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 7f30ccda79..c3d84b7f49 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -1414,7 +1414,31 @@ (rule (vec_insert_lane $F64X2 vec val 1) (movlhps vec (reg_mem_to_xmm_mem val))) -;;;; Rules for `imax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;; Rules for `imin`, `imax`, `umin`, `umax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; `i64` and smaller. + +(decl cmp_and_choose (Type CC Value Value) ValueRegs) +(rule (cmp_and_choose (fits_in_64 ty) cc x y) + (let ((x_reg Reg (put_in_reg x)) + (y_reg Reg (put_in_reg y)) + (size OperandSize (raw_operand_size_of_type ty))) + (value_reg (with_flags_1 (cmp size (RegMemImm.Reg x_reg) y_reg) + (cmove ty cc (RegMem.Reg y_reg) x_reg))))) + +(rule (lower (has_type (fits_in_64 ty) (umin x y))) + (cmp_and_choose ty (CC.B) x y)) + +(rule (lower (has_type (fits_in_64 ty) (umax x y))) + (cmp_and_choose ty (CC.NB) x y)) + +(rule (lower (has_type (fits_in_64 ty) (imin x y))) + (cmp_and_choose ty (CC.L) x y)) + +(rule (lower (has_type (fits_in_64 ty) (imax x y))) + (cmp_and_choose ty (CC.NL) x y)) + +;; SSE `imax`. 
(rule (lower (has_type $I8X16 (imax x y))) (value_xmm (pmaxsb (put_in_xmm x) (put_in_xmm_mem y)))) @@ -1425,7 +1449,7 @@ (rule (lower (has_type $I32X4 (imax x y))) (value_xmm (pmaxsd (put_in_xmm x) (put_in_xmm_mem y)))) -;;;; Rules for `imin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; SSE `imin`. (rule (lower (has_type $I8X16 (imin x y))) (value_xmm (pminsb (put_in_xmm x) (put_in_xmm_mem y)))) @@ -1436,7 +1460,7 @@ (rule (lower (has_type $I32X4 (imin x y))) (value_xmm (pminsd (put_in_xmm x) (put_in_xmm_mem y)))) -;;;; Rules for `umax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; SSE `umax`. (rule (lower (has_type $I8X16 (umax x y))) (value_xmm (pmaxub (put_in_xmm x) (put_in_xmm_mem y)))) @@ -1447,7 +1471,7 @@ (rule (lower (has_type $I32X4 (umax x y))) (value_xmm (pmaxud (put_in_xmm x) (put_in_xmm_mem y)))) -;;;; Rules for `umin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; SSE `umin`. (rule (lower (has_type $I8X16 (umin x y))) (value_xmm (pminub (put_in_xmm x) (put_in_xmm_mem y)))) diff --git a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest index ea91fd2477..f3599d8258 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest +++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest @@ -1,4 +1,4 @@ src/clif.isle 9ea75a6f790b5c03 src/prelude.isle 73285cd431346d53 src/isa/x64/inst.isle 7513533d16948249 -src/isa/x64/lower.isle 976ac116c5fcfa16 +src/isa/x64/lower.isle 802b6e750d407100 diff --git a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs index cbf660ec80..0a5bb3bacd 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs @@ -2997,13 +2997,13 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { match &pattern2_0 { &Opcode::Trap => 
{ - // Rule at src/isa/x64/lower.isle line 1463. + // Rule at src/isa/x64/lower.isle line 1487. let expr0_0 = constructor_ud2(ctx, &pattern2_1)?; let expr1_0 = constructor_safepoint(ctx, &expr0_0)?; return Some(expr1_0); } &Opcode::ResumableTrap => { - // Rule at src/isa/x64/lower.isle line 1468. + // Rule at src/isa/x64/lower.isle line 1492. let expr0_0 = constructor_ud2(ctx, &pattern2_1)?; let expr1_0 = constructor_safepoint(ctx, &expr0_0)?; return Some(expr1_0); @@ -3411,7 +3411,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1430. + // Rule at src/isa/x64/lower.isle line 1454. let expr0_0 = constructor_put_in_xmm(ctx, pattern7_0)?; let expr1_0 = constructor_put_in_xmm_mem(ctx, pattern7_1)?; let expr2_0 = constructor_pminsb(ctx, expr0_0, &expr1_0)?; @@ -3421,7 +3421,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1452. + // Rule at src/isa/x64/lower.isle line 1476. let expr0_0 = constructor_put_in_xmm(ctx, pattern7_0)?; let expr1_0 = constructor_put_in_xmm_mem(ctx, pattern7_1)?; let expr2_0 = constructor_pminub(ctx, expr0_0, &expr1_0)?; @@ -3431,7 +3431,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1419. + // Rule at src/isa/x64/lower.isle line 1443. let expr0_0 = constructor_put_in_xmm(ctx, pattern7_0)?; let expr1_0 = constructor_put_in_xmm_mem(ctx, pattern7_1)?; let expr2_0 = constructor_pmaxsb(ctx, expr0_0, &expr1_0)?; @@ -3441,7 +3441,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1441. 
+ // Rule at src/isa/x64/lower.isle line 1465. let expr0_0 = constructor_put_in_xmm(ctx, pattern7_0)?; let expr1_0 = constructor_put_in_xmm_mem(ctx, pattern7_1)?; let expr2_0 = constructor_pmaxub(ctx, expr0_0, &expr1_0)?; @@ -3549,7 +3549,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1433. + // Rule at src/isa/x64/lower.isle line 1457. let expr0_0 = constructor_put_in_xmm(ctx, pattern7_0)?; let expr1_0 = constructor_put_in_xmm_mem(ctx, pattern7_1)?; let expr2_0 = constructor_pminsw(ctx, expr0_0, &expr1_0)?; @@ -3559,7 +3559,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1455. + // Rule at src/isa/x64/lower.isle line 1479. let expr0_0 = constructor_put_in_xmm(ctx, pattern7_0)?; let expr1_0 = constructor_put_in_xmm_mem(ctx, pattern7_1)?; let expr2_0 = constructor_pminuw(ctx, expr0_0, &expr1_0)?; @@ -3569,7 +3569,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1422. + // Rule at src/isa/x64/lower.isle line 1446. let expr0_0 = constructor_put_in_xmm(ctx, pattern7_0)?; let expr1_0 = constructor_put_in_xmm_mem(ctx, pattern7_1)?; let expr2_0 = constructor_pmaxsw(ctx, expr0_0, &expr1_0)?; @@ -3579,7 +3579,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1444. + // Rule at src/isa/x64/lower.isle line 1468. 
let expr0_0 = constructor_put_in_xmm(ctx, pattern7_0)?; let expr1_0 = constructor_put_in_xmm_mem(ctx, pattern7_1)?; let expr2_0 = constructor_pmaxuw(ctx, expr0_0, &expr1_0)?; @@ -3662,7 +3662,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1436. + // Rule at src/isa/x64/lower.isle line 1460. let expr0_0 = constructor_put_in_xmm(ctx, pattern7_0)?; let expr1_0 = constructor_put_in_xmm_mem(ctx, pattern7_1)?; let expr2_0 = constructor_pminsd(ctx, expr0_0, &expr1_0)?; @@ -3672,7 +3672,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1458. + // Rule at src/isa/x64/lower.isle line 1482. let expr0_0 = constructor_put_in_xmm(ctx, pattern7_0)?; let expr1_0 = constructor_put_in_xmm_mem(ctx, pattern7_1)?; let expr2_0 = constructor_pminud(ctx, expr0_0, &expr1_0)?; @@ -3682,7 +3682,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1425. + // Rule at src/isa/x64/lower.isle line 1449. let expr0_0 = constructor_put_in_xmm(ctx, pattern7_0)?; let expr1_0 = constructor_put_in_xmm_mem(ctx, pattern7_1)?; let expr2_0 = constructor_pmaxsd(ctx, expr0_0, &expr1_0)?; @@ -3692,7 +3692,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/x64/lower.isle line 1447. + // Rule at src/isa/x64/lower.isle line 1471. 
let expr0_0 = constructor_put_in_xmm(ctx, pattern7_0)?; let expr1_0 = constructor_put_in_xmm_mem(ctx, pattern7_1)?; let expr2_0 = constructor_pmaxud(ctx, expr0_0, &expr1_0)?; @@ -5156,6 +5156,46 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { match &pattern5_0 { + &Opcode::Imin => { + let (pattern7_0, pattern7_1) = + C::unpack_value_array_2(ctx, &pattern5_1); + // Rule at src/isa/x64/lower.isle line 1435. + let expr0_0 = CC::L; + let expr1_0 = constructor_cmp_and_choose( + ctx, pattern3_0, &expr0_0, pattern7_0, pattern7_1, + )?; + return Some(expr1_0); + } + &Opcode::Umin => { + let (pattern7_0, pattern7_1) = + C::unpack_value_array_2(ctx, &pattern5_1); + // Rule at src/isa/x64/lower.isle line 1429. + let expr0_0 = CC::B; + let expr1_0 = constructor_cmp_and_choose( + ctx, pattern3_0, &expr0_0, pattern7_0, pattern7_1, + )?; + return Some(expr1_0); + } + &Opcode::Imax => { + let (pattern7_0, pattern7_1) = + C::unpack_value_array_2(ctx, &pattern5_1); + // Rule at src/isa/x64/lower.isle line 1438. + let expr0_0 = CC::NL; + let expr1_0 = constructor_cmp_and_choose( + ctx, pattern3_0, &expr0_0, pattern7_0, pattern7_1, + )?; + return Some(expr1_0); + } + &Opcode::Umax => { + let (pattern7_0, pattern7_1) = + C::unpack_value_array_2(ctx, &pattern5_1); + // Rule at src/isa/x64/lower.isle line 1432. + let expr0_0 = CC::NB; + let expr1_0 = constructor_cmp_and_choose( + ctx, pattern3_0, &expr0_0, pattern7_0, pattern7_1, + )?; + return Some(expr1_0); + } &Opcode::Iadd => { let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); @@ -6233,3 +6273,31 @@ pub fn constructor_vec_insert_lane( } return None; } + +// Generated as internal constructor for term cmp_and_choose. 
+pub fn constructor_cmp_and_choose<C: Context>( + ctx: &mut C, + arg0: Type, + arg1: &CC, + arg2: Value, + arg3: Value, +) -> Option<ValueRegs> { + let pattern0_0 = arg0; + if let Some(pattern1_0) = C::fits_in_64(ctx, pattern0_0) { + let pattern2_0 = arg1; + let pattern3_0 = arg2; + let pattern4_0 = arg3; + // Rule at src/isa/x64/lower.isle line 1422. + let expr0_0 = C::put_in_reg(ctx, pattern3_0); + let expr1_0 = C::put_in_reg(ctx, pattern4_0); + let expr2_0 = C::raw_operand_size_of_type(ctx, pattern1_0); + let expr3_0 = RegMemImm::Reg { reg: expr0_0 }; + let expr4_0 = constructor_cmp(ctx, &expr2_0, &expr3_0, expr1_0)?; + let expr5_0 = RegMem::Reg { reg: expr1_0 }; + let expr6_0 = constructor_cmove(ctx, pattern1_0, pattern2_0, &expr5_0, expr0_0)?; + let expr7_0 = constructor_with_flags_1(ctx, &expr4_0, &expr6_0)?; + let expr8_0 = C::value_reg(ctx, expr7_0); + return Some(expr8_0); + } + return None; +} diff --git a/cranelift/filetests/filetests/runtests/integer-minmax.clif b/cranelift/filetests/filetests/runtests/integer-minmax.clif new file mode 100644 index 0000000000..423ce34335 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/integer-minmax.clif @@ -0,0 +1,135 @@ +test interpret +test run +; target aarch64 +; target s390x +target x86_64 + +; sort three signed i8s with imin and imax only +function %isort3(i8, i8, i8) -> i8, i8, i8 { +block0(v0: i8, v1: i8, v2: i8): + v3 = imin.i8 v0, v1 + v4 = imin.i8 v1, v2 + v5 = imin.i8 v2, v0 + v6 = imin.i8 v3, v4 ; low + v7 = imax.i8 v0, v1 + v8 = imax.i8 v1, v2 + v9 = imax.i8 v7, v8 ; high + v10 = imax.i8 v3, v4 + v11 = imax.i8 v10, v5 ; mid = max of min of all pairs + return v6, v11, v9 +} +; run: %isort3(1, 2, 3) == [1, 2, 3] +; run: %isort3(1, 3, 2) == [1, 2, 3] +; run: %isort3(2, 1, 3) == [1, 2, 3] +; run: %isort3(2, 3, 1) == [1, 2, 3] +; run: %isort3(3, 1, 2) == [1, 2, 3] +; run: %isort3(3, 2, 1) == [1, 2, 3] +; run: %isort3(-1, 0, 1) == [-1, 0, 1] +; run: %isort3(-1, 1, 1) == [-1, 1, 1] +; run: %isort3(-2, 1, 0) == [-2, 0, 1] +; 
run: %isort3(0, 0, 0) == [0, 0, 0] +; run: %isort3(5, 4, 4) == [4, 4, 5] + + +function %imin_max_i8(i8, i8) -> i8, i8 { +block0(v0: i8, v1: i8): + v2 = imin.i8 v0, v1 + v3 = imax.i8 v0, v1 + return v2, v3 +} +; run: %imin_max_i8(127, -128) == [-128, 127] +; run: %imin_max_i8(-128, 127) == [-128, 127] +; run: %imin_max_i8(-1, 0) == [-1, 0] +; run: %imin_max_i8(1, -1) == [-1, 1] +; run: %imin_max_i8(1, 2) == [1, 2] +; run: %imin_max_i8(2, 1) == [1, 2] +; run: %imin_max_i8(2, 2) == [2, 2] +; run: %imin_max_i8(0x7f, 0x80) == [0x80, 0x7f] + +function %imin_max_i16(i16, i16) -> i16, i16 { +block0(v0: i16, v1: i16): + v2 = imin.i16 v0, v1 + v3 = imax.i16 v0, v1 + return v2, v3 +} +; run: %imin_max_i16(32767, -32768) == [-32768, 32767] +; run: %imin_max_i16(-32768, 32767) == [-32768, 32767] +; run: %imin_max_i16(-1, 0) == [-1, 0] +; run: %imin_max_i16(1, -1) == [-1, 1] +; run: %imin_max_i16(1, 2) == [1, 2] +; run: %imin_max_i16(2, 1) == [1, 2] +; run: %imin_max_i16(2, 2) == [2, 2] +; run: %imin_max_i16(0x7f, 0x80) == [0x7f, 0x80] +; run: %imin_max_i16(0x7fff, 0x8000) == [0x8000, 0x7fff] + +function %imin_max_i32(i32, i32) -> i32, i32 { +block0(v0: i32, v1: i32): + v2 = imin.i32 v0, v1 + v3 = imax.i32 v0, v1 + return v2, v3 +} +; run: %imin_max_i32(-1, 0) == [-1, 0] +; run: %imin_max_i32(1, -1) == [-1, 1] +; run: %imin_max_i32(1, 2) == [1, 2] +; run: %imin_max_i32(2, 1) == [1, 2] +; run: %imin_max_i32(0x7f, 0x80) == [0x7f, 0x80] +; run: %imin_max_i32(0x7fff, 0x8000) == [0x7fff, 0x8000] +; run: %imin_max_i32(0x7fffffff, 0x80000000) == [0x80000000, 0x7fffffff] + +function %imin_max_i64(i64, i64) -> i64, i64 { +block0(v0: i64, v1: i64): + v2 = imin.i64 v0, v1 + v3 = imax.i64 v0, v1 + return v2, v3 +} +; run: %imin_max_i64(-1, 0) == [-1, 0] +; run: %imin_max_i64(1, -1) == [-1, 1] +; run: %imin_max_i64(1, 2) == [1, 2] +; run: %imin_max_i64(2, 1) == [1, 2] +; run: %imin_max_i64(0x7f, 0x80) == [0x7f, 0x80] +; run: %imin_max_i64(0x7fff, 0x8000) == [0x7fff, 0x8000] +; run: 
%imin_max_i64(0x7fffffff, 0x80000000) == [0x7fffffff, 0x80000000] +; run: %imin_max_i64(0x7fffffffffffffff, 0x8000000000000000) == [0x8000000000000000, 0x7fffffffffffffff] + +function %umin_max_i8(i8, i8) -> i8, i8 { +block0(v0: i8, v1: i8): + v2 = umin.i8 v0, v1 + v3 = umax.i8 v0, v1 + return v2, v3 +} +; run: %umin_max_i8(1, 2) == [1, 2] +; run: %umin_max_i8(2, 1) == [1, 2] +; run: %umin_max_i8(0x7f, 0x80) == [0x7f, 0x80] + +function %umin_max_i16(i16, i16) -> i16, i16 { +block0(v0: i16, v1: i16): + v2 = umin.i16 v0, v1 + v3 = umax.i16 v0, v1 + return v2, v3 +} +; run: %umin_max_i16(1, 2) == [1, 2] +; run: %umin_max_i16(2, 1) == [1, 2] +; run: %umin_max_i16(0x7f, 0x80) == [0x7f, 0x80] +; run: %umin_max_i16(0x7fff, 0x8000) == [0x7fff, 0x8000] + +function %umin_max_i32(i32, i32) -> i32, i32 { +block0(v0: i32, v1: i32): + v2 = umin.i32 v0, v1 + v3 = umax.i32 v0, v1 + return v2, v3 +} +; run: %umin_max_i32(1, 2) == [1, 2] +; run: %umin_max_i32(2, 1) == [1, 2] +; run: %umin_max_i32(0x7fff, 0x8000) == [0x7fff, 0x8000] +; run: %umin_max_i32(0x7fffffff, 0x80000000) == [0x7fffffff, 0x80000000] + +function %umin_max_i64(i64, i64) -> i64, i64 { +block0(v0: i64, v1: i64): + v2 = umin.i64 v0, v1 + v3 = umax.i64 v0, v1 + return v2, v3 +} +; run: %umin_max_i64(1, 2) == [1, 2] +; run: %umin_max_i64(2, 1) == [1, 2] +; run: %umin_max_i64(0x7fffffff, 0x80000000) == [0x7fffffff, 0x80000000] +; run: %umin_max_i64(0x7fffffffffffffff, 0x8000000000000000) == [0x7fffffffffffffff, 0x8000000000000000] \ No newline at end of file