cranelift/x64: lower min and max for <= i64 (#3748)

* cranelift/x64: lower min and max for <= `i64`

* cranelift: add runtests for integer min/max
This commit is contained in:
Mrmaxmeier
2022-02-14 19:21:19 +01:00
committed by GitHub
parent da539255a5
commit 84b9c7bb8a
4 changed files with 246 additions and 19 deletions

View File

@@ -1414,7 +1414,31 @@
(rule (vec_insert_lane $F64X2 vec val 1)
(movlhps vec (reg_mem_to_xmm_mem val)))
;;;; Rules for `imax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; Rules for `imin`, `imax`, `umin`, `umax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `i64` and smaller.
;; Shared helper for scalar integer min/max: emit a `cmp` of the two operands
;; followed by a `cmove` on the caller-supplied condition code `cc`, and
;; return the register produced by the `cmove`. Only matches types accepted by
;; `fits_in_64`.
(decl cmp_and_choose (Type CC Value Value) ValueRegs)
(rule (cmp_and_choose (fits_in_64 ty) cc x y)
(let ((x_reg Reg (put_in_reg x))
(y_reg Reg (put_in_reg y))
(size OperandSize (raw_operand_size_of_type ty)))
;; The compare and the conditional move are combined through `with_flags_1`.
;; NOTE(review): presumably the `cmove` replaces `x_reg` with `y_reg` when
;; `cc` holds for the flags of the `cmp`, i.e. the result is `y` if
;; `cc(y, x)` else `x` -- confirm against the helpers in `inst.isle`.
(value_reg (with_flags_1 (cmp size (RegMemImm.Reg x_reg) y_reg)
(cmove ty cc (RegMem.Reg y_reg) x_reg)))))
;; Unsigned minimum selects on `CC.B`.
(rule (lower (has_type (fits_in_64 ty) (umin x y)))
(cmp_and_choose ty (CC.B) x y))
;; Unsigned maximum selects on `CC.NB`, the negation of the `umin` condition.
(rule (lower (has_type (fits_in_64 ty) (umax x y)))
(cmp_and_choose ty (CC.NB) x y))
;; Signed minimum selects on `CC.L`.
(rule (lower (has_type (fits_in_64 ty) (imin x y)))
(cmp_and_choose ty (CC.L) x y))
;; Signed maximum selects on `CC.NL`, the negation of the `imin` condition.
(rule (lower (has_type (fits_in_64 ty) (imax x y)))
(cmp_and_choose ty (CC.NL) x y))
;; SSE `imax`.
(rule (lower (has_type $I8X16 (imax x y)))
(value_xmm (pmaxsb (put_in_xmm x) (put_in_xmm_mem y))))
@@ -1425,7 +1449,7 @@
(rule (lower (has_type $I32X4 (imax x y)))
(value_xmm (pmaxsd (put_in_xmm x) (put_in_xmm_mem y))))
;;;; Rules for `imin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE `imin`.
(rule (lower (has_type $I8X16 (imin x y)))
(value_xmm (pminsb (put_in_xmm x) (put_in_xmm_mem y))))
@@ -1436,7 +1460,7 @@
(rule (lower (has_type $I32X4 (imin x y)))
(value_xmm (pminsd (put_in_xmm x) (put_in_xmm_mem y))))
;;;; Rules for `umax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE `umax`.
(rule (lower (has_type $I8X16 (umax x y)))
(value_xmm (pmaxub (put_in_xmm x) (put_in_xmm_mem y))))
@@ -1447,7 +1471,7 @@
(rule (lower (has_type $I32X4 (umax x y)))
(value_xmm (pmaxud (put_in_xmm x) (put_in_xmm_mem y))))
;;;; Rules for `umin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE `umin`.
(rule (lower (has_type $I8X16 (umin x y)))
(value_xmm (pminub (put_in_xmm x) (put_in_xmm_mem y))))

View File

@@ -1,4 +1,4 @@
src/clif.isle 9ea75a6f790b5c03
src/prelude.isle 73285cd431346d53
src/isa/x64/inst.isle 7513533d16948249
src/isa/x64/lower.isle 976ac116c5fcfa16
src/isa/x64/lower.isle 802b6e750d407100

View File

@@ -2997,13 +2997,13 @@ pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: Inst) -> Option<ValueReg
} => {
match &pattern2_0 {
&Opcode::Trap => {
// Rule at src/isa/x64/lower.isle line 1463.
// Rule at src/isa/x64/lower.isle line 1487.
let expr0_0 = constructor_ud2(ctx, &pattern2_1)?;
let expr1_0 = constructor_safepoint(ctx, &expr0_0)?;
return Some(expr1_0);
}
&Opcode::ResumableTrap => {
// Rule at src/isa/x64/lower.isle line 1468.
// Rule at src/isa/x64/lower.isle line 1492.
let expr0_0 = constructor_ud2(ctx, &pattern2_1)?;
let expr1_0 = constructor_safepoint(ctx, &expr0_0)?;
return Some(expr1_0);
@@ -3411,7 +3411,7 @@ pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: Inst) -> Option<ValueReg
&Opcode::Imin => {
let (pattern7_0, pattern7_1) =
C::unpack_value_array_2(ctx, &pattern5_1);
// Rule at src/isa/x64/lower.isle line 1430.
// Rule at src/isa/x64/lower.isle line 1454.
let expr0_0 = constructor_put_in_xmm(ctx, pattern7_0)?;
let expr1_0 = constructor_put_in_xmm_mem(ctx, pattern7_1)?;
let expr2_0 = constructor_pminsb(ctx, expr0_0, &expr1_0)?;
@@ -3421,7 +3421,7 @@ pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: Inst) -> Option<ValueReg
&Opcode::Umin => {
let (pattern7_0, pattern7_1) =
C::unpack_value_array_2(ctx, &pattern5_1);
// Rule at src/isa/x64/lower.isle line 1452.
// Rule at src/isa/x64/lower.isle line 1476.
let expr0_0 = constructor_put_in_xmm(ctx, pattern7_0)?;
let expr1_0 = constructor_put_in_xmm_mem(ctx, pattern7_1)?;
let expr2_0 = constructor_pminub(ctx, expr0_0, &expr1_0)?;
@@ -3431,7 +3431,7 @@ pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: Inst) -> Option<ValueReg
&Opcode::Imax => {
let (pattern7_0, pattern7_1) =
C::unpack_value_array_2(ctx, &pattern5_1);
// Rule at src/isa/x64/lower.isle line 1419.
// Rule at src/isa/x64/lower.isle line 1443.
let expr0_0 = constructor_put_in_xmm(ctx, pattern7_0)?;
let expr1_0 = constructor_put_in_xmm_mem(ctx, pattern7_1)?;
let expr2_0 = constructor_pmaxsb(ctx, expr0_0, &expr1_0)?;
@@ -3441,7 +3441,7 @@ pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: Inst) -> Option<ValueReg
&Opcode::Umax => {
let (pattern7_0, pattern7_1) =
C::unpack_value_array_2(ctx, &pattern5_1);
// Rule at src/isa/x64/lower.isle line 1441.
// Rule at src/isa/x64/lower.isle line 1465.
let expr0_0 = constructor_put_in_xmm(ctx, pattern7_0)?;
let expr1_0 = constructor_put_in_xmm_mem(ctx, pattern7_1)?;
let expr2_0 = constructor_pmaxub(ctx, expr0_0, &expr1_0)?;
@@ -3549,7 +3549,7 @@ pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: Inst) -> Option<ValueReg
&Opcode::Imin => {
let (pattern7_0, pattern7_1) =
C::unpack_value_array_2(ctx, &pattern5_1);
// Rule at src/isa/x64/lower.isle line 1433.
// Rule at src/isa/x64/lower.isle line 1457.
let expr0_0 = constructor_put_in_xmm(ctx, pattern7_0)?;
let expr1_0 = constructor_put_in_xmm_mem(ctx, pattern7_1)?;
let expr2_0 = constructor_pminsw(ctx, expr0_0, &expr1_0)?;
@@ -3559,7 +3559,7 @@ pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: Inst) -> Option<ValueReg
&Opcode::Umin => {
let (pattern7_0, pattern7_1) =
C::unpack_value_array_2(ctx, &pattern5_1);
// Rule at src/isa/x64/lower.isle line 1455.
// Rule at src/isa/x64/lower.isle line 1479.
let expr0_0 = constructor_put_in_xmm(ctx, pattern7_0)?;
let expr1_0 = constructor_put_in_xmm_mem(ctx, pattern7_1)?;
let expr2_0 = constructor_pminuw(ctx, expr0_0, &expr1_0)?;
@@ -3569,7 +3569,7 @@ pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: Inst) -> Option<ValueReg
&Opcode::Imax => {
let (pattern7_0, pattern7_1) =
C::unpack_value_array_2(ctx, &pattern5_1);
// Rule at src/isa/x64/lower.isle line 1422.
// Rule at src/isa/x64/lower.isle line 1446.
let expr0_0 = constructor_put_in_xmm(ctx, pattern7_0)?;
let expr1_0 = constructor_put_in_xmm_mem(ctx, pattern7_1)?;
let expr2_0 = constructor_pmaxsw(ctx, expr0_0, &expr1_0)?;
@@ -3579,7 +3579,7 @@ pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: Inst) -> Option<ValueReg
&Opcode::Umax => {
let (pattern7_0, pattern7_1) =
C::unpack_value_array_2(ctx, &pattern5_1);
// Rule at src/isa/x64/lower.isle line 1444.
// Rule at src/isa/x64/lower.isle line 1468.
let expr0_0 = constructor_put_in_xmm(ctx, pattern7_0)?;
let expr1_0 = constructor_put_in_xmm_mem(ctx, pattern7_1)?;
let expr2_0 = constructor_pmaxuw(ctx, expr0_0, &expr1_0)?;
@@ -3662,7 +3662,7 @@ pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: Inst) -> Option<ValueReg
&Opcode::Imin => {
let (pattern7_0, pattern7_1) =
C::unpack_value_array_2(ctx, &pattern5_1);
// Rule at src/isa/x64/lower.isle line 1436.
// Rule at src/isa/x64/lower.isle line 1460.
let expr0_0 = constructor_put_in_xmm(ctx, pattern7_0)?;
let expr1_0 = constructor_put_in_xmm_mem(ctx, pattern7_1)?;
let expr2_0 = constructor_pminsd(ctx, expr0_0, &expr1_0)?;
@@ -3672,7 +3672,7 @@ pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: Inst) -> Option<ValueReg
&Opcode::Umin => {
let (pattern7_0, pattern7_1) =
C::unpack_value_array_2(ctx, &pattern5_1);
// Rule at src/isa/x64/lower.isle line 1458.
// Rule at src/isa/x64/lower.isle line 1482.
let expr0_0 = constructor_put_in_xmm(ctx, pattern7_0)?;
let expr1_0 = constructor_put_in_xmm_mem(ctx, pattern7_1)?;
let expr2_0 = constructor_pminud(ctx, expr0_0, &expr1_0)?;
@@ -3682,7 +3682,7 @@ pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: Inst) -> Option<ValueReg
&Opcode::Imax => {
let (pattern7_0, pattern7_1) =
C::unpack_value_array_2(ctx, &pattern5_1);
// Rule at src/isa/x64/lower.isle line 1425.
// Rule at src/isa/x64/lower.isle line 1449.
let expr0_0 = constructor_put_in_xmm(ctx, pattern7_0)?;
let expr1_0 = constructor_put_in_xmm_mem(ctx, pattern7_1)?;
let expr2_0 = constructor_pmaxsd(ctx, expr0_0, &expr1_0)?;
@@ -3692,7 +3692,7 @@ pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: Inst) -> Option<ValueReg
&Opcode::Umax => {
let (pattern7_0, pattern7_1) =
C::unpack_value_array_2(ctx, &pattern5_1);
// Rule at src/isa/x64/lower.isle line 1447.
// Rule at src/isa/x64/lower.isle line 1471.
let expr0_0 = constructor_put_in_xmm(ctx, pattern7_0)?;
let expr1_0 = constructor_put_in_xmm_mem(ctx, pattern7_1)?;
let expr2_0 = constructor_pmaxud(ctx, expr0_0, &expr1_0)?;
@@ -5156,6 +5156,46 @@ pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: Inst) -> Option<ValueReg
args: ref pattern5_1,
} => {
match &pattern5_0 {
&Opcode::Imin => {
let (pattern7_0, pattern7_1) =
C::unpack_value_array_2(ctx, &pattern5_1);
// Rule at src/isa/x64/lower.isle line 1435.
let expr0_0 = CC::L;
let expr1_0 = constructor_cmp_and_choose(
ctx, pattern3_0, &expr0_0, pattern7_0, pattern7_1,
)?;
return Some(expr1_0);
}
&Opcode::Umin => {
let (pattern7_0, pattern7_1) =
C::unpack_value_array_2(ctx, &pattern5_1);
// Rule at src/isa/x64/lower.isle line 1429.
let expr0_0 = CC::B;
let expr1_0 = constructor_cmp_and_choose(
ctx, pattern3_0, &expr0_0, pattern7_0, pattern7_1,
)?;
return Some(expr1_0);
}
&Opcode::Imax => {
let (pattern7_0, pattern7_1) =
C::unpack_value_array_2(ctx, &pattern5_1);
// Rule at src/isa/x64/lower.isle line 1438.
let expr0_0 = CC::NL;
let expr1_0 = constructor_cmp_and_choose(
ctx, pattern3_0, &expr0_0, pattern7_0, pattern7_1,
)?;
return Some(expr1_0);
}
&Opcode::Umax => {
let (pattern7_0, pattern7_1) =
C::unpack_value_array_2(ctx, &pattern5_1);
// Rule at src/isa/x64/lower.isle line 1432.
let expr0_0 = CC::NB;
let expr1_0 = constructor_cmp_and_choose(
ctx, pattern3_0, &expr0_0, pattern7_0, pattern7_1,
)?;
return Some(expr1_0);
}
&Opcode::Iadd => {
let (pattern7_0, pattern7_1) =
C::unpack_value_array_2(ctx, &pattern5_1);
@@ -6233,3 +6273,31 @@ pub fn constructor_vec_insert_lane<C: Context>(
}
return None;
}
// Generated as internal constructor for term cmp_and_choose.
//
// Lowers a scalar min/max: both values are placed in registers, compared at
// the operand size derived from the type, and a conditional move on `arg1`
// picks the result. Returns `None` when the type is rejected by `fits_in_64`
// or when any of the instruction constructors fails.
pub fn constructor_cmp_and_choose<C: Context>(
    ctx: &mut C,
    arg0: Type,
    arg1: &CC,
    arg2: Value,
    arg3: Value,
) -> Option<ValueRegs> {
    // Rule at src/isa/x64/lower.isle line 1422.
    let ty = C::fits_in_64(ctx, arg0)?;
    let x_reg = C::put_in_reg(ctx, arg2);
    let y_reg = C::put_in_reg(ctx, arg3);
    let size = C::raw_operand_size_of_type(ctx, ty);
    // Flag-producing compare of the two operand registers.
    let compare = constructor_cmp(ctx, &size, &RegMemImm::Reg { reg: x_reg }, y_reg)?;
    // Flag-consuming conditional move between the same two registers.
    let select = constructor_cmove(ctx, ty, arg1, &RegMem::Reg { reg: y_reg }, x_reg)?;
    let chosen = constructor_with_flags_1(ctx, &compare, &select)?;
    Some(C::value_reg(ctx, chosen))
}

View File

@@ -0,0 +1,135 @@
test interpret
test run
; target aarch64
; target s390x
target x86_64
; Sort three signed i8 values using only `imin` and `imax`.
; The three results are returned in ascending order (low, mid, high).
function %isort3(i8, i8, i8) -> i8, i8, i8 {
block0(v0: i8, v1: i8, v2: i8):
; minimum of every pair of inputs
v3 = imin.i8 v0, v1
v4 = imin.i8 v1, v2
v5 = imin.i8 v2, v0
v6 = imin.i8 v3, v4 ; low
; maximum over all three inputs
v7 = imax.i8 v0, v1
v8 = imax.i8 v1, v2
v9 = imax.i8 v7, v8 ; high
v10 = imax.i8 v3, v4
v11 = imax.i8 v10, v5 ; mid = max of min of all pairs
return v6, v11, v9
}
; run: %isort3(1, 2, 3) == [1, 2, 3]
; run: %isort3(1, 3, 2) == [1, 2, 3]
; run: %isort3(2, 1, 3) == [1, 2, 3]
; run: %isort3(2, 3, 1) == [1, 2, 3]
; run: %isort3(3, 1, 2) == [1, 2, 3]
; run: %isort3(3, 2, 1) == [1, 2, 3]
; run: %isort3(-1, 0, 1) == [-1, 0, 1]
; run: %isort3(-1, 1, 1) == [-1, 1, 1]
; run: %isort3(-2, 1, 0) == [-2, 0, 1]
; run: %isort3(0, 0, 0) == [0, 0, 0]
; run: %isort3(5, 4, 4) == [4, 4, 5]
; Returns the signed minimum followed by the signed maximum of two i8 values.
function %imin_max_i8(i8, i8) -> i8, i8 {
block0(v0: i8, v1: i8):
v2 = imin.i8 v0, v1
v3 = imax.i8 v0, v1
return v2, v3
}
; run: %imin_max_i8(127, -128) == [-128, 127]
; run: %imin_max_i8(-128, 127) == [-128, 127]
; run: %imin_max_i8(-1, 0) == [-1, 0]
; run: %imin_max_i8(1, -1) == [-1, 1]
; run: %imin_max_i8(1, 2) == [1, 2]
; run: %imin_max_i8(2, 1) == [1, 2]
; run: %imin_max_i8(2, 2) == [2, 2]
; run: %imin_max_i8(0x7f, 0x80) == [0x80, 0x7f]
; Returns the signed minimum followed by the signed maximum of two i16 values.
function %imin_max_i16(i16, i16) -> i16, i16 {
block0(v0: i16, v1: i16):
v2 = imin.i16 v0, v1
v3 = imax.i16 v0, v1
return v2, v3
}
; run: %imin_max_i16(32767, -32768) == [-32768, 32767]
; run: %imin_max_i16(-32768, 32767) == [-32768, 32767]
; run: %imin_max_i16(-1, 0) == [-1, 0]
; run: %imin_max_i16(1, -1) == [-1, 1]
; run: %imin_max_i16(1, 2) == [1, 2]
; run: %imin_max_i16(2, 1) == [1, 2]
; run: %imin_max_i16(2, 2) == [2, 2]
; run: %imin_max_i16(0x7f, 0x80) == [0x7f, 0x80]
; run: %imin_max_i16(0x7fff, 0x8000) == [0x8000, 0x7fff]
; Returns the signed minimum followed by the signed maximum of two i32 values.
function %imin_max_i32(i32, i32) -> i32, i32 {
block0(v0: i32, v1: i32):
v2 = imin.i32 v0, v1
v3 = imax.i32 v0, v1
return v2, v3
}
; run: %imin_max_i32(-1, 0) == [-1, 0]
; run: %imin_max_i32(1, -1) == [-1, 1]
; run: %imin_max_i32(1, 2) == [1, 2]
; run: %imin_max_i32(2, 1) == [1, 2]
; run: %imin_max_i32(0x7f, 0x80) == [0x7f, 0x80]
; run: %imin_max_i32(0x7fff, 0x8000) == [0x7fff, 0x8000]
; run: %imin_max_i32(0x7fffffff, 0x80000000) == [0x80000000, 0x7fffffff]
; Returns the signed minimum followed by the signed maximum of two i64 values.
function %imin_max_i64(i64, i64) -> i64, i64 {
block0(v0: i64, v1: i64):
v2 = imin.i64 v0, v1
v3 = imax.i64 v0, v1
return v2, v3
}
; run: %imin_max_i64(-1, 0) == [-1, 0]
; run: %imin_max_i64(1, -1) == [-1, 1]
; run: %imin_max_i64(1, 2) == [1, 2]
; run: %imin_max_i64(2, 1) == [1, 2]
; run: %imin_max_i64(0x7f, 0x80) == [0x7f, 0x80]
; run: %imin_max_i64(0x7fff, 0x8000) == [0x7fff, 0x8000]
; run: %imin_max_i64(0x7fffffff, 0x80000000) == [0x7fffffff, 0x80000000]
; run: %imin_max_i64(0x7fffffffffffffff, 0x8000000000000000) == [0x8000000000000000, 0x7fffffffffffffff]
; Returns the unsigned minimum followed by the unsigned maximum of two i8 values.
function %umin_max_i8(i8, i8) -> i8, i8 {
block0(v0: i8, v1: i8):
v2 = umin.i8 v0, v1
v3 = umax.i8 v0, v1
return v2, v3
}
; run: %umin_max_i8(1, 2) == [1, 2]
; run: %umin_max_i8(2, 1) == [1, 2]
; run: %umin_max_i8(0x7f, 0x80) == [0x7f, 0x80]
; Returns the unsigned minimum followed by the unsigned maximum of two i16 values.
function %umin_max_i16(i16, i16) -> i16, i16 {
block0(v0: i16, v1: i16):
v2 = umin.i16 v0, v1
v3 = umax.i16 v0, v1
return v2, v3
}
; run: %umin_max_i16(1, 2) == [1, 2]
; run: %umin_max_i16(2, 1) == [1, 2]
; run: %umin_max_i16(0x7f, 0x80) == [0x7f, 0x80]
; run: %umin_max_i16(0x7fff, 0x8000) == [0x7fff, 0x8000]
; Returns the unsigned minimum followed by the unsigned maximum of two i32 values.
function %umin_max_i32(i32, i32) -> i32, i32 {
block0(v0: i32, v1: i32):
v2 = umin.i32 v0, v1
v3 = umax.i32 v0, v1
return v2, v3
}
; run: %umin_max_i32(1, 2) == [1, 2]
; run: %umin_max_i32(2, 1) == [1, 2]
; run: %umin_max_i32(0x7fff, 0x8000) == [0x7fff, 0x8000]
; run: %umin_max_i32(0x7fffffff, 0x80000000) == [0x7fffffff, 0x80000000]
; Returns the unsigned minimum followed by the unsigned maximum of two i64 values.
function %umin_max_i64(i64, i64) -> i64, i64 {
block0(v0: i64, v1: i64):
v2 = umin.i64 v0, v1
v3 = umax.i64 v0, v1
return v2, v3
}
; run: %umin_max_i64(1, 2) == [1, 2]
; run: %umin_max_i64(2, 1) == [1, 2]
; run: %umin_max_i64(0x7fffffff, 0x80000000) == [0x7fffffff, 0x80000000]
; run: %umin_max_i64(0x7fffffffffffffff, 0x8000000000000000) == [0x7fffffffffffffff, 0x8000000000000000]