Cranelift AArch64: Further integral constant fixes (#4530)

Copyright (c) 2022, Arm Limited.
Author: Anton Kirilov
Date: 2022-07-26 17:35:06 +01:00
Committed by: GitHub
Parent: 1935428af7
Commit: d041c4b376
4 changed files with 188 additions and 32 deletions


@@ -148,41 +148,59 @@ where
             value
         };
         let rd = self.temp_writable_reg(I64);
+        let size = OperandSize::Size64;
+
+        // If the top 32 bits are zero, use 32-bit `mov` operations.
+        if value >> 32 == 0 {
+            let size = OperandSize::Size32;
+            let lower_halfword = value as u16;
+            let upper_halfword = (value >> 16) as u16;
+
+            if upper_halfword == u16::MAX {
+                self.emit(&MInst::MovWide {
+                    op: MoveWideOp::MovN,
+                    rd,
+                    imm: MoveWideConst::maybe_with_shift(!lower_halfword, 0).unwrap(),
+                    size,
+                });
+            } else {
+                self.emit(&MInst::MovWide {
+                    op: MoveWideOp::MovZ,
+                    rd,
+                    imm: MoveWideConst::maybe_with_shift(lower_halfword, 0).unwrap(),
+                    size,
+                });
+
+                if upper_halfword != 0 {
+                    self.emit(&MInst::MovWide {
+                        op: MoveWideOp::MovK,
+                        rd,
+                        imm: MoveWideConst::maybe_with_shift(upper_halfword, 16).unwrap(),
+                        size,
+                    });
+                }
+            }
+
-        if value == 0 {
-            self.emit(&MInst::MovWide {
-                op: MoveWideOp::MovZ,
-                rd,
-                imm: MoveWideConst::zero(),
-                size: OperandSize::Size64,
-            });
             return rd.to_reg();
         } else if value == u64::MAX {
             self.emit(&MInst::MovWide {
                 op: MoveWideOp::MovN,
                 rd,
                 imm: MoveWideConst::zero(),
-                size: OperandSize::Size64,
+                size,
             });
             return rd.to_reg();
         };
 
-        // If the top 32 bits are zero, use 32-bit `mov` operations.
-        let (num_half_words, size, negated) = if value >> 32 == 0 {
-            (2, OperandSize::Size32, (!value << 32) >> 32)
-        } else {
-            (4, OperandSize::Size64, !value)
-        };
         // If the number of 0xffff half words is greater than the number of 0x0000 half words
         // it is more efficient to use `movn` for the first instruction.
-        let first_is_inverted = count_zero_half_words(negated, num_half_words)
-            > count_zero_half_words(value, num_half_words);
+        let first_is_inverted = count_zero_half_words(!value) > count_zero_half_words(value);
         // Either 0xffff or 0x0000 half words can be skipped, depending on the first
         // instruction used.
         let ignored_halfword = if first_is_inverted { 0xffff } else { 0 };
         let mut first_mov_emitted = false;
 
-        for i in 0..num_half_words {
+        for i in 0..4 {
             let imm16 = (value >> (16 * i)) & 0xffff;
             if imm16 != ignored_halfword {
                 if !first_mov_emitted {
@@ -222,9 +240,9 @@
         return self.writable_reg_to_reg(rd);
 
-        fn count_zero_half_words(mut value: u64, num_half_words: u8) -> usize {
+        fn count_zero_half_words(mut value: u64) -> usize {
             let mut count = 0;
-            for _ in 0..num_half_words {
+            for _ in 0..4 {
                 if value & 0xffff == 0 {
                     count += 1;
                 }
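
The hunks above restructure the constant loader: 32-bit values (including zero) now take a dedicated `movz`/`movn`/`movk` path before the generic halfword loop, and the `num_half_words` machinery is dropped. The following is a self-contained sketch of the resulting strategy; `mov_sequence` is a hypothetical helper that prints mnemonics instead of emitting Cranelift `MInst`s, so it illustrates the selection logic only:

fn mov_sequence(value: u64) -> Vec<String> {
    // Count how many aligned 16-bit halfwords of `value` are zero.
    fn count_zero_half_words(mut value: u64) -> usize {
        let mut count = 0;
        for _ in 0..4 {
            if value & 0xffff == 0 {
                count += 1;
            }
            value >>= 16;
        }
        count
    }

    // 32-bit values need at most `movz` + `movk`, or a single `movn`
    // when the upper halfword is all-ones. Zero hits this path too,
    // which is why the filetests below now expect `movz w1, #0`.
    if value >> 32 == 0 {
        let (lower, upper) = (value as u16, (value >> 16) as u16);
        return if upper == u16::MAX {
            vec![format!("movn w0, #{:#x}", !lower)]
        } else if upper == 0 {
            vec![format!("movz w0, #{:#x}", lower)]
        } else {
            vec![
                format!("movz w0, #{:#x}", lower),
                format!("movk w0, #{:#x}, lsl #16", upper),
            ]
        };
    }

    // 64-bit values: start with `movn` when the inverted value has more
    // zero halfwords, so that more halfwords can be skipped outright.
    let first_is_inverted = count_zero_half_words(!value) > count_zero_half_words(value);
    let ignored_halfword = if first_is_inverted { 0xffff } else { 0 };
    let mut insts = Vec::new();
    for i in 0..4 {
        let imm16 = (value >> (16 * i)) & 0xffff;
        if imm16 == ignored_halfword {
            continue;
        }
        if insts.is_empty() && first_is_inverted {
            // `movn` materialises `!(imm16 << shift)`.
            insts.push(format!("movn x0, #{:#x}, lsl #{}", !imm16 & 0xffff, 16 * i));
        } else if insts.is_empty() {
            insts.push(format!("movz x0, #{:#x}, lsl #{}", imm16, 16 * i));
        } else {
            insts.push(format!("movk x0, #{:#x}, lsl #{}", imm16, 16 * i));
        }
    }
    if insts.is_empty() {
        // Every halfword matched the ignored pattern: the value was
        // u64::MAX (zero took the 32-bit path above).
        insts.push("movn x0, #0".to_string());
    }
    insts
}

fn main() {
    assert_eq!(mov_sequence(0), ["movz w0, #0x0"]);
    assert_eq!(
        mov_sequence(0xffff_ffff_ffff_1234),
        ["movn x0, #0xedcb, lsl #0"]
    );
}

For example, a constant whose upper three halfwords are all-ones collapses to a single `movn`, while 0 becomes a single 32-bit `movz`.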


@@ -111,7 +111,7 @@ block0(v0: i128):
 ; clz x8, x0
 ; lsr x10, x6, #6
 ; madd x0, x8, x10, x6
-; movz x1, #0
+; movz w1, #0
 ; ret
 
 function %c(i8) -> i8 {
@@ -173,7 +173,7 @@ block0(v0: i128):
 ; subs xzr, x8, #63
 ; csel x1, x14, xzr, eq
 ; add x0, x1, x8
-; movz x1, #0
+; movz w1, #0
 ; ret
 
 function %d(i8) -> i8 {
@@ -235,7 +235,7 @@ block0(v0: i128):
 ; clz x12, x8
 ; lsr x14, x10, #6
 ; madd x0, x12, x14, x10
-; movz x1, #0
+; movz w1, #0
 ; ret
 
 function %d(i128) -> i128 {
@@ -253,7 +253,7 @@ block0(v0: i128):
 ; cnt v11.16b, v6.16b
 ; addv b13, v11.16b
 ; umov w0, v13.b[0]
-; movz x1, #0
+; movz w1, #0
 ; ldp d11, d13, [sp], #16
 ; ldp fp, lr, [sp], #16
 ; ret
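
Each of these expected-output updates comes from the zero constant written into the upper half of an i128 result: it now takes the 32-bit path, and since an AArch64 write to `w1` zeroes the upper 32 bits of `x1`, `movz w1, #0` leaves the register in the same state as `movz x1, #0`. A tiny check of the path predicate (`takes_32_bit_path` is an illustrative name, not a Cranelift API):

// Which constants take the new 32-bit `mov` path?
fn takes_32_bit_path(value: u64) -> bool {
    value >> 32 == 0
}

fn main() {
    assert!(takes_32_bit_path(0)); // upper i128 half: movz w1, #0
    assert!(takes_32_bit_path(0xffff_ffff)); // largest 32-bit value
    assert!(!takes_32_bit_path(0x1_0000_0000)); // needs 64-bit movs
}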


@@ -33,7 +33,7 @@ block0(v0: i64):
 }
 
 ; block0:
-; movz x1, #0
+; movz w1, #0
 ; ret
 
 function %i128_sextend_i64(i64) -> i128 {
@@ -54,7 +54,7 @@ block0(v0: i32):
 ; block0:
 ; mov w0, w0
-; movz x1, #0
+; movz w1, #0
 ; ret
 
 function %i128_sextend_i32(i32) -> i128 {
@@ -76,7 +76,7 @@ block0(v0: i16):
 ; block0:
 ; uxth w0, w0
-; movz x1, #0
+; movz w1, #0
 ; ret
 
 function %i128_sextend_i16(i16) -> i128 {
@@ -98,7 +98,7 @@ block0(v0: i8):
 ; block0:
 ; uxtb w0, w0
-; movz x1, #0
+; movz w1, #0
 ; ret
 
 function %i128_sextend_i8(i8) -> i128 {
@@ -154,7 +154,7 @@ block0(v0: i8x16):
 ; block0:
 ; umov w0, v0.b[1]
-; movz x1, #0
+; movz w1, #0
 ; ret
 
 function %i8x16_sextend_i16(i8x16) -> i16 {
@@ -233,7 +233,7 @@ block0(v0: i16x8):
 ; block0:
 ; umov w0, v0.h[1]
-; movz x1, #0
+; movz w1, #0
 ; ret
 
 function %i16x8_sextend_i32(i16x8) -> i32 {
@@ -290,7 +290,7 @@ block0(v0: i32x4):
 ; block0:
 ; mov w0, v0.s[1]
-; movz x1, #0
+; movz w1, #0
 ; ret
 
 function %i32x4_sextend_i64(i32x4) -> i64 {
@@ -325,7 +325,7 @@ block0(v0: i64x2):
 ; block0:
 ; mov x0, v0.d[1]
-; movz x1, #0
+; movz w1, #0
 ; ret
 
 function %i64x2_sextend_i128(i64x2) -> i128 {
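
The same `movz x1, #0` to `movz w1, #0` substitution appears after every extend here because the upper 64 bits of the widened result are the constant 0, which trivially fits a single mov-wide immediate. As a rough model (an assumption about what a `MoveWideConst`-style check must establish, not the Cranelift implementation), a constant is `movz`-encodable exactly when all of its set bits land in one aligned 16-bit halfword:

// Returns the 16-bit payload and shift if `value` fits a single movz.
fn movz_encodable(value: u64) -> Option<(u16, u32)> {
    for shift in [0u32, 16, 32, 48] {
        if value & !(0xffffu64 << shift) == 0 {
            return Some(((value >> shift) as u16, shift));
        }
    }
    None
}

fn main() {
    assert_eq!(movz_encodable(0), Some((0, 0))); // -> movz w1, #0
    assert_eq!(movz_encodable(0xbeef_0000), Some((0xbeef, 16)));
    assert_eq!(movz_encodable(0x1_0001), None); // straddles halfwords
}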


@@ -9,26 +9,164 @@ block0(v0: i8, v1: i8):
    v2 = srem.i8 v0, v1
    return v2
}
; run: %i8(0, 1) == 0
; run: %i8(1, -1) == 0
; run: %i8(0x80, 0xff) == 0
; run: %i8(0x2, 0x7) == 0x2

function %i8_const(i8) -> i8 {
block0(v0: i8):
    v1 = iconst.i8 -1
    v2 = srem.i8 v0, v1
    return v2
}
; run: %i8_const(0) == 0
; run: %i8_const(1) == 0
; run: %i8_const(0x80) == 0
; run: %i8_const(0x2) == 0

function %i16(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
    v2 = srem.i16 v0, v1
    return v2
}
; run: %i16(0, 42) == 0
; run: %i16(4, -2) == 0
; run: %i16(13, 5) == 3
; run: %i16(0x8000, 0xffff) == 0

function %i16_const(i16) -> i16 {
block0(v0: i16):
    v1 = iconst.i16 0xffff
    v2 = srem.i16 v0, v1
    return v2
}
; run: %i16_const(0) == 0
; run: %i16_const(4) == 0
; run: %i16_const(13) == 0
; run: %i16_const(0x8000) == 0

function %i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
    v2 = srem.i32 v0, v1
    return v2
}
; run: %i32(0, 13) == 0
; run: %i32(1048576, 8192) == 0
; run: %i32(-1024, 255) == -4
; run: %i32(0x80000000, 0xffffffff) == 0

function %i32_const(i32) -> i32 {
block0(v0: i32):
    v1 = iconst.i32 -1
    v2 = srem.i32 v0, v1
    return v2
}
; run: %i32_const(0) == 0
; run: %i32_const(1057) == 0
; run: %i32_const(-42) == 0
; run: %i32_const(0x80000000) == 0

function %i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = srem.i64 v0, v1
    return v2
}
-; run: %i32(0x800000000000000, 0xffffffffffffffff) == 0
; run: %i64(0, 104857600000) == 0
; run: %i64(104857600000, 511) == 398
; run: %i64(-57, -5) == -2
+; run: %i64(0x800000000000000, 0xffffffffffffffff) == 0

function %i64_const(i64) -> i64 {
block0(v0: i64):
    v1 = iconst.i64 -1
    v2 = srem.i64 v0, v1
    return v2
}
; run: %i64_const(0) == 0
; run: %i64_const(104857600000) == 0
; run: %i64_const(-57) == 0
; run: %i64_const(0x800000000000000) == 0

function %i8_u(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
    v2 = urem v0, v1
    return v2
}
; run: %i8_u(0, 1) == 0
; run: %i8_u(2, 2) == 0
; run: %i8_u(1, -1) == 1
; run: %i8_u(3, 2) == 1
; run: %i8_u(0x80, 0xff) == 0x80

function %i8_u_const(i8) -> i8 {
block0(v0: i8):
    v1 = iconst.i8 -1
    v2 = urem v0, v1
    return v2
}
; run: %i8_u_const(0) == 0
; run: %i8_u_const(3) == 3
; run: %i8_u_const(0x80) == 0x80

function %i16_u(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
    v2 = urem.i16 v0, v1
    return v2
}
; run: %i16_u(0, 42) == 0
; run: %i16_u(4, -2) == 4
; run: %i16_u(13, 5) == 3
; run: %i16_u(0x8000, 0xffff) == 0x8000

function %i16_u_const(i16) -> i16 {
block0(v0: i16):
    v1 = iconst.i16 0xffff
    v2 = urem.i16 v0, v1
    return v2
}
; run: %i16_u_const(0) == 0
; run: %i16_u_const(4) == 4
; run: %i16_u_const(0x8000) == 0x8000

function %i32_u(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
    v2 = urem.i32 v0, v1
    return v2
}
; run: %i32_u(0, 13) == 0
; run: %i32_u(1048576, 8192) == 0
; run: %i32_u(-1024, 255) == 252
; run: %i32_u(0x80000000, 0xffffffff) == 0x80000000

function %i32_u_const(i32) -> i32 {
block0(v0: i32):
    v1 = iconst.i32 -1
    v2 = urem.i32 v0, v1
    return v2
}
; run: %i32_u_const(0) == 0
; run: %i32_u_const(1057) == 1057
; run: %i32_u_const(-42) == -42
; run: %i32_u_const(0x80000000) == 0x80000000

function %i64_u(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = urem.i64 v0, v1
    return v2
}
; run: %i64_u(0, 104857600000) == 0
; run: %i64_u(104857600000, 511) == 398
; run: %i64_u(-57, -5) == -57
; run: %i64_u(0x800000000000000, 0xffffffffffffffff) == 0x800000000000000

function %i64_u_const(i64) -> i64 {
block0(v0: i64):
    v1 = iconst.i64 -1
    v2 = urem.i64 v0, v1
    return v2
}
; run: %i64_u_const(0) == 0
; run: %i64_u_const(104857600000) == 104857600000
; run: %i64_u_const(-57) == -57
; run: %i64_u_const(0x800000000000000) == 0x800000000000000
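
The new run lines pin down the awkward corners: these runtests expect `srem INT_MIN, -1` to yield 0 (the quotient overflows, but the remainder is well defined), and the same bit patterns fall out differently under `urem`. A host-side sanity check in plain Rust (not part of the commit) reproduces the expected values:

fn main() {
    // srem: INT_MIN % -1 overflows the quotient, but these runtests
    // expect 0 -- `wrapping_rem` computes exactly that.
    assert_eq!(i8::MIN.wrapping_rem(-1), 0); // %i8(0x80, 0xff) == 0
    assert_eq!(i32::MIN.wrapping_rem(-1), 0); // %i32_const(0x80000000) == 0
    assert_eq!(1i8 % -1, 0); // %i8(1, -1) == 0
    assert_eq!(13i16 % 5, 3); // %i16(13, 5) == 3
    assert_eq!(-1024i32 % 255, -4); // %i32(-1024, 255) == -4

    // urem: the same bit patterns, reinterpreted as unsigned.
    assert_eq!(0x80u8 % 0xff, 0x80); // %i8_u(0x80, 0xff) == 0x80
    assert_eq!((-1024i32 as u32) % 255, 252); // %i32_u(-1024, 255) == 252
    assert_eq!((-57i64 as u64) % (-5i64 as u64), -57i64 as u64); // %i64_u(-57, -5) == -57
}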