Cranelift AArch64: Further integral constant fixes (#4530)

Copyright (c) 2022, Arm Limited.
Author: Anton Kirilov
Date: 2022-07-26 17:35:06 +01:00
Committed by: GitHub
Parent: 1935428af7
Commit: d041c4b376
4 changed files with 188 additions and 32 deletions


@@ -148,41 +148,59 @@ where
             value
         };
         let rd = self.temp_writable_reg(I64);
+        let size = OperandSize::Size64;
+
+        // If the top 32 bits are zero, use 32-bit `mov` operations.
+        if value >> 32 == 0 {
+            let size = OperandSize::Size32;
+            let lower_halfword = value as u16;
+            let upper_halfword = (value >> 16) as u16;
+
+            if upper_halfword == u16::MAX {
+                self.emit(&MInst::MovWide {
+                    op: MoveWideOp::MovN,
+                    rd,
+                    imm: MoveWideConst::maybe_with_shift(!lower_halfword, 0).unwrap(),
+                    size,
+                });
+            } else {
+                self.emit(&MInst::MovWide {
+                    op: MoveWideOp::MovZ,
+                    rd,
+                    imm: MoveWideConst::maybe_with_shift(lower_halfword, 0).unwrap(),
+                    size,
+                });
+
+                if upper_halfword != 0 {
+                    self.emit(&MInst::MovWide {
+                        op: MoveWideOp::MovK,
+                        rd,
+                        imm: MoveWideConst::maybe_with_shift(upper_halfword, 16).unwrap(),
+                        size,
+                    });
+                }
+            }
+
-        if value == 0 {
-            self.emit(&MInst::MovWide {
-                op: MoveWideOp::MovZ,
-                rd,
-                imm: MoveWideConst::zero(),
-                size: OperandSize::Size64,
-            });
             return rd.to_reg();
         } else if value == u64::MAX {
             self.emit(&MInst::MovWide {
                 op: MoveWideOp::MovN,
                 rd,
                 imm: MoveWideConst::zero(),
-                size: OperandSize::Size64,
+                size,
             });
             return rd.to_reg();
         };
 
-        // If the top 32 bits are zero, use 32-bit `mov` operations.
-        let (num_half_words, size, negated) = if value >> 32 == 0 {
-            (2, OperandSize::Size32, (!value << 32) >> 32)
-        } else {
-            (4, OperandSize::Size64, !value)
-        };
         // If the number of 0xffff half words is greater than the number of 0x0000 half words
         // it is more efficient to use `movn` for the first instruction.
-        let first_is_inverted = count_zero_half_words(negated, num_half_words)
-            > count_zero_half_words(value, num_half_words);
+        let first_is_inverted = count_zero_half_words(!value) > count_zero_half_words(value);
         // Either 0xffff or 0x0000 half words can be skipped, depending on the first
         // instruction used.
         let ignored_halfword = if first_is_inverted { 0xffff } else { 0 };
         let mut first_mov_emitted = false;
 
-        for i in 0..num_half_words {
+        for i in 0..4 {
             let imm16 = (value >> (16 * i)) & 0xffff;
             if imm16 != ignored_halfword {
                 if !first_mov_emitted {
@@ -222,9 +240,9 @@
         return self.writable_reg_to_reg(rd);
 
-        fn count_zero_half_words(mut value: u64, num_half_words: u8) -> usize {
+        fn count_zero_half_words(mut value: u64) -> usize {
             let mut count = 0;
-            for _ in 0..num_half_words {
+            for _ in 0..4 {
                 if value & 0xffff == 0 {
                     count += 1;
                 }
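
The hunks above restructure the constant loader: 32-bit values (including zero) now take a dedicated `movz`/`movn`/`movk` path before the generic halfword loop, and the `num_half_words` machinery is dropped. The following is a self-contained sketch of the resulting strategy; `mov_sequence` is a hypothetical helper that prints mnemonics instead of emitting Cranelift `MInst`s, so it illustrates the selection logic only:

fn mov_sequence(value: u64) -> Vec<String> {
    // Count how many aligned 16-bit halfwords of `value` are zero.
    fn count_zero_half_words(mut value: u64) -> usize {
        let mut count = 0;
        for _ in 0..4 {
            if value & 0xffff == 0 {
                count += 1;
            }
            value >>= 16;
        }
        count
    }

    // 32-bit values need at most `movz` + `movk`, or a single `movn`
    // when the upper halfword is all-ones. Zero hits this path too,
    // which is why the filetests below now expect `movz w1, #0`.
    if value >> 32 == 0 {
        let (lower, upper) = (value as u16, (value >> 16) as u16);
        return if upper == u16::MAX {
            vec![format!("movn w0, #{:#x}", !lower)]
        } else if upper == 0 {
            vec![format!("movz w0, #{:#x}", lower)]
        } else {
            vec![
                format!("movz w0, #{:#x}", lower),
                format!("movk w0, #{:#x}, lsl #16", upper),
            ]
        };
    }

    // 64-bit values: start with `movn` when the inverted value has more
    // zero halfwords, so that more halfwords can be skipped outright.
    let first_is_inverted = count_zero_half_words(!value) > count_zero_half_words(value);
    let ignored_halfword = if first_is_inverted { 0xffff } else { 0 };
    let mut insts = Vec::new();
    for i in 0..4 {
        let imm16 = (value >> (16 * i)) & 0xffff;
        if imm16 == ignored_halfword {
            continue;
        }
        if insts.is_empty() && first_is_inverted {
            // `movn` materialises `!(imm16 << shift)`.
            insts.push(format!("movn x0, #{:#x}, lsl #{}", !imm16 & 0xffff, 16 * i));
        } else if insts.is_empty() {
            insts.push(format!("movz x0, #{:#x}, lsl #{}", imm16, 16 * i));
        } else {
            insts.push(format!("movk x0, #{:#x}, lsl #{}", imm16, 16 * i));
        }
    }
    if insts.is_empty() {
        // Every halfword matched the ignored pattern: the value was
        // u64::MAX (zero took the 32-bit path above).
        insts.push("movn x0, #0".to_string());
    }
    insts
}

fn main() {
    assert_eq!(mov_sequence(0), ["movz w0, #0x0"]);
    assert_eq!(
        mov_sequence(0xffff_ffff_ffff_1234),
        ["movn x0, #0xedcb, lsl #0"]
    );
}

For example, a constant whose upper three halfwords are all-ones collapses to a single `movn`, while 0 becomes a single 32-bit `movz`.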


@@ -111,7 +111,7 @@ block0(v0: i128):
 ; clz x8, x0
 ; lsr x10, x6, #6
 ; madd x0, x8, x10, x6
-; movz x1, #0
+; movz w1, #0
 ; ret
 
 function %c(i8) -> i8 {
@@ -173,7 +173,7 @@ block0(v0: i128):
 ; subs xzr, x8, #63
 ; csel x1, x14, xzr, eq
 ; add x0, x1, x8
-; movz x1, #0
+; movz w1, #0
 ; ret
 
 function %d(i8) -> i8 {
@@ -235,7 +235,7 @@ block0(v0: i128):
 ; clz x12, x8
 ; lsr x14, x10, #6
 ; madd x0, x12, x14, x10
-; movz x1, #0
+; movz w1, #0
 ; ret
 
 function %d(i128) -> i128 {
@@ -253,7 +253,7 @@ block0(v0: i128):
 ; cnt v11.16b, v6.16b
 ; addv b13, v11.16b
 ; umov w0, v13.b[0]
-; movz x1, #0
+; movz w1, #0
 ; ldp d11, d13, [sp], #16
 ; ldp fp, lr, [sp], #16
 ; ret
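
Each of these expected-output updates comes from the zero constant written into the upper half of an i128 result: it now takes the 32-bit path, and since an AArch64 write to `w1` zeroes the upper 32 bits of `x1`, `movz w1, #0` leaves the register in the same state as `movz x1, #0`. A tiny check of the path predicate (`takes_32_bit_path` is an illustrative name, not a Cranelift API):

// Which constants take the new 32-bit `mov` path?
fn takes_32_bit_path(value: u64) -> bool {
    value >> 32 == 0
}

fn main() {
    assert!(takes_32_bit_path(0)); // upper i128 half: movz w1, #0
    assert!(takes_32_bit_path(0xffff_ffff)); // largest 32-bit value
    assert!(!takes_32_bit_path(0x1_0000_0000)); // needs 64-bit movs
}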


@@ -33,7 +33,7 @@ block0(v0: i64):
 }
 
 ; block0:
-; movz x1, #0
+; movz w1, #0
 ; ret
 
 function %i128_sextend_i64(i64) -> i128 {
@@ -54,7 +54,7 @@ block0(v0: i32):
 ; block0:
 ; mov w0, w0
-; movz x1, #0
+; movz w1, #0
 ; ret
 
 function %i128_sextend_i32(i32) -> i128 {
@@ -76,7 +76,7 @@ block0(v0: i16):
 ; block0:
 ; uxth w0, w0
-; movz x1, #0
+; movz w1, #0
 ; ret
 
 function %i128_sextend_i16(i16) -> i128 {
@@ -98,7 +98,7 @@ block0(v0: i8):
 ; block0:
 ; uxtb w0, w0
-; movz x1, #0
+; movz w1, #0
 ; ret
 
 function %i128_sextend_i8(i8) -> i128 {
@@ -154,7 +154,7 @@ block0(v0: i8x16):
 ; block0:
 ; umov w0, v0.b[1]
-; movz x1, #0
+; movz w1, #0
 ; ret
 
 function %i8x16_sextend_i16(i8x16) -> i16 {
@@ -233,7 +233,7 @@ block0(v0: i16x8):
 ; block0:
 ; umov w0, v0.h[1]
-; movz x1, #0
+; movz w1, #0
 ; ret
 
 function %i16x8_sextend_i32(i16x8) -> i32 {
@@ -290,7 +290,7 @@ block0(v0: i32x4):
 ; block0:
 ; mov w0, v0.s[1]
-; movz x1, #0
+; movz w1, #0
 ; ret
 
 function %i32x4_sextend_i64(i32x4) -> i64 {
@@ -325,7 +325,7 @@ block0(v0: i64x2):
 ; block0:
 ; mov x0, v0.d[1]
-; movz x1, #0
+; movz w1, #0
 ; ret
 
 function %i64x2_sextend_i128(i64x2) -> i128 {
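
The same `movz x1, #0` to `movz w1, #0` substitution appears after every extend here because the upper 64 bits of the widened result are the constant 0, which trivially fits a single mov-wide immediate. As a rough model (an assumption about what a `MoveWideConst`-style check must establish, not the Cranelift implementation), a constant is `movz`-encodable exactly when all of its set bits land in one aligned 16-bit halfword:

// Returns the 16-bit payload and shift if `value` fits a single movz.
fn movz_encodable(value: u64) -> Option<(u16, u32)> {
    for shift in [0u32, 16, 32, 48] {
        if value & !(0xffffu64 << shift) == 0 {
            return Some(((value >> shift) as u16, shift));
        }
    }
    None
}

fn main() {
    assert_eq!(movz_encodable(0), Some((0, 0))); // -> movz w1, #0
    assert_eq!(movz_encodable(0xbeef_0000), Some((0xbeef, 16)));
    assert_eq!(movz_encodable(0x1_0001), None); // straddles halfwords
}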


@@ -9,26 +9,164 @@ block0(v0: i8, v1: i8):
    v2 = srem.i8 v0, v1
    return v2
}
; run: %i8(0, 1) == 0
; run: %i8(1, -1) == 0
; run: %i8(0x80, 0xff) == 0
; run: %i8(0x2, 0x7) == 0x2

function %i8_const(i8) -> i8 {
block0(v0: i8):
    v1 = iconst.i8 -1
    v2 = srem.i8 v0, v1
    return v2
}
; run: %i8_const(0) == 0
; run: %i8_const(1) == 0
; run: %i8_const(0x80) == 0
; run: %i8_const(0x2) == 0

function %i16(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
    v2 = srem.i16 v0, v1
    return v2
}
; run: %i16(0, 42) == 0
; run: %i16(4, -2) == 0
; run: %i16(13, 5) == 3
; run: %i16(0x8000, 0xffff) == 0

function %i16_const(i16) -> i16 {
block0(v0: i16):
    v1 = iconst.i16 0xffff
    v2 = srem.i16 v0, v1
    return v2
}
; run: %i16_const(0) == 0
; run: %i16_const(4) == 0
; run: %i16_const(13) == 0
; run: %i16_const(0x8000) == 0

function %i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
    v2 = srem.i32 v0, v1
    return v2
}
; run: %i32(0, 13) == 0
; run: %i32(1048576, 8192) == 0
; run: %i32(-1024, 255) == -4
; run: %i32(0x80000000, 0xffffffff) == 0

function %i32_const(i32) -> i32 {
block0(v0: i32):
    v1 = iconst.i32 -1
    v2 = srem.i32 v0, v1
    return v2
}
; run: %i32_const(0) == 0
; run: %i32_const(1057) == 0
; run: %i32_const(-42) == 0
; run: %i32_const(0x80000000) == 0

function %i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = srem.i64 v0, v1
    return v2
}
-; run: %i32(0x800000000000000, 0xffffffffffffffff) == 0
; run: %i64(0, 104857600000) == 0
; run: %i64(104857600000, 511) == 398
; run: %i64(-57, -5) == -2
+; run: %i64(0x800000000000000, 0xffffffffffffffff) == 0

function %i64_const(i64) -> i64 {
block0(v0: i64):
    v1 = iconst.i64 -1
    v2 = srem.i64 v0, v1
    return v2
}
; run: %i64_const(0) == 0
; run: %i64_const(104857600000) == 0
; run: %i64_const(-57) == 0
; run: %i64_const(0x800000000000000) == 0

function %i8_u(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
    v2 = urem v0, v1
    return v2
}
; run: %i8_u(0, 1) == 0
; run: %i8_u(2, 2) == 0
; run: %i8_u(1, -1) == 1
; run: %i8_u(3, 2) == 1
; run: %i8_u(0x80, 0xff) == 0x80

function %i8_u_const(i8) -> i8 {
block0(v0: i8):
    v1 = iconst.i8 -1
    v2 = urem v0, v1
    return v2
}
; run: %i8_u_const(0) == 0
; run: %i8_u_const(3) == 3
; run: %i8_u_const(0x80) == 0x80

function %i16_u(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
    v2 = urem.i16 v0, v1
    return v2
}
; run: %i16_u(0, 42) == 0
; run: %i16_u(4, -2) == 4
; run: %i16_u(13, 5) == 3
; run: %i16_u(0x8000, 0xffff) == 0x8000

function %i16_u_const(i16) -> i16 {
block0(v0: i16):
    v1 = iconst.i16 0xffff
    v2 = urem.i16 v0, v1
    return v2
}
; run: %i16_u_const(0) == 0
; run: %i16_u_const(4) == 4
; run: %i16_u_const(0x8000) == 0x8000

function %i32_u(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
    v2 = urem.i32 v0, v1
    return v2
}
; run: %i32_u(0, 13) == 0
; run: %i32_u(1048576, 8192) == 0
; run: %i32_u(-1024, 255) == 252
; run: %i32_u(0x80000000, 0xffffffff) == 0x80000000

function %i32_u_const(i32) -> i32 {
block0(v0: i32):
    v1 = iconst.i32 -1
    v2 = urem.i32 v0, v1
    return v2
}
; run: %i32_u_const(0) == 0
; run: %i32_u_const(1057) == 1057
; run: %i32_u_const(-42) == -42
; run: %i32_u_const(0x80000000) == 0x80000000

function %i64_u(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = urem.i64 v0, v1
    return v2
}
; run: %i64_u(0, 104857600000) == 0
; run: %i64_u(104857600000, 511) == 398
; run: %i64_u(-57, -5) == -57
; run: %i64_u(0x800000000000000, 0xffffffffffffffff) == 0x800000000000000

function %i64_u_const(i64) -> i64 {
block0(v0: i64):
    v1 = iconst.i64 -1
    v2 = urem.i64 v0, v1
    return v2
}
; run: %i64_u_const(0) == 0
; run: %i64_u_const(104857600000) == 104857600000
; run: %i64_u_const(-57) == -57
; run: %i64_u_const(0x800000000000000) == 0x800000000000000
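
The new run lines pin down the awkward corners: these runtests expect `srem INT_MIN, -1` to yield 0 (the quotient overflows, but the remainder is well defined), and the same bit patterns fall out differently under `urem`. A host-side sanity check in plain Rust (not part of the commit) reproduces the expected values:

fn main() {
    // srem: INT_MIN % -1 overflows the quotient, but these runtests
    // expect 0 -- `wrapping_rem` computes exactly that.
    assert_eq!(i8::MIN.wrapping_rem(-1), 0); // %i8(0x80, 0xff) == 0
    assert_eq!(i32::MIN.wrapping_rem(-1), 0); // %i32_const(0x80000000) == 0
    assert_eq!(1i8 % -1, 0); // %i8(1, -1) == 0
    assert_eq!(13i16 % 5, 3); // %i16(13, 5) == 3
    assert_eq!(-1024i32 % 255, -4); // %i32(-1024, 255) == -4

    // urem: the same bit patterns, reinterpreted as unsigned.
    assert_eq!(0x80u8 % 0xff, 0x80); // %i8_u(0x80, 0xff) == 0x80
    assert_eq!((-1024i32 as u32) % 255, 252); // %i32_u(-1024, 255) == 252
    assert_eq!((-57i64 as u64) % (-5i64 as u64), -57i64 as u64); // %i64_u(-57, -5) == -57
}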