diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs
index 8ae644955d..a58cb5b8e3 100644
--- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs
@@ -148,41 +148,59 @@ where
             value
         };

         let rd = self.temp_writable_reg(I64);
+        let size = OperandSize::Size64;
+
+        // If the top 32 bits are zero, use 32-bit `mov` operations.
+        if value >> 32 == 0 {
+            let size = OperandSize::Size32;
+            let lower_halfword = value as u16;
+            let upper_halfword = (value >> 16) as u16;
+
+            if upper_halfword == u16::MAX {
+                self.emit(&MInst::MovWide {
+                    op: MoveWideOp::MovN,
+                    rd,
+                    imm: MoveWideConst::maybe_with_shift(!lower_halfword, 0).unwrap(),
+                    size,
+                });
+            } else {
+                self.emit(&MInst::MovWide {
+                    op: MoveWideOp::MovZ,
+                    rd,
+                    imm: MoveWideConst::maybe_with_shift(lower_halfword, 0).unwrap(),
+                    size,
+                });
+
+                if upper_halfword != 0 {
+                    self.emit(&MInst::MovWide {
+                        op: MoveWideOp::MovK,
+                        rd,
+                        imm: MoveWideConst::maybe_with_shift(upper_halfword, 16).unwrap(),
+                        size,
+                    });
+                }
+            }

-        if value == 0 {
-            self.emit(&MInst::MovWide {
-                op: MoveWideOp::MovZ,
-                rd,
-                imm: MoveWideConst::zero(),
-                size: OperandSize::Size64,
-            });
             return rd.to_reg();
         } else if value == u64::MAX {
             self.emit(&MInst::MovWide {
                 op: MoveWideOp::MovN,
                 rd,
                 imm: MoveWideConst::zero(),
-                size: OperandSize::Size64,
+                size,
             });
             return rd.to_reg();
         };

-        // If the top 32 bits are zero, use 32-bit `mov` operations.
-        let (num_half_words, size, negated) = if value >> 32 == 0 {
-            (2, OperandSize::Size32, (!value << 32) >> 32)
-        } else {
-            (4, OperandSize::Size64, !value)
-        };
         // If the number of 0xffff half words is greater than the number of 0x0000 half words
         // it is more efficient to use `movn` for the first instruction.
-        let first_is_inverted = count_zero_half_words(negated, num_half_words)
-            > count_zero_half_words(value, num_half_words);
+        let first_is_inverted = count_zero_half_words(!value) > count_zero_half_words(value);
         // Either 0xffff or 0x0000 half words can be skipped, depending on the first
         // instruction used.
         let ignored_halfword = if first_is_inverted { 0xffff } else { 0 };
         let mut first_mov_emitted = false;
-        for i in 0..num_half_words {
+        for i in 0..4 {
             let imm16 = (value >> (16 * i)) & 0xffff;
             if imm16 != ignored_halfword {
                 if !first_mov_emitted {
@@ -222,9 +240,9 @@ where

         return self.writable_reg_to_reg(rd);

-        fn count_zero_half_words(mut value: u64, num_half_words: u8) -> usize {
+        fn count_zero_half_words(mut value: u64) -> usize {
             let mut count = 0;
-            for _ in 0..num_half_words {
+            for _ in 0..4 {
                 if value & 0xffff == 0 {
                     count += 1;
                 }
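For orientation: the rewrite above replaces the old two-or-four-halfword bookkeeping with two explicit paths. Constants whose upper 32 bits are clear are built with 32-bit `movz`/`movk` (or a single 32-bit `movn` when the upper halfword is all ones), and everything else goes through a fixed four-halfword loop that starts with `movn` whenever the constant contains more 0xffff halfwords than 0x0000 halfwords. The standalone Rust sketch below mirrors that dispatch; the `materialize` helper, the register names, and the mnemonic strings are illustrative only, not part of the patch:

    // Sketch of the constant-materialization strategy above; not the
    // Cranelift code itself.
    fn count_zero_half_words(mut value: u64) -> usize {
        let mut count = 0;
        for _ in 0..4 {
            if value & 0xffff == 0 {
                count += 1;
            }
            value >>= 16;
        }
        count
    }

    fn materialize(value: u64) -> Vec<String> {
        let mut insts = Vec::new();

        // 32-bit path: at most `movz` + `movk`, or a single `movn` when
        // the upper halfword is all ones. A write to a W register zeroes
        // the upper 32 bits of the X register, so this covers value == 0.
        if value >> 32 == 0 {
            let (lower, upper) = (value as u16, (value >> 16) as u16);
            if upper == u16::MAX {
                insts.push(format!("movn w0, #{:#x}", !lower));
            } else {
                insts.push(format!("movz w0, #{:#x}", lower));
                if upper != 0 {
                    insts.push(format!("movk w0, #{:#x}, lsl #16", upper));
                }
            }
            return insts;
        }

        // 64-bit path: begin with `movn` when that skips more halfwords
        // than `movz` would, then patch the rest in with `movk`.
        let first_is_inverted = count_zero_half_words(!value) > count_zero_half_words(value);
        let ignored = if first_is_inverted { 0xffff } else { 0 };
        let mut first_emitted = false;
        for i in 0..4 {
            let imm16 = (value >> (16 * i)) & 0xffff;
            if imm16 == ignored {
                continue;
            }
            if !first_emitted {
                first_emitted = true;
                if first_is_inverted {
                    insts.push(format!("movn x0, #{:#x}, lsl #{}", !imm16 & 0xffff, 16 * i));
                } else {
                    insts.push(format!("movz x0, #{:#x}, lsl #{}", imm16, 16 * i));
                }
            } else {
                insts.push(format!("movk x0, #{:#x}, lsl #{}", imm16, 16 * i));
            }
        }
        if !first_emitted {
            // Every halfword was skippable, i.e. value == u64::MAX here.
            insts.push("movn x0, #0".to_string());
        }
        insts
    }

    fn main() {
        for v in [0, 0xffff_0001, 0x12_0000_5678, u64::MAX] {
            println!("{:#018x} -> {:?}", v, materialize(v));
        }
    }

For example, `materialize(0)` yields a single `movz w0, #0`, which is exactly the `movz x1, #0` to `movz w1, #0` change visible in the filetest expectations below.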
diff --git a/cranelift/filetests/filetests/isa/aarch64/bitops.clif b/cranelift/filetests/filetests/isa/aarch64/bitops.clif
index c45d89ebd9..7a88d27be4 100644
--- a/cranelift/filetests/filetests/isa/aarch64/bitops.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/bitops.clif
@@ -111,7 +111,7 @@ block0(v0: i128):
 ; clz x8, x0
 ; lsr x10, x6, #6
 ; madd x0, x8, x10, x6
-; movz x1, #0
+; movz w1, #0
 ; ret

 function %c(i8) -> i8 {
@@ -173,7 +173,7 @@ block0(v0: i128):
 ; subs xzr, x8, #63
 ; csel x1, x14, xzr, eq
 ; add x0, x1, x8
-; movz x1, #0
+; movz w1, #0
 ; ret

 function %d(i8) -> i8 {
@@ -235,7 +235,7 @@ block0(v0: i128):
 ; clz x12, x8
 ; lsr x14, x10, #6
 ; madd x0, x12, x14, x10
-; movz x1, #0
+; movz w1, #0
 ; ret

 function %d(i128) -> i128 {
@@ -253,7 +253,7 @@ block0(v0: i128):
 ; cnt v11.16b, v6.16b
 ; addv b13, v11.16b
 ; umov w0, v13.b[0]
-; movz x1, #0
+; movz w1, #0
 ; ldp d11, d13, [sp], #16
 ; ldp fp, lr, [sp], #16
 ; ret
diff --git a/cranelift/filetests/filetests/isa/aarch64/extend-op.clif b/cranelift/filetests/filetests/isa/aarch64/extend-op.clif
index 9b31cd20fc..76fc191425 100644
--- a/cranelift/filetests/filetests/isa/aarch64/extend-op.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/extend-op.clif
@@ -33,7 +33,7 @@ block0(v0: i64):
 }

 ; block0:
-; movz x1, #0
+; movz w1, #0
 ; ret

 function %i128_sextend_i64(i64) -> i128 {
@@ -54,7 +54,7 @@ block0(v0: i32):

 ; block0:
 ; mov w0, w0
-; movz x1, #0
+; movz w1, #0
 ; ret

 function %i128_sextend_i32(i32) -> i128 {
@@ -76,7 +76,7 @@ block0(v0: i16):

 ; block0:
 ; uxth w0, w0
-; movz x1, #0
+; movz w1, #0
 ; ret

 function %i128_sextend_i16(i16) -> i128 {
@@ -98,7 +98,7 @@ block0(v0: i8):

 ; block0:
 ; uxtb w0, w0
-; movz x1, #0
+; movz w1, #0
 ; ret

 function %i128_sextend_i8(i8) -> i128 {
@@ -154,7 +154,7 @@ block0(v0: i8x16):

 ; block0:
 ; umov w0, v0.b[1]
-; movz x1, #0
+; movz w1, #0
 ; ret

 function %i8x16_sextend_i16(i8x16) -> i16 {
@@ -233,7 +233,7 @@ block0(v0: i16x8):

 ; block0:
 ; umov w0, v0.h[1]
-; movz x1, #0
+; movz w1, #0
 ; ret

 function %i16x8_sextend_i32(i16x8) -> i32 {
@@ -290,7 +290,7 @@ block0(v0: i32x4):

 ; block0:
 ; mov w0, v0.s[1]
-; movz x1, #0
+; movz w1, #0
 ; ret

 function %i32x4_sextend_i64(i32x4) -> i64 {
@@ -325,7 +325,7 @@ block0(v0: i64x2):

 ; block0:
 ; mov x0, v0.d[1]
-; movz x1, #0
+; movz w1, #0
 ; ret

 function %i64x2_sextend_i128(i64x2) -> i128 {
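All of the expectation churn in these two filetests is the same single-instruction change: the zero constant written into the upper half of an i128 result now takes the 32-bit path, and because a write to `w1` architecturally zeroes the upper 32 bits of `x1`, `movz w1, #0` leaves the register in the same state as the old `movz x1, #0`. A minimal check of the dispatch predicate, with an illustrative helper name:

    // The path-selection condition from the lowering above; the helper
    // name is made up for this example.
    fn takes_32_bit_path(value: u64) -> bool {
        value >> 32 == 0
    }

    fn main() {
        assert!(takes_32_bit_path(0)); // upper i128 halves: `movz w1, #0`
        assert!(takes_32_bit_path(0xffff_ffff)); // single 32-bit `movn`
        assert!(!takes_32_bit_path(u64::MAX)); // still 64-bit: `movn xN, #0`
        println!("dispatch matches the updated test expectations");
    }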
diff --git a/cranelift/filetests/filetests/runtests/div-checks.clif b/cranelift/filetests/filetests/runtests/div-checks.clif
index 32b5588528..a9f9ca73ba 100644
--- a/cranelift/filetests/filetests/runtests/div-checks.clif
+++ b/cranelift/filetests/filetests/runtests/div-checks.clif
@@ -9,26 +9,164 @@ block0(v0: i8, v1: i8):
     v2 = srem.i8 v0, v1
     return v2
 }
+; run: %i8(0, 1) == 0
+; run: %i8(1, -1) == 0
 ; run: %i8(0x80, 0xff) == 0
 ; run: %i8(0x2, 0x7) == 0x2

+function %i8_const(i8) -> i8 {
+block0(v0: i8):
+    v1 = iconst.i8 -1
+    v2 = srem.i8 v0, v1
+    return v2
+}
+; run: %i8_const(0) == 0
+; run: %i8_const(1) == 0
+; run: %i8_const(0x80) == 0
+; run: %i8_const(0x2) == 0
+
 function %i16(i16, i16) -> i16 {
 block0(v0: i16, v1: i16):
     v2 = srem.i16 v0, v1
     return v2
 }
+; run: %i16(0, 42) == 0
+; run: %i16(4, -2) == 0
+; run: %i16(13, 5) == 3
 ; run: %i16(0x8000, 0xffff) == 0

+function %i16_const(i16) -> i16 {
+block0(v0: i16):
+    v1 = iconst.i16 0xffff
+    v2 = srem.i16 v0, v1
+    return v2
+}
+; run: %i16_const(0) == 0
+; run: %i16_const(4) == 0
+; run: %i16_const(13) == 0
+; run: %i16_const(0x8000) == 0
+
 function %i32(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
     v2 = srem.i32 v0, v1
     return v2
 }
+; run: %i32(0, 13) == 0
+; run: %i32(1048576, 8192) == 0
+; run: %i32(-1024, 255) == -4
 ; run: %i32(0x80000000, 0xffffffff) == 0

+function %i32_const(i32) -> i32 {
+block0(v0: i32):
+    v1 = iconst.i32 -1
+    v2 = srem.i32 v0, v1
+    return v2
+}
+; run: %i32_const(0) == 0
+; run: %i32_const(1057) == 0
+; run: %i32_const(-42) == 0
+; run: %i32_const(0x80000000) == 0
+
 function %i64(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
     v2 = srem.i64 v0, v1
     return v2
 }
-; run: %i32(0x800000000000000, 0xffffffffffffffff) == 0
+; run: %i64(0, 104857600000) == 0
+; run: %i64(104857600000, 511) == 398
+; run: %i64(-57, -5) == -2
+; run: %i64(0x800000000000000, 0xffffffffffffffff) == 0
+
+function %i64_const(i64) -> i64 {
+block0(v0: i64):
+    v1 = iconst.i64 -1
+    v2 = srem.i64 v0, v1
+    return v2
+}
+; run: %i64_const(0) == 0
+; run: %i64_const(104857600000) == 0
+; run: %i64_const(-57) == 0
+; run: %i64_const(0x800000000000000) == 0
+
+function %i8_u(i8, i8) -> i8 {
+block0(v0: i8, v1: i8):
+    v2 = urem v0, v1
+    return v2
+}
+; run: %i8_u(0, 1) == 0
+; run: %i8_u(2, 2) == 0
+; run: %i8_u(1, -1) == 1
+; run: %i8_u(3, 2) == 1
+; run: %i8_u(0x80, 0xff) == 0x80
+
+function %i8_u_const(i8) -> i8 {
+block0(v0: i8):
+    v1 = iconst.i8 -1
+    v2 = urem v0, v1
+    return v2
+}
+; run: %i8_u_const(0) == 0
+; run: %i8_u_const(3) == 3
+; run: %i8_u_const(0x80) == 0x80
+
+function %i16_u(i16, i16) -> i16 {
+block0(v0: i16, v1: i16):
+    v2 = urem.i16 v0, v1
+    return v2
+}
+; run: %i16_u(0, 42) == 0
+; run: %i16_u(4, -2) == 4
+; run: %i16_u(13, 5) == 3
+; run: %i16_u(0x8000, 0xffff) == 0x8000
+
+function %i16_u_const(i16) -> i16 {
+block0(v0: i16):
+    v1 = iconst.i16 0xffff
+    v2 = urem.i16 v0, v1
+    return v2
+}
+; run: %i16_u_const(0) == 0
+; run: %i16_u_const(4) == 4
+; run: %i16_u_const(0x8000) == 0x8000
+
+function %i32_u(i32, i32) -> i32 {
+block0(v0: i32, v1: i32):
+    v2 = urem.i32 v0, v1
+    return v2
+}
+; run: %i32_u(0, 13) == 0
+; run: %i32_u(1048576, 8192) == 0
+; run: %i32_u(-1024, 255) == 252
+; run: %i32_u(0x80000000, 0xffffffff) == 0x80000000
+
+function %i32_u_const(i32) -> i32 {
+block0(v0: i32):
+    v1 = iconst.i32 -1
+    v2 = urem.i32 v0, v1
+    return v2
+}
+; run: %i32_u_const(0) == 0
+; run: %i32_u_const(1057) == 1057
+; run: %i32_u_const(-42) == -42
+; run: %i32_u_const(0x80000000) == 0x80000000
+
+function %i64_u(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+    v2 = urem.i64 v0, v1
+    return v2
+}
+; run: %i64_u(0, 104857600000) == 0
+; run: %i64_u(104857600000, 511) == 398
+; run: %i64_u(-57, -5) == -57
+; run: %i64_u(0x800000000000000, 0xffffffffffffffff) == 0x800000000000000
+
+function %i64_u_const(i64) -> i64 {
+block0(v0: i64):
+    v1 = iconst.i64 -1
+    v2 = urem.i64 v0, v1
+    return v2
+}
+; run: %i64_u_const(0) == 0
+; run: %i64_u_const(104857600000) == 104857600000
+; run: %i64_u_const(-57) == -57
+; run: %i64_u_const(0x800000000000000) == 0x800000000000000
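The expanded run tests pin down the remainder edge cases, most importantly INT_MIN % -1, which for `srem` must yield 0 rather than trap on overflow; the `*_const` variants repeat each case with an `iconst` divisor, presumably so the new constant materialization is exercised on the divisor side. A hedged reference model in Rust (`srem8`/`urem8` are illustrative names, and `wrapping_rem` is used because plain `%` panics on this overflow in debug builds):

    // Reference model for the srem/urem semantics the run lines assert.
    fn srem8(a: i8, b: i8) -> i8 {
        // i8::MIN.wrapping_rem(-1) yields 0 instead of overflowing.
        a.wrapping_rem(b)
    }

    fn urem8(a: u8, b: u8) -> u8 {
        a % b // unsigned remainder has no overflow case
    }

    fn main() {
        assert_eq!(srem8(i8::MIN, -1), 0); // run: %i8(0x80, 0xff) == 0
        assert_eq!(srem8(2, 7), 2); // run: %i8(0x2, 0x7) == 0x2
        assert_eq!(urem8(0x80, 0xff), 0x80); // run: %i8_u(0x80, 0xff) == 0x80
        assert_eq!(urem8(1, 0xff), 1); // run: %i8_u(1, -1) == 1
        println!("remainder edge cases agree with the run expectations");
    }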