diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 60a81eb005..f19ce9e0cf 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -170,7 +170,7 @@ fn enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32 { } } -const MOVE_WIDE_FIXED: u32 = 0x92800000; +const MOVE_WIDE_FIXED: u32 = 0x12800000; #[repr(u32)] enum MoveWideOpcode { @@ -179,9 +179,15 @@ enum MoveWideOpcode { MOVK = 0b11, } -fn enc_move_wide(op: MoveWideOpcode, rd: Writable, imm: MoveWideConst) -> u32 { +fn enc_move_wide( + op: MoveWideOpcode, + rd: Writable, + imm: MoveWideConst, + size: OperandSize, +) -> u32 { assert!(imm.shift <= 0b11); MOVE_WIDE_FIXED + | size.sf_bit() << 31 | (op as u32) << 29 | u32::from(imm.shift) << 21 | u32::from(imm.bits) << 5 @@ -1029,9 +1035,15 @@ impl MachInstEmit for Inst { // Encoded as ORR rd, rm, zero. sink.put4(enc_arith_rrr(0b00101010_000, 0b000_000, rd, zero_reg(), rm)); } - &Inst::MovZ { rd, imm } => sink.put4(enc_move_wide(MoveWideOpcode::MOVZ, rd, imm)), - &Inst::MovN { rd, imm } => sink.put4(enc_move_wide(MoveWideOpcode::MOVN, rd, imm)), - &Inst::MovK { rd, imm } => sink.put4(enc_move_wide(MoveWideOpcode::MOVK, rd, imm)), + &Inst::MovZ { rd, imm, size } => { + sink.put4(enc_move_wide(MoveWideOpcode::MOVZ, rd, imm, size)) + } + &Inst::MovN { rd, imm, size } => { + sink.put4(enc_move_wide(MoveWideOpcode::MOVN, rd, imm, size)) + } + &Inst::MovK { rd, imm, size } => { + sink.put4(enc_move_wide(MoveWideOpcode::MOVK, rd, imm, size)) + } &Inst::CSel { rd, rn, rm, cond } => { sink.put4(enc_csel(rd, rn, rm, cond)); } diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index f8b446de31..25fb934a0d 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -1370,8 +1370,8 @@ fn test_aarch64_binemit() { mem: AMode::FPOffset(1048576 + 1, I8), // 2^20 + 1 srcloc: None, }, - "300080D21002A0F2B063308B010240F9", - "movz x16, #1 ; movk x16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]", + "300080521002A072B063308B010240F9", + "movz w16, #1 ; movk w16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]", )); insns.push(( @@ -1674,6 +1674,7 @@ fn test_aarch64_binemit() { Inst::MovZ { rd: writable_xreg(8), imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(), + size: OperandSize::Size64, }, "E8FF9FD2", "movz x8, #65535", @@ -1682,6 +1683,7 @@ fn test_aarch64_binemit() { Inst::MovZ { rd: writable_xreg(8), imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(), + size: OperandSize::Size64, }, "E8FFBFD2", "movz x8, #65535, LSL #16", @@ -1690,6 +1692,7 @@ fn test_aarch64_binemit() { Inst::MovZ { rd: writable_xreg(8), imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(), + size: OperandSize::Size64, }, "E8FFDFD2", "movz x8, #65535, LSL #32", @@ -1698,15 +1701,26 @@ fn test_aarch64_binemit() { Inst::MovZ { rd: writable_xreg(8), imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(), + size: OperandSize::Size64, }, "E8FFFFD2", "movz x8, #65535, LSL #48", )); + insns.push(( + Inst::MovZ { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(), + size: OperandSize::Size32, + }, + "E8FFBF52", + "movz w8, #65535, LSL #16", + )); insns.push(( Inst::MovN { rd: writable_xreg(8), imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(), + size: OperandSize::Size64, }, "E8FF9F92", "movn x8, #65535", @@ -1715,6 +1729,7 @@ fn test_aarch64_binemit() { Inst::MovN { rd: writable_xreg(8), imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(), + size: OperandSize::Size64, }, "E8FFBF92", "movn x8, #65535, LSL #16", @@ -1723,6 +1738,7 @@ fn test_aarch64_binemit() { Inst::MovN { rd: writable_xreg(8), imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(), + size: OperandSize::Size64, }, "E8FFDF92", "movn x8, #65535, LSL #32", @@ -1731,15 +1747,26 @@ fn test_aarch64_binemit() { Inst::MovN { rd: writable_xreg(8), imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(), + size: OperandSize::Size64, }, "E8FFFF92", "movn x8, #65535, LSL #48", )); + insns.push(( + Inst::MovN { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(), + size: OperandSize::Size32, + }, + "E8FF9F12", + "movn w8, #65535", + )); insns.push(( Inst::MovK { rd: writable_xreg(12), imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_0000).unwrap(), + size: OperandSize::Size64, }, "0C0080F2", "movk x12, #0", @@ -1748,6 +1775,7 @@ fn test_aarch64_binemit() { Inst::MovK { rd: writable_xreg(19), imm: MoveWideConst::maybe_with_shift(0x0000, 16).unwrap(), + size: OperandSize::Size64, }, "1300A0F2", "movk x19, #0, LSL #16", @@ -1756,6 +1784,7 @@ fn test_aarch64_binemit() { Inst::MovK { rd: writable_xreg(3), imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(), + size: OperandSize::Size64, }, "E3FF9FF2", "movk x3, #65535", @@ -1764,6 +1793,7 @@ fn test_aarch64_binemit() { Inst::MovK { rd: writable_xreg(8), imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(), + size: OperandSize::Size64, }, "E8FFBFF2", "movk x8, #65535, LSL #16", @@ -1772,6 +1802,7 @@ fn test_aarch64_binemit() { Inst::MovK { rd: writable_xreg(8), imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(), + size: OperandSize::Size64, }, "E8FFDFF2", "movk x8, #65535, LSL #32", @@ -1780,6 +1811,7 @@ fn test_aarch64_binemit() { Inst::MovK { rd: writable_xreg(8), imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(), + size: OperandSize::Size64, }, "E8FFFFF2", "movk x8, #65535, LSL #48", diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index b527b7dc19..50788300b3 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -587,18 +587,21 @@ pub enum Inst { MovZ { rd: Writable, imm: MoveWideConst, + size: OperandSize, }, /// A MOVN with a 16-bit immediate. MovN { rd: Writable, imm: MoveWideConst, + size: OperandSize, }, /// A MOVK with a 16-bit immediate. MovK { rd: Writable, imm: MoveWideConst, + size: OperandSize, }, /// A sign- or zero-extend operation. @@ -1122,9 +1125,9 @@ pub enum Inst { }, } -fn count_zero_half_words(mut value: u64) -> usize { +fn count_zero_half_words(mut value: u64, num_half_words: u8) -> usize { let mut count = 0; - for _ in 0..4 { + for _ in 0..num_half_words { if value & 0xffff == 0 { count += 1; } @@ -1176,10 +1179,18 @@ impl Inst { pub fn load_constant(rd: Writable, value: u64) -> SmallVec<[Inst; 4]> { if let Some(imm) = MoveWideConst::maybe_from_u64(value) { // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVZ - smallvec![Inst::MovZ { rd, imm }] + smallvec![Inst::MovZ { + rd, + imm, + size: OperandSize::Size64 + }] } else if let Some(imm) = MoveWideConst::maybe_from_u64(!value) { // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVN - smallvec![Inst::MovN { rd, imm }] + smallvec![Inst::MovN { + rd, + imm, + size: OperandSize::Size64 + }] } else if let Some(imml) = ImmLogic::maybe_from_u64(value, I64) { // Weird logical-instruction immediate in ORI using zero register smallvec![Inst::AluRRImmLogic { @@ -1191,15 +1202,22 @@ impl Inst { } else { let mut insts = smallvec![]; + // If the top 32 bits are zero, use 32-bit `mov` operations. + let (num_half_words, size, negated) = if value >> 32 == 0 { + (2, OperandSize::Size32, (!value << 32) >> 32) + } else { + (4, OperandSize::Size64, !value) + }; // If the number of 0xffff half words is greater than the number of 0x0000 half words // it is more efficient to use `movn` for the first instruction. - let first_is_inverted = count_zero_half_words(!value) > count_zero_half_words(value); + let first_is_inverted = count_zero_half_words(negated, num_half_words) + > count_zero_half_words(value, num_half_words); // Either 0xffff or 0x0000 half words can be skipped, depending on the first // instruction used. let ignored_halfword = if first_is_inverted { 0xffff } else { 0 }; let mut first_mov_emitted = false; - for i in 0..4 { + for i in 0..num_half_words { let imm16 = (value >> (16 * i)) & 0xffff; if imm16 != ignored_halfword { if !first_mov_emitted { @@ -1208,15 +1226,15 @@ impl Inst { let imm = MoveWideConst::maybe_with_shift(((!imm16) & 0xffff) as u16, i * 16) .unwrap(); - insts.push(Inst::MovN { rd, imm }); + insts.push(Inst::MovN { rd, imm, size }); } else { let imm = MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap(); - insts.push(Inst::MovZ { rd, imm }); + insts.push(Inst::MovZ { rd, imm, size }); } } else { let imm = MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap(); - insts.push(Inst::MovK { rd, imm }); + insts.push(Inst::MovK { rd, imm, size }); } } } @@ -2870,18 +2888,18 @@ impl Inst { let rm = show_ireg_sized(rm, mb_rru, OperandSize::Size32); format!("mov {}, {}", rd, rm) } - &Inst::MovZ { rd, ref imm } => { - let rd = rd.to_reg().show_rru(mb_rru); + &Inst::MovZ { rd, ref imm, size } => { + let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); let imm = imm.show_rru(mb_rru); format!("movz {}, {}", rd, imm) } - &Inst::MovN { rd, ref imm } => { - let rd = rd.to_reg().show_rru(mb_rru); + &Inst::MovN { rd, ref imm, size } => { + let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); let imm = imm.show_rru(mb_rru); format!("movn {}, {}", rd, imm) } - &Inst::MovK { rd, ref imm } => { - let rd = rd.to_reg().show_rru(mb_rru); + &Inst::MovK { rd, ref imm, size } => { + let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); let imm = imm.show_rru(mb_rru); format!("movk {}, {}", rd, imm) } diff --git a/cranelift/filetests/filetests/vcode/aarch64/amodes.clif b/cranelift/filetests/filetests/vcode/aarch64/amodes.clif index aaaffd0286..b88b8e6590 100644 --- a/cranelift/filetests/filetests/vcode/aarch64/amodes.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/amodes.clif @@ -230,8 +230,8 @@ block0(v0: i64): ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp -; nextln: movz x1, #51712 -; nextln: movk x1, #15258, LSL #16 +; nextln: movz w1, #51712 +; nextln: movk w1, #15258, LSL #16 ; nextln: add x0, x1, x0 ; nextln: ldr w0, [x0] ; nextln: mov sp, fp diff --git a/cranelift/filetests/filetests/vcode/aarch64/constants.clif b/cranelift/filetests/filetests/vcode/aarch64/constants.clif index 09f0aeaa53..48fa386891 100644 --- a/cranelift/filetests/filetests/vcode/aarch64/constants.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/constants.clif @@ -213,3 +213,42 @@ block0: ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret + +function %f() -> i32 { +block0: + v0 = iconst.i32 0xfffffff7 + return v0 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: movn w0, #8 +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret + +function %f() -> i64 { +block0: + v0 = iconst.i64 0xfffffff7 + return v0 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: movn w0, #8 +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret + +function %f() -> i64 { +block0: + v0 = iconst.i64 0xfffffffffffffff7 + return v0 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: movn x0, #8 +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret diff --git a/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif b/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif index 8ad0393669..f4f8bd78bf 100644 --- a/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif @@ -100,8 +100,8 @@ block0(v0: i64): ; nextln: mov fp, sp ; nextln: subs xzr, sp, x0 ; nextln: b.hs 8 ; udf -; nextln: movz x17, #6784 -; nextln: movk x17, #6, LSL #16 +; nextln: movz w17, #6784 +; nextln: movk w17, #6, LSL #16 ; nextln: add x16, x0, x17, UXTX ; nextln: subs xzr, sp, x16 ; nextln: b.hs 8 ; udf @@ -149,8 +149,8 @@ block0(v0: i64): ; nextln: ldur x16, [x16, #4] ; nextln: subs xzr, sp, x16 ; nextln: b.hs 8 ; udf -; nextln: movz x17, #6784 -; nextln: movk x17, #6, LSL #16 +; nextln: movz w17, #6784 +; nextln: movk w17, #6, LSL #16 ; nextln: add x16, x16, x17, UXTX ; nextln: subs xzr, sp, x16 ; nextln: b.hs 8 ; udf @@ -171,7 +171,7 @@ block0(v0: i64): ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp -; nextln: movz x16, #6784 ; movk x16, #6, LSL #16 ; add x16, x0, x16, UXTX ; ldr x16, [x16] +; nextln: movz w16, #6784 ; movk w16, #6, LSL #16 ; add x16, x0, x16, UXTX ; ldr x16, [x16] ; nextln: add x16, x16, #32 ; nextln: subs xzr, sp, x16 ; nextln: b.hs 8 ; udf