arm64: Don't always materialise a 64-bit constant

This improves the mov/movk/movn sequence emitted when the high half of the
64-bit value is all zero.

Copyright (c) 2020, Arm Limited.
Joey Gouly
2020-09-01 13:29:01 +01:00
parent c9ec60dcce
commit 650d48cd84
6 changed files with 130 additions and 29 deletions
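
As a rough illustration of the effect (the "before" sequence is read off the old lowering code further down in this diff, not from a test): a constant such as 0xfffffff7 has an all-zero high half, so instead of the old two-instruction movz/movk pair it can now be materialised with a single `movn w0, #8`: the 32-bit MOVN writes the complement of 8 into w0, and writes to a W register zero-extend into the full X register. A minimal standalone check of that arithmetic (plain Rust, not Cranelift code):

    // `movn w0, #8` computes the 32-bit complement of 8; the W-register write
    // zero-extends, so the 64-bit register holds 0x0000_0000_ffff_fff7.
    fn main() {
        let w0: u32 = !8; // what `movn w0, #8` produces
        assert_eq!(w0 as u64, 0x0000_0000_ffff_fff7);
        // When the high half is not zero, the 64-bit form is still required:
        assert_eq!(!8u64, 0xffff_ffff_ffff_fff7); // `movn x0, #8`
    }

The new filetests near the end of this diff exercise exactly these two cases.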

View File

@@ -170,7 +170,7 @@ fn enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32 {
 }
 }
-const MOVE_WIDE_FIXED: u32 = 0x92800000;
+const MOVE_WIDE_FIXED: u32 = 0x12800000;
 #[repr(u32)]
 enum MoveWideOpcode {
@@ -179,9 +179,15 @@ enum MoveWideOpcode {
     MOVK = 0b11,
 }

-fn enc_move_wide(op: MoveWideOpcode, rd: Writable<Reg>, imm: MoveWideConst) -> u32 {
+fn enc_move_wide(
+    op: MoveWideOpcode,
+    rd: Writable<Reg>,
+    imm: MoveWideConst,
+    size: OperandSize,
+) -> u32 {
     assert!(imm.shift <= 0b11);
     MOVE_WIDE_FIXED
+        | size.sf_bit() << 31
         | (op as u32) << 29
         | u32::from(imm.shift) << 21
         | u32::from(imm.bits) << 5
@@ -1029,9 +1035,15 @@ impl MachInstEmit for Inst {
                 // Encoded as ORR rd, rm, zero.
                 sink.put4(enc_arith_rrr(0b00101010_000, 0b000_000, rd, zero_reg(), rm));
             }
-            &Inst::MovZ { rd, imm } => sink.put4(enc_move_wide(MoveWideOpcode::MOVZ, rd, imm)),
-            &Inst::MovN { rd, imm } => sink.put4(enc_move_wide(MoveWideOpcode::MOVN, rd, imm)),
-            &Inst::MovK { rd, imm } => sink.put4(enc_move_wide(MoveWideOpcode::MOVK, rd, imm)),
+            &Inst::MovZ { rd, imm, size } => {
+                sink.put4(enc_move_wide(MoveWideOpcode::MOVZ, rd, imm, size))
+            }
+            &Inst::MovN { rd, imm, size } => {
+                sink.put4(enc_move_wide(MoveWideOpcode::MOVN, rd, imm, size))
+            }
+            &Inst::MovK { rd, imm, size } => {
+                sink.put4(enc_move_wide(MoveWideOpcode::MOVK, rd, imm, size))
+            }
             &Inst::CSel { rd, rn, rm, cond } => {
                 sink.put4(enc_csel(rd, rn, rm, cond));
             }
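
For reference, a compact standalone sketch of the encoding change above (illustrative integer parameters rather than Cranelift's `Writable<Reg>` and `MoveWideConst` types): bit 31 is the `sf` field that selects the W or X form of MOVZ/MOVN/MOVK, which is why the fixed pattern drops from 0x92800000 to 0x12800000 and `size.sf_bit() << 31` is ORed back in. The asserts reproduce the two `movz ..., #65535, LSL #16` words from the emit tests in the next file.

    // Sketch of the AArch64 move-wide encoding; sf = 1 selects the 64-bit form.
    const MOVE_WIDE_FIXED: u32 = 0x12800000;

    #[allow(dead_code)]
    enum MoveWideOpcode {
        MOVN = 0b00,
        MOVZ = 0b10,
        MOVK = 0b11,
    }

    fn enc_move_wide(op: MoveWideOpcode, sf: u32, rd: u32, imm16: u32, shift: u32) -> u32 {
        assert!(sf <= 1 && rd < 32 && imm16 <= 0xffff && shift <= 0b11);
        MOVE_WIDE_FIXED | sf << 31 | (op as u32) << 29 | shift << 21 | imm16 << 5 | rd
    }

    fn main() {
        // movz x8, #65535, LSL #16 -> word 0xD2BFFFE8 (test bytes "E8FFBFD2")
        assert_eq!(enc_move_wide(MoveWideOpcode::MOVZ, 1, 8, 0xffff, 1), 0xD2BF_FFE8);
        // movz w8, #65535, LSL #16 -> word 0x52BFFFE8 (new test bytes "E8FFBF52")
        assert_eq!(enc_move_wide(MoveWideOpcode::MOVZ, 0, 8, 0xffff, 1), 0x52BF_FFE8);
    }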

View File

@@ -1370,8 +1370,8 @@ fn test_aarch64_binemit() {
             mem: AMode::FPOffset(1048576 + 1, I8), // 2^20 + 1
             srcloc: None,
         },
-        "300080D21002A0F2B063308B010240F9",
-        "movz x16, #1 ; movk x16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
+        "300080521002A072B063308B010240F9",
+        "movz w16, #1 ; movk w16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
     ));
     insns.push((
@@ -1674,6 +1674,7 @@ fn test_aarch64_binemit() {
         Inst::MovZ {
             rd: writable_xreg(8),
             imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(),
+            size: OperandSize::Size64,
         },
         "E8FF9FD2",
         "movz x8, #65535",
@@ -1682,6 +1683,7 @@ fn test_aarch64_binemit() {
         Inst::MovZ {
             rd: writable_xreg(8),
             imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(),
+            size: OperandSize::Size64,
         },
         "E8FFBFD2",
         "movz x8, #65535, LSL #16",
@@ -1690,6 +1692,7 @@ fn test_aarch64_binemit() {
         Inst::MovZ {
             rd: writable_xreg(8),
             imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(),
+            size: OperandSize::Size64,
         },
         "E8FFDFD2",
         "movz x8, #65535, LSL #32",
@@ -1698,15 +1701,26 @@ fn test_aarch64_binemit() {
         Inst::MovZ {
             rd: writable_xreg(8),
             imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(),
+            size: OperandSize::Size64,
         },
         "E8FFFFD2",
         "movz x8, #65535, LSL #48",
     ));
+    insns.push((
+        Inst::MovZ {
+            rd: writable_xreg(8),
+            imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(),
+            size: OperandSize::Size32,
+        },
+        "E8FFBF52",
+        "movz w8, #65535, LSL #16",
+    ));
     insns.push((
         Inst::MovN {
             rd: writable_xreg(8),
             imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(),
+            size: OperandSize::Size64,
         },
         "E8FF9F92",
         "movn x8, #65535",
@@ -1715,6 +1729,7 @@ fn test_aarch64_binemit() {
         Inst::MovN {
             rd: writable_xreg(8),
             imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(),
+            size: OperandSize::Size64,
         },
         "E8FFBF92",
         "movn x8, #65535, LSL #16",
@@ -1723,6 +1738,7 @@ fn test_aarch64_binemit() {
         Inst::MovN {
             rd: writable_xreg(8),
             imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(),
+            size: OperandSize::Size64,
         },
         "E8FFDF92",
         "movn x8, #65535, LSL #32",
@@ -1731,15 +1747,26 @@ fn test_aarch64_binemit() {
         Inst::MovN {
             rd: writable_xreg(8),
             imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(),
+            size: OperandSize::Size64,
         },
         "E8FFFF92",
         "movn x8, #65535, LSL #48",
     ));
+    insns.push((
+        Inst::MovN {
+            rd: writable_xreg(8),
+            imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(),
+            size: OperandSize::Size32,
+        },
+        "E8FF9F12",
+        "movn w8, #65535",
+    ));
     insns.push((
         Inst::MovK {
             rd: writable_xreg(12),
             imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_0000).unwrap(),
+            size: OperandSize::Size64,
         },
         "0C0080F2",
         "movk x12, #0",
@@ -1748,6 +1775,7 @@ fn test_aarch64_binemit() {
         Inst::MovK {
             rd: writable_xreg(19),
             imm: MoveWideConst::maybe_with_shift(0x0000, 16).unwrap(),
+            size: OperandSize::Size64,
         },
         "1300A0F2",
         "movk x19, #0, LSL #16",
@@ -1756,6 +1784,7 @@ fn test_aarch64_binemit() {
         Inst::MovK {
             rd: writable_xreg(3),
             imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(),
+            size: OperandSize::Size64,
         },
         "E3FF9FF2",
         "movk x3, #65535",
@@ -1764,6 +1793,7 @@ fn test_aarch64_binemit() {
         Inst::MovK {
             rd: writable_xreg(8),
             imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(),
+            size: OperandSize::Size64,
         },
         "E8FFBFF2",
         "movk x8, #65535, LSL #16",
@@ -1772,6 +1802,7 @@ fn test_aarch64_binemit() {
         Inst::MovK {
             rd: writable_xreg(8),
             imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(),
+            size: OperandSize::Size64,
         },
         "E8FFDFF2",
         "movk x8, #65535, LSL #32",
@@ -1780,6 +1811,7 @@ fn test_aarch64_binemit() {
         Inst::MovK {
             rd: writable_xreg(8),
             imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(),
+            size: OperandSize::Size64,
         },
         "E8FFFFF2",
         "movk x8, #65535, LSL #48",

View File

@@ -587,18 +587,21 @@ pub enum Inst {
     MovZ {
         rd: Writable<Reg>,
         imm: MoveWideConst,
+        size: OperandSize,
     },
     /// A MOVN with a 16-bit immediate.
     MovN {
         rd: Writable<Reg>,
         imm: MoveWideConst,
+        size: OperandSize,
     },
     /// A MOVK with a 16-bit immediate.
     MovK {
         rd: Writable<Reg>,
         imm: MoveWideConst,
+        size: OperandSize,
     },
     /// A sign- or zero-extend operation.
@@ -1122,9 +1125,9 @@ pub enum Inst {
     },
 }
-fn count_zero_half_words(mut value: u64) -> usize {
+fn count_zero_half_words(mut value: u64, num_half_words: u8) -> usize {
     let mut count = 0;
-    for _ in 0..4 {
+    for _ in 0..num_half_words {
         if value & 0xffff == 0 {
             count += 1;
         }
@@ -1176,10 +1179,18 @@ impl Inst {
     pub fn load_constant(rd: Writable<Reg>, value: u64) -> SmallVec<[Inst; 4]> {
         if let Some(imm) = MoveWideConst::maybe_from_u64(value) {
             // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVZ
-            smallvec![Inst::MovZ { rd, imm }]
+            smallvec![Inst::MovZ {
+                rd,
+                imm,
+                size: OperandSize::Size64
+            }]
         } else if let Some(imm) = MoveWideConst::maybe_from_u64(!value) {
             // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVN
-            smallvec![Inst::MovN { rd, imm }]
+            smallvec![Inst::MovN {
+                rd,
+                imm,
+                size: OperandSize::Size64
+            }]
         } else if let Some(imml) = ImmLogic::maybe_from_u64(value, I64) {
             // Weird logical-instruction immediate in ORI using zero register
             smallvec![Inst::AluRRImmLogic {
@@ -1191,15 +1202,22 @@ impl Inst {
         } else {
             let mut insts = smallvec![];
+            // If the top 32 bits are zero, use 32-bit `mov` operations.
+            let (num_half_words, size, negated) = if value >> 32 == 0 {
+                (2, OperandSize::Size32, (!value << 32) >> 32)
+            } else {
+                (4, OperandSize::Size64, !value)
+            };
             // If the number of 0xffff half words is greater than the number of 0x0000 half words
             // it is more efficient to use `movn` for the first instruction.
-            let first_is_inverted = count_zero_half_words(!value) > count_zero_half_words(value);
+            let first_is_inverted = count_zero_half_words(negated, num_half_words)
+                > count_zero_half_words(value, num_half_words);
             // Either 0xffff or 0x0000 half words can be skipped, depending on the first
             // instruction used.
             let ignored_halfword = if first_is_inverted { 0xffff } else { 0 };
             let mut first_mov_emitted = false;
-            for i in 0..4 {
+            for i in 0..num_half_words {
                 let imm16 = (value >> (16 * i)) & 0xffff;
                 if imm16 != ignored_halfword {
                     if !first_mov_emitted {
@@ -1208,15 +1226,15 @@ impl Inst {
                             let imm =
                                 MoveWideConst::maybe_with_shift(((!imm16) & 0xffff) as u16, i * 16)
                                     .unwrap();
-                            insts.push(Inst::MovN { rd, imm });
+                            insts.push(Inst::MovN { rd, imm, size });
                         } else {
                             let imm =
                                 MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap();
-                            insts.push(Inst::MovZ { rd, imm });
+                            insts.push(Inst::MovZ { rd, imm, size });
                         }
                     } else {
                         let imm = MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap();
-                        insts.push(Inst::MovK { rd, imm });
+                        insts.push(Inst::MovK { rd, imm, size });
                     }
                 }
             }
@@ -2870,18 +2888,18 @@ impl Inst {
                 let rm = show_ireg_sized(rm, mb_rru, OperandSize::Size32);
                 format!("mov {}, {}", rd, rm)
             }
-            &Inst::MovZ { rd, ref imm } => {
-                let rd = rd.to_reg().show_rru(mb_rru);
+            &Inst::MovZ { rd, ref imm, size } => {
+                let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
                 let imm = imm.show_rru(mb_rru);
                 format!("movz {}, {}", rd, imm)
             }
-            &Inst::MovN { rd, ref imm } => {
-                let rd = rd.to_reg().show_rru(mb_rru);
+            &Inst::MovN { rd, ref imm, size } => {
+                let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
                 let imm = imm.show_rru(mb_rru);
                 format!("movn {}, {}", rd, imm)
             }
-            &Inst::MovK { rd, ref imm } => {
-                let rd = rd.to_reg().show_rru(mb_rru);
+            &Inst::MovK { rd, ref imm, size } => {
+                let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
                 let imm = imm.show_rru(mb_rru);
                 format!("movk {}, {}", rd, imm)
             }
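
Putting the `load_constant` pieces above together, here is a hedged standalone sketch of the resulting selection. It emits strings instead of `Inst`s, hard-codes register 0, and skips the earlier single-instruction and logical-immediate fast paths, though for the inputs checked below the final sequence comes out the same.

    // Text-only model of the movz/movn/movk selection shown above.
    fn count_zero_half_words(mut value: u64, num_half_words: u8) -> usize {
        let mut count = 0;
        for _ in 0..num_half_words {
            if value & 0xffff == 0 {
                count += 1;
            }
            value >>= 16;
        }
        count
    }

    fn lower_constant(value: u64) -> Vec<String> {
        // If the top 32 bits are zero, 32-bit moves suffice: W writes zero-extend.
        let (num_half_words, reg, negated) = if value >> 32 == 0 {
            (2u8, "w0", (!value << 32) >> 32)
        } else {
            (4u8, "x0", !value)
        };
        // Prefer starting with movn when more half-words are 0xffff than 0x0000.
        let first_is_inverted = count_zero_half_words(negated, num_half_words)
            > count_zero_half_words(value, num_half_words);
        let ignored = if first_is_inverted { 0xffff } else { 0 };
        let mut insts = Vec::new();
        for i in 0..num_half_words {
            let shift = 16 * u32::from(i);
            let imm16 = (value >> shift) & 0xffff;
            let lsl = if shift == 0 {
                String::new()
            } else {
                format!(", LSL #{}", shift)
            };
            if imm16 != ignored {
                if insts.is_empty() {
                    if first_is_inverted {
                        insts.push(format!("movn {}, #{}{}", reg, (!imm16) & 0xffff, lsl));
                    } else {
                        insts.push(format!("movz {}, #{}{}", reg, imm16, lsl));
                    }
                } else {
                    insts.push(format!("movk {}, #{}{}", reg, imm16, lsl));
                }
            }
        }
        insts
    }

    fn main() {
        // Matches the new filetests further down: one movn, in the right width.
        assert_eq!(lower_constant(0xffff_fff7), ["movn w0, #8"]);
        assert_eq!(lower_constant(0xffff_ffff_ffff_fff7), ["movn x0, #8"]);
        // Matches the updated amode test (register name aside).
        assert_eq!(
            lower_constant(1_000_000_000),
            ["movz w0, #51712", "movk w0, #15258, LSL #16"]
        );
    }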

View File

@@ -230,8 +230,8 @@ block0(v0: i64):
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: movz x1, #51712
-; nextln: movk x1, #15258, LSL #16
+; nextln: movz w1, #51712
+; nextln: movk w1, #15258, LSL #16
 ; nextln: add x0, x1, x0
 ; nextln: ldr w0, [x0]
 ; nextln: mov sp, fp
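
As a quick sanity check on the new expectation above (plain arithmetic, not taken from the test file): the constant materialised into w1 is 15258 * 2^16 + 51712 = 1,000,000,000 (0x3B9A_CA00). Its upper 32 bits are zero, so the 32-bit forms are enough, and the following add that reads x1 still sees the full zero-extended value.

    // Recover the constant from the movz/movk pair in the expected output.
    fn main() {
        let value: u64 = (15258 << 16) | 51712; // movk #15258, LSL #16 over movz #51712
        assert_eq!(value, 1_000_000_000);       // 0x3B9A_CA00
        assert_eq!(value >> 32, 0);             // high half is zero, so W forms suffice
    }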

View File

@@ -213,3 +213,42 @@ block0:
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
+
+function %f() -> i32 {
+block0:
+    v0 = iconst.i32 0xfffffff7
+    return v0
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: movn w0, #8
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f() -> i64 {
+block0:
+    v0 = iconst.i64 0xfffffff7
+    return v0
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: movn w0, #8
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f() -> i64 {
+block0:
+    v0 = iconst.i64 0xfffffffffffffff7
+    return v0
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: movn x0, #8
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret

View File

@@ -100,8 +100,8 @@ block0(v0: i64):
 ; nextln: mov fp, sp
 ; nextln: subs xzr, sp, x0
 ; nextln: b.hs 8 ; udf
-; nextln: movz x17, #6784
-; nextln: movk x17, #6, LSL #16
+; nextln: movz w17, #6784
+; nextln: movk w17, #6, LSL #16
 ; nextln: add x16, x0, x17, UXTX
 ; nextln: subs xzr, sp, x16
 ; nextln: b.hs 8 ; udf
@@ -149,8 +149,8 @@ block0(v0: i64):
 ; nextln: ldur x16, [x16, #4]
 ; nextln: subs xzr, sp, x16
 ; nextln: b.hs 8 ; udf
-; nextln: movz x17, #6784
-; nextln: movk x17, #6, LSL #16
+; nextln: movz w17, #6784
+; nextln: movk w17, #6, LSL #16
 ; nextln: add x16, x16, x17, UXTX
 ; nextln: subs xzr, sp, x16
 ; nextln: b.hs 8 ; udf
@@ -171,7 +171,7 @@ block0(v0: i64):
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: movz x16, #6784 ; movk x16, #6, LSL #16 ; add x16, x0, x16, UXTX ; ldr x16, [x16]
+; nextln: movz w16, #6784 ; movk w16, #6, LSL #16 ; add x16, x0, x16, UXTX ; ldr x16, [x16]
 ; nextln: add x16, x16, #32
 ; nextln: subs xzr, sp, x16
 ; nextln: b.hs 8 ; udf