Merge pull request #2823 from akirilov-arm/callee_saves

Cranelift AArch64: Improve the handling of callee-saved registers
Chris Fallin, 2021-04-13 15:35:46 -07:00, committed by GitHub
6 changed files with 747 additions and 74 deletions


@@ -135,12 +135,28 @@ impl Into<AMode> for StackAMode {
// Returns the size of stack space needed to store the
// `int_reg` and `vec_reg`.
fn saved_reg_stack_size(
call_conv: isa::CallConv,
int_reg: &[Writable<RealReg>],
vec_reg: &[Writable<RealReg>],
) -> (usize, usize) {
// Round up to multiple of 2, to keep 16-byte stack alignment.
let int_save_bytes = (int_reg.len() + (int_reg.len() & 1)) * 8;
// The Baldrdash ABIs require saving and restoring the whole 16-byte
// SIMD & FP registers, so the necessary stack space is always a
// multiple of the mandatory 16-byte stack alignment. However, the
// Procedure Call Standard for the Arm 64-bit Architecture (AAPCS64,
// including several related ABIs such as the one used by Windows)
// mandates saving only the bottom 8 bytes of the vector registers,
// so in that case we round up the number of registers to ensure proper
// stack alignment (similarly to the situation with `int_reg`).
let vec_reg_size = if call_conv.extends_baldrdash() { 16 } else { 8 };
let vec_save_padding = if call_conv.extends_baldrdash() {
0
} else {
vec_reg.len() & 1
};
let vec_save_bytes = (vec_reg.len() + vec_save_padding) * vec_reg_size;
(int_save_bytes, vec_save_bytes)
}
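Editor's note: the hunk above shows only the resulting function, so here is a small standalone sketch of the same rounding rule (the name saved_reg_stack_size_sketch and the bare extends_baldrdash flag are illustrative, not Cranelift API). It reproduces the 8-byte-per-vector-register AAPCS64 case versus the 16-byte Baldrdash case, padding each class to a multiple of 16 bytes:

// Editor's sketch (not part of the patch): a standalone mirror of the
// rounding logic above, assuming AAPCS64-style saves use 8 bytes per
// vector register and Baldrdash-style saves use the full 16 bytes.
fn saved_reg_stack_size_sketch(
    extends_baldrdash: bool,
    int_regs: usize,
    vec_regs: usize,
) -> (usize, usize) {
    // Integer registers are always saved as 8-byte slots, padded to an
    // even count so the total stays 16-byte aligned.
    let int_save_bytes = (int_regs + (int_regs & 1)) * 8;
    // Vector registers: 16 bytes each under Baldrdash (already aligned),
    // 8 bytes each otherwise, padded to an even count for alignment.
    let (vec_reg_size, vec_save_padding) = if extends_baldrdash {
        (16, 0)
    } else {
        (8, vec_regs & 1)
    };
    let vec_save_bytes = (vec_regs + vec_save_padding) * vec_reg_size;
    (int_save_bytes, vec_save_bytes)
}

fn main() {
    // Three clobbered integer and three clobbered vector registers:
    // AAPCS64 needs 32 + 32 bytes, a Baldrdash ABI needs 32 + 48 bytes.
    assert_eq!(saved_reg_stack_size_sketch(false, 3, 3), (32, 32));
    assert_eq!(saved_reg_stack_size_sketch(true, 3, 3), (32, 48));
}

With three clobbered registers of each class, AAPCS64 therefore reserves 64 bytes of clobber space in total, while a Baldrdash ABI reserves 80.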
@@ -591,7 +607,8 @@ impl ABIMachineSpec for AArch64MachineDeps {
let mut insts = SmallVec::new();
let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers);
let (int_save_bytes, vec_save_bytes) =
saved_reg_stack_size(call_conv, &clobbered_int, &clobbered_vec);
let total_save_bytes = int_save_bytes + vec_save_bytes;
let clobber_size = total_save_bytes as i32;
@@ -620,59 +637,170 @@ impl ABIMachineSpec for AArch64MachineDeps {
// `frame_offset` tracks offset above start-of-clobbers for unwind-info
// purposes.
let mut clobber_offset = clobber_size as u32;
let clobber_offset_change = 16;
let iter = clobbered_int.chunks_exact(2);

if let [rd] = iter.remainder() {
let rd = rd.to_reg().to_reg();
debug_assert_eq!(rd.get_class(), RegClass::I64);
// str rd, [sp, #-16]!
insts.push(Inst::Store64 {
rd,
mem: AMode::PreIndexed(
writable_stack_reg(),
SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
),
flags: MemFlags::trusted(),
});
if flags.unwind_info() {
clobber_offset -= clobber_offset_change as u32;
insts.push(Inst::Unwind {
inst: UnwindInst::SaveReg {
clobber_offset,
reg: rd.to_real_reg(),
},
});
}
}

let mut iter = iter.rev();

while let Some([rt, rt2]) = iter.next() {
// .to_reg().to_reg(): Writable<RealReg> --> RealReg --> Reg
let rt = rt.to_reg().to_reg();
let rt2 = rt2.to_reg().to_reg();
debug_assert!(rt.get_class() == RegClass::I64);
debug_assert!(rt2.get_class() == RegClass::I64);
// stp rt, rt2, [sp, #-16]!
insts.push(Inst::StoreP64 {
rt,
rt2,
mem: PairAMode::PreIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(-clobber_offset_change, types::I64).unwrap(),
),
flags: MemFlags::trusted(),
});
if flags.unwind_info() {
clobber_offset -= clobber_offset_change as u32;
insts.push(Inst::Unwind {
inst: UnwindInst::SaveReg {
clobber_offset,
reg: rt.to_real_reg(),
},
});
insts.push(Inst::Unwind {
inst: UnwindInst::SaveReg {
clobber_offset: clobber_offset + (clobber_offset_change / 2) as u32,
reg: rt2.to_real_reg(),
},
});
}
}
let store_vec_reg = |rd| {
if call_conv.extends_baldrdash() {
Inst::FpuStore128 {
rd,
mem: AMode::PreIndexed(
writable_stack_reg(),
SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
),
flags: MemFlags::trusted(),
}
} else {
Inst::FpuStore64 {
rd,
mem: AMode::PreIndexed(
writable_stack_reg(),
SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
),
flags: MemFlags::trusted(),
}
}
};
let iter = clobbered_vec.chunks_exact(2);
if let [rd] = iter.remainder() {
let rd = rd.to_reg().to_reg();
debug_assert_eq!(rd.get_class(), RegClass::V128);
insts.push(store_vec_reg(rd));
if flags.unwind_info() {
clobber_offset -= clobber_offset_change as u32;
insts.push(Inst::Unwind {
inst: UnwindInst::SaveReg {
clobber_offset,
reg: rd.to_real_reg(),
},
});
}
}
let store_vec_reg_pair = |rt, rt2| {
if call_conv.extends_baldrdash() {
let clobber_offset_change = 32;
(
Inst::FpuStoreP128 {
rt,
rt2,
mem: PairAMode::PreIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(-clobber_offset_change, I8X16).unwrap(),
),
flags: MemFlags::trusted(),
},
clobber_offset_change as u32,
)
} else {
let clobber_offset_change = 16;
(
Inst::FpuStoreP64 {
rt,
rt2,
mem: PairAMode::PreIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(-clobber_offset_change, F64).unwrap(),
),
flags: MemFlags::trusted(),
},
clobber_offset_change as u32,
)
}
};
let mut iter = iter.rev();
while let Some([rt, rt2]) = iter.next() {
let rt = rt.to_reg().to_reg();
let rt2 = rt2.to_reg().to_reg();
debug_assert_eq!(rt.get_class(), RegClass::V128);
debug_assert_eq!(rt2.get_class(), RegClass::V128);
let (inst, clobber_offset_change) = store_vec_reg_pair(rt, rt2);
insts.push(inst);
if flags.unwind_info() {
clobber_offset -= clobber_offset_change;
insts.push(Inst::Unwind {
inst: UnwindInst::SaveReg {
clobber_offset,
reg: rt.to_real_reg(),
},
});
insts.push(Inst::Unwind {
inst: UnwindInst::SaveReg {
clobber_offset: clobber_offset + clobber_offset_change / 2,
reg: rt2.to_real_reg(),
},
});
}
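Editor's note: a minimal sketch (not from the patch) of the save/restore ordering the new prologue and epilogue establish with chunks_exact(2): the odd leftover register is pushed first with a pre-indexed str, the full pairs follow in reverse with pre-indexed stp, and the epilogue walks the same list forward with post-indexed ldp/ldr. The register names below are plain strings used only for illustration.

// Editor's sketch: pairing and ordering of clobber saves/restores.
fn main() {
    let clobbered_int = ["x19", "x20", "x21", "x22", "x23"];

    // Prologue: the odd leftover register (if any) is pushed first with a
    // single pre-indexed `str`, so it sits at the top of the clobber area...
    if let [rd] = clobbered_int.chunks_exact(2).remainder() {
        println!("str {}, [sp, #-16]!", rd);
    }
    // ...and the full pairs are pushed in reverse with pre-indexed `stp`.
    for pair in clobbered_int.chunks_exact(2).rev() {
        println!("stp {}, {}, [sp, #-16]!", pair[0], pair[1]);
    }

    // Epilogue mirrors the prologue: pairs forward with post-indexed `ldp`,
    // then the leftover register with a post-indexed `ldr`.
    for pair in clobbered_int.chunks_exact(2) {
        println!("ldp {}, {}, [sp], #16", pair[0], pair[1]);
    }
    if let [rd] = clobbered_int.chunks_exact(2).remainder() {
        println!("ldr {}, [sp], #16", rd);
    }
}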
@@ -700,31 +828,83 @@ impl ABIMachineSpec for AArch64MachineDeps {
insts.extend(Self::gen_sp_reg_adjust(fixed_frame_storage_size as i32));
}

let load_vec_reg = |rd| {
if call_conv.extends_baldrdash() {
Inst::FpuLoad128 {
rd,
mem: AMode::PostIndexed(
writable_stack_reg(),
SImm9::maybe_from_i64(16).unwrap(),
),
flags: MemFlags::trusted(),
}
} else {
Inst::FpuLoad64 {
rd,
mem: AMode::PostIndexed(
writable_stack_reg(),
SImm9::maybe_from_i64(16).unwrap(),
),
flags: MemFlags::trusted(),
}
}
};
let load_vec_reg_pair = |rt, rt2| {
if call_conv.extends_baldrdash() {
Inst::FpuLoadP128 {
rt,
rt2,
mem: PairAMode::PostIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(32, I8X16).unwrap(),
),
flags: MemFlags::trusted(),
}
} else {
Inst::FpuLoadP64 {
rt,
rt2,
mem: PairAMode::PostIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(16, F64).unwrap(),
),
flags: MemFlags::trusted(),
}
}
};
let mut iter = clobbered_vec.chunks_exact(2);
while let Some([rt, rt2]) = iter.next() {
let rt = rt.map(|r| r.to_reg());
let rt2 = rt2.map(|r| r.to_reg());
debug_assert_eq!(rt.to_reg().get_class(), RegClass::V128);
debug_assert_eq!(rt2.to_reg().get_class(), RegClass::V128);
insts.push(load_vec_reg_pair(rt, rt2));
}

debug_assert!(iter.remainder().len() <= 1);

if let [rd] = iter.remainder() {
let rd = rd.map(|r| r.to_reg());
debug_assert_eq!(rd.to_reg().get_class(), RegClass::V128);
insts.push(load_vec_reg(rd));
}
let mut iter = clobbered_int.chunks_exact(2);
while let Some([rt, rt2]) = iter.next() {
let rt = rt.map(|r| r.to_reg());
let rt2 = rt2.map(|r| r.to_reg());
debug_assert_eq!(rt.to_reg().get_class(), RegClass::I64);
debug_assert_eq!(rt2.to_reg().get_class(), RegClass::I64);
// ldp rt, rt2, [sp], #16
insts.push(Inst::LoadP64 {
rt,
rt2,
mem: PairAMode::PostIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(16, I64).unwrap(),
@@ -733,6 +913,20 @@ impl ABIMachineSpec for AArch64MachineDeps {
});
}
debug_assert!(iter.remainder().len() <= 1);
if let [rd] = iter.remainder() {
let rd = rd.map(|r| r.to_reg());
debug_assert_eq!(rd.to_reg().get_class(), RegClass::I64);
// ldr rd, [sp], #16
insts.push(Inst::ULoad64 {
rd,
mem: AMode::PostIndexed(writable_stack_reg(), SImm9::maybe_from_i64(16).unwrap()),
flags: MemFlags::trusted(),
});
}
// If this is Baldrdash-2020, restore the callee (i.e., our) TLS
// register. We may have allocated it for something else and clobbered
// it, but the ABI expects us to leave the TLS register unchanged.


@@ -258,6 +258,28 @@ fn enc_ldst_vec(q: u32, size: u32, rn: Reg, rt: Writable<Reg>) -> u32 {
| machreg_to_vec(rt.to_reg())
}
fn enc_ldst_vec_pair(
opc: u32,
amode: u32,
is_load: bool,
simm7: SImm7Scaled,
rn: Reg,
rt: Reg,
rt2: Reg,
) -> u32 {
debug_assert_eq!(opc & 0b11, opc);
debug_assert_eq!(amode & 0b11, amode);
0b00_10110_00_0_0000000_00000_00000_00000
| opc << 30
| amode << 23
| (is_load as u32) << 22
| simm7.bits() << 15
| machreg_to_vec(rt2) << 10
| machreg_to_gpr(rn) << 5
| machreg_to_vec(rt)
}
fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
(top11 << 21)
| (machreg_to_vec(rm) << 16)
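Editor's note: a standalone sketch of the bit layout that enc_ldst_vec_pair packs, checked against one of the test vectors added later in this commit (ldp d0, d31, [x0], encoded as the little-endian bytes 00 7C 40 6D). The helper name and the plain u32 register numbers are illustrative only; the real function takes Cranelift Reg values.

// Editor's sketch: the LDP/STP (SIMD&FP) field packing mirrored above.
// The imm7 value is already scaled, i.e. the byte offset divided by the
// access size (8 for D registers, 16 for Q registers).
fn enc_ldst_vec_pair_sketch(
    opc: u32,   // 0b01 = D-register pair, 0b10 = Q-register pair
    amode: u32, // 0b01 = post-indexed, 0b10 = signed offset, 0b11 = pre-indexed
    is_load: bool,
    imm7: u32, // scaled 7-bit immediate
    rn: u32,   // base register number
    rt: u32,   // first vector register number
    rt2: u32,  // second vector register number
) -> u32 {
    0b00_10110_00_0_0000000_00000_00000_00000
        | opc << 30
        | amode << 23
        | (is_load as u32) << 22
        | imm7 << 15
        | rt2 << 10
        | rn << 5
        | rt
}

fn main() {
    // ldp d0, d31, [x0]: opc=01, signed offset, load, imm7=0, Rn=x0,
    // Rt=d0, Rt2=d31 -> 0x6D407C00 (bytes 00 7C 40 6D, little-endian).
    assert_eq!(
        enc_ldst_vec_pair_sketch(0b01, 0b10, true, 0, 0, 0, 31),
        0x6D40_7C00
    );
}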
@@ -923,7 +945,7 @@ impl MachInstEmit for Inst {
let srcloc = state.cur_srcloc();
if srcloc != SourceLoc::default() && !flags.notrap() {
// Register the offset at which the actual store instruction starts.
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
}
@@ -987,7 +1009,7 @@ impl MachInstEmit for Inst {
} => {
let srcloc = state.cur_srcloc();
if srcloc != SourceLoc::default() && !flags.notrap() {
// Register the offset at which the actual store instruction starts.
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
}
match mem {
@@ -1034,6 +1056,120 @@ impl MachInstEmit for Inst {
}
}
}
&Inst::FpuLoadP64 {
rt,
rt2,
ref mem,
flags,
}
| &Inst::FpuLoadP128 {
rt,
rt2,
ref mem,
flags,
} => {
let srcloc = state.cur_srcloc();
if srcloc != SourceLoc::default() && !flags.notrap() {
// Register the offset at which the actual load instruction starts.
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
}
let opc = match self {
&Inst::FpuLoadP64 { .. } => 0b01,
&Inst::FpuLoadP128 { .. } => 0b10,
_ => unreachable!(),
};
let rt = rt.to_reg();
let rt2 = rt2.to_reg();
match mem {
&PairAMode::SignedOffset(reg, simm7) => {
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
sink.put4(enc_ldst_vec_pair(opc, 0b10, true, simm7, reg, rt, rt2));
}
&PairAMode::PreIndexed(reg, simm7) => {
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
sink.put4(enc_ldst_vec_pair(
opc,
0b11,
true,
simm7,
reg.to_reg(),
rt,
rt2,
));
}
&PairAMode::PostIndexed(reg, simm7) => {
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
sink.put4(enc_ldst_vec_pair(
opc,
0b01,
true,
simm7,
reg.to_reg(),
rt,
rt2,
));
}
}
}
&Inst::FpuStoreP64 {
rt,
rt2,
ref mem,
flags,
}
| &Inst::FpuStoreP128 {
rt,
rt2,
ref mem,
flags,
} => {
let srcloc = state.cur_srcloc();
if srcloc != SourceLoc::default() && !flags.notrap() {
// Register the offset at which the actual store instruction starts.
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
}
let opc = match self {
&Inst::FpuStoreP64 { .. } => 0b01,
&Inst::FpuStoreP128 { .. } => 0b10,
_ => unreachable!(),
};
match mem {
&PairAMode::SignedOffset(reg, simm7) => {
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
sink.put4(enc_ldst_vec_pair(opc, 0b10, false, simm7, reg, rt, rt2));
}
&PairAMode::PreIndexed(reg, simm7) => {
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
sink.put4(enc_ldst_vec_pair(
opc,
0b11,
false,
simm7,
reg.to_reg(),
rt,
rt2,
));
}
&PairAMode::PostIndexed(reg, simm7) => {
assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);
sink.put4(enc_ldst_vec_pair(
opc,
0b01,
false,
simm7,
reg.to_reg(),
rt,
rt2,
));
}
}
}
&Inst::Mov64 { rd, rm } => {
assert!(rd.to_reg().get_class() == rm.get_class());
assert!(rm.get_class() == RegClass::I64);


@@ -5105,6 +5105,168 @@ fn test_aarch64_binemit() {
"str q16, [x8, x9, LSL #4]", "str q16, [x8, x9, LSL #4]",
)); ));
insns.push((
Inst::FpuLoadP64 {
rt: writable_vreg(0),
rt2: writable_vreg(31),
mem: PairAMode::SignedOffset(xreg(0), SImm7Scaled::zero(F64)),
flags: MemFlags::trusted(),
},
"007C406D",
"ldp d0, d31, [x0]",
));
insns.push((
Inst::FpuLoadP64 {
rt: writable_vreg(19),
rt2: writable_vreg(11),
mem: PairAMode::PreIndexed(
writable_xreg(25),
SImm7Scaled::maybe_from_i64(-512, F64).unwrap(),
),
flags: MemFlags::trusted(),
},
"332FE06D",
"ldp d19, d11, [x25, #-512]!",
));
insns.push((
Inst::FpuLoadP64 {
rt: writable_vreg(7),
rt2: writable_vreg(20),
mem: PairAMode::PostIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(64, F64).unwrap(),
),
flags: MemFlags::trusted(),
},
"E753C46C",
"ldp d7, d20, [sp], #64",
));
insns.push((
Inst::FpuStoreP64 {
rt: vreg(4),
rt2: vreg(26),
mem: PairAMode::SignedOffset(
stack_reg(),
SImm7Scaled::maybe_from_i64(504, F64).unwrap(),
),
flags: MemFlags::trusted(),
},
"E4EB1F6D",
"stp d4, d26, [sp, #504]",
));
insns.push((
Inst::FpuStoreP64 {
rt: vreg(16),
rt2: vreg(8),
mem: PairAMode::PreIndexed(
writable_xreg(15),
SImm7Scaled::maybe_from_i64(48, F64).unwrap(),
),
flags: MemFlags::trusted(),
},
"F021836D",
"stp d16, d8, [x15, #48]!",
));
insns.push((
Inst::FpuStoreP64 {
rt: vreg(5),
rt2: vreg(6),
mem: PairAMode::PostIndexed(
writable_xreg(28),
SImm7Scaled::maybe_from_i64(-32, F64).unwrap(),
),
flags: MemFlags::trusted(),
},
"851BBE6C",
"stp d5, d6, [x28], #-32",
));
insns.push((
Inst::FpuLoadP128 {
rt: writable_vreg(0),
rt2: writable_vreg(17),
mem: PairAMode::SignedOffset(xreg(3), SImm7Scaled::zero(I8X16)),
flags: MemFlags::trusted(),
},
"604440AD",
"ldp q0, q17, [x3]",
));
insns.push((
Inst::FpuLoadP128 {
rt: writable_vreg(29),
rt2: writable_vreg(9),
mem: PairAMode::PreIndexed(
writable_xreg(16),
SImm7Scaled::maybe_from_i64(-1024, I8X16).unwrap(),
),
flags: MemFlags::trusted(),
},
"1D26E0AD",
"ldp q29, q9, [x16, #-1024]!",
));
insns.push((
Inst::FpuLoadP128 {
rt: writable_vreg(10),
rt2: writable_vreg(20),
mem: PairAMode::PostIndexed(
writable_xreg(26),
SImm7Scaled::maybe_from_i64(256, I8X16).unwrap(),
),
flags: MemFlags::trusted(),
},
"4A53C8AC",
"ldp q10, q20, [x26], #256",
));
insns.push((
Inst::FpuStoreP128 {
rt: vreg(9),
rt2: vreg(31),
mem: PairAMode::SignedOffset(
stack_reg(),
SImm7Scaled::maybe_from_i64(1008, I8X16).unwrap(),
),
flags: MemFlags::trusted(),
},
"E9FF1FAD",
"stp q9, q31, [sp, #1008]",
));
insns.push((
Inst::FpuStoreP128 {
rt: vreg(27),
rt2: vreg(13),
mem: PairAMode::PreIndexed(
writable_stack_reg(),
SImm7Scaled::maybe_from_i64(-192, I8X16).unwrap(),
),
flags: MemFlags::trusted(),
},
"FB37BAAD",
"stp q27, q13, [sp, #-192]!",
));
insns.push((
Inst::FpuStoreP128 {
rt: vreg(18),
rt2: vreg(22),
mem: PairAMode::PostIndexed(
writable_xreg(13),
SImm7Scaled::maybe_from_i64(304, I8X16).unwrap(),
),
flags: MemFlags::trusted(),
},
"B2D989AC",
"stp q18, q22, [x13], #304",
));
insns.push((
Inst::LoadFpuConst64 {
rd: writable_vreg(16),


@@ -73,7 +73,7 @@ impl SImm7Scaled {
/// Create a SImm7Scaled from a raw offset and the known scale type, if
/// possible.
pub fn maybe_from_i64(value: i64, scale_ty: Type) -> Option<SImm7Scaled> {
assert!(scale_ty == I64 || scale_ty == I32 || scale_ty == F64 || scale_ty == I8X16);
let scale = scale_ty.bytes();
assert!(scale.is_power_of_two());
let scale = i64::from(scale);
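Editor's note: a sketch (not Cranelift code) of what the widened assertion permits. The signed 7-bit immediate is stored scaled by the access size, so byte offsets must be multiples of the scale and fall in scale * [-64, 63]; the new F64 and I8X16 scale types give exactly the -512..504 and -1024..1008 ranges exercised by the ldp/stp tests above. The helper name here is illustrative.

// Editor's sketch: range/alignment check for a scaled 7-bit immediate.
fn maybe_scaled_imm7(value: i64, scale: i64) -> Option<i64> {
    // Returns the encoded (scaled) immediate if the byte offset fits.
    if value % scale == 0 && (-64 * scale..=63 * scale).contains(&value) {
        Some(value / scale)
    } else {
        None
    }
}

fn main() {
    assert_eq!(maybe_scaled_imm7(-512, 8), Some(-64)); // ldp d19, d11, [x25, #-512]!
    assert_eq!(maybe_scaled_imm7(504, 8), Some(63)); // stp d4, d26, [sp, #504]
    assert_eq!(maybe_scaled_imm7(-1024, 16), Some(-64)); // ldp q29, q9, [x16, #-1024]!
    assert_eq!(maybe_scaled_imm7(1008, 16), Some(63)); // stp q9, q31, [sp, #1008]
    assert_eq!(maybe_scaled_imm7(512, 8), None); // out of range for D-register pairs
}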


@@ -848,7 +848,34 @@ pub enum Inst {
mem: AMode,
flags: MemFlags,
},
/// A load of a pair of floating-point registers, double precision (64-bit).
FpuLoadP64 {
rt: Writable<Reg>,
rt2: Writable<Reg>,
mem: PairAMode,
flags: MemFlags,
},
/// A store of a pair of floating-point registers, double precision (64-bit).
FpuStoreP64 {
rt: Reg,
rt2: Reg,
mem: PairAMode,
flags: MemFlags,
},
/// A load of a pair of floating-point registers, 128-bit.
FpuLoadP128 {
rt: Writable<Reg>,
rt2: Writable<Reg>,
mem: PairAMode,
flags: MemFlags,
},
/// A store of a pair of floating-point registers, 128-bit.
FpuStoreP128 {
rt: Reg,
rt2: Reg,
mem: PairAMode,
flags: MemFlags,
},
LoadFpuConst64 {
rd: Writable<Reg>,
const_data: u64,
@@ -1908,6 +1935,34 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_use(rd);
memarg_regs(mem, collector);
}
&Inst::FpuLoadP64 {
rt, rt2, ref mem, ..
} => {
collector.add_def(rt);
collector.add_def(rt2);
pairmemarg_regs(mem, collector);
}
&Inst::FpuStoreP64 {
rt, rt2, ref mem, ..
} => {
collector.add_use(rt);
collector.add_use(rt2);
pairmemarg_regs(mem, collector);
}
&Inst::FpuLoadP128 {
rt, rt2, ref mem, ..
} => {
collector.add_def(rt);
collector.add_def(rt2);
pairmemarg_regs(mem, collector);
}
&Inst::FpuStoreP128 {
rt, rt2, ref mem, ..
} => {
collector.add_use(rt);
collector.add_use(rt2);
pairmemarg_regs(mem, collector);
}
&Inst::LoadFpuConst64 { rd, .. } | &Inst::LoadFpuConst128 { rd, .. } => {
collector.add_def(rd);
}
@@ -2590,6 +2645,46 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
map_use(mapper, rd);
map_mem(mapper, mem);
}
&mut Inst::FpuLoadP64 {
ref mut rt,
ref mut rt2,
ref mut mem,
..
} => {
map_def(mapper, rt);
map_def(mapper, rt2);
map_pairmem(mapper, mem);
}
&mut Inst::FpuStoreP64 {
ref mut rt,
ref mut rt2,
ref mut mem,
..
} => {
map_use(mapper, rt);
map_use(mapper, rt2);
map_pairmem(mapper, mem);
}
&mut Inst::FpuLoadP128 {
ref mut rt,
ref mut rt2,
ref mut mem,
..
} => {
map_def(mapper, rt);
map_def(mapper, rt2);
map_pairmem(mapper, mem);
}
&mut Inst::FpuStoreP128 {
ref mut rt,
ref mut rt2,
ref mut mem,
..
} => {
map_use(mapper, rt);
map_use(mapper, rt2);
map_pairmem(mapper, mem);
}
&mut Inst::LoadFpuConst64 { ref mut rd, .. } => {
map_def(mapper, rd);
}
@@ -3508,6 +3603,42 @@ impl Inst {
let mem = mem.show_rru(mb_rru);
format!("{}str {}, {}", mem_str, rd, mem)
}
&Inst::FpuLoadP64 {
rt, rt2, ref mem, ..
} => {
let rt = show_vreg_scalar(rt.to_reg(), mb_rru, ScalarSize::Size64);
let rt2 = show_vreg_scalar(rt2.to_reg(), mb_rru, ScalarSize::Size64);
let mem = mem.show_rru(mb_rru);
format!("ldp {}, {}, {}", rt, rt2, mem)
}
&Inst::FpuStoreP64 {
rt, rt2, ref mem, ..
} => {
let rt = show_vreg_scalar(rt, mb_rru, ScalarSize::Size64);
let rt2 = show_vreg_scalar(rt2, mb_rru, ScalarSize::Size64);
let mem = mem.show_rru(mb_rru);
format!("stp {}, {}, {}", rt, rt2, mem)
}
&Inst::FpuLoadP128 {
rt, rt2, ref mem, ..
} => {
let rt = show_vreg_scalar(rt.to_reg(), mb_rru, ScalarSize::Size128);
let rt2 = show_vreg_scalar(rt2.to_reg(), mb_rru, ScalarSize::Size128);
let mem = mem.show_rru(mb_rru);
format!("ldp {}, {}, {}", rt, rt2, mem)
}
&Inst::FpuStoreP128 {
rt, rt2, ref mem, ..
} => {
let rt = show_vreg_scalar(rt, mb_rru, ScalarSize::Size128);
let rt2 = show_vreg_scalar(rt2, mb_rru, ScalarSize::Size128);
let mem = mem.show_rru(mb_rru);
format!("stp {}, {}, {}", rt, rt2, mem)
}
&Inst::LoadFpuConst64 { rd, const_data } => {
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
format!(


@@ -77,22 +77,72 @@ block0(v0: f64):
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: stp d14, d15, [sp, #-16]!
; nextln: stp d12, d13, [sp, #-16]!
; nextln: stp d10, d11, [sp, #-16]!
; nextln: stp d8, d9, [sp, #-16]!

; check: ldp d8, d9, [sp], #16
; nextln: ldp d10, d11, [sp], #16
; nextln: ldp d12, d13, [sp], #16
; nextln: ldp d14, d15, [sp], #16
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f2(i64) -> i64 {
block0(v0: i64):
v1 = iadd.i64 v0, v0
v2 = iadd.i64 v0, v1
v3 = iadd.i64 v0, v2
v4 = iadd.i64 v0, v3
v5 = iadd.i64 v0, v4
v6 = iadd.i64 v0, v5
v7 = iadd.i64 v0, v6
v8 = iadd.i64 v0, v7
v9 = iadd.i64 v0, v8
v10 = iadd.i64 v0, v9
v11 = iadd.i64 v0, v10
v12 = iadd.i64 v0, v11
v13 = iadd.i64 v0, v12
v14 = iadd.i64 v0, v13
v15 = iadd.i64 v0, v14
v16 = iadd.i64 v0, v15
v17 = iadd.i64 v0, v16
v18 = iadd.i64 v0, v17
v19 = iadd.i64 v0, v1
v20 = iadd.i64 v2, v3
v21 = iadd.i64 v4, v5
v22 = iadd.i64 v6, v7
v23 = iadd.i64 v8, v9
v24 = iadd.i64 v10, v11
v25 = iadd.i64 v12, v13
v26 = iadd.i64 v14, v15
v27 = iadd.i64 v16, v17
v28 = iadd.i64 v18, v19
v29 = iadd.i64 v20, v21
v30 = iadd.i64 v22, v23
v31 = iadd.i64 v24, v25
v32 = iadd.i64 v26, v27
v33 = iadd.i64 v28, v29
v34 = iadd.i64 v30, v31
v35 = iadd.i64 v32, v33
v36 = iadd.i64 v34, v35
return v36
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: str x22, [sp, #-16]!
; nextln: stp x19, x20, [sp, #-16]!
; nextln: add x1, x0, x0
; check: add x0, x1, x0
; nextln: ldp x19, x20, [sp], #16
; nextln: ldr x22, [sp], #16
; nextln: ldp fp, lr, [sp], #16
; nextln: ret