Allocate temporary intermediates when loading constants on aarch64 (#5366)
As loading constants on aarch64 can take up to 4 instructions, we need to plumb through some additional registers. Rather than pass a fixed list of registers in, pass an allocation function.
This commit is contained in:
@@ -430,7 +430,10 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
|||||||
} else {
|
} else {
|
||||||
let scratch2 = writable_tmp2_reg();
|
let scratch2 = writable_tmp2_reg();
|
||||||
assert_ne!(scratch2.to_reg(), from_reg);
|
assert_ne!(scratch2.to_reg(), from_reg);
|
||||||
insts.extend(Inst::load_constant(scratch2, imm.into()));
|
// `gen_add_imm` is only ever called after register allocation has take place, and as a
|
||||||
|
// result it's ok to reuse the scratch2 register here. If that changes, we'll need to
|
||||||
|
// plumb through a way to allocate temporary virtual registers
|
||||||
|
insts.extend(Inst::load_constant(scratch2, imm.into(), &mut |_| scratch2));
|
||||||
insts.push(Inst::AluRRRExtend {
|
insts.push(Inst::AluRRRExtend {
|
||||||
alu_op: ALUOp::Add,
|
alu_op: ALUOp::Add,
|
||||||
size: OperandSize::Size64,
|
size: OperandSize::Size64,
|
||||||
@@ -515,7 +518,9 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
|||||||
ret.push(adj_inst);
|
ret.push(adj_inst);
|
||||||
} else {
|
} else {
|
||||||
let tmp = writable_spilltmp_reg();
|
let tmp = writable_spilltmp_reg();
|
||||||
let const_inst = Inst::load_constant(tmp, amount);
|
// `gen_sp_reg_adjust` is called after regalloc2, so it's acceptable to reuse `tmp` for
|
||||||
|
// intermediates in `load_constant`.
|
||||||
|
let const_inst = Inst::load_constant(tmp, amount, &mut |_| tmp);
|
||||||
let adj_inst = Inst::AluRRRExtend {
|
let adj_inst = Inst::AluRRRExtend {
|
||||||
alu_op,
|
alu_op,
|
||||||
size: OperandSize::Size64,
|
size: OperandSize::Size64,
|
||||||
@@ -673,8 +678,10 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
|||||||
// itself is not allowed to use the registers.
|
// itself is not allowed to use the registers.
|
||||||
let start = writable_spilltmp_reg();
|
let start = writable_spilltmp_reg();
|
||||||
let end = writable_tmp2_reg();
|
let end = writable_tmp2_reg();
|
||||||
insts.extend(Inst::load_constant(start, 0));
|
// `gen_inline_probestack` is called after regalloc2, so it's acceptable to reuse
|
||||||
insts.extend(Inst::load_constant(end, frame_size.into()));
|
// `start` and `end` as temporaries in load_constant.
|
||||||
|
insts.extend(Inst::load_constant(start, 0, &mut |_| start));
|
||||||
|
insts.extend(Inst::load_constant(end, frame_size.into(), &mut |_| end));
|
||||||
insts.push(Inst::StackProbeLoop {
|
insts.push(Inst::StackProbeLoop {
|
||||||
start,
|
start,
|
||||||
end: end.to_reg(),
|
end: end.to_reg(),
|
||||||
@@ -1019,19 +1026,19 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
|||||||
insts
|
insts
|
||||||
}
|
}
|
||||||
|
|
||||||
fn gen_memcpy(
|
fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
|
||||||
call_conv: isa::CallConv,
|
call_conv: isa::CallConv,
|
||||||
dst: Reg,
|
dst: Reg,
|
||||||
src: Reg,
|
src: Reg,
|
||||||
tmp: Writable<Reg>,
|
|
||||||
_tmp2: Writable<Reg>,
|
|
||||||
size: usize,
|
size: usize,
|
||||||
|
mut alloc_tmp: F,
|
||||||
) -> SmallVec<[Self::I; 8]> {
|
) -> SmallVec<[Self::I; 8]> {
|
||||||
let mut insts = SmallVec::new();
|
let mut insts = SmallVec::new();
|
||||||
let arg0 = writable_xreg(0);
|
let arg0 = writable_xreg(0);
|
||||||
let arg1 = writable_xreg(1);
|
let arg1 = writable_xreg(1);
|
||||||
let arg2 = writable_xreg(2);
|
let arg2 = writable_xreg(2);
|
||||||
insts.extend(Inst::load_constant(tmp, size as u64).into_iter());
|
let tmp = alloc_tmp(Self::word_type());
|
||||||
|
insts.extend(Inst::load_constant(tmp, size as u64, &mut alloc_tmp));
|
||||||
insts.push(Inst::Call {
|
insts.push(Inst::Call {
|
||||||
info: Box::new(CallInfo {
|
info: Box::new(CallInfo {
|
||||||
dest: ExternalName::LibCall(LibCall::Memcpy),
|
dest: ExternalName::LibCall(LibCall::Memcpy),
|
||||||
|
|||||||
@@ -64,7 +64,7 @@ pub fn mem_finalize(
|
|||||||
} else {
|
} else {
|
||||||
let tmp = writable_spilltmp_reg();
|
let tmp = writable_spilltmp_reg();
|
||||||
(
|
(
|
||||||
Inst::load_constant(tmp, off as u64),
|
Inst::load_constant(tmp, off as u64, &mut |_| tmp),
|
||||||
AMode::RegExtended {
|
AMode::RegExtended {
|
||||||
rn: basereg,
|
rn: basereg,
|
||||||
rm: tmp.to_reg(),
|
rm: tmp.to_reg(),
|
||||||
@@ -3333,7 +3333,7 @@ impl MachInstEmit for Inst {
|
|||||||
debug_assert!(rd.to_reg() != tmp2_reg());
|
debug_assert!(rd.to_reg() != tmp2_reg());
|
||||||
debug_assert!(reg != tmp2_reg());
|
debug_assert!(reg != tmp2_reg());
|
||||||
let tmp = writable_tmp2_reg();
|
let tmp = writable_tmp2_reg();
|
||||||
for insn in Inst::load_constant(tmp, abs_offset).into_iter() {
|
for insn in Inst::load_constant(tmp, abs_offset, &mut |_| tmp).into_iter() {
|
||||||
insn.emit(&[], sink, emit_info, state);
|
insn.emit(&[], sink, emit_info, state);
|
||||||
}
|
}
|
||||||
let add = Inst::AluRRR {
|
let add = Inst::AluRRR {
|
||||||
|
|||||||
@@ -130,7 +130,11 @@ fn inst_size_test() {
|
|||||||
impl Inst {
|
impl Inst {
|
||||||
/// Create an instruction that loads a constant, using one of serveral options (MOVZ, MOVN,
|
/// Create an instruction that loads a constant, using one of serveral options (MOVZ, MOVN,
|
||||||
/// logical immediate, or constant pool).
|
/// logical immediate, or constant pool).
|
||||||
pub fn load_constant(rd: Writable<Reg>, value: u64) -> SmallVec<[Inst; 4]> {
|
pub fn load_constant<F: FnMut(Type) -> Writable<Reg>>(
|
||||||
|
rd: Writable<Reg>,
|
||||||
|
value: u64,
|
||||||
|
alloc_tmp: &mut F,
|
||||||
|
) -> SmallVec<[Inst; 4]> {
|
||||||
// NB: this is duplicated in `lower/isle.rs` and `inst.isle` right now,
|
// NB: this is duplicated in `lower/isle.rs` and `inst.isle` right now,
|
||||||
// if modifications are made here before this is deleted after moving to
|
// if modifications are made here before this is deleted after moving to
|
||||||
// ISLE then those locations should be updated as well.
|
// ISLE then those locations should be updated as well.
|
||||||
@@ -169,60 +173,73 @@ impl Inst {
|
|||||||
} else {
|
} else {
|
||||||
(4, OperandSize::Size64, !value)
|
(4, OperandSize::Size64, !value)
|
||||||
};
|
};
|
||||||
|
|
||||||
// If the number of 0xffff half words is greater than the number of 0x0000 half words
|
// If the number of 0xffff half words is greater than the number of 0x0000 half words
|
||||||
// it is more efficient to use `movn` for the first instruction.
|
// it is more efficient to use `movn` for the first instruction.
|
||||||
let first_is_inverted = count_zero_half_words(negated, num_half_words)
|
let first_is_inverted = count_zero_half_words(negated, num_half_words)
|
||||||
> count_zero_half_words(value, num_half_words);
|
> count_zero_half_words(value, num_half_words);
|
||||||
|
|
||||||
// Either 0xffff or 0x0000 half words can be skipped, depending on the first
|
// Either 0xffff or 0x0000 half words can be skipped, depending on the first
|
||||||
// instruction used.
|
// instruction used.
|
||||||
let ignored_halfword = if first_is_inverted { 0xffff } else { 0 };
|
let ignored_halfword = if first_is_inverted { 0xffff } else { 0 };
|
||||||
let mut first_mov_emitted = false;
|
|
||||||
|
|
||||||
for i in 0..num_half_words {
|
let halfwords: SmallVec<[_; 4]> = (0..num_half_words)
|
||||||
let imm16 = (value >> (16 * i)) & 0xffff;
|
.filter_map(|i| {
|
||||||
if imm16 != ignored_halfword {
|
let imm16 = (value >> (16 * i)) & 0xffff;
|
||||||
if !first_mov_emitted {
|
if imm16 == ignored_halfword {
|
||||||
first_mov_emitted = true;
|
None
|
||||||
if first_is_inverted {
|
|
||||||
let imm =
|
|
||||||
MoveWideConst::maybe_with_shift(((!imm16) & 0xffff) as u16, i * 16)
|
|
||||||
.unwrap();
|
|
||||||
insts.push(Inst::MovWide {
|
|
||||||
op: MoveWideOp::MovN,
|
|
||||||
rd,
|
|
||||||
imm,
|
|
||||||
size,
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
let imm =
|
|
||||||
MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap();
|
|
||||||
insts.push(Inst::MovWide {
|
|
||||||
op: MoveWideOp::MovZ,
|
|
||||||
rd,
|
|
||||||
imm,
|
|
||||||
size,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
let imm = MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap();
|
Some((i, imm16))
|
||||||
insts.push(Inst::MovK {
|
}
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let mut prev_result = None;
|
||||||
|
let last_index = halfwords.last().unwrap().0;
|
||||||
|
for (i, imm16) in halfwords {
|
||||||
|
let shift = i * 16;
|
||||||
|
let rd = if i == last_index { rd } else { alloc_tmp(I16) };
|
||||||
|
|
||||||
|
if let Some(rn) = prev_result {
|
||||||
|
let imm = MoveWideConst::maybe_with_shift(imm16 as u16, shift).unwrap();
|
||||||
|
insts.push(Inst::MovK { rd, rn, imm, size });
|
||||||
|
} else {
|
||||||
|
if first_is_inverted {
|
||||||
|
let imm =
|
||||||
|
MoveWideConst::maybe_with_shift(((!imm16) & 0xffff) as u16, shift)
|
||||||
|
.unwrap();
|
||||||
|
insts.push(Inst::MovWide {
|
||||||
|
op: MoveWideOp::MovN,
|
||||||
|
rd,
|
||||||
|
imm,
|
||||||
|
size,
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
let imm = MoveWideConst::maybe_with_shift(imm16 as u16, shift).unwrap();
|
||||||
|
insts.push(Inst::MovWide {
|
||||||
|
op: MoveWideOp::MovZ,
|
||||||
rd,
|
rd,
|
||||||
rn: rd.to_reg(), // Redef the same virtual register.
|
|
||||||
imm,
|
imm,
|
||||||
size,
|
size,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
prev_result = Some(rd.to_reg());
|
||||||
}
|
}
|
||||||
|
|
||||||
assert!(first_mov_emitted);
|
assert!(prev_result.is_some());
|
||||||
|
|
||||||
insts
|
insts
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Create instructions that load a 128-bit constant.
|
/// Create instructions that load a 128-bit constant.
|
||||||
pub fn load_constant128(to_regs: ValueRegs<Writable<Reg>>, value: u128) -> SmallVec<[Inst; 4]> {
|
pub fn load_constant128<F: FnMut(Type) -> Writable<Reg>>(
|
||||||
|
to_regs: ValueRegs<Writable<Reg>>,
|
||||||
|
value: u128,
|
||||||
|
mut alloc_tmp: F,
|
||||||
|
) -> SmallVec<[Inst; 4]> {
|
||||||
assert_eq!(to_regs.len(), 2, "Expected to load i128 into two registers");
|
assert_eq!(to_regs.len(), 2, "Expected to load i128 into two registers");
|
||||||
|
|
||||||
let lower = value as u64;
|
let lower = value as u64;
|
||||||
@@ -231,8 +248,8 @@ impl Inst {
|
|||||||
let lower_reg = to_regs.regs()[0];
|
let lower_reg = to_regs.regs()[0];
|
||||||
let upper_reg = to_regs.regs()[1];
|
let upper_reg = to_regs.regs()[1];
|
||||||
|
|
||||||
let mut load_ins = Inst::load_constant(lower_reg, lower);
|
let mut load_ins = Inst::load_constant(lower_reg, lower, &mut alloc_tmp);
|
||||||
let load_upper = Inst::load_constant(upper_reg, upper);
|
let load_upper = Inst::load_constant(upper_reg, upper, &mut alloc_tmp);
|
||||||
|
|
||||||
load_ins.extend(load_upper.into_iter());
|
load_ins.extend(load_upper.into_iter());
|
||||||
load_ins
|
load_ins
|
||||||
@@ -264,7 +281,7 @@ impl Inst {
|
|||||||
}]
|
}]
|
||||||
} else {
|
} else {
|
||||||
let tmp = alloc_tmp(I32);
|
let tmp = alloc_tmp(I32);
|
||||||
let mut insts = Inst::load_constant(tmp, const_data as u64);
|
let mut insts = Inst::load_constant(tmp, const_data as u64, &mut alloc_tmp);
|
||||||
|
|
||||||
insts.push(Inst::MovToFpu {
|
insts.push(Inst::MovToFpu {
|
||||||
rd,
|
rd,
|
||||||
@@ -304,7 +321,7 @@ impl Inst {
|
|||||||
Inst::load_fp_constant32(rd, const_data, alloc_tmp)
|
Inst::load_fp_constant32(rd, const_data, alloc_tmp)
|
||||||
} else if const_data & (u32::MAX as u64) == 0 {
|
} else if const_data & (u32::MAX as u64) == 0 {
|
||||||
let tmp = alloc_tmp(I64);
|
let tmp = alloc_tmp(I64);
|
||||||
let mut insts = Inst::load_constant(tmp, const_data);
|
let mut insts = Inst::load_constant(tmp, const_data, &mut alloc_tmp);
|
||||||
|
|
||||||
insts.push(Inst::MovToFpu {
|
insts.push(Inst::MovToFpu {
|
||||||
rd,
|
rd,
|
||||||
@@ -426,7 +443,7 @@ impl Inst {
|
|||||||
smallvec![Inst::VecDupFPImm { rd, imm, size }]
|
smallvec![Inst::VecDupFPImm { rd, imm, size }]
|
||||||
} else {
|
} else {
|
||||||
let tmp = alloc_tmp(I64);
|
let tmp = alloc_tmp(I64);
|
||||||
let mut insts = SmallVec::from(&Inst::load_constant(tmp, pattern)[..]);
|
let mut insts = SmallVec::from(&Inst::load_constant(tmp, pattern, &mut alloc_tmp)[..]);
|
||||||
|
|
||||||
insts.push(Inst::VecDup {
|
insts.push(Inst::VecDup {
|
||||||
rd,
|
rd,
|
||||||
@@ -1212,14 +1229,16 @@ impl MachInst for Inst {
|
|||||||
to_regs: ValueRegs<Writable<Reg>>,
|
to_regs: ValueRegs<Writable<Reg>>,
|
||||||
value: u128,
|
value: u128,
|
||||||
ty: Type,
|
ty: Type,
|
||||||
alloc_tmp: F,
|
mut alloc_tmp: F,
|
||||||
) -> SmallVec<[Inst; 4]> {
|
) -> SmallVec<[Inst; 4]> {
|
||||||
let to_reg = to_regs.only_reg();
|
let to_reg = to_regs.only_reg();
|
||||||
match ty {
|
match ty {
|
||||||
F64 => Inst::load_fp_constant64(to_reg.unwrap(), value as u64, alloc_tmp),
|
F64 => Inst::load_fp_constant64(to_reg.unwrap(), value as u64, alloc_tmp),
|
||||||
F32 => Inst::load_fp_constant32(to_reg.unwrap(), value as u32, alloc_tmp),
|
F32 => Inst::load_fp_constant32(to_reg.unwrap(), value as u32, alloc_tmp),
|
||||||
I8 | I16 | I32 | I64 | R32 | R64 => Inst::load_constant(to_reg.unwrap(), value as u64),
|
I8 | I16 | I32 | I64 | R32 | R64 => {
|
||||||
I128 => Inst::load_constant128(to_regs, value),
|
Inst::load_constant(to_reg.unwrap(), value as u64, &mut alloc_tmp)
|
||||||
|
}
|
||||||
|
I128 => Inst::load_constant128(to_regs, value, alloc_tmp),
|
||||||
_ => panic!("Cannot generate constant for type: {}", ty),
|
_ => panic!("Cannot generate constant for type: {}", ty),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2837,7 +2856,7 @@ impl Inst {
|
|||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
let tmp = writable_spilltmp_reg();
|
let tmp = writable_spilltmp_reg();
|
||||||
for inst in Inst::load_constant(tmp, abs_offset).into_iter() {
|
for inst in Inst::load_constant(tmp, abs_offset, &mut |_| tmp).into_iter() {
|
||||||
ret.push_str(
|
ret.push_str(
|
||||||
&inst.print_with_state(&mut EmitState::default(), &mut empty_allocs),
|
&inst.print_with_state(&mut EmitState::default(), &mut empty_allocs),
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -575,7 +575,7 @@ fn lower_add_immediate(ctx: &mut Lower<Inst>, dst: Writable<Reg>, src: Reg, imm:
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn lower_constant_u64(ctx: &mut Lower<Inst>, rd: Writable<Reg>, value: u64) {
|
pub(crate) fn lower_constant_u64(ctx: &mut Lower<Inst>, rd: Writable<Reg>, value: u64) {
|
||||||
for inst in Inst::load_constant(rd, value) {
|
for inst in Inst::load_constant(rd, value, &mut |ty| ctx.alloc_tmp(ty).only_reg().unwrap()) {
|
||||||
ctx.emit(inst);
|
ctx.emit(inst);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
// Pull in the ISLE generated code.
|
// Pull in the ISLE generated code.
|
||||||
pub mod generated_code;
|
pub mod generated_code;
|
||||||
use generated_code::Context;
|
use generated_code::Context;
|
||||||
|
use smallvec::SmallVec;
|
||||||
|
|
||||||
// Types that the generated ISLE code uses via `use super::*`.
|
// Types that the generated ISLE code uses via `use super::*`.
|
||||||
use super::{
|
use super::{
|
||||||
@@ -217,7 +218,6 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
|
|||||||
} else {
|
} else {
|
||||||
value
|
value
|
||||||
};
|
};
|
||||||
let rd = self.temp_writable_reg(I64);
|
|
||||||
let size = OperandSize::Size64;
|
let size = OperandSize::Size64;
|
||||||
|
|
||||||
// If the top 32 bits are zero, use 32-bit `mov` operations.
|
// If the top 32 bits are zero, use 32-bit `mov` operations.
|
||||||
@@ -226,6 +226,7 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
|
|||||||
let lower_halfword = value as u16;
|
let lower_halfword = value as u16;
|
||||||
let upper_halfword = (value >> 16) as u16;
|
let upper_halfword = (value >> 16) as u16;
|
||||||
|
|
||||||
|
let rd = self.temp_writable_reg(I64);
|
||||||
if upper_halfword == u16::MAX {
|
if upper_halfword == u16::MAX {
|
||||||
self.emit(&MInst::MovWide {
|
self.emit(&MInst::MovWide {
|
||||||
op: MoveWideOp::MovN,
|
op: MoveWideOp::MovN,
|
||||||
@@ -242,17 +243,20 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
|
|||||||
});
|
});
|
||||||
|
|
||||||
if upper_halfword != 0 {
|
if upper_halfword != 0 {
|
||||||
|
let tmp = self.temp_writable_reg(I64);
|
||||||
self.emit(&MInst::MovK {
|
self.emit(&MInst::MovK {
|
||||||
rd,
|
rd: tmp,
|
||||||
rn: rd.to_reg(),
|
rn: rd.to_reg(),
|
||||||
imm: MoveWideConst::maybe_with_shift(upper_halfword, 16).unwrap(),
|
imm: MoveWideConst::maybe_with_shift(upper_halfword, 16).unwrap(),
|
||||||
size,
|
size,
|
||||||
});
|
});
|
||||||
|
return tmp.to_reg();
|
||||||
}
|
}
|
||||||
}
|
};
|
||||||
|
|
||||||
return rd.to_reg();
|
return rd.to_reg();
|
||||||
} else if value == u64::MAX {
|
} else if value == u64::MAX {
|
||||||
|
let rd = self.temp_writable_reg(I64);
|
||||||
self.emit(&MInst::MovWide {
|
self.emit(&MInst::MovWide {
|
||||||
op: MoveWideOp::MovN,
|
op: MoveWideOp::MovN,
|
||||||
rd,
|
rd,
|
||||||
@@ -265,50 +269,57 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
|
|||||||
// If the number of 0xffff half words is greater than the number of 0x0000 half words
|
// If the number of 0xffff half words is greater than the number of 0x0000 half words
|
||||||
// it is more efficient to use `movn` for the first instruction.
|
// it is more efficient to use `movn` for the first instruction.
|
||||||
let first_is_inverted = count_zero_half_words(!value) > count_zero_half_words(value);
|
let first_is_inverted = count_zero_half_words(!value) > count_zero_half_words(value);
|
||||||
|
|
||||||
// Either 0xffff or 0x0000 half words can be skipped, depending on the first
|
// Either 0xffff or 0x0000 half words can be skipped, depending on the first
|
||||||
// instruction used.
|
// instruction used.
|
||||||
let ignored_halfword = if first_is_inverted { 0xffff } else { 0 };
|
let ignored_halfword = if first_is_inverted { 0xffff } else { 0 };
|
||||||
let mut first_mov_emitted = false;
|
|
||||||
|
|
||||||
for i in 0..4 {
|
let halfwords: SmallVec<[_; 4]> = (0..4)
|
||||||
let imm16 = (value >> (16 * i)) & 0xffff;
|
.filter_map(|i| {
|
||||||
if imm16 != ignored_halfword {
|
let imm16 = (value >> (16 * i)) & 0xffff;
|
||||||
if !first_mov_emitted {
|
if imm16 == ignored_halfword {
|
||||||
first_mov_emitted = true;
|
None
|
||||||
if first_is_inverted {
|
|
||||||
let imm =
|
|
||||||
MoveWideConst::maybe_with_shift(((!imm16) & 0xffff) as u16, i * 16)
|
|
||||||
.unwrap();
|
|
||||||
self.emit(&MInst::MovWide {
|
|
||||||
op: MoveWideOp::MovN,
|
|
||||||
rd,
|
|
||||||
imm,
|
|
||||||
size,
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
let imm = MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap();
|
|
||||||
self.emit(&MInst::MovWide {
|
|
||||||
op: MoveWideOp::MovZ,
|
|
||||||
rd,
|
|
||||||
imm,
|
|
||||||
size,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
let imm = MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap();
|
Some((i, imm16))
|
||||||
self.emit(&MInst::MovK {
|
}
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let mut prev_result = None;
|
||||||
|
for (i, imm16) in halfwords {
|
||||||
|
let shift = i * 16;
|
||||||
|
let rd = self.temp_writable_reg(I64);
|
||||||
|
|
||||||
|
if let Some(rn) = prev_result {
|
||||||
|
let imm = MoveWideConst::maybe_with_shift(imm16 as u16, shift).unwrap();
|
||||||
|
self.emit(&MInst::MovK { rd, rn, imm, size });
|
||||||
|
} else {
|
||||||
|
if first_is_inverted {
|
||||||
|
let imm =
|
||||||
|
MoveWideConst::maybe_with_shift(((!imm16) & 0xffff) as u16, shift).unwrap();
|
||||||
|
self.emit(&MInst::MovWide {
|
||||||
|
op: MoveWideOp::MovN,
|
||||||
|
rd,
|
||||||
|
imm,
|
||||||
|
size,
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
let imm = MoveWideConst::maybe_with_shift(imm16 as u16, shift).unwrap();
|
||||||
|
self.emit(&MInst::MovWide {
|
||||||
|
op: MoveWideOp::MovZ,
|
||||||
rd,
|
rd,
|
||||||
rn: rd.to_reg(),
|
|
||||||
imm,
|
imm,
|
||||||
size,
|
size,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
prev_result = Some(rd.to_reg());
|
||||||
}
|
}
|
||||||
|
|
||||||
assert!(first_mov_emitted);
|
assert!(prev_result.is_some());
|
||||||
|
|
||||||
return self.writable_reg_to_reg(rd);
|
return prev_result.unwrap();
|
||||||
|
|
||||||
fn count_zero_half_words(mut value: u64) -> usize {
|
fn count_zero_half_words(mut value: u64) -> usize {
|
||||||
let mut count = 0;
|
let mut count = 0;
|
||||||
|
|||||||
@@ -525,18 +525,18 @@ impl ABIMachineSpec for Riscv64MachineDeps {
|
|||||||
insts
|
insts
|
||||||
}
|
}
|
||||||
|
|
||||||
fn gen_memcpy(
|
fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
|
||||||
call_conv: isa::CallConv,
|
call_conv: isa::CallConv,
|
||||||
dst: Reg,
|
dst: Reg,
|
||||||
src: Reg,
|
src: Reg,
|
||||||
tmp: Writable<Reg>,
|
|
||||||
_tmp2: Writable<Reg>,
|
|
||||||
size: usize,
|
size: usize,
|
||||||
|
mut alloc_tmp: F,
|
||||||
) -> SmallVec<[Self::I; 8]> {
|
) -> SmallVec<[Self::I; 8]> {
|
||||||
let mut insts = SmallVec::new();
|
let mut insts = SmallVec::new();
|
||||||
let arg0 = Writable::from_reg(x_reg(10));
|
let arg0 = Writable::from_reg(x_reg(10));
|
||||||
let arg1 = Writable::from_reg(x_reg(11));
|
let arg1 = Writable::from_reg(x_reg(11));
|
||||||
let arg2 = Writable::from_reg(x_reg(12));
|
let arg2 = Writable::from_reg(x_reg(12));
|
||||||
|
let tmp = alloc_tmp(Self::word_type());
|
||||||
insts.extend(Inst::load_constant_u64(tmp, size as u64).into_iter());
|
insts.extend(Inst::load_constant_u64(tmp, size as u64).into_iter());
|
||||||
insts.push(Inst::Call {
|
insts.push(Inst::Call {
|
||||||
info: Box::new(CallInfo {
|
info: Box::new(CallInfo {
|
||||||
|
|||||||
@@ -752,13 +752,12 @@ impl ABIMachineSpec for S390xMachineDeps {
|
|||||||
unreachable!();
|
unreachable!();
|
||||||
}
|
}
|
||||||
|
|
||||||
fn gen_memcpy(
|
fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
|
||||||
_call_conv: isa::CallConv,
|
_call_conv: isa::CallConv,
|
||||||
_dst: Reg,
|
_dst: Reg,
|
||||||
_src: Reg,
|
_src: Reg,
|
||||||
_tmp1: Writable<Reg>,
|
|
||||||
_tmp2: Writable<Reg>,
|
|
||||||
_size: usize,
|
_size: usize,
|
||||||
|
_alloc: F,
|
||||||
) -> SmallVec<[Self::I; 8]> {
|
) -> SmallVec<[Self::I; 8]> {
|
||||||
unimplemented!("StructArgs not implemented for S390X yet");
|
unimplemented!("StructArgs not implemented for S390X yet");
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -624,18 +624,19 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
|||||||
insts
|
insts
|
||||||
}
|
}
|
||||||
|
|
||||||
fn gen_memcpy(
|
fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
|
||||||
call_conv: isa::CallConv,
|
call_conv: isa::CallConv,
|
||||||
dst: Reg,
|
dst: Reg,
|
||||||
src: Reg,
|
src: Reg,
|
||||||
temp: Writable<Reg>,
|
|
||||||
temp2: Writable<Reg>,
|
|
||||||
size: usize,
|
size: usize,
|
||||||
|
mut alloc_tmp: F,
|
||||||
) -> SmallVec<[Self::I; 8]> {
|
) -> SmallVec<[Self::I; 8]> {
|
||||||
let mut insts = SmallVec::new();
|
let mut insts = SmallVec::new();
|
||||||
let arg0 = get_intreg_for_arg(&call_conv, 0, 0).unwrap();
|
let arg0 = get_intreg_for_arg(&call_conv, 0, 0).unwrap();
|
||||||
let arg1 = get_intreg_for_arg(&call_conv, 1, 1).unwrap();
|
let arg1 = get_intreg_for_arg(&call_conv, 1, 1).unwrap();
|
||||||
let arg2 = get_intreg_for_arg(&call_conv, 2, 2).unwrap();
|
let arg2 = get_intreg_for_arg(&call_conv, 2, 2).unwrap();
|
||||||
|
let temp = alloc_tmp(Self::word_type());
|
||||||
|
let temp2 = alloc_tmp(Self::word_type());
|
||||||
insts.extend(
|
insts.extend(
|
||||||
Inst::gen_constant(ValueRegs::one(temp), size as u128, I64, |_| {
|
Inst::gen_constant(ValueRegs::one(temp), size as u128, I64, |_| {
|
||||||
panic!("tmp should not be needed")
|
panic!("tmp should not be needed")
|
||||||
|
|||||||
@@ -567,16 +567,14 @@ pub trait ABIMachineSpec {
|
|||||||
) -> SmallVec<[Self::I; 2]>;
|
) -> SmallVec<[Self::I; 2]>;
|
||||||
|
|
||||||
/// Generate a memcpy invocation. Used to set up struct
|
/// Generate a memcpy invocation. Used to set up struct
|
||||||
/// args. Takes `src`, `dst` as read-only inputs and requires two
|
/// args. Takes `src`, `dst` as read-only inputs and passes a temporary
|
||||||
/// temporaries to generate the call (for the size immediate and
|
/// allocator.
|
||||||
/// possibly for the address of `memcpy` itself).
|
fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
|
||||||
fn gen_memcpy(
|
|
||||||
call_conv: isa::CallConv,
|
call_conv: isa::CallConv,
|
||||||
dst: Reg,
|
dst: Reg,
|
||||||
src: Reg,
|
src: Reg,
|
||||||
tmp1: Writable<Reg>,
|
|
||||||
tmp2: Writable<Reg>,
|
|
||||||
size: usize,
|
size: usize,
|
||||||
|
alloc_tmp: F,
|
||||||
) -> SmallVec<[Self::I; 8]>;
|
) -> SmallVec<[Self::I; 8]>;
|
||||||
|
|
||||||
/// Get the number of spillslots required for the given register-class.
|
/// Get the number of spillslots required for the given register-class.
|
||||||
@@ -2152,15 +2150,12 @@ impl<M: ABIMachineSpec> Caller<M> {
|
|||||||
// arg regs.
|
// arg regs.
|
||||||
let memcpy_call_conv =
|
let memcpy_call_conv =
|
||||||
isa::CallConv::for_libcall(&self.flags, ctx.sigs()[self.sig].call_conv);
|
isa::CallConv::for_libcall(&self.flags, ctx.sigs()[self.sig].call_conv);
|
||||||
let tmp1 = ctx.alloc_tmp(M::word_type()).only_reg().unwrap();
|
|
||||||
let tmp2 = ctx.alloc_tmp(M::word_type()).only_reg().unwrap();
|
|
||||||
for insn in M::gen_memcpy(
|
for insn in M::gen_memcpy(
|
||||||
memcpy_call_conv,
|
memcpy_call_conv,
|
||||||
dst_ptr.to_reg(),
|
dst_ptr.to_reg(),
|
||||||
src_ptr,
|
src_ptr,
|
||||||
tmp1,
|
|
||||||
tmp2,
|
|
||||||
size as usize,
|
size as usize,
|
||||||
|
|ty| ctx.alloc_tmp(ty).only_reg().unwrap(),
|
||||||
)
|
)
|
||||||
.into_iter()
|
.into_iter()
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -37,9 +37,9 @@ block0(v0: i32):
|
|||||||
}
|
}
|
||||||
|
|
||||||
; block0:
|
; block0:
|
||||||
; movz w2, #4369
|
; movz w3, #4369
|
||||||
; movk w2, w2, #17, LSL #16
|
; movk w3, w3, #17, LSL #16
|
||||||
; subs wzr, w0, w2
|
; subs wzr, w0, w3
|
||||||
; cset x0, hs
|
; cset x0, hs
|
||||||
; ret
|
; ret
|
||||||
|
|
||||||
@@ -51,9 +51,9 @@ block0(v0: i32):
|
|||||||
}
|
}
|
||||||
|
|
||||||
; block0:
|
; block0:
|
||||||
; movz w2, #4368
|
; movz w3, #4368
|
||||||
; movk w2, w2, #17, LSL #16
|
; movk w3, w3, #17, LSL #16
|
||||||
; subs wzr, w0, w2
|
; subs wzr, w0, w3
|
||||||
; cset x0, hs
|
; cset x0, hs
|
||||||
; ret
|
; ret
|
||||||
|
|
||||||
@@ -89,9 +89,9 @@ block0(v0: i32):
|
|||||||
}
|
}
|
||||||
|
|
||||||
; block0:
|
; block0:
|
||||||
; movz w2, #4369
|
; movz w3, #4369
|
||||||
; movk w2, w2, #17, LSL #16
|
; movk w3, w3, #17, LSL #16
|
||||||
; subs wzr, w0, w2
|
; subs wzr, w0, w3
|
||||||
; cset x0, ge
|
; cset x0, ge
|
||||||
; ret
|
; ret
|
||||||
|
|
||||||
@@ -103,9 +103,9 @@ block0(v0: i32):
|
|||||||
}
|
}
|
||||||
|
|
||||||
; block0:
|
; block0:
|
||||||
; movz w2, #4368
|
; movz w3, #4368
|
||||||
; movk w2, w2, #17, LSL #16
|
; movk w3, w3, #17, LSL #16
|
||||||
; subs wzr, w0, w2
|
; subs wzr, w0, w3
|
||||||
; cset x0, ge
|
; cset x0, ge
|
||||||
; ret
|
; ret
|
||||||
|
|
||||||
|
|||||||
@@ -14,11 +14,11 @@ block0(v0: i8x16):
|
|||||||
; movk x5, x5, #8208, LSL #32
|
; movk x5, x5, #8208, LSL #32
|
||||||
; movk x5, x5, #32832, LSL #48
|
; movk x5, x5, #32832, LSL #48
|
||||||
; dup v16.2d, x5
|
; dup v16.2d, x5
|
||||||
; and v19.16b, v2.16b, v16.16b
|
; and v22.16b, v2.16b, v16.16b
|
||||||
; ext v21.16b, v19.16b, v19.16b, #8
|
; ext v24.16b, v22.16b, v22.16b, #8
|
||||||
; zip1 v23.16b, v19.16b, v21.16b
|
; zip1 v26.16b, v22.16b, v24.16b
|
||||||
; addv h25, v23.8h
|
; addv h28, v26.8h
|
||||||
; umov w0, v25.h[0]
|
; umov w0, v28.h[0]
|
||||||
; ret
|
; ret
|
||||||
|
|
||||||
function %f2(i8x16) -> i16 {
|
function %f2(i8x16) -> i16 {
|
||||||
@@ -34,11 +34,11 @@ block0(v0: i8x16):
|
|||||||
; movk x5, x5, #8208, LSL #32
|
; movk x5, x5, #8208, LSL #32
|
||||||
; movk x5, x5, #32832, LSL #48
|
; movk x5, x5, #32832, LSL #48
|
||||||
; dup v16.2d, x5
|
; dup v16.2d, x5
|
||||||
; and v19.16b, v2.16b, v16.16b
|
; and v22.16b, v2.16b, v16.16b
|
||||||
; ext v21.16b, v19.16b, v19.16b, #8
|
; ext v24.16b, v22.16b, v22.16b, #8
|
||||||
; zip1 v23.16b, v19.16b, v21.16b
|
; zip1 v26.16b, v22.16b, v24.16b
|
||||||
; addv h25, v23.8h
|
; addv h28, v26.8h
|
||||||
; umov w0, v25.h[0]
|
; umov w0, v28.h[0]
|
||||||
; ret
|
; ret
|
||||||
|
|
||||||
function %f3(i16x8) -> i8 {
|
function %f3(i16x8) -> i8 {
|
||||||
|
|||||||
Reference in New Issue
Block a user