Cranelift AArch64: Various small fixes
* Use FMOV to move 64-bit FP registers and SIMD vectors.
* Add support for additional vector load types.
* Fix the printing of Inst::LoadAddr.

Copyright (c) 2020, Arm Limited.
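The first bullet shows up in the hunks below, where `Inst::mov` and `Inst::FpuMove64` call sites are replaced with the type-aware `Inst::gen_move` helper. As a rough illustration of the idea only (a hypothetical sketch; the names and signature are not Cranelift's API), the choice of move instruction can be keyed on whether the value lives in a general-purpose or a vector register and on its width:

// Hypothetical sketch only; not the actual Cranelift implementation.
#[derive(Debug, PartialEq)]
enum MoveInst {
    Mov,       // mov  xD, xN          (integer register)
    FpuMove64, // fmov dD, dN          (64-bit FP value or 64-bit SIMD vector)
    VecMove,   // mov  vD.16b, vN.16b  (full 128-bit vector)
}

fn pick_move(uses_vec_reg: bool, bits: u32) -> MoveInst {
    match (uses_vec_reg, bits) {
        (false, _) => MoveInst::Mov,
        (true, b) if b <= 64 => MoveInst::FpuMove64,
        (true, _) => MoveInst::VecMove,
    }
}

fn main() {
    // An I64 move stays on the integer side; an F64 or 64-bit vector uses FMOV.
    assert_eq!(pick_move(false, 64), MoveInst::Mov);
    assert_eq!(pick_move(true, 64), MoveInst::FpuMove64);
}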
@@ -1127,7 +1127,13 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         | Opcode::Sload16x4
         | Opcode::Uload16x4
         | Opcode::Sload32x2
-        | Opcode::Uload32x2 => {
+        | Opcode::Uload32x2
+        | Opcode::Uload8x8Complex
+        | Opcode::Sload8x8Complex
+        | Opcode::Uload16x4Complex
+        | Opcode::Sload16x4Complex
+        | Opcode::Uload32x2Complex
+        | Opcode::Sload32x2Complex => {
             let off = ctx.data(insn).load_store_offset().unwrap();
             let elem_ty = match op {
                 Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => {
@@ -1142,9 +1148,18 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 | Opcode::Sload32Complex
                 | Opcode::Uload32Complex => I32,
                 Opcode::Load | Opcode::LoadComplex => ctx.output_ty(insn, 0),
-                Opcode::Sload8x8 | Opcode::Uload8x8 => I8X8,
-                Opcode::Sload16x4 | Opcode::Uload16x4 => I16X4,
-                Opcode::Sload32x2 | Opcode::Uload32x2 => I32X2,
+                Opcode::Sload8x8
+                | Opcode::Uload8x8
+                | Opcode::Sload8x8Complex
+                | Opcode::Uload8x8Complex => I8X8,
+                Opcode::Sload16x4
+                | Opcode::Uload16x4
+                | Opcode::Sload16x4Complex
+                | Opcode::Uload16x4Complex => I16X4,
+                Opcode::Sload32x2
+                | Opcode::Uload32x2
+                | Opcode::Sload32x2Complex
+                | Opcode::Uload32x2Complex => I32X2,
                 _ => unreachable!(),
             };
             let sign_extend = match op {
@@ -1180,11 +1195,17 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 
             let vec_extend = match op {
                 Opcode::Sload8x8 => Some(VecExtendOp::Sxtl8),
+                Opcode::Sload8x8Complex => Some(VecExtendOp::Sxtl8),
                 Opcode::Uload8x8 => Some(VecExtendOp::Uxtl8),
+                Opcode::Uload8x8Complex => Some(VecExtendOp::Uxtl8),
                 Opcode::Sload16x4 => Some(VecExtendOp::Sxtl16),
+                Opcode::Sload16x4Complex => Some(VecExtendOp::Sxtl16),
                 Opcode::Uload16x4 => Some(VecExtendOp::Uxtl16),
+                Opcode::Uload16x4Complex => Some(VecExtendOp::Uxtl16),
                 Opcode::Sload32x2 => Some(VecExtendOp::Sxtl32),
+                Opcode::Sload32x2Complex => Some(VecExtendOp::Sxtl32),
                 Opcode::Uload32x2 => Some(VecExtendOp::Uxtl32),
+                Opcode::Uload32x2Complex => Some(VecExtendOp::Uxtl32),
                 _ => None,
             };
 
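The `elem_ty` and `vec_extend` tables above combine into a two-step lowering: the 64-bit half-vector is loaded, then widened with an SXTL/UXTL. As a self-contained model of what the widening step computes for the 8x8 case (illustrative Rust only, not compiler code):

// Models the lane-widening semantics of VecExtendOp::Uxtl8 / Sxtl8 applied to
// the value produced by an 8x8 load; illustrative only, not Cranelift code.
fn uxtl8(lanes: [u8; 8]) -> [u16; 8] {
    lanes.map(u16::from) // zero-extend each 8-bit lane to 16 bits
}

fn sxtl8(lanes: [u8; 8]) -> [i16; 8] {
    lanes.map(|l| l as i8 as i16) // reinterpret as signed, then sign-extend
}

fn main() {
    assert_eq!(uxtl8([0xff; 8])[0], 0x00ff);
    assert_eq!(sxtl8([0xff; 8])[0], -1);
}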
@@ -1641,11 +1662,16 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let rd = get_output_reg(ctx, outputs[0]);
             let ity = ctx.input_ty(insn, 0);
             let oty = ctx.output_ty(insn, 0);
+            let ity_bits = ty_bits(ity);
             let ity_vec_reg = ty_has_float_or_vec_representation(ity);
+            let oty_bits = ty_bits(oty);
             let oty_vec_reg = ty_has_float_or_vec_representation(oty);
+
+            debug_assert_eq!(ity_bits, oty_bits);
+
             match (ity_vec_reg, oty_vec_reg) {
                 (true, true) => {
-                    let narrow_mode = if ty_bits(ity) <= 32 && ty_bits(oty) <= 32 {
+                    let narrow_mode = if ity_bits <= 32 {
                         NarrowValueMode::ZeroExtend32
                     } else {
                         NarrowValueMode::ZeroExtend64
@@ -1667,11 +1693,13 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 }
                 (true, false) => {
                     let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+                    let size = VectorSize::from_lane_size(ScalarSize::from_bits(oty_bits), true);
+
                     ctx.emit(Inst::MovFromVec {
                         rd,
                         rn,
                         idx: 0,
-                        size: VectorSize::Size64x2,
+                        size,
                     });
                 }
             }
@@ -1877,12 +1905,12 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 
         Opcode::GetPinnedReg => {
             let rd = get_output_reg(ctx, outputs[0]);
-            ctx.emit(Inst::mov(rd, xreg(PINNED_REG)));
+            ctx.emit(Inst::gen_move(rd, xreg(PINNED_REG), I64));
         }
 
         Opcode::SetPinnedReg => {
             let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-            ctx.emit(Inst::mov(writable_xreg(PINNED_REG), rm));
+            ctx.emit(Inst::gen_move(writable_xreg(PINNED_REG), rm, I64));
         }
 
         Opcode::Spill
@@ -2314,14 +2342,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             });
         }
 
-        Opcode::Vsplit
-        | Opcode::Vconcat
-        | Opcode::Uload8x8Complex
-        | Opcode::Sload8x8Complex
-        | Opcode::Uload16x4Complex
-        | Opcode::Sload16x4Complex
-        | Opcode::Uload32x2Complex
-        | Opcode::Sload32x2Complex => {
+        Opcode::Vsplit | Opcode::Vconcat => {
             // TODO
             panic!("Vector ops not implemented.");
         }
@@ -2569,6 +2590,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             //
             // This is a scalar Fcopysign.
             // This uses scalar NEON operations for 64-bit and vector operations (2S) for 32-bit.
+            // In the latter case it still sets all bits except the lowest 32 to 0.
             //
             // mov vd, vn
             // ushr vtmp, vm, #63 / #31
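The ushr/sli sequence described in the comment is the usual sign-transfer bit trick: shift the sign bit of the right-hand operand down to bit 0, then shift-left-insert it back into the top bit of the copied left-hand operand. A standalone Rust model of the 64-bit case (illustrative only, not Cranelift code):

// Models the ushr/sli sign-copy sequence for the 64-bit case.
fn fcopysign64(vn: f64, vm: f64) -> f64 {
    let vd = vn.to_bits();
    // ushr vtmp, vm, #63: move the sign bit of the RHS down to bit 0.
    let vtmp = vm.to_bits() >> 63;
    // sli vd, vtmp, #63: shift left by 63 and insert, replacing only bit 63 of vd.
    let result = (vd & !(1u64 << 63)) | (vtmp << 63);
    f64::from_bits(result)
}

fn main() {
    assert_eq!(fcopysign64(1.5, -0.0), -1.5);
    assert_eq!(fcopysign64(-2.0, 3.0), 2.0);
}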
@@ -2583,7 +2605,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let tmp = ctx.alloc_tmp(RegClass::V128, F64);
 
             // Copy LHS to rd.
-            ctx.emit(Inst::FpuMove64 { rd, rn });
+            ctx.emit(Inst::gen_move(rd, rn, ty));
 
             // Copy the sign bit to the lowest bit in tmp.
             let imm = FPURightShiftImm::maybe_from_u8(bits - 1, bits).unwrap();