Port widening ops to ISLE (AArch64) (#4751)

Ported the existing implementations of the following opcodes for AArch64
to ISLE, and implemented support for 64-bit vectors (per the docs):
- `SwidenLow`
- `SwidenHigh`
- `UwidenLow`
- `UwidenHigh`

Also ported `WideningPairwiseDotProductS` as-is.

Copyright (c) 2022 Arm Limited
This commit is contained in:
Damian Heaton
2022-08-23 17:42:11 +01:00
committed by GitHub
parent da1fb305a3
commit 3b68d76905
10 changed files with 250 additions and 161 deletions

View File

@@ -159,22 +159,23 @@ pub(crate) fn lower_insn_to_regs(
});
let vec_extend = match op {
Opcode::Sload8x8 => Some(VecExtendOp::Sxtl8),
Opcode::Uload8x8 => Some(VecExtendOp::Uxtl8),
Opcode::Sload16x4 => Some(VecExtendOp::Sxtl16),
Opcode::Uload16x4 => Some(VecExtendOp::Uxtl16),
Opcode::Sload32x2 => Some(VecExtendOp::Sxtl32),
Opcode::Uload32x2 => Some(VecExtendOp::Uxtl32),
Opcode::Sload8x8 => Some((VecExtendOp::Sxtl, ScalarSize::Size16)),
Opcode::Uload8x8 => Some((VecExtendOp::Uxtl, ScalarSize::Size16)),
Opcode::Sload16x4 => Some((VecExtendOp::Sxtl, ScalarSize::Size32)),
Opcode::Uload16x4 => Some((VecExtendOp::Uxtl, ScalarSize::Size32)),
Opcode::Sload32x2 => Some((VecExtendOp::Sxtl, ScalarSize::Size64)),
Opcode::Uload32x2 => Some((VecExtendOp::Uxtl, ScalarSize::Size64)),
_ => None,
};
if let Some(t) = vec_extend {
if let Some((t, lane_size)) = vec_extend {
let rd = dst.only_reg().unwrap();
ctx.emit(Inst::VecExtend {
t,
rd,
rn: rd.to_reg(),
high_half: false,
lane_size,
});
}
@@ -961,46 +962,7 @@ pub(crate) fn lower_insn_to_regs(
Opcode::IaddPairwise => implemented_in_isle(ctx),
Opcode::WideningPairwiseDotProductS => {
let r_y = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let r_a = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let r_b = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let ty = ty.unwrap();
if ty == I32X4 {
let tmp = ctx.alloc_tmp(I8X16).only_reg().unwrap();
// The args have type I16X8.
// "y = i32x4.dot_i16x8_s(a, b)"
// => smull tmp, a, b
// smull2 y, a, b
// addp y, tmp, y
ctx.emit(Inst::VecRRRLong {
alu_op: VecRRRLongOp::Smull16,
rd: tmp,
rn: r_a,
rm: r_b,
high_half: false,
});
ctx.emit(Inst::VecRRRLong {
alu_op: VecRRRLongOp::Smull16,
rd: r_y,
rn: r_a,
rm: r_b,
high_half: true,
});
ctx.emit(Inst::VecRRR {
alu_op: VecALUOp::Addp,
rd: r_y,
rn: tmp.to_reg(),
rm: r_y.to_reg(),
size: VectorSize::Size32x4,
});
} else {
return Err(CodegenError::Unsupported(format!(
"Opcode::WideningPairwiseDotProductS: unsupported laneage: {:?}",
ty
)));
}
}
Opcode::WideningPairwiseDotProductS => implemented_in_isle(ctx),
Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv | Opcode::Fmin | Opcode::Fmax => {
implemented_in_isle(ctx)
@@ -1485,42 +1447,7 @@ pub(crate) fn lower_insn_to_regs(
Opcode::Snarrow | Opcode::Unarrow | Opcode::Uunarrow => implemented_in_isle(ctx),
Opcode::SwidenLow | Opcode::SwidenHigh | Opcode::UwidenLow | Opcode::UwidenHigh => {
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let ty = ty.unwrap();
let ty = if ty.is_dynamic_vector() {
ty.dynamic_to_vector()
.unwrap_or_else(|| panic!("Unsupported dynamic type: {}?", ty))
} else {
ty
};
let (t, high_half) = match (ty, op) {
(I16X8, Opcode::SwidenLow) => (VecExtendOp::Sxtl8, false),
(I16X8, Opcode::SwidenHigh) => (VecExtendOp::Sxtl8, true),
(I16X8, Opcode::UwidenLow) => (VecExtendOp::Uxtl8, false),
(I16X8, Opcode::UwidenHigh) => (VecExtendOp::Uxtl8, true),
(I32X4, Opcode::SwidenLow) => (VecExtendOp::Sxtl16, false),
(I32X4, Opcode::SwidenHigh) => (VecExtendOp::Sxtl16, true),
(I32X4, Opcode::UwidenLow) => (VecExtendOp::Uxtl16, false),
(I32X4, Opcode::UwidenHigh) => (VecExtendOp::Uxtl16, true),
(I64X2, Opcode::SwidenLow) => (VecExtendOp::Sxtl32, false),
(I64X2, Opcode::SwidenHigh) => (VecExtendOp::Sxtl32, true),
(I64X2, Opcode::UwidenLow) => (VecExtendOp::Uxtl32, false),
(I64X2, Opcode::UwidenHigh) => (VecExtendOp::Uxtl32, true),
(ty, _) => {
return Err(CodegenError::Unsupported(format!(
"{}: Unsupported type: {:?}",
op, ty
)));
}
};
ctx.emit(Inst::VecExtend {
t,
rd,
rn,
high_half,
});
implemented_in_isle(ctx)
}
Opcode::TlsValue => match flags.tls_model() {
@@ -1557,10 +1484,11 @@ pub(crate) fn lower_insn_to_regs(
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
ctx.emit(Inst::VecExtend {
t: VecExtendOp::Sxtl32,
t: VecExtendOp::Sxtl,
rd,
rn,
high_half: false,
lane_size: ScalarSize::Size64,
});
ctx.emit(Inst::VecMisc {
op: VecMisc2::Scvtf,