Port widening ops to ISLE (AArch64) (#4751)
Ported the existing implementations of the following opcodes for AArch64 to ISLE, and implemented support for 64-bit vectors (per the docs): - `SwidenLow` - `SwidenHigh` - `UwidenLow` - `UwidenHigh` Also ported `WideningPairwiseDotProductS` as-is. Copyright (c) 2022 Arm Limited
This commit is contained in:
@@ -159,22 +159,23 @@ pub(crate) fn lower_insn_to_regs(
|
||||
});
|
||||
|
||||
let vec_extend = match op {
|
||||
Opcode::Sload8x8 => Some(VecExtendOp::Sxtl8),
|
||||
Opcode::Uload8x8 => Some(VecExtendOp::Uxtl8),
|
||||
Opcode::Sload16x4 => Some(VecExtendOp::Sxtl16),
|
||||
Opcode::Uload16x4 => Some(VecExtendOp::Uxtl16),
|
||||
Opcode::Sload32x2 => Some(VecExtendOp::Sxtl32),
|
||||
Opcode::Uload32x2 => Some(VecExtendOp::Uxtl32),
|
||||
Opcode::Sload8x8 => Some((VecExtendOp::Sxtl, ScalarSize::Size16)),
|
||||
Opcode::Uload8x8 => Some((VecExtendOp::Uxtl, ScalarSize::Size16)),
|
||||
Opcode::Sload16x4 => Some((VecExtendOp::Sxtl, ScalarSize::Size32)),
|
||||
Opcode::Uload16x4 => Some((VecExtendOp::Uxtl, ScalarSize::Size32)),
|
||||
Opcode::Sload32x2 => Some((VecExtendOp::Sxtl, ScalarSize::Size64)),
|
||||
Opcode::Uload32x2 => Some((VecExtendOp::Uxtl, ScalarSize::Size64)),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
if let Some(t) = vec_extend {
|
||||
if let Some((t, lane_size)) = vec_extend {
|
||||
let rd = dst.only_reg().unwrap();
|
||||
ctx.emit(Inst::VecExtend {
|
||||
t,
|
||||
rd,
|
||||
rn: rd.to_reg(),
|
||||
high_half: false,
|
||||
lane_size,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -961,46 +962,7 @@ pub(crate) fn lower_insn_to_regs(
|
||||
|
||||
Opcode::IaddPairwise => implemented_in_isle(ctx),
|
||||
|
||||
Opcode::WideningPairwiseDotProductS => {
|
||||
let r_y = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
let r_a = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let r_b = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
let ty = ty.unwrap();
|
||||
if ty == I32X4 {
|
||||
let tmp = ctx.alloc_tmp(I8X16).only_reg().unwrap();
|
||||
// The args have type I16X8.
|
||||
// "y = i32x4.dot_i16x8_s(a, b)"
|
||||
// => smull tmp, a, b
|
||||
// smull2 y, a, b
|
||||
// addp y, tmp, y
|
||||
ctx.emit(Inst::VecRRRLong {
|
||||
alu_op: VecRRRLongOp::Smull16,
|
||||
rd: tmp,
|
||||
rn: r_a,
|
||||
rm: r_b,
|
||||
high_half: false,
|
||||
});
|
||||
ctx.emit(Inst::VecRRRLong {
|
||||
alu_op: VecRRRLongOp::Smull16,
|
||||
rd: r_y,
|
||||
rn: r_a,
|
||||
rm: r_b,
|
||||
high_half: true,
|
||||
});
|
||||
ctx.emit(Inst::VecRRR {
|
||||
alu_op: VecALUOp::Addp,
|
||||
rd: r_y,
|
||||
rn: tmp.to_reg(),
|
||||
rm: r_y.to_reg(),
|
||||
size: VectorSize::Size32x4,
|
||||
});
|
||||
} else {
|
||||
return Err(CodegenError::Unsupported(format!(
|
||||
"Opcode::WideningPairwiseDotProductS: unsupported laneage: {:?}",
|
||||
ty
|
||||
)));
|
||||
}
|
||||
}
|
||||
Opcode::WideningPairwiseDotProductS => implemented_in_isle(ctx),
|
||||
|
||||
Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv | Opcode::Fmin | Opcode::Fmax => {
|
||||
implemented_in_isle(ctx)
|
||||
@@ -1485,42 +1447,7 @@ pub(crate) fn lower_insn_to_regs(
|
||||
Opcode::Snarrow | Opcode::Unarrow | Opcode::Uunarrow => implemented_in_isle(ctx),
|
||||
|
||||
Opcode::SwidenLow | Opcode::SwidenHigh | Opcode::UwidenLow | Opcode::UwidenHigh => {
|
||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let ty = ty.unwrap();
|
||||
let ty = if ty.is_dynamic_vector() {
|
||||
ty.dynamic_to_vector()
|
||||
.unwrap_or_else(|| panic!("Unsupported dynamic type: {}?", ty))
|
||||
} else {
|
||||
ty
|
||||
};
|
||||
let (t, high_half) = match (ty, op) {
|
||||
(I16X8, Opcode::SwidenLow) => (VecExtendOp::Sxtl8, false),
|
||||
(I16X8, Opcode::SwidenHigh) => (VecExtendOp::Sxtl8, true),
|
||||
(I16X8, Opcode::UwidenLow) => (VecExtendOp::Uxtl8, false),
|
||||
(I16X8, Opcode::UwidenHigh) => (VecExtendOp::Uxtl8, true),
|
||||
(I32X4, Opcode::SwidenLow) => (VecExtendOp::Sxtl16, false),
|
||||
(I32X4, Opcode::SwidenHigh) => (VecExtendOp::Sxtl16, true),
|
||||
(I32X4, Opcode::UwidenLow) => (VecExtendOp::Uxtl16, false),
|
||||
(I32X4, Opcode::UwidenHigh) => (VecExtendOp::Uxtl16, true),
|
||||
(I64X2, Opcode::SwidenLow) => (VecExtendOp::Sxtl32, false),
|
||||
(I64X2, Opcode::SwidenHigh) => (VecExtendOp::Sxtl32, true),
|
||||
(I64X2, Opcode::UwidenLow) => (VecExtendOp::Uxtl32, false),
|
||||
(I64X2, Opcode::UwidenHigh) => (VecExtendOp::Uxtl32, true),
|
||||
(ty, _) => {
|
||||
return Err(CodegenError::Unsupported(format!(
|
||||
"{}: Unsupported type: {:?}",
|
||||
op, ty
|
||||
)));
|
||||
}
|
||||
};
|
||||
|
||||
ctx.emit(Inst::VecExtend {
|
||||
t,
|
||||
rd,
|
||||
rn,
|
||||
high_half,
|
||||
});
|
||||
implemented_in_isle(ctx)
|
||||
}
|
||||
|
||||
Opcode::TlsValue => match flags.tls_model() {
|
||||
@@ -1557,10 +1484,11 @@ pub(crate) fn lower_insn_to_regs(
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
|
||||
ctx.emit(Inst::VecExtend {
|
||||
t: VecExtendOp::Sxtl32,
|
||||
t: VecExtendOp::Sxtl,
|
||||
rd,
|
||||
rn,
|
||||
high_half: false,
|
||||
lane_size: ScalarSize::Size64,
|
||||
});
|
||||
ctx.emit(Inst::VecMisc {
|
||||
op: VecMisc2::Scvtf,
|
||||
|
||||
Reference in New Issue
Block a user