arm64: Implement saturating SIMD arithmetic

Copyright (c) 2020, Arm Limited.
Joey Gouly
2020-07-14 18:19:11 +01:00
parent 85ffc8f595
commit aa84a4173c
5 changed files with 260 additions and 64 deletions
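For context, Opcode::UaddSat, Opcode::SaddSat, Opcode::UsubSat and Opcode::SsubSat clamp the result to the bounds of the type instead of wrapping on overflow. A minimal illustration of that semantics in plain Rust (illustrative only, not part of this commit):

    // Saturating arithmetic clamps at the type's bounds rather than wrapping.
    fn main() {
        assert_eq!(250u8.saturating_add(10), u8::MAX);    // uadd_sat: clamps at 255
        assert_eq!(5u8.saturating_sub(10), u8::MIN);      // usub_sat: clamps at 0
        assert_eq!(120i8.saturating_add(10), i8::MAX);    // sadd_sat: clamps at 127
        assert_eq!((-120i8).saturating_sub(10), i8::MIN); // ssub_sat: clamps at -128
    }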

@@ -93,74 +93,64 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 });
             }
         }
-        Opcode::UaddSat | Opcode::SaddSat => {
+        Opcode::UaddSat | Opcode::SaddSat | Opcode::UsubSat | Opcode::SsubSat => {
             // We use the vector instruction set's saturating adds (UQADD /
             // SQADD), which require vector registers.
-            let is_signed = op == Opcode::SaddSat;
-            let narrow_mode = if is_signed {
-                NarrowValueMode::SignExtend64
-            } else {
-                NarrowValueMode::ZeroExtend64
-            };
-            let alu_op = if is_signed {
-                VecALUOp::SQAddScalar
-            } else {
-                VecALUOp::UQAddScalar
-            };
-            let va = ctx.alloc_tmp(RegClass::V128, I128);
-            let vb = ctx.alloc_tmp(RegClass::V128, I128);
-            let ra = put_input_in_reg(ctx, inputs[0], narrow_mode);
-            let rb = put_input_in_reg(ctx, inputs[1], narrow_mode);
+            let is_signed = op == Opcode::SaddSat || op == Opcode::SsubSat;
+            let ty = ty.unwrap();
             let rd = get_output_reg(ctx, outputs[0]);
-            ctx.emit(Inst::MovToVec64 { rd: va, rn: ra });
-            ctx.emit(Inst::MovToVec64 { rd: vb, rn: rb });
-            ctx.emit(Inst::VecRRR {
-                rd: va,
-                rn: va.to_reg(),
-                rm: vb.to_reg(),
-                alu_op,
-                ty: I64,
-            });
-            ctx.emit(Inst::MovFromVec {
-                rd,
-                rn: va.to_reg(),
-                idx: 0,
-                ty: I64,
-            });
-        }
+            if ty_bits(ty) < 128 {
+                let narrow_mode = if is_signed {
+                    NarrowValueMode::SignExtend64
+                } else {
+                    NarrowValueMode::ZeroExtend64
+                };
+                let alu_op = match op {
+                    Opcode::UaddSat => VecALUOp::UQAddScalar,
+                    Opcode::SaddSat => VecALUOp::SQAddScalar,
+                    Opcode::UsubSat => VecALUOp::UQSubScalar,
+                    Opcode::SsubSat => VecALUOp::SQSubScalar,
+                    _ => unreachable!(),
+                };
+                let va = ctx.alloc_tmp(RegClass::V128, I128);
+                let vb = ctx.alloc_tmp(RegClass::V128, I128);
+                let ra = put_input_in_reg(ctx, inputs[0], narrow_mode);
+                let rb = put_input_in_reg(ctx, inputs[1], narrow_mode);
+                ctx.emit(Inst::MovToVec64 { rd: va, rn: ra });
+                ctx.emit(Inst::MovToVec64 { rd: vb, rn: rb });
+                ctx.emit(Inst::VecRRR {
+                    rd: va,
+                    rn: va.to_reg(),
+                    rm: vb.to_reg(),
+                    alu_op,
+                    ty: I64,
+                });
+                ctx.emit(Inst::MovFromVec {
+                    rd,
+                    rn: va.to_reg(),
+                    idx: 0,
+                    ty: I64,
+                });
+            } else {
+                let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+                let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
-        Opcode::UsubSat | Opcode::SsubSat => {
-            let is_signed = op == Opcode::SsubSat;
-            let narrow_mode = if is_signed {
-                NarrowValueMode::SignExtend64
-            } else {
-                NarrowValueMode::ZeroExtend64
-            };
-            let alu_op = if is_signed {
-                VecALUOp::SQSubScalar
-            } else {
-                VecALUOp::UQSubScalar
-            };
-            let va = ctx.alloc_tmp(RegClass::V128, I128);
-            let vb = ctx.alloc_tmp(RegClass::V128, I128);
-            let ra = put_input_in_reg(ctx, inputs[0], narrow_mode);
-            let rb = put_input_in_reg(ctx, inputs[1], narrow_mode);
-            let rd = get_output_reg(ctx, outputs[0]);
-            ctx.emit(Inst::MovToVec64 { rd: va, rn: ra });
-            ctx.emit(Inst::MovToVec64 { rd: vb, rn: rb });
-            ctx.emit(Inst::VecRRR {
-                rd: va,
-                rn: va.to_reg(),
-                rm: vb.to_reg(),
-                alu_op,
-                ty: I64,
-            });
-            ctx.emit(Inst::MovFromVec {
-                rd,
-                rn: va.to_reg(),
-                idx: 0,
-                ty: I64,
-            });
+                let alu_op = match op {
+                    Opcode::UaddSat => VecALUOp::Uqadd,
+                    Opcode::SaddSat => VecALUOp::Sqadd,
+                    Opcode::UsubSat => VecALUOp::Uqsub,
+                    Opcode::SsubSat => VecALUOp::Sqsub,
+                    _ => unreachable!(),
+                };
+                ctx.emit(Inst::VecRRR {
+                    rd,
+                    rn,
+                    rm,
+                    alu_op,
+                    ty,
+                });
+            }
         }
         Opcode::Ineg => {
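On AArch64, saturating integer arithmetic (UQADD / SQADD / UQSUB / SQSUB) exists only in the SIMD & FP register file, which is why the narrow path above moves each operand out of a general-purpose register (MovToVec64), applies the 64-bit scalar saturating operation, and moves the result back (MovFromVec), while the 128-bit path emits a single lane-wise VecRRR instruction directly. Per lane, the vector forms behave like Rust's saturating integer ops; a minimal sketch of that semantics (the i16x8 shape and the sqadd_i16x8 helper are illustrative choices, not taken from the commit):

    // Per-lane behavior of the vector path's Sqadd, sketched on an i16x8 vector:
    // each lane clamps at i16::MIN / i16::MAX instead of wrapping.
    fn sqadd_i16x8(a: [i16; 8], b: [i16; 8]) -> [i16; 8] {
        let mut out = [0i16; 8];
        for i in 0..8 {
            out[i] = a[i].saturating_add(b[i]);
        }
        out
    }

    fn main() {
        let a = [i16::MAX, -5, 100, 0, 0, 0, 0, 0];
        let b = [1, i16::MIN, 100, 0, 0, 0, 0, 0];
        let c = sqadd_i16x8(a, b);
        assert_eq!(c[0], i16::MAX); // 32767 + 1 saturates instead of wrapping
        assert_eq!(c[1], i16::MIN); // -5 + -32768 saturates at the bottom
    }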