AArch64: Implement SIMD conversions
Copyright (c) 2020, Arm Limited.
@@ -7,7 +7,7 @@ use crate::ir::Inst as IRInst;
use crate::ir::{InstructionData, Opcode, TrapCode};
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::CodegenResult;
use crate::{CodegenError, CodegenResult};

use crate::isa::aarch64::abi::*;
use crate::isa::aarch64::inst::*;
@@ -66,7 +66,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let rd = get_output_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let ty = ty.unwrap();
if ty_bits(ty) < 128 {
if !ty.is_vector() {
let (rm, negated) = put_input_in_rse_imm12_maybe_negated(
ctx,
inputs[1],
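Several of the hunks below make the same one-line substitution: the scalar path used to be guarded by a bit-width test and is now guarded by a direct vector check. A minimal stand-alone sketch of the intent, using made-up stand-in types rather than Cranelift's `Type`/`ty_bits` (all names here are illustrative only):

    // Stand-ins for Cranelift's Type and ty_bits; not the real API.
    #[derive(Clone, Copy)]
    enum Ty {
        I32,
        I64,
        I32X4,
    }

    fn bits(ty: Ty) -> u32 {
        match ty {
            Ty::I32 => 32,
            Ty::I64 => 64,
            Ty::I32X4 => 128,
        }
    }

    fn is_vector(ty: Ty) -> bool {
        matches!(ty, Ty::I32X4)
    }

    // The old guard asked "narrower than 128 bits?"; the new one asks the real
    // question, "is this a scalar?", which keeps stating the intent even if a
    // 128-bit scalar type ever reaches these arms.
    fn takes_scalar_path(ty: Ty) -> bool {
        !is_vector(ty) // previously: bits(ty) < 128
    }

    fn main() {
        assert!(takes_scalar_path(Ty::I64));
        assert!(!takes_scalar_path(Ty::I32X4));
        // The two guards agree for the types handled in this file today:
        assert_eq!(takes_scalar_path(Ty::I32), bits(Ty::I32) < 128);
    }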
@@ -94,7 +94,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let rd = get_output_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let ty = ty.unwrap();
if ty_bits(ty) < 128 {
if !ty.is_vector() {
let (rm, negated) = put_input_in_rse_imm12_maybe_negated(
ctx,
inputs[1],
@@ -124,7 +124,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let is_signed = op == Opcode::SaddSat || op == Opcode::SsubSat;
let ty = ty.unwrap();
let rd = get_output_reg(ctx, outputs[0]);
if ty_bits(ty) < 128 {
if !ty.is_vector() {
let narrow_mode = if is_signed {
NarrowValueMode::SignExtend64
} else {
@@ -180,7 +180,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::Ineg => {
let rd = get_output_reg(ctx, outputs[0]);
let ty = ty.unwrap();
if ty_bits(ty) < 128 {
if !ty.is_vector() {
let rn = zero_reg();
let rm = put_input_in_rse_imm12(ctx, inputs[0], NarrowValueMode::None);
let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
@@ -201,7 +201,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let ty = ty.unwrap();
if ty_bits(ty) < 128 {
if !ty.is_vector() {
let alu_op = choose_32_64(ty, ALUOp::MAdd32, ALUOp::MAdd64);
ctx.emit(Inst::AluRRRR {
alu_op,
@@ -274,6 +274,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
rd: tmp1,
rn,
size: VectorSize::Size32x2,
high_half: false,
});

// Sum the respective high half components.
@@ -293,6 +294,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
rd: tmp2,
rn: rm,
size: VectorSize::Size32x2,
high_half: false,
});

// Shift the high half components, into the high half.
@@ -570,7 +572,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::Bnot => {
let rd = get_output_reg(ctx, outputs[0]);
let ty = ty.unwrap();
if ty_bits(ty) < 128 {
if !ty.is_vector() {
let rm = put_input_in_rs_immlogic(ctx, inputs[0], NarrowValueMode::None);
let alu_op = choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64);
// NOT rd, rm ==> ORR_NOT rd, zero, rm
@@ -594,7 +596,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::BxorNot => {
let rd = get_output_reg(ctx, outputs[0]);
let ty = ty.unwrap();
if ty_bits(ty) < 128 {
if !ty.is_vector() {
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_rs_immlogic(ctx, inputs[1], NarrowValueMode::None);
let alu_op = match op {
@@ -633,7 +635,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => {
let ty = ty.unwrap();
let rd = get_output_reg(ctx, outputs[0]);
if ty_bits(ty) < 128 {
if !ty.is_vector() {
let size = OperandSize::from_bits(ty_bits(ty));
let narrow_mode = match (op, size) {
(Opcode::Ishl, _) => NarrowValueMode::None,
@@ -1159,6 +1161,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
t,
rd,
rn: rd.to_reg(),
high_half: false,
});
}
}
@@ -1433,7 +1436,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(

Opcode::Bitselect | Opcode::Vselect => {
let ty = ty.unwrap();
if ty_bits(ty) < 128 {
if !ty.is_vector() {
debug_assert_ne!(Opcode::Vselect, op);
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
let rd = get_output_reg(ctx, outputs[0]);
@@ -1696,7 +1699,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
};
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);

if ty_bits(ty) < 128 {
if !ty.is_vector() {
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode);
ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm));
@@ -1716,7 +1719,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);

if ty_bits(ty) < 128 {
if !ty.is_vector() {
match ty_bits(ty) {
32 => {
ctx.emit(Inst::FpuCmp32 { rn, rm });
@@ -2106,7 +2109,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
if bits < 128 {
if !ty.is_vector() {
let fpu_op = match (op, bits) {
(Opcode::Fadd, 32) => FPUOp2::Add32,
(Opcode::Fadd, 64) => FPUOp2::Add64,
@@ -2149,7 +2152,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let bits = ty_bits(ty);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
if bits < 128 {
if !ty.is_vector() {
let fpu_op = match (op, bits) {
(Opcode::Sqrt, 32) => FPUOp1::Sqrt32,
(Opcode::Sqrt, 64) => FPUOp1::Sqrt64,
@@ -2414,153 +2417,186 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}

Opcode::FcvtFromUint | Opcode::FcvtFromSint => {
let in_bits = ty_bits(ctx.input_ty(insn, 0));
let out_bits = ty_bits(ctx.output_ty(insn, 0));
let ty = ty.unwrap();
let signed = op == Opcode::FcvtFromSint;
let op = match (signed, in_bits, out_bits) {
(false, 8, 32) | (false, 16, 32) | (false, 32, 32) => IntToFpuOp::U32ToF32,
(true, 8, 32) | (true, 16, 32) | (true, 32, 32) => IntToFpuOp::I32ToF32,
(false, 8, 64) | (false, 16, 64) | (false, 32, 64) => IntToFpuOp::U32ToF64,
(true, 8, 64) | (true, 16, 64) | (true, 32, 64) => IntToFpuOp::I32ToF64,
(false, 64, 32) => IntToFpuOp::U64ToF32,
(true, 64, 32) => IntToFpuOp::I64ToF32,
(false, 64, 64) => IntToFpuOp::U64ToF64,
(true, 64, 64) => IntToFpuOp::I64ToF64,
_ => panic!("Unknown input/output-bits combination"),
};
let narrow_mode = match (signed, in_bits) {
(false, 8) | (false, 16) | (false, 32) => NarrowValueMode::ZeroExtend32,
(true, 8) | (true, 16) | (true, 32) => NarrowValueMode::SignExtend32,
(false, 64) => NarrowValueMode::ZeroExtend64,
(true, 64) => NarrowValueMode::SignExtend64,
_ => panic!("Unknown input size"),
};
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
let rd = get_output_reg(ctx, outputs[0]);
ctx.emit(Inst::IntToFpu { op, rd, rn });

if ty.is_vector() {
let op = if signed {
VecMisc2::Scvtf
} else {
VecMisc2::Ucvtf
};
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);

ctx.emit(Inst::VecMisc {
op,
rd,
rn,
size: VectorSize::from_ty(ty),
});
} else {
let in_bits = ty_bits(ctx.input_ty(insn, 0));
let out_bits = ty_bits(ty);
let op = match (signed, in_bits, out_bits) {
(false, 8, 32) | (false, 16, 32) | (false, 32, 32) => IntToFpuOp::U32ToF32,
(true, 8, 32) | (true, 16, 32) | (true, 32, 32) => IntToFpuOp::I32ToF32,
(false, 8, 64) | (false, 16, 64) | (false, 32, 64) => IntToFpuOp::U32ToF64,
(true, 8, 64) | (true, 16, 64) | (true, 32, 64) => IntToFpuOp::I32ToF64,
(false, 64, 32) => IntToFpuOp::U64ToF32,
(true, 64, 32) => IntToFpuOp::I64ToF32,
(false, 64, 64) => IntToFpuOp::U64ToF64,
(true, 64, 64) => IntToFpuOp::I64ToF64,
_ => panic!("Unknown input/output-bits combination"),
};
let narrow_mode = match (signed, in_bits) {
(false, 8) | (false, 16) | (false, 32) => NarrowValueMode::ZeroExtend32,
(true, 8) | (true, 16) | (true, 32) => NarrowValueMode::SignExtend32,
(false, 64) => NarrowValueMode::ZeroExtend64,
(true, 64) => NarrowValueMode::SignExtend64,
_ => panic!("Unknown input size"),
};
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
ctx.emit(Inst::IntToFpu { op, rd, rn });
}
}

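In the scalar branch of this arm, a narrow integer input is first zero- or sign-extended (the `narrow_mode` choice) and only then converted, while the new vector branch applies UCVTF/SCVTF lane-wise. A self-contained sketch of that scalar semantics, illustrative only and not the backend code itself:

    // Model of "extend, then convert", matching the narrow_mode + IntToFpuOp pairing.
    fn u16_to_f32(x: u16) -> f32 {
        // ZeroExtend32 followed by U32ToF32:
        (x as u32) as f32
    }

    fn i8_to_f64(x: i8) -> f64 {
        // SignExtend32 followed by I32ToF64:
        (x as i32) as f64
    }

    fn main() {
        assert_eq!(u16_to_f32(0xFFFF), 65535.0);
        assert_eq!(i8_to_f64(-5), -5.0);
    }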
Opcode::FcvtToUintSat | Opcode::FcvtToSintSat => {
let in_ty = ctx.input_ty(insn, 0);
let in_bits = ty_bits(in_ty);
let out_ty = ctx.output_ty(insn, 0);
let out_bits = ty_bits(out_ty);
let ty = ty.unwrap();
let out_signed = op == Opcode::FcvtToSintSat;
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);

// FIMM Vtmp1, u32::MAX or u64::MAX or i32::MAX or i64::MAX
// FMIN Vtmp2, Vin, Vtmp1
// FIMM Vtmp1, 0 or 0 or i32::MIN or i64::MIN
// FMAX Vtmp2, Vtmp2, Vtmp1
// (if signed) FIMM Vtmp1, 0
// FCMP Vin, Vin
// FCSEL Vtmp2, Vtmp1, Vtmp2, NE // on NaN, select 0
// convert Rout, Vtmp2
if ty.is_vector() {
let op = if out_signed {
VecMisc2::Fcvtzs
} else {
VecMisc2::Fcvtzu
};

assert!(in_bits == 32 || in_bits == 64);
assert!(out_bits == 32 || out_bits == 64);

let min: f64 = match (out_bits, out_signed) {
(32, true) => std::i32::MIN as f64,
(32, false) => 0.0,
(64, true) => std::i64::MIN as f64,
(64, false) => 0.0,
_ => unreachable!(),
};

let max = match (out_bits, out_signed) {
(32, true) => std::i32::MAX as f64,
(32, false) => std::u32::MAX as f64,
(64, true) => std::i64::MAX as f64,
(64, false) => std::u64::MAX as f64,
_ => unreachable!(),
};

let rtmp1 = ctx.alloc_tmp(RegClass::V128, in_ty);
let rtmp2 = ctx.alloc_tmp(RegClass::V128, in_ty);

if in_bits == 32 {
ctx.emit(Inst::LoadFpuConst32 {
rd: rtmp1,
const_data: max as f32,
ctx.emit(Inst::VecMisc {
op,
rd,
rn,
size: VectorSize::from_ty(ty),
});
} else {
ctx.emit(Inst::LoadFpuConst64 {
rd: rtmp1,
const_data: max,
});
}
ctx.emit(Inst::FpuRRR {
fpu_op: choose_32_64(in_ty, FPUOp2::Min32, FPUOp2::Min64),
rd: rtmp2,
rn: rn,
rm: rtmp1.to_reg(),
});
if in_bits == 32 {
ctx.emit(Inst::LoadFpuConst32 {
rd: rtmp1,
const_data: min as f32,
});
} else {
ctx.emit(Inst::LoadFpuConst64 {
rd: rtmp1,
const_data: min,
});
}
ctx.emit(Inst::FpuRRR {
fpu_op: choose_32_64(in_ty, FPUOp2::Max32, FPUOp2::Max64),
rd: rtmp2,
rn: rtmp2.to_reg(),
rm: rtmp1.to_reg(),
});
if out_signed {
let in_ty = ctx.input_ty(insn, 0);
let in_bits = ty_bits(in_ty);
let out_bits = ty_bits(ty);
// FIMM Vtmp1, u32::MAX or u64::MAX or i32::MAX or i64::MAX
// FMIN Vtmp2, Vin, Vtmp1
// FIMM Vtmp1, 0 or 0 or i32::MIN or i64::MIN
// FMAX Vtmp2, Vtmp2, Vtmp1
// (if signed) FIMM Vtmp1, 0
// FCMP Vin, Vin
// FCSEL Vtmp2, Vtmp1, Vtmp2, NE // on NaN, select 0
// convert Rout, Vtmp2

assert!(in_bits == 32 || in_bits == 64);
assert!(out_bits == 32 || out_bits == 64);

let min: f64 = match (out_bits, out_signed) {
(32, true) => std::i32::MIN as f64,
(32, false) => 0.0,
(64, true) => std::i64::MIN as f64,
(64, false) => 0.0,
_ => unreachable!(),
};

let max = match (out_bits, out_signed) {
(32, true) => std::i32::MAX as f64,
(32, false) => std::u32::MAX as f64,
(64, true) => std::i64::MAX as f64,
(64, false) => std::u64::MAX as f64,
_ => unreachable!(),
};

let rtmp1 = ctx.alloc_tmp(RegClass::V128, in_ty);
let rtmp2 = ctx.alloc_tmp(RegClass::V128, in_ty);

if in_bits == 32 {
ctx.emit(Inst::LoadFpuConst32 {
rd: rtmp1,
const_data: 0.0,
const_data: max as f32,
});
} else {
ctx.emit(Inst::LoadFpuConst64 {
rd: rtmp1,
const_data: 0.0,
const_data: max,
});
}
}
if in_bits == 32 {
ctx.emit(Inst::FpuCmp32 { rn: rn, rm: rn });
ctx.emit(Inst::FpuCSel32 {
ctx.emit(Inst::FpuRRR {
fpu_op: choose_32_64(in_ty, FPUOp2::Min32, FPUOp2::Min64),
rd: rtmp2,
rn: rtmp1.to_reg(),
rm: rtmp2.to_reg(),
cond: Cond::Ne,
rn: rn,
rm: rtmp1.to_reg(),
});
} else {
ctx.emit(Inst::FpuCmp64 { rn: rn, rm: rn });
ctx.emit(Inst::FpuCSel64 {
if in_bits == 32 {
ctx.emit(Inst::LoadFpuConst32 {
rd: rtmp1,
const_data: min as f32,
});
} else {
ctx.emit(Inst::LoadFpuConst64 {
rd: rtmp1,
const_data: min,
});
}
ctx.emit(Inst::FpuRRR {
fpu_op: choose_32_64(in_ty, FPUOp2::Max32, FPUOp2::Max64),
rd: rtmp2,
rn: rtmp1.to_reg(),
rm: rtmp2.to_reg(),
cond: Cond::Ne,
rn: rtmp2.to_reg(),
rm: rtmp1.to_reg(),
});
}
if out_signed {
if in_bits == 32 {
ctx.emit(Inst::LoadFpuConst32 {
rd: rtmp1,
const_data: 0.0,
});
} else {
ctx.emit(Inst::LoadFpuConst64 {
rd: rtmp1,
const_data: 0.0,
});
}
}
if in_bits == 32 {
ctx.emit(Inst::FpuCmp32 { rn: rn, rm: rn });
ctx.emit(Inst::FpuCSel32 {
rd: rtmp2,
rn: rtmp1.to_reg(),
rm: rtmp2.to_reg(),
cond: Cond::Ne,
});
} else {
ctx.emit(Inst::FpuCmp64 { rn: rn, rm: rn });
ctx.emit(Inst::FpuCSel64 {
rd: rtmp2,
rn: rtmp1.to_reg(),
rm: rtmp2.to_reg(),
cond: Cond::Ne,
});
}

let cvt = match (in_bits, out_bits, out_signed) {
(32, 32, false) => FpuToIntOp::F32ToU32,
(32, 32, true) => FpuToIntOp::F32ToI32,
(32, 64, false) => FpuToIntOp::F32ToU64,
(32, 64, true) => FpuToIntOp::F32ToI64,
(64, 32, false) => FpuToIntOp::F64ToU32,
(64, 32, true) => FpuToIntOp::F64ToI32,
(64, 64, false) => FpuToIntOp::F64ToU64,
(64, 64, true) => FpuToIntOp::F64ToI64,
_ => unreachable!(),
};
ctx.emit(Inst::FpuToInt {
op: cvt,
rd,
rn: rtmp2.to_reg(),
});
let cvt = match (in_bits, out_bits, out_signed) {
(32, 32, false) => FpuToIntOp::F32ToU32,
(32, 32, true) => FpuToIntOp::F32ToI32,
(32, 64, false) => FpuToIntOp::F32ToU64,
(32, 64, true) => FpuToIntOp::F32ToI64,
(64, 32, false) => FpuToIntOp::F64ToU32,
(64, 32, true) => FpuToIntOp::F64ToI32,
(64, 64, false) => FpuToIntOp::F64ToU64,
(64, 64, true) => FpuToIntOp::F64ToI64,
_ => unreachable!(),
};
ctx.emit(Inst::FpuToInt {
op: cvt,
rd,
rn: rtmp2.to_reg(),
});
}
}

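The comment block in this arm spells out the scalar saturating strategy: clamp the input into the destination range with FMIN/FMAX, route NaN to zero with FCMP/FCSEL, then truncate; the new vector branch instead relies on FCVTZS/FCVTZU, which already saturate per lane. A plain-Rust model of the scalar sequence for f32 to i32, illustrative only:

    // Mirrors the FMIN / FMAX / FCMP+FCSEL / convert sequence described above.
    fn fcvt_to_sint_sat(x: f32) -> i32 {
        let max = i32::MAX as f32; // FIMM Vtmp1, i32::MAX
        let min = i32::MIN as f32; // FIMM Vtmp1, i32::MIN
        let clamped = x.min(max).max(min); // FMIN, then FMAX
        // FCMP Vin, Vin is unordered only for NaN; FCSEL then picks 0.
        let selected = if x.is_nan() { 0.0 } else { clamped };
        // Truncate toward zero, as the final conversion does.
        selected as i32
    }

    fn main() {
        assert_eq!(fcvt_to_sint_sat(f32::NAN), 0);
        assert_eq!(fcvt_to_sint_sat(-1.0e10), i32::MIN);
        assert_eq!(fcvt_to_sint_sat(1.5), 1);
    }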
Opcode::IaddIfcout => {
@@ -2689,12 +2725,62 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
});
}

Opcode::Snarrow
| Opcode::Unarrow
| Opcode::SwidenLow
| Opcode::SwidenHigh
| Opcode::UwidenLow
| Opcode::UwidenHigh => unimplemented!(),
Opcode::Snarrow | Opcode::Unarrow => {
let op = if op == Opcode::Snarrow {
VecMiscNarrowOp::Sqxtn
} else {
VecMiscNarrowOp::Sqxtun
};
let rd = get_output_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rn2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let ty = ty.unwrap();

ctx.emit(Inst::VecMiscNarrow {
op,
rd,
rn,
size: VectorSize::from_ty(ty),
high_half: false,
});
ctx.emit(Inst::VecMiscNarrow {
op,
rd,
rn: rn2,
size: VectorSize::from_ty(ty),
high_half: true,
});
}

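Snarrow and Unarrow consume two full-width inputs, so the arm above emits the narrowing instruction twice, the second time with high_half: true so the second source fills the upper lanes; SQXTUN is chosen for Unarrow because its result saturates into the unsigned range. A lane-wise model for a pair of i32x4 inputs narrowed to 16-bit lanes, illustrative only:

    // Each lane is saturated into the destination lane type; the second source
    // vector fills lanes 4..8, like the high_half: true emission above.
    fn snarrow(lo: [i32; 4], hi: [i32; 4]) -> [i16; 8] {
        let mut out = [0i16; 8];
        for (i, &v) in lo.iter().chain(hi.iter()).enumerate() {
            out[i] = v.clamp(i16::MIN as i32, i16::MAX as i32) as i16;
        }
        out
    }

    fn unarrow(lo: [i32; 4], hi: [i32; 4]) -> [u16; 8] {
        let mut out = [0u16; 8];
        for (i, &v) in lo.iter().chain(hi.iter()).enumerate() {
            out[i] = v.clamp(0, u16::MAX as i32) as u16;
        }
        out
    }

    fn main() {
        let n = snarrow([70_000, -70_000, 1, 2], [3, 4, 5, 6]);
        assert_eq!(n, [i16::MAX, i16::MIN, 1, 2, 3, 4, 5, 6]);
        let u = unarrow([70_000, -1, 3, 4], [0, 0, 0, 0]);
        assert_eq!(u, [u16::MAX, 0, 3, 4, 0, 0, 0, 0]);
    }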
Opcode::SwidenLow | Opcode::SwidenHigh | Opcode::UwidenLow | Opcode::UwidenHigh => {
let lane_type = ty.unwrap().lane_type();
let rd = get_output_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let (t, high_half) = match (lane_type, op) {
(I16, Opcode::SwidenLow) => (VecExtendOp::Sxtl8, false),
(I16, Opcode::SwidenHigh) => (VecExtendOp::Sxtl8, true),
(I16, Opcode::UwidenLow) => (VecExtendOp::Uxtl8, false),
(I16, Opcode::UwidenHigh) => (VecExtendOp::Uxtl8, true),
(I32, Opcode::SwidenLow) => (VecExtendOp::Sxtl16, false),
(I32, Opcode::SwidenHigh) => (VecExtendOp::Sxtl16, true),
(I32, Opcode::UwidenLow) => (VecExtendOp::Uxtl16, false),
(I32, Opcode::UwidenHigh) => (VecExtendOp::Uxtl16, true),
_ => {
return Err(CodegenError::Unsupported(format!(
"Unsupported SIMD vector lane type: {:?}",
lane_type
)));
}
};

ctx.emit(Inst::VecExtend {
t,
rd,
rn,
high_half,
});
}

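The widening arm picks SXTL/UXTL (or their high-half forms) based on the opcode and the destination lane type, doubling each lane of the chosen half. A lane-wise model for i8x16 to i16x8, illustrative only:

    // Sign-extend either the low or the high eight lanes, as SXTL/SXTL2 would;
    // the Uwiden opcodes would zero-extend instead.
    fn swiden(input: [i8; 16], high_half: bool) -> [i16; 8] {
        let start = if high_half { 8 } else { 0 };
        let mut out = [0i16; 8];
        for i in 0..8 {
            out[i] = input[start + i] as i16;
        }
        out
    }

    fn main() {
        let mut v = [0i8; 16];
        v[0] = -1;
        v[8] = 127;
        assert_eq!(swiden(v, false)[0], -1);
        assert_eq!(swiden(v, true)[0], 127);
    }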
Opcode::TlsValue => unimplemented!(),
}