Port Fcopysign..FcvtToSintSat to ISLE (AArch64) (#4753)
* Port `Fcopysign`..`FcvtToSintSat` to ISLE (AArch64)
Ported the existing implementations of the following opcodes to ISLE on
AArch64:
- `Fcopysign`
- Also introduced missing support for `fcopysign` on vector values, as
per the docs.
- This introduces the vector encoding for the `SLI` machine
instruction.
- `FcvtToUint`
- `FcvtToSint`
- `FcvtFromUint`
- `FcvtFromSint`
- `FcvtToUintSat`
- `FcvtToSintSat`
Copyright (c) 2022 Arm Limited
* Document helpers and abstract conversion checks
This commit is contained in:
@@ -2,10 +2,9 @@
|
||||
|
||||
use super::lower::*;
|
||||
use crate::binemit::CodeOffset;
|
||||
use crate::ir::condcodes::FloatCC;
|
||||
use crate::ir::types::*;
|
||||
use crate::ir::Inst as IRInst;
|
||||
use crate::ir::{InstructionData, Opcode, TrapCode};
|
||||
use crate::ir::{InstructionData, Opcode};
|
||||
use crate::isa::aarch64::abi::*;
|
||||
use crate::isa::aarch64::inst::*;
|
||||
use crate::isa::aarch64::settings as aarch64_settings;
|
||||
@@ -978,408 +977,13 @@ pub(crate) fn lower_insn_to_regs(
|
||||
|
||||
Opcode::Fma => implemented_in_isle(ctx),
|
||||
|
||||
Opcode::Fcopysign => {
|
||||
// Copy the sign bit from inputs[1] to inputs[0]. We use the following sequence:
|
||||
//
|
||||
// This is a scalar Fcopysign.
|
||||
// This uses scalar NEON operations for 64-bit and vector operations (2S) for 32-bit.
|
||||
// In the latter case it still sets all bits except the lowest 32 to 0.
|
||||
//
|
||||
// mov vd, vn
|
||||
// ushr vtmp, vm, #63 / #31
|
||||
// sli vd, vtmp, #63 / #31
|
||||
Opcode::Fcopysign => implemented_in_isle(ctx),
|
||||
|
||||
let ty = ctx.output_ty(insn, 0);
|
||||
Opcode::FcvtToUint | Opcode::FcvtToSint => implemented_in_isle(ctx),
|
||||
|
||||
if ty != F32 && ty != F64 {
|
||||
return Err(CodegenError::Unsupported(format!(
|
||||
"Fcopysign: Unsupported type: {:?}",
|
||||
ty
|
||||
)));
|
||||
}
|
||||
Opcode::FcvtFromUint | Opcode::FcvtFromSint => implemented_in_isle(ctx),
|
||||
|
||||
let bits = ty_bits(ty) as u8;
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
let tmp = ctx.alloc_tmp(F64).only_reg().unwrap();
|
||||
|
||||
// Copy LHS to rd.
|
||||
ctx.emit(Inst::gen_move(rd, rn, ty));
|
||||
|
||||
// Copy the sign bit to the lowest bit in tmp.
|
||||
let imm = FPURightShiftImm::maybe_from_u8(bits - 1, bits).unwrap();
|
||||
ctx.emit(Inst::FpuRRI {
|
||||
fpu_op: choose_32_64(ty, FPUOpRI::UShr32(imm), FPUOpRI::UShr64(imm)),
|
||||
rd: tmp,
|
||||
rn: rm,
|
||||
});
|
||||
|
||||
// Insert the bit from tmp into the sign bit of rd.
|
||||
let imm = FPULeftShiftImm::maybe_from_u8(bits - 1, bits).unwrap();
|
||||
ctx.emit(Inst::FpuRRI {
|
||||
fpu_op: choose_32_64(ty, FPUOpRI::Sli32(imm), FPUOpRI::Sli64(imm)),
|
||||
rd,
|
||||
rn: tmp.to_reg(),
|
||||
});
|
||||
}
|
||||
|
||||
Opcode::FcvtToUint | Opcode::FcvtToSint => {
|
||||
let input_ty = ctx.input_ty(insn, 0);
|
||||
let in_bits = ty_bits(input_ty);
|
||||
let output_ty = ty.unwrap();
|
||||
let out_bits = ty_bits(output_ty);
|
||||
let signed = op == Opcode::FcvtToSint;
|
||||
let op = match (signed, in_bits, out_bits) {
|
||||
(false, 32, 8) | (false, 32, 16) | (false, 32, 32) => FpuToIntOp::F32ToU32,
|
||||
(true, 32, 8) | (true, 32, 16) | (true, 32, 32) => FpuToIntOp::F32ToI32,
|
||||
(false, 32, 64) => FpuToIntOp::F32ToU64,
|
||||
(true, 32, 64) => FpuToIntOp::F32ToI64,
|
||||
(false, 64, 8) | (false, 64, 16) | (false, 64, 32) => FpuToIntOp::F64ToU32,
|
||||
(true, 64, 8) | (true, 64, 16) | (true, 64, 32) => FpuToIntOp::F64ToI32,
|
||||
(false, 64, 64) => FpuToIntOp::F64ToU64,
|
||||
(true, 64, 64) => FpuToIntOp::F64ToI64,
|
||||
_ => {
|
||||
return Err(CodegenError::Unsupported(format!(
|
||||
"{}: Unsupported types: {:?} -> {:?}",
|
||||
op, input_ty, output_ty
|
||||
)))
|
||||
}
|
||||
};
|
||||
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
|
||||
// First, check the output: it's important to carry the NaN conversion before the
|
||||
// in-bounds conversion, per wasm semantics.
|
||||
|
||||
// Check that the input is not a NaN.
|
||||
ctx.emit(Inst::FpuCmp {
|
||||
size: ScalarSize::from_ty(input_ty),
|
||||
rn,
|
||||
rm: rn,
|
||||
});
|
||||
let trap_code = TrapCode::BadConversionToInteger;
|
||||
ctx.emit(Inst::TrapIf {
|
||||
trap_code,
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::Unordered)),
|
||||
});
|
||||
|
||||
let tmp = ctx.alloc_tmp(I8X16).only_reg().unwrap();
|
||||
|
||||
// Check that the input is in range, with "truncate towards zero" semantics. This means
|
||||
// we allow values that are slightly out of range:
|
||||
// - for signed conversions, we allow values strictly greater than INT_MIN-1 (when this
|
||||
// can be represented), and strictly less than INT_MAX+1 (when this can be
|
||||
// represented).
|
||||
// - for unsigned conversions, we allow values strictly greater than -1, and strictly
|
||||
// less than UINT_MAX+1 (when this can be represented).
|
||||
|
||||
if in_bits == 32 {
|
||||
// From float32.
|
||||
let (low_bound, low_cond, high_bound) = match (signed, out_bits) {
|
||||
(true, 8) => (
|
||||
i8::min_value() as f32 - 1.,
|
||||
FloatCC::GreaterThan,
|
||||
i8::max_value() as f32 + 1.,
|
||||
),
|
||||
(true, 16) => (
|
||||
i16::min_value() as f32 - 1.,
|
||||
FloatCC::GreaterThan,
|
||||
i16::max_value() as f32 + 1.,
|
||||
),
|
||||
(true, 32) => (
|
||||
i32::min_value() as f32, // I32_MIN - 1 isn't precisely representable as a f32.
|
||||
FloatCC::GreaterThanOrEqual,
|
||||
i32::max_value() as f32 + 1.,
|
||||
),
|
||||
(true, 64) => (
|
||||
i64::min_value() as f32, // I64_MIN - 1 isn't precisely representable as a f32.
|
||||
FloatCC::GreaterThanOrEqual,
|
||||
i64::max_value() as f32 + 1.,
|
||||
),
|
||||
(false, 8) => (-1., FloatCC::GreaterThan, u8::max_value() as f32 + 1.),
|
||||
(false, 16) => (-1., FloatCC::GreaterThan, u16::max_value() as f32 + 1.),
|
||||
(false, 32) => (-1., FloatCC::GreaterThan, u32::max_value() as f32 + 1.),
|
||||
(false, 64) => (-1., FloatCC::GreaterThan, u64::max_value() as f32 + 1.),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
// >= low_bound
|
||||
lower_constant_f32(ctx, tmp, low_bound);
|
||||
ctx.emit(Inst::FpuCmp {
|
||||
size: ScalarSize::Size32,
|
||||
rn,
|
||||
rm: tmp.to_reg(),
|
||||
});
|
||||
let trap_code = TrapCode::IntegerOverflow;
|
||||
ctx.emit(Inst::TrapIf {
|
||||
trap_code,
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(low_cond).invert()),
|
||||
});
|
||||
|
||||
// <= high_bound
|
||||
lower_constant_f32(ctx, tmp, high_bound);
|
||||
ctx.emit(Inst::FpuCmp {
|
||||
size: ScalarSize::Size32,
|
||||
rn,
|
||||
rm: tmp.to_reg(),
|
||||
});
|
||||
let trap_code = TrapCode::IntegerOverflow;
|
||||
ctx.emit(Inst::TrapIf {
|
||||
trap_code,
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan).invert()),
|
||||
});
|
||||
} else {
|
||||
// From float64.
|
||||
let (low_bound, low_cond, high_bound) = match (signed, out_bits) {
|
||||
(true, 8) => (
|
||||
i8::min_value() as f64 - 1.,
|
||||
FloatCC::GreaterThan,
|
||||
i8::max_value() as f64 + 1.,
|
||||
),
|
||||
(true, 16) => (
|
||||
i16::min_value() as f64 - 1.,
|
||||
FloatCC::GreaterThan,
|
||||
i16::max_value() as f64 + 1.,
|
||||
),
|
||||
(true, 32) => (
|
||||
i32::min_value() as f64 - 1.,
|
||||
FloatCC::GreaterThan,
|
||||
i32::max_value() as f64 + 1.,
|
||||
),
|
||||
(true, 64) => (
|
||||
i64::min_value() as f64, // I64_MIN - 1 is not precisely representable as an i64.
|
||||
FloatCC::GreaterThanOrEqual,
|
||||
i64::max_value() as f64 + 1.,
|
||||
),
|
||||
(false, 8) => (-1., FloatCC::GreaterThan, u8::max_value() as f64 + 1.),
|
||||
(false, 16) => (-1., FloatCC::GreaterThan, u16::max_value() as f64 + 1.),
|
||||
(false, 32) => (-1., FloatCC::GreaterThan, u32::max_value() as f64 + 1.),
|
||||
(false, 64) => (-1., FloatCC::GreaterThan, u64::max_value() as f64 + 1.),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
// >= low_bound
|
||||
lower_constant_f64(ctx, tmp, low_bound);
|
||||
ctx.emit(Inst::FpuCmp {
|
||||
size: ScalarSize::Size64,
|
||||
rn,
|
||||
rm: tmp.to_reg(),
|
||||
});
|
||||
let trap_code = TrapCode::IntegerOverflow;
|
||||
ctx.emit(Inst::TrapIf {
|
||||
trap_code,
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(low_cond).invert()),
|
||||
});
|
||||
|
||||
// <= high_bound
|
||||
lower_constant_f64(ctx, tmp, high_bound);
|
||||
ctx.emit(Inst::FpuCmp {
|
||||
size: ScalarSize::Size64,
|
||||
rn,
|
||||
rm: tmp.to_reg(),
|
||||
});
|
||||
let trap_code = TrapCode::IntegerOverflow;
|
||||
ctx.emit(Inst::TrapIf {
|
||||
trap_code,
|
||||
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan).invert()),
|
||||
});
|
||||
};
|
||||
|
||||
// Do the conversion.
|
||||
ctx.emit(Inst::FpuToInt { op, rd, rn });
|
||||
}
|
||||
|
||||
Opcode::FcvtFromUint | Opcode::FcvtFromSint => {
|
||||
let input_ty = ctx.input_ty(insn, 0);
|
||||
let ty = ty.unwrap();
|
||||
let signed = op == Opcode::FcvtFromSint;
|
||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
|
||||
if ty.is_vector() {
|
||||
if input_ty.lane_bits() != ty.lane_bits() {
|
||||
return Err(CodegenError::Unsupported(format!(
|
||||
"{}: Unsupported types: {:?} -> {:?}",
|
||||
op, input_ty, ty
|
||||
)));
|
||||
}
|
||||
|
||||
let op = if signed {
|
||||
VecMisc2::Scvtf
|
||||
} else {
|
||||
VecMisc2::Ucvtf
|
||||
};
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
|
||||
ctx.emit(Inst::VecMisc {
|
||||
op,
|
||||
rd,
|
||||
rn,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
} else {
|
||||
let in_bits = ty_bits(input_ty);
|
||||
let out_bits = ty_bits(ty);
|
||||
let op = match (signed, in_bits, out_bits) {
|
||||
(false, 8, 32) | (false, 16, 32) | (false, 32, 32) => IntToFpuOp::U32ToF32,
|
||||
(true, 8, 32) | (true, 16, 32) | (true, 32, 32) => IntToFpuOp::I32ToF32,
|
||||
(false, 8, 64) | (false, 16, 64) | (false, 32, 64) => IntToFpuOp::U32ToF64,
|
||||
(true, 8, 64) | (true, 16, 64) | (true, 32, 64) => IntToFpuOp::I32ToF64,
|
||||
(false, 64, 32) => IntToFpuOp::U64ToF32,
|
||||
(true, 64, 32) => IntToFpuOp::I64ToF32,
|
||||
(false, 64, 64) => IntToFpuOp::U64ToF64,
|
||||
(true, 64, 64) => IntToFpuOp::I64ToF64,
|
||||
_ => {
|
||||
return Err(CodegenError::Unsupported(format!(
|
||||
"{}: Unsupported types: {:?} -> {:?}",
|
||||
op, input_ty, ty
|
||||
)))
|
||||
}
|
||||
};
|
||||
let narrow_mode = match (signed, in_bits) {
|
||||
(false, 8) | (false, 16) | (false, 32) => NarrowValueMode::ZeroExtend32,
|
||||
(true, 8) | (true, 16) | (true, 32) => NarrowValueMode::SignExtend32,
|
||||
(false, 64) => NarrowValueMode::ZeroExtend64,
|
||||
(true, 64) => NarrowValueMode::SignExtend64,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
|
||||
ctx.emit(Inst::IntToFpu { op, rd, rn });
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::FcvtToUintSat | Opcode::FcvtToSintSat => {
|
||||
let in_ty = ctx.input_ty(insn, 0);
|
||||
let ty = ty.unwrap();
|
||||
let out_signed = op == Opcode::FcvtToSintSat;
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
|
||||
if ty.is_vector() {
|
||||
if in_ty.lane_bits() != ty.lane_bits() {
|
||||
return Err(CodegenError::Unsupported(format!(
|
||||
"{}: Unsupported types: {:?} -> {:?}",
|
||||
op, in_ty, ty
|
||||
)));
|
||||
}
|
||||
|
||||
let op = if out_signed {
|
||||
VecMisc2::Fcvtzs
|
||||
} else {
|
||||
VecMisc2::Fcvtzu
|
||||
};
|
||||
|
||||
ctx.emit(Inst::VecMisc {
|
||||
op,
|
||||
rd,
|
||||
rn,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
} else {
|
||||
let in_bits = ty_bits(in_ty);
|
||||
let out_bits = ty_bits(ty);
|
||||
// FIMM Vtmp1, u32::MAX or u64::MAX or i32::MAX or i64::MAX
|
||||
// FMIN Vtmp2, Vin, Vtmp1
|
||||
// FIMM Vtmp1, 0 or 0 or i32::MIN or i64::MIN
|
||||
// FMAX Vtmp2, Vtmp2, Vtmp1
|
||||
// (if signed) FIMM Vtmp1, 0
|
||||
// FCMP Vin, Vin
|
||||
// FCSEL Vtmp2, Vtmp1, Vtmp2, NE // on NaN, select 0
|
||||
// convert Rout, Vtmp2
|
||||
|
||||
assert!(in_ty.is_float() && (in_bits == 32 || in_bits == 64));
|
||||
assert!(out_bits == 32 || out_bits == 64);
|
||||
|
||||
let min: f64 = match (out_bits, out_signed) {
|
||||
(32, true) => std::i32::MIN as f64,
|
||||
(32, false) => 0.0,
|
||||
(64, true) => std::i64::MIN as f64,
|
||||
(64, false) => 0.0,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let max = match (out_bits, out_signed) {
|
||||
(32, true) => std::i32::MAX as f64,
|
||||
(32, false) => std::u32::MAX as f64,
|
||||
(64, true) => std::i64::MAX as f64,
|
||||
(64, false) => std::u64::MAX as f64,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let rtmp1 = ctx.alloc_tmp(in_ty).only_reg().unwrap();
|
||||
let rtmp2 = ctx.alloc_tmp(in_ty).only_reg().unwrap();
|
||||
|
||||
if in_bits == 32 {
|
||||
lower_constant_f32(ctx, rtmp1, max as f32);
|
||||
} else {
|
||||
lower_constant_f64(ctx, rtmp1, max);
|
||||
}
|
||||
ctx.emit(Inst::FpuRRR {
|
||||
fpu_op: FPUOp2::Min,
|
||||
size: ScalarSize::from_ty(in_ty),
|
||||
rd: rtmp2,
|
||||
rn,
|
||||
rm: rtmp1.to_reg(),
|
||||
});
|
||||
if in_bits == 32 {
|
||||
lower_constant_f32(ctx, rtmp1, min as f32);
|
||||
} else {
|
||||
lower_constant_f64(ctx, rtmp1, min);
|
||||
}
|
||||
ctx.emit(Inst::FpuRRR {
|
||||
fpu_op: FPUOp2::Max,
|
||||
size: ScalarSize::from_ty(in_ty),
|
||||
rd: rtmp2,
|
||||
rn: rtmp2.to_reg(),
|
||||
rm: rtmp1.to_reg(),
|
||||
});
|
||||
if out_signed {
|
||||
if in_bits == 32 {
|
||||
lower_constant_f32(ctx, rtmp1, 0.0);
|
||||
} else {
|
||||
lower_constant_f64(ctx, rtmp1, 0.0);
|
||||
}
|
||||
}
|
||||
ctx.emit(Inst::FpuCmp {
|
||||
size: ScalarSize::from_ty(in_ty),
|
||||
rn,
|
||||
rm: rn,
|
||||
});
|
||||
if in_bits == 32 {
|
||||
ctx.emit(Inst::FpuCSel32 {
|
||||
rd: rtmp2,
|
||||
rn: rtmp1.to_reg(),
|
||||
rm: rtmp2.to_reg(),
|
||||
cond: Cond::Ne,
|
||||
});
|
||||
} else {
|
||||
ctx.emit(Inst::FpuCSel64 {
|
||||
rd: rtmp2,
|
||||
rn: rtmp1.to_reg(),
|
||||
rm: rtmp2.to_reg(),
|
||||
cond: Cond::Ne,
|
||||
});
|
||||
}
|
||||
|
||||
let cvt = match (in_bits, out_bits, out_signed) {
|
||||
(32, 32, false) => FpuToIntOp::F32ToU32,
|
||||
(32, 32, true) => FpuToIntOp::F32ToI32,
|
||||
(32, 64, false) => FpuToIntOp::F32ToU64,
|
||||
(32, 64, true) => FpuToIntOp::F32ToI64,
|
||||
(64, 32, false) => FpuToIntOp::F64ToU32,
|
||||
(64, 32, true) => FpuToIntOp::F64ToI32,
|
||||
(64, 64, false) => FpuToIntOp::F64ToU64,
|
||||
(64, 64, true) => FpuToIntOp::F64ToI64,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
ctx.emit(Inst::FpuToInt {
|
||||
op: cvt,
|
||||
rd,
|
||||
rn: rtmp2.to_reg(),
|
||||
});
|
||||
}
|
||||
}
|
||||
Opcode::FcvtToUintSat | Opcode::FcvtToSintSat => implemented_in_isle(ctx),
|
||||
|
||||
Opcode::IaddIfcout => {
|
||||
// This is a two-output instruction that is needed for the
|
||||
|
||||
Reference in New Issue
Block a user