Port Fcopysign..FcvtToSintSat to ISLE (AArch64) (#4753)

* Port `Fcopysign`..``FcvtToSintSat` to ISLE (AArch64)

Ported the existing implementations of the following opcodes to ISLE on
AArch64:
- `Fcopysign`
  - Also introduced missing support for `fcopysign` on vector values, as
    per the docs.
  - This introduces the vector encoding for the `SLI` machine
    instruction.
- `FcvtToUint`
- `FcvtToSint`
- `FcvtFromUint`
- `FcvtFromSint`
- `FcvtToUintSat`
- `FcvtToSintSat`

Copyright (c) 2022 Arm Limited

* Document helpers and abstract conversion checks
This commit is contained in:
Damian Heaton
2022-08-24 18:37:14 +01:00
committed by GitHub
parent 7e3c481f4e
commit 94bcbe8446
12 changed files with 863 additions and 548 deletions

View File

@@ -2,10 +2,9 @@
use super::lower::*;
use crate::binemit::CodeOffset;
use crate::ir::condcodes::FloatCC;
use crate::ir::types::*;
use crate::ir::Inst as IRInst;
use crate::ir::{InstructionData, Opcode, TrapCode};
use crate::ir::{InstructionData, Opcode};
use crate::isa::aarch64::abi::*;
use crate::isa::aarch64::inst::*;
use crate::isa::aarch64::settings as aarch64_settings;
@@ -978,408 +977,13 @@ pub(crate) fn lower_insn_to_regs(
Opcode::Fma => implemented_in_isle(ctx),
Opcode::Fcopysign => {
// Copy the sign bit from inputs[1] to inputs[0]. We use the following sequence:
//
// This is a scalar Fcopysign.
// This uses scalar NEON operations for 64-bit and vector operations (2S) for 32-bit.
// In the latter case it still sets all bits except the lowest 32 to 0.
//
// mov vd, vn
// ushr vtmp, vm, #63 / #31
// sli vd, vtmp, #63 / #31
Opcode::Fcopysign => implemented_in_isle(ctx),
let ty = ctx.output_ty(insn, 0);
Opcode::FcvtToUint | Opcode::FcvtToSint => implemented_in_isle(ctx),
if ty != F32 && ty != F64 {
return Err(CodegenError::Unsupported(format!(
"Fcopysign: Unsupported type: {:?}",
ty
)));
}
Opcode::FcvtFromUint | Opcode::FcvtFromSint => implemented_in_isle(ctx),
let bits = ty_bits(ty) as u8;
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let tmp = ctx.alloc_tmp(F64).only_reg().unwrap();
// Copy LHS to rd.
ctx.emit(Inst::gen_move(rd, rn, ty));
// Copy the sign bit to the lowest bit in tmp.
let imm = FPURightShiftImm::maybe_from_u8(bits - 1, bits).unwrap();
ctx.emit(Inst::FpuRRI {
fpu_op: choose_32_64(ty, FPUOpRI::UShr32(imm), FPUOpRI::UShr64(imm)),
rd: tmp,
rn: rm,
});
// Insert the bit from tmp into the sign bit of rd.
let imm = FPULeftShiftImm::maybe_from_u8(bits - 1, bits).unwrap();
ctx.emit(Inst::FpuRRI {
fpu_op: choose_32_64(ty, FPUOpRI::Sli32(imm), FPUOpRI::Sli64(imm)),
rd,
rn: tmp.to_reg(),
});
}
Opcode::FcvtToUint | Opcode::FcvtToSint => {
let input_ty = ctx.input_ty(insn, 0);
let in_bits = ty_bits(input_ty);
let output_ty = ty.unwrap();
let out_bits = ty_bits(output_ty);
let signed = op == Opcode::FcvtToSint;
let op = match (signed, in_bits, out_bits) {
(false, 32, 8) | (false, 32, 16) | (false, 32, 32) => FpuToIntOp::F32ToU32,
(true, 32, 8) | (true, 32, 16) | (true, 32, 32) => FpuToIntOp::F32ToI32,
(false, 32, 64) => FpuToIntOp::F32ToU64,
(true, 32, 64) => FpuToIntOp::F32ToI64,
(false, 64, 8) | (false, 64, 16) | (false, 64, 32) => FpuToIntOp::F64ToU32,
(true, 64, 8) | (true, 64, 16) | (true, 64, 32) => FpuToIntOp::F64ToI32,
(false, 64, 64) => FpuToIntOp::F64ToU64,
(true, 64, 64) => FpuToIntOp::F64ToI64,
_ => {
return Err(CodegenError::Unsupported(format!(
"{}: Unsupported types: {:?} -> {:?}",
op, input_ty, output_ty
)))
}
};
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
// First, check the output: it's important to carry the NaN conversion before the
// in-bounds conversion, per wasm semantics.
// Check that the input is not a NaN.
ctx.emit(Inst::FpuCmp {
size: ScalarSize::from_ty(input_ty),
rn,
rm: rn,
});
let trap_code = TrapCode::BadConversionToInteger;
ctx.emit(Inst::TrapIf {
trap_code,
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::Unordered)),
});
let tmp = ctx.alloc_tmp(I8X16).only_reg().unwrap();
// Check that the input is in range, with "truncate towards zero" semantics. This means
// we allow values that are slightly out of range:
// - for signed conversions, we allow values strictly greater than INT_MIN-1 (when this
// can be represented), and strictly less than INT_MAX+1 (when this can be
// represented).
// - for unsigned conversions, we allow values strictly greater than -1, and strictly
// less than UINT_MAX+1 (when this can be represented).
if in_bits == 32 {
// From float32.
let (low_bound, low_cond, high_bound) = match (signed, out_bits) {
(true, 8) => (
i8::min_value() as f32 - 1.,
FloatCC::GreaterThan,
i8::max_value() as f32 + 1.,
),
(true, 16) => (
i16::min_value() as f32 - 1.,
FloatCC::GreaterThan,
i16::max_value() as f32 + 1.,
),
(true, 32) => (
i32::min_value() as f32, // I32_MIN - 1 isn't precisely representable as a f32.
FloatCC::GreaterThanOrEqual,
i32::max_value() as f32 + 1.,
),
(true, 64) => (
i64::min_value() as f32, // I64_MIN - 1 isn't precisely representable as a f32.
FloatCC::GreaterThanOrEqual,
i64::max_value() as f32 + 1.,
),
(false, 8) => (-1., FloatCC::GreaterThan, u8::max_value() as f32 + 1.),
(false, 16) => (-1., FloatCC::GreaterThan, u16::max_value() as f32 + 1.),
(false, 32) => (-1., FloatCC::GreaterThan, u32::max_value() as f32 + 1.),
(false, 64) => (-1., FloatCC::GreaterThan, u64::max_value() as f32 + 1.),
_ => unreachable!(),
};
// >= low_bound
lower_constant_f32(ctx, tmp, low_bound);
ctx.emit(Inst::FpuCmp {
size: ScalarSize::Size32,
rn,
rm: tmp.to_reg(),
});
let trap_code = TrapCode::IntegerOverflow;
ctx.emit(Inst::TrapIf {
trap_code,
kind: CondBrKind::Cond(lower_fp_condcode(low_cond).invert()),
});
// <= high_bound
lower_constant_f32(ctx, tmp, high_bound);
ctx.emit(Inst::FpuCmp {
size: ScalarSize::Size32,
rn,
rm: tmp.to_reg(),
});
let trap_code = TrapCode::IntegerOverflow;
ctx.emit(Inst::TrapIf {
trap_code,
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan).invert()),
});
} else {
// From float64.
let (low_bound, low_cond, high_bound) = match (signed, out_bits) {
(true, 8) => (
i8::min_value() as f64 - 1.,
FloatCC::GreaterThan,
i8::max_value() as f64 + 1.,
),
(true, 16) => (
i16::min_value() as f64 - 1.,
FloatCC::GreaterThan,
i16::max_value() as f64 + 1.,
),
(true, 32) => (
i32::min_value() as f64 - 1.,
FloatCC::GreaterThan,
i32::max_value() as f64 + 1.,
),
(true, 64) => (
i64::min_value() as f64, // I64_MIN - 1 is not precisely representable as an i64.
FloatCC::GreaterThanOrEqual,
i64::max_value() as f64 + 1.,
),
(false, 8) => (-1., FloatCC::GreaterThan, u8::max_value() as f64 + 1.),
(false, 16) => (-1., FloatCC::GreaterThan, u16::max_value() as f64 + 1.),
(false, 32) => (-1., FloatCC::GreaterThan, u32::max_value() as f64 + 1.),
(false, 64) => (-1., FloatCC::GreaterThan, u64::max_value() as f64 + 1.),
_ => unreachable!(),
};
// >= low_bound
lower_constant_f64(ctx, tmp, low_bound);
ctx.emit(Inst::FpuCmp {
size: ScalarSize::Size64,
rn,
rm: tmp.to_reg(),
});
let trap_code = TrapCode::IntegerOverflow;
ctx.emit(Inst::TrapIf {
trap_code,
kind: CondBrKind::Cond(lower_fp_condcode(low_cond).invert()),
});
// <= high_bound
lower_constant_f64(ctx, tmp, high_bound);
ctx.emit(Inst::FpuCmp {
size: ScalarSize::Size64,
rn,
rm: tmp.to_reg(),
});
let trap_code = TrapCode::IntegerOverflow;
ctx.emit(Inst::TrapIf {
trap_code,
kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan).invert()),
});
};
// Do the conversion.
ctx.emit(Inst::FpuToInt { op, rd, rn });
}
Opcode::FcvtFromUint | Opcode::FcvtFromSint => {
let input_ty = ctx.input_ty(insn, 0);
let ty = ty.unwrap();
let signed = op == Opcode::FcvtFromSint;
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
if ty.is_vector() {
if input_ty.lane_bits() != ty.lane_bits() {
return Err(CodegenError::Unsupported(format!(
"{}: Unsupported types: {:?} -> {:?}",
op, input_ty, ty
)));
}
let op = if signed {
VecMisc2::Scvtf
} else {
VecMisc2::Ucvtf
};
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
ctx.emit(Inst::VecMisc {
op,
rd,
rn,
size: VectorSize::from_ty(ty),
});
} else {
let in_bits = ty_bits(input_ty);
let out_bits = ty_bits(ty);
let op = match (signed, in_bits, out_bits) {
(false, 8, 32) | (false, 16, 32) | (false, 32, 32) => IntToFpuOp::U32ToF32,
(true, 8, 32) | (true, 16, 32) | (true, 32, 32) => IntToFpuOp::I32ToF32,
(false, 8, 64) | (false, 16, 64) | (false, 32, 64) => IntToFpuOp::U32ToF64,
(true, 8, 64) | (true, 16, 64) | (true, 32, 64) => IntToFpuOp::I32ToF64,
(false, 64, 32) => IntToFpuOp::U64ToF32,
(true, 64, 32) => IntToFpuOp::I64ToF32,
(false, 64, 64) => IntToFpuOp::U64ToF64,
(true, 64, 64) => IntToFpuOp::I64ToF64,
_ => {
return Err(CodegenError::Unsupported(format!(
"{}: Unsupported types: {:?} -> {:?}",
op, input_ty, ty
)))
}
};
let narrow_mode = match (signed, in_bits) {
(false, 8) | (false, 16) | (false, 32) => NarrowValueMode::ZeroExtend32,
(true, 8) | (true, 16) | (true, 32) => NarrowValueMode::SignExtend32,
(false, 64) => NarrowValueMode::ZeroExtend64,
(true, 64) => NarrowValueMode::SignExtend64,
_ => unreachable!(),
};
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
ctx.emit(Inst::IntToFpu { op, rd, rn });
}
}
Opcode::FcvtToUintSat | Opcode::FcvtToSintSat => {
let in_ty = ctx.input_ty(insn, 0);
let ty = ty.unwrap();
let out_signed = op == Opcode::FcvtToSintSat;
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
if ty.is_vector() {
if in_ty.lane_bits() != ty.lane_bits() {
return Err(CodegenError::Unsupported(format!(
"{}: Unsupported types: {:?} -> {:?}",
op, in_ty, ty
)));
}
let op = if out_signed {
VecMisc2::Fcvtzs
} else {
VecMisc2::Fcvtzu
};
ctx.emit(Inst::VecMisc {
op,
rd,
rn,
size: VectorSize::from_ty(ty),
});
} else {
let in_bits = ty_bits(in_ty);
let out_bits = ty_bits(ty);
// FIMM Vtmp1, u32::MAX or u64::MAX or i32::MAX or i64::MAX
// FMIN Vtmp2, Vin, Vtmp1
// FIMM Vtmp1, 0 or 0 or i32::MIN or i64::MIN
// FMAX Vtmp2, Vtmp2, Vtmp1
// (if signed) FIMM Vtmp1, 0
// FCMP Vin, Vin
// FCSEL Vtmp2, Vtmp1, Vtmp2, NE // on NaN, select 0
// convert Rout, Vtmp2
assert!(in_ty.is_float() && (in_bits == 32 || in_bits == 64));
assert!(out_bits == 32 || out_bits == 64);
let min: f64 = match (out_bits, out_signed) {
(32, true) => std::i32::MIN as f64,
(32, false) => 0.0,
(64, true) => std::i64::MIN as f64,
(64, false) => 0.0,
_ => unreachable!(),
};
let max = match (out_bits, out_signed) {
(32, true) => std::i32::MAX as f64,
(32, false) => std::u32::MAX as f64,
(64, true) => std::i64::MAX as f64,
(64, false) => std::u64::MAX as f64,
_ => unreachable!(),
};
let rtmp1 = ctx.alloc_tmp(in_ty).only_reg().unwrap();
let rtmp2 = ctx.alloc_tmp(in_ty).only_reg().unwrap();
if in_bits == 32 {
lower_constant_f32(ctx, rtmp1, max as f32);
} else {
lower_constant_f64(ctx, rtmp1, max);
}
ctx.emit(Inst::FpuRRR {
fpu_op: FPUOp2::Min,
size: ScalarSize::from_ty(in_ty),
rd: rtmp2,
rn,
rm: rtmp1.to_reg(),
});
if in_bits == 32 {
lower_constant_f32(ctx, rtmp1, min as f32);
} else {
lower_constant_f64(ctx, rtmp1, min);
}
ctx.emit(Inst::FpuRRR {
fpu_op: FPUOp2::Max,
size: ScalarSize::from_ty(in_ty),
rd: rtmp2,
rn: rtmp2.to_reg(),
rm: rtmp1.to_reg(),
});
if out_signed {
if in_bits == 32 {
lower_constant_f32(ctx, rtmp1, 0.0);
} else {
lower_constant_f64(ctx, rtmp1, 0.0);
}
}
ctx.emit(Inst::FpuCmp {
size: ScalarSize::from_ty(in_ty),
rn,
rm: rn,
});
if in_bits == 32 {
ctx.emit(Inst::FpuCSel32 {
rd: rtmp2,
rn: rtmp1.to_reg(),
rm: rtmp2.to_reg(),
cond: Cond::Ne,
});
} else {
ctx.emit(Inst::FpuCSel64 {
rd: rtmp2,
rn: rtmp1.to_reg(),
rm: rtmp2.to_reg(),
cond: Cond::Ne,
});
}
let cvt = match (in_bits, out_bits, out_signed) {
(32, 32, false) => FpuToIntOp::F32ToU32,
(32, 32, true) => FpuToIntOp::F32ToI32,
(32, 64, false) => FpuToIntOp::F32ToU64,
(32, 64, true) => FpuToIntOp::F32ToI64,
(64, 32, false) => FpuToIntOp::F64ToU32,
(64, 32, true) => FpuToIntOp::F64ToI32,
(64, 64, false) => FpuToIntOp::F64ToU64,
(64, 64, true) => FpuToIntOp::F64ToI64,
_ => unreachable!(),
};
ctx.emit(Inst::FpuToInt {
op: cvt,
rd,
rn: rtmp2.to_reg(),
});
}
}
Opcode::FcvtToUintSat | Opcode::FcvtToSintSat => implemented_in_isle(ctx),
Opcode::IaddIfcout => {
// This is a two-output instruction that is needed for the