Files
wasmtime/cranelift/codegen/src/isa/aarch64/lower.rs
Jamey Sharp 3d6d49daba cranelift: Remove of/nof overflow flags from icmp (#4879)
* cranelift: Remove of/nof overflow flags from icmp

Neither Wasmtime nor cg-clif use these flags under any circumstances.
From discussion on #3060 I see it's long been unclear what purpose these
flags served.

Fixes #3060, fixes #4406, and fixes #4875... by deleting all the code
that could have been buggy.

This changes the cranelift-fuzzgen input format by removing some IntCC
options, so I've gone ahead and enabled I128 icmp tests at the same
time. Since only the of/nof cases were failing before, I expect these to
work.

* Restore trapif tests

It's still useful to validate that iadd_ifcout's iflags result can be
forwarded correctly to trapif, and for that purpose it doesn't really
matter what condition code is checked.
2022-09-07 08:38:41 -07:00

1426 lines
48 KiB
Rust

//! Lowering rules for AArch64.
//!
//! TODO: opportunities for better code generation:
//!
//! - Smarter use of addressing modes. Recognize a+SCALE*b patterns. Recognize
//! pre/post-index opportunities.
//!
//! - Floating-point immediates (FIMM instruction).
use super::lower_inst;
use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::types::*;
use crate::ir::Inst as IRInst;
use crate::ir::{Opcode, Type, Value};
use crate::isa::aarch64::inst::*;
use crate::isa::aarch64::AArch64Backend;
use crate::machinst::lower::*;
use crate::machinst::{Reg, Writable};
use crate::{machinst::*, trace};
use crate::{CodegenError, CodegenResult};
use smallvec::{smallvec, SmallVec};
use std::cmp;
pub mod isle;
//============================================================================
// Result enum types.
//
// Lowering of a given value results in one of these enums, depending on the
// modes in which we can accept the value.
/// A lowering result: register, register-shift. An SSA value can always be
/// lowered into one of these options; the register form is the fallback.
#[derive(Clone, Debug)]
enum ResultRS {
Reg(Reg),
RegShift(Reg, ShiftOpAndAmt),
}
/// A lowering result: register, register-shift, register-extend. An SSA value can always be
/// lowered into one of these options; the register form is the fallback.
#[derive(Clone, Debug)]
enum ResultRSE {
Reg(Reg),
RegShift(Reg, ShiftOpAndAmt),
RegExtend(Reg, ExtendOp),
}
impl ResultRSE {
fn from_rs(rs: ResultRS) -> ResultRSE {
match rs {
ResultRS::Reg(r) => ResultRSE::Reg(r),
ResultRS::RegShift(r, s) => ResultRSE::RegShift(r, s),
}
}
}
/// A lowering result: register, register-shift, register-extend, or 12-bit immediate form.
/// An SSA value can always be lowered into one of these options; the register form is the
/// fallback.
#[derive(Clone, Debug)]
pub(crate) enum ResultRSEImm12 {
Reg(Reg),
RegShift(Reg, ShiftOpAndAmt),
RegExtend(Reg, ExtendOp),
Imm12(Imm12),
}
impl ResultRSEImm12 {
fn from_rse(rse: ResultRSE) -> ResultRSEImm12 {
match rse {
ResultRSE::Reg(r) => ResultRSEImm12::Reg(r),
ResultRSE::RegShift(r, s) => ResultRSEImm12::RegShift(r, s),
ResultRSE::RegExtend(r, e) => ResultRSEImm12::RegExtend(r, e),
}
}
}
//============================================================================
// Lowering: convert instruction inputs to forms that we can use.
/// Lower an instruction input to a 64-bit constant, if possible.
pub(crate) fn input_to_const(ctx: &mut Lower<Inst>, input: InsnInput) -> Option<u64> {
let input = ctx.get_input_as_source_or_const(input.insn, input.input);
input.constant
}
/// Lower an instruction input to a constant register-shift amount, if possible.
pub(crate) fn input_to_shiftimm(
ctx: &mut Lower<Inst>,
input: InsnInput,
) -> Option<ShiftOpShiftImm> {
input_to_const(ctx, input).and_then(ShiftOpShiftImm::maybe_from_shift)
}
/// How to handle narrow values loaded into registers; see note on `narrow_mode`
/// parameter to `put_input_in_*` below.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum NarrowValueMode {
None,
/// Zero-extend to 32 bits if original is < 32 bits.
ZeroExtend32,
/// Sign-extend to 32 bits if original is < 32 bits.
SignExtend32,
/// Zero-extend to 64 bits if original is < 64 bits.
ZeroExtend64,
/// Sign-extend to 64 bits if original is < 64 bits.
SignExtend64,
}
impl NarrowValueMode {
fn is_32bit(&self) -> bool {
match self {
NarrowValueMode::None => false,
NarrowValueMode::ZeroExtend32 | NarrowValueMode::SignExtend32 => true,
NarrowValueMode::ZeroExtend64 | NarrowValueMode::SignExtend64 => false,
}
}
fn is_signed(&self) -> bool {
match self {
NarrowValueMode::SignExtend32 | NarrowValueMode::SignExtend64 => true,
NarrowValueMode::ZeroExtend32 | NarrowValueMode::ZeroExtend64 => false,
NarrowValueMode::None => false,
}
}
}
/// Emits instruction(s) to generate the given constant value into newly-allocated
/// temporary registers, returning these registers.
fn generate_constant(ctx: &mut Lower<Inst>, ty: Type, c: u128) -> ValueRegs<Reg> {
let from_bits = ty_bits(ty);
let masked = if from_bits < 128 {
c & ((1u128 << from_bits) - 1)
} else {
c
};
let cst_copy = ctx.alloc_tmp(ty);
for inst in Inst::gen_constant(cst_copy, masked, ty, |ty| {
ctx.alloc_tmp(ty).only_reg().unwrap()
})
.into_iter()
{
ctx.emit(inst);
}
non_writable_value_regs(cst_copy)
}
/// Extends a register according to `narrow_mode`.
/// If extended, the value is always extended to 64 bits, for simplicity.
fn extend_reg(
ctx: &mut Lower<Inst>,
ty: Type,
in_reg: Reg,
is_const: bool,
narrow_mode: NarrowValueMode,
) -> Reg {
let from_bits = ty_bits(ty) as u8;
match (narrow_mode, from_bits) {
(NarrowValueMode::None, _) => in_reg,
(NarrowValueMode::ZeroExtend32, n) if n < 32 => {
let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
signed: false,
from_bits,
to_bits: 32,
});
tmp.to_reg()
}
(NarrowValueMode::SignExtend32, n) if n < 32 => {
let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
signed: true,
from_bits,
to_bits: 32,
});
tmp.to_reg()
}
(NarrowValueMode::ZeroExtend32, 32) | (NarrowValueMode::SignExtend32, 32) => in_reg,
(NarrowValueMode::ZeroExtend64, n) if n < 64 => {
if is_const {
// Constants are zero-extended to full 64-bit width on load already.
in_reg
} else {
let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
signed: false,
from_bits,
to_bits: 64,
});
tmp.to_reg()
}
}
(NarrowValueMode::SignExtend64, n) if n < 64 => {
let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
ctx.emit(Inst::Extend {
rd: tmp,
rn: in_reg,
signed: true,
from_bits,
to_bits: 64,
});
tmp.to_reg()
}
(_, 64) => in_reg,
(_, 128) => in_reg,
_ => panic!(
"Unsupported input width: input ty {} bits {} mode {:?}",
ty, from_bits, narrow_mode
),
}
}
/// Lowers an instruction input to multiple regs
fn lower_value_to_regs(ctx: &mut Lower<Inst>, value: Value) -> (ValueRegs<Reg>, Type, bool) {
trace!("lower_value_to_regs: value {:?}", value);
let ty = ctx.value_ty(value);
let inputs = ctx.get_value_as_source_or_const(value);
let is_const = inputs.constant.is_some();
let in_regs = if let Some(c) = inputs.constant {
// Generate constants fresh at each use to minimize long-range register pressure.
generate_constant(ctx, ty, c as u128)
} else {
ctx.put_value_in_regs(value)
};
(in_regs, ty, is_const)
}
/// Lower an instruction input to a register
///
/// The given register will be extended appropriately, according to
/// `narrow_mode` and the input's type. If extended, the value is
/// always extended to 64 bits, for simplicity.
pub(crate) fn put_input_in_reg(
ctx: &mut Lower<Inst>,
input: InsnInput,
narrow_mode: NarrowValueMode,
) -> Reg {
let value = ctx.input_as_value(input.insn, input.input);
put_value_in_reg(ctx, value, narrow_mode)
}
/// Like above, only for values
fn put_value_in_reg(ctx: &mut Lower<Inst>, value: Value, narrow_mode: NarrowValueMode) -> Reg {
let (in_regs, ty, is_const) = lower_value_to_regs(ctx, value);
let reg = in_regs
.only_reg()
.expect("Multi-register value not expected");
extend_reg(ctx, ty, reg, is_const, narrow_mode)
}
/// Lower an instruction input to multiple regs
pub(crate) fn put_input_in_regs(ctx: &mut Lower<Inst>, input: InsnInput) -> ValueRegs<Reg> {
let value = ctx.input_as_value(input.insn, input.input);
let (in_regs, _, _) = lower_value_to_regs(ctx, value);
in_regs
}
/// Lower an instruction input to a reg or reg/shift, or reg/extend operand.
///
/// The `narrow_mode` flag indicates whether the consumer of this value needs
/// the high bits clear. For many operations, such as an add/sub/mul or any
/// bitwise logical operation, the low-bit results depend only on the low-bit
/// inputs, so e.g. we can do an 8 bit add on 32 bit registers where the 8-bit
/// value is stored in the low 8 bits of the register and the high 24 bits are
/// undefined. If the op truly needs the high N bits clear (such as for a
/// divide or a right-shift or a compare-to-zero), `narrow_mode` should be
/// set to `ZeroExtend` or `SignExtend` as appropriate, and the resulting
/// register will be provided the extended value.
fn put_input_in_rs(
ctx: &mut Lower<Inst>,
input: InsnInput,
narrow_mode: NarrowValueMode,
) -> ResultRS {
let inputs = ctx.get_input_as_source_or_const(input.insn, input.input);
// Unique or non-unique use is fine for merging here.
if let Some((insn, 0)) = inputs.inst.as_inst() {
let op = ctx.data(insn).opcode();
if op == Opcode::Ishl {
let shiftee = InsnInput { insn, input: 0 };
let shift_amt = InsnInput { insn, input: 1 };
// Can we get the shift amount as an immediate?
if let Some(shiftimm) = input_to_shiftimm(ctx, shift_amt) {
let shiftee_bits = ty_bits(ctx.input_ty(insn, 0));
if shiftee_bits <= std::u8::MAX as usize {
let shiftimm = shiftimm.mask(shiftee_bits as u8);
let reg = put_input_in_reg(ctx, shiftee, narrow_mode);
return ResultRS::RegShift(reg, ShiftOpAndAmt::new(ShiftOp::LSL, shiftimm));
}
}
}
}
ResultRS::Reg(put_input_in_reg(ctx, input, narrow_mode))
}
/// Lower an instruction input to a reg or reg/shift, or reg/extend operand.
/// This does not actually codegen the source instruction; it just uses the
/// vreg into which the source instruction will generate its value.
///
/// See note on `put_input_in_rs` for a description of `narrow_mode`.
fn put_input_in_rse(
ctx: &mut Lower<Inst>,
input: InsnInput,
narrow_mode: NarrowValueMode,
) -> ResultRSE {
let value = ctx.input_as_value(input.insn, input.input);
if let Some((val, extendop)) = get_as_extended_value(ctx, value, narrow_mode) {
let reg = put_value_in_reg(ctx, val, NarrowValueMode::None);
return ResultRSE::RegExtend(reg, extendop);
}
ResultRSE::from_rs(put_input_in_rs(ctx, input, narrow_mode))
}
fn get_as_extended_value(
ctx: &mut Lower<Inst>,
val: Value,
narrow_mode: NarrowValueMode,
) -> Option<(Value, ExtendOp)> {
let inputs = ctx.get_value_as_source_or_const(val);
let (insn, n) = inputs.inst.as_inst()?;
if n != 0 {
return None;
}
let op = ctx.data(insn).opcode();
let out_ty = ctx.output_ty(insn, 0);
let out_bits = ty_bits(out_ty);
// Is this a zero-extend or sign-extend and can we handle that with a register-mode operator?
if op == Opcode::Uextend || op == Opcode::Sextend {
let sign_extend = op == Opcode::Sextend;
let inner_ty = ctx.input_ty(insn, 0);
let inner_bits = ty_bits(inner_ty);
assert!(inner_bits < out_bits);
if match (sign_extend, narrow_mode) {
// A single zero-extend or sign-extend is equal to itself.
(_, NarrowValueMode::None) => true,
// Two zero-extends or sign-extends in a row is equal to a single zero-extend or sign-extend.
(false, NarrowValueMode::ZeroExtend32) | (false, NarrowValueMode::ZeroExtend64) => true,
(true, NarrowValueMode::SignExtend32) | (true, NarrowValueMode::SignExtend64) => true,
// A zero-extend and a sign-extend in a row is not equal to a single zero-extend or sign-extend
(false, NarrowValueMode::SignExtend32) | (false, NarrowValueMode::SignExtend64) => {
false
}
(true, NarrowValueMode::ZeroExtend32) | (true, NarrowValueMode::ZeroExtend64) => false,
} {
let extendop = match (sign_extend, inner_bits) {
(true, 8) => ExtendOp::SXTB,
(false, 8) => ExtendOp::UXTB,
(true, 16) => ExtendOp::SXTH,
(false, 16) => ExtendOp::UXTH,
(true, 32) => ExtendOp::SXTW,
(false, 32) => ExtendOp::UXTW,
_ => unreachable!(),
};
return Some((ctx.input_as_value(insn, 0), extendop));
}
}
// If `out_ty` is smaller than 32 bits and we need to zero- or sign-extend,
// then get the result into a register and return an Extend-mode operand on
// that register.
if narrow_mode != NarrowValueMode::None
&& ((narrow_mode.is_32bit() && out_bits < 32) || (!narrow_mode.is_32bit() && out_bits < 64))
{
let extendop = match (narrow_mode, out_bits) {
(NarrowValueMode::SignExtend32, 1) | (NarrowValueMode::SignExtend64, 1) => {
ExtendOp::SXTB
}
(NarrowValueMode::ZeroExtend32, 1) | (NarrowValueMode::ZeroExtend64, 1) => {
ExtendOp::UXTB
}
(NarrowValueMode::SignExtend32, 8) | (NarrowValueMode::SignExtend64, 8) => {
ExtendOp::SXTB
}
(NarrowValueMode::ZeroExtend32, 8) | (NarrowValueMode::ZeroExtend64, 8) => {
ExtendOp::UXTB
}
(NarrowValueMode::SignExtend32, 16) | (NarrowValueMode::SignExtend64, 16) => {
ExtendOp::SXTH
}
(NarrowValueMode::ZeroExtend32, 16) | (NarrowValueMode::ZeroExtend64, 16) => {
ExtendOp::UXTH
}
(NarrowValueMode::SignExtend64, 32) => ExtendOp::SXTW,
(NarrowValueMode::ZeroExtend64, 32) => ExtendOp::UXTW,
_ => unreachable!(),
};
return Some((val, extendop));
}
None
}
pub(crate) fn put_input_in_rse_imm12(
ctx: &mut Lower<Inst>,
input: InsnInput,
narrow_mode: NarrowValueMode,
) -> ResultRSEImm12 {
if let Some(imm_value) = input_to_const(ctx, input) {
if let Some(i) = Imm12::maybe_from_u64(imm_value) {
let out_ty_bits = ty_bits(ctx.input_ty(input.insn, input.input));
let is_negative = (i.bits as u64) & (1 << (cmp::max(out_ty_bits, 1) - 1)) != 0;
// This condition can happen if we matched a value that overflows the output type of
// its `iconst` when viewed as a signed value (i.e. iconst.i8 200).
// When that happens we need to lower as a negative value, which we cannot do here.
if !(narrow_mode.is_signed() && is_negative) {
return ResultRSEImm12::Imm12(i);
}
}
}
ResultRSEImm12::from_rse(put_input_in_rse(ctx, input, narrow_mode))
}
//============================================================================
// ALU instruction constructors.
pub(crate) fn alu_inst_imm12(
op: ALUOp,
ty: Type,
rd: Writable<Reg>,
rn: Reg,
rm: ResultRSEImm12,
) -> Inst {
let size = OperandSize::from_ty(ty);
match rm {
ResultRSEImm12::Imm12(imm12) => Inst::AluRRImm12 {
alu_op: op,
size,
rd,
rn,
imm12,
},
ResultRSEImm12::Reg(rm) => Inst::AluRRR {
alu_op: op,
size,
rd,
rn,
rm,
},
ResultRSEImm12::RegShift(rm, shiftop) => Inst::AluRRRShift {
alu_op: op,
size,
rd,
rn,
rm,
shiftop,
},
ResultRSEImm12::RegExtend(rm, extendop) => Inst::AluRRRExtend {
alu_op: op,
size,
rd,
rn,
rm,
extendop,
},
}
}
//============================================================================
// Lowering: addressing mode support. Takes instruction directly, rather
// than an `InsnInput`, to do more introspection.
/// 32-bit addends that make up an address: an input, and an extension mode on that
/// input.
type AddressAddend32List = SmallVec<[(Reg, ExtendOp); 4]>;
/// 64-bit addends that make up an address: just an input.
type AddressAddend64List = SmallVec<[Reg; 4]>;
/// Collect all addends that feed into an address computation, with extend-modes
/// on each. Note that a load/store may have multiple address components (and
/// the CLIF semantics are that these components are added to form the final
/// address), but sometimes the CLIF that we receive still has arguments that
/// refer to `iadd` instructions. We also want to handle uextend/sextend below
/// the add(s).
///
/// We match any 64-bit add (and descend into its inputs), and we match any
/// 32-to-64-bit sign or zero extension. The returned addend-list will use
/// NarrowValueMode values to indicate how to extend each input:
///
/// - NarrowValueMode::None: the associated input is 64 bits wide; no extend.
/// - NarrowValueMode::SignExtend64: the associated input is 32 bits wide;
/// do a sign-extension.
/// - NarrowValueMode::ZeroExtend64: the associated input is 32 bits wide;
/// do a zero-extension.
///
/// We do not descend further into the inputs of extensions (unless it is a constant),
/// because supporting (e.g.) a 32-bit add that is later extended would require
/// additional masking of high-order bits, which is too complex. So, in essence, we
/// descend any number of adds from the roots, collecting all 64-bit address addends;
/// then possibly support extensions at these leaves.
fn collect_address_addends(
ctx: &mut Lower<Inst>,
root: Value,
) -> (AddressAddend64List, AddressAddend32List, i64) {
let mut result32: AddressAddend32List = SmallVec::new();
let mut result64: AddressAddend64List = SmallVec::new();
let mut offset: i64 = 0;
let mut workqueue: SmallVec<[Value; 4]> = smallvec![root];
while let Some(value) = workqueue.pop() {
debug_assert_eq!(ty_bits(ctx.value_ty(value)), 64);
if let Some((op, insn)) = maybe_value_multi(
ctx,
value,
&[
Opcode::Uextend,
Opcode::Sextend,
Opcode::Iadd,
Opcode::Iconst,
],
) {
match op {
Opcode::Uextend | Opcode::Sextend if ty_bits(ctx.input_ty(insn, 0)) == 32 => {
let extendop = if op == Opcode::Uextend {
ExtendOp::UXTW
} else {
ExtendOp::SXTW
};
let extendee_input = InsnInput { insn, input: 0 };
// If the input is a zero-extension of a constant, add the value to the known
// offset.
// Only do this for zero-extension, as generating a sign-extended
// constant may be more instructions than using the 'SXTW' addressing mode.
if let (Some(insn), ExtendOp::UXTW) = (
maybe_input_insn(ctx, extendee_input, Opcode::Iconst),
extendop,
) {
let value = (ctx.get_constant(insn).unwrap() & 0xFFFF_FFFF_u64) as i64;
offset += value;
} else {
let reg = put_input_in_reg(ctx, extendee_input, NarrowValueMode::None);
result32.push((reg, extendop));
}
}
Opcode::Uextend | Opcode::Sextend => {
let reg = put_value_in_reg(ctx, value, NarrowValueMode::None);
result64.push(reg);
}
Opcode::Iadd => {
for input in 0..ctx.num_inputs(insn) {
let addend = ctx.input_as_value(insn, input);
workqueue.push(addend);
}
}
Opcode::Iconst => {
let value: i64 = ctx.get_constant(insn).unwrap() as i64;
offset += value;
}
_ => panic!("Unexpected opcode from maybe_input_insn_multi"),
}
} else {
let reg = put_value_in_reg(ctx, value, NarrowValueMode::ZeroExtend64);
result64.push(reg);
}
}
(result64, result32, offset)
}
/// Lower the address of a pair load or store.
pub(crate) fn lower_pair_address(ctx: &mut Lower<Inst>, addr: Value, offset: i32) -> PairAMode {
// Collect addends through an arbitrary tree of 32-to-64-bit sign/zero
// extends and addition ops. We update these as we consume address
// components, so they represent the remaining addends not yet handled.
let (mut addends64, mut addends32, args_offset) = collect_address_addends(ctx, addr);
let offset = args_offset + (offset as i64);
trace!(
"lower_pair_address: addends64 {:?}, addends32 {:?}, offset {}",
addends64,
addends32,
offset
);
// Pairs basically only have reg + imm formats so we only have to worry about those
let base_reg = if let Some(reg64) = addends64.pop() {
reg64
} else if let Some((reg32, extendop)) = addends32.pop() {
let tmp = ctx.alloc_tmp(I64).only_reg().unwrap();
let signed = match extendop {
ExtendOp::SXTW => true,
ExtendOp::UXTW => false,
_ => unreachable!(),
};
ctx.emit(Inst::Extend {
rd: tmp,
rn: reg32,
signed,
from_bits: 32,
to_bits: 64,
});
tmp.to_reg()
} else {
zero_reg()
};
let addr = ctx.alloc_tmp(I64).only_reg().unwrap();
ctx.emit(Inst::gen_move(addr, base_reg, I64));
// We have the base register, if we have any others, we need to add them
lower_add_addends(ctx, addr, addends64, addends32);
// Figure out what offset we should emit
let imm7 = SImm7Scaled::maybe_from_i64(offset, I64).unwrap_or_else(|| {
lower_add_immediate(ctx, addr, addr.to_reg(), offset);
SImm7Scaled::maybe_from_i64(0, I64).unwrap()
});
PairAMode::SignedOffset(addr.to_reg(), imm7)
}
/// Lower the address of a load or store.
pub(crate) fn lower_address(
ctx: &mut Lower<Inst>,
elem_ty: Type,
addr: Value,
offset: i32,
) -> AMode {
// TODO: support base_reg + scale * index_reg. For this, we would need to
// pattern-match shl or mul instructions.
// Collect addends through an arbitrary tree of 32-to-64-bit sign/zero
// extends and addition ops. We update these as we consume address
// components, so they represent the remaining addends not yet handled.
let (mut addends64, mut addends32, args_offset) = collect_address_addends(ctx, addr);
let mut offset = args_offset + (offset as i64);
trace!(
"lower_address: addends64 {:?}, addends32 {:?}, offset {}",
addends64,
addends32,
offset
);
// First, decide what the `AMode` will be. Take one extendee and one 64-bit
// reg, or two 64-bit regs, or a 64-bit reg and a 32-bit reg with extension,
// or some other combination as appropriate.
let memarg = if addends64.len() > 0 {
if addends32.len() > 0 {
let (reg32, extendop) = addends32.pop().unwrap();
let reg64 = addends64.pop().unwrap();
AMode::RegExtended {
rn: reg64,
rm: reg32,
extendop,
}
} else if offset > 0 && offset < 0x1000 {
let reg64 = addends64.pop().unwrap();
let off = offset;
offset = 0;
AMode::RegOffset {
rn: reg64,
off,
ty: elem_ty,
}
} else if addends64.len() >= 2 {
let reg1 = addends64.pop().unwrap();
let reg2 = addends64.pop().unwrap();
AMode::RegReg { rn: reg1, rm: reg2 }
} else {
let reg1 = addends64.pop().unwrap();
AMode::reg(reg1)
}
} else
/* addends64.len() == 0 */
{
if addends32.len() > 0 {
let tmp = ctx.alloc_tmp(I64).only_reg().unwrap();
let (reg1, extendop) = addends32.pop().unwrap();
let signed = match extendop {
ExtendOp::SXTW => true,
ExtendOp::UXTW => false,
_ => unreachable!(),
};
ctx.emit(Inst::Extend {
rd: tmp,
rn: reg1,
signed,
from_bits: 32,
to_bits: 64,
});
if let Some((reg2, extendop)) = addends32.pop() {
AMode::RegExtended {
rn: tmp.to_reg(),
rm: reg2,
extendop,
}
} else {
AMode::reg(tmp.to_reg())
}
} else
/* addends32.len() == 0 */
{
let off_reg = ctx.alloc_tmp(I64).only_reg().unwrap();
lower_constant_u64(ctx, off_reg, offset as u64);
offset = 0;
AMode::reg(off_reg.to_reg())
}
};
// At this point, if we have any remaining components, we need to allocate a
// temp, replace one of the registers in the AMode with the temp, and emit
// instructions to add together the remaining components. Return immediately
// if this is *not* the case.
if offset == 0 && addends32.len() == 0 && addends64.len() == 0 {
return memarg;
}
// Allocate the temp and shoehorn it into the AMode.
let addr = ctx.alloc_tmp(I64).only_reg().unwrap();
let (reg, memarg) = match memarg {
AMode::RegExtended { rn, rm, extendop } => (
rn,
AMode::RegExtended {
rn: addr.to_reg(),
rm,
extendop,
},
),
AMode::RegOffset { rn, off, ty } => (
rn,
AMode::RegOffset {
rn: addr.to_reg(),
off,
ty,
},
),
AMode::RegReg { rn, rm } => (
rm,
AMode::RegReg {
rn: addr.to_reg(),
rm: rn,
},
),
AMode::UnsignedOffset { rn, uimm12 } => (
rn,
AMode::UnsignedOffset {
rn: addr.to_reg(),
uimm12,
},
),
_ => unreachable!(),
};
// If there is any offset, load that first into `addr`, and add the `reg`
// that we kicked out of the `AMode`; otherwise, start with that reg.
if offset != 0 {
lower_add_immediate(ctx, addr, reg, offset)
} else {
ctx.emit(Inst::gen_move(addr, reg, I64));
}
// Now handle reg64 and reg32-extended components.
lower_add_addends(ctx, addr, addends64, addends32);
memarg
}
fn lower_add_addends(
ctx: &mut Lower<Inst>,
rd: Writable<Reg>,
addends64: AddressAddend64List,
addends32: AddressAddend32List,
) {
for reg in addends64 {
// If the register is the stack reg, we must move it to another reg
// before adding it.
let reg = if reg == stack_reg() {
let tmp = ctx.alloc_tmp(I64).only_reg().unwrap();
ctx.emit(Inst::gen_move(tmp, stack_reg(), I64));
tmp.to_reg()
} else {
reg
};
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::Add,
size: OperandSize::Size64,
rd,
rn: rd.to_reg(),
rm: reg,
});
}
for (reg, extendop) in addends32 {
assert!(reg != stack_reg());
ctx.emit(Inst::AluRRRExtend {
alu_op: ALUOp::Add,
size: OperandSize::Size64,
rd,
rn: rd.to_reg(),
rm: reg,
extendop,
});
}
}
/// Adds into `rd` a signed imm pattern matching the best instruction for it.
// TODO: This function is duplicated in ctx.gen_add_imm
fn lower_add_immediate(ctx: &mut Lower<Inst>, dst: Writable<Reg>, src: Reg, imm: i64) {
// If we can fit offset or -offset in an imm12, use an add-imm
// Otherwise, lower the constant first then add.
if let Some(imm12) = Imm12::maybe_from_u64(imm as u64) {
ctx.emit(Inst::AluRRImm12 {
alu_op: ALUOp::Add,
size: OperandSize::Size64,
rd: dst,
rn: src,
imm12,
});
} else if let Some(imm12) = Imm12::maybe_from_u64(imm.wrapping_neg() as u64) {
ctx.emit(Inst::AluRRImm12 {
alu_op: ALUOp::Sub,
size: OperandSize::Size64,
rd: dst,
rn: src,
imm12,
});
} else {
lower_constant_u64(ctx, dst, imm as u64);
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::Add,
size: OperandSize::Size64,
rd: dst,
rn: dst.to_reg(),
rm: src,
});
}
}
pub(crate) fn lower_constant_u64(ctx: &mut Lower<Inst>, rd: Writable<Reg>, value: u64) {
for inst in Inst::load_constant(rd, value) {
ctx.emit(inst);
}
}
pub(crate) fn lower_constant_f32(ctx: &mut Lower<Inst>, rd: Writable<Reg>, value: f32) {
let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();
for inst in Inst::load_fp_constant32(rd, value.to_bits(), alloc_tmp) {
ctx.emit(inst);
}
}
pub(crate) fn lower_constant_f64(ctx: &mut Lower<Inst>, rd: Writable<Reg>, value: f64) {
let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();
for inst in Inst::load_fp_constant64(rd, value.to_bits(), alloc_tmp) {
ctx.emit(inst);
}
}
pub(crate) fn lower_constant_f128(ctx: &mut Lower<Inst>, rd: Writable<Reg>, value: u128) {
if value == 0 {
// Fast-track a common case. The general case, viz, calling `Inst::load_fp_constant128`,
// is potentially expensive.
ctx.emit(Inst::VecDupImm {
rd,
imm: ASIMDMovModImm::zero(ScalarSize::Size8),
invert: false,
size: VectorSize::Size8x16,
});
} else {
let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();
for inst in Inst::load_fp_constant128(rd, value, alloc_tmp) {
ctx.emit(inst);
}
}
}
pub(crate) fn lower_splat_const(
ctx: &mut Lower<Inst>,
rd: Writable<Reg>,
value: u64,
size: VectorSize,
) {
let (value, narrow_size) = match size.lane_size() {
ScalarSize::Size8 => (value as u8 as u64, ScalarSize::Size128),
ScalarSize::Size16 => (value as u16 as u64, ScalarSize::Size8),
ScalarSize::Size32 => (value as u32 as u64, ScalarSize::Size16),
ScalarSize::Size64 => (value, ScalarSize::Size32),
_ => unreachable!(),
};
let (value, size) = match Inst::get_replicated_vector_pattern(value as u128, narrow_size) {
Some((value, lane_size)) => (
value,
VectorSize::from_lane_size(lane_size, size.is_128bits()),
),
None => (value, size),
};
let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();
for inst in Inst::load_replicated_vector_pattern(rd, value, size, alloc_tmp) {
ctx.emit(inst);
}
}
pub(crate) fn lower_condcode(cc: IntCC) -> Cond {
match cc {
IntCC::Equal => Cond::Eq,
IntCC::NotEqual => Cond::Ne,
IntCC::SignedGreaterThanOrEqual => Cond::Ge,
IntCC::SignedGreaterThan => Cond::Gt,
IntCC::SignedLessThanOrEqual => Cond::Le,
IntCC::SignedLessThan => Cond::Lt,
IntCC::UnsignedGreaterThanOrEqual => Cond::Hs,
IntCC::UnsignedGreaterThan => Cond::Hi,
IntCC::UnsignedLessThanOrEqual => Cond::Ls,
IntCC::UnsignedLessThan => Cond::Lo,
}
}
pub(crate) fn lower_fp_condcode(cc: FloatCC) -> Cond {
// Refer to `codegen/shared/src/condcodes.rs` and to the `FCMP` AArch64 docs.
// The FCMP instruction sets:
// NZCV
// - PCSR.NZCV = 0011 on UN (unordered),
// 0110 on EQ,
// 1000 on LT,
// 0010 on GT.
match cc {
// EQ | LT | GT. Vc => V clear.
FloatCC::Ordered => Cond::Vc,
// UN. Vs => V set.
FloatCC::Unordered => Cond::Vs,
// EQ. Eq => Z set.
FloatCC::Equal => Cond::Eq,
// UN | LT | GT. Ne => Z clear.
FloatCC::NotEqual => Cond::Ne,
// LT | GT.
FloatCC::OrderedNotEqual => unimplemented!(),
// UN | EQ
FloatCC::UnorderedOrEqual => unimplemented!(),
// LT. Mi => N set.
FloatCC::LessThan => Cond::Mi,
// LT | EQ. Ls => C clear or Z set.
FloatCC::LessThanOrEqual => Cond::Ls,
// GT. Gt => Z clear, N = V.
FloatCC::GreaterThan => Cond::Gt,
// GT | EQ. Ge => N = V.
FloatCC::GreaterThanOrEqual => Cond::Ge,
// UN | LT
FloatCC::UnorderedOrLessThan => unimplemented!(),
// UN | LT | EQ
FloatCC::UnorderedOrLessThanOrEqual => unimplemented!(),
// UN | GT
FloatCC::UnorderedOrGreaterThan => unimplemented!(),
// UN | GT | EQ
FloatCC::UnorderedOrGreaterThanOrEqual => unimplemented!(),
}
}
pub(crate) fn lower_vector_compare(
ctx: &mut Lower<Inst>,
rd: Writable<Reg>,
mut rn: Reg,
mut rm: Reg,
ty: Type,
cond: Cond,
) -> CodegenResult<()> {
let is_float = ty.lane_type().is_float();
let size = VectorSize::from_ty(ty);
if is_float && (cond == Cond::Vc || cond == Cond::Vs) {
let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
ctx.emit(Inst::VecRRR {
alu_op: VecALUOp::Fcmeq,
rd,
rn,
rm: rn,
size,
});
ctx.emit(Inst::VecRRR {
alu_op: VecALUOp::Fcmeq,
rd: tmp,
rn: rm,
rm,
size,
});
ctx.emit(Inst::VecRRR {
alu_op: VecALUOp::And,
rd,
rn: rd.to_reg(),
rm: tmp.to_reg(),
size,
});
if cond == Cond::Vs {
ctx.emit(Inst::VecMisc {
op: VecMisc2::Not,
rd,
rn: rd.to_reg(),
size,
});
}
} else {
// 'Less than' operations are implemented by swapping
// the order of operands and using the 'greater than'
// instructions.
// 'Not equal' is implemented with 'equal' and inverting
// the result.
let (alu_op, swap) = match (is_float, cond) {
(false, Cond::Eq) => (VecALUOp::Cmeq, false),
(false, Cond::Ne) => (VecALUOp::Cmeq, false),
(false, Cond::Ge) => (VecALUOp::Cmge, false),
(false, Cond::Gt) => (VecALUOp::Cmgt, false),
(false, Cond::Le) => (VecALUOp::Cmge, true),
(false, Cond::Lt) => (VecALUOp::Cmgt, true),
(false, Cond::Hs) => (VecALUOp::Cmhs, false),
(false, Cond::Hi) => (VecALUOp::Cmhi, false),
(false, Cond::Ls) => (VecALUOp::Cmhs, true),
(false, Cond::Lo) => (VecALUOp::Cmhi, true),
(true, Cond::Eq) => (VecALUOp::Fcmeq, false),
(true, Cond::Ne) => (VecALUOp::Fcmeq, false),
(true, Cond::Mi) => (VecALUOp::Fcmgt, true),
(true, Cond::Ls) => (VecALUOp::Fcmge, true),
(true, Cond::Ge) => (VecALUOp::Fcmge, false),
(true, Cond::Gt) => (VecALUOp::Fcmgt, false),
_ => {
return Err(CodegenError::Unsupported(format!(
"Unsupported {} SIMD vector comparison: {:?}",
if is_float {
"floating-point"
} else {
"integer"
},
cond
)))
}
};
if swap {
std::mem::swap(&mut rn, &mut rm);
}
ctx.emit(Inst::VecRRR {
alu_op,
rd,
rn,
rm,
size,
});
if cond == Cond::Ne {
ctx.emit(Inst::VecMisc {
op: VecMisc2::Not,
rd,
rn: rd.to_reg(),
size,
});
}
}
Ok(())
}
/// Determines whether this condcode interprets inputs as signed or unsigned. See the
/// documentation for the `icmp` instruction in cranelift-codegen/meta/src/shared/instructions.rs
/// for further insights into this.
pub(crate) fn condcode_is_signed(cc: IntCC) -> bool {
match cc {
IntCC::Equal
| IntCC::UnsignedGreaterThanOrEqual
| IntCC::UnsignedGreaterThan
| IntCC::UnsignedLessThanOrEqual
| IntCC::UnsignedLessThan
| IntCC::NotEqual => false,
IntCC::SignedGreaterThanOrEqual
| IntCC::SignedGreaterThan
| IntCC::SignedLessThanOrEqual
| IntCC::SignedLessThan => true,
}
}
//=============================================================================
// Helpers for instruction lowering.
/// Checks for an instance of `op` feeding the given input.
pub(crate) fn maybe_input_insn(
c: &mut Lower<Inst>,
input: InsnInput,
op: Opcode,
) -> Option<IRInst> {
let inputs = c.get_input_as_source_or_const(input.insn, input.input);
trace!(
"maybe_input_insn: input {:?} has options {:?}; looking for op {:?}",
input,
inputs,
op
);
if let Some((src_inst, _)) = inputs.inst.as_inst() {
let data = c.data(src_inst);
trace!(" -> input inst {:?}", data);
if data.opcode() == op {
return Some(src_inst);
}
}
None
}
/// Checks for an instance of `op` defining the given value.
pub(crate) fn maybe_value(c: &mut Lower<Inst>, value: Value, op: Opcode) -> Option<IRInst> {
let inputs = c.get_value_as_source_or_const(value);
if let Some((src_inst, _)) = inputs.inst.as_inst() {
let data = c.data(src_inst);
if data.opcode() == op {
return Some(src_inst);
}
}
None
}
/// Checks for an instance of any one of `ops` defining the given value.
pub(crate) fn maybe_value_multi(
c: &mut Lower<Inst>,
value: Value,
ops: &[Opcode],
) -> Option<(Opcode, IRInst)> {
for &op in ops {
if let Some(inst) = maybe_value(c, value, op) {
return Some((op, inst));
}
}
None
}
/// Checks for an instance of `op` feeding the given input, possibly via a conversion `conv` (e.g.,
/// Bint or a bitcast).
///
/// FIXME cfallin 2020-03-30: this is really ugly. Factor out tree-matching stuff and make it
/// a bit more generic.
pub(crate) fn maybe_input_insn_via_conv(
c: &mut Lower<Inst>,
input: InsnInput,
op: Opcode,
conv: Opcode,
) -> Option<IRInst> {
let inputs = c.get_input_as_source_or_const(input.insn, input.input);
if let Some((src_inst, _)) = inputs.inst.as_inst() {
let data = c.data(src_inst);
if data.opcode() == op {
return Some(src_inst);
}
if data.opcode() == conv {
let inputs = c.get_input_as_source_or_const(src_inst, 0);
if let Some((src_inst, _)) = inputs.inst.as_inst() {
let data = c.data(src_inst);
if data.opcode() == op {
return Some(src_inst);
}
}
}
}
None
}
/// Specifies what [lower_icmp] should do when lowering
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum IcmpOutput {
/// Lowers the comparison into a cond code, discarding the results. The cond code emitted can
/// be checked in the resulting [IcmpResult].
CondCode,
/// Materializes the results into a register. This may overwrite any flags previously set.
Register(Writable<Reg>),
}
impl IcmpOutput {
pub fn reg(&self) -> Option<Writable<Reg>> {
match self {
IcmpOutput::CondCode => None,
IcmpOutput::Register(reg) => Some(*reg),
}
}
}
/// The output of an Icmp lowering.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum IcmpResult {
/// The result was output into the given [Cond]. Callers may perform operations using this [Cond]
/// and its inverse, other [Cond]'s are not guaranteed to be correct.
CondCode(Cond),
/// The result was materialized into the output register.
Register,
}
impl IcmpResult {
pub fn unwrap_cond(&self) -> Cond {
match self {
IcmpResult::CondCode(c) => *c,
_ => panic!("Unwrapped cond, but IcmpResult was {:?}", self),
}
}
}
/// Lower an icmp comparision
///
/// We can lower into the status flags, or materialize the result into a register
/// This is controlled by the `output` parameter.
pub(crate) fn lower_icmp(
ctx: &mut Lower<Inst>,
insn: IRInst,
condcode: IntCC,
output: IcmpOutput,
) -> CodegenResult<IcmpResult> {
trace!(
"lower_icmp: insn {}, condcode: {}, output: {:?}",
insn,
condcode,
output
);
let rd = output.reg().unwrap_or(writable_zero_reg());
let inputs = insn_inputs(ctx, insn);
let cond = lower_condcode(condcode);
let is_signed = condcode_is_signed(condcode);
let ty = ctx.input_ty(insn, 0);
let bits = ty_bits(ty);
let narrow_mode = match (bits <= 32, is_signed) {
(true, true) => NarrowValueMode::SignExtend32,
(true, false) => NarrowValueMode::ZeroExtend32,
(false, true) => NarrowValueMode::SignExtend64,
(false, false) => NarrowValueMode::ZeroExtend64,
};
let mut should_materialize = output.reg().is_some();
let out_condcode = if ty == I128 {
let lhs = put_input_in_regs(ctx, inputs[0]);
let rhs = put_input_in_regs(ctx, inputs[1]);
let tmp1 = ctx.alloc_tmp(I64).only_reg().unwrap();
let tmp2 = ctx.alloc_tmp(I64).only_reg().unwrap();
match condcode {
IntCC::Equal | IntCC::NotEqual => {
// cmp lhs_lo, rhs_lo
// ccmp lhs_hi, rhs_hi, #0, eq
// cset dst, {eq, ne}
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::SubS,
size: OperandSize::Size64,
rd: writable_zero_reg(),
rn: lhs.regs()[0],
rm: rhs.regs()[0],
});
ctx.emit(Inst::CCmp {
size: OperandSize::Size64,
rn: lhs.regs()[1],
rm: rhs.regs()[1],
nzcv: NZCV::new(false, false, false, false),
cond: Cond::Eq,
});
cond
}
_ => {
// cmp lhs_lo, rhs_lo
// cset tmp1, unsigned_cond
// cmp lhs_hi, rhs_hi
// cset tmp2, cond
// csel dst, tmp1, tmp2, eq
let rd = output.reg().unwrap_or(tmp1);
let unsigned_cond = lower_condcode(condcode.unsigned());
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::SubS,
size: OperandSize::Size64,
rd: writable_zero_reg(),
rn: lhs.regs()[0],
rm: rhs.regs()[0],
});
materialize_bool_result(ctx, insn, tmp1, unsigned_cond);
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::SubS,
size: OperandSize::Size64,
rd: writable_zero_reg(),
rn: lhs.regs()[1],
rm: rhs.regs()[1],
});
materialize_bool_result(ctx, insn, tmp2, cond);
ctx.emit(Inst::CSel {
cond: Cond::Eq,
rd,
rn: tmp1.to_reg(),
rm: tmp2.to_reg(),
});
if output == IcmpOutput::CondCode {
// We only need to guarantee that the flags for `cond` are correct, so we can
// compare rd with 0 or 1
// If we are doing compare or equal, we want to compare with 1 instead of zero
if condcode.without_equal() != condcode {
lower_constant_u64(ctx, tmp2, 1);
}
let xzr = zero_reg();
let rd = rd.to_reg();
let tmp2 = tmp2.to_reg();
let (rn, rm) = match condcode {
IntCC::SignedGreaterThanOrEqual => (rd, tmp2),
IntCC::UnsignedGreaterThanOrEqual => (rd, tmp2),
IntCC::SignedLessThanOrEqual => (tmp2, rd),
IntCC::UnsignedLessThanOrEqual => (tmp2, rd),
IntCC::SignedGreaterThan => (rd, xzr),
IntCC::UnsignedGreaterThan => (rd, xzr),
IntCC::SignedLessThan => (xzr, rd),
IntCC::UnsignedLessThan => (xzr, rd),
_ => unreachable!(),
};
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::SubS,
size: OperandSize::Size64,
rd: writable_zero_reg(),
rn,
rm,
});
}
// Prevent a second materialize_bool_result to be emitted at the end of the function
should_materialize = false;
cond
}
}
} else if ty.is_vector() {
assert_ne!(output, IcmpOutput::CondCode);
should_materialize = false;
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
lower_vector_compare(ctx, rd, rn, rm, ty, cond)?;
cond
} else {
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode);
ctx.emit(alu_inst_imm12(ALUOp::SubS, ty, writable_zero_reg(), rn, rm));
cond
};
// Most of the comparisons above produce flags by default, if the caller requested the result
// in a register we materialize those flags into a register. Some branches do end up producing
// the result as a register by default, so we ignore those.
if should_materialize {
materialize_bool_result(ctx, insn, rd, out_condcode);
}
Ok(match output {
// We currently never emit a different register than what was asked for
IcmpOutput::Register(_) => IcmpResult::Register,
IcmpOutput::CondCode => IcmpResult::CondCode(out_condcode),
})
}
pub(crate) fn lower_fcmp_or_ffcmp_to_flags(ctx: &mut Lower<Inst>, insn: IRInst) {
let ty = ctx.input_ty(insn, 0);
let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
ctx.emit(Inst::FpuCmp {
size: ScalarSize::from_ty(ty),
rn,
rm,
});
}
/// Materialize a boolean value into a register from the flags
/// (e.g set by a comparison).
/// A 0 / -1 (all-ones) result as expected for bool operations.
pub(crate) fn materialize_bool_result(
ctx: &mut Lower<Inst>,
insn: IRInst,
rd: Writable<Reg>,
cond: Cond,
) {
// A boolean is 0 / -1; if output width is > 1 use `csetm`,
// otherwise use `cset`.
if ty_bits(ctx.output_ty(insn, 0)) > 1 {
ctx.emit(Inst::CSetm { rd, cond });
} else {
ctx.emit(Inst::CSet { rd, cond });
}
}
//=============================================================================
// Lowering-backend trait implementation.
impl LowerBackend for AArch64Backend {
type MInst = Inst;
fn lower(&self, ctx: &mut Lower<Inst>, ir_inst: IRInst) -> CodegenResult<()> {
lower_inst::lower_insn_to_regs(ctx, ir_inst, &self.triple, &self.flags, &self.isa_flags)
}
fn lower_branch_group(
&self,
ctx: &mut Lower<Inst>,
branches: &[IRInst],
targets: &[MachLabel],
) -> CodegenResult<()> {
lower_inst::lower_branch(ctx, branches, targets)
}
fn maybe_pinned_reg(&self) -> Option<Reg> {
Some(xreg(PINNED_REG))
}
}