//! Lowering rules for AArch64.
//!
//! TODO: opportunities for better code generation:
//!
//! - Smarter use of addressing modes. Recognize a+SCALE*b patterns. Recognize
//!   pre/post-index opportunities.
//!
//! - Floating-point immediates (FIMM instruction).

use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::types::*;
use crate::ir::Inst as IRInst;
use crate::ir::{Opcode, Type};
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::CodegenResult;

use crate::isa::aarch64::inst::*;
use crate::isa::aarch64::AArch64Backend;

use super::lower_inst;
use crate::data_value::DataValue;
use log::{debug, trace};
use regalloc::{Reg, Writable};
use smallvec::SmallVec;

//============================================================================
// Result enum types.
//
// Lowering of a given value results in one of these enums, depending on the
// modes in which we can accept the value.

/// A lowering result: register, register-shift. An SSA value can always be
/// lowered into one of these options; the register form is the fallback.
#[derive(Clone, Debug)]
enum ResultRS {
    Reg(Reg),
    RegShift(Reg, ShiftOpAndAmt),
}

/// A lowering result: register, register-shift, register-extend. An SSA value can always be
/// lowered into one of these options; the register form is the fallback.
#[derive(Clone, Debug)]
enum ResultRSE {
    Reg(Reg),
    RegShift(Reg, ShiftOpAndAmt),
    RegExtend(Reg, ExtendOp),
}

impl ResultRSE {
    fn from_rs(rs: ResultRS) -> ResultRSE {
        match rs {
            ResultRS::Reg(r) => ResultRSE::Reg(r),
            ResultRS::RegShift(r, s) => ResultRSE::RegShift(r, s),
        }
    }
}

/// A lowering result: register, register-shift, register-extend, or 12-bit immediate form.
/// An SSA value can always be lowered into one of these options; the register form is the
/// fallback.
#[derive(Clone, Debug)]
pub(crate) enum ResultRSEImm12 {
    Reg(Reg),
    RegShift(Reg, ShiftOpAndAmt),
    RegExtend(Reg, ExtendOp),
    Imm12(Imm12),
}

impl ResultRSEImm12 {
    fn from_rse(rse: ResultRSE) -> ResultRSEImm12 {
        match rse {
            ResultRSE::Reg(r) => ResultRSEImm12::Reg(r),
            ResultRSE::RegShift(r, s) => ResultRSEImm12::RegShift(r, s),
            ResultRSE::RegExtend(r, e) => ResultRSEImm12::RegExtend(r, e),
        }
    }
}

/// A lowering result: register, register-shift, or logical immediate form.
/// An SSA value can always be lowered into one of these options; the register form is the
/// fallback.
#[derive(Clone, Debug)]
pub(crate) enum ResultRSImmLogic {
    Reg(Reg),
    RegShift(Reg, ShiftOpAndAmt),
    ImmLogic(ImmLogic),
}

impl ResultRSImmLogic {
    fn from_rs(rs: ResultRS) -> ResultRSImmLogic {
        match rs {
            ResultRS::Reg(r) => ResultRSImmLogic::Reg(r),
            ResultRS::RegShift(r, s) => ResultRSImmLogic::RegShift(r, s),
        }
    }
}

/// A lowering result: register or immediate shift amount (arg to a shift op).
/// An SSA value can always be lowered into one of these options; the register form is the
/// fallback.
#[derive(Clone, Debug)]
pub(crate) enum ResultRegImmShift {
    Reg(Reg),
    ImmShift(ImmShift),
}

//============================================================================
// Lowering: convert instruction inputs to forms that we can use.

/// Lower an instruction input to a 64-bit constant, if possible.
pub(crate) fn input_to_const<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<u64> {
    let input = ctx.get_input_as_source_or_const(input.insn, input.input);
    input.constant
}
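// Example (illustrative sketch, not part of the lowering logic): given CLIF
// such as `v1 = iconst.i64 42` feeding an input, `input_to_const` returns
// `Some(42)`. Callers below use this to try an immediate encoding before
// falling back to a register, along the lines of:
//
//     if let Some(c) = input_to_const(ctx, input) {        // c == 42
//         if let Some(imm12) = Imm12::maybe_from_u64(c) {  // fits in 12 bits
//             return ResultRSEImm12::Imm12(imm12);
//         }
//     }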
/// Lower an instruction input to a constant register-shift amount, if possible.
pub(crate) fn input_to_shiftimm<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
) -> Option<ShiftOpShiftImm> {
    input_to_const(ctx, input).and_then(ShiftOpShiftImm::maybe_from_shift)
}

pub(crate) fn const_param_to_u128<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    inst: IRInst,
) -> Option<u128> {
    match ctx.get_immediate(inst) {
        Some(DataValue::V128(bytes)) => Some(u128::from_le_bytes(bytes)),
        _ => None,
    }
}

/// How to handle narrow values loaded into registers; see note on `narrow_mode`
/// parameter to `put_input_in_*` below.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum NarrowValueMode {
    None,
    /// Zero-extend to 32 bits if original is < 32 bits.
    ZeroExtend32,
    /// Sign-extend to 32 bits if original is < 32 bits.
    SignExtend32,
    /// Zero-extend to 64 bits if original is < 64 bits.
    ZeroExtend64,
    /// Sign-extend to 64 bits if original is < 64 bits.
    SignExtend64,
}

impl NarrowValueMode {
    fn is_32bit(&self) -> bool {
        match self {
            NarrowValueMode::None => false,
            NarrowValueMode::ZeroExtend32 | NarrowValueMode::SignExtend32 => true,
            NarrowValueMode::ZeroExtend64 | NarrowValueMode::SignExtend64 => false,
        }
    }
}

/// Emits instruction(s) to generate the given 64-bit constant value into a newly-allocated
/// temporary register, returning that register.
fn generate_constant<C: LowerCtx<I = Inst>>(ctx: &mut C, ty: Type, c: u64) -> ValueRegs<Reg> {
    let from_bits = ty_bits(ty);
    let masked = if from_bits < 64 {
        c & ((1u64 << from_bits) - 1)
    } else {
        c
    };

    let cst_copy = ctx.alloc_tmp(ty);
    for inst in Inst::gen_constant(cst_copy, masked as u128, ty, |ty| {
        ctx.alloc_tmp(ty).only_reg().unwrap()
    })
    .into_iter()
    {
        ctx.emit(inst);
    }
    non_writable_value_regs(cst_copy)
}

/// Extends a register according to `narrow_mode`.
/// If extended, the value is always extended to 64 bits, for simplicity.
fn narrow_reg<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    ty: Type,
    in_reg: Reg,
    is_const: bool,
    narrow_mode: NarrowValueMode,
) -> Reg {
    let from_bits = ty_bits(ty) as u8;
    match (narrow_mode, from_bits) {
        (NarrowValueMode::None, _) => in_reg,
        (NarrowValueMode::ZeroExtend32, n) if n < 32 => {
            let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
            ctx.emit(Inst::Extend {
                rd: tmp,
                rn: in_reg,
                signed: false,
                from_bits,
                to_bits: 32,
            });
            tmp.to_reg()
        }
        (NarrowValueMode::SignExtend32, n) if n < 32 => {
            let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
            ctx.emit(Inst::Extend {
                rd: tmp,
                rn: in_reg,
                signed: true,
                from_bits,
                to_bits: 32,
            });
            tmp.to_reg()
        }
        (NarrowValueMode::ZeroExtend32, 32) | (NarrowValueMode::SignExtend32, 32) => in_reg,
        (NarrowValueMode::ZeroExtend64, n) if n < 64 => {
            if is_const {
                // Constants are zero-extended to full 64-bit width on load already.
                in_reg
            } else {
                let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
                ctx.emit(Inst::Extend {
                    rd: tmp,
                    rn: in_reg,
                    signed: false,
                    from_bits,
                    to_bits: 64,
                });
                tmp.to_reg()
            }
        }
        (NarrowValueMode::SignExtend64, n) if n < 64 => {
            let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
            ctx.emit(Inst::Extend {
                rd: tmp,
                rn: in_reg,
                signed: true,
                from_bits,
                to_bits: 64,
            });
            tmp.to_reg()
        }
        (_, 64) => in_reg,
        (_, 128) => in_reg,
        _ => panic!(
            "Unsupported input width: input ty {} bits {} mode {:?}",
            ty, from_bits, narrow_mode
        ),
    }
}
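// Example (illustrative): an 8-bit `iadd` depends only on the low bits of its
// inputs, so `NarrowValueMode::None` suffices; an 8-bit unsigned compare needs
// the high bits clear, so it requests an extension first:
//
//     let rn = put_input_in_reg(ctx, input, NarrowValueMode::None);         // add
//     let rm = put_input_in_reg(ctx, input, NarrowValueMode::ZeroExtend32); // icmp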
/// Lower an instruction input to a register.
///
/// The given register will be extended appropriately, according to
/// `narrow_mode` and the input's type. If extended, the value is
/// always extended to 64 bits, for simplicity.
pub(crate) fn put_input_in_reg<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
    narrow_mode: NarrowValueMode,
) -> Reg {
    let reg = put_input_in_regs(ctx, input)
        .only_reg()
        .expect("Multi-register value not expected");
    let is_const = ctx
        .get_input_as_source_or_const(input.insn, input.input)
        .constant
        .is_some();

    let ty = ctx.input_ty(input.insn, input.input);
    narrow_reg(ctx, ty, reg, is_const, narrow_mode)
}

/// Lower an instruction input to multiple regs.
pub(crate) fn put_input_in_regs<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
) -> ValueRegs<Reg> {
    debug!("put_input_in_regs: input {:?}", input);
    let ty = ctx.input_ty(input.insn, input.input);
    let inputs = ctx.get_input_as_source_or_const(input.insn, input.input);

    let in_regs = if let Some(c) = inputs.constant {
        // Generate constants fresh at each use to minimize long-range register pressure.
        generate_constant(ctx, ty, c)
    } else {
        ctx.put_input_in_regs(input.insn, input.input)
    };

    in_regs
}

/// Lower an instruction input to a reg or reg/shift, or reg/extend operand.
///
/// The `narrow_mode` flag indicates whether the consumer of this value needs
/// the high bits clear. For many operations, such as an add/sub/mul or any
/// bitwise logical operation, the low-bit results depend only on the low-bit
/// inputs, so e.g. we can do an 8-bit add on 32-bit registers where the 8-bit
/// value is stored in the low 8 bits of the register and the high 24 bits are
/// undefined. If the op truly needs the high N bits clear (such as for a
/// divide or a right-shift or a compare-to-zero), `narrow_mode` should be
/// set to `ZeroExtend` or `SignExtend` as appropriate, and the resulting
/// register will hold the extended value.
fn put_input_in_rs<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
    narrow_mode: NarrowValueMode,
) -> ResultRS {
    let inputs = ctx.get_input_as_source_or_const(input.insn, input.input);
    if let Some((insn, 0)) = inputs.inst {
        let op = ctx.data(insn).opcode();

        if op == Opcode::Ishl {
            let shiftee = InsnInput { insn, input: 0 };
            let shift_amt = InsnInput { insn, input: 1 };

            // Can we get the shift amount as an immediate?
            if let Some(shiftimm) = input_to_shiftimm(ctx, shift_amt) {
                let shiftee_bits = ty_bits(ctx.input_ty(insn, 0));
                if shiftee_bits <= std::u8::MAX as usize {
                    let shiftimm = shiftimm.mask(shiftee_bits as u8);
                    let reg = put_input_in_reg(ctx, shiftee, narrow_mode);
                    return ResultRS::RegShift(reg, ShiftOpAndAmt::new(ShiftOp::LSL, shiftimm));
                }
            }
        }
    }
    ResultRS::Reg(put_input_in_reg(ctx, input, narrow_mode))
}
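// Example (illustrative): for CLIF
//
//     v2 = iconst.i32 3
//     v3 = ishl.i32 v0, v2
//
// an input referring to `v3` lowers to
// `ResultRS::RegShift(reg_v0, ShiftOpAndAmt::new(ShiftOp::LSL, 3))`, so a
// consumer such as `iadd` can fold the shift into `add w0, w1, w2, LSL #3`.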
/// Lower an instruction input to a reg or reg/shift, or reg/extend operand.
/// This does not actually codegen the source instruction; it just uses the
/// vreg into which the source instruction will generate its value.
///
/// See note on `put_input_in_rs` for a description of `narrow_mode`.
fn put_input_in_rse<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
    narrow_mode: NarrowValueMode,
) -> ResultRSE {
    let inputs = ctx.get_input_as_source_or_const(input.insn, input.input);
    if let Some((insn, 0)) = inputs.inst {
        let op = ctx.data(insn).opcode();
        let out_ty = ctx.output_ty(insn, 0);
        let out_bits = ty_bits(out_ty);

        // Is this a zero-extend or sign-extend and can we handle that with a
        // register-mode operator?
        if op == Opcode::Uextend || op == Opcode::Sextend {
            let sign_extend = op == Opcode::Sextend;
            let inner_ty = ctx.input_ty(insn, 0);
            let inner_bits = ty_bits(inner_ty);
            assert!(inner_bits < out_bits);
            if match (sign_extend, narrow_mode) {
                // A single zero-extend or sign-extend is equal to itself.
                (_, NarrowValueMode::None) => true,
                // Two zero-extends or two sign-extends in a row collapse into a
                // single zero-extend or sign-extend.
                (false, NarrowValueMode::ZeroExtend32) | (false, NarrowValueMode::ZeroExtend64) => {
                    true
                }
                (true, NarrowValueMode::SignExtend32) | (true, NarrowValueMode::SignExtend64) => {
                    true
                }
                // A zero-extend followed by a sign-extend (or vice versa) cannot be
                // combined into a single extend.
                (false, NarrowValueMode::SignExtend32) | (false, NarrowValueMode::SignExtend64) => {
                    false
                }
                (true, NarrowValueMode::ZeroExtend32) | (true, NarrowValueMode::ZeroExtend64) => {
                    false
                }
            } {
                let extendop = match (sign_extend, inner_bits) {
                    (true, 8) => ExtendOp::SXTB,
                    (false, 8) => ExtendOp::UXTB,
                    (true, 16) => ExtendOp::SXTH,
                    (false, 16) => ExtendOp::UXTH,
                    (true, 32) => ExtendOp::SXTW,
                    (false, 32) => ExtendOp::UXTW,
                    _ => unreachable!(),
                };
                let reg =
                    put_input_in_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
                return ResultRSE::RegExtend(reg, extendop);
            }
        }

        // If `out_ty` is narrower than the width the consumer requires (32 or 64
        // bits, per `narrow_mode`), then get the result into a register and return
        // an Extend-mode operand on that register.
        if narrow_mode != NarrowValueMode::None
            && ((narrow_mode.is_32bit() && out_bits < 32)
                || (!narrow_mode.is_32bit() && out_bits < 64))
        {
            let reg = put_input_in_reg(ctx, input, NarrowValueMode::None);
            let extendop = match (narrow_mode, out_bits) {
                (NarrowValueMode::SignExtend32, 1) | (NarrowValueMode::SignExtend64, 1) => {
                    ExtendOp::SXTB
                }
                (NarrowValueMode::ZeroExtend32, 1) | (NarrowValueMode::ZeroExtend64, 1) => {
                    ExtendOp::UXTB
                }
                (NarrowValueMode::SignExtend32, 8) | (NarrowValueMode::SignExtend64, 8) => {
                    ExtendOp::SXTB
                }
                (NarrowValueMode::ZeroExtend32, 8) | (NarrowValueMode::ZeroExtend64, 8) => {
                    ExtendOp::UXTB
                }
                (NarrowValueMode::SignExtend32, 16) | (NarrowValueMode::SignExtend64, 16) => {
                    ExtendOp::SXTH
                }
                (NarrowValueMode::ZeroExtend32, 16) | (NarrowValueMode::ZeroExtend64, 16) => {
                    ExtendOp::UXTH
                }
                (NarrowValueMode::SignExtend64, 32) => ExtendOp::SXTW,
                (NarrowValueMode::ZeroExtend64, 32) => ExtendOp::UXTW,
                _ => unreachable!(),
            };
            return ResultRSE::RegExtend(reg, extendop);
        }
    }
    ResultRSE::from_rs(put_input_in_rs(ctx, input, narrow_mode))
}

pub(crate) fn put_input_in_rse_imm12<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
    narrow_mode: NarrowValueMode,
) -> ResultRSEImm12 {
    if let Some(imm_value) = input_to_const(ctx, input) {
        if let Some(i) = Imm12::maybe_from_u64(imm_value) {
            return ResultRSEImm12::Imm12(i);
        }
    }
    ResultRSEImm12::from_rse(put_input_in_rse(ctx, input, narrow_mode))
}
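// Example (illustrative): `Imm12` covers exactly the AArch64 add/sub
// immediates, i.e. a 12-bit unsigned value optionally shifted left by 12:
//
//     Imm12::maybe_from_u64(0xfff)      // Some(_): low 12 bits
//     Imm12::maybe_from_u64(0x123_000)  // Some(_): 0x123 << 12
//     Imm12::maybe_from_u64(0x1001)     // None: falls back to a register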
/// Like `put_input_in_rse_imm12` above, except is allowed to negate the
/// argument (assuming a two's-complement representation with the given bit
/// width) if this allows use of a 12-bit immediate. Used to flip `add`s with
/// negative immediates to `sub`s (and vice-versa).
pub(crate) fn put_input_in_rse_imm12_maybe_negated<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
    twos_complement_bits: usize,
    narrow_mode: NarrowValueMode,
) -> (ResultRSEImm12, bool) {
    assert!(twos_complement_bits <= 64);
    if let Some(imm_value) = input_to_const(ctx, input) {
        if let Some(i) = Imm12::maybe_from_u64(imm_value) {
            return (ResultRSEImm12::Imm12(i), false);
        }
        let sign_extended =
            ((imm_value as i64) << (64 - twos_complement_bits)) >> (64 - twos_complement_bits);
        let inverted = sign_extended.wrapping_neg();
        if let Some(i) = Imm12::maybe_from_u64(inverted as u64) {
            return (ResultRSEImm12::Imm12(i), true);
        }
    }
    (
        ResultRSEImm12::from_rse(put_input_in_rse(ctx, input, narrow_mode)),
        false,
    )
}

pub(crate) fn put_input_in_rs_immlogic<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
    narrow_mode: NarrowValueMode,
) -> ResultRSImmLogic {
    if let Some(imm_value) = input_to_const(ctx, input) {
        let ty = ctx.input_ty(input.insn, input.input);
        let ty = if ty_bits(ty) < 32 { I32 } else { ty };
        if let Some(i) = ImmLogic::maybe_from_u64(imm_value, ty) {
            return ResultRSImmLogic::ImmLogic(i);
        }
    }
    ResultRSImmLogic::from_rs(put_input_in_rs(ctx, input, narrow_mode))
}

pub(crate) fn put_input_in_reg_immshift<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
    shift_width_bits: usize,
) -> ResultRegImmShift {
    if let Some(imm_value) = input_to_const(ctx, input) {
        let imm_value = imm_value & ((shift_width_bits - 1) as u64);
        if let Some(immshift) = ImmShift::maybe_from_u64(imm_value) {
            return ResultRegImmShift::ImmShift(immshift);
        }
    }
    ResultRegImmShift::Reg(put_input_in_reg(ctx, input, NarrowValueMode::None))
}

//============================================================================
// ALU instruction constructors.

pub(crate) fn alu_inst_imm12(op: ALUOp, rd: Writable<Reg>, rn: Reg, rm: ResultRSEImm12) -> Inst {
    match rm {
        ResultRSEImm12::Imm12(imm12) => Inst::AluRRImm12 {
            alu_op: op,
            rd,
            rn,
            imm12,
        },
        ResultRSEImm12::Reg(rm) => Inst::AluRRR {
            alu_op: op,
            rd,
            rn,
            rm,
        },
        ResultRSEImm12::RegShift(rm, shiftop) => Inst::AluRRRShift {
            alu_op: op,
            rd,
            rn,
            rm,
            shiftop,
        },
        ResultRSEImm12::RegExtend(rm, extendop) => Inst::AluRRRExtend {
            alu_op: op,
            rd,
            rn,
            rm,
            extendop,
        },
    }
}

pub(crate) fn alu_inst_immlogic(
    op: ALUOp,
    rd: Writable<Reg>,
    rn: Reg,
    rm: ResultRSImmLogic,
) -> Inst {
    match rm {
        ResultRSImmLogic::ImmLogic(imml) => Inst::AluRRImmLogic {
            alu_op: op,
            rd,
            rn,
            imml,
        },
        ResultRSImmLogic::Reg(rm) => Inst::AluRRR {
            alu_op: op,
            rd,
            rn,
            rm,
        },
        ResultRSImmLogic::RegShift(rm, shiftop) => Inst::AluRRRShift {
            alu_op: op,
            rd,
            rn,
            rm,
            shiftop,
        },
    }
}

pub(crate) fn alu_inst_immshift(
    op: ALUOp,
    rd: Writable<Reg>,
    rn: Reg,
    rm: ResultRegImmShift,
) -> Inst {
    match rm {
        ResultRegImmShift::ImmShift(immshift) => Inst::AluRRImmShift {
            alu_op: op,
            rd,
            rn,
            immshift,
        },
        ResultRegImmShift::Reg(rm) => Inst::AluRRR {
            alu_op: op,
            rd,
            rn,
            rm,
        },
    }
}

//============================================================================
// Lowering: addressing mode support. Takes instruction directly, rather
// than an `InsnInput`, to do more introspection.

/// 32-bit addends that make up an address: an input, and an extension mode on that
/// input.
type AddressAddend32List = SmallVec<[(Reg, ExtendOp); 4]>;
/// 64-bit addends that make up an address: just an input.
type AddressAddend64List = SmallVec<[Reg; 4]>;
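// Example (illustrative): for an address tree such as
//
//     v4 = iadd.i64 v0, v1
//     v5 = uextend.i64 v2       ;; v2 is i32
//     v6 = iadd.i64 v4, v5
//
// `collect_address_addends` (below) on `v6` yields roughly
// `([reg_v0, reg_v1], [(reg_v2, ExtendOp::UXTW)], 0)`.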
/// Collect all addends that feed into an address computation, with extend-modes
/// on each. Note that a load/store may have multiple address components (and
/// the CLIF semantics are that these components are added to form the final
/// address), but sometimes the CLIF that we receive still has arguments that
/// refer to `iadd` instructions. We also want to handle uextend/sextend below
/// the add(s).
///
/// We match any 64-bit add (and descend into its inputs), and we match any
/// 32-to-64-bit sign or zero extension. The returned addend-list will use
/// NarrowValueMode values to indicate how to extend each input:
///
/// - NarrowValueMode::None: the associated input is 64 bits wide; no extend.
/// - NarrowValueMode::SignExtend64: the associated input is 32 bits wide;
///   do a sign-extension.
/// - NarrowValueMode::ZeroExtend64: the associated input is 32 bits wide;
///   do a zero-extension.
///
/// We do not descend further into the inputs of extensions (unless the extended
/// value is a constant), because supporting (e.g.) a 32-bit add that is later
/// extended would require additional masking of high-order bits, which is too
/// complex. So, in essence, we descend any number of adds from the roots,
/// collecting all 64-bit address addends; then possibly support extensions at
/// these leaves.
fn collect_address_addends<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    roots: &[InsnInput],
) -> (AddressAddend64List, AddressAddend32List, i64) {
    let mut result32: AddressAddend32List = SmallVec::new();
    let mut result64: AddressAddend64List = SmallVec::new();
    let mut offset: i64 = 0;

    let mut workqueue: SmallVec<[InsnInput; 4]> = roots.iter().cloned().collect();

    while let Some(input) = workqueue.pop() {
        debug_assert!(ty_bits(ctx.input_ty(input.insn, input.input)) == 64);
        if let Some((op, insn)) = maybe_input_insn_multi(
            ctx,
            input,
            &[
                Opcode::Uextend,
                Opcode::Sextend,
                Opcode::Iadd,
                Opcode::Iconst,
            ],
        ) {
            match op {
                Opcode::Uextend | Opcode::Sextend if ty_bits(ctx.input_ty(insn, 0)) == 32 => {
                    let extendop = if op == Opcode::Uextend {
                        ExtendOp::UXTW
                    } else {
                        ExtendOp::SXTW
                    };
                    let extendee_input = InsnInput { insn, input: 0 };
                    // If the input is a zero-extension of a constant, add the value
                    // to the known offset.
                    // Only do this for zero-extension, as generating a sign-extended
                    // constant may be more instructions than using the 'SXTW'
                    // addressing mode.
                    if let (Some(insn), ExtendOp::UXTW) = (
                        maybe_input_insn(ctx, extendee_input, Opcode::Iconst),
                        extendop,
                    ) {
                        let value = (ctx.get_constant(insn).unwrap() & 0xFFFF_FFFF_u64) as i64;
                        offset += value;
                    } else {
                        let reg = put_input_in_reg(ctx, extendee_input, NarrowValueMode::None);
                        result32.push((reg, extendop));
                    }
                }
                Opcode::Uextend | Opcode::Sextend => {
                    let reg = put_input_in_reg(ctx, input, NarrowValueMode::None);
                    result64.push(reg);
                }
                Opcode::Iadd => {
                    for input in 0..ctx.num_inputs(insn) {
                        let addend = InsnInput { insn, input };
                        workqueue.push(addend);
                    }
                }
                Opcode::Iconst => {
                    let value: i64 = ctx.get_constant(insn).unwrap() as i64;
                    offset += value;
                }
                _ => panic!("Unexpected opcode from maybe_input_insn_multi"),
            }
        } else {
            let reg = put_input_in_reg(ctx, input, NarrowValueMode::ZeroExtend64);
            result64.push(reg);
        }
    }

    (result64, result32, offset)
}
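// Example (illustrative): with one 64-bit addend and a small positive offset,
// `lower_address` (below) selects `AMode::RegOffset(reg, off, ty)`, i.e. an
// `ldr x0, [x1, #off]` form; with two 64-bit addends and no 32-bit addends it
// selects `AMode::RegReg(r1, r2)`, i.e. `ldr x0, [x1, x2]`.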
/// Lower the address of a load or store.
pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    elem_ty: Type,
    roots: &[InsnInput],
    offset: i32,
) -> AMode {
    // TODO: support base_reg + scale * index_reg. For this, we would need to
    // pattern-match shl or mul instructions (Load/StoreComplex don't include
    // scale factors).

    // Collect addends through an arbitrary tree of 32-to-64-bit sign/zero
    // extends and addition ops. We update these as we consume address
    // components, so they represent the remaining addends not yet handled.
    let (mut addends64, mut addends32, args_offset) = collect_address_addends(ctx, roots);
    let mut offset = args_offset + (offset as i64);

    trace!(
        "lower_address: addends64 {:?}, addends32 {:?}, offset {}",
        addends64,
        addends32,
        offset
    );

    // First, decide what the `AMode` will be. Take one extendee and one 64-bit
    // reg, or two 64-bit regs, or a 64-bit reg and a 32-bit reg with extension,
    // or some other combination as appropriate.
    let memarg = if addends64.len() > 0 {
        if addends32.len() > 0 {
            let (reg32, extendop) = addends32.pop().unwrap();
            let reg64 = addends64.pop().unwrap();
            AMode::RegExtended(reg64, reg32, extendop)
        } else if offset > 0 && offset < 0x1000 {
            let reg64 = addends64.pop().unwrap();
            let off = offset;
            offset = 0;
            AMode::RegOffset(reg64, off, elem_ty)
        } else if addends64.len() >= 2 {
            let reg1 = addends64.pop().unwrap();
            let reg2 = addends64.pop().unwrap();
            AMode::RegReg(reg1, reg2)
        } else {
            let reg1 = addends64.pop().unwrap();
            AMode::reg(reg1)
        }
    } else
    /* addends64.len() == 0 */
    {
        if addends32.len() > 0 {
            let tmp = ctx.alloc_tmp(I64).only_reg().unwrap();
            let (reg1, extendop) = addends32.pop().unwrap();
            let signed = match extendop {
                ExtendOp::SXTW => true,
                ExtendOp::UXTW => false,
                _ => unreachable!(),
            };
            ctx.emit(Inst::Extend {
                rd: tmp,
                rn: reg1,
                signed,
                from_bits: 32,
                to_bits: 64,
            });
            if let Some((reg2, extendop)) = addends32.pop() {
                AMode::RegExtended(tmp.to_reg(), reg2, extendop)
            } else {
                AMode::reg(tmp.to_reg())
            }
        } else
        /* addends32.len() == 0 */
        {
            let off_reg = ctx.alloc_tmp(I64).only_reg().unwrap();
            lower_constant_u64(ctx, off_reg, offset as u64);
            offset = 0;
            AMode::reg(off_reg.to_reg())
        }
    };

    // At this point, if we have any remaining components, we need to allocate a
    // temp, replace one of the registers in the AMode with the temp, and emit
    // instructions to add together the remaining components. Return immediately
    // if this is *not* the case.
    if offset == 0 && addends32.len() == 0 && addends64.len() == 0 {
        return memarg;
    }

    // Allocate the temp and shoehorn it into the AMode.
    let addr = ctx.alloc_tmp(I64).only_reg().unwrap();
    let (reg, memarg) = match memarg {
        AMode::RegExtended(r1, r2, extendop) => {
            (r1, AMode::RegExtended(addr.to_reg(), r2, extendop))
        }
        AMode::RegOffset(r, off, ty) => (r, AMode::RegOffset(addr.to_reg(), off, ty)),
        AMode::RegReg(r1, r2) => (r2, AMode::RegReg(addr.to_reg(), r1)),
        AMode::UnsignedOffset(r, imm) => (r, AMode::UnsignedOffset(addr.to_reg(), imm)),
        _ => unreachable!(),
    };

    // If there is any offset, load that first into `addr`, and add the `reg`
    // that we kicked out of the `AMode`; otherwise, start with that reg.
    if offset != 0 {
        // If we can fit offset or -offset in an imm12, use an add-imm
        // to combine the reg and offset. Otherwise, load value first then add.
        if let Some(imm12) = Imm12::maybe_from_u64(offset as u64) {
            ctx.emit(Inst::AluRRImm12 {
                alu_op: ALUOp::Add64,
                rd: addr,
                rn: reg,
                imm12,
            });
        } else if let Some(imm12) = Imm12::maybe_from_u64(offset.wrapping_neg() as u64) {
            ctx.emit(Inst::AluRRImm12 {
                alu_op: ALUOp::Sub64,
                rd: addr,
                rn: reg,
                imm12,
            });
        } else {
            lower_constant_u64(ctx, addr, offset as u64);
            ctx.emit(Inst::AluRRR {
                alu_op: ALUOp::Add64,
                rd: addr,
                rn: addr.to_reg(),
                rm: reg,
            });
        }
    } else {
        ctx.emit(Inst::gen_move(addr, reg, I64));
    }
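    // Example (illustrative): a leftover offset of 8 folds into
    // `add addr, reg, #8`; an offset of -8 becomes `sub addr, reg, #8`; an
    // offset such as 0x12345 fits neither imm12 form, so it is materialized
    // via `lower_constant_u64` and combined with a register-register `add`.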
    // Now handle reg64 and reg32-extended components.
    for reg in addends64 {
        // If the register is the stack reg, we must move it to another reg
        // before adding it.
        let reg = if reg == stack_reg() {
            let tmp = ctx.alloc_tmp(I64).only_reg().unwrap();
            ctx.emit(Inst::gen_move(tmp, stack_reg(), I64));
            tmp.to_reg()
        } else {
            reg
        };
        ctx.emit(Inst::AluRRR {
            alu_op: ALUOp::Add64,
            rd: addr,
            rn: addr.to_reg(),
            rm: reg,
        });
    }
    for (reg, extendop) in addends32 {
        assert!(reg != stack_reg());
        ctx.emit(Inst::AluRRRExtend {
            alu_op: ALUOp::Add64,
            rd: addr,
            rn: addr.to_reg(),
            rm: reg,
            extendop,
        });
    }

    memarg
}

pub(crate) fn lower_constant_u64<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    rd: Writable<Reg>,
    value: u64,
) {
    for inst in Inst::load_constant(rd, value) {
        ctx.emit(inst);
    }
}

pub(crate) fn lower_constant_f32<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    rd: Writable<Reg>,
    value: f32,
) {
    let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();

    for inst in Inst::load_fp_constant32(rd, value.to_bits(), alloc_tmp) {
        ctx.emit(inst);
    }
}

pub(crate) fn lower_constant_f64<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    rd: Writable<Reg>,
    value: f64,
) {
    let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();

    for inst in Inst::load_fp_constant64(rd, value.to_bits(), alloc_tmp) {
        ctx.emit(inst);
    }
}

pub(crate) fn lower_constant_f128<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    rd: Writable<Reg>,
    value: u128,
) {
    if value == 0 {
        // Fast-track a common case. The general case, viz. calling
        // `Inst::load_fp_constant128`, is potentially expensive.
        ctx.emit(Inst::VecDupImm {
            rd,
            imm: ASIMDMovModImm::zero(ScalarSize::Size8),
            invert: false,
            size: VectorSize::Size8x16,
        });
    } else {
        let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();
        for inst in Inst::load_fp_constant128(rd, value, alloc_tmp) {
            ctx.emit(inst);
        }
    }
}

pub(crate) fn lower_splat_const<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    rd: Writable<Reg>,
    value: u64,
    size: VectorSize,
) {
    let (value, narrow_size) = match size.lane_size() {
        ScalarSize::Size8 => (value as u8 as u64, ScalarSize::Size128),
        ScalarSize::Size16 => (value as u16 as u64, ScalarSize::Size8),
        ScalarSize::Size32 => (value as u32 as u64, ScalarSize::Size16),
        ScalarSize::Size64 => (value, ScalarSize::Size32),
        _ => unreachable!(),
    };
    let (value, size) = match Inst::get_replicated_vector_pattern(value as u128, narrow_size) {
        Some((value, lane_size)) => (
            value,
            VectorSize::from_lane_size(lane_size, size.is_128bits()),
        ),
        None => (value, size),
    };
    let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap();

    for inst in Inst::load_replicated_vector_pattern(rd, value, size, alloc_tmp) {
        ctx.emit(inst);
    }
}

pub(crate) fn lower_condcode(cc: IntCC) -> Cond {
    match cc {
        IntCC::Equal => Cond::Eq,
        IntCC::NotEqual => Cond::Ne,
        IntCC::SignedGreaterThanOrEqual => Cond::Ge,
        IntCC::SignedGreaterThan => Cond::Gt,
        IntCC::SignedLessThanOrEqual => Cond::Le,
        IntCC::SignedLessThan => Cond::Lt,
        IntCC::UnsignedGreaterThanOrEqual => Cond::Hs,
        IntCC::UnsignedGreaterThan => Cond::Hi,
        IntCC::UnsignedLessThanOrEqual => Cond::Ls,
        IntCC::UnsignedLessThan => Cond::Lo,
        IntCC::Overflow => Cond::Vs,
        IntCC::NotOverflow => Cond::Vc,
    }
}
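// Example (illustrative): `lower_condcode(IntCC::SignedLessThan)` returns
// `Cond::Lt`; an `icmp slt` thus lowers (via `lower_icmp_or_ifcmp_to_flags`
// below) to a flags-setting `subs xzr, x0, x1` followed by a use of the `lt`
// condition, e.g. `b.lt <target>` or `cset x0, lt`.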
pub(crate) fn lower_fp_condcode(cc: FloatCC) -> Cond {
    // Refer to `codegen/shared/src/condcodes.rs` and to the `FCMP` AArch64 docs.
    // The FCMP instruction sets:
    //                  NZCV
    // - PSTATE.NZCV = 0011 on UN (unordered),
    //                 0110 on EQ,
    //                 1000 on LT,
    //                 0010 on GT.
    match cc {
        // EQ | LT | GT. Vc => V clear.
        FloatCC::Ordered => Cond::Vc,
        // UN. Vs => V set.
        FloatCC::Unordered => Cond::Vs,
        // EQ. Eq => Z set.
        FloatCC::Equal => Cond::Eq,
        // UN | LT | GT. Ne => Z clear.
        FloatCC::NotEqual => Cond::Ne,
        // LT | GT.
        FloatCC::OrderedNotEqual => unimplemented!(),
        // UN | EQ.
        FloatCC::UnorderedOrEqual => unimplemented!(),
        // LT. Mi => N set.
        FloatCC::LessThan => Cond::Mi,
        // LT | EQ. Ls => C clear or Z set.
        FloatCC::LessThanOrEqual => Cond::Ls,
        // GT. Gt => Z clear, N = V.
        FloatCC::GreaterThan => Cond::Gt,
        // GT | EQ. Ge => N = V.
        FloatCC::GreaterThanOrEqual => Cond::Ge,
        // UN | LT.
        FloatCC::UnorderedOrLessThan => unimplemented!(),
        // UN | LT | EQ.
        FloatCC::UnorderedOrLessThanOrEqual => unimplemented!(),
        // UN | GT.
        FloatCC::UnorderedOrGreaterThan => unimplemented!(),
        // UN | GT | EQ.
        FloatCC::UnorderedOrGreaterThanOrEqual => unimplemented!(),
    }
}

pub(crate) fn lower_vector_compare<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    rd: Writable<Reg>,
    mut rn: Reg,
    mut rm: Reg,
    ty: Type,
    cond: Cond,
) -> CodegenResult<()> {
    let is_float = match ty {
        F32X4 | F64X2 => true,
        _ => false,
    };
    let size = VectorSize::from_ty(ty);
    // 'Less than' operations are implemented by swapping
    // the order of operands and using the 'greater than'
    // instructions.
    // 'Not equal' is implemented with 'equal' and inverting
    // the result.
    let (alu_op, swap) = match (is_float, cond) {
        (false, Cond::Eq) => (VecALUOp::Cmeq, false),
        (false, Cond::Ne) => (VecALUOp::Cmeq, false),
        (false, Cond::Ge) => (VecALUOp::Cmge, false),
        (false, Cond::Gt) => (VecALUOp::Cmgt, false),
        (false, Cond::Le) => (VecALUOp::Cmge, true),
        (false, Cond::Lt) => (VecALUOp::Cmgt, true),
        (false, Cond::Hs) => (VecALUOp::Cmhs, false),
        (false, Cond::Hi) => (VecALUOp::Cmhi, false),
        (false, Cond::Ls) => (VecALUOp::Cmhs, true),
        (false, Cond::Lo) => (VecALUOp::Cmhi, true),
        (true, Cond::Eq) => (VecALUOp::Fcmeq, false),
        (true, Cond::Ne) => (VecALUOp::Fcmeq, false),
        (true, Cond::Mi) => (VecALUOp::Fcmgt, true),
        (true, Cond::Ls) => (VecALUOp::Fcmge, true),
        (true, Cond::Ge) => (VecALUOp::Fcmge, false),
        (true, Cond::Gt) => (VecALUOp::Fcmgt, false),
        _ => unreachable!(),
    };

    if swap {
        std::mem::swap(&mut rn, &mut rm);
    }

    ctx.emit(Inst::VecRRR {
        alu_op,
        rd,
        rn,
        rm,
        size,
    });

    if cond == Cond::Ne {
        ctx.emit(Inst::VecMisc {
            op: VecMisc2::Not,
            rd,
            rn: rd.to_reg(),
            size,
        });
    }

    Ok(())
}

/// Determines whether this condcode interprets inputs as signed or unsigned. See the
/// documentation for the `icmp` instruction in cranelift-codegen/meta/src/shared/instructions.rs
/// for further insights into this.
pub(crate) fn condcode_is_signed(cc: IntCC) -> bool {
    match cc {
        IntCC::Equal
        | IntCC::UnsignedGreaterThanOrEqual
        | IntCC::UnsignedGreaterThan
        | IntCC::UnsignedLessThanOrEqual
        | IntCC::UnsignedLessThan
        | IntCC::NotEqual => false,
        IntCC::SignedGreaterThanOrEqual
        | IntCC::SignedGreaterThan
        | IntCC::SignedLessThanOrEqual
        | IntCC::SignedLessThan
        | IntCC::Overflow
        | IntCC::NotOverflow => true,
    }
}

//=============================================================================
// Helpers for instruction lowering.

pub(crate) fn choose_32_64<T>(ty: Type, op32: T, op64: T) -> T {
    let bits = ty_bits(ty);
    if bits <= 32 {
        op32
    } else if bits == 64 {
        op64
    } else {
        panic!("choose_32_64 on > 64 bits!")
    }
}

/// Checks for an instance of `op` feeding the given input.
pub(crate) fn maybe_input_insn<C: LowerCtx<I = Inst>>(
    c: &mut C,
    input: InsnInput,
    op: Opcode,
) -> Option<IRInst> {
    let inputs = c.get_input_as_source_or_const(input.insn, input.input);
    debug!(
        "maybe_input_insn: input {:?} has options {:?}; looking for op {:?}",
        input, inputs, op
    );
    if let Some((src_inst, _)) = inputs.inst {
        let data = c.data(src_inst);
        debug!(" -> input inst {:?}", data);
        if data.opcode() == op {
            return Some(src_inst);
        }
    }
    None
}
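// Example (illustrative): when lowering a `select` whose condition is
// `v2 = icmp slt v0, v1`, `maybe_input_insn(ctx, cond_input, Opcode::Icmp)`
// returns the `icmp`, letting the lowering emit a flags-setting compare plus
// `csel` instead of first materializing the boolean into a register.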
/// Checks for an instance of any one of `ops` feeding the given input.
pub(crate) fn maybe_input_insn_multi<C: LowerCtx<I = Inst>>(
    c: &mut C,
    input: InsnInput,
    ops: &[Opcode],
) -> Option<(Opcode, IRInst)> {
    for &op in ops {
        if let Some(inst) = maybe_input_insn(c, input, op) {
            return Some((op, inst));
        }
    }
    None
}

/// Checks for an instance of `op` feeding the given input, possibly via a conversion `conv` (e.g.,
/// Bint or a bitcast).
///
/// FIXME cfallin 2020-03-30: this is really ugly. Factor out tree-matching stuff and make it
/// a bit more generic.
pub(crate) fn maybe_input_insn_via_conv<C: LowerCtx<I = Inst>>(
    c: &mut C,
    input: InsnInput,
    op: Opcode,
    conv: Opcode,
) -> Option<IRInst> {
    let inputs = c.get_input_as_source_or_const(input.insn, input.input);
    if let Some((src_inst, _)) = inputs.inst {
        let data = c.data(src_inst);
        if data.opcode() == op {
            return Some(src_inst);
        }
        if data.opcode() == conv {
            let inputs = c.get_input_as_source_or_const(src_inst, 0);
            if let Some((src_inst, _)) = inputs.inst {
                let data = c.data(src_inst);
                if data.opcode() == op {
                    return Some(src_inst);
                }
            }
        }
    }
    None
}

pub(crate) fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    insn: IRInst,
    is_signed: bool,
) {
    debug!("lower_icmp_or_ifcmp_to_flags: insn {}", insn);
    let ty = ctx.input_ty(insn, 0);
    let bits = ty_bits(ty);
    let narrow_mode = match (bits <= 32, is_signed) {
        (true, true) => NarrowValueMode::SignExtend32,
        (true, false) => NarrowValueMode::ZeroExtend32,
        (false, true) => NarrowValueMode::SignExtend64,
        (false, false) => NarrowValueMode::ZeroExtend64,
    };
    let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
    let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
    let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode);
    debug!("lower_icmp_or_ifcmp_to_flags: rn = {:?} rm = {:?}", rn, rm);
    let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
    let rd = writable_zero_reg();
    ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
}

pub(crate) fn lower_fcmp_or_ffcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
    let ty = ctx.input_ty(insn, 0);
    let bits = ty_bits(ty);
    let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
    let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
    let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
    match bits {
        32 => {
            ctx.emit(Inst::FpuCmp32 { rn, rm });
        }
        64 => {
            ctx.emit(Inst::FpuCmp64 { rn, rm });
        }
        _ => panic!("Unknown float size"),
    }
}

/// Materialize a boolean value into a register from the flags
/// (e.g. set by a comparison).
/// The result is 0 / -1 (all ones), as expected for bool operations.
pub(crate) fn materialize_bool_result<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    insn: IRInst,
    rd: Writable<Reg>,
    cond: Cond,
) {
    // A boolean is 0 / -1; if output width is > 1 use `csetm`,
    // otherwise use `cset`.
    if ty_bits(ctx.output_ty(insn, 0)) > 1 {
        ctx.emit(Inst::CSetm { rd, cond });
    } else {
        ctx.emit(Inst::CSet { rd, cond });
    }
}
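// Example (illustrative): for a `b1` result, `materialize_bool_result` emits
// `cset x0, <cond>` (0 or 1); for a wider bool such as `b32` it emits
// `csetm x0, <cond>` (0 or all ones), matching the 0 / -1 representation
// described above.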
/// This is target-word-size dependent, and it excludes booleans and reftypes.
pub(crate) fn is_valid_atomic_transaction_ty(ty: Type) -> bool {
    match ty {
        I8 | I16 | I32 | I64 => true,
        _ => false,
    }
}

fn load_op_to_ty(op: Opcode) -> Option<Type> {
    match op {
        Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => Some(I8),
        Opcode::Sload16 | Opcode::Uload16 | Opcode::Sload16Complex | Opcode::Uload16Complex => {
            Some(I16)
        }
        Opcode::Sload32 | Opcode::Uload32 | Opcode::Sload32Complex | Opcode::Uload32Complex => {
            Some(I32)
        }
        Opcode::Load | Opcode::LoadComplex => None,
        Opcode::Sload8x8 | Opcode::Uload8x8 | Opcode::Sload8x8Complex | Opcode::Uload8x8Complex => {
            Some(I8X8)
        }
        Opcode::Sload16x4
        | Opcode::Uload16x4
        | Opcode::Sload16x4Complex
        | Opcode::Uload16x4Complex => Some(I16X4),
        Opcode::Sload32x2
        | Opcode::Uload32x2
        | Opcode::Sload32x2Complex
        | Opcode::Uload32x2Complex => Some(I32X2),
        _ => None,
    }
}

/// Helper to lower a load instruction; this is used in several places, because
/// a load can sometimes be merged into another operation.
pub(crate) fn lower_load<
    C: LowerCtx<I = Inst>,
    F: FnMut(&mut C, Writable<Reg>, Type, AMode),
>(
    ctx: &mut C,
    ir_inst: IRInst,
    inputs: &[InsnInput],
    output: InsnOutput,
    mut f: F,
) {
    let op = ctx.data(ir_inst).opcode();

    let elem_ty = load_op_to_ty(op).unwrap_or_else(|| ctx.output_ty(ir_inst, 0));

    let off = ctx.data(ir_inst).load_store_offset().unwrap();
    let mem = lower_address(ctx, elem_ty, &inputs[..], off);
    let rd = get_output_reg(ctx, output).only_reg().unwrap();

    f(ctx, rd, elem_ty, mem);
}

//=============================================================================
// Lowering-backend trait implementation.

impl LowerBackend for AArch64Backend {
    type MInst = Inst;

    fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
        lower_inst::lower_insn_to_regs(ctx, ir_inst, &self.flags, &self.isa_flags)
    }

    fn lower_branch_group<C: LowerCtx<I = Inst>>(
        &self,
        ctx: &mut C,
        branches: &[IRInst],
        targets: &[MachLabel],
    ) -> CodegenResult<()> {
        lower_inst::lower_branch(ctx, branches, targets)
    }

    fn maybe_pinned_reg(&self) -> Option<Reg> {
        Some(xreg(PINNED_REG))
    }
}